Source file src/cmd/compile/internal/ssa/branchelim.go

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import "cmd/internal/src"
     8  
     9  // branchelim tries to eliminate branches by
    10  // generating CondSelect instructions.
    11  //
    12  // Search for basic blocks that look like
    13  //
    14  //	bb0            bb0
    15  //	 | \          /   \
    16  //	 | bb1  or  bb1   bb2    <- trivial if/else blocks
    17  //	 | /          \   /
    18  //	bb2            bb3
    19  //
    20  // where the intermediate blocks are mostly empty (with no side-effects);
    21  // rewrite Phis in the postdominator as CondSelects.
    22  func branchelim(f *Func) {
    23  	// FIXME: add support for lowering CondSelects on more architectures
    24  	switch f.Config.arch {
    25  	case "arm64", "ppc64le", "ppc64", "amd64", "wasm", "loong64":
    26  		// implemented
    27  	default:
    28  		return
    29  	}
    30  
    31  	// Find all the values used in computing the address of any load.
    32  	// Typically these values have operations like AddPtr, Lsh64x64, etc.
    33  	loadAddr := f.newSparseSet(f.NumValues())
    34  	defer f.retSparseSet(loadAddr)
    35  	for _, b := range f.Blocks {
    36  		for _, v := range b.Values {
    37  			switch v.Op {
    38  			case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64:
    39  				loadAddr.add(v.Args[0].ID)
    40  			case OpMove:
    41  				loadAddr.add(v.Args[1].ID)
    42  			}
    43  		}
    44  	}
    45  	po := f.postorder()
    46  	for {
    47  		n := loadAddr.size()
    48  		for _, b := range po {
    49  			for i := len(b.Values) - 1; i >= 0; i-- {
    50  				v := b.Values[i]
    51  				if !loadAddr.contains(v.ID) {
    52  					continue
    53  				}
    54  				for _, a := range v.Args {
    55  					if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
    56  						loadAddr.add(a.ID)
    57  					}
    58  				}
    59  			}
    60  		}
    61  		if loadAddr.size() == n {
    62  			break
    63  		}
    64  	}
    65  
    66  	change := true
    67  	for change {
    68  		change = false
    69  		for _, b := range f.Blocks {
    70  			change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
    71  		}
    72  	}
    73  }
    74  
    75  func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
    76  	if loadAddr.contains(v.ID) {
    77  		// The result of the soon-to-be conditional move is used to compute a load address.
    78  		// We want to avoid generating a conditional move in this case
    79  		// because the load address would now be data-dependent on the condition.
    80  		// Previously it would only be control-dependent on the condition, which is faster
    81  		// if the branch predicts well (or possibly even if it doesn't, if the load will
    82  		// be an expensive cache miss).
    83  		// See issue #26306.
    84  		return false
    85  	}
    86  	if arch == "loong64" {
    87  		// We should not generate conditional moves if neither of the arguments is constant zero,
    88  		// because it requires three instructions (OR, MASKEQZ, MASKNEZ) and will increase the
    89  		// register pressure.
    90  		if !(v.Args[0].isGenericIntConst() && v.Args[0].AuxInt == 0) &&
    91  			!(v.Args[1].isGenericIntConst() && v.Args[1].AuxInt == 0) {
    92  			return false
    93  		}
    94  	}
    95  	// For now, stick to simple scalars that fit in registers
    96  	switch {
    97  	case v.Type.Size() > v.Block.Func.Config.RegSize:
    98  		return false
    99  	case v.Type.IsPtrShaped():
   100  		return true
   101  	case v.Type.IsInteger():
   102  		if arch == "amd64" && v.Type.Size() < 2 {
   103  			// amd64 doesn't support CMOV with byte registers
   104  			return false
   105  		}
   106  		return true
   107  	default:
   108  		return false
   109  	}
   110  }
   111  
   112  // elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
   113  // loadAddr is a set of values which are used to compute the address of a load.
   114  // Those values are exempt from CMOV generation.
   115  func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
   116  	// See if dom is an If with one arm that
   117  	// is trivial and succeeded by the other
   118  	// successor of dom.
   119  	if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
   120  		return false
   121  	}
   122  	var simple, post *Block
   123  	for i := range dom.Succs {
   124  		bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
   125  		if isLeafPlain(bb) && bb.Succs[0].Block() == other {
   126  			simple = bb
   127  			post = other
   128  			break
   129  		}
   130  	}
   131  	if simple == nil || len(post.Preds) != 2 || post == dom {
   132  		return false
   133  	}
   134  
   135  	// We've found our diamond CFG of blocks.
   136  	// Now decide if fusing 'simple' into dom+post
   137  	// looks profitable.
   138  
   139  	// Check that there are Phis, and that all of them
   140  	// can be safely rewritten to CondSelect.
   141  	hasphis := false
   142  	for _, v := range post.Values {
   143  		if v.Op == OpPhi {
   144  			hasphis = true
   145  			if !canCondSelect(v, f.Config.arch, loadAddr) {
   146  				return false
   147  			}
   148  		}
   149  	}
   150  	if !hasphis {
   151  		return false
   152  	}
   153  
   154  	// Pick some upper bound for the number of instructions
   155  	// we'd be willing to execute just to generate a dead
   156  	// argument to CondSelect. In the worst case, this is
   157  	// the number of useless instructions executed.
   158  	const maxfuseinsts = 2
   159  
   160  	if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) {
   161  		return false
   162  	}
   163  
   164  	// Replace Phi instructions in b with CondSelect instructions
   165  	swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
   166  	for _, v := range post.Values {
   167  		if v.Op != OpPhi {
   168  			continue
   169  		}
   170  		v.Op = OpCondSelect
   171  		if swap {
   172  			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   173  		}
   174  		v.AddArg(dom.Controls[0])
   175  	}
   176  
   177  	// Put all of the instructions into 'dom'
   178  	// and update the CFG appropriately.
   179  	dom.Kind = post.Kind
   180  	dom.CopyControls(post)
   181  	dom.Aux = post.Aux
   182  	dom.Succs = append(dom.Succs[:0], post.Succs...)
   183  	for i := range dom.Succs {
   184  		e := dom.Succs[i]
   185  		e.b.Preds[e.i].b = dom
   186  	}
   187  
   188  	// Try really hard to preserve statement marks attached to blocks.
   189  	simplePos := simple.Pos
   190  	postPos := post.Pos
   191  	simpleStmt := simplePos.IsStmt() == src.PosIsStmt
   192  	postStmt := postPos.IsStmt() == src.PosIsStmt
   193  
   194  	for _, v := range simple.Values {
   195  		v.Block = dom
   196  	}
   197  	for _, v := range post.Values {
   198  		v.Block = dom
   199  	}
   200  
   201  	// findBlockPos determines if b contains a stmt-marked value
   202  	// that has the same line number as the Pos for b itself.
   203  	// (i.e. is the position on b actually redundant?)
   204  	findBlockPos := func(b *Block) bool {
   205  		pos := b.Pos
   206  		for _, v := range b.Values {
   207  			// See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos)
   208  			if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt {
   209  				return true
   210  			}
   211  		}
   212  		return false
   213  	}
   214  	if simpleStmt {
   215  		simpleStmt = !findBlockPos(simple)
   216  		if !simpleStmt && simplePos.SameFileAndLine(postPos) {
   217  			postStmt = false
   218  		}
   219  
   220  	}
   221  	if postStmt {
   222  		postStmt = !findBlockPos(post)
   223  	}
   224  
   225  	// If simpleStmt and/or postStmt are still true, then try harder
   226  	// to find the corresponding statement marks new homes.
   227  
   228  	// setBlockPos determines if b contains a can-be-statement value
   229  	// that has the same line number as the Pos for b itself, and
   230  	// puts a statement mark on it, and returns whether it succeeded
   231  	// in this operation.
   232  	setBlockPos := func(b *Block) bool {
   233  		pos := b.Pos
   234  		for _, v := range b.Values {
   235  			if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) {
   236  				v.Pos = v.Pos.WithIsStmt()
   237  				return true
   238  			}
   239  		}
   240  		return false
   241  	}
   242  	// If necessary and possible, add a mark to a value in simple
   243  	if simpleStmt {
   244  		if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) {
   245  			postStmt = false
   246  		}
   247  	}
   248  	// If necessary and possible, add a mark to a value in post
   249  	if postStmt {
   250  		postStmt = !setBlockPos(post)
   251  	}
   252  
   253  	// Before giving up (this was added because it helps), try the end of "dom", and if that is not available,
   254  	// try the values in the successor block if it is uncomplicated.
   255  	if postStmt {
   256  		if dom.Pos.IsStmt() != src.PosIsStmt {
   257  			dom.Pos = postPos
   258  		} else {
   259  			// Try the successor block
   260  			if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 {
   261  				succ := dom.Succs[0].Block()
   262  				for _, v := range succ.Values {
   263  					if isPoorStatementOp(v.Op) {
   264  						continue
   265  					}
   266  					if postPos.SameFileAndLine(v.Pos) {
   267  						v.Pos = v.Pos.WithIsStmt()
   268  					}
   269  					postStmt = false
   270  					break
   271  				}
   272  				// If postStmt still true, tag the block itself if possible
   273  				if postStmt && succ.Pos.IsStmt() != src.PosIsStmt {
   274  					succ.Pos = postPos
   275  				}
   276  			}
   277  		}
   278  	}
   279  
   280  	dom.Values = append(dom.Values, simple.Values...)
   281  	dom.Values = append(dom.Values, post.Values...)
   282  
   283  	// Trash 'post' and 'simple'
   284  	clobberBlock(post)
   285  	clobberBlock(simple)
   286  
   287  	f.invalidateCFG()
   288  	return true
   289  }
   290  
   291  // is this a BlockPlain with one predecessor?
   292  func isLeafPlain(b *Block) bool {
   293  	return b.Kind == BlockPlain && len(b.Preds) == 1
   294  }
   295  
   296  func clobberBlock(b *Block) {
   297  	b.Values = nil
   298  	b.Preds = nil
   299  	b.Succs = nil
   300  	b.Aux = nil
   301  	b.ResetControls()
   302  	b.Likely = BranchUnknown
   303  	b.Kind = BlockInvalid
   304  }
   305  
   306  // elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
   307  // loadAddr is a set of values which are used to compute the address of a load.
   308  // Those values are exempt from CMOV generation.
   309  func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
   310  	// See if 'b' ends in an if/else: it should
   311  	// have two successors, both of which are BlockPlain
   312  	// and succeeded by the same block.
   313  	if b.Kind != BlockIf || b.Likely != BranchUnknown {
   314  		return false
   315  	}
   316  	yes, no := b.Succs[0].Block(), b.Succs[1].Block()
   317  	if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) {
   318  		return false
   319  	}
   320  	if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) {
   321  		return false
   322  	}
   323  	if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
   324  		return false
   325  	}
   326  	// block that postdominates the if/else
   327  	post := b.Succs[0].Block().Succs[0].Block()
   328  	if len(post.Preds) != 2 || post == b {
   329  		return false
   330  	}
   331  	hasphis := false
   332  	for _, v := range post.Values {
   333  		if v.Op == OpPhi {
   334  			hasphis = true
   335  			if !canCondSelect(v, f.Config.arch, loadAddr) {
   336  				return false
   337  			}
   338  		}
   339  	}
   340  	if !hasphis {
   341  		return false
   342  	}
   343  
   344  	// Don't generate CondSelects if branch is cheaper.
   345  	if !shouldElimIfElse(no, yes, post, f.Config.arch) {
   346  		return false
   347  	}
   348  
   349  	// now we're committed: rewrite each Phi as a CondSelect
   350  	swap := post.Preds[0].Block() != b.Succs[0].Block()
   351  	for _, v := range post.Values {
   352  		if v.Op != OpPhi {
   353  			continue
   354  		}
   355  		v.Op = OpCondSelect
   356  		if swap {
   357  			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   358  		}
   359  		v.AddArg(b.Controls[0])
   360  	}
   361  
   362  	// Move the contents of all of these
   363  	// blocks into 'b' and update CFG edges accordingly
   364  	b.Kind = post.Kind
   365  	b.CopyControls(post)
   366  	b.Aux = post.Aux
   367  	b.Succs = append(b.Succs[:0], post.Succs...)
   368  	for i := range b.Succs {
   369  		e := b.Succs[i]
   370  		e.b.Preds[e.i].b = b
   371  	}
   372  	for i := range post.Values {
   373  		post.Values[i].Block = b
   374  	}
   375  	for i := range yes.Values {
   376  		yes.Values[i].Block = b
   377  	}
   378  	for i := range no.Values {
   379  		no.Values[i].Block = b
   380  	}
   381  	b.Values = append(b.Values, yes.Values...)
   382  	b.Values = append(b.Values, no.Values...)
   383  	b.Values = append(b.Values, post.Values...)
   384  
   385  	// trash post, yes, and no
   386  	clobberBlock(yes)
   387  	clobberBlock(no)
   388  	clobberBlock(post)
   389  
   390  	f.invalidateCFG()
   391  	return true
   392  }
   393  
   394  // shouldElimIfElse reports whether estimated cost of eliminating branch
   395  // is lower than threshold.
   396  func shouldElimIfElse(no, yes, post *Block, arch string) bool {
   397  	switch arch {
   398  	default:
   399  		return true
   400  	case "amd64":
   401  		const maxcost = 2
   402  		phi := 0
   403  		other := 0
   404  		for _, v := range post.Values {
   405  			if v.Op == OpPhi {
   406  				// Each phi results in CondSelect, which lowers into CMOV,
   407  				// CMOV has latency >1 on most CPUs.
   408  				phi++
   409  			}
   410  			for _, x := range v.Args {
   411  				if x.Block == no || x.Block == yes {
   412  					other++
   413  				}
   414  			}
   415  		}
   416  		cost := phi * 1
   417  		if phi > 1 {
   418  			// If we have more than 1 phi and some values in post have args
   419  			// in yes or no blocks, we may have to recalculate condition, because
   420  			// those args may clobber flags. For now assume that all operations clobber flags.
   421  			cost += other * 1
   422  		}
   423  		return cost < maxcost
   424  	}
   425  }
   426  
   427  // canSpeculativelyExecute reports whether every value in the block can
   428  // be evaluated without causing any observable side effects (memory
   429  // accesses, panics and so on) except for execution time changes. It
   430  // also ensures that the block does not contain any phis which we can't
   431  // speculatively execute.
   432  // Warning: this function cannot currently detect values that represent
   433  // instructions the execution of which need to be guarded with CPU
   434  // hardware feature checks. See issue #34950.
   435  func canSpeculativelyExecute(b *Block) bool {
   436  	// don't fuse memory ops, Phi ops, divides (can panic),
   437  	// or anything else with side-effects
   438  	for _, v := range b.Values {
   439  		if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() ||
   440  			v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
   441  			return false
   442  		}
   443  	}
   444  	return true
   445  }
   446  
   447  func isDivMod(op Op) bool {
   448  	switch op {
   449  	case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
   450  		OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
   451  		OpDiv32F, OpDiv64F,
   452  		OpMod8, OpMod8u, OpMod16, OpMod16u,
   453  		OpMod32, OpMod32u, OpMod64, OpMod64u:
   454  		return true
   455  	default:
   456  		return false
   457  	}
   458  }
   459  
   460  func isPtrArithmetic(op Op) bool {
   461  	// Pointer arithmetic can't be speculatively executed because the result
   462  	// may be an invalid pointer (if, for example, the condition is that the
   463  	// base pointer is not nil). See issue 56990.
   464  	switch op {
   465  	case OpOffPtr, OpAddPtr, OpSubPtr:
   466  		return true
   467  	default:
   468  		return false
   469  	}
   470  }
   471  

View as plain text