Source file src/cmd/compile/internal/ssa/_gen/S390XOps.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import "strings"
     8  
     9  // Notes:
    10  //  - Integer types live in the low portion of registers. Upper portions are junk.
    11  //  - Boolean types use the low-order byte of a register. 0=false, 1=true.
    12  //    Upper bytes are junk.
    13  //  - When doing sub-register operations, we try to write the whole
    14  //    destination register to avoid a partial-register write.
    15  //  - Unused portions of AuxInt (or the Val portion of ValAndOff) are
    16  //    filled by sign-extending the used portion. Users of AuxInt which interpret
    17  //    AuxInt as unsigned (e.g. shifts) must be careful.
    18  //  - The SB 'register' is implemented using instruction-relative addressing. This
    19  //    places some limitations on when and how memory operands that are addressed
    20  //    relative to SB can be used:
    21  //
    22  //     1. Pseudo-instructions do not always map to a single machine instruction when
    23  //        using the SB 'register' to address data. This is because many machine
    24  //        instructions do not have relative long (RL suffix) equivalents. For example,
    25  //        ADDload is assembled as AG, which has no relative long form.
    26  //
    27  //     2. Loads and stores using relative addressing require the data be aligned
    28  //        according to its size (8-bytes for double words, 4-bytes for words
    29  //        and so on).
    30  //
    31  //    We can always work around these by inserting LARL instructions (load address
    32  //    relative long) in the assembler, but typically this results in worse code
    33  //    generation because the address can't be re-used. Inserting instructions in the
    34  //    assembler also means clobbering the temp register and it is a long-term goal
    35  //    to prevent the compiler doing this so that it can be allocated as a normal
    36  //    register.
    37  //
    38  // For more information about the z/Architecture, the instruction set and the
    39  // addressing modes it supports take a look at the z/Architecture Principles of
    40  // Operation: http://publibfp.boulder.ibm.com/epubs/pdf/dz9zr010.pdf
    41  //
    42  // Suffixes encode the bit width of pseudo-instructions.
    43  // D (double word)  = 64 bit (frequently omitted)
    44  // W (word)         = 32 bit
    45  // H (half word)    = 16 bit
    46  // B (byte)         = 8 bit
    47  // S (single prec.) = 32 bit (double precision is omitted)
    48  
    49  // regNamesS390X lists the s390x register names. init numbers each name by its index in this slice and uses that number as the register's bit position in a regMask, so the order matters and at most 64 entries are allowed. Copied from ../../s390x/reg.go.
    50  var regNamesS390X = []string{
    51  	"R0",
    52  	"R1",
    53  	"R2",
    54  	"R3",
    55  	"R4",
    56  	"R5",
    57  	"R6",
    58  	"R7",
    59  	"R8",
    60  	"R9",
    61  	"R10",
    62  	"R11",
    63  	"R12",
    64  	"g", // R13
    65  	"R14",
    66  	"SP", // R15
    67  	"F0",
    68  	"F1",
    69  	"F2",
    70  	"F3",
    71  	"F4",
    72  	"F5",
    73  	"F6",
    74  	"F7",
    75  	"F8",
    76  	"F9",
    77  	"F10",
    78  	"F11",
    79  	"F12",
    80  	"F13",
    81  	"F14",
    82  	"F15",
    83  
    84  	// If you add registers, update asyncPreempt in runtime.
    85  
    86  	// Pseudo-registers.
    87  	"SB",
    88  }
    89  
    90  func init() {
    91  	// Make map from reg names to reg integers.
    92  	if len(regNamesS390X) > 64 {
    93  		panic("too many registers")
    94  	}
    95  	num := map[string]int{}
    96  	for i, name := range regNamesS390X {
    97  		num[name] = i
    98  	}
    99  	buildReg := func(s string) regMask {
   100  		m := regMask(0)
   101  		for _, r := range strings.Split(s, " ") {
   102  			if n, ok := num[r]; ok {
   103  				m |= regMask(1) << uint(n)
   104  				continue
   105  			}
   106  			panic("register " + r + " not found")
   107  		}
   108  		return m
   109  	}
   110  
   111  	// Common individual register masks
   112  	var (
   113  		sp  = buildReg("SP")
   114  		sb  = buildReg("SB")
   115  		r0  = buildReg("R0")
   116  		tmp = buildReg("R11") // R11 is used as a temporary in a small number of instructions.
   117  
   118  		// R10 is reserved by the assembler.
   119  		gp   = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14")
   120  		gpg  = gp | buildReg("g")
   121  		gpsp = gp | sp
   122  
   123  		// R0 is considered to contain the value 0 in address calculations.
   124  		ptr     = gp &^ r0
   125  		ptrsp   = ptr | sp
   126  		ptrspsb = ptrsp | sb
   127  
   128  		fp         = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15")
   129  		callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
   130  		r1         = buildReg("R1")
   131  		r2         = buildReg("R2")
   132  		r3         = buildReg("R3")
   133  		r9         = buildReg("R9")
   134  	)
   135  	// Common slices of register masks
   136  	var (
   137  		gponly = []regMask{gp}
   138  		fponly = []regMask{fp}
   139  	)
   140  
   141  	// Common regInfo
   142  	var (
   143  		gp01    = regInfo{inputs: []regMask{}, outputs: gponly}
   144  		gp11    = regInfo{inputs: []regMask{gp}, outputs: gponly}
   145  		gp11sp  = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
   146  		gp21    = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   147  		gp21sp  = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
   148  		gp21tmp = regInfo{inputs: []regMask{gp &^ tmp, gp &^ tmp}, outputs: []regMask{gp &^ tmp}, clobbers: tmp}
   149  
   150  		// R0 evaluates to 0 when used as the number of bits to shift
   151  		// so we need to exclude it from that operand.
   152  		sh21 = regInfo{inputs: []regMask{gp, ptr}, outputs: gponly}
   153  
   154  		addr    = regInfo{inputs: []regMask{sp | sb}, outputs: gponly}
   155  		addridx = regInfo{inputs: []regMask{sp | sb, ptrsp}, outputs: gponly}
   156  
   157  		gp2flags       = regInfo{inputs: []regMask{gpsp, gpsp}}
   158  		gp1flags       = regInfo{inputs: []regMask{gpsp}}
   159  		gp2flags1      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   160  		gp11flags      = regInfo{inputs: []regMask{gp}, outputs: gponly}
   161  		gp21flags      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   162  		gp2flags1flags = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   163  
   164  		gpload       = regInfo{inputs: []regMask{ptrspsb, 0}, outputs: gponly}
   165  		gploadidx    = regInfo{inputs: []regMask{ptrspsb, ptrsp, 0}, outputs: gponly}
   166  		gpopload     = regInfo{inputs: []regMask{gp, ptrsp, 0}, outputs: gponly}
   167  		gpstore      = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}}
   168  		gpstoreconst = regInfo{inputs: []regMask{ptrspsb, 0}}
   169  		gpstoreidx   = regInfo{inputs: []regMask{ptrsp, ptrsp, gpsp, 0}}
   170  		gpstorebr    = regInfo{inputs: []regMask{ptrsp, gpsp, 0}}
   171  		gpstorelaa   = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}, outputs: gponly}
   172  		gpstorelab   = regInfo{inputs: []regMask{r1, gpsp, 0}, clobbers: r1}
   173  
   174  		gpmvc = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}}
   175  
   176  		fp01        = regInfo{inputs: []regMask{}, outputs: fponly}
   177  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
   178  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
   179  		fp21clobber = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
   180  		fpgp        = regInfo{inputs: fponly, outputs: gponly}
   181  		gpfp        = regInfo{inputs: gponly, outputs: fponly}
   182  		fp11        = regInfo{inputs: fponly, outputs: fponly}
   183  		fp1flags    = regInfo{inputs: []regMask{fp}}
   184  		fp11clobber = regInfo{inputs: fponly, outputs: fponly}
   185  		fp2flags    = regInfo{inputs: []regMask{fp, fp}}
   186  
   187  		fpload    = regInfo{inputs: []regMask{ptrspsb, 0}, outputs: fponly}
   188  		fploadidx = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}, outputs: fponly}
   189  
   190  		fpstore    = regInfo{inputs: []regMask{ptrspsb, fp, 0}}
   191  		fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}}
   192  
   193  		sync = regInfo{inputs: []regMask{0}}
   194  
   195  		// LoweredAtomicCas may overwrite arg1, so force it to R0 for now.
   196  		cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0}
   197  
   198  		// LoweredAtomicExchange overwrites the output before executing
   199  		// CS{,G}, so the output register must not be the same as the
   200  		// input register. For now we just force the output register to
   201  		// R0.
   202  		exchange = regInfo{inputs: []regMask{ptrsp, gpsp &^ r0, 0}, outputs: []regMask{r0, 0}}
   203  	)
   204  
   205  	var S390Xops = []opData{
   206  		// fp ops
   207  		{name: "FADDS", argLength: 2, reg: fp21clobber, typ: "(Float32,Flags)", asm: "FADDS", commutative: true, resultInArg0: true}, // fp32 arg0 + arg1
   208  		{name: "FADD", argLength: 2, reg: fp21clobber, typ: "(Float64,Flags)", asm: "FADD", commutative: true, resultInArg0: true},   // fp64 arg0 + arg1
   209  		{name: "FSUBS", argLength: 2, reg: fp21clobber, typ: "(Float32,Flags)", asm: "FSUBS", resultInArg0: true},                    // fp32 arg0 - arg1
   210  		{name: "FSUB", argLength: 2, reg: fp21clobber, typ: "(Float64,Flags)", asm: "FSUB", resultInArg0: true},                      // fp64 arg0 - arg1
   211  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true, resultInArg0: true},                                // fp32 arg0 * arg1
   212  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true, resultInArg0: true},                                  // fp64 arg0 * arg1
   213  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS", resultInArg0: true},                                                   // fp32 arg0 / arg1
   214  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV", resultInArg0: true},                                                     // fp64 arg0 / arg1
   215  		{name: "FNEGS", argLength: 1, reg: fp11clobber, asm: "FNEGS", clobberFlags: true},                                            // fp32 -arg0
   216  		{name: "FNEG", argLength: 1, reg: fp11clobber, asm: "FNEG", clobberFlags: true},                                              // fp64 -arg0
   217  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS", resultInArg0: true},                                                 // fp32 arg1 * arg2 + arg0
   218  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true},                                                   // fp64 arg1 * arg2 + arg0
   219  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true},                                                 // fp32 arg1 * arg2 - arg0
   220  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true},                                                   // fp64 arg1 * arg2 - arg0
   221  		{name: "LPDFR", argLength: 1, reg: fp11, asm: "LPDFR"},                                                                       // fp64/fp32 clear sign bit
   222  		{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"},                                                                       // fp64/fp32 set sign bit
   223  		{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"},                                                                       // fp64/fp32 copy arg1 sign bit to arg0
   224  
   225  		// Round to integer, float64 only.
   226  		//
   227  		// aux | rounding mode
   228  		// ----+-----------------------------------
   229  		//   1 | round to nearest, ties away from 0
   230  		//   4 | round to nearest, ties to even
   231  		//   5 | round toward 0
   232  		//   6 | round toward +∞
   233  		//   7 | round toward -∞
   234  		{name: "FIDBR", argLength: 1, reg: fp11, asm: "FIDBR", aux: "Int8"},
   235  
   236  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp32 load
   237  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp64 load
   238  		{name: "FMOVSconst", reg: fp01, asm: "FMOVS", aux: "Float32", rematerializeable: true},                               // fp32 constant
   239  		{name: "FMOVDconst", reg: fp01, asm: "FMOVD", aux: "Float64", rematerializeable: true},                               // fp64 constant
   240  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", aux: "SymOff", symEffect: "Read"},                 // fp32 load indexed by i
   241  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", aux: "SymOff", symEffect: "Read"},                 // fp64 load indexed by i
   242  
   243  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp32 store
   244  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp64 store
   245  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", aux: "SymOff", symEffect: "Write"},                 // fp32 indexed by i store
   246  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", aux: "SymOff", symEffect: "Write"},                 // fp64 indexed by i store
   247  
   248  		// binary ops
   249  		{name: "ADD", argLength: 2, reg: gp21sp, asm: "ADD", commutative: true, clobberFlags: true},                                                                  // arg0 + arg1
   250  		{name: "ADDW", argLength: 2, reg: gp21sp, asm: "ADDW", commutative: true, clobberFlags: true},                                                                // arg0 + arg1
   251  		{name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int32", typ: "UInt64", clobberFlags: true},                                                   // arg0 + auxint
   252  		{name: "ADDWconst", argLength: 1, reg: gp11sp, asm: "ADDW", aux: "Int32", clobberFlags: true},                                                                // arg0 + auxint
   253  		{name: "ADDload", argLength: 3, reg: gpopload, asm: "ADD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 + *arg1. arg2=mem
   254  		{name: "ADDWload", argLength: 3, reg: gpopload, asm: "ADDW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + *arg1. arg2=mem
   255  
   256  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB", clobberFlags: true},                                                                                       // arg0 - arg1
   257  		{name: "SUBW", argLength: 2, reg: gp21, asm: "SUBW", clobberFlags: true},                                                                                     // arg0 - arg1
   258  		{name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                // arg0 - auxint
   259  		{name: "SUBWconst", argLength: 1, reg: gp11, asm: "SUBW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 - auxint
   260  		{name: "SUBload", argLength: 3, reg: gpopload, asm: "SUB", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 - *arg1. arg2=mem
   261  		{name: "SUBWload", argLength: 3, reg: gpopload, asm: "SUBW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - *arg1. arg2=mem
   262  
   263  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true},                                // arg0 * arg1
   264  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true, resultInArg0: true, clobberFlags: true},                                // arg0 * arg1
   265  		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64", resultInArg0: true, clobberFlags: true},                                // arg0 * auxint
   266  		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int32", resultInArg0: true, clobberFlags: true},                                // arg0 * auxint
   267  		{name: "MULLDload", argLength: 3, reg: gpopload, asm: "MULLD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
   268  		{name: "MULLWload", argLength: 3, reg: gpopload, asm: "MULLW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
   269  
   270  		{name: "MULHD", argLength: 2, reg: gp21tmp, asm: "MULHD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true},   // (arg0 * arg1) >> width
   271  		{name: "MULHDU", argLength: 2, reg: gp21tmp, asm: "MULHDU", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 * arg1) >> width
   272  
   273  		{name: "DIVD", argLength: 2, reg: gp21tmp, asm: "DIVD", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
   274  		{name: "DIVW", argLength: 2, reg: gp21tmp, asm: "DIVW", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
   275  		{name: "DIVDU", argLength: 2, reg: gp21tmp, asm: "DIVDU", resultInArg0: true, clobberFlags: true}, // arg0 / arg1
   276  		{name: "DIVWU", argLength: 2, reg: gp21tmp, asm: "DIVWU", resultInArg0: true, clobberFlags: true}, // arg0 / arg1
   277  
   278  		{name: "MODD", argLength: 2, reg: gp21tmp, asm: "MODD", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   279  		{name: "MODW", argLength: 2, reg: gp21tmp, asm: "MODW", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   280  
   281  		{name: "MODDU", argLength: 2, reg: gp21tmp, asm: "MODDU", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   282  		{name: "MODWU", argLength: 2, reg: gp21tmp, asm: "MODWU", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   283  
   284  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true, clobberFlags: true},                                                                    // arg0 & arg1
   285  		{name: "ANDW", argLength: 2, reg: gp21, asm: "ANDW", commutative: true, clobberFlags: true},                                                                  // arg0 & arg1
   286  		{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int64", resultInArg0: true, clobberFlags: true},                                                // arg0 & auxint
   287  		{name: "ANDWconst", argLength: 1, reg: gp11, asm: "ANDW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 & auxint
   288  		{name: "ANDload", argLength: 3, reg: gpopload, asm: "AND", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 & *arg1. arg2=mem
   289  		{name: "ANDWload", argLength: 3, reg: gpopload, asm: "ANDW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & *arg1. arg2=mem
   290  
   291  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true, clobberFlags: true},                                                                    // arg0 | arg1
   292  		{name: "ORW", argLength: 2, reg: gp21, asm: "ORW", commutative: true, clobberFlags: true},                                                                  // arg0 | arg1
   293  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64", resultInArg0: true, clobberFlags: true},                                                // arg0 | auxint
   294  		{name: "ORWconst", argLength: 1, reg: gp11, asm: "ORW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 | auxint
   295  		{name: "ORload", argLength: 3, reg: gpopload, asm: "OR", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 | *arg1. arg2=mem
   296  		{name: "ORWload", argLength: 3, reg: gpopload, asm: "ORW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | *arg1. arg2=mem
   297  
   298  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", commutative: true, clobberFlags: true},                                                                    // arg0 ^ arg1
   299  		{name: "XORW", argLength: 2, reg: gp21, asm: "XORW", commutative: true, clobberFlags: true},                                                                  // arg0 ^ arg1
   300  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64", resultInArg0: true, clobberFlags: true},                                                // arg0 ^ auxint
   301  		{name: "XORWconst", argLength: 1, reg: gp11, asm: "XORW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 ^ auxint
   302  		{name: "XORload", argLength: 3, reg: gpopload, asm: "XOR", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 ^ *arg1. arg2=mem
   303  		{name: "XORWload", argLength: 3, reg: gpopload, asm: "XORW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ *arg1. arg2=mem
   304  
   305  		// Arithmetic ops with carry/borrow chain.
   306  		//
   307  		// A carry is represented by a condition code of 2 or 3 (GT or OV).
   308  		// A borrow is represented by a condition code of 0 or 1 (EQ or LT).
   309  		{name: "ADDC", argLength: 2, reg: gp21flags, asm: "ADDC", typ: "(UInt64,Flags)", commutative: true},                          // (arg0 + arg1, carry out)
   310  		{name: "ADDCconst", argLength: 1, reg: gp11flags, asm: "ADDC", typ: "(UInt64,Flags)", aux: "Int16"},                          // (arg0 + auxint, carry out)
   311  		{name: "ADDE", argLength: 3, reg: gp2flags1flags, asm: "ADDE", typ: "(UInt64,Flags)", commutative: true, resultInArg0: true}, // (arg0 + arg1 + arg2 (carry in), carry out)
   312  		{name: "SUBC", argLength: 2, reg: gp21flags, asm: "SUBC", typ: "(UInt64,Flags)"},                                             // (arg0 - arg1, borrow out)
   313  		{name: "SUBE", argLength: 3, reg: gp2flags1flags, asm: "SUBE", typ: "(UInt64,Flags)", resultInArg0: true},                    // (arg0 - arg1 - arg2 (borrow in), borrow out)
   314  
   315  		// Comparisons.
   316  		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"},   // arg0 compare to arg1
   317  		{name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"}, // arg0 compare to arg1
   318  
   319  		{name: "CMPU", argLength: 2, reg: gp2flags, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   320  		{name: "CMPWU", argLength: 2, reg: gp2flags, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   321  
   322  		{name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", typ: "Flags", aux: "Int32"},     // arg0 compare to auxint
   323  		{name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int32"},   // arg0 compare to auxint
   324  		{name: "CMPUconst", argLength: 1, reg: gp1flags, asm: "CMPU", typ: "Flags", aux: "Int32"},   // arg0 compare to auxint
   325  		{name: "CMPWUconst", argLength: 1, reg: gp1flags, asm: "CMPWU", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint
   326  
   327  		{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "CEBR", typ: "Flags"},  // arg0 compare to arg1, f32
   328  		{name: "FCMP", argLength: 2, reg: fp2flags, asm: "FCMPU", typ: "Flags"},  // arg0 compare to arg1, f64
   329  		{name: "LTDBR", argLength: 1, reg: fp1flags, asm: "LTDBR", typ: "Flags"}, // arg0 compare to 0, f64
   330  		{name: "LTEBR", argLength: 1, reg: fp1flags, asm: "LTEBR", typ: "Flags"}, // arg0 compare to 0, f32
   331  
   332  		{name: "SLD", argLength: 2, reg: sh21, asm: "SLD"},                    // arg0 << arg1, shift amount is mod 64
   333  		{name: "SLW", argLength: 2, reg: sh21, asm: "SLW"},                    // arg0 << arg1, shift amount is mod 64
   334  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "UInt8"}, // arg0 << auxint, shift amount 0-63
   335  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "UInt8"}, // arg0 << auxint, shift amount 0-31
   336  
   337  		{name: "SRD", argLength: 2, reg: sh21, asm: "SRD"},                    // unsigned arg0 >> arg1, shift amount is mod 64
   338  		{name: "SRW", argLength: 2, reg: sh21, asm: "SRW"},                    // unsigned uint32(arg0) >> arg1, shift amount is mod 64
   339  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "UInt8"}, // unsigned arg0 >> auxint, shift amount 0-63
   340  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "UInt8"}, // unsigned uint32(arg0) >> auxint, shift amount 0-31
   341  
   342  		// Arithmetic shifts clobber flags.
   343  		{name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true},                    // signed arg0 >> arg1, shift amount is mod 64
   344  		{name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true},                    // signed int32(arg0) >> arg1, shift amount is mod 64
   345  		{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "UInt8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
   346  		{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "UInt8", clobberFlags: true}, // signed int32(arg0) >> auxint, shift amount 0-31
   347  
   348  		// Rotate instructions.
   349  		// Note: no RLLGconst - use RISBGZ instead.
   350  		{name: "RLLG", argLength: 2, reg: sh21, asm: "RLLG"},                  // arg0 rotate left arg1, rotate amount 0-63
   351  		{name: "RLL", argLength: 2, reg: sh21, asm: "RLL"},                    // arg0 rotate left arg1, rotate amount 0-31
   352  		{name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "UInt8"}, // arg0 rotate left auxint, rotate amount 0-31
   353  
   354  		// Rotate then (and|or|xor|insert) selected bits instructions.
   355  		//
   356  		// Aux is an s390x.RotateParams struct containing Start, End and rotation
   357  		// Amount fields.
   358  		//
   359  		// arg1 is rotated left by the rotation amount then the bits from the start
   360  		// bit to the end bit (inclusive) are combined with arg0 using the logical
   361  		// operation specified. Bit indices are specified from left to right - the
   362  		// MSB is 0 and the LSB is 63.
   363  		//
   364  		// Examples:
   365  		//               |          aux         |
   366  		// | instruction | start | end | amount |          arg0         |          arg1         |         result        |
   367  		// +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+
   368  		// | RXSBG (XOR) |     0 |   1 |      0 | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_ffff | 0x3fff_ffff_ffff_ffff |
   369  		// | RXSBG (XOR) |    62 |  63 |      0 | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_fffc |
   370  		// | RXSBG (XOR) |     0 |  47 |     16 | 0xffff_ffff_ffff_ffff | 0x0000_0000_0000_ffff | 0xffff_ffff_0000_ffff |
   371  		// +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+
   372  		//
   373  		{name: "RXSBG", argLength: 2, reg: gp21, asm: "RXSBG", resultInArg0: true, aux: "S390XRotateParams", clobberFlags: true}, // rotate then xor selected bits
   374  		{name: "RISBGZ", argLength: 1, reg: gp11, asm: "RISBGZ", aux: "S390XRotateParams", clobberFlags: true},                   // rotate then insert selected bits [into zero]
   375  
   376  		// unary ops
   377  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG", clobberFlags: true},   // -arg0
   378  		{name: "NEGW", argLength: 1, reg: gp11, asm: "NEGW", clobberFlags: true}, // -arg0
   379  
   380  		{name: "NOT", argLength: 1, reg: gp11, resultInArg0: true, clobberFlags: true},  // ^arg0
   381  		{name: "NOTW", argLength: 1, reg: gp11, resultInArg0: true, clobberFlags: true}, // ^arg0
   382  
   383  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},   // sqrt(arg0)
   384  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0), float32
   385  
   386  		// Conditional register-register moves.
   387  		// The aux for these values is an s390x.CCMask value representing the condition code mask.
   388  		{name: "LOCGR", argLength: 3, reg: gp2flags1, resultInArg0: true, asm: "LOCGR", aux: "S390XCCMask"}, // load arg1 into arg0 if the condition code in arg2 matches a masked bit in aux.
   389  
   390  		{name: "MOVBreg", argLength: 1, reg: gp11sp, asm: "MOVB", typ: "Int64"},    // sign extend arg0 from int8 to int64
   391  		{name: "MOVBZreg", argLength: 1, reg: gp11sp, asm: "MOVBZ", typ: "UInt64"}, // zero extend arg0 from int8 to int64
   392  		{name: "MOVHreg", argLength: 1, reg: gp11sp, asm: "MOVH", typ: "Int64"},    // sign extend arg0 from int16 to int64
   393  		{name: "MOVHZreg", argLength: 1, reg: gp11sp, asm: "MOVHZ", typ: "UInt64"}, // zero extend arg0 from int16 to int64
   394  		{name: "MOVWreg", argLength: 1, reg: gp11sp, asm: "MOVW", typ: "Int64"},    // sign extend arg0 from int32 to int64
   395  		{name: "MOVWZreg", argLength: 1, reg: gp11sp, asm: "MOVWZ", typ: "UInt64"}, // zero extend arg0 from int32 to int64
   396  
   397  		{name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
   398  
   399  		{name: "LDGR", argLength: 1, reg: gpfp, asm: "LDGR"}, // move int64 to float64 (no conversion)
   400  		{name: "LGDR", argLength: 1, reg: fpgp, asm: "LGDR"}, // move float64 to int64 (no conversion)
   401  
   402  		{name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA", clobberFlags: true}, // convert float64 to int32
   403  		{name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA", clobberFlags: true}, // convert float64 to int64
   404  		{name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA", clobberFlags: true}, // convert float32 to int32
   405  		{name: "CGEBRA", argLength: 1, reg: fpgp, asm: "CGEBRA", clobberFlags: true}, // convert float32 to int64
   406  		{name: "CEFBRA", argLength: 1, reg: gpfp, asm: "CEFBRA", clobberFlags: true}, // convert int32 to float32
   407  		{name: "CDFBRA", argLength: 1, reg: gpfp, asm: "CDFBRA", clobberFlags: true}, // convert int32 to float64
   408  		{name: "CEGBRA", argLength: 1, reg: gpfp, asm: "CEGBRA", clobberFlags: true}, // convert int64 to float32
   409  		{name: "CDGBRA", argLength: 1, reg: gpfp, asm: "CDGBRA", clobberFlags: true}, // convert int64 to float64
   410  		{name: "CLFEBR", argLength: 1, reg: fpgp, asm: "CLFEBR", clobberFlags: true}, // convert float32 to uint32
   411  		{name: "CLFDBR", argLength: 1, reg: fpgp, asm: "CLFDBR", clobberFlags: true}, // convert float64 to uint32
   412  		{name: "CLGEBR", argLength: 1, reg: fpgp, asm: "CLGEBR", clobberFlags: true}, // convert float32 to uint64
   413  		{name: "CLGDBR", argLength: 1, reg: fpgp, asm: "CLGDBR", clobberFlags: true}, // convert float64 to uint64
   414  		{name: "CELFBR", argLength: 1, reg: gpfp, asm: "CELFBR", clobberFlags: true}, // convert uint32 to float32
   415  		{name: "CDLFBR", argLength: 1, reg: gpfp, asm: "CDLFBR", clobberFlags: true}, // convert uint32 to float64
   416  		{name: "CELGBR", argLength: 1, reg: gpfp, asm: "CELGBR", clobberFlags: true}, // convert uint64 to float32
   417  		{name: "CDLGBR", argLength: 1, reg: gpfp, asm: "CDLGBR", clobberFlags: true}, // convert uint64 to float64
   418  
   419  		{name: "LEDBR", argLength: 1, reg: fp11, asm: "LEDBR"}, // convert float64 to float32
   420  		{name: "LDEBR", argLength: 1, reg: fp11, asm: "LDEBR"}, // convert float32 to float64
   421  
   422  		{name: "MOVDaddr", argLength: 1, reg: addr, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
   423  		{name: "MOVDaddridx", argLength: 2, reg: addridx, aux: "SymOff", symEffect: "Addr"},                    // arg0 + arg1 + auxint + aux
   424  
   425  		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
   426  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte from arg0+auxint+aux. arg1=mem.  Zero extend.
   427  		{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},                  // ditto, sign extend to int64
   428  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
   429  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},                  // ditto, sign extend to int64
   430  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
   431  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},                  // ditto, sign extend to int64
   432  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"},   // load 8 bytes from arg0+auxint+aux. arg1=mem
   433  
   434  		{name: "MOVWBR", argLength: 1, reg: gp11, asm: "MOVWBR"}, // arg0 swap bytes
   435  		{name: "MOVDBR", argLength: 1, reg: gp11, asm: "MOVDBR"}, // arg0 swap bytes
   436  
   437  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
   438  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
   439  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
   440  
   441  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store byte in arg1 to arg0+auxint+aux. arg2=mem
   442  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
   443  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
   444  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
   445  		{name: "MOVHBRstore", argLength: 3, reg: gpstorebr, asm: "MOVHBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
   446  		{name: "MOVWBRstore", argLength: 3, reg: gpstorebr, asm: "MOVWBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
   447  		{name: "MOVDBRstore", argLength: 3, reg: gpstorebr, asm: "MOVDBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
   448  
   449  		{name: "MVC", argLength: 3, reg: gpmvc, asm: "MVC", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true, symEffect: "None"}, // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size,off
   450  
   451  		// indexed loads/stores
   452  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", symEffect: "Read"},   // load a byte from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
   453  		{name: "MOVBloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVB", aux: "SymOff", typ: "Int8", symEffect: "Read"},      // load a byte from arg0+arg1+auxint+aux. arg2=mem. Sign extend.
   454  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", symEffect: "Read"},  // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
   455  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVH", aux: "SymOff", typ: "Int16", symEffect: "Read"},     // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Sign extend.
   456  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", symEffect: "Read"},  // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
   457  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVW", aux: "SymOff", typ: "Int32", symEffect: "Read"},     // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Sign extend.
   458  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVD", aux: "SymOff", typ: "UInt64", symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
   459  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHBR", aux: "SymOff", typ: "Int16", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
   460  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWBR", aux: "SymOff", typ: "Int32", symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
   461  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVDBR", aux: "SymOff", typ: "Int64", symEffect: "Read"}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
   462  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", symEffect: "Write"},                // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
   463  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVH", aux: "SymOff", symEffect: "Write"},                // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
   464  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", symEffect: "Write"},                // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
   465  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVD", aux: "SymOff", symEffect: "Write"},                // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
   466  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVHBR", aux: "SymOff", symEffect: "Write"},            // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
   467  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVWBR", aux: "SymOff", symEffect: "Write"},            // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
   468  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVDBR", aux: "SymOff", symEffect: "Write"},            // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
   469  
   470  		// For storeconst ops, the AuxInt field encodes both
   471  		// the value to store and an address offset of the store.
   472  		// Cast AuxInt to a ValAndOff to extract Val and Off fields.
   473  		{name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux.  arg1=mem
   474  		{name: "MOVHstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVH", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 2 bytes of ...
   475  		{name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 4 bytes of ...
   476  		{name: "MOVDstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVD", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of ...
   477  
   478  		{name: "CLEAR", argLength: 2, reg: regInfo{inputs: []regMask{ptr, 0}}, asm: "CLEAR", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Write"},
   479  
   480  		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                                // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   481  		{name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                                  // tail call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   482  		{name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R12"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   483  		{name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{ptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                         // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   484  
   485  		// (InvertFlags (CMP a b)) == (CMP b a)
   486  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   487  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   488  
   489  		// Pseudo-ops
   490  		{name: "LoweredGetG", argLength: 1, reg: gp01}, // arg0=mem
   491  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   492  		// and sorts it to the very beginning of the block to prevent other
   493  		// use of R12 (the closure pointer)
   494  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R12")}}, zeroWidth: true},
   495  		// LoweredNilCheck (defined below): arg0=ptr, arg1=mem, returns void.  Faults if ptr is nil.
   496  		// LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem.
   497  		{name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true},
   498  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   499  		// I.e., if f calls g and g "calls" getcallerpc,
   500  		// the result should be the PC within f that g will return to.
   501  		// See runtime/stubs.go for a more detailed discussion.
   502  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   503  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{ptrsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   504  		// Round ops to block fused-multiply-add extraction.
   505  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   506  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   507  
   508  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
   509  		// It saves all GP registers if necessary,
   510  		// but clobbers R14 (LR) because it's a call,
   511  		// and also clobbers R1 as the PLT stub does.
   512  		// Returns a pointer to a write barrier buffer in R9.
   513  		{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R14") | r1, outputs: []regMask{r9}}, clobberFlags: true, aux: "Int64"},
   514  
   515  		// There are three of these functions so that they can have three different register inputs.
   516  		// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
   517  		// default registers to match so we don't need to copy registers around unnecessarily.
   518  		{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
   519  		{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
   520  		{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
   521  
   522  		// Constant condition code values. The condition code can be 0, 1, 2 or 3.
   523  		{name: "FlagEQ"}, // CC=0 (equal)
   524  		{name: "FlagLT"}, // CC=1 (less than)
   525  		{name: "FlagGT"}, // CC=2 (greater than)
   526  		{name: "FlagOV"}, // CC=3 (overflow)
   527  
   528  		// Fast-BCR-serialization to ensure store-load ordering.
   529  		{name: "SYNC", argLength: 1, reg: sync, asm: "SYNC", typ: "Mem"},
   530  
   531  		// Atomic loads. These are just normal loads but return <value,memory> tuples
   532  		// so they can be properly ordered with other loads.
   533  		// load from arg0+auxint+aux.  arg1=mem.
   534  		{name: "MOVBZatomicload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
   535  		{name: "MOVWZatomicload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
   536  		{name: "MOVDatomicload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
   537  
   538  		// Atomic stores. These are just normal stores.
   539  		// store arg1 to arg0+auxint+aux. arg2=mem.
   540  		{name: "MOVBatomicstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"},
   541  		{name: "MOVWatomicstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"},
   542  		{name: "MOVDatomicstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"},
   543  
   544  		// Atomic adds.
   545  		// *(arg0+auxint+aux) += arg1.  arg2=mem.
   546  		// Returns a tuple of <old contents of *(arg0+auxint+aux), memory>.
   547  		{name: "LAA", argLength: 3, reg: gpstorelaa, asm: "LAA", typ: "(UInt32,Mem)", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   548  		{name: "LAAG", argLength: 3, reg: gpstorelaa, asm: "LAAG", typ: "(UInt64,Mem)", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   549  		{name: "AddTupleFirst32", argLength: 2}, // arg1=tuple <x,y>.  Returns <x+arg0,y>.
   550  		{name: "AddTupleFirst64", argLength: 2}, // arg1=tuple <x,y>.  Returns <x+arg0,y>.
   551  
   552  		// Atomic bitwise operations.
   553  		// Note: 'floor' operations round the pointer down to the nearest word boundary
   554  		// which reflects how they are used in the runtime.
   555  		{name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true},         // *arg0 &= arg1. arg2 = mem.
   556  		{name: "LANfloor", argLength: 3, reg: gpstorelab, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) &= arg1. arg2 = mem.
   557  		{name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true},         // *arg0 |= arg1. arg2 = mem.
   558  		{name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem.
   559  
   560  		// Compare and swap.
   561  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory.
   562  		// if *(arg0+auxint+aux) == arg1 {
   563  		//   *(arg0+auxint+aux) = arg2
   564  		//   return (true, memory)
   565  		// } else {
   566  		//   return (false, memory)
   567  		// }
   568  		// Note that these instructions also return the old value in arg1, but we ignore it.
   569  		// TODO: have these return flags instead of bool.  The current system generates:
   570  		//    CS ...
   571  		//    MOVD  $0, ret
   572  		//    BNE   2(PC)
   573  		//    MOVD  $1, ret
   574  		//    CMPW  ret, $0
   575  		//    BNE ...
   576  		// instead of just
   577  		//    CS ...
   578  		//    BEQ ...
   579  		// but we can't do that because memory-using ops can't generate flags yet
   580  		// (flagalloc wants to move flag-generating instructions around).
   581  		{name: "LoweredAtomicCas32", argLength: 4, reg: cas, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   582  		{name: "LoweredAtomicCas64", argLength: 4, reg: cas, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   583  
   584  		// Lowered atomic swaps, emulated using compare-and-swap.
   585  		// store arg1 to arg0+auxint+aux, arg2=mem.
   586  		{name: "LoweredAtomicExchange32", argLength: 3, reg: exchange, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   587  		{name: "LoweredAtomicExchange64", argLength: 3, reg: exchange, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   588  
   589  		// find leftmost one
   590  		{
   591  			name:         "FLOGR",
   592  			argLength:    1,
   593  			reg:          regInfo{inputs: gponly, outputs: []regMask{buildReg("R0")}, clobbers: buildReg("R1")},
   594  			asm:          "FLOGR",
   595  			typ:          "UInt64",
   596  			clobberFlags: true,
   597  		},
   598  
   599  		// population count
   600  		//
   601  		// Counts the number of ones in each byte of arg0
   602  		// and places the result into the corresponding byte
   603  		// of the result.
   604  		{
   605  			name:         "POPCNT",
   606  			argLength:    1,
   607  			reg:          gp11,
   608  			asm:          "POPCNT",
   609  			typ:          "UInt64",
   610  			clobberFlags: true,
   611  		},
   612  
   613  		// unsigned multiplication (64x64 → 128)
   614  		//
   615  		// Multiply the two 64-bit input operands together and place the 128-bit result into
   616  		// an even-odd register pair. The second register in the target pair also contains
   617  		// one of the input operands. Since we don't currently have a way to specify an
   618  		// even-odd register pair we hardcode this register pair as R2:R3.
   619  		{
   620  			name:      "MLGR",
   621  			argLength: 2,
   622  			reg:       regInfo{inputs: []regMask{gp, r3}, outputs: []regMask{r2, r3}},
   623  			asm:       "MLGR",
   624  		},
   625  
   626  		// pseudo operations to sum the output of the POPCNT instruction
   627  		{name: "SumBytes2", argLength: 1, typ: "UInt8"}, // sum the rightmost 2 bytes in arg0 ignoring overflow
   628  		{name: "SumBytes4", argLength: 1, typ: "UInt8"}, // sum the rightmost 4 bytes in arg0 ignoring overflow
   629  		{name: "SumBytes8", argLength: 1, typ: "UInt8"}, // sum all the bytes in arg0 ignoring overflow
   630  
   631  		// store multiple
   632  		{
   633  			name:           "STMG2",
   634  			argLength:      4,
   635  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), 0}},
   636  			aux:            "SymOff",
   637  			typ:            "Mem",
   638  			asm:            "STMG",
   639  			faultOnNilArg0: true,
   640  			symEffect:      "Write",
   641  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   642  		},
   643  		{
   644  			name:           "STMG3",
   645  			argLength:      5,
   646  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), 0}},
   647  			aux:            "SymOff",
   648  			typ:            "Mem",
   649  			asm:            "STMG",
   650  			faultOnNilArg0: true,
   651  			symEffect:      "Write",
   652  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   653  		},
   654  		{
   655  			name:      "STMG4",
   656  			argLength: 6,
   657  			reg: regInfo{inputs: []regMask{
   658  				ptrsp,
   659  				buildReg("R1"),
   660  				buildReg("R2"),
   661  				buildReg("R3"),
   662  				buildReg("R4"),
   663  				0,
   664  			}},
   665  			aux:            "SymOff",
   666  			typ:            "Mem",
   667  			asm:            "STMG",
   668  			faultOnNilArg0: true,
   669  			symEffect:      "Write",
   670  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   671  		},
   672  		{
   673  			name:           "STM2",
   674  			argLength:      4,
   675  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), 0}},
   676  			aux:            "SymOff",
   677  			typ:            "Mem",
   678  			asm:            "STMY",
   679  			faultOnNilArg0: true,
   680  			symEffect:      "Write",
   681  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   682  		},
   683  		{
   684  			name:           "STM3",
   685  			argLength:      5,
   686  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), 0}},
   687  			aux:            "SymOff",
   688  			typ:            "Mem",
   689  			asm:            "STMY",
   690  			faultOnNilArg0: true,
   691  			symEffect:      "Write",
   692  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   693  		},
   694  		{
   695  			name:      "STM4",
   696  			argLength: 6,
   697  			reg: regInfo{inputs: []regMask{
   698  				ptrsp,
   699  				buildReg("R1"),
   700  				buildReg("R2"),
   701  				buildReg("R3"),
   702  				buildReg("R4"),
   703  				0,
   704  			}},
   705  			aux:            "SymOff",
   706  			typ:            "Mem",
   707  			asm:            "STMY",
   708  			faultOnNilArg0: true,
   709  			symEffect:      "Write",
   710  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   711  		},
   712  
   713  		// large move
   714  		// auxint = remaining bytes after loop (rem)
   715  		// arg0 = address of dst memory (in R1, changed as a side effect)
   716  		// arg1 = address of src memory (in R2, changed as a side effect)
   717  		// arg2 = pointer to last address to move in loop + 256
   718  		// arg3 = mem
   719  		// returns mem
   720  		//
   721  		// mvc: MVC  $256, 0(R2), 0(R1)
   722  		//      MOVD $256(R1), R1
   723  		//      MOVD $256(R2), R2
   724  		//      CMP  R2, Rarg2
   725  		//      BNE  mvc
   726  		//	MVC  $rem, 0(R2), 0(R1) // if rem > 0
   727  		{
   728  			name:      "LoweredMove",
   729  			aux:       "Int64",
   730  			argLength: 4,
   731  			reg: regInfo{
   732  				inputs:   []regMask{buildReg("R1"), buildReg("R2"), gpsp},
   733  				clobbers: buildReg("R1 R2"),
   734  			},
   735  			clobberFlags:   true,
   736  			typ:            "Mem",
   737  			faultOnNilArg0: true,
   738  			faultOnNilArg1: true,
   739  		},
   740  
   741  		// large clear
   742  		// auxint = remaining bytes after loop (rem)
   743  		// arg0 = address of dst memory (in R1, changed as a side effect)
   744  		// arg1 = pointer to last address to zero in loop + 256
   745  		// arg2 = mem
   746  		// returns mem
   747  		//
   748  		// clear: CLEAR $256, 0(R1)
   749  		//        MOVD  $256(R1), R1
   750  		//        CMP   R1, Rarg1
   751  		//        BNE   clear
   752  		//	  CLEAR $rem, 0(R1) // if rem > 0
   753  		{
   754  			name:      "LoweredZero",
   755  			aux:       "Int64",
   756  			argLength: 3,
   757  			reg: regInfo{
   758  				inputs:   []regMask{buildReg("R1"), gpsp},
   759  				clobbers: buildReg("R1"),
   760  			},
   761  			clobberFlags:   true,
   762  			typ:            "Mem",
   763  			faultOnNilArg0: true,
   764  		},
   765  	}
   766  
   767  	// All blocks on s390x have their condition code mask (s390x.CCMask) as the Aux value.
   768  	// The condition code mask is a 4-bit mask where each bit corresponds to a condition
   769  	// code value. If the value of the condition code matches a bit set in the condition
   770  	// code mask then the first successor is executed. Otherwise the second successor is
   771  	// executed.
   772  	//
   773  	// | condition code value |  mask bit  |
   774  	// +----------------------+------------+
   775  	// | 0 (equal)            | 0b1000 (8) |
   776  	// | 1 (less than)        | 0b0100 (4) |
   777  	// | 2 (greater than)     | 0b0010 (2) |
   778  	// | 3 (unordered)        | 0b0001 (1) |
   779  	//
   780  	// Note that compare-and-branch instructions must not have bit 3 (0b0001) set.
   781  	var S390Xblocks = []blockData{
   782  		// branch on condition
   783  		{name: "BRC", controls: 1, aux: "S390XCCMask"}, // condition code value (flags) is Controls[0]
   784  
   785  		// compare-and-branch (register-register)
   786  		//  - integrates comparison of Controls[0] with Controls[1]
   787  		//  - both control values must be in general purpose registers
   788  		{name: "CRJ", controls: 2, aux: "S390XCCMask"},   // signed 32-bit integer comparison
   789  		{name: "CGRJ", controls: 2, aux: "S390XCCMask"},  // signed 64-bit integer comparison
   790  		{name: "CLRJ", controls: 2, aux: "S390XCCMask"},  // unsigned 32-bit integer comparison
   791  		{name: "CLGRJ", controls: 2, aux: "S390XCCMask"}, // unsigned 64-bit integer comparison
   792  
   793  		// compare-and-branch (register-immediate)
   794  		//  - integrates comparison of Controls[0] with AuxInt
   795  		//  - control value must be in a general purpose register
   796  		//  - the AuxInt value is sign-extended for signed comparisons
   797  		//    and zero-extended for unsigned comparisons
   798  		{name: "CIJ", controls: 1, aux: "S390XCCMaskInt8"},    // signed 32-bit integer comparison
   799  		{name: "CGIJ", controls: 1, aux: "S390XCCMaskInt8"},   // signed 64-bit integer comparison
   800  		{name: "CLIJ", controls: 1, aux: "S390XCCMaskUint8"},  // unsigned 32-bit integer comparison
   801  		{name: "CLGIJ", controls: 1, aux: "S390XCCMaskUint8"}, // unsigned 64-bit integer comparison
   802  	}
   803  
   804  	archs = append(archs, arch{
   805  		name:            "S390X",
   806  		pkg:             "cmd/internal/obj/s390x",
   807  		genfile:         "../../s390x/ssa.go",
   808  		ops:             S390Xops,
   809  		blocks:          S390Xblocks,
   810  		regnames:        regNamesS390X,
   811  		gpregmask:       gp,
   812  		fpregmask:       fp,
   813  		framepointerreg: -1, // not used
   814  		linkreg:         int8(num["R14"]),
   815  		imports: []string{
   816  			"cmd/internal/obj/s390x",
   817  		},
   818  	})
   819  }
   820  

View as plain text