Source file src/cmd/compile/internal/ssa/_gen/PPC64Ops.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import "strings"
     8  
     9  // Notes:
    10  //  - Less-than-64-bit integer types live in the low portion of registers.
    11  //    The upper portion is junk.
    12  //  - Boolean types are zero or 1; stored in a byte, with upper bytes of the register containing junk.
    13  //  - *const instructions may use a constant larger than the instruction can encode.
    14  //    In this case the assembler expands to multiple instructions and uses tmp
    15  //    register (R31).
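         //    For example, an ADDconst whose auxInt does not fit in the instruction's
         //    immediate field is assembled as a load of the constant into R31 followed
         //    by a register-register add (an illustrative expansion; the assembler
         //    picks the exact sequence).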
    16  
    17  var regNamesPPC64 = []string{
    18  	"R0", // REGZERO, not used, but simplifies counting in regalloc
    19  	"SP", // REGSP
    20  	"SB", // REGSB
    21  	"R3",
    22  	"R4",
    23  	"R5",
    24  	"R6",
    25  	"R7",
    26  	"R8",
    27  	"R9",
    28  	"R10",
    29  	"R11", // REGCTXT for closures
    30  	"R12",
    31  	"R13", // REGTLS
    32  	"R14",
    33  	"R15",
    34  	"R16",
    35  	"R17",
    36  	"R18",
    37  	"R19",
    38  	"R20",
    39  	"R21",
    40  	"R22",
    41  	"R23",
    42  	"R24",
    43  	"R25",
    44  	"R26",
    45  	"R27",
    46  	"R28",
    47  	"R29",
    48  	"g",   // REGG.  Using name "g" and setting Config.hasGReg makes it "just happen".
    49  	"R31", // REGTMP
    50  
    51  	"F0",
    52  	"F1",
    53  	"F2",
    54  	"F3",
    55  	"F4",
    56  	"F5",
    57  	"F6",
    58  	"F7",
    59  	"F8",
    60  	"F9",
    61  	"F10",
    62  	"F11",
    63  	"F12",
    64  	"F13",
    65  	"F14",
    66  	"F15",
    67  	"F16",
    68  	"F17",
    69  	"F18",
    70  	"F19",
    71  	"F20",
    72  	"F21",
    73  	"F22",
    74  	"F23",
    75  	"F24",
    76  	"F25",
    77  	"F26",
    78  	"F27",
    79  	"F28",
    80  	"F29",
    81  	"F30",
    82  	// "F31", the allocator is limited to 64 entries. We sacrifice this FPR to support XER.
    83  
    84  	"XER",
    85  
    86  	// If you add registers, update asyncPreempt in runtime.
    87  
    88  	// "CR0",
    89  	// "CR1",
    90  	// "CR2",
    91  	// "CR3",
    92  	// "CR4",
    93  	// "CR5",
    94  	// "CR6",
    95  	// "CR7",
    96  
    97  	// "CR",
    98  	// "LR",
    99  	// "CTR",
   100  }
   101  
   102  func init() {
   103  	// Make map from reg names to reg integers.
   104  	if len(regNamesPPC64) > 64 {
   105  		panic("too many registers")
   106  	}
   107  	num := map[string]int{}
   108  	for i, name := range regNamesPPC64 {
   109  		num[name] = i
   110  	}
   111  	buildReg := func(s string) regMask {
   112  		m := regMask(0)
   113  		for _, r := range strings.Split(s, " ") {
   114  			if n, ok := num[r]; ok {
   115  				m |= regMask(1) << uint(n)
   116  				continue
   117  			}
   118  			panic("register " + r + " not found")
   119  		}
   120  		return m
   121  	}
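         	// Illustrative example of the mask construction above: registers are
         	// numbered by their position in regNamesPPC64 (R0 is bit 0, SP bit 1,
         	// SB bit 2, R3 bit 3, and so on), so a call such as buildReg("R3 R4")
         	// yields the mask 1<<3 | 1<<4 = 0x18.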
   122  
   123  	var (
   124  		gp  = buildReg("R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29")
   125  		fp  = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30")
   126  		sp  = buildReg("SP")
   127  		sb  = buildReg("SB")
   128  		gr  = buildReg("g")
   129  		xer = buildReg("XER")
   130  		// cr  = buildReg("CR")
   131  		// ctr = buildReg("CTR")
   132  		// lr  = buildReg("LR")
   133  		tmp     = buildReg("R31")
   134  		ctxt    = buildReg("R11")
   135  		callptr = buildReg("R12")
   136  		// tls = buildReg("R13")
   137  		gp01        = regInfo{inputs: nil, outputs: []regMask{gp}}
   138  		gp11        = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   139  		xergp       = regInfo{inputs: []regMask{xer}, outputs: []regMask{gp}, clobbers: xer}
   140  		gp11cxer    = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   141  		gp11xer     = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp, xer}}
   142  		gp21        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   143  		gp21a0      = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
   144  		gp21cxer    = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}, clobbers: xer}
   145  		gp21xer     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, xer}, clobbers: xer}
   146  		gp2xer1xer  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer}
   147  		gp31        = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
   148  		gp1cr       = regInfo{inputs: []regMask{gp | sp | sb}}
   149  		gp2cr       = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   150  		crgp        = regInfo{inputs: nil, outputs: []regMask{gp}}
   151  		crgp11      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
   152  		crgp21      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
   153  		gpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
   154  		gploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   155  		prefreg     = regInfo{inputs: []regMask{gp | sp | sb}}
   156  		gpstore     = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
   157  		gpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}}
   158  		gpstorezero = regInfo{inputs: []regMask{gp | sp | sb}} // ppc64.REGZERO is reserved zero value
   159  		gpxchg      = regInfo{inputs: []regMask{gp | sp | sb, gp}, outputs: []regMask{gp}}
   160  		gpcas       = regInfo{inputs: []regMask{gp | sp | sb, gp, gp}, outputs: []regMask{gp}}
   161  		fp01        = regInfo{inputs: nil, outputs: []regMask{fp}}
   162  		fp11        = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
   163  		fpgp        = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
   164  		gpfp        = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
   165  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
   166  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
   167  		fp2cr       = regInfo{inputs: []regMask{fp, fp}}
   168  		fpload      = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{fp}}
   169  		fploadidx   = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{fp}}
   170  		fpstore     = regInfo{inputs: []regMask{gp | sp | sb, fp}}
   171  		fpstoreidx  = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
   172  		callerSave  = regMask(gp | fp | gr | xer)
   173  		r3          = buildReg("R3")
   174  		r4          = buildReg("R4")
   175  		r5          = buildReg("R5")
   176  		r6          = buildReg("R6")
   177  	)
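         	// How to read the shorthand above (illustrative): the digits in gpNM give
         	// the number of inputs and outputs, so gp21 describes an op with two
         	// general-purpose inputs (SP and SB are also allowed as inputs) and one
         	// general-purpose output; gp11cxer is the one-in/one-out form that also
         	// clobbers XER, and gp21xer additionally returns a second result in XER
         	// (the carry).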
   178  	ops := []opData{
   179  		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},                              // arg0 + arg1
   180  		{name: "ADDCC", argLength: 2, reg: gp21, asm: "ADDCC", commutative: true, typ: "(Int,Flags)"},      // arg0 + arg1
   181  		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "Int64"},                              // arg0 + auxInt
   182  		{name: "ADDCCconst", argLength: 1, reg: gp11cxer, asm: "ADDCCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0 + auxInt sets CC, clobbers XER
   183  		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},                            // arg0+arg1
   184  		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true},                          // arg0+arg1
   185  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                                                 // arg0-arg1
   186  		{name: "SUBCC", argLength: 2, reg: gp21, asm: "SUBCC", typ: "(Int,Flags)"},                         // arg0-arg1 sets CC
   187  		{name: "SUBFCconst", argLength: 1, reg: gp11cxer, asm: "SUBC", aux: "Int64"},                       // auxInt - arg0 (carry is ignored)
   188  		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                                               // arg0-arg1
   189  		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                                             // arg0-arg1
   190  
   191  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
   192  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
   193  		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   194  		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
   195  		{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"},                  // (arg0*arg1)+arg2 (signed 64-bit)
   196  
   197  		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
   198  		{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
   199  		{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
   200  		{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
   201  
   202  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
   203  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
   204  
   205  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD"},   // arg0*arg1 + arg2
   206  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // arg0*arg1 + arg2
   207  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB"},   // arg0*arg1 - arg2
   208  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // arg0*arg1 - arg2
   209  
   210  		{name: "SRAD", argLength: 2, reg: gp21cxer, asm: "SRAD"}, // signed arg0 >> (arg1&127), 64 bit width (note: 127, not 63!)
   211  		{name: "SRAW", argLength: 2, reg: gp21cxer, asm: "SRAW"}, // signed arg0 >> (arg1&63), 32 bit width
   212  		{name: "SRD", argLength: 2, reg: gp21, asm: "SRD"},       // unsigned arg0 >> (arg1&127), 64 bit width
   213  		{name: "SRW", argLength: 2, reg: gp21, asm: "SRW"},       // unsigned arg0 >> (arg1&63), 32 bit width
   214  		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"},       // arg0 << (arg1&127), 64 bit width
   215  		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"},       // arg0 << (arg1&63), 32 bit width
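         		// Illustrative note on the 127 vs 63 masks above: the doubleword shifts
         		// take a 7-bit shift amount, and amounts of 64..127 do not wrap on PPC64;
         		// they shift out every bit, so SLD/SRD produce 0 and SRAD produces a word
         		// of sign bits, which lines up with Go's semantics for oversized shifts.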
   216  
   217  		{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
   218  		{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
   219  		// The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA.
   220  		// The constant shift values are packed into the aux int32.
   221  		{name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, //
   222  		{name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, //
   223  
   224  		// Operations which consume or generate the CA (xer)
   225  		{name: "ADDC", argLength: 2, reg: gp21xer, asm: "ADDC", commutative: true, typ: "(UInt64, UInt64)"},    // arg0 + arg1 -> out, CA
   226  		{name: "SUBC", argLength: 2, reg: gp21xer, asm: "SUBC", typ: "(UInt64, UInt64)"},                       // arg0 - arg1 -> out, CA
   227  		{name: "ADDCconst", argLength: 1, reg: gp11xer, asm: "ADDC", typ: "(UInt64, UInt64)", aux: "Int64"},    // arg0 + imm16 -> out, CA
   228  		{name: "SUBCconst", argLength: 1, reg: gp11xer, asm: "SUBC", typ: "(UInt64, UInt64)", aux: "Int64"},    // imm16 - arg0 -> out, CA
   229  		{name: "ADDE", argLength: 3, reg: gp2xer1xer, asm: "ADDE", typ: "(UInt64, UInt64)", commutative: true}, // arg0 + arg1 + CA (arg2) -> out, CA
   230  		{name: "SUBE", argLength: 3, reg: gp2xer1xer, asm: "SUBE", typ: "(UInt64, UInt64)"},                    // arg0 - arg1 - CA (arg2) -> out, CA
   231  		{name: "ADDZEzero", argLength: 1, reg: xergp, asm: "ADDZE", typ: "UInt64"},                             // CA (arg0) + $0 -> out
   232  		{name: "SUBZEzero", argLength: 1, reg: xergp, asm: "SUBZE", typ: "UInt64"},                             // $0 - CA (arg0) -> out
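         		// For illustration, these chain for multi-word arithmetic: a 128-bit add
         		// can use ADDC on the low doublewords (leaving the carry in CA/XER)
         		// followed by ADDE on the high doublewords, which consumes that carry;
         		// ADDZEzero materializes the carry itself as 0 or 1 in a GP register.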
   233  
   234  		{name: "SRADconst", argLength: 1, reg: gp11cxer, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   235  		{name: "SRAWconst", argLength: 1, reg: gp11cxer, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   236  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
   237  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int64"},       // unsigned arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
   238  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 64, 64 bit width
   239  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},       // arg0 << auxInt, 0 <= auxInt < 32, 32 bit width
   240  
   241  		{name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
   242  		{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
   243  		{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
   244  
   245  		{name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"},                      // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
   246  		{name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"},                       // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
   247  		{name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true}, // "rlwimi" similar aux encoding as above
   248  		{name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int64"},                     // Auxint is encoded similarly to RLWINM, but only MB and SH are valid. ME is always 63.
   249  		{name: "RLDICR", argLength: 1, reg: gp11, asm: "RLDICR", aux: "Int64"},                     // Likewise, but only ME and SH are valid. MB is always 0.
   250  
   251  		{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD"},                          // count leading zeros
   252  		{name: "CNTLZDCC", argLength: 1, reg: gp11, asm: "CNTLZDCC", typ: "(Int, Flags)"}, // count leading zeros, sets CC
   253  		{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW"},                          // count leading zeros (32 bit)
   254  
   255  		{name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
   256  		{name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
   257  
   258  		{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
   259  		{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
   260  		{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte
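         		// For example, POPCNTB applied to 0x0103 yields 0x0102: the low byte 0x03
         		// has two set bits and the next byte 0x01 has one (illustrative).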
   261  
   262  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
   263  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
   264  
   265  		{name: "DIVD", argLength: 2, reg: gp21, asm: "DIVD", typ: "Int64"},   // arg0/arg1 (signed 64-bit)
   266  		{name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},   // arg0/arg1 (signed 32-bit)
   267  		{name: "DIVDU", argLength: 2, reg: gp21, asm: "DIVDU", typ: "Int64"}, // arg0/arg1 (unsigned 64-bit)
   268  		{name: "DIVWU", argLength: 2, reg: gp21, asm: "DIVWU", typ: "Int32"}, // arg0/arg1 (unsigned 32-bit)
   269  
   270  		{name: "MODUD", argLength: 2, reg: gp21, asm: "MODUD", typ: "UInt64"}, // arg0 % arg1 (unsigned 64-bit)
   271  		{name: "MODSD", argLength: 2, reg: gp21, asm: "MODSD", typ: "Int64"},  // arg0 % arg1 (signed 64-bit)
   272  		{name: "MODUW", argLength: 2, reg: gp21, asm: "MODUW", typ: "UInt32"}, // arg0 % arg1 (unsigned 32-bit)
   273  		{name: "MODSW", argLength: 2, reg: gp21, asm: "MODSW", typ: "Int32"},  // arg0 % arg1 (signed 32-bit)
   274  		// MOD is implemented as rem := arg0 - (arg0/arg1) * arg1
   275  
   276  		// Conversions are all float-to-float register operations.  "Integer" refers to encoding in the FP register.
   277  		{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
   278  		{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
   279  		{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"},   // convert 64-bit integer to float
   280  		{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
   281  		{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"},     // round float to 32-bit value
   282  
   283  		// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
   284  		// Because the 32-bit load-literal-bits instructions have impoverished addressability, always widen the
    285  		// data and use FMOVDload and FMOVDstore instead (this will also dodge endianness issues).
   286  		// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
   287  		// the word-load instructions.  (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
   288  
   289  		{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"},   // move 64 bits of F register into G register
   290  		{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
   291  
   292  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true},                           // arg0&arg1
   293  		{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"},                                            // arg0&^arg1
   294  		{name: "ANDNCC", argLength: 2, reg: gp21, asm: "ANDNCC", typ: "(Int64,Flags)"},                  // arg0&^arg1 sets CC
   295  		{name: "ANDCC", argLength: 2, reg: gp21, asm: "ANDCC", commutative: true, typ: "(Int64,Flags)"}, // arg0&arg1 sets CC
   296  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true},                             // arg0|arg1
   297  		{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"},                                              // arg0|^arg1
   298  		{name: "ORCC", argLength: 2, reg: gp21, asm: "ORCC", commutative: true, typ: "(Int,Flags)"},     // arg0|arg1 sets CC
   299  		{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true},                           // ^(arg0|arg1)
   300  		{name: "NORCC", argLength: 2, reg: gp21, asm: "NORCC", commutative: true, typ: "(Int,Flags)"},   // ^(arg0|arg1) sets CC
   301  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", typ: "Int64", commutative: true},             // arg0^arg1
   302  		{name: "XORCC", argLength: 2, reg: gp21, asm: "XORCC", commutative: true, typ: "(Int,Flags)"},   // arg0^arg1 sets CC
    303  		{name: "EQV", argLength: 2, reg: gp21, asm: "EQV", typ: "Int64", commutative: true},             // ^(arg0^arg1) (equivalence)
   304  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                              // -arg0 (integer)
   305  		{name: "NEGCC", argLength: 1, reg: gp11, asm: "NEGCC", typ: "(Int,Flags)"},                      // -arg0 (integer) sets CC
   306  		{name: "BRD", argLength: 1, reg: gp11, asm: "BRD"},                                              // reversebytes64(arg0)
   307  		{name: "BRW", argLength: 1, reg: gp11, asm: "BRW"},                                              // reversebytes32(arg0)
   308  		{name: "BRH", argLength: 1, reg: gp11, asm: "BRH"},                                              // reversebytes16(arg0)
   309  		{name: "FNEG", argLength: 1, reg: fp11, asm: "FNEG"},                                            // -arg0 (floating point)
   310  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},                                          // sqrt(arg0) (floating point)
   311  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                                        // sqrt(arg0) (floating point, single precision)
   312  		{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"},                                          // floor(arg0), float64
   313  		{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"},                                           // ceil(arg0), float64
   314  		{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"},                                          // trunc(arg0), float64
   315  		{name: "FROUND", argLength: 1, reg: fp11, asm: "FRIN"},                                          // round(arg0), float64
   316  		{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"},                                            // abs(arg0), float64
   317  		{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"},                                          // -abs(arg0), float64
   318  		{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"},                                        // copysign arg0 -> arg1, float64
   319  
   320  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"},                                                                                       // arg0|aux
   321  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"},                                                                                     // arg0^aux
   322  		{name: "ANDCCconst", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}, asm: "ANDCC", aux: "Int64", typ: "(Int,Flags)"}, // arg0&aux == 0 // and-immediate sets CC on PPC, always.
   323  
   324  		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB", typ: "Int64"},   // sign extend int8 to int64
   325  		{name: "MOVBZreg", argLength: 1, reg: gp11, asm: "MOVBZ", typ: "Int64"}, // zero extend uint8 to uint64
   326  		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH", typ: "Int64"},   // sign extend int16 to int64
   327  		{name: "MOVHZreg", argLength: 1, reg: gp11, asm: "MOVHZ", typ: "Int64"}, // zero extend uint16 to uint64
   328  		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW", typ: "Int64"},   // sign extend int32 to int64
   329  		{name: "MOVWZreg", argLength: 1, reg: gp11, asm: "MOVWZ", typ: "Int64"}, // zero extend uint32 to uint64
   330  
   331  		// Load bytes in the endian order of the arch from arg0+aux+auxint into a 64 bit register.
   332  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte zero extend
   333  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"},    // load 2 bytes sign extend
   334  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend
   335  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"},    // load 4 bytes sign extend
   336  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend
   337  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"},    // load 8 bytes
   338  
   339  		// Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
   340  		// The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
   341  		// In these cases the index register field is set to 0 and the full address is in the base register.
   342  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", typ: "UInt64", faultOnNilArg0: true}, // load 8 bytes reverse order
   343  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", typ: "UInt32", faultOnNilArg0: true}, // load 4 bytes zero extend reverse order
   344  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", typ: "UInt16", faultOnNilArg0: true}, // load 2 bytes zero extend reverse order
   345  
   346  		// In these cases an index register is used in addition to a base register
   347  		// Loads from memory location arg[0] + arg[1].
   348  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, asm: "MOVBZ", typ: "UInt8"},  // zero extend uint8 to uint64
   349  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, asm: "MOVH", typ: "Int16"},    // sign extend int16 to int64
   350  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, asm: "MOVHZ", typ: "UInt16"}, // zero extend uint16 to uint64
   351  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, asm: "MOVW", typ: "Int32"},    // sign extend int32 to int64
   352  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, asm: "MOVWZ", typ: "UInt32"}, // zero extend uint32 to uint64
   353  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, asm: "MOVD", typ: "Int64"},
   354  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVHBR", typ: "Int16"}, // sign extend int16 to int64
   355  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVWBR", typ: "Int32"}, // sign extend int32 to int64
   356  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, asm: "MOVDBR", typ: "Int64"},
   357  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", typ: "Float64"},
   358  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", typ: "Float32"},
   359  
   360  		// Prefetch instruction
   361  		// Do prefetch of address generated with arg0 and arg1 with option aux. arg0=addr,arg1=memory, aux=option.
   362  		{name: "DCBT", argLength: 2, aux: "Int64", reg: prefreg, asm: "DCBT", hasSideEffects: true},
   363  
   364  		// Store bytes in the reverse endian order of the arch into arg0.
   365  		// These are indexed stores with no offset field in the instruction so the auxint fields are not used.
   366  		{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes reverse order
   367  		{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes reverse order
   368  		{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes reverse order
   369  
   370  		// Floating point loads from arg0+aux+auxint
   371  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
   372  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load single float
   373  
   374  		// Store bytes in the endian order of the arch into arg0+aux+auxint
   375  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte
   376  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes
   377  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes
   378  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes
   379  
   380  		// Store floating point value into arg0+aux+auxint
    381  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store double float
   382  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store single float
   383  
   384  		// Stores using index and base registers
   385  		// Stores to arg[0] + arg[1]
    386  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVB", typ: "Mem"},     // store byte
   387  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVH", typ: "Mem"},     // store half word
   388  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVW", typ: "Mem"},     // store word
   389  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVD", typ: "Mem"},     // store double word
   390  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", typ: "Mem"},   // store double float
   391  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", typ: "Mem"},   // store single float
   392  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVHBR", typ: "Mem"}, // store half word reversed byte using index reg
   393  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVWBR", typ: "Mem"}, // store word reversed byte using index reg
   394  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, asm: "MOVDBR", typ: "Mem"}, // store double word reversed byte using index reg
   395  
   396  		// The following ops store 0 into arg0+aux+auxint arg1=mem
   397  		{name: "MOVBstorezero", argLength: 2, reg: gpstorezero, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 1 byte
   398  		{name: "MOVHstorezero", argLength: 2, reg: gpstorezero, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 2 bytes
   399  		{name: "MOVWstorezero", argLength: 2, reg: gpstorezero, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 4 bytes
   400  		{name: "MOVDstorezero", argLength: 2, reg: gpstorezero, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store zero 8 bytes
   401  
   402  		{name: "MOVDaddr", argLength: 1, reg: regInfo{inputs: []regMask{sp | sb | gp}, outputs: []regMask{gp}}, aux: "SymOff", asm: "MOVD", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxInt + aux.(*gc.Sym), arg0=SP/SB/GP
   403  
   404  		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "Int64", rematerializeable: true}, //
   405  		{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", rematerializeable: true},           //
   406  		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float32", asm: "FMOVS", rematerializeable: true},           //
   407  		{name: "FCMPU", argLength: 2, reg: fp2cr, asm: "FCMPU", typ: "Flags"},
   408  
   409  		{name: "CMP", argLength: 2, reg: gp2cr, asm: "CMP", typ: "Flags"},     // arg0 compare to arg1
   410  		{name: "CMPU", argLength: 2, reg: gp2cr, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   411  		{name: "CMPW", argLength: 2, reg: gp2cr, asm: "CMPW", typ: "Flags"},   // arg0 compare to arg1
   412  		{name: "CMPWU", argLength: 2, reg: gp2cr, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   413  		{name: "CMPconst", argLength: 1, reg: gp1cr, asm: "CMP", aux: "Int64", typ: "Flags"},
   414  		{name: "CMPUconst", argLength: 1, reg: gp1cr, asm: "CMPU", aux: "Int64", typ: "Flags"},
   415  		{name: "CMPWconst", argLength: 1, reg: gp1cr, asm: "CMPW", aux: "Int32", typ: "Flags"},
   416  		{name: "CMPWUconst", argLength: 1, reg: gp1cr, asm: "CMPWU", aux: "Int32", typ: "Flags"},
   417  
   418  		// ISEL  arg2 ? arg0 : arg1
   419  		// ISELZ arg1 ? arg0 : $0
   420  		// auxInt values 0=LT 1=GT 2=EQ 3=SO (summary overflow/unordered) 4=GE 5=LE 6=NE 7=NSO (not summary overflow/not unordered)
    421  		// Note, auxInt^4 inverts the comparison condition. For example, LT^4 becomes GE, and "ISEL [a] x y z" is equivalent to "ISEL [a^4] y x z".
   422  		{name: "ISEL", argLength: 3, reg: crgp21, asm: "ISEL", aux: "Int32", typ: "Int32"},
   423  		{name: "ISELZ", argLength: 2, reg: crgp11, asm: "ISEL", aux: "Int32"},
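         		// Worked example of the aux encoding above: auxInt^4 pairs 0 (LT) with
         		// 4 (GE), 1 (GT) with 5 (LE), 2 (EQ) with 6 (NE), and 3 (SO) with 7 (NSO),
         		// so XOR-ing the condition with 4 while swapping the two selected operands
         		// leaves the result unchanged.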
   424  
   425  		// SETBC auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 1 : 0
   426  		{name: "SETBC", argLength: 1, reg: crgp, asm: "SETBC", aux: "Int32", typ: "Int32"},
   427  		// SETBCR auxInt values 0=LT 1=GT 2=EQ     (CRbit=1)? 0 : 1
   428  		{name: "SETBCR", argLength: 1, reg: crgp, asm: "SETBCR", aux: "Int32", typ: "Int32"},
   429  
   430  		// pseudo-ops
   431  		{name: "Equal", argLength: 1, reg: crgp},         // bool, true flags encode x==y false otherwise.
   432  		{name: "NotEqual", argLength: 1, reg: crgp},      // bool, true flags encode x!=y false otherwise.
   433  		{name: "LessThan", argLength: 1, reg: crgp},      // bool, true flags encode  x<y false otherwise.
   434  		{name: "FLessThan", argLength: 1, reg: crgp},     // bool, true flags encode  x<y false otherwise.
   435  		{name: "LessEqual", argLength: 1, reg: crgp},     // bool, true flags encode  x<=y false otherwise.
   436  		{name: "FLessEqual", argLength: 1, reg: crgp},    // bool, true flags encode  x<=y false otherwise; PPC <= === !> which is wrong for NaN
   437  		{name: "GreaterThan", argLength: 1, reg: crgp},   // bool, true flags encode  x>y false otherwise.
   438  		{name: "FGreaterThan", argLength: 1, reg: crgp},  // bool, true flags encode  x>y false otherwise.
   439  		{name: "GreaterEqual", argLength: 1, reg: crgp},  // bool, true flags encode  x>=y false otherwise.
    440  		{name: "FGreaterEqual", argLength: 1, reg: crgp}, // bool, true flags encode  x>=y false otherwise; PPC >= === !< which is wrong for NaN
   441  
   442  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   443  		// and sorts it to the very beginning of the block to prevent other
   444  		// use of the closure pointer.
   445  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{ctxt}}, zeroWidth: true},
   446  
   447  		// LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem.
   448  		{name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true},
   449  
   450  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   451  		// I.e., if f calls g "calls" getcallerpc,
   452  		// the result should be the PC within f that g will return to.
   453  		// See runtime/stubs.go for a more detailed discussion.
   454  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   455  
    456  		// arg0=ptr, arg1=mem, returns void.  Faults if ptr is nil.
   457  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   458  		// Round ops to block fused-multiply-add extraction.
   459  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   460  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   461  
   462  		{name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                       // call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   463  		{name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                         // tail call static function aux.(*obj.LSym).  arg0=mem, auxint=argsize, returns mem
   464  		{name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{callptr, ctxt, 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
   465  		{name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{callptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},            // call fn by pointer.  arg0=codeptr, arg1=mem, auxint=argsize, returns mem
   466  
   467  		// large or unaligned zeroing
   468  		// arg0 = address of memory to zero (in R3, changed as side effect)
   469  		// returns mem
   470  		//
   471  		// a loop is generated when there is more than one iteration
   472  		// needed to clear 4 doublewords
   473  		//
   474  		//	XXLXOR	VS32,VS32,VS32
   475  		// 	MOVD	$len/32,R31
   476  		//	MOVD	R31,CTR
   477  		//	MOVD	$16,R31
   478  		//	loop:
   479  		//	STXVD2X VS32,(R0)(R3)
   480  		//	STXVD2X	VS32,(R31)(R3)
   481  		//	ADD	R3,32
   482  		//	BC	loop
   483  
   484  		// remaining doubleword clears generated as needed
   485  		//	MOVD	R0,(R3)
   486  		//	MOVD	R0,8(R3)
   487  		//	MOVD	R0,16(R3)
   488  		//	MOVD	R0,24(R3)
   489  
   490  		// one or more of these to clear remainder < 8 bytes
   491  		//	MOVW	R0,n1(R3)
   492  		//	MOVH	R0,n2(R3)
   493  		//	MOVB	R0,n3(R3)
   494  		{
   495  			name:      "LoweredZero",
   496  			aux:       "Int64",
   497  			argLength: 2,
   498  			reg: regInfo{
   499  				inputs:   []regMask{buildReg("R20")},
   500  				clobbers: buildReg("R20"),
   501  			},
   502  			clobberFlags:   true,
   503  			typ:            "Mem",
   504  			faultOnNilArg0: true,
   505  			unsafePoint:    true,
   506  		},
   507  		{
   508  			name:      "LoweredZeroShort",
   509  			aux:       "Int64",
   510  			argLength: 2,
   511  			reg: regInfo{
   512  				inputs: []regMask{gp}},
   513  			typ:            "Mem",
   514  			faultOnNilArg0: true,
   515  			unsafePoint:    true,
   516  		},
   517  		{
   518  			name:      "LoweredQuadZeroShort",
   519  			aux:       "Int64",
   520  			argLength: 2,
   521  			reg: regInfo{
   522  				inputs: []regMask{gp},
   523  			},
   524  			typ:            "Mem",
   525  			faultOnNilArg0: true,
   526  			unsafePoint:    true,
   527  		},
   528  		{
   529  			name:      "LoweredQuadZero",
   530  			aux:       "Int64",
   531  			argLength: 2,
   532  			reg: regInfo{
   533  				inputs:   []regMask{buildReg("R20")},
   534  				clobbers: buildReg("R20"),
   535  			},
   536  			clobberFlags:   true,
   537  			typ:            "Mem",
   538  			faultOnNilArg0: true,
   539  			unsafePoint:    true,
   540  		},
   541  
   542  		// R31 is temp register
   543  		// Loop code:
   544  		//	MOVD len/32,R31		set up loop ctr
   545  		//	MOVD R31,CTR
   546  		//	MOVD $16,R31		index register
   547  		// loop:
   548  		//	LXVD2X (R0)(R4),VS32
   549  		//	LXVD2X (R31)(R4),VS33
   550  		//	ADD  R4,$32          increment src
   551  		//	STXVD2X VS32,(R0)(R3)
   552  		//	STXVD2X VS33,(R31)(R3)
   553  		//	ADD  R3,$32          increment dst
   554  		//	BC 16,0,loop         branch ctr
   555  		// For this purpose, VS32 and VS33 are treated as
   556  		// scratch registers. Since regalloc does not
    557  		// track vector registers, even if they could be marked
    558  		// as clobbered it would have no effect.
   559  		// TODO: If vector registers are managed by regalloc
   560  		// mark these as clobbered.
   561  		//
   562  		// Bytes not moved by this loop are moved
   563  		// with a combination of the following instructions,
   564  		// starting with the largest sizes and generating as
   565  		// many as needed, using the appropriate offset value.
   566  		//	MOVD  n(R4),R14
   567  		//	MOVD  R14,n(R3)
   568  		//	MOVW  n1(R4),R14
   569  		//	MOVW  R14,n1(R3)
   570  		//	MOVH  n2(R4),R14
   571  		//	MOVH  R14,n2(R3)
   572  		//	MOVB  n3(R4),R14
   573  		//	MOVB  R14,n3(R3)
   574  
   575  		{
   576  			name:      "LoweredMove",
   577  			aux:       "Int64",
   578  			argLength: 3,
   579  			reg: regInfo{
   580  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   581  				clobbers: buildReg("R20 R21"),
   582  			},
   583  			clobberFlags:   true,
   584  			typ:            "Mem",
   585  			faultOnNilArg0: true,
   586  			faultOnNilArg1: true,
   587  			unsafePoint:    true,
   588  		},
   589  		{
   590  			name:      "LoweredMoveShort",
   591  			aux:       "Int64",
   592  			argLength: 3,
   593  			reg: regInfo{
   594  				inputs: []regMask{gp, gp},
   595  			},
   596  			typ:            "Mem",
   597  			faultOnNilArg0: true,
   598  			faultOnNilArg1: true,
   599  			unsafePoint:    true,
   600  		},
   601  
   602  		// The following is similar to the LoweredMove, but uses
   603  		// LXV instead of LXVD2X, which does not require an index
    604  		// register and will do 4 in a loop instead of only 2.
   605  		{
   606  			name:      "LoweredQuadMove",
   607  			aux:       "Int64",
   608  			argLength: 3,
   609  			reg: regInfo{
   610  				inputs:   []regMask{buildReg("R20"), buildReg("R21")},
   611  				clobbers: buildReg("R20 R21"),
   612  			},
   613  			clobberFlags:   true,
   614  			typ:            "Mem",
   615  			faultOnNilArg0: true,
   616  			faultOnNilArg1: true,
   617  			unsafePoint:    true,
   618  		},
   619  
   620  		{
   621  			name:      "LoweredQuadMoveShort",
   622  			aux:       "Int64",
   623  			argLength: 3,
   624  			reg: regInfo{
   625  				inputs: []regMask{gp, gp},
   626  			},
   627  			typ:            "Mem",
   628  			faultOnNilArg0: true,
   629  			faultOnNilArg1: true,
   630  			unsafePoint:    true,
   631  		},
   632  
   633  		{name: "LoweredAtomicStore8", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   634  		{name: "LoweredAtomicStore32", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   635  		{name: "LoweredAtomicStore64", argLength: 3, reg: gpstore, typ: "Mem", aux: "Int64", faultOnNilArg0: true, hasSideEffects: true},
   636  
   637  		{name: "LoweredAtomicLoad8", argLength: 2, reg: gpload, typ: "UInt8", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   638  		{name: "LoweredAtomicLoad32", argLength: 2, reg: gpload, typ: "UInt32", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   639  		{name: "LoweredAtomicLoad64", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   640  		{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
   641  
   642  		// atomic add32, 64
   643  		// LWSYNC
   644  		// LDAR         (Rarg0), Rout
   645  		// ADD		Rarg1, Rout
   646  		// STDCCC       Rout, (Rarg0)
   647  		// BNE          -3(PC)
   648  		// return new sum
   649  		{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   650  		{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   651  
   652  		// atomic exchange32, 64
   653  		// LWSYNC
   654  		// LDAR         (Rarg0), Rout
   655  		// STDCCC       Rarg1, (Rarg0)
   656  		// BNE          -2(PC)
   657  		// ISYNC
   658  		// return old val
   659  		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   660  		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   661  
   662  		// atomic compare and swap.
   663  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
   664  		// if *arg0 == arg1 {
   665  		//   *arg0 = arg2
   666  		//   return (true, memory)
   667  		// } else {
   668  		//   return (false, memory)
   669  		// }
   670  		// SYNC
   671  		// LDAR		(Rarg0), Rtmp
   672  		// CMP		Rarg1, Rtmp
   673  		// BNE		3(PC)
   674  		// STDCCC	Rarg2, (Rarg0)
   675  		// BNE		-4(PC)
   678  		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   679  		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
   680  
   681  		// atomic 8/32 and/or.
   682  		// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
    683  		// LBAR/LWAR	(Rarg0), Rtmp
   684  		// AND/OR	Rarg1, Rtmp
   685  		// STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
   686  		// BNE		Rtmp, -3(PC)
   687  		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   688  		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
   689  		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   690  		{name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
   691  
   692  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
   693  		// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and R20 and R21,
   694  		// but may clobber anything else, including R31 (REGTMP).
   695  		// Returns a pointer to a write barrier buffer in R29.
   696  		{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"},
   697  
   698  		{name: "LoweredPubBarrier", argLength: 1, asm: "LWSYNC", hasSideEffects: true}, // Do data barrier. arg0=memory
   699  		// There are three of these functions so that they can have three different register inputs.
   700  		// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
   701  		// default registers to match so we don't need to copy registers around unnecessarily.
   702  		{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r6}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   703  		{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r5}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   704  		{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
   705  
   706  		// (InvertFlags (CMP a b)) == (CMP b a)
   707  		// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
   708  		// then we do (LessThan (InvertFlags (CMP b a))) instead.
   709  		// Rewrites will convert this to (GreaterThan (CMP b a)).
   710  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   711  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   712  
   713  		// Constant flag values. For any comparison, there are 3 possible
   714  		// outcomes: either the three from the signed total order (<,==,>)
   715  		// or the three from the unsigned total order, depending on which
   716  		// comparison operation was used (CMP or CMPU -- PPC is different from
   717  		// the other architectures, which have a single comparison producing
   718  		// both signed and unsigned comparison results.)
   719  
   720  		// These ops are for temporary use by rewrite rules. They
   721  		// cannot appear in the generated assembly.
   722  		{name: "FlagEQ"}, // equal
   723  		{name: "FlagLT"}, // signed < or unsigned <
   724  		{name: "FlagGT"}, // signed > or unsigned >
   725  	}
   726  
   727  	blocks := []blockData{
   728  		{name: "EQ", controls: 1},
   729  		{name: "NE", controls: 1},
   730  		{name: "LT", controls: 1},
   731  		{name: "LE", controls: 1},
   732  		{name: "GT", controls: 1},
   733  		{name: "GE", controls: 1},
   734  		{name: "FLT", controls: 1},
   735  		{name: "FLE", controls: 1},
   736  		{name: "FGT", controls: 1},
   737  		{name: "FGE", controls: 1},
   738  	}
   739  
   740  	archs = append(archs, arch{
   741  		name:               "PPC64",
   742  		pkg:                "cmd/internal/obj/ppc64",
   743  		genfile:            "../../ppc64/ssa.go",
   744  		ops:                ops,
   745  		blocks:             blocks,
   746  		regnames:           regNamesPPC64,
   747  		ParamIntRegNames:   "R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17",
   748  		ParamFloatRegNames: "F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12",
   749  		gpregmask:          gp,
   750  		fpregmask:          fp,
   751  		specialregmask:     xer,
   752  		framepointerreg:    -1,
   753  		linkreg:            -1, // not used
   754  	})
   755  }
   756  
