Source file test/codegen/mathbits.go
// asmcheck

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package codegen

import "math/bits"

// The comments inside the function bodies below are asmcheck directives.
// Each one names a target architecture, optionally with a variant (for
// example amd64/v3, arm/6, or ppc64/power9), followed by one or more
// regexps that must match the assembly generated for the surrounding
// function; a regexp prefixed with "-" must not match it.

// ----------------------- //
//    bits.LeadingZeros    //
// ----------------------- //

func LeadingZeros(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"CNTLZD"
	// ppc64:"CNTLZD"
	return bits.LeadingZeros(n)
}

func LeadingZeros64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"CNTLZD"
	// ppc64:"CNTLZD"
	return bits.LeadingZeros64(n)
}

func LeadingZeros32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZW"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"CNTLZW"
	// ppc64:"CNTLZW"
	return bits.LeadingZeros32(n)
}

func LeadingZeros16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"CNTLZD"
	// ppc64:"CNTLZD"
	return bits.LeadingZeros16(n)
}

func LeadingZeros8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL",- "BSRL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"CNTLZD"
	// ppc64:"CNTLZD"
	return bits.LeadingZeros8(n)
}

// --------------- //
//    bits.Len*    //
// --------------- //

func Len(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"SUBC","CNTLZD"
	// ppc64:"SUBC","CNTLZD"
	return bits.Len(n)
}

func Len64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"SUBC","CNTLZD"
	// ppc64:"SUBC","CNTLZD"
	return bits.Len64(n)
}

func SubFromLen64(n uint64) int {
	// ppc64le:"CNTLZD",-"SUBC"
	// ppc64:"CNTLZD",-"SUBC"
	return 64 - bits.Len64(n)
}

func Len32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64: "CNTLZW"
	// ppc64le: "CNTLZW"
	return bits.Len32(n)
}

func Len16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"SUBC","CNTLZD"
	// ppc64:"SUBC","CNTLZD"
	return bits.Len16(n)
}

func Len8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"SUBC","CNTLZD"
	// ppc64:"SUBC","CNTLZD"
	return bits.Len8(n)
}

// -------------------- //
//    bits.OnesCount    //
// -------------------- //

// TODO(register args) Restore amd64/v1:.*x86HasPOPCNT when only one ABI is tested.
func OnesCount(n uint) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64:"POPCNTD"
	// ppc64le:"POPCNTD"
	// wasm:"I64Popcnt"
	return bits.OnesCount(n)
}

func OnesCount64(n uint64) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64:"POPCNTD"
	// ppc64le:"POPCNTD"
	// wasm:"I64Popcnt"
	return bits.OnesCount64(n)
}

func OnesCount32(n uint32) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64:"POPCNTW"
	// ppc64le:"POPCNTW"
	// wasm:"I64Popcnt"
	return bits.OnesCount32(n)
}

func OnesCount16(n uint16) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64:"POPCNTW"
	// ppc64le:"POPCNTW"
	// wasm:"I64Popcnt"
	return bits.OnesCount16(n)
}

func OnesCount8(n uint8) int {
	// s390x:"POPCNT"
	// ppc64:"POPCNTB"
	// ppc64le:"POPCNTB"
	// wasm:"I64Popcnt"
	return bits.OnesCount8(n)
}

// ----------------------- //
//    bits.ReverseBytes    //
// ----------------------- //

func ReverseBytes(n uint) uint {
	// amd64:"BSWAPQ"
	// s390x:"MOVDBR"
	// arm64:"REV"
	return bits.ReverseBytes(n)
}

func ReverseBytes64(n uint64) uint64 {
	// amd64:"BSWAPQ"
	// s390x:"MOVDBR"
	// arm64:"REV"
	return bits.ReverseBytes64(n)
}

func ReverseBytes32(n uint32) uint32 {
	// amd64:"BSWAPL"
	// s390x:"MOVWBR"
	// arm64:"REVW"
	return bits.ReverseBytes32(n)
}

func ReverseBytes16(n uint16) uint16 {
	// amd64:"ROLW"
	// arm64:"REV16W",-"UBFX",-"ORR"
	// arm/5:"SLL","SRL","ORR"
	// arm/6:"REV16"
	// arm/7:"REV16"
	return bits.ReverseBytes16(n)
}

// --------------------- //
//    bits.RotateLeft    //
// --------------------- //

func RotateLeft64(n uint64) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64:"ROTL"
	// ppc64le:"ROTL"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, 37)
}

func RotateLeft32(n uint32) uint32 {
	// amd64:"ROLL" 386:"ROLL"
	// arm:`MOVW\tR[0-9]+@>23`
	// arm64:"RORW"
	// ppc64:"ROTLW"
	// ppc64le:"ROTLW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, 9)
}

func RotateLeft16(n uint16, s int) uint16 {
	// amd64:"ROLW" 386:"ROLW"
	// arm64:"RORW",-"CSEL"
	return bits.RotateLeft16(n, s)
}

func RotateLeft8(n uint8, s int) uint8 {
	// amd64:"ROLB" 386:"ROLB"
	// arm64:"LSL","LSR",-"CSEL"
	return bits.RotateLeft8(n, s)
}

func RotateLeftVariable(n uint, m int) uint {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64:"ROTL"
	// ppc64le:"ROTL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft(n, m)
}

func RotateLeftVariable64(n uint64, m int) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64:"ROTL"
	// ppc64le:"ROTL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, m)
}

func RotateLeftVariable32(n uint32, m int) uint32 {
	// arm:`MOVW\tR[0-9]+@>R[0-9]+`
	// amd64:"ROLL"
	// arm64:"RORW"
	// ppc64:"ROTLW"
	// ppc64le:"ROTLW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, m)
}

// ------------------------ //
//    bits.TrailingZeros    //
// ------------------------ //

func TrailingZeros(n uint) int {
	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// arm:"CLZ"
	// arm64:"RBIT","CLZ"
	// s390x:"FLOGR"
	// ppc64/power8:"ANDN","POPCNTD"
	// ppc64le/power8:"ANDN","POPCNTD"
	// ppc64/power9: "CNTTZD"
	// ppc64le/power9: "CNTTZD"
	// wasm:"I64Ctz"
	return bits.TrailingZeros(n)
}

func TrailingZeros64(n uint64) int {
	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// arm64:"RBIT","CLZ"
	// s390x:"FLOGR"
	// ppc64/power8:"ANDN","POPCNTD"
	// ppc64le/power8:"ANDN","POPCNTD"
	// ppc64/power9: "CNTTZD"
	// ppc64le/power9: "CNTTZD"
	// wasm:"I64Ctz"
	return bits.TrailingZeros64(n)
}

func TrailingZeros64Subtract(n uint64) int {
	// ppc64le/power8:"NEG","SUBC","ANDN","POPCNTD"
	// ppc64le/power9:"SUBC","CNTTZD"
	return bits.TrailingZeros64(1 - n)
}

func TrailingZeros32(n uint32) int {
	// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
	// amd64/v3:"TZCNTL"
	// arm:"CLZ"
	// arm64:"RBITW","CLZW"
	// s390x:"FLOGR","MOVWZ"
	// ppc64/power8:"ANDN","POPCNTW"
	// ppc64le/power8:"ANDN","POPCNTW"
	// ppc64/power9: "CNTTZW"
	// ppc64le/power9: "CNTTZW"
	// wasm:"I64Ctz"
	return bits.TrailingZeros32(n)
}

func TrailingZeros16(n uint16) int {
	// amd64:"BSFL","BTSL\\t\\$16"
	// 386:"BSFL\t"
	// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
	// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
	// s390x:"FLOGR","OR\t\\$65536"
	// ppc64/power8:"POPCNTD","OR\\t\\$65536"
	// ppc64le/power8:"POPCNTD","OR\\t\\$65536"
	// ppc64/power9:"CNTTZD","OR\\t\\$65536"
	// ppc64le/power9:"CNTTZD","OR\\t\\$65536"
	// wasm:"I64Ctz"
	return bits.TrailingZeros16(n)
}

func TrailingZeros8(n uint8) int {
	// amd64:"BSFL","BTSL\\t\\$8"
	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
	// s390x:"FLOGR","OR\t\\$256"
	// wasm:"I64Ctz"
	return bits.TrailingZeros8(n)
}

// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.

func IterateBits(n uint) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		i += bits.TrailingZeros(n)
		n &= n - 1
	}
	return i
}

func IterateBits64(n uint64) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		i += bits.TrailingZeros64(n)
		n &= n - 1
	}
	return i
}

func IterateBits32(n uint32) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSQ"
		// amd64/v3:"TZCNTL"
		i += bits.TrailingZeros32(n)
		n &= n - 1
	}
	return i
}

func IterateBits16(n uint16) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW","CLZW",-"ORR"
		i += bits.TrailingZeros16(n)
		n &= n - 1
	}
	return i
}

func IterateBits8(n uint8) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW","CLZW",-"ORR"
		i += bits.TrailingZeros8(n)
		n &= n - 1
	}
	return i
}

// --------------- //
//    bits.Add*    //
// --------------- //

func Add(x, y, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// ppc64: "ADDC", "ADDE", "ADDZE"
	// ppc64le: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, y, ci)
}

func AddC(x, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64: "ADDC", "ADDE", "ADDZE"
	// ppc64le: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, 7, ci)
}

func AddZ(x, y uint) (r, co uint) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64: "ADDC", -"ADDE", "ADDZE"
	// ppc64le: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	// riscv64: "ADD","SLTU"
	return bits.Add(x, y, 0)
}

func AddR(x, y, ci uint) uint {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64: "ADDC", "ADDE", -"ADDZE"
	// ppc64le: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// riscv64: "ADD",-"SLTU"
	r, _ := bits.Add(x, y, ci)
	return r
}

func AddM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Add(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add(p[1], q[1], c)
	r[2], c = bits.Add(p[2], q[2], c)
}

func Add64(x, y, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64: "ADDC", "ADDE", "ADDZE"
	// ppc64le: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, y, ci)
}

func Add64C(x, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64: "ADDC", "ADDE", "ADDZE"
	// ppc64le: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, 7, ci)
}

func Add64Z(x, y uint64) (r, co uint64) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64: "ADDC", -"ADDE", "ADDZE"
	// ppc64le: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	// riscv64: "ADD","SLTU"
	return bits.Add64(x, y, 0)
}

func Add64R(x, y, ci uint64) uint64 {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64: "ADDC", "ADDE", -"ADDZE"
	// ppc64le: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	// riscv64: "ADD",-"SLTU"
	r, _ := bits.Add64(x, y, ci)
	return r
}
func Add64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// ppc64: -"ADDC", "ADDE", -"ADDZE"
	// ppc64le: -"ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add64(p[1], q[1], c)
	r[2], c = bits.Add64(p[2], q[2], c)
}

func Add64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64: "ADDC\tR", "ADDZE"
	// ppc64le: "ADDC\tR", "ADDZE"
	r[0], c[0] = bits.Add64(p[0], q[0], 0)
	// ppc64: "ADDC\t[$]-1", "ADDE", "ADDZE"
	// ppc64le: "ADDC\t[$]-1", "ADDE", "ADDZE"
	r[1], c[1] = bits.Add64(p[1], q[1], c[0])
}

func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c == 1 {
		panic("overflow")
	}
	return r
}

func Add64PanicOnOverflowNE(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c != 0 {
		panic("overflow")
	}
	return r
}

func Add64PanicOnOverflowGT(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c > 0 {
		panic("overflow")
	}
	return r
}

func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// Verify independent carry chain operations are scheduled efficiently
// and do not cause unnecessary save/restore of the CA bit.
//
// This is an example of why CarryChainTail priority must be lower
// (earlier in the block) than Memory. f[0]=f1 could be scheduled
// after the first two lower 64 bit limb adds, but before either
// high 64 bit limbs are added.
//
// This is what happened on PPC64 when compiling
// crypto/internal/edwards25519/field.feMulGeneric.
func Add64MultipleChains(a, b, c, d [2]uint64) {
	var cx, d1, d2 uint64
	a1, a2 := a[0], a[1]
	b1, b2 := b[0], b[1]
	c1, c2 := c[0], c[1]

	// ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	// ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	d1, cx = bits.Add64(a1, b1, 0)
	// ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	// ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	d2, _ = bits.Add64(a2, b2, cx)

	// ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	// ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
	d1, cx = bits.Add64(c1, d1, 0)
	// ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	// ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER"
	d2, _ = bits.Add64(c2, d2, cx)
	d[0] = d1
	d[1] = d2
}

// --------------- //
//    bits.Sub*    //
// --------------- //

func Sub(x, y, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
	// ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, y, ci)
}

func SubC(x, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
	// ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, 7, ci)
}

func SubZ(x, y uint) (r, co uint) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
	// ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// riscv64: "SUB","SLTU"
	return bits.Sub(x, y, 0)
}

func SubR(x, y, ci uint) uint {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV",-"SGTU"
	// ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB",-"SLTU"
	r, _ := bits.Sub(x, y, ci)
	return r
}
func SubM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Sub(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// ppc64:-"SUBC", "SUBE", -"SUBZE", -"NEG"
	// ppc64le:-"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	r[1], c = bits.Sub(p[1], q[1], c)
	r[2], c = bits.Sub(p[2], q[2], c)
}

func Sub64(x, y, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
	// ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, y, ci)
}

func Sub64C(x, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
	// ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, 7, ci)
}

func Sub64Z(x, y uint64) (r, co uint64) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// loong64:"SUBV","SGTU"
	// ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
	// ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// riscv64: "SUB","SLTU"
	return bits.Sub64(x, y, 0)
}

func Sub64R(x, y, ci uint64) uint64 {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// loong64:"SUBV",-"SGTU"
	// ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB",-"SLTU"
	r, _ := bits.Sub64(x, y, ci)
	return r
}
func Sub64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Sub64(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	r[1], c = bits.Sub64(p[1], q[1], c)
	r[2], c = bits.Sub64(p[2], q[2], c)
}

func Sub64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG"
	// ppc64le:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG"
	r[0], c[0] = bits.Sub64(p[0], q[0], 0)
	// ppc64:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
	// ppc64le:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
	r[1], c[1] = bits.Sub64(p[1], q[1], c[0])
}

func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b == 1 {
		panic("overflow")
	}
	return r
}

func Sub64PanicOnOverflowNE(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b != 0 {
		panic("overflow")
	}
	return r
}

func Sub64PanicOnOverflowGT(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b > 0 {
		panic("overflow")
	}
	return r
}

func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// --------------- //
//    bits.Mul*    //
// --------------- //

func Mul(x, y uint) (hi, lo uint) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64:"MULHDU","MULLD"
	// ppc64le:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	return bits.Mul(x, y)
}

func Mul64(x, y uint64) (hi, lo uint64) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64:"MULHDU","MULLD"
	// ppc64le:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	// riscv64:"MULHU","MUL"
	return bits.Mul64(x, y)
}

func Mul64HiOnly(x, y uint64) uint64 {
	// arm64:"UMULH",-"MUL"
	// riscv64:"MULHU",-"MUL\t"
	hi, _ := bits.Mul64(x, y)
	return hi
}

func Mul64LoOnly(x, y uint64) uint64 {
	// arm64:"MUL",-"UMULH"
	// riscv64:"MUL\t",-"MULHU"
	_, lo := bits.Mul64(x, y)
	return lo
}

// --------------- //
//    bits.Div*    //
// --------------- //

func Div(hi, lo, x uint) (q, r uint) {
	// amd64:"DIVQ"
	return bits.Div(hi, lo, x)
}

func Div32(hi, lo, x uint32) (q, r uint32) {
	// arm64:"ORR","UDIV","MSUB",-"UREM"
	return bits.Div32(hi, lo, x)
}

func Div64(hi, lo, x uint64) (q, r uint64) {
	// amd64:"DIVQ"
	return bits.Div64(hi, lo, x)
}

func Div64degenerate(x uint64) (q, r uint64) {
	// amd64:-"DIVQ"
	return bits.Div64(0, x, 5)
}