Text file src/crypto/sha256/sha256block_ppc64x.s

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

#include "textflag.h"

// SHA256 block routine. See sha256block.go for Go equivalent.
//
// The algorithm is detailed in FIPS 180-4:
//
//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
//
// Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
//
// a = H0
// b = H1
// c = H2
// d = H3
// e = H4
// f = H5
// g = H6
// h = H7
//
// for t = 0 to 63 {
//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
//    h = g
//    g = f
//    f = e
//    e = d + T1
//    d = c
//    c = b
//    b = a
//    a = T1 + T2
// }
//
// H0 = a + H0
// H1 = b + H1
// H2 = c + H2
// H3 = d + H3
// H4 = e + H4
// H5 = f + H5
// H6 = g + H6
// H7 = h + H7
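//
// For reference, a rough Go sketch of the same transformation (a sketch only:
// it assumes the digest's h [8]uint32 field and the _K round-constant table
// from sha256block.go, plus math/bits; the assembly below is what actually
// runs on ppc64x):
//
//	func blockGeneric(dig *digest, p []byte) {
//		h0, h1, h2, h3, h4, h5, h6, h7 :=
//			dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]
//		for ; len(p) >= 64; p = p[64:] {
//			var w [64]uint32
//			for i := 0; i < 16; i++ {
//				j := i * 4
//				w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3])
//			}
//			for i := 16; i < 64; i++ {
//				v1, v2 := w[i-2], w[i-15]
//				s1 := bits.RotateLeft32(v1, -17) ^ bits.RotateLeft32(v1, -19) ^ (v1 >> 10)
//				s0 := bits.RotateLeft32(v2, -7) ^ bits.RotateLeft32(v2, -18) ^ (v2 >> 3)
//				w[i] = s1 + w[i-7] + s0 + w[i-16]
//			}
//			a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7
//			for i := 0; i < 64; i++ {
//				t1 := h + (bits.RotateLeft32(e, -6) ^ bits.RotateLeft32(e, -11) ^ bits.RotateLeft32(e, -25)) +
//					((e & f) ^ (^e & g)) + _K[i] + w[i]
//				t2 := (bits.RotateLeft32(a, -2) ^ bits.RotateLeft32(a, -13) ^ bits.RotateLeft32(a, -22)) +
//					((a & b) ^ (a & c) ^ (b & c))
//				h, g, f, e, d, c, b, a = g, f, e, d+t1, c, b, a, t1+t2
//			}
//			h0, h1, h2, h3, h4, h5, h6, h7 = h0+a, h1+b, h2+c, h3+d, h4+e, h5+f, h6+g, h7+h
//		}
//		dig.h[0], dig.h[1], dig.h[2], dig.h[3] = h0, h1, h2, h3
//		dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h4, h5, h6, h7
//	}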

#define CTX	R3
#define INP	R4
#define END	R5
#define TBL	R6
#define IDX	R7
#define LEN	R9
#define TEMP	R12

#define HEX00	R0
#define HEX10	R10

// V0-V7 are A-H
// V8-V23 are used for the message schedule
#define KI	V24
#define FUNC	V25
#define S0	V26
#define S1	V27
#define s0	V28
#define s1	V29
#define LEMASK	V31	// Permutation control register for little endian

// 4 copies of each Kt, to fill all 4 words of a vector register
DATA  ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
DATA  ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
DATA  ·kcon+0x010(SB)/8, $0x7137449171374491
DATA  ·kcon+0x018(SB)/8, $0x7137449171374491
DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
DATA  ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
DATA  ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
DATA  ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
DATA  ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
DATA  ·kcon+0x050(SB)/8, $0x59f111f159f111f1
DATA  ·kcon+0x058(SB)/8, $0x59f111f159f111f1
DATA  ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
DATA  ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
DATA  ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
DATA  ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
DATA  ·kcon+0x090(SB)/8, $0x12835b0112835b01
DATA  ·kcon+0x098(SB)/8, $0x12835b0112835b01
DATA  ·kcon+0x0A0(SB)/8, $0x243185be243185be
DATA  ·kcon+0x0A8(SB)/8, $0x243185be243185be
DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
DATA  ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
DATA  ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
DATA  ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
DATA  ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
DATA  ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
DATA  ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
DATA  ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
DATA  ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
DATA  ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
DATA  ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
DATA  ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
DATA  ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
DATA  ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
DATA  ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
DATA  ·kcon+0x170(SB)/8, $0x76f988da76f988da
DATA  ·kcon+0x178(SB)/8, $0x76f988da76f988da
DATA  ·kcon+0x180(SB)/8, $0x983e5152983e5152
DATA  ·kcon+0x188(SB)/8, $0x983e5152983e5152
DATA  ·kcon+0x190(SB)/8, $0xa831c66da831c66d
DATA  ·kcon+0x198(SB)/8, $0xa831c66da831c66d
DATA  ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
DATA  ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
DATA  ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
DATA  ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
DATA  ·kcon+0x1F0(SB)/8, $0x1429296714292967
DATA  ·kcon+0x1F8(SB)/8, $0x1429296714292967
DATA  ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
DATA  ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
DATA  ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
DATA  ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
DATA  ·kcon+0x230(SB)/8, $0x53380d1353380d13
DATA  ·kcon+0x238(SB)/8, $0x53380d1353380d13
DATA  ·kcon+0x240(SB)/8, $0x650a7354650a7354
DATA  ·kcon+0x248(SB)/8, $0x650a7354650a7354
DATA  ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
DATA  ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
DATA  ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
DATA  ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
DATA  ·kcon+0x270(SB)/8, $0x92722c8592722c85
DATA  ·kcon+0x278(SB)/8, $0x92722c8592722c85
DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
DATA  ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
DATA  ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
DATA  ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
DATA  ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
DATA  ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
DATA  ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
DATA  ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
DATA  ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
DATA  ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
DATA  ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
DATA  ·kcon+0x310(SB)/8, $0x1e376c081e376c08
DATA  ·kcon+0x318(SB)/8, $0x1e376c081e376c08
DATA  ·kcon+0x320(SB)/8, $0x2748774c2748774c
DATA  ·kcon+0x328(SB)/8, $0x2748774c2748774c
DATA  ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
DATA  ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
DATA  ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
DATA  ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
DATA  ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
DATA  ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
DATA  ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
DATA  ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
DATA  ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
DATA  ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
DATA  ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
DATA  ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
DATA  ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
DATA  ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
DATA  ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
DATA  ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
DATA  ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
DATA  ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
DATA  ·kcon+0x400(SB)/8, $0x0000000000000000
DATA  ·kcon+0x408(SB)/8, $0x0000000000000000

#ifdef GOARCH_ppc64le
DATA  ·kcon+0x410(SB)/8, $0x1011121310111213	// permutation control vectors
DATA  ·kcon+0x418(SB)/8, $0x1011121300010203
DATA  ·kcon+0x420(SB)/8, $0x1011121310111213
DATA  ·kcon+0x428(SB)/8, $0x0405060700010203
DATA  ·kcon+0x430(SB)/8, $0x1011121308090a0b
DATA  ·kcon+0x438(SB)/8, $0x0405060700010203
#else
DATA  ·kcon+0x410(SB)/8, $0x1011121300010203
DATA  ·kcon+0x418(SB)/8, $0x1011121310111213	// permutation control vectors
DATA  ·kcon+0x420(SB)/8, $0x0405060700010203
DATA  ·kcon+0x428(SB)/8, $0x1011121310111213
DATA  ·kcon+0x430(SB)/8, $0x0001020304050607
DATA  ·kcon+0x438(SB)/8, $0x08090a0b10111213
#endif
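
// Table size: 64 K constants x 16 bytes each, one all-zero 16-byte entry, and
// three 16-byte permutation control vectors: 1024 + 16 + 48 = 1088 bytes. The
// zero entry makes the final round's constant pre-add (see the round macros
// below) a harmless no-op.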

GLOBL ·kcon(SB), RODATA, $1088

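// SHA256ROUND0 performs one compression round on the working variables a-h
// with the message word xi. The constant Kt for this round was already folded
// into h by the previous round (or, for round 0, by the code just before the
// first round); this round in turn pre-adds the next constant (in KI) to g,
// which becomes the next round's h, and then loads the constant after that
// into KI.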
#define SHA256ROUND0(a, b, c, d, e, f, g, h, xi) \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAW	$15, e, $1, S1; \
	VADDUWM		xi, h, h; \
	VSHASIGMAW	$0, a, $1, S0; \
	VADDUWM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUWM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUWM		KI, g, g; \
	VADDUWM		h, d, d; \
	VADDUWM		FUNC, S0, S0; \
	LVX		(TBL)(IDX), KI; \
	ADD		$16, IDX; \
	VADDUWM		S0, h, h

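// SHA256ROUND1 is SHA256ROUND0 plus message schedule expansion: it rewrites
// xj in place as xj + sigma0(xj_1) + xj_9 + sigma1(xj_14), i.e.
// W[t+16] = W[t] + SIGMA0(W[t+1]) + W[t+9] + SIGMA1(W[t+14]), so the sixteen
// message registers V8-V23 are reused as a sliding window over W.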
#define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14) \
	VSHASIGMAW	$0, xj_1, $0, s0; \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAW	$15, e, $1, S1; \
	VADDUWM		xi, h, h; \
	VSHASIGMAW	$0, a, $1, S0; \
	VSHASIGMAW	$15, xj_14, $0, s1; \
	VADDUWM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUWM		xj_9, xj, xj; \
	VADDUWM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUWM		KI, g, g; \
	VADDUWM		h, d, d; \
	VADDUWM		FUNC, S0, S0; \
	VADDUWM		s0, xj, xj; \
	LVX		(TBL)(IDX), KI; \
	ADD		$16, IDX; \
	VADDUWM		S0, h, h; \
	VADDUWM		s1, xj, xj

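// VPERMLE fixes up the byte order of a just-loaded message quadword on
// little-endian targets (using LEMASK); it expands to nothing on big-endian.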
#ifdef GOARCH_ppc64le
#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
#else
#define VPERMLE(va,vb,vc,vt)
#endif

// func block(dig *digest, p []byte)
TEXT ·block(SB),0,$0-32
	MOVD	dig+0(FP), CTX
	MOVD	p_base+8(FP), INP
	MOVD	p_len+16(FP), LEN

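	// Round the length down to a multiple of 64 bytes (the block size)
	// and compute the end-of-input pointer.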
	SRD	$6, LEN
	SLD	$6, LEN
	ADD	INP, LEN, END

	CMP	INP, END
	BEQ	end

	MOVD	$·kcon(SB), TBL
	MOVWZ	$0x10, HEX10
	MOVWZ	$8, IDX

#ifdef GOARCH_ppc64le
	LVSL	(IDX)(R0), LEMASK
	VSPLTISB	$0x0F, KI
	VXOR	KI, LEMASK, LEMASK
#endif

	LXVW4X	(CTX)(HEX00), VS32	// v0 = vs32
	LXVW4X	(CTX)(HEX10), VS36	// v4 = vs36

	// unpack the hash state into the working registers V0-V7 (a-h)
	VSLDOI	$4, V0, V0, V1
	VSLDOI	$8, V0, V0, V2
	VSLDOI	$12, V0, V0, V3
	VSLDOI	$4, V4, V4, V5
	VSLDOI	$8, V4, V4, V6
	VSLDOI	$12, V4, V4, V7

loop:
	LVX	(TBL)(HEX00), KI
	MOVWZ	$16, IDX

	LXVD2X	(INP)(R0), VS40	// load v8 (=vs40) in advance
	ADD	$16, INP

	// Offload the state to VSR24-31 (aka FPR24-31) so it can be added
	// back in after the 64 rounds
	XXLOR	V0, V0, VS24
	XXLOR	V1, V1, VS25
	XXLOR	V2, V2, VS26
	XXLOR	V3, V3, VS27
	XXLOR	V4, V4, VS28
	XXLOR	V5, V5, VS29
	XXLOR	V6, V6, VS30
	XXLOR	V7, V7, VS31

	VADDUWM	KI, V7, V7	// h+K[i]
	LVX	(TBL)(IDX), KI
	ADD	$16, IDX

	VPERMLE(V8, V8, LEMASK, V8)
	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8)
	VSLDOI	$4, V8, V8, V9
	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9)
	VSLDOI	$4, V9, V9, V10
	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10)
	LXVD2X	(INP)(R0), VS44	// load v12 (=vs44) in advance
	ADD	$16, INP, INP
	VSLDOI	$4, V10, V10, V11
	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11)
	VPERMLE(V12, V12, LEMASK, V12)
	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12)
	VSLDOI	$4, V12, V12, V13
	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13)
	VSLDOI	$4, V13, V13, V14
	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14)
	LXVD2X	(INP)(R0), VS48	// load v16 (=vs48) in advance
	ADD	$16, INP, INP
	VSLDOI	$4, V14, V14, V15
	SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15)
	VPERMLE(V16, V16, LEMASK, V16)
	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16)
	VSLDOI	$4, V16, V16, V17
	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17)
	VSLDOI	$4, V17, V17, V18
	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18)
	VSLDOI	$4, V18, V18, V19
	LXVD2X	(INP)(R0), VS52	// load v20 (=vs52) in advance
	ADD	$16, INP, INP
	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19)
	VPERMLE(V20, V20, LEMASK, V20)
	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20)
	VSLDOI	$4, V20, V20, V21
	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21)
	VSLDOI	$4, V21, V21, V22
	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22)
	VSLDOI	$4, V22, V22, V23
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)

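	// Rounds 0-15 are done above; three more passes of 16
	// schedule-extending rounds cover rounds 16-63.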
	MOVWZ	$3, TEMP
	MOVWZ	TEMP, CTR

L16_xx:
	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23)
	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8)
	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9)
	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10)
	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11)
	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12)
	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13)
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14)
	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15)
	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16)
	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17)
	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18)
	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19)
	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20)
	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21)
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)

	BC	0x10, 0, L16_xx		// bdnz

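	// Add the state saved in VSR24-31 at the start of this block back
	// into the working variables a-h.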
	XXLOR	VS24, VS24, V10

	XXLOR	VS25, VS25, V11
	VADDUWM	V10, V0, V0
	XXLOR	VS26, VS26, V12
	VADDUWM	V11, V1, V1
	XXLOR	VS27, VS27, V13
	VADDUWM	V12, V2, V2
	XXLOR	VS28, VS28, V14
	VADDUWM	V13, V3, V3
	XXLOR	VS29, VS29, V15
	VADDUWM	V14, V4, V4
	XXLOR	VS30, VS30, V16
	VADDUWM	V15, V5, V5
	XXLOR	VS31, VS31, V17
	VADDUWM	V16, V6, V6
	VADDUWM	V17, V7, V7

	CMPU	INP, END
	BLT	loop

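	// KI, V8 and V9 now hold the permutation control vectors from the end
	// of the kcon table; use them to gather the eight state words from
	// V0-V7 back into V0 and V4 and store the updated digest.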
	LVX	(TBL)(IDX), V8
	ADD	$16, IDX
	VPERM	V0, V1, KI, V0
	LVX	(TBL)(IDX), V9
	VPERM	V4, V5, KI, V4
	VPERM	V0, V2, V8, V0
	VPERM	V4, V6, V8, V4
	VPERM	V0, V3, V9, V0
	VPERM	V4, V7, V9, V4
	STXVD2X	VS32, (CTX+HEX00)	// v0 = vs32
	STXVD2X	VS36, (CTX+HEX10)	// v4 = vs36

end:
	RET
