Text file src/runtime/memclr_ppc64x.s

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ppc64 || ppc64le
     6  
     7  #include "textflag.h"
     8  
     9  // See memclrNoHeapPointers Go doc for important implementation constraints.
    10  
    11  // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
    12  TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-16
    13  	// R3 = ptr, advanced as memory is cleared
    14  	// R4 = n, reduced to the bytes still to clear on the bulk paths
    15  	// R0 is stored as the zero value and used as a zero index; VS32 is zeroed by XXLXOR for 16 byte stores
    16  	// Determine if there are doublewords to clear
    17  check:
    18  	ANDCC $7, R4, R5  // R5: leftover bytes to clear
    19  	SRD   $3, R4, R6  // R6: double words to clear
    20  	CMP   R6, $0, CR1 // CR1[EQ] set if no double words
    21  
    22  	BC    12, 6, nozerolarge // branch if CR1[EQ]: fewer than 8 bytes, only single bytes
    23  	CMP   R4, $512
    24  	BLT   under512           // special case for < 512
    25  	ANDCC $127, R3, R8       // check for 128 alignment of address
    26  	BEQ   zero512setup       // already 128 byte aligned, go straight to the dcbz setup
    27  
    28  	ANDCC $7, R3, R15        // low 3 bits of the address
    29  	BEQ   zero512xsetup // at least 8 byte aligned
    30  
    31  	// zero bytes up to 8 byte alignment, storing 1, 2 and 4 bytes as the low address bits require
    32  
    33  	ANDCC $1, R3, R15 // check for byte alignment
    34  	BEQ   byte2
    35  	MOVB  R0, 0(R3)   // zero 1 byte
    36  	ADD   $1, R3      // bump ptr by 1
    37  	ADD   $-1, R4     // dec count by 1
    38  
    39  byte2:
    40  	ANDCC $2, R3, R15 // check for 2 byte alignment
    41  	BEQ   byte4
    42  	MOVH  R0, 0(R3)   // zero 2 bytes
    43  	ADD   $2, R3      // bump ptr by 2
    44  	ADD   $-2, R4     // dec count by 2
    45  
    46  byte4:
    47  	ANDCC $4, R3, R15   // check for 4 byte alignment
    48  	BEQ   zero512xsetup
    49  	MOVW  R0, 0(R3)     // zero 4 bytes
    50  	ADD   $4, R3        // bump ptr by 4
    51  	ADD   $-4, R4       // dec count by 4
    52  	BR    zero512xsetup // ptr should now be 8 byte aligned
    53  
    54  under512:
    55  	SRDCC $3, R6, R7  // R7: number of 64 byte chunks (doublewords / 8); sets CR0 for the BEQ below
    56  	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
    57  	BEQ   lt64gt8           // no full 64 byte chunk
    58  
    59  	// Prepare to clear 64 bytes at a time. Expects R7 = number of 64 byte chunks and VS32 = 0.
    60  
    61  zero64setup:
    62  	DCBTST (R3)             // prepare data cache
    63  	MOVD   R7, CTR          // number of 64 byte chunks
    64  	MOVD   $16, R8          // index regs for the 2nd, 3rd and 4th 16 byte stores
    65  	MOVD   $32, R16
    66  	MOVD   $48, R17
    67  
    68  zero64:
    69  	STXVD2X VS32, (R3+R0)   // store 16 bytes
    70  	STXVD2X VS32, (R3+R8)
    71  	STXVD2X VS32, (R3+R16)
    72  	STXVD2X VS32, (R3+R17)
    73  	ADD     $64, R3
    74  	ADD     $-64, R4
    75  	BDNZ    zero64          // dec ctr, br zero64 if ctr not 0
    76  	SRDCC   $3, R4, R6	// remaining doublewords
    77  	BEQ     nozerolarge     // fewer than 8 bytes left
    78  
    79  lt64gt8:
    80  	CMP	R4, $32		// at least 32 bytes left?
    81  	BLT	lt32gt8
    82  	MOVD	$16, R8
    83  	STXVD2X	VS32, (R3+R0)	// clear 32 bytes with two 16 byte stores
    84  	STXVD2X	VS32, (R3+R8)
    85  	ADD	$-32, R4
    86  	ADD	$32, R3
    87  lt32gt8:
    88  	CMP	R4, $16		// at least 16 bytes left?
    89  	BLT	lt16gt8
    90  	STXVD2X	VS32, (R3+R0)	// clear 16 bytes
    91  	ADD	$16, R3
    92  	ADD	$-16, R4
    93  lt16gt8:
    94  	CMP	R4, $8		// at least 8 bytes left?
    95  	BLT	nozerolarge
    96  	MOVD	R0, 0(R3)	// clear 8 bytes
    97  	ADD	$8, R3
    98  	ADD	$-8, R4
    99  
   100  nozerolarge:
   101  	ANDCC $7, R4, R5 // any remaining bytes
   102  	BC    4, 1, LR   // ble lr: return if there are no tail bytes
   103  
   104  zerotail:
   105  	MOVD R5, CTR // set up to clear tail bytes
   106  
   107  zerotailloop:
   108  	MOVB R0, 0(R3)           // clear single bytes
   109  	ADD  $1, R3
   110  	BDNZ zerotailloop // dec ctr, br zerotailloop if ctr not 0
   111  	RET
   112  
   113  zero512xsetup:  // 512 chunk with extra needed; ptr is 8 byte aligned here
   114  	ANDCC $8, R3, R11    // 8 but not 16 byte aligned?
   115  	BEQ   zero512setup16
   116  	MOVD  R0, 0(R3)      // clear 8 bytes
   117  	ADD   $8, R3         // update ptr to next 8
   118  	ADD   $-8, R4        // dec count by 8
   119  
   120  zero512setup16:
   121  	ANDCC $127, R3, R14 // offset of ptr within its 128 byte block
   122  	BEQ   zero512setup  // handle 128 byte alignment
   123  	MOVD  $128, R15
   124  	SUB   R14, R15, R14 // find increment to 128 alignment
   125  	SRD   $4, R14, R15  // number of 16 byte chunks
   126  
   127  zero512presetup:
   128  	MOVD   R15, CTR         // loop counter of 16 bytes
   129  	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
   130  
   131  zero512preloop:  // clear up to 128 alignment
   132  	STXVD2X VS32, (R3+R0)         // clear 16 bytes
   133  	ADD     $16, R3               // update ptr
   134  	ADD     $-16, R4              // dec count
   135  	BDNZ    zero512preloop
   136  
   137  zero512setup:  // setup for dcbz loop
   138  	CMP  R4, $512   // check if at least 512
   139  	BLT  remain     // alignment may have left fewer than 512 bytes
   140  	SRD  $9, R4, R8 // loop count for 512 chunks
   141  	MOVD R8, CTR    // set up counter
   142  	MOVD $128, R9   // index regs for 128 bytes
   143  	MOVD $256, R10
   144  	MOVD $384, R11
   145  	PCALIGN	$32
   146  
   147  zero512:	// each DCBZ here is relied on to clear one 128 byte chunk
   148  	DCBZ (R3+R0)        // clear first chunk
   149  	DCBZ (R3+R9)        // clear second chunk
   150  	DCBZ (R3+R10)       // clear third chunk
   151  	DCBZ (R3+R11)       // clear fourth chunk
   152  	ADD  $512, R3
   153  	BDNZ zero512
   154  	ANDCC $511, R4      // R4 = bytes left after the 512 byte chunks
   155  
   156  remain:
   157  	CMP  R4, $128  // check if 128 byte chunks left
   158  	BLT  smaller
   159  	DCBZ (R3+R0)   // clear 128
   160  	ADD  $128, R3
   161  	ADD  $-128, R4
   162  	BR   remain
   163  
   164  smaller:
   165  	ANDCC $127, R4, R7 // find leftovers
   166  	BEQ   done
   167  	CMP   R7, $64      // 64 or more, do 64 at a time
   168  	XXLXOR VS32, VS32, VS32 // clear VS32 (V0) for the vector stores that follow
   169  	BLT   lt64gt8	   // less than 64
   170  	SRD   $6, R7, R7   // set up counter for 64: R7 = number of 64 byte chunks
   171  	BR    zero64setup
   172  
   173  done:
   174  	RET
   175  

View as plain text