...

Text file src/internal/bytealg/indexbyte_riscv64.s

Documentation: internal/bytealg

     1// Copyright 2019 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "asm_riscv64.h"
     6#include "go_asm.h"
     7#include "textflag.h"
     8
     9TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
    10	// In:  X10 = b_base, X11 = b_len, X12 = b_cap (unused), X13 = byte to find
    11	// Out: X10 = index of the first matching byte, or -1 if there is none
    12	AND	$0xff, X13, X12		// X12 = sought byte, reduced to its low 8 bits
    13
    14	// Buffers of 24 bytes or more are handed to indexByteBig.
    15	SLTI	$24, X11, X14		// X14 = 1 if b_len < 24, else 0
    16	BEQZ	X14, big
    17
    18	MOV	X10, X13		// X13 = base; X10 walks the buffer
    19	ADD	X10, X11		// X11 = end (one past the last byte)
    20	JMP	check
    21next:
    22	MOVBU	(X10), X14
    23	BEQ	X12, X14, hit
    24	ADD	$1, X10
    25check:
    26	BNE	X10, X11, next
    27	MOV	$-1, X10		// reached end without a match
    28	RET
    29
    30hit:
    31	SUB	X13, X10		// pointer - base = index
    32	RET
    33
    34big:
    35	JMP	indexByteBig<>(SB)
    36
    37TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
    38	// In:  X10 = b_base, X11 = b_len, X12 = byte to find
    39	// Out: X10 = index of the first matching byte, or -1 if there is none
    40	AND	$0xff, X12		// keep only the low 8 bits of the sought byte
    41
    42	SLTI	$24, X11, X14		// X14 = 1 if b_len < 24, else 0
    43	BEQZ	X14, big		// 24 bytes or more: hand off to indexByteBig
    44
    45	MOV	X10, X13		// X13 = base; X10 walks the buffer
    46	ADD	X10, X11		// X11 = end (one past the last byte)
    47	JMP	check
    48next:
    49	MOVBU	(X10), X14
    50	BEQ	X12, X14, hit
    51	ADD	$1, X10
    52check:
    53	BNE	X10, X11, next
    54	MOV	$-1, X10		// reached end without a match
    55	RET
    56
    57hit:
    58	SUB	X13, X10		// pointer - base = index
    59	RET
    60
    61big:
    62	JMP	indexByteBig<>(SB)
    63
    64TEXT indexByteBig<>(SB),NOSPLIT|NOFRAME,$0
    65	// Search a longer buffer for a byte; IndexByte and IndexByteString JMP here.
    66	// On entry: X10 = b_base
    67	//           X11 = b_len (at least 16 bytes; both callers in this file only come here with b_len >= 24)
    68	//           X12 = byte to find
    69	// On exit:  X10 = index of first instance of sought byte, if found, or -1 otherwise
    70	// Written along the way: X5, X6, X11, X13-X21, V0 and the V8 register group.
    71
    72	MOV	X10, X13		// X13 = base, kept to turn the final pointer into an index
    73	// If hasV is not defined at assembly time, test the HasV flag at run time.
    74#ifndef hasV
    75	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X5	// X5 = HasV flag
    76	BEQZ	X5, indexbyte_scalar	// flag is zero: take the scalar path
    77#endif
    78	// Vector path: each pass examines X5 (vl) bytes, as chosen by VSETVLI.
    79	PCALIGN	$16			// align the loop head to 16 bytes
    80vector_loop:
    81	VSETVLI	X11, E8, M8, TA, MA, X5	// X5 = vl for this pass (8-bit elements, LMUL=8), at most X11
    82	VLE8V	(X10), V8		// load vl bytes starting at X10
    83	VMSEQVX	X12, V8, V0		// V0 mask bit i is set where byte i == X12
    84	VFIRSTM	V0, X6			// X6 = index of first set mask bit, or -1 if none
    85	BGEZ	X6, vector_found
    86	ADD	X5, X10			// step past the bytes just checked
    87	SUB	X5, X11			// X11 = bytes still to search
    88	BNEZ	X11, vector_loop
    89	JMP	notfound
    90
    91vector_found:
    92	SUB	X13, X10		// X10 = offset of this chunk from base
    93	ADD	X6, X10			// add position within the chunk = result index
    94	RET
    95	// Scalar path, entered only from the HasV check above.
    96indexbyte_scalar:
    97	ADD	X10, X11		// X11 = end pointer (one past the last byte)
    98
    99	// Check bytes one at a time until X10 reaches an 8 byte boundary
   100	// (at most 7 bytes).  No need to check for end here as we have at
   101	// least 16 bytes in the buffer.
   102
   103unalignedloop:
   104	AND	$7, X10, X14		// X14 = low three address bits
   105	BEQZ	X14, aligned
   106	MOVBU	(X10), X14
   107	BEQ	X12, X14, found
   108	ADD	$1, X10
   109	JMP	unalignedloop
   110
   111aligned:
   112	AND	$~7, X11, X15		// X15 = end rounded down to a multiple of 8 (end of aligned data)
   113
   114	// We have at least 9 bytes left (at most 7 of at least 16 were consumed),
   115	// so X15 >= X10 + 8: the first 8 byte load is in bounds and X10 reaches X15 exactly.
   116	// Use 'Determine if a word has a byte equal to n' bit hack from
   117	// https://graphics.stanford.edu/~seander/bithacks.html to determine
   118	// whether the byte is present somewhere in the next 8 bytes of the
   119	// array.
   120
   121	MOV	$0x0101010101010101, X16	// X16 = 0x01 in every byte
   122	SLLI	$7, X16, X17		// X17 = 0x8080808080808080
   123
   124	MUL	X12, X16, X18		// broadcast X12 to every byte in X18
   125
   126alignedloop:
   127	MOV	(X10), X14		// load the next 8 bytes
   128	XOR	X14, X18, X19		// bytes equal to X12 become zero in X19
   129
   130	// If the byte in X12 is present somewhere in the 8 bytes we've just
   131	// loaded into X14 then at least one of the bytes in X19 will be 0
   132	// after the XOR.  If any of the bytes in X19 are zero then
   133	//
   134	// ((X19 - X16) & (~X19) & X17)
   135	//
   136	// will be non-zero.  The expression will evaluate to zero if none of
   137	// the bytes in X19 are zero, i.e., X12 is not present in X14.
   138	// The test only says that a match exists; tailloop locates it byte by byte.
   139	SUB	X16, X19, X20		// X20 = X19 - X16
   140	ANDN	X19, X17, X21		// X21 = X17 & ~X19
   141	AND	X20, X21		// X21 = (X19 - X16) & ~X19 & X17
   142	BNEZ	X21, tailloop		// If X21 != 0 X12 is present in X14
   143	ADD	$8, X10
   144	BNE	X10, X15, alignedloop
   145	// Reached by the BNEZ above (match in the word at X10) or by falling through with X10 == X15.
   146tailloop:
   147	SUB	$1, X10			// pre-decrement; the loop below increments before testing
   148
   149loop:
   150	ADD	$1, X10
   151	BEQ	X10, X11, notfound
   152	MOVBU	(X10), X14
   153	BNE	X12, X14, loop
   154
   155found:
   156	SUB	X13, X10		// remove base
   157	RET
   158
   159notfound:
   160	MOV	$-1, X10
   161	RET

View as plain text