Text file src/internal/bytealg/compare_riscv64.s

// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "asm_riscv64.h"
#include "go_asm.h"
#include "textflag.h"

TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)

// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// return value in X10 (-1/0/1)
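//
// Strategy: scan the common prefix with vector loads when the V
// extension is available, with 8-byte loads when both pointers share
// 8-byte alignment, and with byte loads otherwise. If the common
// prefix is identical, the shorter operand compares less.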
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	BEQ	X10, X12, cmp_len

	MIN	X11, X13, X5
	BEQZ	X5, cmp_len

	MOV	$16, X6
	BLT	X5, X6, check8_unaligned

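	// asm_riscv64.h defines hasV when the build target guarantees the
	// vector extension (e.g. GORISCV64=rva23u64); otherwise probe for
	// it at runtime via internal/cpu.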
#ifndef hasV
	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X6
	BEQZ	X6, compare_scalar
#endif

	// Use vector if not 8 byte aligned.
	OR	X10, X12, X6
	AND	$7, X6
	BNEZ	X6, vector_loop

	// Use scalar if 8 byte aligned and <= 128 bytes.
	SUB	$128, X5, X6
	BLEZ	X6, compare_scalar_aligned

	PCALIGN	$16
vector_loop:
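	// VSETVLI sets X6 to the number of bytes handled this iteration
	// (at most 8*VLENB with LMUL=8). VMSNEVV flags unequal bytes and
	// VFIRSTM returns the index of the first flagged byte, or -1 if
	// the chunks are identical.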
	VSETVLI	X5, E8, M8, TA, MA, X6
	VLE8V	(X10), V8
	VLE8V	(X12), V16
	VMSNEVV	V8, V16, V0
	VFIRSTM	V0, X7
	BGEZ	X7, vector_not_eq
	ADD	X6, X10
	ADD	X6, X12
	SUB	X6, X5
	BNEZ	X5, vector_loop
	JMP	cmp_len

vector_not_eq:
	// Load first differing bytes in X8/X9.
	ADD	X7, X10
	ADD	X7, X12
	MOVBU	(X10), X8
	MOVBU	(X12), X9
	JMP	cmp

compare_scalar:
	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32

	// Check one byte at a time until we reach 8 byte alignment.
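	// X7 = 8 - (a_base & 7): the byte count needed to reach 8 byte
	// alignment, which is removed from the minimum length up front.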
	SUB	X7, X0, X7
	ADD	$8, X7, X7
	SUB	X7, X5, X5
align:
	SUB	$1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

compare_scalar_aligned:
	MOV	$32, X6
	BLT	X5, X6, check16
compare32:
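	// Compare 32 bytes per iteration as four 8 byte words. A mismatched
	// pair of words is narrowed to its first differing byte in cmp8a/cmp8b.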
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	SUB	$32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len

check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	SUB	$16, X5
	BEQZ	X5, cmp_len

check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
compare8_unaligned:
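	// The pointers may be misaligned, so load the next 8 bytes of each
	// operand one byte at a time and compare them pairwise.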
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	SUB	$8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len

check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	SUB	$4, X5
	BGE	X5, X6, compare4_unaligned

compare1:
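	// Compare any remaining tail one byte at a time.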
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	SUB	$1, X5
	JMP	compare1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	MOV	$0xff, X19
cmp8_loop:
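	// The words were loaded little-endian, so the lowest-addressed byte
	// sits in the least significant position: walk the 0xff mask upwards
	// until it isolates the first differing byte. The loop terminates
	// because the words are known to differ.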
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop

cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

cmp_len:
	MOV	X11, X8
	MOV	X13, X9
cmp:
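	// Set X5 = 1 if a < b and X6 = 1 if a > b (both zero when equal);
	// X6 - X5 then yields the -1/0/1 result.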
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
cmp_ret:
	SUB	X5, X6, X10
	RET
