...

Text file src/internal/bytealg/equal_riscv64.s

Documentation: internal/bytealg

     1// Copyright 2019 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "asm_riscv64.h"
     6#include "go_asm.h"
     7#include "textflag.h"
     8
     9#define	CTXT	S10
    10
     11// func memequal_varlen(a, b unsafe.Pointer) bool
     12TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
     13	// X10 = a_base
     14	// X11 = b_base
	// The size is not passed as an argument: the compiler generates a
	// closure for this function and stores the length in the closure
	// context (CTXT, i.e. S10) at offset 8.
     15	MOV	8(CTXT), X12    // compiler stores size at offset 8 in the closure
	// Tail call: memequal takes over with X10/X11/X12 already in the
	// ABIInternal argument registers and returns directly to our caller.
     16	JMP	runtime·memequal<ABIInternal>(SB)
    17
     18// func memequal(a, b unsafe.Pointer, size uintptr) bool
     19TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
     20	// X10 = a_base
     21	// X11 = b_base
     22	// X12 = size
	// Invariant: throughout the function X12 holds the number of bytes
	// not yet known to be equal. Every mismatch branch jumps to done
	// with X12 != 0; equal paths drain X12 to zero. done then computes
	// the boolean result as (X12 == 0).
	//
	// Identical pointers compare equal without touching memory: zero
	// X12 and fall through to length_check/done.
     23	BNE	X10, X11, length_check
     24	MOV	$0, X12
     25
     26length_check:
     27	BEQZ	X12, done
     28
	// Sizes below 32 bytes go straight to the small scalar loops.
     29	MOV	$32, X23
     30	BLT	X12, X23, loop4_check
     31
     32#ifndef hasV
	// V extension not guaranteed at build time: check the CPU feature
	// flag at run time and fall back to the scalar path if absent.
     33	MOVB	internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X5
     34	BEQZ	X5, equal_scalar
     35#endif
     36
     37	// Use vector if not 8 byte aligned.
     38	OR	X10, X11, X5
     39	AND	$7, X5
     40	BNEZ	X5, vector_loop
     41
     42	// Use scalar if 8 byte aligned and <= 64 bytes.
     43	SUB	$64, X12, X6
     44	BLEZ	X6, loop32_check
     45
     46	PCALIGN	$16
vector_loop:
	// X5 = number of byte elements this iteration will process
	// (vl chosen by VSETVLI from the remaining count in X12,
	// 8-bit elements, LMUL=8).
     48	VSETVLI	X12, E8, M8, TA, MA, X5
     49	VLE8V	(X10), V8
     50	VLE8V	(X11), V16
	// V0 = per-lane mask of positions where a and b differ;
	// VFIRSTM puts the index of the first set mask bit in X6,
	// or a negative value if no lanes differ.
     51	VMSNEVV	V8, V16, V0
     52	VFIRSTM	V0, X6
     53	BGEZ	X6, done	// mismatch: X12 != 0, so done reports false
     54	ADD	X5, X10
     55	ADD	X5, X11
     56	SUB	X5, X12
     57	BNEZ	X12, vector_loop
     58	JMP	done
     59
     60equal_scalar:
     61	// Check alignment - if alignment differs we have to do one byte at a time.
     62	AND	$7, X10, X9
     63	AND	$7, X11, X19
     64	BNE	X9, X19, loop4_check
     65	BEQZ	X9, loop32_check
     66
     67	// Check one byte at a time until we reach 8 byte alignment.
	// X9 = 8 - (a_base & 7): bytes needed to reach 8-byte alignment.
	// Safe to deduct from X12 up front: this path requires X12 >= 32.
     68	SUB	X9, X0, X9
     69	ADD	$8, X9, X9
     70	SUB	X9, X12, X12
align:
     72	SUB	$1, X9
     73	MOVBU	0(X10), X19
     74	MOVBU	0(X11), X20
     75	BNE	X19, X20, done
     76	ADD	$1, X10
     77	ADD	$1, X11
     78	BNEZ	X9, align
     79
	// Both pointers are now 8-byte aligned (or were already):
	// compare 32 bytes per iteration with four 8-byte loads per buffer.
loop32_check:
     81	MOV	$32, X9
     82	BLT	X12, X9, loop16_check
loop32:
     84	MOV	0(X10), X19
     85	MOV	0(X11), X20
     86	MOV	8(X10), X21
     87	MOV	8(X11), X22
     88	BNE	X19, X20, done
     89	BNE	X21, X22, done
     90	MOV	16(X10), X14
     91	MOV	16(X11), X15
     92	MOV	24(X10), X16
     93	MOV	24(X11), X17
     94	BNE	X14, X15, done
     95	BNE	X16, X17, done
     96	ADD	$32, X10
     97	ADD	$32, X11
     98	SUB	$32, X12
     99	BGE	X12, X9, loop32
    100	BEQZ	X12, done
    101
	// 16..31 bytes remaining: one 16-byte step, then fall through.
loop16_check:
    103	MOV	$16, X23
    104	BLT	X12, X23, loop4_check
loop16:
    106	MOV	0(X10), X19
    107	MOV	0(X11), X20
    108	MOV	8(X10), X21
    109	MOV	8(X11), X22
    110	BNE	X19, X20, done
    111	BNE	X21, X22, done
    112	ADD	$16, X10
    113	ADD	$16, X11
    114	SUB	$16, X12
    115	BGE	X12, X23, loop16
    116	BEQZ	X12, done
    117
	// 4..15 bytes remaining (or unaligned small input): compare four
	// bytes per iteration. Byte loads are used because the pointers
	// may not be aligned on this path.
loop4_check:
    119	MOV	$4, X23
    120	BLT	X12, X23, loop1
loop4:
    122	MOVBU	0(X10), X19
    123	MOVBU	0(X11), X20
    124	MOVBU	1(X10), X21
    125	MOVBU	1(X11), X22
    126	BNE	X19, X20, done
    127	BNE	X21, X22, done
    128	MOVBU	2(X10), X14
    129	MOVBU	2(X11), X15
    130	MOVBU	3(X10), X16
    131	MOVBU	3(X11), X17
    132	BNE	X14, X15, done
    133	BNE	X16, X17, done
    134	ADD	$4, X10
    135	ADD	$4, X11
    136	SUB	$4, X12
    137	BGE	X12, X23, loop4
    138
	// Final 0..3 bytes, one at a time.
loop1:
    140	BEQZ	X12, done
    141	MOVBU	0(X10), X19
    142	MOVBU	0(X11), X20
    143	BNE	X19, X20, done
    144	ADD	$1, X10
    145	ADD	$1, X11
    146	SUB	$1, X12
    147	JMP	loop1
    148
done:
    150	// If X12 is zero then memory is equivalent.
	// Result returned in X10 per ABIInternal.
    151	SEQZ	X12, X10
    152	RET

View as plain text