...

Text file src/crypto/internal/fips140/sha512/sha512block_loong64.s

Documentation: crypto/internal/fips140/sha512

     1// Copyright 2024 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8
     9// SHA512 block routine. See sha512block.go for Go equivalent.
    10
    // Register aliases shared by the macros below.
    //   REGTMP, REGTMP2, REGTMP3, REGTMP5: short-lived temporaries.
    //   REGTMP1: temporary; holds T2 on exit from SHA512T2.
    //   REGTMP4: holds W[i] on exit from LOAD0/LOAD1 and T1 on exit
    //            from SHA512T1.
    //   REG_KT:  base address of the constant table indexed by SHA512T1
    //            (loaded from ·_K in ·block).
    // REGTMP5 is R6, the same register ·block first uses for the rounded
    // length of p; that value is consumed (to form the end pointer) before
    // the first round runs.
    11#define REGTMP	R30
    12#define REGTMP1	R16
    13#define REGTMP2	R17
    14#define REGTMP3	R18
    15#define REGTMP4	R7
    16#define REGTMP5	R6
    17#define REG_KT	R19
    18
    19// W[i] = M[i]; for 0 <= i <= 15
    // Loads the 64-bit word at offset index*8 from the input block at R5,
    // reverses its byte order with REVBV (SHA-512 message words are
    // big-endian), and stores it at offset index*8 of the W buffer
    // addressed through R3.
    // Result: W[index] is left in REGTMP4. No other register is written.
    20#define LOAD0(index) \
    21	MOVV	(index*8)(R5), REGTMP4; \
    22	REVBV	REGTMP4, REGTMP4; \
    23	MOVV	REGTMP4, (index*8)(R3)
    24
    25// W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 79
    26//   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    27//   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
    // Only 16 W values are kept, in a ring buffer at R3 indexed by
    // (i & 0xf). W[i-16] and W[i] map to the same slot, so the final store
    // overwrites the oldest entry with the new one.
    // The SIGMA1 and SIGMA0 computations are interleaved with the loads
    // rather than performed one after the other.
    // Result: W[index] in REGTMP4 (also stored to the ring buffer).
    // Clobbers: REGTMP, REGTMP1, REGTMP2, REGTMP3, REGTMP5.
    28#define LOAD1(index) \
    29	MOVV	(((index-2)&0xf)*8)(R3), REGTMP4; \
    30	MOVV	(((index-15)&0xf)*8)(R3), REGTMP1; \
    31	MOVV	(((index-7)&0xf)*8)(R3), REGTMP; \
    32	MOVV	REGTMP4, REGTMP2; \
    33	MOVV	REGTMP4, REGTMP3; \
    34	ROTRV	$19, REGTMP4; \
    35	ROTRV	$61, REGTMP2; \
    36	SRLV	$6, REGTMP3; \
    37	XOR	REGTMP2, REGTMP4; \
    38	XOR	REGTMP3, REGTMP4; \
    39	ROTRV	$1, REGTMP1, REGTMP5; \
    40	SRLV	$7, REGTMP1, REGTMP2; \
    41	ROTRV	$8, REGTMP1; \
    42	ADDV	REGTMP, REGTMP4; \
    43	MOVV	(((index-16)&0xf)*8)(R3), REGTMP; \
    44	XOR	REGTMP1, REGTMP5; \
    45	XOR	REGTMP2, REGTMP5; \
    46	ADDV	REGTMP, REGTMP5; \
    47	ADDV	REGTMP5, REGTMP4; \
    48	MOVV	REGTMP4, ((index&0xf)*8)(R3)
    49
    50// h is also used as an accumulator. Wt is passed in REGTMP4.
    51//   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    52//     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
    53//     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    54//                 = ((y XOR z) AND x) XOR z
    55// Calculate T1 in REGTMP4
    // K[i] is read from offset index*8 of the table at REG_KT.
    // On exit:
    //   REGTMP4 = T1 (the incoming W[i] is overwritten)
    //   h       = h + K[i] + W[i] + BIGSIGMA1(e), i.e. T1 without the Ch term;
    //             the old h is lost.
    //   e, f, g are unchanged.
    // Clobbers: REGTMP, REGTMP2, REGTMP3, REGTMP5.
    56#define SHA512T1(index, e, f, g, h) \
    57	MOVV	(index*8)(REG_KT), REGTMP5; \
    58	ADDV	REGTMP5, h; \
    59	ADDV	REGTMP4, h; \
    60	ROTRV	$14, e, REGTMP5; \
    61	ROTRV	$18, e, REGTMP; \
    62	ROTRV	$41, e, REGTMP3; \
    63	XOR	f, g, REGTMP2; \
    64	XOR	REGTMP, REGTMP5; \
    65	AND	e, REGTMP2; \
    66	XOR	REGTMP5, REGTMP3; \
    67	XOR	g, REGTMP2; \
    68	ADDV	REGTMP3, h; \
    69	ADDV	h, REGTMP2, REGTMP4
    70
    71// T2 = BIGSIGMA0(a) + Maj(a, b, c)
    72// BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
    73// Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
    74//              = ((y XOR z) AND x) XOR (y AND z)
    75// Calculate T2 in REGTMP1
    // a, b and c are only read. REGTMP4 is not touched, so a T1 value
    // computed by a preceding SHA512T1 survives this macro.
    // Clobbers: REGTMP, REGTMP2, REGTMP3, REGTMP5.
    76#define SHA512T2(a, b, c) \
    77	ROTRV	$28, a, REGTMP5; \
    78	ROTRV	$34, a, REGTMP3; \
    79	ROTRV	$39, a, REGTMP2; \
    80	XOR	b, c, REGTMP; \
    81	AND	b, c, REGTMP1; \
    82	XOR	REGTMP3, REGTMP5; \
    83	AND	REGTMP, a, REGTMP; \
    84	XOR	REGTMP2, REGTMP5; \
    85	XOR	REGTMP, REGTMP1; \
    86	ADDV	REGTMP5, REGTMP1
    87
    88// Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
    89// The values for e and a are stored in d and h, ready for rotation.
    // Expects W[index] in REGTMP4 (as left by LOAD0/LOAD1).
    // No register-to-register moves are done for the other six working
    // variables: the caller rotates them by passing the register arguments
    // shifted by one position on the next invocation.
    90#define SHA512ROUND(index, a, b, c, d, e, f, g, h) \
    91	SHA512T1(index, e, f, g, h); \
    92	SHA512T2(a, b, c); \
    93	ADDV	REGTMP4, d; \
    94	ADDV	REGTMP1, REGTMP4, h
    95
    // One round that takes W[index] directly from the message block
    // via LOAD0, then runs SHA512ROUND. Used for index 0..15 in ·block.
    96#define SHA512ROUND0(index, a, b, c, d, e, f, g, h) \
    97	LOAD0(index); \
    98	SHA512ROUND(index, a, b, c, d, e, f, g, h)
    99
   // One round that derives W[index] from earlier schedule words
   // via LOAD1, then runs SHA512ROUND. Used for index 16..79 in ·block.
   100#define SHA512ROUND1(index, a, b, c, d, e, f, g, h) \
   101	LOAD1(index); \
   102	SHA512ROUND(index, a, b, c, d, e, f, g, h)
   103
   104// A stack frame size of 128 bytes is required here, because
   105// the frame size used for data expansion is 128 bytes.
   106// See the definition of the macro LOAD1 above (8 bytes * 16 entries).
   107//
   108// func block(dig *Digest, p []byte)
   //
   // Processes every complete 128-byte block of p and folds it into the
   // eight 64-bit hash words stored at dig. The length is rounded down to
   // a multiple of 128, so trailing bytes of a partial block are ignored;
   // if no complete block is present the function returns without
   // touching dig.
   //
   // Register use:
   //   R4      dig
   //   R5      pointer to the block currently being processed
   //   R6      p_len rounded down to a multiple of 128 (setup only;
   //           R6 is REGTMP5 and is reused as scratch inside the loop)
   //   R25     end pointer, p_base + rounded length
   //   R8-R15  working variables a-h
   //   REG_KT  address of ·_K
   //
   // NOTE(review): the hash words are read and written at offsets 0..56
   // from dig, which assumes they are the first field of Digest -
   // confirm against the Go definition.
   109TEXT ·block(SB),NOSPLIT,$128-32
   110	MOVV	p_len+16(FP), R6
   111	MOVV	p_base+8(FP), R5
   112	AND	$~127, R6
   113	BEQ	R6, end
   114
   115	MOVV	$·_K(SB), REG_KT		// const table
   116
   117	// p_len >= 128
   118	MOVV	dig+0(FP), R4
   119	ADDV	R5, R6, R25
   120	MOVV	(0*8)(R4), R8	// a = H0
   121	MOVV	(1*8)(R4), R9	// b = H1
   122	MOVV	(2*8)(R4), R10	// c = H2
   123	MOVV	(3*8)(R4), R11	// d = H3
   124	MOVV	(4*8)(R4), R12	// e = H4
   125	MOVV	(5*8)(R4), R13	// f = H5
   126	MOVV	(6*8)(R4), R14	// g = H6
   127	MOVV	(7*8)(R4), R15	// h = H7
   128
   // One iteration per 128-byte block. The register arguments shift by
   // one position each round, which performs the a..h rotation without
   // moving data. The pattern repeats every 8 rounds, so after all 80
   // rounds R8-R15 hold a-h in their original order again.
   129loop:
   // Rounds 0-15: W[i] is read from the message block.
   130	SHA512ROUND0( 0, R8,  R9,  R10, R11, R12, R13, R14, R15)
   131	SHA512ROUND0( 1, R15, R8,  R9,  R10, R11, R12, R13, R14)
   132	SHA512ROUND0( 2, R14, R15, R8,  R9,  R10, R11, R12, R13)
   133	SHA512ROUND0( 3, R13, R14, R15, R8,  R9,  R10, R11, R12)
   134	SHA512ROUND0( 4, R12, R13, R14, R15, R8,  R9,  R10, R11)
   135	SHA512ROUND0( 5, R11, R12, R13, R14, R15, R8,  R9,  R10)
   136	SHA512ROUND0( 6, R10, R11, R12, R13, R14, R15, R8,  R9)
   137	SHA512ROUND0( 7, R9,  R10, R11, R12, R13, R14, R15, R8)
   138	SHA512ROUND0( 8, R8,  R9,  R10, R11, R12, R13, R14, R15)
   139	SHA512ROUND0( 9, R15, R8,  R9,  R10, R11, R12, R13, R14)
   140	SHA512ROUND0(10, R14, R15, R8,  R9,  R10, R11, R12, R13)
   141	SHA512ROUND0(11, R13, R14, R15, R8,  R9,  R10, R11, R12)
   142	SHA512ROUND0(12, R12, R13, R14, R15, R8,  R9,  R10, R11)
   143	SHA512ROUND0(13, R11, R12, R13, R14, R15, R8,  R9,  R10)
   144	SHA512ROUND0(14, R10, R11, R12, R13, R14, R15, R8,  R9)
   145	SHA512ROUND0(15, R9,  R10, R11, R12, R13, R14, R15, R8)
   146
   // Rounds 16-79: W[i] is computed from the previous 16 schedule words.
   147	SHA512ROUND1(16, R8,  R9,  R10, R11, R12, R13, R14, R15)
   148	SHA512ROUND1(17, R15, R8,  R9,  R10, R11, R12, R13, R14)
   149	SHA512ROUND1(18, R14, R15, R8,  R9,  R10, R11, R12, R13)
   150	SHA512ROUND1(19, R13, R14, R15, R8,  R9,  R10, R11, R12)
   151	SHA512ROUND1(20, R12, R13, R14, R15, R8,  R9,  R10, R11)
   152	SHA512ROUND1(21, R11, R12, R13, R14, R15, R8,  R9,  R10)
   153	SHA512ROUND1(22, R10, R11, R12, R13, R14, R15, R8,  R9)
   154	SHA512ROUND1(23, R9,  R10, R11, R12, R13, R14, R15, R8)
   155	SHA512ROUND1(24, R8,  R9,  R10, R11, R12, R13, R14, R15)
   156	SHA512ROUND1(25, R15, R8,  R9,  R10, R11, R12, R13, R14)
   157	SHA512ROUND1(26, R14, R15, R8,  R9,  R10, R11, R12, R13)
   158	SHA512ROUND1(27, R13, R14, R15, R8,  R9,  R10, R11, R12)
   159	SHA512ROUND1(28, R12, R13, R14, R15, R8,  R9,  R10, R11)
   160	SHA512ROUND1(29, R11, R12, R13, R14, R15, R8,  R9,  R10)
   161	SHA512ROUND1(30, R10, R11, R12, R13, R14, R15, R8,  R9)
   162	SHA512ROUND1(31, R9,  R10, R11, R12, R13, R14, R15, R8)
   163	SHA512ROUND1(32, R8,  R9,  R10, R11, R12, R13, R14, R15)
   164	SHA512ROUND1(33, R15, R8,  R9,  R10, R11, R12, R13, R14)
   165	SHA512ROUND1(34, R14, R15, R8,  R9,  R10, R11, R12, R13)
   166	SHA512ROUND1(35, R13, R14, R15, R8,  R9,  R10, R11, R12)
   167	SHA512ROUND1(36, R12, R13, R14, R15, R8,  R9,  R10, R11)
   168	SHA512ROUND1(37, R11, R12, R13, R14, R15, R8,  R9,  R10)
   169	SHA512ROUND1(38, R10, R11, R12, R13, R14, R15, R8,  R9)
   170	SHA512ROUND1(39, R9,  R10, R11, R12, R13, R14, R15, R8)
   171	SHA512ROUND1(40, R8,  R9,  R10, R11, R12, R13, R14, R15)
   172	SHA512ROUND1(41, R15, R8,  R9,  R10, R11, R12, R13, R14)
   173	SHA512ROUND1(42, R14, R15, R8,  R9,  R10, R11, R12, R13)
   174	SHA512ROUND1(43, R13, R14, R15, R8,  R9,  R10, R11, R12)
   175	SHA512ROUND1(44, R12, R13, R14, R15, R8,  R9,  R10, R11)
   176	SHA512ROUND1(45, R11, R12, R13, R14, R15, R8,  R9,  R10)
   177	SHA512ROUND1(46, R10, R11, R12, R13, R14, R15, R8,  R9)
   178	SHA512ROUND1(47, R9,  R10, R11, R12, R13, R14, R15, R8)
   179	SHA512ROUND1(48, R8,  R9,  R10, R11, R12, R13, R14, R15)
   180	SHA512ROUND1(49, R15, R8,  R9,  R10, R11, R12, R13, R14)
   181	SHA512ROUND1(50, R14, R15, R8,  R9,  R10, R11, R12, R13)
   182	SHA512ROUND1(51, R13, R14, R15, R8,  R9,  R10, R11, R12)
   183	SHA512ROUND1(52, R12, R13, R14, R15, R8,  R9,  R10, R11)
   184	SHA512ROUND1(53, R11, R12, R13, R14, R15, R8,  R9,  R10)
   185	SHA512ROUND1(54, R10, R11, R12, R13, R14, R15, R8,  R9)
   186	SHA512ROUND1(55, R9,  R10, R11, R12, R13, R14, R15, R8)
   187	SHA512ROUND1(56, R8,  R9,  R10, R11, R12, R13, R14, R15)
   188	SHA512ROUND1(57, R15, R8,  R9,  R10, R11, R12, R13, R14)
   189	SHA512ROUND1(58, R14, R15, R8,  R9,  R10, R11, R12, R13)
   190	SHA512ROUND1(59, R13, R14, R15, R8,  R9,  R10, R11, R12)
   191	SHA512ROUND1(60, R12, R13, R14, R15, R8,  R9,  R10, R11)
   192	SHA512ROUND1(61, R11, R12, R13, R14, R15, R8,  R9,  R10)
   193	SHA512ROUND1(62, R10, R11, R12, R13, R14, R15, R8,  R9)
   194	SHA512ROUND1(63, R9,  R10, R11, R12, R13, R14, R15, R8)
   195	SHA512ROUND1(64, R8,  R9,  R10, R11, R12, R13, R14, R15)
   196	SHA512ROUND1(65, R15, R8,  R9,  R10, R11, R12, R13, R14)
   197	SHA512ROUND1(66, R14, R15, R8,  R9,  R10, R11, R12, R13)
   198	SHA512ROUND1(67, R13, R14, R15, R8,  R9,  R10, R11, R12)
   199	SHA512ROUND1(68, R12, R13, R14, R15, R8,  R9,  R10, R11)
   200	SHA512ROUND1(69, R11, R12, R13, R14, R15, R8,  R9,  R10)
   201	SHA512ROUND1(70, R10, R11, R12, R13, R14, R15, R8,  R9)
   202	SHA512ROUND1(71, R9,  R10, R11, R12, R13, R14, R15, R8)
   203	SHA512ROUND1(72, R8,  R9,  R10, R11, R12, R13, R14, R15)
   204	SHA512ROUND1(73, R15, R8,  R9,  R10, R11, R12, R13, R14)
   205	SHA512ROUND1(74, R14, R15, R8,  R9,  R10, R11, R12, R13)
   206	SHA512ROUND1(75, R13, R14, R15, R8,  R9,  R10, R11, R12)
   207	SHA512ROUND1(76, R12, R13, R14, R15, R8,  R9,  R10, R11)
   208	SHA512ROUND1(77, R11, R12, R13, R14, R15, R8,  R9,  R10)
   209	SHA512ROUND1(78, R10, R11, R12, R13, R14, R15, R8,  R9)
   210	SHA512ROUND1(79, R9,  R10, R11, R12, R13, R14, R15, R8)
   211
   // Add the working variables to the stored hash words, four at a
   // time, and write the sums back. R8-R15 keep the updated values, so
   // they serve as a-h for the next block without being reloaded.
   212	MOVV	(0*8)(R4), REGTMP
   213	MOVV	(1*8)(R4), REGTMP1
   214	MOVV	(2*8)(R4), REGTMP2
   215	MOVV	(3*8)(R4), REGTMP3
   216	ADDV	REGTMP, R8	// H0 = a + H0
   217	ADDV	REGTMP1, R9	// H1 = b + H1
   218	ADDV	REGTMP2, R10	// H2 = c + H2
   219	ADDV	REGTMP3, R11	// H3 = d + H3
   220	MOVV	R8, (0*8)(R4)
   221	MOVV	R9, (1*8)(R4)
   222	MOVV	R10, (2*8)(R4)
   223	MOVV	R11, (3*8)(R4)
   224	MOVV	(4*8)(R4), REGTMP
   225	MOVV	(5*8)(R4), REGTMP1
   226	MOVV	(6*8)(R4), REGTMP2
   227	MOVV	(7*8)(R4), REGTMP3
   228	ADDV	REGTMP, R12	// H4 = e + H4
   229	ADDV	REGTMP1, R13	// H5 = f + H5
   230	ADDV	REGTMP2, R14	// H6 = g + H6
   231	ADDV	REGTMP3, R15	// H7 = h + H7
   232	MOVV	R12, (4*8)(R4)
   233	MOVV	R13, (5*8)(R4)
   234	MOVV	R14, (6*8)(R4)
   235	MOVV	R15, (7*8)(R4)
   236
   // Advance to the next block. R25 lies a whole number of 128-byte
   // blocks past the starting R5, so the equality test terminates.
   237	ADDV	$128, R5
   238	BNE	R5, R25, loop
   239
   240end:
   241	RET

View as plain text