...

Text file src/crypto/internal/fips140/sha256/sha256block_loong64.s

Documentation: crypto/internal/fips140/sha256

     1// Copyright 2024 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8
     9// SHA256 block routine. See sha256block.go for Go equivalent.
    10//
    11// The algorithm is detailed in FIPS 180-4:
    12//
    13//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    14//
    15// W[i] = M[i]; for 0 <= i <= 15
    16// W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
    17//
    18// a = H0
    19// b = H1
    20// c = H2
    21// d = H3
    22// e = H4
    23// f = H5
    24// g = H6
    25// h = H7
    26//
    27// for i = 0 to 63 {
    28//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + K[i] + W[i]
    29//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    30//    h = g
    31//    g = f
    32//    f = e
    33//    e = d + T1
    34//    d = c
    35//    c = b
    36//    b = a
    37//    a = T1 + T2
    38// }
    39//
    40// H0 = a + H0
    41// H1 = b + H1
    42// H2 = c + H2
    43// H3 = d + H3
    44// H4 = e + H4
    45// H5 = f + H5
    46// H6 = g + H6
    47// H7 = h + H7
    48
    49#define REGTMP	R30	// scratch register shared by the macros below
    50#define REGTMP1	R16	// scratch; holds T2 on exit from SHA256T2
    51#define REGTMP2	R17	// scratch
    52#define REGTMP3	R18	// scratch
    53#define REGTMP4	R7	// scratch; carries W[i] from LOAD0/LOAD1 into SHA256T1, then holds T1
    54#define REGTMP5	R6	// scratch; R6 also holds the rounded p_len in the block prologue, before the loop
    55#define REG_KT	R19	// base address of the constant table ·_K, indexed as K[i] by SHA256T1
    56
    57// W[i] = M[i]; for 0 <= i <= 15. Leaves W[index] in REGTMP4 and in slot index of the frame at R3.
    58#define LOAD0(index) \
    59	MOVW	(index*4)(R5), REGTMP4; \ // load 32-bit word number index of the input block at R5
    60	REVB2W	REGTMP4, REGTMP4; \ // reverse its byte order (FIPS 180-4 message words are big-endian)
    61	MOVW	REGTMP4, (index*4)(R3) // save W[index] in the frame for later use by LOAD1
    62
    63// W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
    64//   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
    65//   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
    66#define LOAD1(index) \
    67	MOVW	(((index-2)&0xf)*4)(R3), REGTMP4; \ // REGTMP4 = W[i-2]
    68	MOVW	(((index-15)&0xf)*4)(R3), REGTMP1; \ // REGTMP1 = W[i-15]
    69	MOVW	(((index-7)&0xf)*4)(R3), REGTMP; \ // REGTMP = W[i-7]
    70	MOVW	REGTMP4, REGTMP2; \ // copy of W[i-2] for the ROTR(19) term
    71	MOVW	REGTMP4, REGTMP3; \ // copy of W[i-2] for the SHR(10) term
    72	ROTR	$17, REGTMP4; \
    73	ROTR	$19, REGTMP2; \
    74	SRL	$10, REGTMP3; \
    75	XOR	REGTMP2, REGTMP4; \
    76	XOR	REGTMP3, REGTMP4; \ // REGTMP4 = SIGMA1(W[i-2])
    77	ROTR	$7, REGTMP1, REGTMP5; \
    78	SRL	$3, REGTMP1, REGTMP3; \
    79	ROTR	$18, REGTMP1, REGTMP2; \
    80	ADD	REGTMP, REGTMP4; \ // REGTMP4 = SIGMA1(W[i-2]) + W[i-7]
    81	MOVW	(((index-16)&0xf)*4)(R3), REGTMP; \ // REGTMP = W[i-16]
    82	XOR	REGTMP3, REGTMP5; \
    83	XOR	REGTMP2, REGTMP5; \ // REGTMP5 = SIGMA0(W[i-15])
    84	ADD	REGTMP, REGTMP5; \ // REGTMP5 = SIGMA0(W[i-15]) + W[i-16]
    85	ADD	REGTMP5, REGTMP4; \ // REGTMP4 = W[i]
    86	MOVW	REGTMP4, ((index&0xf)*4)(R3) // W[i] overwrites the slot of W[i-16] in the 16-entry ring
    87
    88// T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    89// BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
    90// Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    91//             = ((y XOR z) AND x) XOR z
    92// Calculate T1 in REGTMP4. On entry REGTMP4 must hold W[i]; h is used as an accumulator and is clobbered.
    93#define SHA256T1(index, e, f, g, h) \
    94	MOVW	(index*4)(REG_KT), REGTMP5; \ // REGTMP5 = K[index]
    95	ADDV	REGTMP5, h; \ // h += K[i]; NOTE(review): 64-bit add among 32-bit ADDs, presumably harmless as only the low 32 bits are used - confirm
    96	ADD	REGTMP4, h; \ // h += W[i]
    97	ROTR	$6, e, REGTMP5; \
    98	ROTR	$11, e, REGTMP; \
    99	ROTR	$25, e, REGTMP3; \
   100	XOR	f, g, REGTMP2; \ // REGTMP2 = f XOR g
   101	XOR	REGTMP, REGTMP5; \
   102	AND	e, REGTMP2; \ // REGTMP2 = (f XOR g) AND e
   103	XOR	REGTMP5, REGTMP3; \ // REGTMP3 = BIGSIGMA1(e)
   104	XOR	g, REGTMP2; \ // REGTMP2 = Ch(e, f, g)
   105	ADD	REGTMP3, h; \ // h = h + K[i] + W[i] + BIGSIGMA1(e)
   106	ADD	h, REGTMP2, REGTMP4 // REGTMP4 = T1
   107
   108// T2 = BIGSIGMA0(a) + Maj(a, b, c)
   109// BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
   110// Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
   111//              = ((y XOR z) AND x) XOR (y AND z)
   112// Calculate T2 in REGTMP1. a, b and c are only read; REGTMP4 (T1) is left untouched.
   113#define SHA256T2(a, b, c) \
   114	ROTR	$2, a, REGTMP5; \
   115	ROTR	$13, a, REGTMP3; \
   116	ROTR	$22, a, REGTMP2; \
   117	XOR	b, c, REGTMP; \ // REGTMP = b XOR c
   118	AND	b, c, REGTMP1; \ // REGTMP1 = b AND c
   119	XOR	REGTMP3, REGTMP5; \
   120	AND	REGTMP, a, REGTMP; \ // REGTMP = (b XOR c) AND a
   121	XOR	REGTMP2, REGTMP5; \ // REGTMP5 = BIGSIGMA0(a)
   122	XOR	REGTMP, REGTMP1; \ // REGTMP1 = Maj(a, b, c)
   123	ADD	REGTMP5, REGTMP1 // REGTMP1 = T2
   124
   125// Calculate T1 and T2, then e = d + T1 and a = T1 + T2. Expects W[i] in REGTMP4 on entry.
   126// The values for e and a are stored in d and h, ready for rotation by the caller's argument order.
   127#define SHA256ROUND(index, a, b, c, d, e, f, g, h) \
   128	SHA256T1(index, e, f, g, h); \ // REGTMP4 = T1
   129	SHA256T2(a, b, c); \ // REGTMP1 = T2
   130	ADD	REGTMP4, d; \ // d = d + T1, the new e
   131	ADD	REGTMP1, REGTMP4, h // h = T1 + T2, the new a
   132
   133#define SHA256ROUND0(index, a, b, c, d, e, f, g, h) \
   134	LOAD0(index); \ // W[index] taken straight from the message block, left in REGTMP4
   135	SHA256ROUND(index, a, b, c, d, e, f, g, h) // one round; invoked by block for rounds 0-15
   136
   137#define SHA256ROUND1(index, a, b, c, d, e, f, g, h) \
   138	LOAD1(index); \ // W[index] expanded from earlier schedule words, left in REGTMP4
   139	SHA256ROUND(index, a, b, c, d, e, f, g, h) // one round; invoked by block for rounds 16-63
   140
   141// A stack frame size of 64 bytes is required here: LOAD0 and LOAD1 keep
   142// the message schedule in the frame as a ring of 16 words of 4 bytes each,
   143// indexed by (i & 0xf). See the definitions of those macros above.
   144//
   145//func block(dig *Digest, p []byte)
   146TEXT ·block(SB),NOSPLIT,$64-32
   147	MOVV	p_base+8(FP), R5	// R5 = pointer to the current 64-byte input block
   148	MOVV	p_len+16(FP), R6	// R6 = len(p)
   149	AND	$~63, R6	// round the length down to a multiple of 64
   150	BEQ	R6, end	// return at once when p holds no complete block
   151
   152	MOVV	$·_K(SB), REG_KT		// const table
   153
   154	// p_len >= 64
   155	MOVV	dig+0(FP), R4	// R4 = dig; H0..H7 are read and written as its first eight 32-bit words
   156	ADDV	R5, R6, R25	// R25 = address just past the last complete block (R6 is reused as REGTMP5 below)
   157	MOVW	(0*4)(R4), R8	// a = H0
   158	MOVW	(1*4)(R4), R9	// b = H1
   159	MOVW	(2*4)(R4), R10	// c = H2
   160	MOVW	(3*4)(R4), R11	// d = H3
   161	MOVW	(4*4)(R4), R12	// e = H4
   162	MOVW	(5*4)(R4), R13	// f = H5
   163	MOVW	(6*4)(R4), R14	// g = H6
   164	MOVW	(7*4)(R4), R15	// h = H7
   165
   166loop:	// one 64-byte block per iteration; R8..R15 hold a..h (equal to H0..H7) on entry
   167	SHA256ROUND0(0,  R8,  R9,  R10, R11, R12, R13, R14, R15)
   168	SHA256ROUND0(1,  R15, R8,  R9,  R10, R11, R12, R13, R14)
   169	SHA256ROUND0(2,  R14, R15, R8,  R9,  R10, R11, R12, R13)
   170	SHA256ROUND0(3,  R13, R14, R15, R8,  R9,  R10, R11, R12)
   171	SHA256ROUND0(4,  R12, R13, R14, R15, R8,  R9,  R10, R11)
   172	SHA256ROUND0(5,  R11, R12, R13, R14, R15, R8,  R9,  R10)
   173	SHA256ROUND0(6,  R10, R11, R12, R13, R14, R15, R8,  R9)
   174	SHA256ROUND0(7,  R9,  R10, R11, R12, R13, R14, R15, R8)
   175	SHA256ROUND0(8,  R8,  R9,  R10, R11, R12, R13, R14, R15)
   176	SHA256ROUND0(9,  R15, R8,  R9,  R10, R11, R12, R13, R14)
   177	SHA256ROUND0(10, R14, R15, R8,  R9,  R10, R11, R12, R13)
   178	SHA256ROUND0(11, R13, R14, R15, R8,  R9,  R10, R11, R12)
   179	SHA256ROUND0(12, R12, R13, R14, R15, R8,  R9,  R10, R11)
   180	SHA256ROUND0(13, R11, R12, R13, R14, R15, R8,  R9,  R10)
   181	SHA256ROUND0(14, R10, R11, R12, R13, R14, R15, R8,  R9)
   182	SHA256ROUND0(15, R9,  R10, R11, R12, R13, R14, R15, R8)
   183
   184	SHA256ROUND1(16, R8,  R9,  R10, R11, R12, R13, R14, R15)
   185	SHA256ROUND1(17, R15, R8,  R9,  R10, R11, R12, R13, R14)
   186	SHA256ROUND1(18, R14, R15, R8,  R9,  R10, R11, R12, R13)
   187	SHA256ROUND1(19, R13, R14, R15, R8,  R9,  R10, R11, R12)
   188	SHA256ROUND1(20, R12, R13, R14, R15, R8,  R9,  R10, R11)
   189	SHA256ROUND1(21, R11, R12, R13, R14, R15, R8,  R9,  R10)
   190	SHA256ROUND1(22, R10, R11, R12, R13, R14, R15, R8,  R9)
   191	SHA256ROUND1(23, R9,  R10, R11, R12, R13, R14, R15, R8)
   192	SHA256ROUND1(24, R8,  R9,  R10, R11, R12, R13, R14, R15)
   193	SHA256ROUND1(25, R15, R8,  R9,  R10, R11, R12, R13, R14)
   194	SHA256ROUND1(26, R14, R15, R8,  R9,  R10, R11, R12, R13)
   195	SHA256ROUND1(27, R13, R14, R15, R8,  R9,  R10, R11, R12)
   196	SHA256ROUND1(28, R12, R13, R14, R15, R8,  R9,  R10, R11)
   197	SHA256ROUND1(29, R11, R12, R13, R14, R15, R8,  R9,  R10)
   198	SHA256ROUND1(30, R10, R11, R12, R13, R14, R15, R8,  R9)
   199	SHA256ROUND1(31, R9,  R10, R11, R12, R13, R14, R15, R8)
   200	SHA256ROUND1(32, R8,  R9,  R10, R11, R12, R13, R14, R15)
   201	SHA256ROUND1(33, R15, R8,  R9,  R10, R11, R12, R13, R14)
   202	SHA256ROUND1(34, R14, R15, R8,  R9,  R10, R11, R12, R13)
   203	SHA256ROUND1(35, R13, R14, R15, R8,  R9,  R10, R11, R12)
   204	SHA256ROUND1(36, R12, R13, R14, R15, R8,  R9,  R10, R11)
   205	SHA256ROUND1(37, R11, R12, R13, R14, R15, R8,  R9,  R10)
   206	SHA256ROUND1(38, R10, R11, R12, R13, R14, R15, R8,  R9)
   207	SHA256ROUND1(39, R9,  R10, R11, R12, R13, R14, R15, R8)
   208	SHA256ROUND1(40, R8,  R9,  R10, R11, R12, R13, R14, R15)
   209	SHA256ROUND1(41, R15, R8,  R9,  R10, R11, R12, R13, R14)
   210	SHA256ROUND1(42, R14, R15, R8,  R9,  R10, R11, R12, R13)
   211	SHA256ROUND1(43, R13, R14, R15, R8,  R9,  R10, R11, R12)
   212	SHA256ROUND1(44, R12, R13, R14, R15, R8,  R9,  R10, R11)
   213	SHA256ROUND1(45, R11, R12, R13, R14, R15, R8,  R9,  R10)
   214	SHA256ROUND1(46, R10, R11, R12, R13, R14, R15, R8,  R9)
   215	SHA256ROUND1(47, R9,  R10, R11, R12, R13, R14, R15, R8)
   216	SHA256ROUND1(48, R8,  R9,  R10, R11, R12, R13, R14, R15)
   217	SHA256ROUND1(49, R15, R8,  R9,  R10, R11, R12, R13, R14)
   218	SHA256ROUND1(50, R14, R15, R8,  R9,  R10, R11, R12, R13)
   219	SHA256ROUND1(51, R13, R14, R15, R8,  R9,  R10, R11, R12)
   220	SHA256ROUND1(52, R12, R13, R14, R15, R8,  R9,  R10, R11)
   221	SHA256ROUND1(53, R11, R12, R13, R14, R15, R8,  R9,  R10)
   222	SHA256ROUND1(54, R10, R11, R12, R13, R14, R15, R8,  R9)
   223	SHA256ROUND1(55, R9,  R10, R11, R12, R13, R14, R15, R8)
   224	SHA256ROUND1(56, R8,  R9,  R10, R11, R12, R13, R14, R15)
   225	SHA256ROUND1(57, R15, R8,  R9,  R10, R11, R12, R13, R14)
   226	SHA256ROUND1(58, R14, R15, R8,  R9,  R10, R11, R12, R13)
   227	SHA256ROUND1(59, R13, R14, R15, R8,  R9,  R10, R11, R12)
   228	SHA256ROUND1(60, R12, R13, R14, R15, R8,  R9,  R10, R11)
   229	SHA256ROUND1(61, R11, R12, R13, R14, R15, R8,  R9,  R10)
   230	SHA256ROUND1(62, R10, R11, R12, R13, R14, R15, R8,  R9)
   231	SHA256ROUND1(63, R9,  R10, R11, R12, R13, R14, R15, R8)
   232
   233	MOVW	(0*4)(R4), REGTMP	// reload the previous H0..H3 from dig; a..h are back in R8..R15 after 64 rounds
   234	MOVW	(1*4)(R4), REGTMP1
   235	MOVW	(2*4)(R4), REGTMP2
   236	MOVW	(3*4)(R4), REGTMP3
   237	ADD	REGTMP, R8	// H0 = a + H0
   238	ADD	REGTMP1, R9	// H1 = b + H1
   239	ADD	REGTMP2, R10	// H2 = c + H2
   240	ADD	REGTMP3, R11	// H3 = d + H3
   241	MOVW	R8, (0*4)(R4)	// store the updated H0..H3; R8..R11 keep them as the next block's a..d
   242	MOVW	R9, (1*4)(R4)
   243	MOVW	R10, (2*4)(R4)
   244	MOVW	R11, (3*4)(R4)
   245	MOVW	(4*4)(R4), REGTMP	// reload the previous H4..H7 from dig
   246	MOVW	(5*4)(R4), REGTMP1
   247	MOVW	(6*4)(R4), REGTMP2
   248	MOVW	(7*4)(R4), REGTMP3
   249	ADD	REGTMP, R12	// H4 = e + H4
   250	ADD	REGTMP1, R13	// H5 = f + H5
   251	ADD	REGTMP2, R14	// H6 = g + H6
   252	ADD	REGTMP3, R15	// H7 = h + H7
   253	MOVW	R12, (4*4)(R4)	// store the updated H4..H7; R12..R15 keep them as the next block's e..h
   254	MOVW	R13, (5*4)(R4)
   255	MOVW	R14, (6*4)(R4)
   256	MOVW	R15, (7*4)(R4)
   257
   258	ADDV	$64, R5	// advance the input pointer to the next block
   259	BNE	R5, R25, loop	// keep going until the end address computed in the prologue is reached
   260
   261end:
   262	RET

View as plain text