...

Text file src/crypto/internal/fips140/edwards25519/field/fe_amd64.s

Documentation: crypto/internal/fips140/edwards25519/field

     1// Code generated by command: go run fe_amd64_asm.go -out ../fe_amd64.s -stubs ../fe_amd64.go -pkg field. DO NOT EDIT.
     2
     3//go:build !purego
     4
     5#include "textflag.h"
     6
     7// func feMul(out *Element, a *Element, b *Element)
//
// feMul multiplies the five 64-bit limbs read from a by the five 64-bit limbs
// read from b and stores five 64-bit result limbs through out.
//
// Layout and constants, as used by the code below:
//   - Each operand is read as quadwords at byte offsets 0, 8, 16, 24 and 32
//     (limbs 0..4).
//   - 0x0007ffffffffffff is 2^51 - 1, 0x33 is 51 and 0x0d is 13 = 64 - 51,
//     so limbs are treated as radix-2^51 digits.
//   - Products whose limb indices sum to 5 or more are folded back into
//     r0..r3 multiplied by 19. This matches reduction modulo 2^255 - 19
//     (5 × 51 = 255). NOTE(review): inferred from the constants and the
//     package path; confirm against the Go definition of Element.
//
// Register use: CX = a, BX = b. Each rN is a 128-bit accumulator held in a
// high:low register pair (r0 = SI:DI, r1 = R8:R9, r2 = R10:R11,
// r3 = R12:R13, r4 = R14:R15). AX and DX are scratch for MULQ, which leaves
// its 128-bit product in DX:AX.
//
// The routine is straight-line code (no branches), declares a zero-byte
// frame with 24 bytes of arguments, and loads out only after the last read
// of a and b, so every input limb is consumed before any result is stored.
//
// NOTE(review): the 64-bit 19×limb pre-multiplications and the 128-bit sums
// assume the input limbs are small enough not to overflow; those bounds are
// not visible in this file.
     8TEXT ·feMul(SB), NOSPLIT, $0-24
     9	MOVQ a+8(FP), CX
    10	MOVQ b+16(FP), BX
    11
    12	// r0 = a0×b0
	// MULQ multiplies AX by its operand and leaves the 128-bit product in
	// DX:AX; the low half starts the accumulator in DI, the high half in SI.
    13	MOVQ (CX), AX
    14	MULQ (BX)
    15	MOVQ AX, DI
    16	MOVQ DX, SI
    17
    18	// r0 += 19×a1×b4
	// The two LEAQs build AX = 19×a1 without a multiply: first 9×DX, then
	// DX + 2×(9×DX). ADDQ/ADCQ then add the 128-bit product into SI:DI,
	// propagating the carry from the low half into the high half.
    19	MOVQ 8(CX), DX
    20	LEAQ (DX)(DX*8), AX
    21	LEAQ (DX)(AX*2), AX
    22	MULQ 32(BX)
    23	ADDQ AX, DI
    24	ADCQ DX, SI
    25
    26	// r0 += 19×a2×b3
    27	MOVQ 16(CX), DX
    28	LEAQ (DX)(DX*8), AX
    29	LEAQ (DX)(AX*2), AX
    30	MULQ 24(BX)
    31	ADDQ AX, DI
    32	ADCQ DX, SI
    33
    34	// r0 += 19×a3×b2
    35	MOVQ 24(CX), DX
    36	LEAQ (DX)(DX*8), AX
    37	LEAQ (DX)(AX*2), AX
    38	MULQ 16(BX)
    39	ADDQ AX, DI
    40	ADCQ DX, SI
    41
    42	// r0 += 19×a4×b1
    43	MOVQ 32(CX), DX
    44	LEAQ (DX)(DX*8), AX
    45	LEAQ (DX)(AX*2), AX
    46	MULQ 8(BX)
    47	ADDQ AX, DI
    48	ADCQ DX, SI
    49
    50	// r1 = a0×b1
	// r1 is accumulated in R8:R9 (high:low).
    51	MOVQ (CX), AX
    52	MULQ 8(BX)
    53	MOVQ AX, R9
    54	MOVQ DX, R8
    55
    56	// r1 += a1×b0
    57	MOVQ 8(CX), AX
    58	MULQ (BX)
    59	ADDQ AX, R9
    60	ADCQ DX, R8
    61
    62	// r1 += 19×a2×b4
    63	MOVQ 16(CX), DX
    64	LEAQ (DX)(DX*8), AX
    65	LEAQ (DX)(AX*2), AX
    66	MULQ 32(BX)
    67	ADDQ AX, R9
    68	ADCQ DX, R8
    69
    70	// r1 += 19×a3×b3
    71	MOVQ 24(CX), DX
    72	LEAQ (DX)(DX*8), AX
    73	LEAQ (DX)(AX*2), AX
    74	MULQ 24(BX)
    75	ADDQ AX, R9
    76	ADCQ DX, R8
    77
    78	// r1 += 19×a4×b2
    79	MOVQ 32(CX), DX
    80	LEAQ (DX)(DX*8), AX
    81	LEAQ (DX)(AX*2), AX
    82	MULQ 16(BX)
    83	ADDQ AX, R9
    84	ADCQ DX, R8
    85
    86	// r2 = a0×b2
	// r2 is accumulated in R10:R11 (high:low).
    87	MOVQ (CX), AX
    88	MULQ 16(BX)
    89	MOVQ AX, R11
    90	MOVQ DX, R10
    91
    92	// r2 += a1×b1
    93	MOVQ 8(CX), AX
    94	MULQ 8(BX)
    95	ADDQ AX, R11
    96	ADCQ DX, R10
    97
    98	// r2 += a2×b0
    99	MOVQ 16(CX), AX
   100	MULQ (BX)
   101	ADDQ AX, R11
   102	ADCQ DX, R10
   103
   104	// r2 += 19×a3×b4
   105	MOVQ 24(CX), DX
   106	LEAQ (DX)(DX*8), AX
   107	LEAQ (DX)(AX*2), AX
   108	MULQ 32(BX)
   109	ADDQ AX, R11
   110	ADCQ DX, R10
   111
   112	// r2 += 19×a4×b3
   113	MOVQ 32(CX), DX
   114	LEAQ (DX)(DX*8), AX
   115	LEAQ (DX)(AX*2), AX
   116	MULQ 24(BX)
   117	ADDQ AX, R11
   118	ADCQ DX, R10
   119
   120	// r3 = a0×b3
	// r3 is accumulated in R12:R13 (high:low).
   121	MOVQ (CX), AX
   122	MULQ 24(BX)
   123	MOVQ AX, R13
   124	MOVQ DX, R12
   125
   126	// r3 += a1×b2
   127	MOVQ 8(CX), AX
   128	MULQ 16(BX)
   129	ADDQ AX, R13
   130	ADCQ DX, R12
   131
   132	// r3 += a2×b1
   133	MOVQ 16(CX), AX
   134	MULQ 8(BX)
   135	ADDQ AX, R13
   136	ADCQ DX, R12
   137
   138	// r3 += a3×b0
   139	MOVQ 24(CX), AX
   140	MULQ (BX)
   141	ADDQ AX, R13
   142	ADCQ DX, R12
   143
   144	// r3 += 19×a4×b4
   145	MOVQ 32(CX), DX
   146	LEAQ (DX)(DX*8), AX
   147	LEAQ (DX)(AX*2), AX
   148	MULQ 32(BX)
   149	ADDQ AX, R13
   150	ADCQ DX, R12
   151
   152	// r4 = a0×b4
	// r4 is accumulated in R14:R15 (high:low). None of its terms has an
	// index sum above 4, so no 19× factor appears here.
   153	MOVQ (CX), AX
   154	MULQ 32(BX)
   155	MOVQ AX, R15
   156	MOVQ DX, R14
   157
   158	// r4 += a1×b3
   159	MOVQ 8(CX), AX
   160	MULQ 24(BX)
   161	ADDQ AX, R15
   162	ADCQ DX, R14
   163
   164	// r4 += a2×b2
   165	MOVQ 16(CX), AX
   166	MULQ 16(BX)
   167	ADDQ AX, R15
   168	ADCQ DX, R14
   169
   170	// r4 += a3×b1
   171	MOVQ 24(CX), AX
   172	MULQ 8(BX)
   173	ADDQ AX, R15
   174	ADCQ DX, R14
   175
   176	// r4 += a4×b0
   177	MOVQ 32(CX), AX
   178	MULQ (BX)
   179	ADDQ AX, R15
   180	ADCQ DX, R14
   181
   182	// First reduction chain
	// AX is loaded with the 51-bit mask 2^51 - 1. Each three-operand
	// SHLQ $13, lo, hi is a double-precision shift: hi becomes
	// (hi << 13) | (lo >> 51), i.e. the accumulator's bits from bit 51
	// upward, truncated to 64 bits. The low register is then masked to 51
	// bits and receives the carry from the limb below; the carry out of r4
	// wraps around into r0 multiplied by 19 (IMUL3Q $0x13).
   183	MOVQ   $0x0007ffffffffffff, AX
   184	SHLQ   $0x0d, DI, SI
   185	SHLQ   $0x0d, R9, R8
   186	SHLQ   $0x0d, R11, R10
   187	SHLQ   $0x0d, R13, R12
   188	SHLQ   $0x0d, R15, R14
   189	ANDQ   AX, DI
   190	IMUL3Q $0x13, R14, R14
   191	ADDQ   R14, DI
   192	ANDQ   AX, R9
   193	ADDQ   SI, R9
   194	ANDQ   AX, R11
   195	ADDQ   R8, R11
   196	ANDQ   AX, R13
   197	ADDQ   R10, R13
   198	ANDQ   AX, R15
   199	ADDQ   R12, R15
   200
   201	// Second reduction chain (carryPropagate)
	// Each limb is now held in one register (DI, R9, R11, R13, R15) and AX
	// still holds the 51-bit mask. Every limb is copied and the copy shifted
	// right by 51 to obtain its carry; only after all five carries have been
	// extracted are the limbs masked and the carries added (19× the carry
	// out of limb 4 goes into limb 0). Each stored limb is therefore a
	// 51-bit value plus one incoming carry, not a fully canonical digit.
   202	MOVQ   DI, SI
   203	SHRQ   $0x33, SI
   204	MOVQ   R9, R8
   205	SHRQ   $0x33, R8
   206	MOVQ   R11, R10
   207	SHRQ   $0x33, R10
   208	MOVQ   R13, R12
   209	SHRQ   $0x33, R12
   210	MOVQ   R15, R14
   211	SHRQ   $0x33, R14
   212	ANDQ   AX, DI
   213	IMUL3Q $0x13, R14, R14
   214	ADDQ   R14, DI
   215	ANDQ   AX, R9
   216	ADDQ   SI, R9
   217	ANDQ   AX, R11
   218	ADDQ   R8, R11
   219	ANDQ   AX, R13
   220	ADDQ   R10, R13
   221	ANDQ   AX, R15
   222	ADDQ   R12, R15
   223
   224	// Store output
	// The out pointer is loaded into AX only here (the mask is no longer
	// needed) and the five limbs are written at offsets 0, 8, 16, 24, 32.
   225	MOVQ out+0(FP), AX
   226	MOVQ DI, (AX)
   227	MOVQ R9, 8(AX)
   228	MOVQ R11, 16(AX)
   229	MOVQ R13, 24(AX)
   230	MOVQ R15, 32(AX)
   231	RET
   232
   233// func feSquare(out *Element, a *Element)
//
// feSquare multiplies the five 64-bit limbs read from a by themselves and
// stores five 64-bit result limbs through out. In the comments below l0..l4
// are the quadwords at byte offsets 0, 8, 16, 24 and 32 of a.
//
// Because both factors are the same operand, each cross product li×lj
// (i != j) is computed once with a doubled coefficient: 2× for terms that
// stay in place and 38× (= 2 × 19) for terms that wrap around. Squared terms
// use 1× or 19×. The constants 0x0007ffffffffffff (2^51 - 1), 0x33 (51) and
// 0x0d (13 = 64 - 51) treat the limbs as radix-2^51 digits, and the factor
// 19 matches reduction modulo 2^255 - 19. NOTE(review): the modulus is
// inferred from the constants and the package path; confirm against the Go
// definition of Element.
//
// Register use: CX = a. Each rN is a 128-bit accumulator held in a high:low
// register pair (r0 = BX:SI, r1 = DI:R8, r2 = R9:R10, r3 = R11:R12,
// r4 = R13:R14). AX and DX are scratch for MULQ, which leaves its 128-bit
// product in DX:AX.
//
// The routine is straight-line code (no branches), declares a zero-byte
// frame with 16 bytes of arguments, and loads out only after the last read
// of a, so every input limb is consumed before any result is stored.
//
// NOTE(review): the 64-bit pre-multiplications by 2, 19 and 38 and the
// 128-bit sums assume the input limbs are small enough not to overflow;
// those bounds are not visible in this file.
   234TEXT ·feSquare(SB), NOSPLIT, $0-16
   235	MOVQ a+8(FP), CX
   236
   237	// r0 = l0×l0
	// MULQ leaves the 128-bit product in DX:AX; the low half starts the
	// accumulator in SI, the high half in BX.
   238	MOVQ (CX), AX
   239	MULQ (CX)
   240	MOVQ AX, SI
   241	MOVQ DX, BX
   242
   243	// r0 += 38×l1×l4
	// The two LEAQs build 19×l1 (9×DX, then DX + 2×(9×DX)) and SHLQ $1
	// doubles it to 38×l1 before the multiply. ADDQ/ADCQ add the 128-bit
	// product into BX:SI with carry.
   244	MOVQ 8(CX), DX
   245	LEAQ (DX)(DX*8), AX
   246	LEAQ (DX)(AX*2), AX
   247	SHLQ $0x01, AX
   248	MULQ 32(CX)
   249	ADDQ AX, SI
   250	ADCQ DX, BX
   251
   252	// r0 += 38×l2×l3
   253	MOVQ 16(CX), DX
   254	LEAQ (DX)(DX*8), AX
   255	LEAQ (DX)(AX*2), AX
   256	SHLQ $0x01, AX
   257	MULQ 24(CX)
   258	ADDQ AX, SI
   259	ADCQ DX, BX
   260
   261	// r1 = 2×l0×l1
	// r1 is accumulated in DI:R8 (high:low). SHLQ $1 doubles l0 in AX
	// before the multiply.
   262	MOVQ (CX), AX
   263	SHLQ $0x01, AX
   264	MULQ 8(CX)
   265	MOVQ AX, R8
   266	MOVQ DX, DI
   267
   268	// r1 += 38×l2×l4
   269	MOVQ 16(CX), DX
   270	LEAQ (DX)(DX*8), AX
   271	LEAQ (DX)(AX*2), AX
   272	SHLQ $0x01, AX
   273	MULQ 32(CX)
   274	ADDQ AX, R8
   275	ADCQ DX, DI
   276
   277	// r1 += 19×l3×l3
	// A squared term: only the 19× wrap-around factor, no doubling.
   278	MOVQ 24(CX), DX
   279	LEAQ (DX)(DX*8), AX
   280	LEAQ (DX)(AX*2), AX
   281	MULQ 24(CX)
   282	ADDQ AX, R8
   283	ADCQ DX, DI
   284
   285	// r2 = 2×l0×l2
	// r2 is accumulated in R9:R10 (high:low).
   286	MOVQ (CX), AX
   287	SHLQ $0x01, AX
   288	MULQ 16(CX)
   289	MOVQ AX, R10
   290	MOVQ DX, R9
   291
   292	// r2 += l1×l1
   293	MOVQ 8(CX), AX
   294	MULQ 8(CX)
   295	ADDQ AX, R10
   296	ADCQ DX, R9
   297
   298	// r2 += 38×l3×l4
   299	MOVQ 24(CX), DX
   300	LEAQ (DX)(DX*8), AX
   301	LEAQ (DX)(AX*2), AX
   302	SHLQ $0x01, AX
   303	MULQ 32(CX)
   304	ADDQ AX, R10
   305	ADCQ DX, R9
   306
   307	// r3 = 2×l0×l3
	// r3 is accumulated in R11:R12 (high:low).
   308	MOVQ (CX), AX
   309	SHLQ $0x01, AX
   310	MULQ 24(CX)
   311	MOVQ AX, R12
   312	MOVQ DX, R11
   313
   314	// r3 += 2×l1×l2
   315	MOVQ 8(CX), AX
   316	SHLQ $0x01, AX
   317	MULQ 16(CX)
   318	ADDQ AX, R12
   319	ADCQ DX, R11
   320
   321	// r3 += 19×l4×l4
   322	MOVQ 32(CX), DX
   323	LEAQ (DX)(DX*8), AX
   324	LEAQ (DX)(AX*2), AX
   325	MULQ 32(CX)
   326	ADDQ AX, R12
   327	ADCQ DX, R11
   328
   329	// r4 = 2×l0×l4
	// r4 is accumulated in R13:R14 (high:low). None of its terms wraps
	// around, so no factor of 19 appears here.
   330	MOVQ (CX), AX
   331	SHLQ $0x01, AX
   332	MULQ 32(CX)
   333	MOVQ AX, R14
   334	MOVQ DX, R13
   335
   336	// r4 += 2×l1×l3
   337	MOVQ 8(CX), AX
   338	SHLQ $0x01, AX
   339	MULQ 24(CX)
   340	ADDQ AX, R14
   341	ADCQ DX, R13
   342
   343	// r4 += l2×l2
   344	MOVQ 16(CX), AX
   345	MULQ 16(CX)
   346	ADDQ AX, R14
   347	ADCQ DX, R13
   348
   349	// First reduction chain
	// AX is loaded with the 51-bit mask 2^51 - 1. Each three-operand
	// SHLQ $13, lo, hi is a double-precision shift: hi becomes
	// (hi << 13) | (lo >> 51), i.e. the accumulator's bits from bit 51
	// upward, truncated to 64 bits. The low register is then masked to 51
	// bits and receives the carry from the limb below; the carry out of r4
	// wraps around into r0 multiplied by 19 (IMUL3Q $0x13).
   350	MOVQ   $0x0007ffffffffffff, AX
   351	SHLQ   $0x0d, SI, BX
   352	SHLQ   $0x0d, R8, DI
   353	SHLQ   $0x0d, R10, R9
   354	SHLQ   $0x0d, R12, R11
   355	SHLQ   $0x0d, R14, R13
   356	ANDQ   AX, SI
   357	IMUL3Q $0x13, R13, R13
   358	ADDQ   R13, SI
   359	ANDQ   AX, R8
   360	ADDQ   BX, R8
   361	ANDQ   AX, R10
   362	ADDQ   DI, R10
   363	ANDQ   AX, R12
   364	ADDQ   R9, R12
   365	ANDQ   AX, R14
   366	ADDQ   R11, R14
   367
   368	// Second reduction chain (carryPropagate)
	// Each limb is now held in one register (SI, R8, R10, R12, R14) and AX
	// still holds the 51-bit mask. Every limb is copied and the copy shifted
	// right by 51 to obtain its carry; only after all five carries have been
	// extracted are the limbs masked and the carries added (19× the carry
	// out of limb 4 goes into limb 0). Each stored limb is therefore a
	// 51-bit value plus one incoming carry, not a fully canonical digit.
   369	MOVQ   SI, BX
   370	SHRQ   $0x33, BX
   371	MOVQ   R8, DI
   372	SHRQ   $0x33, DI
   373	MOVQ   R10, R9
   374	SHRQ   $0x33, R9
   375	MOVQ   R12, R11
   376	SHRQ   $0x33, R11
   377	MOVQ   R14, R13
   378	SHRQ   $0x33, R13
   379	ANDQ   AX, SI
   380	IMUL3Q $0x13, R13, R13
   381	ADDQ   R13, SI
   382	ANDQ   AX, R8
   383	ADDQ   BX, R8
   384	ANDQ   AX, R10
   385	ADDQ   DI, R10
   386	ANDQ   AX, R12
   387	ADDQ   R9, R12
   388	ANDQ   AX, R14
   389	ADDQ   R11, R14
   390
   391	// Store output
	// The out pointer is loaded into AX only here (the mask is no longer
	// needed) and the five limbs are written at offsets 0, 8, 16, 24, 32.
   392	MOVQ out+0(FP), AX
   393	MOVQ SI, (AX)
   394	MOVQ R8, 8(AX)
   395	MOVQ R10, 16(AX)
   396	MOVQ R12, 24(AX)
   397	MOVQ R14, 32(AX)
   398	RET

View as plain text