// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// func Cas(ptr *int32, old, new int32) bool
// Atomically:
//	if *ptr == old {
//		*ptr = new
//		return true
//	} else {
//		return false
//	}
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOVV	ptr+0(FP), R4
	MOVW	old+8(FP), R5
	MOVW	new+12(FP), R6

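	// If the CPU supports LAMCAS, do the whole operation with a single
	// AMCASDBW; otherwise fall back to an LL/SC retry loop.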
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLAMCAS(SB), R8
	BEQ	R8, ll_sc
	MOVV	R5, R7	// backup old value
	AMCASDBW	R6, (R4), R5
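	// AMCASDBW leaves the value read from memory in R5; if it differs
	// from the backup of old in R7, the swap did not happen.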
	BNE	R7, R5, cas_fail0
	MOVV	$1, R4
	MOVB	R4, ret+16(FP)
	RET
cas_fail0:
	MOVB	R0, ret+16(FP)
	RET

ll_sc:
	// Implemented using the ll-sc instruction pair
	DBAR	$0x14	// LoadAcquire barrier
cas_again:
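	// SC overwrites its source register with 1 on success and 0 on
	// failure, so the new value is re-copied from R6 into R7 on every
	// retry. On mismatch, cas_fail1 sets R4 to 0 and reuses the
	// MOVB/DBAR/RET tail via JMP -4(PC).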
	MOVV	R6, R7
	LL	(R4), R8
	BNE	R5, R8, cas_fail1
	SC	R7, (R4)
	BEQ	R7, cas_again
	MOVV	$1, R4
	MOVB	R4, ret+16(FP)
	DBAR	$0x12	// StoreRelease barrier
	RET
cas_fail1:
	MOVV	$0, R4
	JMP	-4(PC)

// func Cas64(ptr *uint64, old, new uint64) bool
// Atomically:
//	if *ptr == old {
//		*ptr = new
//		return true
//	} else {
//		return false
//	}
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVV	ptr+0(FP), R4
	MOVV	old+8(FP), R5
	MOVV	new+16(FP), R6

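	// Same LAMCAS fast path / LL-SC fallback as Cas, on 64-bit values.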
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLAMCAS(SB), R8
	BEQ	R8, ll_sc_64
	MOVV	R5, R7	// backup old value
	AMCASDBV	R6, (R4), R5
	BNE	R7, R5, cas64_fail0
	MOVV	$1, R4
	MOVB	R4, ret+24(FP)
	RET
cas64_fail0:
	MOVB	R0, ret+24(FP)
	RET

ll_sc_64:
	// Implemented using the ll-sc instruction pair
	DBAR	$0x14
cas64_again:
	MOVV	R6, R7
	LLV	(R4), R8
	BNE	R5, R8, cas64_fail1
	SCV	R7, (R4)
	BEQ	R7, cas64_again
	MOVV	$1, R4
	MOVB	R4, ret+24(FP)
	DBAR	$0x12
	RET
cas64_fail1:
	MOVV	$0, R4
	JMP	-4(PC)

TEXT ·Casint32(SB),NOSPLIT,$0-17
	JMP	·Cas(SB)

TEXT ·Casint64(SB),NOSPLIT,$0-25
	JMP	·Cas64(SB)

TEXT ·Casuintptr(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)

TEXT ·CasRel(SB), NOSPLIT, $0-17
	JMP	·Cas(SB)

TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
	JMP	·Load64(SB)

TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
	JMP	·Load64(SB)

TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)

TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
	JMP	·Xadd64(SB)

TEXT ·Loadint64(SB), NOSPLIT, $0-16
	JMP	·Load64(SB)

TEXT ·Xaddint32(SB),NOSPLIT,$0-20
	JMP	·Xadd(SB)

TEXT ·Xaddint64(SB), NOSPLIT, $0-24
	JMP	·Xadd64(SB)

// func Casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
// Atomically:
//	if *ptr == old {
//		*ptr = new
//		return true
//	} else {
//		return false
//	}
TEXT ·Casp1(SB), NOSPLIT, $0-25
	JMP	·Cas64(SB)

// uint32 Xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOVV	ptr+0(FP), R4
	MOVW	delta+8(FP), R5
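	// AMADDDBW adds R5 to *ptr atomically and returns the old value in
	// R6; add delta once more to produce the new value the caller expects.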
	AMADDDBW	R5, (R4), R6
	ADDV	R6, R5, R4
	MOVW	R4, ret+16(FP)
	RET

// func Xadd64(ptr *uint64, delta int64) uint64
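// Atomically:
//	*ptr += delta;
//	return *ptr;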
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOVV	ptr+0(FP), R4
	MOVV	delta+8(FP), R5
	AMADDDBV	R5, (R4), R6
	ADDV	R6, R5, R4
	MOVV	R4, ret+16(FP)
	RET

// uint8 Xchg8(ptr *uint8, new uint8)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
TEXT ·Xchg8(SB), NOSPLIT, $0-17
	MOVV	ptr+0(FP), R4
	MOVBU	new+8(FP), R5
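	// The byte is exchanged by operating on the aligned 32-bit word that
	// contains it: compute the byte's bit offset within the word, clear
	// that byte lane, merge in the new value, and retry with LL/SC until
	// the store-conditional succeeds.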

	// R6 = ((ptr & 3) * 8)
	AND	$3, R4, R6
	SLLV	$3, R6

	// R7 = ((0xFF) << R6) ^ (-1)
	MOVV	$0xFF, R8
	SLLV	R6, R8, R7
	XOR	$-1, R7

	// R4 = ptr & (~3)
	MOVV	$~3, R8
	AND	R8, R4

	// R5 = ((val) << R6)
	SLLV	R6, R5

	DBAR	$0x14	// LoadAcquire barrier
_xchg8_again:
	LL	(R4), R8
	MOVV	R8, R9	// backup old val
	AND	R7, R8
	OR	R5, R8
	SC	R8, (R4)
	BEQ	R8, _xchg8_again
	DBAR	$0x12	// StoreRelease barrier
	SRLV	R6, R9, R9
	MOVBU	R9, ret+16(FP)
	RET

// func Xchg(ptr *uint32, new uint32) uint32
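// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;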
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOVV	ptr+0(FP), R4
	MOVW	new+8(FP), R5
	AMSWAPDBW	R5, (R4), R6
	MOVW	R6, ret+16(FP)
	RET

// func Xchg64(ptr *uint64, new uint64) uint64
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOVV	ptr+0(FP), R4
	MOVV	new+8(FP), R5
	AMSWAPDBV	R5, (R4), R6
	MOVV	R6, ret+16(FP)
	RET

TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
	JMP	·Xchg64(SB)

// func Xchgint32(ptr *int32, new int32) int32
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
	JMP	·Xchg(SB)

// func Xchgint64(ptr *int64, new int64) int64
TEXT ·Xchgint64(SB), NOSPLIT, $0-24
	JMP	·Xchg64(SB)

TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)

TEXT ·StoreRel(SB), NOSPLIT, $0-12
	JMP	·Store(SB)

TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)

TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
	JMP	·Store64(SB)

TEXT ·Store(SB), NOSPLIT, $0-12
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
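	// AMSWAPDBW with R0 as the destination performs the store with the
	// barrier semantics of the _DB variant and discards the old value.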
	AMSWAPDBW	R5, (R4), R0
	RET

TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVV	ptr+0(FP), R4
	MOVB	val+8(FP), R5
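	// With LAM_BH the byte can be stored by a single AMSWAPDBB; otherwise
	// use a plain store bracketed by barriers.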
	MOVBU	internal∕cpu·Loong64+const_offsetLoong64HasLAM_BH(SB), R6
	BEQ	R6, _legacy_store8_
	AMSWAPDBB	R5, (R4), R0
	RET
_legacy_store8_:
	// StoreRelease barrier
	DBAR	$0x12
	MOVB	R5, 0(R4)
	DBAR	$0x18
	RET

TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVV	ptr+0(FP), R4
	MOVV	val+8(FP), R5
	AMSWAPDBV	R5, (R4), R0
	RET

// void Or8(byte volatile*, byte);
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVV	ptr+0(FP), R4
	MOVBU	val+8(FP), R5
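	// Shift val into its byte lane within the aligned word and OR the
	// whole word: the other lanes are ORed with zero and stay unchanged.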
	// R6 = ptr & (~3)
	MOVV	$~3, R6
	AND	R4, R6
	// R7 = ((ptr & 3) * 8)
	AND	$3, R4, R7
	SLLV	$3, R7
	// R5 = val << R7
	SLLV	R7, R5
	AMORDBW	R5, (R6), R0
	RET

// void And8(byte volatile*, byte);
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVV	ptr+0(FP), R4
	MOVBU	val+8(FP), R5
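	// Build a word-sized mask whose target byte lane holds val and whose
	// other lanes are all ones, so ANDing the whole word only affects the
	// target byte.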
	// R6 = ptr & (~3)
	MOVV	$~3, R6
	AND	R4, R6
	// R7 = ((ptr & 3) * 8)
	AND	$3, R4, R7
	SLLV	$3, R7
	// R5 = ((val ^ 0xFF) << R7) ^ (-1)
	XOR	$255, R5
	SLLV	R7, R5
	XOR	$-1, R5
	AMANDDBW	R5, (R6), R0
	RET

// func Or(addr *uint32, v uint32)
TEXT ·Or(SB), NOSPLIT, $0-12
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
	AMORDBW	R5, (R4), R0
	RET

// func And(addr *uint32, v uint32)
TEXT ·And(SB), NOSPLIT, $0-12
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
	AMANDDBW	R5, (R4), R0
	RET

// func Or32(addr *uint32, v uint32) old uint32
TEXT ·Or32(SB), NOSPLIT, $0-20
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
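	// The AM*DB* destination register receives the previous value of
	// *addr, which is what Or32/And32/Or64/And64 return.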
	AMORDBW	R5, (R4), R6
	MOVW	R6, ret+16(FP)
	RET

// func And32(addr *uint32, v uint32) old uint32
TEXT ·And32(SB), NOSPLIT, $0-20
	MOVV	ptr+0(FP), R4
	MOVW	val+8(FP), R5
	AMANDDBW	R5, (R4), R6
	MOVW	R6, ret+16(FP)
	RET

// func Or64(addr *uint64, v uint64) old uint64
TEXT ·Or64(SB), NOSPLIT, $0-24
	MOVV	ptr+0(FP), R4
	MOVV	val+8(FP), R5
	AMORDBV	R5, (R4), R6
	MOVV	R6, ret+16(FP)
	RET

// func And64(addr *uint64, v uint64) old uint64
TEXT ·And64(SB), NOSPLIT, $0-24
	MOVV	ptr+0(FP), R4
	MOVV	val+8(FP), R5
	AMANDDBV	R5, (R4), R6
	MOVV	R6, ret+16(FP)
	RET

// func Anduintptr(addr *uintptr, v uintptr) old uintptr
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
	JMP	·And64(SB)

// func Oruintptr(addr *uintptr, v uintptr) old uintptr
TEXT ·Oruintptr(SB), NOSPLIT, $0-24
	JMP	·Or64(SB)

// uint32 internal∕runtime∕atomic·Load(uint32 volatile* ptr)
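// The plain load followed by DBAR $0x14 gives load-acquire ordering;
// Load8 and Load64 below follow the same pattern.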
TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-12
	MOVV	ptr+0(FP), R19
	MOVWU	0(R19), R19
	DBAR	$0x14	// LoadAcquire barrier
	MOVW	R19, ret+8(FP)
	RET

// uint8 internal∕runtime∕atomic·Load8(uint8 volatile* ptr)
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$0-9
	MOVV	ptr+0(FP), R19
	MOVBU	0(R19), R19
	DBAR	$0x14
	MOVB	R19, ret+8(FP)
	RET

// uint64 internal∕runtime∕atomic·Load64(uint64 volatile* ptr)
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$0-16
	MOVV	ptr+0(FP), R19
	MOVV	0(R19), R19
	DBAR	$0x14
	MOVV	R19, ret+8(FP)
	RET

// void *internal∕runtime∕atomic·Loadp(void *volatile *ptr)
TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16
	JMP	·Load64(SB)

// uint32 internal∕runtime∕atomic·LoadAcq(uint32 volatile* ptr)
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12
	JMP	·Load(SB)

// uint64 ·LoadAcq64(uint64 volatile* ptr)
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16
	JMP	·Load64(SB)

// uintptr ·LoadAcquintptr(uintptr volatile* ptr)
TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16
	JMP	·Load64(SB)
