Skip to content

Commit e608ccb

Browse files
committed
[dotnet-nolsx] context2: use correct instructions for float/vector load&store based on CPU features
1 parent f28eed1 commit e608ccb

File tree

2 files changed

+162
-0
lines changed

2 files changed

+162
-0
lines changed

src/runtime/src/coreclr/pal/src/arch/loongarch64/asmconstants.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
#define CONTEXT_FLOATING_POINT_BIT (2)
1313
#define CONTEXT_DEBUG_REGISTERS_BIT (3)
1414

15+
#define CONTEXT_CPUCFG_2 (2)
16+
#define CONTEXT_CPUCFG_LSX_BIT (6)
17+
#define CONTEXT_CPUCFG_LASX_BIT (7)
18+
1519
#define CONTEXT_CONTROL (CONTEXT_LOONGARCH64 | (1 << CONTEXT_CONTROL_BIT))
1620
#define CONTEXT_INTEGER (CONTEXT_LOONGARCH64 | (1 << CONTEXT_INTEGER_BIT))
1721
#define CONTEXT_FLOATING_POINT (CONTEXT_LOONGARCH64 | (1 << CONTEXT_FLOATING_POINT_BIT))

src/runtime/src/coreclr/pal/src/arch/loongarch64/context2.S

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,84 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT):
3737
andi $t1, $r21, (1 << CONTEXT_FLOATING_POINT_BIT)
3838
beqz $t1, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT)
3939

40+
addi.w $t1, $r0, CONTEXT_CPUCFG_2
41+
cpucfg $t1, $t1
42+
andi $t3, $t1, (1 << CONTEXT_CPUCFG_LASX_BIT)
43+
bnez $t3, LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT_LASX)
44+
andi $t3, $t1, (1 << CONTEXT_CPUCFG_LSX_BIT)
45+
bnez $t3, LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT_LSX)
46+
47+
fld.d $f0, $a0, CONTEXT_FPU_OFFSET + 0
48+
fld.d $f1, $a0, CONTEXT_FPU_OFFSET + 8*1
49+
fld.d $f2, $a0, CONTEXT_FPU_OFFSET + 8*2
50+
fld.d $f3, $a0, CONTEXT_FPU_OFFSET + 8*3
51+
fld.d $f4, $a0, CONTEXT_FPU_OFFSET + 8*4
52+
fld.d $f5, $a0, CONTEXT_FPU_OFFSET + 8*5
53+
fld.d $f6, $a0, CONTEXT_FPU_OFFSET + 8*6
54+
fld.d $f7, $a0, CONTEXT_FPU_OFFSET + 8*7
55+
fld.d $f8, $a0, CONTEXT_FPU_OFFSET + 8*8
56+
fld.d $f9, $a0, CONTEXT_FPU_OFFSET + 8*9
57+
fld.d $f10, $a0, CONTEXT_FPU_OFFSET + 8*10
58+
fld.d $f11, $a0, CONTEXT_FPU_OFFSET + 8*11
59+
fld.d $f12, $a0, CONTEXT_FPU_OFFSET + 8*12
60+
fld.d $f13, $a0, CONTEXT_FPU_OFFSET + 8*13
61+
fld.d $f14, $a0, CONTEXT_FPU_OFFSET + 8*14
62+
fld.d $f15, $a0, CONTEXT_FPU_OFFSET + 8*15
63+
fld.d $f16, $a0, CONTEXT_FPU_OFFSET + 8*16
64+
fld.d $f17, $a0, CONTEXT_FPU_OFFSET + 8*17
65+
fld.d $f18, $a0, CONTEXT_FPU_OFFSET + 8*18
66+
fld.d $f19, $a0, CONTEXT_FPU_OFFSET + 8*19
67+
fld.d $f20, $a0, CONTEXT_FPU_OFFSET + 8*20
68+
fld.d $f21, $a0, CONTEXT_FPU_OFFSET + 8*21
69+
fld.d $f22, $a0, CONTEXT_FPU_OFFSET + 8*22
70+
fld.d $f23, $a0, CONTEXT_FPU_OFFSET + 8*23
71+
fld.d $f24, $a0, CONTEXT_FPU_OFFSET + 8*24
72+
fld.d $f25, $a0, CONTEXT_FPU_OFFSET + 8*25
73+
fld.d $f26, $a0, CONTEXT_FPU_OFFSET + 8*26
74+
fld.d $f27, $a0, CONTEXT_FPU_OFFSET + 8*27
75+
fld.d $f28, $a0, CONTEXT_FPU_OFFSET + 8*28
76+
fld.d $f29, $a0, CONTEXT_FPU_OFFSET + 8*29
77+
fld.d $f30, $a0, CONTEXT_FPU_OFFSET + 8*30
78+
fld.d $f31, $a0, CONTEXT_FPU_OFFSET + 8*31
79+
b LOCAL_LABEL(Restore_CONTEXT_FLOATING_CONTROL)
80+
81+
LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT_LSX):
82+
// 128-bits SIMD:LSX.
83+
vld $vr0, $a0, CONTEXT_FPU_OFFSET + 0
84+
vld $vr1, $a0, CONTEXT_FPU_OFFSET + 16*1
85+
vld $vr2, $a0, CONTEXT_FPU_OFFSET + 16*2
86+
vld $vr3, $a0, CONTEXT_FPU_OFFSET + 16*3
87+
vld $vr4, $a0, CONTEXT_FPU_OFFSET + 16*4
88+
vld $vr5, $a0, CONTEXT_FPU_OFFSET + 16*5
89+
vld $vr6, $a0, CONTEXT_FPU_OFFSET + 16*6
90+
vld $vr7, $a0, CONTEXT_FPU_OFFSET + 16*7
91+
vld $vr8, $a0, CONTEXT_FPU_OFFSET + 16*8
92+
vld $vr9, $a0, CONTEXT_FPU_OFFSET + 16*9
93+
vld $vr10, $a0, CONTEXT_FPU_OFFSET + 16*10
94+
vld $vr11, $a0, CONTEXT_FPU_OFFSET + 16*11
95+
vld $vr12, $a0, CONTEXT_FPU_OFFSET + 16*12
96+
vld $vr13, $a0, CONTEXT_FPU_OFFSET + 16*13
97+
vld $vr14, $a0, CONTEXT_FPU_OFFSET + 16*14
98+
vld $vr15, $a0, CONTEXT_FPU_OFFSET + 16*15
99+
vld $vr16, $a0, CONTEXT_FPU_OFFSET + 16*16
100+
vld $vr17, $a0, CONTEXT_FPU_OFFSET + 16*17
101+
vld $vr18, $a0, CONTEXT_FPU_OFFSET + 16*18
102+
vld $vr19, $a0, CONTEXT_FPU_OFFSET + 16*19
103+
vld $vr20, $a0, CONTEXT_FPU_OFFSET + 16*20
104+
vld $vr21, $a0, CONTEXT_FPU_OFFSET + 16*21
105+
vld $vr22, $a0, CONTEXT_FPU_OFFSET + 16*22
106+
vld $vr23, $a0, CONTEXT_FPU_OFFSET + 16*23
107+
vld $vr24, $a0, CONTEXT_FPU_OFFSET + 16*24
108+
vld $vr25, $a0, CONTEXT_FPU_OFFSET + 16*25
109+
vld $vr26, $a0, CONTEXT_FPU_OFFSET + 16*26
110+
vld $vr27, $a0, CONTEXT_FPU_OFFSET + 16*27
111+
vld $vr28, $a0, CONTEXT_FPU_OFFSET + 16*28
112+
vld $vr29, $a0, CONTEXT_FPU_OFFSET + 16*29
113+
vld $vr30, $a0, CONTEXT_FPU_OFFSET + 16*30
114+
vld $vr31, $a0, CONTEXT_FPU_OFFSET + 16*31
115+
b LOCAL_LABEL(Restore_CONTEXT_FLOATING_CONTROL)
116+
117+
LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT_LASX):
40118
// 256-bits SIMD:LASX.
41119
xvld $xr0, $a0, CONTEXT_FPU_OFFSET + 0
42120
xvld $xr1, $a0, CONTEXT_FPU_OFFSET + 32*1
@@ -71,6 +149,7 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT):
71149
xvld $xr30, $a0, CONTEXT_FPU_OFFSET + 32*30
72150
xvld $xr31, $a0, CONTEXT_FPU_OFFSET + 32*31
73151

152+
LOCAL_LABEL(Restore_CONTEXT_FLOATING_CONTROL):
74153
ld.d $t1, $a0, CONTEXT_FLOAT_CONTROL_OFFSET
75154
movgr2cf $fcc0, $t1
76155
srli.d $t1, $t1, 8
@@ -220,6 +299,84 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER):
220299
andi $t3, $t1, (1 << CONTEXT_FLOATING_POINT_BIT)
221300
beqz $t3, LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT)
222301

302+
addi.w $t1, $r0, CONTEXT_CPUCFG_2
303+
cpucfg $t1, $t1
304+
andi $t3, $t1, (1 << CONTEXT_CPUCFG_LASX_BIT)
305+
bnez $t3, LOCAL_LABEL(Store_CONTEXT_FLOATING_POINT_LASX)
306+
andi $t3, $t1, (1 << CONTEXT_CPUCFG_LSX_BIT)
307+
bnez $t3, LOCAL_LABEL(Store_CONTEXT_FLOATING_POINT_LSX)
308+
309+
fst.d $f0 , $a0, CONTEXT_FPU_OFFSET + 8*0
310+
fst.d $f1 , $a0, CONTEXT_FPU_OFFSET + 8*1
311+
fst.d $f2 , $a0, CONTEXT_FPU_OFFSET + 8*2
312+
fst.d $f3 , $a0, CONTEXT_FPU_OFFSET + 8*3
313+
fst.d $f4 , $a0, CONTEXT_FPU_OFFSET + 8*4
314+
fst.d $f5 , $a0, CONTEXT_FPU_OFFSET + 8*5
315+
fst.d $f6 , $a0, CONTEXT_FPU_OFFSET + 8*6
316+
fst.d $f7 , $a0, CONTEXT_FPU_OFFSET + 8*7
317+
fst.d $f8 , $a0, CONTEXT_FPU_OFFSET + 8*8
318+
fst.d $f9 , $a0, CONTEXT_FPU_OFFSET + 8*9
319+
fst.d $f10, $a0, CONTEXT_FPU_OFFSET + 8*10
320+
fst.d $f11, $a0, CONTEXT_FPU_OFFSET + 8*11
321+
fst.d $f12, $a0, CONTEXT_FPU_OFFSET + 8*12
322+
fst.d $f13, $a0, CONTEXT_FPU_OFFSET + 8*13
323+
fst.d $f14, $a0, CONTEXT_FPU_OFFSET + 8*14
324+
fst.d $f15, $a0, CONTEXT_FPU_OFFSET + 8*15
325+
fst.d $f16, $a0, CONTEXT_FPU_OFFSET + 8*16
326+
fst.d $f17, $a0, CONTEXT_FPU_OFFSET + 8*17
327+
fst.d $f18, $a0, CONTEXT_FPU_OFFSET + 8*18
328+
fst.d $f19, $a0, CONTEXT_FPU_OFFSET + 8*19
329+
fst.d $f20, $a0, CONTEXT_FPU_OFFSET + 8*20
330+
fst.d $f21, $a0, CONTEXT_FPU_OFFSET + 8*21
331+
fst.d $f22, $a0, CONTEXT_FPU_OFFSET + 8*22
332+
fst.d $f23, $a0, CONTEXT_FPU_OFFSET + 8*23
333+
fst.d $f24, $a0, CONTEXT_FPU_OFFSET + 8*24
334+
fst.d $f25, $a0, CONTEXT_FPU_OFFSET + 8*25
335+
fst.d $f26, $a0, CONTEXT_FPU_OFFSET + 8*26
336+
fst.d $f27, $a0, CONTEXT_FPU_OFFSET + 8*27
337+
fst.d $f28, $a0, CONTEXT_FPU_OFFSET + 8*28
338+
fst.d $f29, $a0, CONTEXT_FPU_OFFSET + 8*29
339+
fst.d $f30, $a0, CONTEXT_FPU_OFFSET + 8*30
340+
fst.d $f31, $a0, CONTEXT_FPU_OFFSET + 8*31
341+
b LOCAL_LABEL(Store_CONTEXT_FLOAT_CONTROL)
342+
343+
LOCAL_LABEL(Store_CONTEXT_FLOATING_POINT_LSX):
344+
// 128-bits SIMD:LSX.
345+
vst $vr0 , $a0, CONTEXT_FPU_OFFSET + 16*0
346+
vst $vr1 , $a0, CONTEXT_FPU_OFFSET + 16*1
347+
vst $vr2 , $a0, CONTEXT_FPU_OFFSET + 16*2
348+
vst $vr3 , $a0, CONTEXT_FPU_OFFSET + 16*3
349+
vst $vr4 , $a0, CONTEXT_FPU_OFFSET + 16*4
350+
vst $vr5 , $a0, CONTEXT_FPU_OFFSET + 16*5
351+
vst $vr6 , $a0, CONTEXT_FPU_OFFSET + 16*6
352+
vst $vr7 , $a0, CONTEXT_FPU_OFFSET + 16*7
353+
vst $vr8 , $a0, CONTEXT_FPU_OFFSET + 16*8
354+
vst $vr9 , $a0, CONTEXT_FPU_OFFSET + 16*9
355+
vst $vr10, $a0, CONTEXT_FPU_OFFSET + 16*10
356+
vst $vr11, $a0, CONTEXT_FPU_OFFSET + 16*11
357+
vst $vr12, $a0, CONTEXT_FPU_OFFSET + 16*12
358+
vst $vr13, $a0, CONTEXT_FPU_OFFSET + 16*13
359+
vst $vr14, $a0, CONTEXT_FPU_OFFSET + 16*14
360+
vst $vr15, $a0, CONTEXT_FPU_OFFSET + 16*15
361+
vst $vr16, $a0, CONTEXT_FPU_OFFSET + 16*16
362+
vst $vr17, $a0, CONTEXT_FPU_OFFSET + 16*17
363+
vst $vr18, $a0, CONTEXT_FPU_OFFSET + 16*18
364+
vst $vr19, $a0, CONTEXT_FPU_OFFSET + 16*19
365+
vst $vr20, $a0, CONTEXT_FPU_OFFSET + 16*20
366+
vst $vr21, $a0, CONTEXT_FPU_OFFSET + 16*21
367+
vst $vr22, $a0, CONTEXT_FPU_OFFSET + 16*22
368+
vst $vr23, $a0, CONTEXT_FPU_OFFSET + 16*23
369+
vst $vr24, $a0, CONTEXT_FPU_OFFSET + 16*24
370+
vst $vr25, $a0, CONTEXT_FPU_OFFSET + 16*25
371+
vst $vr26, $a0, CONTEXT_FPU_OFFSET + 16*26
372+
vst $vr27, $a0, CONTEXT_FPU_OFFSET + 16*27
373+
vst $vr28, $a0, CONTEXT_FPU_OFFSET + 16*28
374+
vst $vr29, $a0, CONTEXT_FPU_OFFSET + 16*29
375+
vst $vr30, $a0, CONTEXT_FPU_OFFSET + 16*30
376+
vst $vr31, $a0, CONTEXT_FPU_OFFSET + 16*31
377+
b LOCAL_LABEL(Store_CONTEXT_FLOAT_CONTROL)
378+
379+
LOCAL_LABEL(Store_CONTEXT_FLOATING_POINT_LASX):
223380
// 256-bits SIMD:LASX.
224381
xvst $xr0 , $a0, CONTEXT_FPU_OFFSET + 32*0
225382
xvst $xr1 , $a0, CONTEXT_FPU_OFFSET + 32*1
@@ -254,6 +411,7 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER):
254411
xvst $xr30, $a0, CONTEXT_FPU_OFFSET + 32*30
255412
xvst $xr31, $a0, CONTEXT_FPU_OFFSET + 32*31
256413

414+
LOCAL_LABEL(Store_CONTEXT_FLOAT_CONTROL):
257415
ori $t0, $r0, 0
258416
movcf2gr $t0, $fcc0
259417
st.b $t0, $a0, CONTEXT_FLOAT_CONTROL_OFFSET

0 commit comments

Comments
 (0)