From: limeidan Date: Thu, 11 Jul 2024 13:03:45 +0000 (+0800) Subject: cmd/internal/obj/loong64: optimize immediate loading X-Git-Tag: go1.25rc1~965 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=b9630c56db42dd58fd7ce21ea854815d0ab31878;p=gostls13.git cmd/internal/obj/loong64: optimize immediate loading | old | new | | sec/op | sec/op vs base | BinaryTree17 11.08 ± 2% 11.16 ± 1% ~ (p=0.529 n=10) Fannkuch11 2.716 ± 0% 2.737 ± 0% +0.79% (p=0.000 n=10) FmtFprintfEmpty 67.37n ± 0% 66.42n ± 0% -1.41% (p=0.000 n=10) FmtFprintfString 95.28n ± 0% 90.85n ± 0% -4.64% (p=0.000 n=10) FmtFprintfInt 97.69n ± 0% 98.06n ± 0% +0.38% (p=0.000 n=10) FmtFprintfIntInt 149.1n ± 0% 147.4n ± 0% -1.14% (p=0.000 n=10) FmtFprintfPrefixedInt 223.6n ± 0% 196.5n ± 0% -12.10% (p=0.000 n=10) FmtFprintfFloat 290.9n ± 0% 281.6n ± 1% -3.21% (p=0.000 n=10) FmtManyArgs 670.6n ± 0% 642.6n ± 0% -4.18% (p=0.000 n=10) GobDecode 10.26m ± 1% 10.23m ± 1% ~ (p=0.105 n=10) GobEncode 12.09m ± 1% 11.94m ± 1% -1.24% (p=0.000 n=10) Gzip 316.9m ± 0% 315.9m ± 0% -0.32% (p=0.001 n=10) Gunzip 65.48m ± 0% 59.77m ± 0% -8.72% (p=0.000 n=10) HTTPClientServer 70.36µ ± 0% 68.72µ ± 0% -2.34% (p=0.000 n=10) JSONEncode 13.61m ± 1% 13.19m ± 1% -3.13% (p=0.000 n=10) JSONDecode 57.52m ± 1% 54.15m ± 1% -5.86% (p=0.000 n=10) Mandelbrot200 4.577m ± 0% 4.572m ± 0% -0.10% (p=0.002 n=10) GoParse 6.466m ± 0% 6.363m ± 0% -1.58% (p=0.000 n=10) RegexpMatchEasy0_32 89.20n ± 0% 87.72n ± 0% -1.65% (p=0.000 n=10) RegexpMatchEasy0_1K 748.6n ± 0% 907.6n ± 0% +21.22% (p=0.000 n=10) RegexpMatchEasy1_32 94.14n ± 0% 93.81n ± 0% -0.35% (p=0.000 n=10) RegexpMatchEasy1_1K 832.1n ± 0% 953.6n ± 0% +14.59% (p=0.000 n=10) RegexpMatchMedium_32 982.7n ± 0% 1018.0n ± 0% +3.59% (p=0.000 n=10) RegexpMatchMedium_1K 30.51µ ± 0% 30.00µ ± 0% -1.65% (p=0.000 n=10) RegexpMatchHard_32 1.721µ ± 0% 1.664µ ± 0% -3.34% (p=0.000 n=10) RegexpMatchHard_1K 50.76µ ± 0% 50.92µ ± 0% +0.32% (p=0.000 n=10) Revcomp 870.5m ± 0% 710.5m ± 0% -18.38% (p=0.000 n=10) Template 93.18m ± 1% 93.67m ± 1% ~ (p=0.123 n=10) TimeParse 309.2n ± 0% 307.8n ± 0% -0.45% (p=0.000 n=10) TimeFormat 401.5n ± 0% 394.2n ± 0% -1.82% (p=0.000 n=10) geomean 72.73µ 71.70µ -1.41% Change-Id: Id8d342ef3bb82a420434b2b841674683efef67be Reviewed-on: https://go-review.googlesource.com/c/go/+/620737 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: abner chenc Reviewed-by: sophie zhao --- diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index 6e1aa1cd95..3760b77625 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -465,6 +465,8 @@ func TestLOONG64Encoder(t *testing.T) { testEndToEnd(t, "loong64", "loong64enc1") testEndToEnd(t, "loong64", "loong64enc2") testEndToEnd(t, "loong64", "loong64enc3") + testEndToEnd(t, "loong64", "loong64enc4") + testEndToEnd(t, "loong64", "loong64enc5") testEndToEnd(t, "loong64", "loong64") } diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index 4a88aca031..3a3eb10a74 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -516,3 +516,27 @@ lable2: XVPCNTH X3, X2 // 62249c76 XVPCNTW X3, X2 // 62289c76 XVPCNTV X3, X2 // 622c9c76 + + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 + + // MOVV C_UCON, r + MOVV $0x54321000, R4 // MOVV $1412567040, R4 // 2464a814 + MOVV $0xffffffff8432f000, R4 // MOVV $-2077036544, R4 // e4650815 + + // MOVV C_ADDCON, r + MOVV $0xfffffffffffff821, R4 // MOVV $-2015, R4 // 0484e002 + + // MOVV C_ANDCON, r + MOVV $0x821, R4 // MOVV $2081, R4 // 0484a003 + + // ADDV C_SCON, [r1], r2 + ADDV $0x321, R4 // ADDV $801, R4 // 8484cc02 + ADDV $0x321, R5, R4 // ADDV $801, R5, R4 // a484cc02 + ADDV $0xfffffffffffffc21, R4 // ADDV $-991, R4 // 8484f002 + ADDV $0xfffffffffffffc21, R5, R4 // ADDV $-991, R5, R4 // a484f002 + + // AND C_SCON, [r1], r2 + AND $0x321, R4 // AND $801, R4 // 84844c03 + AND $0x321, R5, R4 // AND $801, R5, R4 // a4844c03 diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc2.s b/src/cmd/asm/internal/asm/testdata/loong64enc2.s index e497b83627..ee3bad74b1 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc2.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc2.s @@ -77,3 +77,49 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 MOVH name(SB), R4 // 1e00001ac4034028 MOVHU R4, name(SB) // 1e00001ac4034029 MOVHU name(SB), R4 // 1e00001ac403402a + + // MOVV C_DCON12_20S, r + MOVV $0x273fffff80000000, R4 // MOVV $2828260563841187840, R4 // 0400001584cc0903 + MOVV $0xf73fffff80000000, R4 // MOVV $-630503949979353088, R4 // 0400001584cc3d03 + + // MOVV C_DCON20S_20, r + MOVV $0xfff800000f000000, R4 // MOVV $-2251799562027008, R4 // 04001e1404000017 + + // MOVV C_DCON12_12S, r + MOVV $0x273ffffffffff800, R4 // MOVV $2828260565988669440, R4 // 0400e00284cc0903 + MOVV $0xf73ffffffffff800, R4 // MOVV $-630503947831871488, R4 // 0400e00284cc3d03 + + // MOVV C_DCON20S_12S, r + MOVV $0xfff80000fffff800, R4 // MOVV $-2251795518720000, R4 // 0400a00204000017 + MOVV $0xfff8000000000000, R4 // MOVV $-2251799813685248, R4 // 0400800204000017 + + // MOVV C_DCON12_12U, r + MOVV $0x2730000000000800, R4 // MOVV $2823756966361303040, R4 // 0400a00384cc0903 + MOVV $0xf730000000000800, R4 // MOVV $-635007547459237888, R4 // 0400a00384cc3d03 + + // MOVV C_DCON20S_12U, r + MOVV $0xfff8000000000800, R4 // MOVV $-2251799813683200, R4 // 0400a00304000017 + + // ADDV/AND C_DCON12_0, [r1], r2 + ADDV $0x3210000000000000, R4 // ADDV $3607383301523767296, R4 // 1e840c0384f81000 + ADDV $0x3210000000000000, R5, R4 // ADDV $3607383301523767296, R5, R4 // 1e840c03a4f81000 + ADDV $0xc210000000000000, R4 // ADDV $-4463067230724161536, R4 // 1e84300384f81000 + ADDV $0xc210000000000000, R5, R4 // ADDV $-4463067230724161536, R5, R4 // 1e843003a4f81000 + AND $0x3210000000000000, R4 // AND $3607383301523767296, R4 // 1e840c0384f81400 + AND $0x3210000000000000, R5, R4 // AND $3607383301523767296, R5, R4 // 1e840c03a4f81400 + AND $0xc210000000000000, R4 // AND $-4463067230724161536, R4 // 1e84300384f81400 + AND $0xc210000000000000, R5, R4 // AND $-4463067230724161536, R5, R4 // 1e843003a4f81400 + + // ADDV/AND C_UCON, [r1], r2 + ADDV $0x43210000, R4 // ADDV $1126236160, R4 // 1e42861484f81000 + ADDV $0x43210000, R5, R4 // ADDV $1126236160, R5, R4 // 1e428614a4f81000 + ADDV $0xffffffffc3210000, R4 // ADDV $-1021247488, R4 // 1e42861584f81000 + ADDV $0xffffffffc3210000, R5, R4 // ADDV $-1021247488, R5, R4 // 1e428615a4f81000 + AND $0x43210000, R4 // AND $1126236160, R4 // 1e42861484f81400 + AND $0x43210000, R5, R4 // AND $1126236160, R5, R4 // 1e428614a4f81400 + AND $0xffffffffc3210000, R4 // AND $-1021247488, R4 // 1e42861584f81400 + AND $0xffffffffc3210000, R5, R4 // AND $-1021247488, R5, R4 // 1e428615a4f81400 + + // AND C_ADDCON, [r1], r2 + AND $0xfffffffffffffc21, R4 // AND $-991, R4 // 1e84b00284f81400 + AND $0xfffffffffffffc21, R5, R4 // AND $-991, R5, R4 // 1e84b002a4f81400 diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc3.s b/src/cmd/asm/internal/asm/testdata/loong64enc3.s index 2600884309..2d83bd719a 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc3.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc3.s @@ -121,3 +121,68 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 XOR $74565, R4, R5 // 5e020014de178d0385f81500 XOR $4097, R4 // 3e000014de07800384f81500 XOR $4097, R4, R5 // 3e000014de07800385f81500 + + // MOVV C_DCON32_12S, r + MOVV $0x27312345fffff800, R4 // MOVV $2824077224892692480, R4 // 0400a002a468241684cc0903 + MOVV $0xf7312345fffff800, R4 // MOVV $-634687288927848448, R4 // 0400a002a468241684cc3d03 + + // MOVV C_DCON32_0, r + MOVV $0x2731234500000000, R4 // MOVV $2824077220597727232, R4 // 04008002a468241684cc0903 + MOVV $0xf731234500000000, R4 // MOVV $-634687293222813696, R4 // 04008002a468241684cc3d03 + + // MOVV C_DCON32_20, r + MOVV $0x2731234512345000, R4 // MOVV $2824077220903145472, R4 // a4682414a468241684cc0903 + MOVV $0xf731234512345000, R4 // MOVV $-634687292917395456, R4 // a4682414a468241684cc3d03 + + // MOVV C_DCON12_32S, r + MOVV $0x273fffff80000800, R4 // MOVV $2828260563841189888, R4 // 040000158400a00384cc0903 + MOVV $0xf73fffff80000800, R4 // MOVV $-630503949979351040, R4 // 040000158400a00384cc3d03 + + // MOVV C_DCON20S_32, r + MOVV $0xfff8000080000800, R4 // MOVV $-2251797666199552, R4 // 040000158400a00304000017 + + // MOVV C_DCON32_12U, r + MOVV $0x2731234500000800, R4 // MOVV $2824077220597729280, R4 // 0400a003a468241684cc0903 + MOVV $0xf731234500000800, R4 // MOVV $-634687293222811648, R4 // 0400a003a468241684cc3d03 + + // ADDV/AND C_DCON12_20S, [r1], r2 + ADDV $0x273fffff80000000, R4 // ADDV $2828260563841187840, R4 // 1e000015decf090384f81000 + ADDV $0x273fffff80000000, R4, R5 // ADDV $2828260563841187840, R4, R5 // 1e000015decf090385f81000 + AND $0x273fffff80000000, R4 // AND $2828260563841187840, R4 // 1e000015decf090384f81400 + AND $0x273fffff80000000, R4, R5 // AND $2828260563841187840, R4, R5 // 1e000015decf090385f81400 + + // ADDV/AND C_DCON20S_20, [r1], r2 + ADDV $0xfff800000f000000, R4 // ADDV $-2251799562027008, R4 // 1e001e141e00001784f81000 + ADDV $0xfff800000f000000, R4, R5 // ADDV $-2251799562027008, R4, R5 // 1e001e141e00001785f81000 + AND $0xfff800000f000000, R4 // AND $-2251799562027008, R4 // 1e001e141e00001784f81400 + AND $0xfff800000f000000, R4, R5 // AND $-2251799562027008, R4, R5 // 1e001e141e00001785f81400 + + // ADDV/AND C_DCON12_12S, [r1], r2 + ADDV $0x273ffffffffff800, R4 // ADDV $2828260565988669440, R4 // 1e00e002decf090384f81000 + ADDV $0x273ffffffffff800, R4, R5 // ADDV $2828260565988669440, R4, R5 // 1e00e002decf090385f81000 + AND $0x273ffffffffff800, R4 // AND $2828260565988669440, R4 // 1e00e002decf090384f81400 + AND $0x273ffffffffff800, R4, R5 // AND $2828260565988669440, R4, R5 // 1e00e002decf090385f81400 + + // ADDV/AND C_DCON20S_12S, [r1], r2 + ADDV $0xfff80000fffff800, R4 // ADDV $-2251795518720000, R4 // 1e00a0021e00001784f81000 + ADDV $0xfff80000fffff800, R4, R5 // ADDV $-2251795518720000, R4, R5 // 1e00a0021e00001785f81000 + AND $0xfff80000fffff800, R4 // AND $-2251795518720000, R4 // 1e00a0021e00001784f81400 + AND $0xfff80000fffff800, R4, R5 // AND $-2251795518720000, R4, R5 // 1e00a0021e00001785f81400 + + // ADDV/AND C_DCON20S_0, [r1], r2 + ADDV $0xfff8000000000000, R4 // ADDV $-2251799813685248, R4 // 1e0080021e00001784f81000 + ADDV $0xfff8000000000000, R4, R5 // ADDV $-2251799813685248, R4, R5 // 1e0080021e00001785f81000 + AND $0xfff8000000000000, R4 // AND $-2251799813685248, R4 // 1e0080021e00001784f81400 + AND $0xfff8000000000000, R4, R5 // AND $-2251799813685248, R4, R5 // 1e0080021e00001785f81400 + + // ADDV/AND C_DCON12_12U, [r1], r2 + ADDV $0x2730000000000800, R4 // ADDV $2823756966361303040, R4 // 1e00a003decf090384f81000 + ADDV $0x2730000000000800, R4, R5 // ADDV $2823756966361303040, R4, R5 // 1e00a003decf090385f81000 + AND $0x2730000000000800, R4 // AND $2823756966361303040, R4 // 1e00a003decf090384f81400 + AND $0x2730000000000800, R4, R5 // AND $2823756966361303040, R4, R5 // 1e00a003decf090385f81400 + + // ADDV/AND C_DCON20S_12U, [r1], r2 + ADDV $0xfff8000000000800, R4 // ADDV $-2251799813683200, R4 // 1e00a0031e00001784f81000 + ADDV $0xfff8000000000800, R4, R5 // ADDV $-2251799813683200, R4, R5 // 1e00a0031e00001785f81000 + AND $0xfff8000000000800, R4 // AND $-2251799813683200, R4 // 1e00a0031e00001784f81400 + AND $0xfff8000000000800, R4, R5 // AND $-2251799813683200, R4, R5 // 1e00a0031e00001785f81400 diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc4.s b/src/cmd/asm/internal/asm/testdata/loong64enc4.s new file mode 100644 index 0000000000..16c06a3501 --- /dev/null +++ b/src/cmd/asm/internal/asm/testdata/loong64enc4.s @@ -0,0 +1,42 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "../../../../../runtime/textflag.h" + +TEXT asmtest(SB),DUPOK|NOSPLIT,$0 + // ADDV/AND C_DCON32_12S, [r1], r2 + ADDV $0x27312345fffff800, R4 // ADDV $2824077224892692480, R4 // 1e00a002be682416decf090384f81000 + ADDV $0x27312345fffff800, R4, R5 // ADDV $2824077224892692480, R4, R5 // 1e00a002be682416decf090385f81000 + AND $0x27312345fffff800, R4 // AND $2824077224892692480, R4 // 1e00a002be682416decf090384f81400 + AND $0x27312345fffff800, R4, R5 // AND $2824077224892692480, R4, R5 // 1e00a002be682416decf090385f81400 + + // ADDV/AND C_DCON32_0, [r1], r2 + ADDV $0x2731234500000000, R4 // ADDV $2824077220597727232, R4 // 1e008002be682416decf090384f81000 + ADDV $0x2731234500000000, R4, R5 // ADDV $2824077220597727232, R4, R5 // 1e008002be682416decf090385f81000 + AND $0x2731234500000000, R4 // AND $2824077220597727232, R4 // 1e008002be682416decf090384f81400 + AND $0x2731234500000000, R4, R5 // AND $2824077220597727232, R4, R5 // 1e008002be682416decf090385f81400 + + // ADDV/AND C_DCON32_20, [r1], r2 + ADDV $0x2731234512345000, R4 // ADDV $2824077220903145472, R4 // be682414be682416decf090384f81000 + ADDV $0x2731234512345000, R4, R5 // ADDV $2824077220903145472, R4, R5 // be682414be682416decf090385f81000 + AND $0x2731234512345000, R4 // AND $2824077220903145472, R4 // be682414be682416decf090384f81400 + AND $0x2731234512345000, R4, R5 // AND $2824077220903145472, R4, R5 // be682414be682416decf090385f81400 + + // ADDV/AND C_DCON12_32S, [r1], r2 + ADDV $0x273fffff80000800, R4 // ADDV $2828260563841189888, R4 // 1e000015de03a003decf090384f81000 + ADDV $0x273fffff80000800, R4, R5 // ADDV $2828260563841189888, R4, R5 // 1e000015de03a003decf090385f81000 + AND $0x273fffff80000800, R4 // AND $2828260563841189888, R4 // 1e000015de03a003decf090384f81400 + AND $0x273fffff80000800, R4, R5 // AND $2828260563841189888, R4, R5 // 1e000015de03a003decf090385f81400 + + // ADDV/AND C_DCON20S_32, [r1], r2 + ADDV $0xfff8000080000800, R4 // ADDV $-2251797666199552, R4 // 1e000015de03a0031e00001784f81000 + ADDV $0xfff8000080000800, R4, R5 // ADDV $-2251797666199552, R4, R5 // 1e000015de03a0031e00001785f81000 + AND $0xfff8000080000800, R4 // AND $-2251797666199552, R4 // 1e000015de03a0031e00001784f81400 + AND $0xfff8000080000800, R4, R5 // AND $-2251797666199552, R4, R5 // 1e000015de03a0031e00001785f81400 + + // ADDV/AND C_DCON32_12U, [r1], r2 + ADDV $0x2731234500000800, R4 // ADDV $2824077220597729280, R4 // 1e00a003be682416decf090384f81000 + ADDV $0x2731234500000800, R4, R5 // ADDV $2824077220597729280, R4, R5 // 1e00a003be682416decf090385f81000 + AND $0x2731234500000800, R4 // AND $2824077220597729280, R4 // 1e00a003be682416decf090384f81400 + AND $0x2731234500000800, R4, R5 // AND $2824077220597729280, R4, R5 // 1e00a003be682416decf090385f81400 diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc5.s b/src/cmd/asm/internal/asm/testdata/loong64enc5.s new file mode 100644 index 0000000000..423e5c3b01 --- /dev/null +++ b/src/cmd/asm/internal/asm/testdata/loong64enc5.s @@ -0,0 +1,17 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "../../../../../runtime/textflag.h" + +TEXT asmtest(SB),DUPOK|NOSPLIT,$0 + // ADDV/AND C_DCON, [r1], r2 + ADDV $0xfedcba9876543210, R4 // ADDV $-81985529216486896, R4 // 7ea8ec14de4388031e539717deb73f0384f81000 + ADDV $0xfedcba9876543210, R5, R4 // ADDV $-81985529216486896, R5, R4 // 7ea8ec14de4388031e539717deb73f03a4f81000 + ADDV $0x4edcba9876543210, R4 // ADDV $5682621993817747984, R4 // 7ea8ec14de4388031e539717deb7130384f81000 + ADDV $0x4edcba9876543210, R5, R4 // ADDV $5682621993817747984, R5, R4 // 7ea8ec14de4388031e539717deb71303a4f81000 + AND $0x4edcba9876543210, R4 // AND $5682621993817747984, R4 // 7ea8ec14de4388031e539717deb7130384f81400 + AND $0x4edcba9876543210, R5, R4 // AND $5682621993817747984, R5, R4 // 7ea8ec14de4388031e539717deb71303a4f81400 + AND $0xfedcba9876543210, R4 // AND $-81985529216486896, R4 // 7ea8ec14de4388031e539717deb73f0384f81400 + AND $0xfedcba9876543210, R5, R4 // AND $-81985529216486896, R5, R4 // 7ea8ec14de4388031e539717deb73f03a4f81400 + diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index e6984dcba7..d1c4691d8e 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -330,12 +330,58 @@ const ( C_ZCON C_SCON // 12 bit signed C_UCON // 32 bit signed, low 12 bits 0 + + // When the immediate value is SCON, it can choose either the ADDCON implementation + // or the ANDCON implementation, using ADD0CON/AND0CON to distinguish them, so that + // the program can choose the implementation with fewer instructions. C_ADD0CON C_AND0CON - C_ADDCON // -0x800 <= v < 0 - C_ANDCON // 0 < v <= 0xFFF - C_LCON // other 32 - C_DCON // other 64 (could subdivide further) + + C_ADDCON // -0x800 <= v < 0 + C_ANDCON // 0 < v <= 0xFFF + C_LCON // other 32 + + // 64 bit signed, lo32 bits 0, hi20 bits are not 0, hi12 bits can + // be obtained by sign extension of the hi20 bits. + C_DCON20S_0 + // 64 bit signed, lo52 bits 0, hi12 bits are not 0. + C_DCON12_0 + // 64 bit signed, lo32 bits 0, hi32 bits are not 0. + C_DCON32_0 + // 64 bit signed, lo12 bits 0, lo20 bits are not 0, hi20 bits can be + // obtained by sign extension of the lo20 bits, other bits are not 0. + C_DCON12_20S + // 64 bit signed, lo12 bits 0, hi20 bits are not 0, hi12 bits can be + // obtained by sign extension of the hi20 bits, other bits are not 0. + C_DCON20S_20 + // 64 bit signed, lo12 bits 0, other bits are not 0. + C_DCON32_20 + // 64 bit signed, lo12 bits are not 0, 12~51 bits can be obtained + // by sign extension of the lo12 bits, other bits are not 0. + C_DCON12_12S + // 64 bit signed, hi20 bits and lo12 bits are not 0, hi12 bits can + // be obtained by sign extension of the hi20 bits, lo20 bits can + // be obtained by sign extension of the lo12 bits. + C_DCON20S_12S + // 64 bit signed, lo12 bits are not 0, lo20 bits can be obtained by sign + // extension of the lo12 bits, other bits are not 0. + C_DCON32_12S + // 64 bit signed, lo20 and lo12 bits are not 0, hi20 bits can be obtained by sign + // extension of the lo20 bits. other bits are not 0. + C_DCON12_32S + // 64 bit signed, hi20 bits are not 0, hi12 bits can be obtained by sign + // extension of the hi20 bits, lo32 bits are not 0. + C_DCON20S_32 + // 64 bit signed, 12~51 bits 0, other bits are not 0. + C_DCON12_12U + // 64 bit signed, lo20 bits 0, hi20 bits are not 0, hi12 bits can be + // obtained by sign extension of the hi20 bits, lo12 bits are not 0. + C_DCON20S_12U + // 64 bit signed, lo20 bits 0, other bits are not 0. + C_DCON32_12U + // other 64 + C_DCON + C_SACON // $n(REG) where n <= int12 C_LACON // $n(REG) where int12 < n <= int32 C_DACON // $n(REG) where int32 < n diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 9024c5e53e..5757c3c452 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -9,6 +9,7 @@ import ( "cmd/internal/objabi" "fmt" "log" + "math/bits" "slices" ) @@ -192,6 +193,9 @@ var optab = []Optab{ {AMOVV, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0}, {AMOVW, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, {AMOVV, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, + {AMOVV, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 67, 4, 0, NOTUSETMP}, + {AMOVV, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 68, 8, 0, NOTUSETMP}, + {AMOVV, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 69, 12, 0, NOTUSETMP}, {AMOVV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 59, 16, 0, NOTUSETMP}, {AADD, C_ADD0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, @@ -225,6 +229,20 @@ var optab = []Optab{ {AADDV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, {AADDV, C_DCON, C_REG, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, + {AAND, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, + {AAND, C_DCON, C_REG, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, + {AADDV, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 70, 8, 0, 0}, + {AADDV, C_DCON12_0, C_REG, C_NONE, C_REG, C_NONE, 70, 8, 0, 0}, + {AAND, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 70, 8, 0, 0}, + {AAND, C_DCON12_0, C_REG, C_NONE, C_REG, C_NONE, 70, 8, 0, 0}, + {AADDV, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 71, 12, 0, 0}, + {AADDV, C_DCON12_20S, C_REG, C_NONE, C_REG, C_NONE, 71, 12, 0, 0}, + {AAND, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 71, 12, 0, 0}, + {AAND, C_DCON12_20S, C_REG, C_NONE, C_REG, C_NONE, 71, 12, 0, 0}, + {AADDV, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 72, 16, 0, 0}, + {AADDV, C_DCON32_12S, C_REG, C_NONE, C_REG, C_NONE, 72, 16, 0, 0}, + {AAND, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 72, 16, 0, 0}, + {AAND, C_DCON32_12S, C_REG, C_NONE, C_REG, C_NONE, 72, 16, 0, 0}, {ASLL, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, {ASLL, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, @@ -790,7 +808,7 @@ func (c *ctxt0) aclass(a *obj.Addr) int { } if c.instoffset != int64(int32(c.instoffset)) { - return C_DCON + return dconClass(c.instoffset) } if c.instoffset >= 0 { @@ -830,6 +848,159 @@ func (c *ctxt0) aclass(a *obj.Addr) int { return C_GOK } +// The constants here define the data characteristics within the bit field range. +// +// ALL1: The data in the bit field is all 1 +// ALL0: The data in the bit field is all 0 +// ST1: The data in the bit field starts with 1, but not all 1 +// ST0: The data in the bit field starts with 0, but not all 0 +const ( + ALL1 = iota + ALL0 + ST1 + ST0 +) + +// mask returns the mask of the specified bit field, which is used to help determine +// the data characteristics of the immediate value at the specified bit. +func mask(suf int8, len int8) (uint64, uint64) { + if len == 12 { + if suf == 0 { + return 0xfff, 0x800 + } else { // suf == 52 + return 0xfff0000000000000, 0x8000000000000000 + } + } else { // len == 20 + if suf == 12 { + return 0xfffff000, 0x80000000 + } else { // suf == 32 + return 0xfffff00000000, 0x8000000000000 + } + } +} + +// bitField return a number represent status of val in bit field +// +// suf: The starting bit of the bit field +// len: The length of the bit field +func bitField(val int64, suf int8, len int8) int8 { + mask1, mask2 := mask(suf, len) + if uint64(val)&mask1 == mask1 { + return ALL1 + } else if uint64(val)&mask1 == 0x0 { + return ALL0 + } else if uint64(val)&mask2 == mask2 { + return ST1 + } else { + return ST0 + } +} + +// Loading an immediate value larger than 32 bits requires four instructions +// on loong64 (lu12i.w + ori + lu32i.d + lu52i.d), but in some special cases, +// we can use the sign extension and zero extension features of the instruction +// to fill in the high-order data (all 0 or all 1), which can save one to +// three instructions. +// +// | 63 ~ 52 | 51 ~ 32 | 31 ~ 12 | 11 ~ 0 | +// | lu52i.d | lu32i.d | lu12i.w | ori | +func dconClass(offset int64) int { + tzb := bits.TrailingZeros64(uint64(offset)) + hi12 := bitField(offset, 52, 12) + hi20 := bitField(offset, 32, 20) + lo20 := bitField(offset, 12, 20) + lo12 := bitField(offset, 0, 12) + if tzb >= 52 { + return C_DCON12_0 // lu52i.d + } + if tzb >= 32 { + if ((hi20 == ALL1 || hi20 == ST1) && hi12 == ALL1) || ((hi20 == ALL0 || hi20 == ST0) && hi12 == ALL0) { + return C_DCON20S_0 // addi.w + lu32i.d + } + return C_DCON32_0 // addi.w + lu32i.d + lu52i.d + } + if tzb >= 12 { + if lo20 == ST1 || lo20 == ALL1 { + if hi20 == ALL1 { + return C_DCON12_20S // lu12i.w + lu52i.d + } + if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) { + return C_DCON20S_20 // lu12i.w + lu32i.d + } + return C_DCON32_20 // lu12i.w + lu32i.d + lu52i.d + } + if hi20 == ALL0 { + return C_DCON12_20S // lu12i.w + lu52i.d + } + if (hi20 == ST0 && hi12 == ALL0) || ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) { + return C_DCON20S_20 // lu12i.w + lu32i.d + } + return C_DCON32_20 // lu12i.w + lu32i.d + lu52i.d + } + if lo12 == ST1 || lo12 == ALL1 { + if lo20 == ALL1 { + if hi20 == ALL1 { + return C_DCON12_12S // addi.d + lu52i.d + } + if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) { + return C_DCON20S_12S // addi.w + lu32i.d + } + return C_DCON32_12S // addi.w + lu32i.d + lu52i.d + } + if lo20 == ST1 { + if hi20 == ALL1 { + + return C_DCON12_32S // lu12i.w + ori + lu52i.d + } + if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) { + return C_DCON20S_32 // lu12i.w + ori + lu32i.d + } + return C_DCON // lu12i.w + ori + lu32i.d + lu52i.d + } + if lo20 == ALL0 { + if hi20 == ALL0 { + return C_DCON12_12U // ori + lu52i.d + } + if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) { + return C_DCON20S_12U // ori + lu32i.d + } + return C_DCON32_12U // ori + lu32i.d + lu52i.d + } + if hi20 == ALL0 { + return C_DCON12_32S // lu12i.w + ori + lu52i.d + } + if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) { + return C_DCON20S_32 // lu12i.w + ori + lu32i.d + } + return C_DCON // lu12i.w + ori + lu32i.d + lu52i.d + } + if lo20 == ALL0 { + if hi20 == ALL0 { + return C_DCON12_12U // ori + lu52i.d + } + if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) { + return C_DCON20S_12U // ori + lu32i.d + } + return C_DCON32_12U // ori + lu32i.d + lu52i.d + } + if lo20 == ST1 || lo20 == ALL1 { + if hi20 == ALL1 { + return C_DCON12_32S // lu12i.w + ori + lu52i.d + } + if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) { + return C_DCON20S_32 // lu12i.w + ori + lu32i.d + } + return C_DCON + } + if hi20 == ALL0 { + return C_DCON12_32S // lu12i.w + ori + lu52i.d + } + if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) { + return C_DCON20S_32 // lu12i.w + ori + lu32i.d + } + return C_DCON +} + // In Loong64,there are 8 CFRs, denoted as fcc0-fcc7. // There are 4 FCSRs, denoted as fcsr0-fcsr3. func (c *ctxt0) rclass(r int16) int { @@ -935,7 +1106,14 @@ func cmp(a int, b int) bool { } switch a { case C_DCON: - if b == C_LCON { + if b == C_LCON || b == C_DCON32_0 || + b == C_DCON12_0 || b == C_DCON20S_0 || + b == C_DCON12_20S || b == C_DCON12_12S || + b == C_DCON20S_20 || b == C_DCON32_20 || + b == C_DCON20S_12S || b == C_DCON32_12S || + b == C_DCON12_32S || b == C_DCON20S_32 || + b == C_DCON12_12U || b == C_DCON20S_12U || + b == C_DCON32_12U { return true } fallthrough @@ -944,6 +1122,22 @@ func cmp(a int, b int) bool { return true } + case C_DCON12_0: + + case C_DCON12_20S: + if b == C_DCON20S_20 || b == C_DCON12_12S || + b == C_DCON20S_12S || b == C_DCON12_12U || + b == C_DCON20S_12U || b == C_DCON20S_0 { + return true + } + + case C_DCON32_12S: + if b == C_DCON32_20 || b == C_DCON12_32S || + b == C_DCON20S_32 || b == C_DCON32_12U || + b == C_DCON32_0 { + return true + } + case C_ADD0CON: if b == C_ADDCON { return true @@ -2015,6 +2209,129 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("illegal register combination: %v\n", p) } o1 = OP_RRR(atomicInst[p.As], uint32(rk), uint32(rj), uint32(rd)) + + case 67: // mov $dcon12_0, r + v := c.vregoff(&p.From) + o1 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(0), uint32(p.To.Reg)) + + case 68: // mov $dcon12_20S, r + v := c.vregoff(&p.From) + contype := c.aclass(&p.From) + switch contype { + default: // C_DCON12_20S + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) + o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) + case C_DCON20S_20: + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) + case C_DCON12_12S: + o1 = OP_12IRR(c.opirr(AADDV), uint32(v), uint32(0), uint32(p.To.Reg)) + o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) + case C_DCON20S_12S, C_DCON20S_0: + o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(p.To.Reg)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) + case C_DCON12_12U: + o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(p.To.Reg)) + o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) + case C_DCON20S_12U: + o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(p.To.Reg)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) + } + + case 69: // mov $dcon32_12S, r + v := c.vregoff(&p.From) + contype := c.aclass(&p.From) + switch contype { + default: // C_DCON32_12S, C_DCON32_0 + o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(p.To.Reg)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) + o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) + case C_DCON32_20: + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) + o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) + case C_DCON12_32S: + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) + o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) + o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) + case C_DCON20S_32: + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) + o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) + o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) + case C_DCON32_12U: + o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(p.To.Reg)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) + o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) + } + + case 70: // add $dcon12_0,[r1],r2 + v := c.vregoff(&p.From) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + o1 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(0), uint32(REGTMP)) + o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 71: // add $dcon12_20S,[r1],r2 + v := c.vregoff(&p.From) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + contype := c.aclass(&p.From) + switch contype { + default: // C_DCON12_20S + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) + o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) + case C_DCON20S_20: + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) + case C_DCON12_12S: + o1 = OP_12IRR(c.opirr(AADDV), uint32(v), uint32(0), uint32(REGTMP)) + o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) + case C_DCON20S_12S, C_DCON20S_0: + o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(REGTMP)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) + case C_DCON12_12U: + o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(REGTMP)) + o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) + case C_DCON20S_12U: + o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(REGTMP)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) + } + o3 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 72: // add $dcon32_12S,[r1],r2 + v := c.vregoff(&p.From) + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } + contype := c.aclass(&p.From) + switch contype { + default: // C_DCON32_12S, C_DCON32_0 + o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(REGTMP)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) + o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) + case C_DCON32_20: + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) + o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) + case C_DCON12_32S: + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) + o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) + case C_DCON20S_32: + o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) + o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP)) + o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) + case C_DCON32_12U: + o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(REGTMP)) + o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) + o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) + } + o4 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) } out[0] = o1 diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go index ce76109d2a..a2f04a22ee 100644 --- a/src/cmd/internal/obj/loong64/cnames.go +++ b/src/cmd/internal/obj/loong64/cnames.go @@ -21,6 +21,20 @@ var cnames0 = []string{ "ADDCON", "ANDCON", "LCON", + "DCON20S_0", + "DCON12_0", + "DCON32_0", + "DCON12_20S", + "DCON20S_20", + "DCON32_20", + "DCON12_12S", + "DCON20S_12S", + "DCON32_12S", + "DCON12_32S", + "DCON20S_32", + "DCON12_12U", + "DCON20S_12U", + "DCON32_12U", "DCON", "SACON", "LACON",