From 90b7d7aaa27b5a1d7a69f76ed82ed2e88ae84c31 Mon Sep 17 00:00:00 2001
From: limeidan
Date: Thu, 7 Aug 2025 11:23:55 +0800
Subject: [PATCH] cmd/compile/internal: optimize multiplication use new operation 'ADDshiftLLV' on loong64
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

goos: linux
goarch: loong64
pkg: cmd/compile/internal/test
cpu: Loongson-3A6000-HV @ 2500.00MHz
                   │     old      │                 new                 │
                   │    sec/op    │   sec/op     vs base                │
MulconstI32/3        0.8004n ± 0%   0.4247n ± 2%  -46.94% (p=0.000 n=10)
MulconstI32/5        0.8005n ± 0%   0.4256n ± 1%  -46.83% (p=0.000 n=10)
MulconstI32/12       1.2010n ± 0%   0.8005n ± 0%  -33.35% (p=0.000 n=10)
MulconstI32/120      0.8090n ± 0%   0.8067n ± 0%   -0.28% (p=0.007 n=10)
MulconstI32/-120     0.8109n ± 0%   0.8072n ± 0%   -0.47% (p=0.000 n=10)
MulconstI32/65537    0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
MulconstI32/65538    0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.265 n=10)
MulconstI64/3        0.8005n ± 0%   0.4241n ± 1%  -47.02% (p=0.000 n=10)
MulconstI64/5        0.8004n ± 0%   0.4249n ± 1%  -46.91% (p=0.000 n=10)
MulconstI64/12       1.2010n ± 0%   0.8004n ± 0%  -33.36% (p=0.000 n=10)
MulconstI64/120      0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.635 n=10)
MulconstI64/-120     0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.837 n=10)
MulconstI64/65537    0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.837 n=10)
MulconstI64/65538    0.8096n ± 0%   0.8004n ± 0%   -1.14% (p=0.000 n=10)
MulconstU32/3        0.8004n ± 0%   0.4263n ± 1%  -46.75% (p=0.000 n=10)
MulconstU32/5        0.8005n ± 0%   0.4262n ± 1%  -46.76% (p=0.000 n=10)
MulconstU32/12       1.2010n ± 0%   0.8005n ± 0%  -33.35% (p=0.000 n=10)
MulconstU32/120      0.8105n ± 0%   0.8096n ± 0%        ~ (p=0.183 n=10)
MulconstU32/65537    0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
MulconstU32/65538    0.8005n ± 0%   0.8005n ± 0%        ~ (p=1.000 n=10)
MulconstU64/3        0.8004n ± 0%   0.4265n ± 4%  -46.71% (p=0.000 n=10)
MulconstU64/5        0.8004n ± 0%   0.4256n ± 0%  -46.82% (p=0.000 n=10)
MulconstU64/12       1.2010n ± 0%   0.8004n ± 0%  -33.36% (p=0.000 n=10)
MulconstU64/120      0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.387 n=10)
MulconstU64/65537    0.8005n ± 0%   0.8005n ± 0%        ~ 
(p=0.265 n=10) MulconstU64/65538 0.8080n ± 0% 0.8004n ± 0% -0.93% (p=0.000 n=10) geomean 0.8539n 0.6597n -22.74% Change-Id: Ie33e88985d7639f481bbba540bc917b9f185c357 Reviewed-on: https://go-review.googlesource.com/c/go/+/693855 Reviewed-by: Dmitri Shuralyov Reviewed-by: Keith Randall Reviewed-by: sophie zhao Reviewed-by: abner chenc LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/loong64/ssa.go | 11 +++ .../compile/internal/ssa/_gen/LOONG64Ops.go | 2 + src/cmd/compile/internal/ssa/config.go | 13 ++- src/cmd/compile/internal/ssa/opGen.go | 16 ++++ src/cmd/internal/obj/loong64/asm.go | 4 +- test/codegen/arithmetic.go | 4 +- test/codegen/multiply.go | 94 +++++++++---------- 7 files changed, 91 insertions(+), 53 deletions(-) diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go index f8ecebb350..c7fb903d5d 100644 --- a/src/cmd/compile/internal/loong64/ssa.go +++ b/src/cmd/compile/internal/loong64/ssa.go @@ -1065,6 +1065,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { {Type: obj.TYPE_CONST, Offset: int64((v.AuxInt >> 0) & 0x1f)}, }) + case ssa.OpLOONG64ADDshiftLLV: + // ADDshiftLLV Rarg0, Rarg1, $shift + // ALSLV $shift, Rarg1, Rarg0, Rtmp + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt + p.Reg = v.Args[1].Reg() + p.AddRestSourceReg(v.Args[0].Reg()) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + case ssa.OpClobber, ssa.OpClobberReg: // TODO: implement for clobberdead experiment. Nop is ok for now. default: diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go index 75429cbffd..d6818e8592 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go @@ -577,6 +577,8 @@ func init() { // is $hint and bit[41:5] is $n. 
{name: "PRELD", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELD", hasSideEffects: true}, {name: "PRELDX", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELDX", hasSideEffects: true}, + + {name: "ADDshiftLLV", argLength: 2, aux: "Int64", reg: gp21, asm: "ALSLV"}, // arg0 + arg1< 3 { - c.ctxt.Diag("The shift amount is too large.") + if sa < 0 || sa > 3 { + c.ctxt.Diag("%v: shift amount out of range[1, 4].\n", p) } r := p.GetFrom3().Reg o1 = OP_2IRRR(c.opirrr(p.As), uint32(sa), uint32(r), uint32(p.Reg), uint32(p.To.Reg)) diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 39a7986c7b..67adb50fa5 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -257,7 +257,7 @@ func Mul_96(n int) int { // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL` // arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL` // arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL` - // loong64:"ADDVU","SLLV\t[$]5",-"MULV" + // loong64:"SLLV\t[$]5","ALSLV\t[$]1," // s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD` return n * 96 } @@ -317,7 +317,7 @@ func MergeMuls5(a, n int) int { // Multiplications folded negation func FoldNegMul(a int) int { - // loong64:"MULV","MOVV\t[$]-11",-"SUBVU\tR[0-9], R0," + // loong64:"SUBVU","ALSLV\t[$]2","ALSLV\t[$]1" return (-a) * 11 } diff --git a/test/codegen/multiply.go b/test/codegen/multiply.go index bb22d1a2b1..dc2910dab7 100644 --- a/test/codegen/multiply.go +++ b/test/codegen/multiply.go @@ -24,7 +24,7 @@ func m2(x int64) int64 { func m3(x int64) int64 { // amd64: "LEAQ\t.*[*]2" // arm64: "ADD\tR[0-9]+<<1," - // loong64: "ADDVU","ADDVU" + // loong64: "ALSLV\t[$]1," return x * 3 } func m4(x int64) int64 { @@ -36,19 +36,19 @@ func m4(x int64) int64 { func m5(x int64) int64 { // amd64: "LEAQ\t.*[*]4" // arm64: "ADD\tR[0-9]+<<2," - // loong64: "SLLV\t[$]2,","ADDVU" + // loong64: "ALSLV\t[$]2," return x * 5 } func m6(x int64) int64 { // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]2" // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<1," - 
// loong64: "ADDVU","ADDVU","ADDVU" + // loong64: "ADDVU", "ADDVU", "ADDVU" return x * 6 } func m7(x int64) int64 { // amd64: "LEAQ\t.*[*]2" // arm64: "LSL\t[$]3,", "SUB\tR[0-9]+," - // loong64: "SLLV\t[$]3,","SUBVU" + // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]1," return x * 7 } func m8(x int64) int64 { @@ -60,43 +60,43 @@ func m8(x int64) int64 { func m9(x int64) int64 { // amd64: "LEAQ\t.*[*]8" // arm64: "ADD\tR[0-9]+<<3," - // loong64: "SLLV\t[$]3,","ADDVU" + // loong64: "ALSLV\t[$]3," return x * 9 } func m10(x int64) int64 { // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]4" // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<2," - // loong64: "ADDVU","SLLV\t[$]3,","ADDVU" + // loong64: "ADDVU", "ALSLV\t[$]2," return x * 10 } func m11(x int64) int64 { // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]2" // arm64: "MOVD\t[$]11,", "MUL" - // loong64: "MOVV\t[$]11,", "MULV" + // loong64: "ALSLV\t[$]2,", "ALSLV\t[$]1," return x * 11 } func m12(x int64) int64 { // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]2," // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<1," - // loong64: "ADDVU","ADDVU","SLLV\t[$]2," + // loong64: "SLLV", "ALSLV\t[$]1," return x * 12 } func m13(x int64) int64 { // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4" // arm64: "MOVD\t[$]13,", "MUL" - // loong64: "MOVV\t[$]13,","MULV" + // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]2," return x * 13 } func m14(x int64) int64 { // amd64: "IMUL3Q\t[$]14," // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+<<1," - // loong64: "ADDVU","SLLV\t[$]4,","SUBVU" + // loong64: "ADDVU", "ALSLV\t[$]1", "ALSLV\t[$]2" return x * 14 } func m15(x int64) int64 { // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4" // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+," - // loong64: "SLLV\t[$]4,","SUBVU" + // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]2," return x * 15 } func m16(x int64) int64 { @@ -108,79 +108,79 @@ func m16(x int64) int64 { func m17(x int64) int64 { // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8" // arm64: "ADD\tR[0-9]+<<4," - // loong64: "SLLV\t[$]4,","ADDVU" + // loong64: "ALSLV\t[$]" return x * 17 } func m18(x int64) 
int64 { // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8" // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<3," - // loong64: "ADDVU","SLLV\t[$]4,","ADDVU" + // loong64: "ADDVU", "ALSLV\t[$]3," return x * 18 } func m19(x int64) int64 { // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]2" // arm64: "MOVD\t[$]19,", "MUL" - // loong64: "MOVV\t[$]19,","MULV" + // loong64: "ALSLV\t[$]3,", "ALSLV\t[$]1," return x * 19 } func m20(x int64) int64 { // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]2," // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<2," - // loong64: "SLLV\t[$]2,","SLLV\t[$]4,","ADDVU" + // loong64: "SLLV\t[$]2,", "ALSLV\t[$]2," return x * 20 } func m21(x int64) int64 { // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4" // arm64: "MOVD\t[$]21,", "MUL" - // loong64: "MOVV\t[$]21,","MULV" + // loong64: "ALSLV\t[$]2,", "ALSLV\t[$]2," return x * 21 } func m22(x int64) int64 { // amd64: "IMUL3Q\t[$]22," // arm64: "MOVD\t[$]22,", "MUL" - // loong64: "MOVV\t[$]22,","MULV" + // loong64: "ADDVU", "ALSLV\t[$]2,", "ALSLV\t[$]2," return x * 22 } func m23(x int64) int64 { // amd64: "IMUL3Q\t[$]23," // arm64: "MOVD\t[$]23,", "MUL" - // loong64: "MOVV\t[$]23,","MULV" + // loong64: "ALSLV\t[$]1,", "SUBVU", "ALSLV\t[$]3," return x * 23 } func m24(x int64) int64 { // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]3," // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<1," - // loong64: "ADDVU","ADDVU","SLLV\t[$]3," + // loong64: "SLLV\t[$]3", "ALSLV\t[$]1," return x * 24 } func m25(x int64) int64 { // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4" // arm64: "MOVD\t[$]25,", "MUL" - // loong64: "MOVV\t[$]25,","MULV" + // loong64: "ALSLV\t[$]2,", "ALSLV\t[$]2," return x * 25 } func m26(x int64) int64 { // amd64: "IMUL3Q\t[$]26," // arm64: "MOVD\t[$]26,", "MUL" - // loong64: "MOVV\t[$]26,","MULV" + // loong64: "ADDVU", "ALSLV\t[$]1,", "ALSLV\t[$]3," return x * 26 } func m27(x int64) int64 { // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]8" // arm64: "MOVD\t[$]27,", "MUL" - // loong64: "MOVV\t[$]27,","MULV" + // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]3," return x * 27 } func m28(x int64) 
int64 { // amd64: "IMUL3Q\t[$]28," // arm64: "LSL\t[$]5, "SUB\tR[0-9]+<<2," - // loong64: "SLLV\t[$]5,","SLLV\t[$]2,","SUBVU" + // loong64: "ALSLV\t[$]1,","SLLV\t[$]2,","ALSLV\t[$]3," return x * 28 } func m29(x int64) int64 { // amd64: "IMUL3Q\t[$]29," // arm64: "MOVD\t[$]29,", "MUL" - // loong64: "MOVV\t[$]29,","MULV" + // loong64: "ALSLV\t[$]1,","SLLV\t[$]5,","SUBVU" return x * 29 } func m30(x int64) int64 { @@ -204,49 +204,49 @@ func m32(x int64) int64 { func m33(x int64) int64 { // amd64: "SHLQ\t[$]2,", "LEAQ\t.*[*]8" // arm64: "ADD\tR[0-9]+<<5," - // loong64: "SLLV\t[$]5,","ADDVU" + // loong64: "ADDVU", "ALSLV\t[$]4," return x * 33 } func m34(x int64) int64 { // amd64: "SHLQ\t[$]5,", "LEAQ\t.*[*]2" // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<4," - // loong64: "ADDVU","SLLV\t[$]5,","ADDVU" + // loong64: "ADDVU", "ALSLV\t[$]4," return x * 34 } func m35(x int64) int64 { // amd64: "IMUL3Q\t[$]35," // arm64: "MOVD\t[$]35,", "MUL" - // loong64: "MOVV\t[$]35,","MULV" + // loong64: "ALSLV\t[$]4,", "ALSLV\t[$]1," return x * 35 } func m36(x int64) int64 { // amd64: "LEAQ\t.*[*]8", "SHLQ\t[$]2," // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<3," - // loong64: "SLLV\t[$]2,","SLLV\t[$]5,","ADDVU" + // loong64: "SLLV\t[$]2,", "ALSLV\t[$]3," return x * 36 } func m37(x int64) int64 { // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]4" // arm64: "MOVD\t[$]37,", "MUL" - // loong64: "MOVV\t[$]37,","MULV" + // loong64: "ALSLV\t[$]3,", "ALSLV\t[$]2," return x * 37 } func m38(x int64) int64 { // amd64: "IMUL3Q\t[$]38," // arm64: "MOVD\t[$]38,", "MUL" - // loong64: "MOVV\t[$]38,","MULV" + // loong64: "ALSLV\t[$]3,", "ALSLV\t[$]2," return x * 38 } func m39(x int64) int64 { // amd64: "IMUL3Q\t[$]39," // arm64: "MOVD\t[$]39,", "MUL" - // loong64: "MOVV\t[$]39,", "MULV" + // loong64: "ALSLV\t[$]2,", "SUBVU", "ALSLV\t[$]3," return x * 39 } func m40(x int64) int64 { // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]3," // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<2," - // loong64: "SLLV\t[$]3,","SLLV\t[$]5,","ADDVU" + // 
loong64: "SLLV\t[$]3,", "ALSLV\t[$]2," return x * 40 } @@ -265,7 +265,7 @@ func mn2(x int64) int64 { func mn3(x int64) int64 { // amd64: "NEGQ", "LEAQ\t.*[*]2" // arm64: "SUB\tR[0-9]+<<2," - // loong64: "SLLV\t[$]2,","SUBVU" + // loong64: "SUBVU", "ALSLV\t[$]1," return x * -3 } func mn4(x int64) int64 { @@ -277,19 +277,19 @@ func mn4(x int64) int64 { func mn5(x int64) int64 { // amd64: "NEGQ", "LEAQ\t.*[*]4" // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<2," - // loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]2,","SUBVU" + // loong64: "SUBVU", "ALSLV\t[$]2," return x * -5 } func mn6(x int64) int64 { // amd64: "IMUL3Q\t[$]-6," // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<2," - // loong64: "ADDVU","SLLV\t[$]3,","SUBVU" + // loong64: "ADDVU", "SUBVU", "ALSLV\t[$]3," return x * -6 } func mn7(x int64) int64 { // amd64: "NEGQ", "LEAQ\t.*[*]8" // arm64: "SUB\tR[0-9]+<<3," - // loong64: "SLLV\t[$]3","SUBVU" + // loong64: "SUBVU", "ALSLV\t[$]3," return x * -7 } func mn8(x int64) int64 { @@ -301,43 +301,43 @@ func mn8(x int64) int64 { func mn9(x int64) int64 { // amd64: "NEGQ", "LEAQ\t.*[*]8" // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<3," - // loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]3","SUBVU" + // loong64: "SUBVU", "ALSLV\t[$]3," return x * -9 } func mn10(x int64) int64 { // amd64: "IMUL3Q\t[$]-10," // arm64: "MOVD\t[$]-10,", "MUL" - // loong64: "MOVV\t[$]-10","MULV" + // loong64: "ADDVU", "ALSLV\t[$]3", "SUBVU" return x * -10 } func mn11(x int64) int64 { // amd64: "IMUL3Q\t[$]-11," // arm64: "MOVD\t[$]-11,", "MUL" - // loong64: "MOVV\t[$]-11","MULV" + // loong64: "ALSLV\t[$]2,", "SUBVU", "ALSLV\t[$]4," return x * -11 } func mn12(x int64) int64 { // amd64: "IMUL3Q\t[$]-12," // arm64: "LSL\t[$]2,", "SUB\tR[0-9]+<<2," - // loong64: "SLLV\t[$]2,","SLLV\t[$]4,","SUBVU" + // loong64: "SUBVU", "SLLV\t[$]2,", "ALSLV\t[$]4," return x * -12 } func mn13(x int64) int64 { // amd64: "IMUL3Q\t[$]-13," // arm64: "MOVD\t[$]-13,", "MUL" - // loong64: "MOVV\t[$]-13","MULV" + // loong64: "ALSLV\t[$]4,", 
"SLLV\t[$]2, ", "SUBVU" return x * -13 } func mn14(x int64) int64 { // amd64: "IMUL3Q\t[$]-14," // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<3," - // loong64: "ADDVU","SLLV\t[$]4,","SUBVU" + // loong64: "ADDVU", "SUBVU", "ALSLV\t[$]4," return x * -14 } func mn15(x int64) int64 { // amd64: "SHLQ\t[$]4,", "SUBQ" // arm64: "SUB\tR[0-9]+<<4," - // loong64: "SLLV\t[$]4,","SUBVU" + // loong64: "SUBVU", "ALSLV\t[$]4," return x * -15 } func mn16(x int64) int64 { @@ -349,24 +349,24 @@ func mn16(x int64) int64 { func mn17(x int64) int64 { // amd64: "IMUL3Q\t[$]-17," // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<4," - // loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]4,","SUBVU" + // loong64: "SUBVU", "ALSLV\t[$]4," return x * -17 } func mn18(x int64) int64 { // amd64: "IMUL3Q\t[$]-18," // arm64: "MOVD\t[$]-18,", "MUL" - // loong64: "MOVV\t[$]-18","MULV" + // loong64: "ADDVU", "ALSLV\t[$]4,", "SUBVU" return x * -18 } func mn19(x int64) int64 { // amd64: "IMUL3Q\t[$]-19," // arm64: "MOVD\t[$]-19,", "MUL" - // loong64: "MOVV\t[$]-19","MULV" + // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]4,", "SUBVU" return x * -19 } func mn20(x int64) int64 { // amd64: "IMUL3Q\t[$]-20," // arm64: "MOVD\t[$]-20,", "MUL" - // loong64: "MOVV\t[$]-20","MULV" + // loong64: "SLLV\t[$]2,", "ALSLV\t[$]4,", "SUBVU" return x * -20 } -- 2.51.0