From: Xiaolin Zhao
Date: Mon, 12 Aug 2024 09:20:44 +0000 (+0800)
Subject: cmd/compile: optimize RotateLeft8/16 on loong64
X-Git-Tag: go1.24rc1~908
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=2c5b707b3b30ea194877600151a7c299671cab3a;p=gostls13.git

cmd/compile: optimize RotateLeft8/16 on loong64

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000 @ 2500.00MHz
              │  bench.old  │              bench.new              │
              │   sec/op    │    sec/op     vs base               │
RotateLeft8     1.401n ± 0%    1.201n ± 0%  -14.28% (p=0.000 n=20)
RotateLeft16   1.4010n ± 0%   0.8032n ± 0%  -42.67% (p=0.000 n=20)
geomean         1.401n        0.9822n       -29.90%

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
              │  bench.old  │              bench.new              │
              │   sec/op    │    sec/op     vs base               │
RotateLeft8     1.576n ± 0%    1.310n ± 0%  -16.88% (p=0.000 n=20)
RotateLeft16    1.576n ± 0%    1.166n ± 0%  -26.02% (p=0.000 n=20)
geomean         1.576n         1.236n       -21.58%

Change-Id: I39c18306be0b8fd31b57bd0911714abd1783b50e
Reviewed-on: https://go-review.googlesource.com/c/go/+/604738
Auto-Submit: abner chenc
Reviewed-by: Dmitri Shuralyov
Reviewed-by: abner chenc
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Tim King
---
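As a sanity check on the two new lowerings, here is a minimal Go model of what they compute (the names rotl8 and rotl16 and the test values are made up for illustration; this program is not part of the patch). rotl8 mirrors the shift-or rule; rotl16 mirrors the halfword-doubling trick, where the 16-bit value is copied into both halves of a 32-bit word so that a single 32-bit rotate-right by the negated count performs the rotate.

package main

import (
	"fmt"
	"math/bits"
)

// rotl8 models (OR (SLLV x (ANDconst [7] y)) (SRLV (ZeroExt8to64 x) (ANDconst [7] (NEGV y)))).
// Go's uint8 arithmetic supplies the truncation/zero-extension that the
// machine gets from operating on the low byte of a 64-bit register.
func rotl8(x uint8, s int) uint8 {
	return x<<(s&7) | x>>(-s&7)
}

// rotl16 models (ROTR (OR (ZeroExt16to32 x) (SLLVconst (ZeroExt16to32 x) [16])) (NEGV y)):
// double the halfword into 32 bits, then do one rotate-right by -s (mod 32).
func rotl16(x uint16, s int) uint16 {
	d := uint32(x) | uint32(x)<<16 // halfword in both halves
	r := uint(-s) & 31             // ROTR rotates right; count is taken mod 32
	return uint16(d>>r | d<<(32-r))
}

func main() {
	for s := -64; s <= 64; s++ {
		if rotl8(0xa5, s) != bits.RotateLeft8(0xa5, s) ||
			rotl16(0xbeef, s) != bits.RotateLeft16(0xbeef, s) {
			fmt.Println("mismatch at shift", s)
			return
		}
	}
	fmt.Println("models agree with math/bits")
}
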
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
index 0f58cbcc11..ab280f8f79 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
@@ -119,7 +119,9 @@
 
 // rotates
 (RotateLeft8 x (MOVVconst [c])) => (Or8 (Lsh8x64 x (MOVVconst [c&7])) (Rsh8Ux64 x (MOVVconst [-c&7])))
+(RotateLeft8 x y) => (OR (SLLV x (ANDconst [7] y)) (SRLV (ZeroExt8to64 x) (ANDconst [7] (NEGV y))))
 (RotateLeft16 x (MOVVconst [c])) => (Or16 (Lsh16x64 x (MOVVconst [c&15])) (Rsh16Ux64 x (MOVVconst [-c&15])))
+(RotateLeft16 x y) => (ROTR (OR (ZeroExt16to32 x) (SLLVconst (ZeroExt16to32 x) [16])) (NEGV y))
 (RotateLeft32 x y) => (ROTR x (NEGV y))
 (RotateLeft64 x y) => (ROTRV x (NEGV y))
 
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index b95afd9f2d..c7d5dc7106 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -2255,9 +2255,9 @@ func canRotate(c *Config, bits int64) bool {
 		return false
 	}
 	switch c.arch {
-	case "386", "amd64", "arm64", "riscv64":
+	case "386", "amd64", "arm64", "loong64", "riscv64":
 		return true
-	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
+	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
 		return bits >= 32
 	default:
 		return false
diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
index f4b1997e18..e17c305f4f 100644
--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go
+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
@@ -6102,7 +6102,26 @@ func rewriteValueLOONG64_OpRotateLeft16(v *Value) bool {
 		v.AddArg2(v0, v2)
 		return true
 	}
-	return false
+	// match: (RotateLeft16 x y)
+	// result: (ROTR (OR (ZeroExt16to32 x) (SLLVconst (ZeroExt16to32 x) [16])) (NEGV y))
+	for {
+		t := v.Type
+		x := v_0
+		y := v_1
+		v.reset(OpLOONG64ROTR)
+		v.Type = t
+		v0 := b.NewValue0(v.Pos, OpLOONG64OR, typ.UInt32)
+		v1 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, t)
+		v2.AuxInt = int64ToAuxInt(16)
+		v2.AddArg(v1)
+		v0.AddArg2(v1, v2)
+		v3 := b.NewValue0(v.Pos, OpLOONG64NEGV, typ.Int64)
+		v3.AddArg(y)
+		v.AddArg2(v0, v3)
+		return true
+	}
 }
 func rewriteValueLOONG64_OpRotateLeft32(v *Value) bool {
 	v_1 := v.Args[1]
@@ -6162,7 +6181,31 @@ func rewriteValueLOONG64_OpRotateLeft8(v *Value) bool {
 		v.AddArg2(v0, v2)
 		return true
 	}
-	return false
+	// match: (RotateLeft8 x y)
+	// result: (OR (SLLV x (ANDconst [7] y)) (SRLV (ZeroExt8to64 x) (ANDconst [7] (NEGV y))))
+	for {
+		t := v.Type
+		x := v_0
+		y := v_1
+		v.reset(OpLOONG64OR)
+		v.Type = t
+		v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t)
+		v1 := b.NewValue0(v.Pos, OpLOONG64ANDconst, typ.Int64)
+		v1.AuxInt = int64ToAuxInt(7)
+		v1.AddArg(y)
+		v0.AddArg2(x, v1)
+		v2 := b.NewValue0(v.Pos, OpLOONG64SRLV, t)
+		v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v3.AddArg(x)
+		v4 := b.NewValue0(v.Pos, OpLOONG64ANDconst, typ.Int64)
+		v4.AuxInt = int64ToAuxInt(7)
+		v5 := b.NewValue0(v.Pos, OpLOONG64NEGV, typ.Int64)
+		v5.AddArg(y)
+		v4.AddArg(v5)
+		v2.AddArg2(v3, v4)
+		v.AddArg2(v0, v2)
+		return true
+	}
 }
 func rewriteValueLOONG64_OpRsh16Ux16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index af2c9e790c..4754f29525 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -230,6 +230,7 @@ func ReverseBytes16(n uint16) uint16 {
 func RotateLeft64(n uint64) uint64 {
 	// amd64:"ROLQ"
 	// arm64:"ROR"
+	// loong64:"ROTRV"
 	// ppc64x:"ROTL"
 	// riscv64:"RORI"
 	// s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
@@ -241,6 +242,7 @@ func RotateLeft32(n uint32) uint32 {
 	// amd64:"ROLL" 386:"ROLL"
 	// arm:`MOVW\tR[0-9]+@>23`
 	// arm64:"RORW"
+	// loong64:"ROTR\t"
 	// ppc64x:"ROTLW"
 	// riscv64:"RORIW"
 	// s390x:"RLL"
@@ -251,18 +253,21 @@ func RotateLeft32(n uint32) uint32 {
 func RotateLeft16(n uint16, s int) uint16 {
 	// amd64:"ROLW" 386:"ROLW"
 	// arm64:"RORW",-"CSEL"
+	// loong64:"ROTR\t","SLLV"
 	return bits.RotateLeft16(n, s)
 }
 
 func RotateLeft8(n uint8, s int) uint8 {
 	// amd64:"ROLB" 386:"ROLB"
 	// arm64:"LSL","LSR",-"CSEL"
+	// loong64:"OR","SLLV","SRLV"
 	return bits.RotateLeft8(n, s)
 }
 
 func RotateLeftVariable(n uint, m int) uint {
 	// amd64:"ROLQ"
 	// arm64:"ROR"
+	// loong64:"ROTRV"
 	// ppc64x:"ROTL"
 	// riscv64:"ROL"
 	// s390x:"RLLG"
@@ -273,6 +278,7 @@ func RotateLeftVariable(n uint, m int) uint {
 func RotateLeftVariable64(n uint64, m int) uint64 {
 	// amd64:"ROLQ"
 	// arm64:"ROR"
+	// loong64:"ROTRV"
 	// ppc64x:"ROTL"
 	// riscv64:"ROL"
 	// s390x:"RLLG"
@@ -284,6 +290,7 @@ func RotateLeftVariable32(n uint32, m int) uint32 {
 	// arm:`MOVW\tR[0-9]+@>R[0-9]+`
 	// amd64:"ROLL"
 	// arm64:"RORW"
+	// loong64:"ROTR\t"
 	// ppc64x:"ROTLW"
 	// riscv64:"ROLW"
 	// s390x:"RLL"
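A note on the rewrite.go hunk above: canRotate gates whether the generic SSA rules form rotate ops at all, so moving "loong64" into the unconditional case is what lets RotateLeft8/16 survive to the new LOONG64 rules instead of being expanded into shifts earlier. The larger win on RotateLeft16 in the benchmarks is consistent with its lowering collapsing to a single 32-bit ROTR, while RotateLeft8 still needs a shift/shift/or sequence.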