From: Xiaolin Zhao
Date: Thu, 14 Nov 2024 03:35:39 +0000 (+0800)
Subject: cmd/compile: optimize shifts of int32 and uint32 on loong64
X-Git-Tag: go1.25rc1~775
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=2a772a2fe7db5602a2932c63a0278ed45e8762cc;p=gostls13.git

cmd/compile: optimize shifts of int32 and uint32 on loong64
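The 64-bit shift instructions SLLV/SRLV/SRAV only wrap their shift amount
at 64, so lowering a 32-bit shift through them forces the operand to be
zero- or sign-extended to 64 bits and the amount to be guarded against 64.
This CL lowers shifts of int32 and uint32 through the word-width
instructions SLL/SRL/SRA instead: the amount wraps at 32, the guard
constant drops to 32, and the extension of the shifted value disappears.
A sketch of the affected source shapes (our illustration, not part of the
CL; the function names are ours):

	func shl32(x int32, n uint) int32    { return x << n } // now SLL rather than SLLV
	func shr32u(x uint32, n uint) uint32 { return x >> n } // now SRL, no zero-extension of x
	func shr32s(x int32, n uint) int32   { return x >> n } // now SRA, no sign-extension of x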
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000-HV @ 2500.00MHz
                |  bench.old  |            bench.new             |
                |   sec/op    |   sec/op     vs base             |
LeadingZeros       1.100n ± 1%   1.101n ± 0%       ~ (p=0.566 n=10)
LeadingZeros8      1.501n ± 0%   1.502n ± 0%  +0.07% (p=0.000 n=10)
LeadingZeros16     1.501n ± 0%   1.502n ± 0%  +0.07% (p=0.000 n=10)
LeadingZeros32    1.2010n ± 0%  0.9511n ± 0% -20.81% (p=0.000 n=10)
LeadingZeros64     1.104n ± 1%   1.119n ± 0%  +1.40% (p=0.000 n=10)
TrailingZeros     0.8137n ± 0%  0.8086n ± 0%  -0.63% (p=0.001 n=10)
TrailingZeros8     1.031n ± 1%   1.031n ± 1%       ~ (p=0.956 n=10)
TrailingZeros16   0.8204n ± 1%  0.8114n ± 0%  -1.11% (p=0.000 n=10)
TrailingZeros32   0.8145n ± 0%  0.8090n ± 0%  -0.68% (p=0.000 n=10)
TrailingZeros64   0.8159n ± 0%  0.8089n ± 1%  -0.86% (p=0.000 n=10)
OnesCount         0.8672n ± 0%  0.8677n ± 0%  +0.06% (p=0.000 n=10)
OnesCount8        0.8005n ± 0%  0.8009n ± 0%  +0.06% (p=0.000 n=10)
OnesCount16       0.9339n ± 0%  0.9344n ± 0%  +0.05% (p=0.000 n=10)
OnesCount32       0.8672n ± 0%  0.8677n ± 0%  +0.06% (p=0.000 n=10)
OnesCount64        1.201n ± 0%   1.201n ± 0%       ~ (p=0.474 n=10)
RotateLeft        0.8005n ± 0%  0.8009n ± 0%  +0.05% (p=0.000 n=10)
RotateLeft8        1.202n ± 0%   1.202n ± 0%       ~ (p=0.210 n=10)
RotateLeft16      0.8050n ± 0%  0.8036n ± 0%  -0.17% (p=0.002 n=10)
RotateLeft32      0.6674n ± 0%  0.6674n ± 0%       ~ (p=1.000 n=10)
RotateLeft64      0.6673n ± 0%  0.6674n ± 0%       ~ (p=0.072 n=10)
Reverse           0.4123n ± 0%  0.4067n ± 1%  -1.37% (p=0.000 n=10)
Reverse8          0.8005n ± 0%  0.8009n ± 0%  +0.05% (p=0.000 n=10)
Reverse16         0.8004n ± 0%  0.8009n ± 0%  +0.06% (p=0.000 n=10)
Reverse32         0.8004n ± 0%  0.8009n ± 0%  +0.06% (p=0.000 n=10)
Reverse64         0.8004n ± 0%  0.8009n ± 0%  +0.06% (p=0.001 n=10)
ReverseBytes      0.4100n ± 1%  0.4057n ± 1%  -1.06% (p=0.002 n=10)
ReverseBytes16    0.8004n ± 0%  0.8009n ± 0%  +0.07% (p=0.000 n=10)
ReverseBytes32    0.8005n ± 0%  0.8009n ± 0%  +0.05% (p=0.000 n=10)
ReverseBytes64    0.8005n ± 0%  0.8009n ± 0%  +0.05% (p=0.000 n=10)
Add                1.201n ± 0%   1.201n ± 0%       ~ (p=1.000 n=10)
Add32              1.201n ± 0%   1.201n ± 0%       ~ (p=0.474 n=10)
Add64              1.201n ± 0%   1.201n ± 0%       ~ (p=1.000 n=10)
Add64multiple      1.831n ± 0%   1.832n ± 0%       ~ (p=1.000 n=10)
Sub                1.201n ± 0%   1.201n ± 0%       ~ (p=1.000 n=10)
Sub32              1.601n ± 0%   1.602n ± 0%  +0.06% (p=0.000 n=10)
Sub64              1.201n ± 0%   1.201n ± 0%       ~ (p=0.474 n=10)
Sub64multiple      2.400n ± 0%   2.402n ± 0%  +0.10% (p=0.000 n=10)
Mul               0.8005n ± 0%  0.8009n ± 0%  +0.05% (p=0.000 n=10)
Mul32             0.8005n ± 0%  0.8009n ± 0%  +0.05% (p=0.000 n=10)
Mul64             0.8004n ± 0%  0.8008n ± 0%  +0.05% (p=0.000 n=10)
Div                9.107n ± 0%   9.083n ± 0%       ~ (p=0.255 n=10)
Div32              4.009n ± 0%   4.011n ± 0%  +0.05% (p=0.000 n=10)
Div64              9.705n ± 0%   9.711n ± 0%  +0.06% (p=0.000 n=10)
geomean            1.089n        1.083n       -0.62%

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
                |  bench.old  |            bench.new             |
                |   sec/op    |   sec/op     vs base             |
LeadingZeros       1.352n ± 0%   1.341n ± 4%  -0.81% (p=0.024 n=10)
LeadingZeros8      1.766n ± 0%   1.781n ± 0%  +0.88% (p=0.000 n=10)
LeadingZeros16     1.766n ± 0%   1.782n ± 0%  +0.88% (p=0.000 n=10)
LeadingZeros32     1.536n ± 0%   1.341n ± 1% -12.73% (p=0.000 n=10)
LeadingZeros64     1.351n ± 1%   1.338n ± 0%  -0.96% (p=0.000 n=10)
TrailingZeros     0.9037n ± 0%  0.9025n ± 0%  -0.12% (p=0.020 n=10)
TrailingZeros8     1.087n ± 3%   1.056n ± 0%       ~ (p=0.060 n=10)
TrailingZeros16    1.101n ± 0%   1.101n ± 0%       ~ (p=0.211 n=10)
TrailingZeros32   0.9040n ± 0%  0.9024n ± 1%  -0.18% (p=0.017 n=10)
TrailingZeros64   0.9043n ± 0%  0.9028n ± 1%       ~ (p=0.118 n=10)
OnesCount          1.503n ± 2%   1.482n ± 1%  -1.43% (p=0.001 n=10)
OnesCount8         1.207n ± 0%   1.206n ± 0%  -0.12% (p=0.000 n=10)
OnesCount16        1.501n ± 0%   1.534n ± 0%  +2.13% (p=0.000 n=10)
OnesCount32        1.483n ± 1%   1.531n ± 1%  +3.27% (p=0.000 n=10)
OnesCount64        1.301n ± 0%   1.302n ± 0%  +0.08% (p=0.000 n=10)
RotateLeft        0.8136n ± 4%  0.8083n ± 0%  -0.66% (p=0.002 n=10)
RotateLeft8        1.311n ± 0%   1.310n ± 0%       ~ (p=0.786 n=10)
RotateLeft16       1.165n ± 0%   1.149n ± 0%  -1.33% (p=0.001 n=10)
RotateLeft32      0.8138n ± 1%  0.8093n ± 0%  -0.57% (p=0.017 n=10)
RotateLeft64      0.8149n ± 1%  0.8088n ± 0%  -0.74% (p=0.000 n=10)
Reverse           0.5195n ± 1%  0.5109n ± 0%  -1.67% (p=0.000 n=10)
Reverse8          0.8007n ± 0%  0.8010n ± 0%  +0.04% (p=0.000 n=10)
Reverse16         0.8007n ± 0%  0.8010n ± 0%  +0.04% (p=0.000 n=10)
Reverse32         0.8007n ± 0%  0.8010n ± 0%  +0.04% (p=0.012 n=10)
Reverse64         0.8007n ± 0%  0.8010n ± 0%  +0.04% (p=0.010 n=10)
ReverseBytes      0.5120n ± 1%  0.5122n ± 2%       ~ (p=0.306 n=10)
ReverseBytes16    0.8007n ± 0%  0.8010n ± 0%  +0.04% (p=0.000 n=10)
ReverseBytes32    0.8007n ± 0%  0.8010n ± 0%  +0.04% (p=0.000 n=10)
ReverseBytes64    0.8007n ± 0%  0.8010n ± 0%  +0.04% (p=0.000 n=10)
Add                1.201n ± 0%   1.201n ± 4%       ~ (p=0.334 n=10)
Add32              1.201n ± 0%   1.201n ± 0%       ~ (p=0.563 n=10)
Add64              1.201n ± 0%   1.201n ± 1%       ~ (p=0.652 n=10)
Add64multiple      1.909n ± 0%   1.902n ± 0%       ~ (p=0.126 n=10)
Sub                1.201n ± 0%   1.201n ± 0%       ~ (p=1.000 n=10)
Sub32              1.655n ± 0%   1.654n ± 0%       ~ (p=0.589 n=10)
Sub64              1.201n ± 0%   1.201n ± 0%       ~ (p=1.000 n=10)
Sub64multiple      2.150n ± 0%   2.180n ± 4%  +1.37% (p=0.000 n=10)
Mul               0.9341n ± 0%  0.9345n ± 0%  +0.04% (p=0.011 n=10)
Mul32              1.053n ± 0%   1.030n ± 0%  -2.23% (p=0.000 n=10)
Mul64             0.9341n ± 0%  0.9345n ± 0%  +0.04% (p=0.018 n=10)
Div                11.59n ± 0%   11.57n ± 1%       ~ (p=0.091 n=10)
Div32              4.337n ± 0%   4.337n ± 1%       ~ (p=0.783 n=10)
Div64              12.81n ± 0%   12.76n ± 0%  -0.39% (p=0.001 n=10)
geomean            1.257n        1.252n       -0.46%

Change-Id: I9e93ea49736760c19dc6b6463d2aa95878121b7b
Reviewed-on: https://go-review.googlesource.com/c/go/+/627855
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
Reviewed-by: abner chenc
Reviewed-by: Meidan Li
Reviewed-by: Junyang Shao
---

diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go
index e8b8b27f87..60516d6618 100644
--- a/src/cmd/compile/internal/loong64/ssa.go
+++ b/src/cmd/compile/internal/loong64/ssa.go
@@ -165,8 +165,11 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		ssa.OpLOONG64OR,
 		ssa.OpLOONG64XOR,
 		ssa.OpLOONG64NOR,
+		ssa.OpLOONG64SLL,
 		ssa.OpLOONG64SLLV,
+		ssa.OpLOONG64SRL,
 		ssa.OpLOONG64SRLV,
+		ssa.OpLOONG64SRA,
 		ssa.OpLOONG64SRAV,
 		ssa.OpLOONG64ROTR,
 		ssa.OpLOONG64ROTRV,
@@ -274,8 +277,11 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		ssa.OpLOONG64ORconst,
 		ssa.OpLOONG64XORconst,
 		ssa.OpLOONG64NORconst,
+		ssa.OpLOONG64SLLconst,
 		ssa.OpLOONG64SLLVconst,
+		ssa.OpLOONG64SRLconst,
 		ssa.OpLOONG64SRLVconst,
+		ssa.OpLOONG64SRAconst,
 		ssa.OpLOONG64SRAVconst,
 		ssa.OpLOONG64ROTRconst,
 		ssa.OpLOONG64ROTRVconst,
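The backend half of the change is mechanical: the new word-width opcodes and
their immediate forms simply join the case lists of the two existing emit
paths in ssaGenValue (register-register ops in the first hunk,
register-immediate ops in the second), so no new instruction-selection code
is needed. The interesting part is the lowering rules, next.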
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
index 7ffd579dc7..41c120c983 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
@@ -62,10 +62,10 @@
 (Lsh64x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y)))
 (Lsh64x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y)))
 
-(Lsh32x64 x y) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y))
-(Lsh32x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y)))
-(Lsh32x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y)))
-(Lsh32x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y)))
+(Lsh32x64 x y) => (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y))
+(Lsh32x32 x y) => (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y)))
+(Lsh32x16 x y) => (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y)))
+(Lsh32x8 x y) => (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y)))
 
 (Lsh16x64 x y) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y))
 (Lsh16x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y)))
@@ -82,10 +82,10 @@
 (Rsh64Ux16 x y) => (MASKEQZ (SRLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y)))
 (Rsh64Ux8 x y) => (MASKEQZ (SRLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y)))
 
-(Rsh32Ux64 x y) => (MASKEQZ (SRLV (ZeroExt32to64 x) y) (SGTU (MOVVconst [64]) y))
-(Rsh32Ux32 x y) => (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y)))
-(Rsh32Ux16 x y) => (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y)))
-(Rsh32Ux8 x y) => (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y)))
+(Rsh32Ux64 x y) => (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y))
+(Rsh32Ux32 x y) => (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y)))
+(Rsh32Ux16 x y) => (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y)))
+(Rsh32Ux8 x y) => (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y)))
 
 (Rsh16Ux64 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) y) (SGTU (MOVVconst [64]) y))
 (Rsh16Ux32 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y)))
@@ -102,10 +102,10 @@
 (Rsh64x16 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y)))
 (Rsh64x8 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y)))
 
-(Rsh32x64 x y) => (SRAV (SignExt32to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y))
-(Rsh32x32 x y) => (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y)))
-(Rsh32x16 x y) => (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y)))
-(Rsh32x8 x y) => (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y)))
+(Rsh32x64 x y) => (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y))
+(Rsh32x32 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y)))
+(Rsh32x16 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y)))
+(Rsh32x8 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y)))
 
 (Rsh16x64 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y))
 (Rsh16x32 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y)))
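Go demands a zero result (or, for signed shifts, all sign bits) once the
shift amount reaches the operand width, while the hardware shifter only
looks at the low bits of the amount, so every variable shift carries a
guard. SGTU(a, b) produces 1 when a > b unsigned, and MASKEQZ(v, m) keeps v
only when m is nonzero. Switching the unsigned rules from SRLV to SRL lets
the guard constant drop from 64 to 32 and, more importantly, makes the
ZeroExt32to64 of the shifted value unnecessary; the signed rules instead
clamp the amount, since ORing with NEGV(1) = all ones yields an effective
amount of 31, which smears the sign bit just as Go requires. A Go model of
the old and new unsigned lowerings (our sketch, not compiler output):

	// Both functions compute Go's x >> y for a uint32 x and 64-bit y.
	func rsh32uOld(x uint32, y uint64) uint32 {
		wide := uint64(x)              // ZeroExt32to64: the extension the CL removes
		r := uint32(wide >> (y & 63))  // SRLV looks only at the low 6 bits of y
		if 64 > y {                    // SGTU (MOVVconst [64]) y
			return r               // MASKEQZ keeps the result...
		}
		return 0                       // ...or forces 0 for out-of-range y
	}

	func rsh32uNew(x uint32, y uint64) uint32 {
		r := x >> (y & 31) // SRL looks only at the low 5 bits of y; x used as-is
		if 32 > y {        // SGTU (MOVVconst [32]) y
			return r
		}
		return 0
	}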
@@ -686,15 +686,30 @@
 (XOR x (MOVVconst [c])) && is32Bit(c) => (XORconst [c] x)
 (NOR x (MOVVconst [c])) && is32Bit(c) => (NORconst [c] x)
 
+(SLL _ (MOVVconst [c])) && uint64(c)>=32 => (MOVVconst [0])
 (SLLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0])
+(SRL _ (MOVVconst [c])) && uint64(c)>=32 => (MOVVconst [0])
 (SRLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0])
+(SRA x (MOVVconst [c])) && uint64(c)>=32 => (SRAconst x [31])
 (SRAV x (MOVVconst [c])) && uint64(c)>=64 => (SRAVconst x [63])
+(SLL x (MOVVconst [c])) && uint64(c) >=0 && uint64(c) <=31 => (SLLconst x [c])
 (SLLV x (MOVVconst [c])) => (SLLVconst x [c])
+(SRL x (MOVVconst [c])) && uint64(c) >=0 && uint64(c) <=31 => (SRLconst x [c])
 (SRLV x (MOVVconst [c])) => (SRLVconst x [c])
+(SRA x (MOVVconst [c])) && uint64(c) >=0 && uint64(c) <=31 => (SRAconst x [c])
 (SRAV x (MOVVconst [c])) => (SRAVconst x [c])
 (ROTR x (MOVVconst [c])) => (ROTRconst x [c&31])
 (ROTRV x (MOVVconst [c])) => (ROTRVconst x [c&63])
 
+// Avoid unnecessary zero and sign extension when right shifting.
+(SRLVconst [rc] (MOVWUreg y)) && rc >= 0 && rc <= 31 => (SRLconst [int64(rc)] y)
+(SRAVconst [rc] (MOVWreg y)) && rc >= 0 && rc <= 31 => (SRAconst [int64(rc)] y)
+
+// Replace right shifts that exceed size of signed type.
+(SRAVconst [rc] (MOVBreg y)) && rc >= 8 => (SRAVconst [63] (SLLVconst [56] y))
+(SRAVconst [rc] (MOVHreg y)) && rc >= 16 => (SRAVconst [63] (SLLVconst [48] y))
+(SRAVconst [rc] (MOVWreg y)) && rc >= 32 => (SRAconst [31] y)
+
 // If the shift amount is larger than the datasize(32, 16, 8), we can optimize to constant 0.
 (MOVWUreg (SLLVconst [lc] x)) && lc >= 32 => (MOVVconst [0])
 (MOVHUreg (SLLVconst [lc] x)) && lc >= 16 => (MOVVconst [0])
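The MOVWUreg/MOVWreg rules work because the word-width shifts read only the
low 32 bits of their source register, so an explicit zero or sign extension
feeding them is dead. The last three rules go further and trade the
extension for a shift pair: once the constant amount reaches the width of
the narrow type, an arithmetic right shift can only produce 0 or -1, and
SLLVconst [56] followed by SRAVconst [63] recovers exactly that from the
raw byte, letting the MOVBreg disappear. A sketch of the int8 identity in
plain Go (our illustration, not part of the CL):

	// For any int8 v and any constant rc in [8, 63], these two agree.
	func sar8ViaShiftPair(v int8) int64 {
		direct := int64(v) >> 8       // what (SRAVconst [rc] (MOVBreg y)) computes: 0 or -1
		viaPair := (int64(v) << 56) >> 63 // (SRAVconst [63] (SLLVconst [56] y))
		if direct != viaPair {
			panic("unreachable: both are 0 for v >= 0 and -1 for v < 0")
		}
		return viaPair
	}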
{name: "SRAVconst", argLength: 1, reg: gp11, asm: "SRAV", aux: "Int64"}, // arg0 >> auxInt, signed {name: "ROTR", argLength: 2, reg: gp21, asm: "ROTR"}, // arg0 right rotate by (arg1 mod 32) bits {name: "ROTRV", argLength: 2, reg: gp21, asm: "ROTRV"}, // arg0 right rotate by (arg1 mod 64) bits diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 8b51015ed8..03ccc0f5d7 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1834,11 +1834,17 @@ const ( OpLOONG64MASKEQZ OpLOONG64MASKNEZ OpLOONG64FCOPYSGD + OpLOONG64SLL OpLOONG64SLLV + OpLOONG64SLLconst OpLOONG64SLLVconst + OpLOONG64SRL OpLOONG64SRLV + OpLOONG64SRLconst OpLOONG64SRLVconst + OpLOONG64SRA OpLOONG64SRAV + OpLOONG64SRAconst OpLOONG64SRAVconst OpLOONG64ROTR OpLOONG64ROTRV @@ -24709,6 +24715,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "SLL", + argLen: 2, + asm: loong64.ASLL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, { name: "SLLV", argLen: 2, @@ -24723,6 +24743,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "SLLconst", + auxType: auxInt64, + argLen: 1, + asm: loong64.ASLL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, { name: "SLLVconst", auxType: auxInt64, @@ -24737,6 +24771,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "SRL", + argLen: 2, + asm: loong64.ASRL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, { name: "SRLV", argLen: 2, @@ -24751,6 +24799,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "SRLconst", + auxType: auxInt64, + argLen: 1, + asm: loong64.ASRL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, { name: "SRLVconst", auxType: auxInt64, @@ -24765,6 +24827,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "SRA", + argLen: 2, + asm: loong64.ASRA, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 
 	{
 		name:   "SRAV",
 		argLen: 2,
@@ -24779,6 +24855,20 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 },
+	{
+		name:    "SRAconst",
+		auxType: auxInt64,
+		argLen:  1,
+		asm:     loong64.ASRA,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+			},
+			outputs: []outputInfo{
+				{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+			},
+		},
+	},
 	{
 		name:    "SRAVconst",
 		auxType: auxInt64,
 		argLen:  1,
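Both opGen.go above and rewriteLOONG64.go below are generated code: the
opcode table encodes each op definition from LOONG64Ops.go (the integer
masks are register sets, spelled out in the comments), and every line of
LOONG64.rules becomes one match-and-rewrite function — for example, the
rule (SLL _ (MOVVconst [c])) && uint64(c)>=32 => (MOVVconst [0]) becomes
the first match block of rewriteValueLOONG64_OpLOONG64SLL below. These
files are normally regenerated from the generator under
cmd/compile/internal/ssa/_gen rather than edited by hand (our description
of the usual workflow, not part of the CL).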
diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
index 4499efa01d..01000db4c0 100644
--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go
+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
@@ -454,14 +454,20 @@ func rewriteValueLOONG64(v *Value) bool {
 		return rewriteValueLOONG64_OpLOONG64SGTUconst(v)
 	case OpLOONG64SGTconst:
 		return rewriteValueLOONG64_OpLOONG64SGTconst(v)
+	case OpLOONG64SLL:
+		return rewriteValueLOONG64_OpLOONG64SLL(v)
 	case OpLOONG64SLLV:
 		return rewriteValueLOONG64_OpLOONG64SLLV(v)
 	case OpLOONG64SLLVconst:
 		return rewriteValueLOONG64_OpLOONG64SLLVconst(v)
+	case OpLOONG64SRA:
+		return rewriteValueLOONG64_OpLOONG64SRA(v)
 	case OpLOONG64SRAV:
 		return rewriteValueLOONG64_OpLOONG64SRAV(v)
 	case OpLOONG64SRAVconst:
 		return rewriteValueLOONG64_OpLOONG64SRAVconst(v)
+	case OpLOONG64SRL:
+		return rewriteValueLOONG64_OpLOONG64SRL(v)
 	case OpLOONG64SRLV:
 		return rewriteValueLOONG64_OpLOONG64SRLV(v)
 	case OpLOONG64SRLVconst:
 		return rewriteValueLOONG64_OpLOONG64SRLVconst(v)
@@ -6031,6 +6037,43 @@ func rewriteValueLOONG64_OpLOONG64SGTconst(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueLOONG64_OpLOONG64SLL(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (SLL _ (MOVVconst [c]))
+	// cond: uint64(c)>=32
+	// result: (MOVVconst [0])
+	for {
+		if v_1.Op != OpLOONG64MOVVconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		if !(uint64(c) >= 32) {
+			break
+		}
+		v.reset(OpLOONG64MOVVconst)
+		v.AuxInt = int64ToAuxInt(0)
+		return true
+	}
+	// match: (SLL x (MOVVconst [c]))
+	// cond: uint64(c) >=0 && uint64(c) <=31
+	// result: (SLLconst x [c])
+	for {
+		x := v_0
+		if v_1.Op != OpLOONG64MOVVconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		if !(uint64(c) >= 0 && uint64(c) <= 31) {
+			break
+		}
+		v.reset(OpLOONG64SLLconst)
+		v.AuxInt = int64ToAuxInt(c)
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
 func rewriteValueLOONG64_OpLOONG64SLLV(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -6080,6 +6123,45 @@ func rewriteValueLOONG64_OpLOONG64SLLVconst(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueLOONG64_OpLOONG64SRA(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (SRA x (MOVVconst [c]))
+	// cond: uint64(c)>=32
+	// result: (SRAconst x [31])
+	for {
+		x := v_0
+		if v_1.Op != OpLOONG64MOVVconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		if !(uint64(c) >= 32) {
+			break
+		}
+		v.reset(OpLOONG64SRAconst)
+		v.AuxInt = int64ToAuxInt(31)
+		v.AddArg(x)
+		return true
+	}
+	// match: (SRA x (MOVVconst [c]))
+	// cond: uint64(c) >=0 && uint64(c) <=31
+	// result: (SRAconst x [c])
+	for {
+		x := v_0
+		if v_1.Op != OpLOONG64MOVVconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		if !(uint64(c) >= 0 && uint64(c) <= 31) {
+			break
+		}
+		v.reset(OpLOONG64SRAconst)
+		v.AuxInt = int64ToAuxInt(c)
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
 func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -6117,6 +6199,83 @@ func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool {
 }
 func rewriteValueLOONG64_OpLOONG64SRAVconst(v *Value) bool {
 	v_0 := v.Args[0]
+	b := v.Block
+	// match: (SRAVconst [rc] (MOVWreg y))
+	// cond: rc >= 0 && rc <= 31
+	// result: (SRAconst [int64(rc)] y)
+	for {
+		rc := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpLOONG64MOVWreg {
+			break
+		}
+		y := v_0.Args[0]
+		if !(rc >= 0 && rc <= 31) {
+			break
+		}
+		v.reset(OpLOONG64SRAconst)
+		v.AuxInt = int64ToAuxInt(int64(rc))
+		v.AddArg(y)
+		return true
+	}
+	// match: (SRAVconst [rc] (MOVBreg y))
+	// cond: rc >= 8
+	// result: (SRAVconst [63] (SLLVconst [56] y))
+	for {
+		t := v.Type
+		rc := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpLOONG64MOVBreg {
+			break
+		}
+		y := v_0.Args[0]
+		if !(rc >= 8) {
+			break
+		}
+		v.reset(OpLOONG64SRAVconst)
+		v.AuxInt = int64ToAuxInt(63)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, t)
+		v0.AuxInt = int64ToAuxInt(56)
+		v0.AddArg(y)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (SRAVconst [rc] (MOVHreg y))
+	// cond: rc >= 16
+	// result: (SRAVconst [63] (SLLVconst [48] y))
+	for {
+		t := v.Type
+		rc := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpLOONG64MOVHreg {
+			break
+		}
+		y := v_0.Args[0]
+		if !(rc >= 16) {
+			break
+		}
+		v.reset(OpLOONG64SRAVconst)
+		v.AuxInt = int64ToAuxInt(63)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, t)
+		v0.AuxInt = int64ToAuxInt(48)
+		v0.AddArg(y)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (SRAVconst [rc] (MOVWreg y))
+	// cond: rc >= 32
+	// result: (SRAconst [31] y)
+	for {
+		rc := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpLOONG64MOVWreg {
+			break
+		}
+		y := v_0.Args[0]
+		if !(rc >= 32) {
+			break
+		}
+		v.reset(OpLOONG64SRAconst)
+		v.AuxInt = int64ToAuxInt(31)
+		v.AddArg(y)
+		return true
+	}
 	// match: (SRAVconst [c] (MOVVconst [d]))
 	// result: (MOVVconst [d>>uint64(c)])
 	for {
@@ -6131,6 +6290,43 @@ func rewriteValueLOONG64_OpLOONG64SRAVconst(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueLOONG64_OpLOONG64SRL(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (SRL _ (MOVVconst [c]))
+	// cond: uint64(c)>=32
+	// result: (MOVVconst [0])
+	for {
+		if v_1.Op != OpLOONG64MOVVconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		if !(uint64(c) >= 32) {
+			break
+		}
+		v.reset(OpLOONG64MOVVconst)
+		v.AuxInt = int64ToAuxInt(0)
+		return true
+	}
+	// match: (SRL x (MOVVconst [c]))
+	// cond: uint64(c) >=0 && uint64(c) <=31
+	// result: (SRLconst x [c])
+	for {
+		x := v_0
+		if v_1.Op != OpLOONG64MOVVconst {
+			break
+		}
+		c := auxIntToInt64(v_1.AuxInt)
+		if !(uint64(c) >= 0 && uint64(c) <= 31) {
+			break
+		}
+		v.reset(OpLOONG64SRLconst)
+		v.AuxInt = int64ToAuxInt(c)
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
 func rewriteValueLOONG64_OpLOONG64SRLV(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -6235,6 +6431,23 @@ func rewriteValueLOONG64_OpLOONG64SRLVconst(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (SRLVconst [rc] (MOVWUreg y))
+	// cond: rc >= 0 && rc <= 31
+	// result: (SRLconst [int64(rc)] y)
+	for {
+		rc := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpLOONG64MOVWUreg {
+			break
+		}
+		y := v_0.Args[0]
+		if !(rc >= 0 && rc <= 31) {
+			break
+		}
+		v.reset(OpLOONG64SRLconst)
+		v.AuxInt = int64ToAuxInt(int64(rc))
+		v.AddArg(y)
+		return true
+	}
 	// match: (SRLVconst [rc] (MOVWUreg x))
 	// cond: rc >= 32
 	// result: (MOVVconst [0])
@@ -7340,19 +7553,19 @@ func rewriteValueLOONG64_OpLsh32x16(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Lsh32x16 x y)
-	// result: (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y)))
+	// result: (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y)))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		v.reset(OpLOONG64MASKEQZ)
-		v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t)
 		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
 		v1.AddArg(y)
 		v0.AddArg2(x, v1)
 		v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
 		v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-		v3.AuxInt = int64ToAuxInt(64)
+		v3.AuxInt = int64ToAuxInt(32)
 		v2.AddArg2(v3, v1)
 		v.AddArg2(v0, v2)
 		return true
@@ -7364,19 +7577,19 @@ func rewriteValueLOONG64_OpLsh32x32(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Lsh32x32 x y)
-	// result: (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y)))
+	// result: (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y)))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		v.reset(OpLOONG64MASKEQZ)
-		v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t)
 		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
 		v1.AddArg(y)
 		v0.AddArg2(x, v1)
 		v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
 		v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-		v3.AuxInt = int64ToAuxInt(64)
+		v3.AuxInt = int64ToAuxInt(32)
 		v2.AddArg2(v3, v1)
 		v.AddArg2(v0, v2)
 		return true
@@ -7388,17 +7601,17 @@ func rewriteValueLOONG64_OpLsh32x64(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Lsh32x64 x y)
-	// result: (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y))
+	// result: (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		v.reset(OpLOONG64MASKEQZ)
-		v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t)
 		v0.AddArg2(x, y)
 		v1 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
 		v2 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-		v2.AuxInt = int64ToAuxInt(64)
+		v2.AuxInt = int64ToAuxInt(32)
 		v1.AddArg2(v2, y)
 		v.AddArg2(v0, v1)
 		return true
@@ -7410,19 +7623,19 @@ func rewriteValueLOONG64_OpLsh32x8(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Lsh32x8 x y)
-	// result: (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y)))
+	// result: (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y)))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		v.reset(OpLOONG64MASKEQZ)
-		v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t)
 		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
 		v1.AddArg(y)
 		v0.AddArg2(x, v1)
 		v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
 		v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-		v3.AuxInt = int64ToAuxInt(64)
+		v3.AuxInt = int64ToAuxInt(32)
 		v2.AddArg2(v3, v1)
 		v.AddArg2(v0, v2)
 		return true
@@ -8772,23 +8985,21 @@ func rewriteValueLOONG64_OpRsh32Ux16(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Rsh32Ux16 x y)
-	// result: (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y)))
+	// result: (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y)))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		v.reset(OpLOONG64MASKEQZ)
-		v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t)
-		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-		v1.AddArg(x)
-		v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
-		v2.AddArg(y)
-		v0.AddArg2(v1, v2)
-		v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
-		v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-		v4.AuxInt = int64ToAuxInt(64)
-		v3.AddArg2(v4, v2)
-		v.AddArg2(v0, v3)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t)
+		v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+		v1.AddArg(y)
+		v0.AddArg2(x, v1)
+		v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
+		v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
+		v3.AuxInt = int64ToAuxInt(32)
+		v2.AddArg2(v3, v1)
+		v.AddArg2(v0, v2)
 		return true
 	}
 }
@@ -8798,23 +9009,21 @@ func rewriteValueLOONG64_OpRsh32Ux32(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Rsh32Ux32 x y)
-	// result: (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y)))
+	// result: (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y)))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		v.reset(OpLOONG64MASKEQZ)
-		v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t)
 		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-		v1.AddArg(x)
-		v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-		v2.AddArg(y)
-		v0.AddArg2(v1, v2)
-		v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
-		v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-		v4.AuxInt = int64ToAuxInt(64)
-		v3.AddArg2(v4, v2)
-		v.AddArg2(v0, v3)
+		v1.AddArg(y)
+		v0.AddArg2(x, v1)
+		v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
+		v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
+		v3.AuxInt = int64ToAuxInt(32)
+		v2.AddArg2(v3, v1)
+		v.AddArg2(v0, v2)
 		return true
 	}
 }
@@ -8824,21 +9033,19 @@ func rewriteValueLOONG64_OpRsh32Ux64(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Rsh32Ux64 x y)
-	// result: (MASKEQZ (SRLV (ZeroExt32to64 x) y) (SGTU (MOVVconst [64]) y))
+	// result: (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		v.reset(OpLOONG64MASKEQZ)
-		v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t)
-		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-		v1.AddArg(x)
-		v0.AddArg2(v1, y)
-		v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
-		v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-		v3.AuxInt = int64ToAuxInt(64)
-		v2.AddArg2(v3, y)
-		v.AddArg2(v0, v2)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t)
+		v0.AddArg2(x, y)
+		v1 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
+		v2 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
+		v2.AuxInt = int64ToAuxInt(32)
+		v1.AddArg2(v2, y)
+		v.AddArg2(v0, v1)
 		return true
 	}
 }
@@ -8848,23 +9055,21 @@ func rewriteValueLOONG64_OpRsh32Ux8(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Rsh32Ux8 x y)
-	// result: (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y)))
+	// result: (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y)))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
 		v.reset(OpLOONG64MASKEQZ)
-		v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t)
-		v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-		v1.AddArg(x)
-		v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-		v2.AddArg(y)
-		v0.AddArg2(v1, v2)
-		v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
-		v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-		v4.AuxInt = int64ToAuxInt(64)
-		v3.AddArg2(v4, v2)
-		v.AddArg2(v0, v3)
+		v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t)
+		v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v1.AddArg(y)
+		v0.AddArg2(x, v1)
+		v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
+		v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
+		v3.AuxInt = int64ToAuxInt(32)
+		v2.AddArg2(v3, v1)
+		v.AddArg2(v0, v2)
 		return true
 	}
 }
&b.Func.Config.Types // match: (Rsh32x16 x y) - // result: (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) + // result: (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y))) for { t := v.Type x := v_0 y := v_1 - v.reset(OpLOONG64SRAV) - v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) - v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) - v4.AddArg(y) - v5 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v5.AuxInt = int64ToAuxInt(63) - v3.AddArg2(v4, v5) - v2.AddArg(v3) - v1.AddArg2(v2, v4) - v.AddArg2(v0, v1) + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v3.AddArg(y) + v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v4.AuxInt = int64ToAuxInt(31) + v2.AddArg2(v3, v4) + v1.AddArg(v2) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) return true } } @@ -8902,25 +9105,23 @@ func rewriteValueLOONG64_OpRsh32x32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32x32 x y) - // result: (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) + // result: (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y))) for { t := v.Type x := v_0 y := v_1 - v.reset(OpLOONG64SRAV) - v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) - v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v4.AddArg(y) - v5 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v5.AuxInt = int64ToAuxInt(63) - v3.AddArg2(v4, v5) - v2.AddArg(v3) - v1.AddArg2(v2, v4) - v.AddArg2(v0, v1) + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) + v3.AddArg(y) + v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v4.AuxInt = int64ToAuxInt(31) + v2.AddArg2(v3, v4) + v1.AddArg(v2) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) return true } } @@ -8930,23 +9131,21 @@ func rewriteValueLOONG64_OpRsh32x64(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (Rsh32x64 x y) - // result: (SRAV (SignExt32to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) + // result: (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y)) for { t := v.Type x := v_0 y := v_1 - v.reset(OpLOONG64SRAV) - v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) - v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) - v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) - v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) - v4.AuxInt = int64ToAuxInt(63) - v3.AddArg2(y, v4) - v2.AddArg(v3) - v1.AddArg2(v2, y) - v.AddArg2(v0, v1) + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v3.AuxInt = int64ToAuxInt(31) + v2.AddArg2(y, v3) + v1.AddArg(v2) + v0.AddArg2(v1, y) + v.AddArg2(x, v0) return true } } @@ -8956,25 +9155,23 @@ func 
@@ -8956,25 +9155,23 @@ func rewriteValueLOONG64_OpRsh32x8(v *Value) bool {
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (Rsh32x8 x y)
-	// result: (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y)))
+	// result: (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y)))
 	for {
 		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpLOONG64SRAV)
-		v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
-		v0.AddArg(x)
-		v1 := b.NewValue0(v.Pos, OpLOONG64OR, t)
-		v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t)
-		v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
-		v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
-		v4.AddArg(y)
-		v5 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
-		v5.AuxInt = int64ToAuxInt(63)
-		v3.AddArg2(v4, v5)
-		v2.AddArg(v3)
-		v1.AddArg2(v2, v4)
-		v.AddArg2(v0, v1)
+		v.reset(OpLOONG64SRA)
+		v0 := b.NewValue0(v.Pos, OpLOONG64OR, t)
+		v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t)
+		v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool)
+		v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v3.AddArg(y)
+		v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
+		v4.AuxInt = int64ToAuxInt(31)
+		v2.AddArg2(v3, v4)
+		v1.AddArg(v2)
+		v0.AddArg2(v1, v3)
+		v.AddArg2(x, v0)
 		return true
 	}
 }
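The final hunks extend the codegen tests. In test/codegen, a comment of the
form // arch:"REGEXP" asserts that the pattern appears in the function's
compiled assembly, a -"REGEXP" entry asserts that it does not, and several
patterns may share one line, separated by commas. A hypothetical test in
the same style (syntax illustration only, written by us and not verified
against the checker):

	func rsh32uExample(v uint32, n uint64) uint32 {
		// loong64:"SRL\t",-"SRLV"
		return v >> n
	}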
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index 52efefb0ed..8254e974df 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -11,49 +11,58 @@ package codegen
 // ------------------ //
 
 func lshConst64x64(v int64) int64 {
+	// loong64:"SLLV"
 	// ppc64x:"SLD"
 	// riscv64:"SLLI",-"AND",-"SLTIU"
 	return v << uint64(33)
 }
 
 func rshConst64Ux64(v uint64) uint64 {
+	// loong64:"SRLV"
 	// ppc64x:"SRD"
 	// riscv64:"SRLI\t",-"AND",-"SLTIU"
 	return v >> uint64(33)
 }
 
 func rshConst64Ux64Overflow32(v uint32) uint64 {
+	// loong64:"MOVV\t\\$0,",-"SRL\t"
 	// riscv64:"MOV\t\\$0,",-"SRL"
 	return uint64(v) >> 32
 }
 
 func rshConst64Ux64Overflow16(v uint16) uint64 {
+	// loong64:"MOVV\t\\$0,",-"SRLV"
 	// riscv64:"MOV\t\\$0,",-"SRL"
 	return uint64(v) >> 16
 }
 
 func rshConst64Ux64Overflow8(v uint8) uint64 {
+	// loong64:"MOVV\t\\$0,",-"SRLV"
 	// riscv64:"MOV\t\\$0,",-"SRL"
 	return uint64(v) >> 8
 }
 
 func rshConst64x64(v int64) int64 {
+	// loong64:"SRAV"
 	// ppc64x:"SRAD"
 	// riscv64:"SRAI\t",-"OR",-"SLTIU"
 	return v >> uint64(33)
}
 
 func rshConst64x64Overflow32(v int32) int64 {
+	// loong64:"SRA\t\\$31"
 	// riscv64:"SRAIW",-"SLLI",-"SRAI\t"
 	return int64(v) >> 32
 }
 
 func rshConst64x64Overflow16(v int16) int64 {
+	// loong64:"SLLV\t\\$48","SRAV\t\\$63"
 	// riscv64:"SLLI","SRAI",-"SRAIW"
 	return int64(v) >> 16
 }
 
 func rshConst64x64Overflow8(v int8) int64 {
+	// loong64:"SLLV\t\\$56","SRAV\t\\$63"
 	// riscv64:"SLLI","SRAI",-"SRAIW"
 	return int64(v) >> 8
 }
@@ -69,36 +78,42 @@ func lshConst64x1(v int64) int64 {
 }
 
 func lshConst32x64(v int32) int32 {
+	// loong64:"SLL\t"
 	// ppc64x:"SLW"
 	// riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW"
 	return v << uint64(29)
 }
 
 func rshConst32Ux64(v uint32) uint32 {
+	// loong64:"SRL\t"
 	// ppc64x:"SRW"
 	// riscv64:"SRLIW",-"AND",-"SLTIU", -"MOVW"
 	return v >> uint64(29)
 }
 
 func rshConst32x64(v int32) int32 {
+	// loong64:"SRA\t"
 	// ppc64x:"SRAW"
 	// riscv64:"SRAIW",-"OR",-"SLTIU", -"MOVW"
 	return v >> uint64(29)
 }
 
 func lshConst64x32(v int64) int64 {
+	// loong64:"SLLV"
 	// ppc64x:"SLD"
 	// riscv64:"SLLI",-"AND",-"SLTIU"
 	return v << uint32(33)
 }
 
 func rshConst64Ux32(v uint64) uint64 {
+	// loong64:"SRLV"
 	// ppc64x:"SRD"
 	// riscv64:"SRLI\t",-"AND",-"SLTIU"
 	return v >> uint32(33)
 }
 
 func rshConst64x32(v int64) int64 {
+	// loong64:"SRAV"
 	// ppc64x:"SRAD"
 	// riscv64:"SRAI\t",-"OR",-"SLTIU"
 	return v >> uint32(33)