From: Russ Cox
Date: Tue, 4 Nov 2025 03:09:48 +0000 (-0500)
Subject: cmd/compile: implement Avg64u, Hmul64, Hmul64u for wasm
X-Git-Tag: go1.26rc1~382
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=6e165b4d17;p=gostls13.git

cmd/compile: implement Avg64u, Hmul64, Hmul64u for wasm

This lets us remove useAvg and useHmul from the division rules.
The compiler is simpler and the generated code is faster.
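For context, a minimal Go sketch of what the newly lowered ops compute
(illustrative only, not the compiler's code). The Avg64u lowering added
below relies on x >= y, which holds at its uses in the division rules:

    package main

    import (
        "fmt"
        "math/bits"
    )

    // avg64u matches the new wasm rule: ((x-y)>>1)+y equals (x+y)/2
    // without overflow, provided x >= y.
    func avg64u(x, y uint64) uint64 { return (x-y)>>1 + y }

    // hmul64u is the high 64 bits of the full 128-bit product.
    func hmul64u(x, y uint64) uint64 { hi, _ := bits.Mul64(x, y); return hi }

    func main() {
        fmt.Println(avg64u(10, 4))     // 7
        fmt.Println(hmul64u(1<<63, 6)) // 3
    }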
goos: wasip1
goarch: wasm
pkg: internal/strconv
                                │   old.txt   │              new.txt                │
                                │   sec/op    │    sec/op     vs base               │
AppendFloat/Decimal               192.8n ± 1%   194.6n ± 0%    +0.91% (p=0.000 n=10)
AppendFloat/Float                 328.6n ± 0%   279.6n ± 0%   -14.93% (p=0.000 n=10)
AppendFloat/Exp                   335.6n ± 1%   289.2n ± 1%   -13.80% (p=0.000 n=10)
AppendFloat/NegExp                336.0n ± 0%   289.1n ± 1%   -13.97% (p=0.000 n=10)
AppendFloat/LongExp               332.4n ± 0%   285.2n ± 1%   -14.20% (p=0.000 n=10)
AppendFloat/Big                   348.2n ± 0%   300.1n ± 0%   -13.83% (p=0.000 n=10)
AppendFloat/BinaryExp             137.4n ± 0%   138.2n ± 0%    +0.55% (p=0.001 n=10)
AppendFloat/32Integer             193.3n ± 1%   196.5n ± 0%    +1.66% (p=0.000 n=10)
AppendFloat/32ExactFraction       283.3n ± 0%   268.9n ± 1%    -5.08% (p=0.000 n=10)
AppendFloat/32Point               279.9n ± 0%   266.5n ± 0%    -4.80% (p=0.000 n=10)
AppendFloat/32Exp                 300.1n ± 0%   288.3n ± 1%    -3.90% (p=0.000 n=10)
AppendFloat/32NegExp              288.2n ± 1%   277.9n ± 1%    -3.59% (p=0.000 n=10)
AppendFloat/32Shortest            261.7n ± 0%   250.2n ± 0%    -4.39% (p=0.000 n=10)
AppendFloat/32Fixed8Hard          173.3n ± 1%   158.9n ± 1%    -8.31% (p=0.000 n=10)
AppendFloat/32Fixed9Hard          180.0n ± 0%   167.9n ± 2%    -6.70% (p=0.000 n=10)
AppendFloat/64Fixed1              167.1n ± 0%   149.6n ± 1%   -10.50% (p=0.000 n=10)
AppendFloat/64Fixed2              162.4n ± 1%   146.5n ± 0%    -9.73% (p=0.000 n=10)
AppendFloat/64Fixed2.5            165.5n ± 0%   149.4n ± 1%    -9.70% (p=0.000 n=10)
AppendFloat/64Fixed3              166.4n ± 1%   150.2n ± 0%    -9.74% (p=0.000 n=10)
AppendFloat/64Fixed4              163.7n ± 0%   149.6n ± 1%    -8.62% (p=0.000 n=10)
AppendFloat/64Fixed5Hard          182.8n ± 1%   167.1n ± 1%    -8.61% (p=0.000 n=10)
AppendFloat/64Fixed12             222.2n ± 0%   208.8n ± 0%    -6.05% (p=0.000 n=10)
AppendFloat/64Fixed16             197.6n ± 1%   181.7n ± 0%    -8.02% (p=0.000 n=10)
AppendFloat/64Fixed12Hard         194.5n ± 0%   181.0n ± 0%    -6.99% (p=0.000 n=10)
AppendFloat/64Fixed17Hard         205.1n ± 1%   191.9n ± 0%    -6.44% (p=0.000 n=10)
AppendFloat/64Fixed18Hard         6.269µ ± 0%   6.643µ ± 0%    +5.97% (p=0.000 n=10)
AppendFloat/64FixedF1             211.7n ± 1%   197.0n ± 0%    -6.95% (p=0.000 n=10)
AppendFloat/64FixedF2             189.4n ± 0%   174.2n ± 0%    -8.08% (p=0.000 n=10)
AppendFloat/64FixedF3             169.0n ± 0%   154.9n ± 0%    -8.32% (p=0.000 n=10)
AppendFloat/Slowpath64            321.2n ± 0%   274.2n ± 1%   -14.63% (p=0.000 n=10)
AppendFloat/SlowpathDenormal64    307.4n ± 1%   261.2n ± 0%   -15.03% (p=0.000 n=10)
AppendInt                         3.367µ ± 1%   3.376µ ± 0%         ~ (p=0.517 n=10)
AppendUint                        675.5n ± 0%   676.9n ± 0%         ~ (p=0.196 n=10)
AppendIntSmall                    28.13n ± 1%   28.17n ± 0%    +0.14% (p=0.015 n=10)
AppendUintVarlen/digits=1         20.70n ± 0%   20.51n ± 1%    -0.89% (p=0.018 n=10)
AppendUintVarlen/digits=2         20.43n ± 0%   20.27n ± 0%    -0.81% (p=0.001 n=10)
AppendUintVarlen/digits=3         38.48n ± 0%   37.93n ± 0%    -1.43% (p=0.000 n=10)
AppendUintVarlen/digits=4         41.10n ± 0%   38.78n ± 1%    -5.62% (p=0.000 n=10)
AppendUintVarlen/digits=5         42.25n ± 1%   42.11n ± 0%    -0.32% (p=0.041 n=10)
AppendUintVarlen/digits=6         45.40n ± 1%   43.14n ± 0%    -4.98% (p=0.000 n=10)
AppendUintVarlen/digits=7         46.81n ± 1%   46.03n ± 0%    -1.66% (p=0.000 n=10)
AppendUintVarlen/digits=8         48.88n ± 1%   46.59n ± 1%    -4.68% (p=0.000 n=10)
AppendUintVarlen/digits=9         49.94n ± 2%   49.41n ± 1%    -1.06% (p=0.000 n=10)
AppendUintVarlen/digits=10        57.28n ± 1%   56.92n ± 1%    -0.62% (p=0.045 n=10)
AppendUintVarlen/digits=11        60.09n ± 1%   58.11n ± 2%    -3.30% (p=0.000 n=10)
AppendUintVarlen/digits=12        62.22n ± 0%   61.85n ± 0%    -0.59% (p=0.000 n=10)
AppendUintVarlen/digits=13        64.94n ± 0%   62.92n ± 0%    -3.10% (p=0.000 n=10)
AppendUintVarlen/digits=14        65.42n ± 1%   65.19n ± 1%    -0.34% (p=0.005 n=10)
AppendUintVarlen/digits=15        68.17n ± 0%   66.13n ± 0%    -2.99% (p=0.000 n=10)
AppendUintVarlen/digits=16        70.21n ± 1%   70.09n ± 1%         ~ (p=0.517 n=10)
AppendUintVarlen/digits=17        72.93n ± 0%   70.49n ± 0%    -3.34% (p=0.000 n=10)
AppendUintVarlen/digits=18        73.01n ± 0%   72.75n ± 0%    -0.35% (p=0.000 n=10)
AppendUintVarlen/digits=19        79.27n ± 1%   79.49n ± 1%         ~ (p=0.671 n=10)
AppendUintVarlen/digits=20        82.18n ± 0%   80.43n ± 1%    -2.14% (p=0.000 n=10)
geomean                           143.4n        136.0n         -5.20%

Change-Id: I8245814a0259ad13cf9225f57db8e9fe3d2e4267
Reviewed-on: https://go-review.googlesource.com/c/go/+/717407
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Cherry Mui
---

diff --git a/src/cmd/compile/internal/ssa/_gen/Wasm.rules b/src/cmd/compile/internal/ssa/_gen/Wasm.rules
index f632a01109..6028152253 100644
--- a/src/cmd/compile/internal/ssa/_gen/Wasm.rules
+++ b/src/cmd/compile/internal/ssa/_gen/Wasm.rules
@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+(Last ___) => v.Args[len(v.Args)-1]
+
 // Lowering arithmetic
 (Add(64|32|16|8|Ptr) ...) => (I64Add ...)
 (Add(64|32)F ...) => (F(64|32)Add ...)
@@ -44,6 +46,37 @@
 
 (Not ...) => (I64Eqz ...)
 
+(Avg64u x y) => (I64Add (I64ShrU (I64Sub x y) (I64Const [1])) y)
+
+// High word of multiply without carry bits; see Hacker's Delight, 2nd ed., Figure 8-2, p. 174.
+(Hmul64 x y) =>
+    (Last
+        x0: (ZeroExt32to64 x)
+        x1: (I64ShrS x (I64Const [32]))
+        y0: (ZeroExt32to64 y)
+        y1: (I64ShrS y (I64Const [32]))
+        x0y0: (I64Mul x0 y0)
+        tt: (I64Add (I64Mul x1 y0) (I64ShrU x0y0 (I64Const [32])))
+        w1: (I64Add (I64Mul x0 y1) (ZeroExt32to64 tt))
+        w2: (I64ShrS tt (I64Const [32]))
+        (I64Add (I64Add (I64Mul x1 y1) w2) (I64ShrS w1 (I64Const [32]))))
+
+// Same as Hmul64, but with the signed shifts now unsigned.
+(Hmul64u x y) =>
+    (Last
+        x0: (ZeroExt32to64 x)
+        x1: (I64ShrU x (I64Const [32]))
+        y0: (ZeroExt32to64 y)
+        y1: (I64ShrU y (I64Const [32]))
+        w0: (I64Mul x0 y0)
+        tt: (I64Add (I64Mul x1 y0) (I64ShrU w0 (I64Const [32])))
+        w1: (I64Add (I64Mul x0 y1) (ZeroExt32to64 tt))
+        w2: (I64ShrU tt (I64Const [32]))
+        hi: (I64Add (I64Add (I64Mul x1 y1) w2) (I64ShrU w1 (I64Const [32]))))
+
+(Select0 (Mul64uhilo x y)) => (Hmul64u x y)
+(Select1 (Mul64uhilo x y)) => (I64Mul x y)
+
 // Lowering pointer arithmetic
 (OffPtr ...) => (I64AddConst ...)
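Note how Last lets a rule bind intermediates to names (x0:, tt:, ...) and
reuse them; the (Last ___) rule at the top of the file then replaces the
Last value with its final argument. A Go transcription of Hmul64u for
checking the algebra (a sketch, not the compiler's code; Hmul64 is the
same except that the shifts of x, y, tt, and w1 are arithmetic):

    package main

    import (
        "fmt"
        "math/bits"
        "math/rand"
    )

    // hmul64u mirrors the Hmul64u rule above (Hacker's Delight, 2nd ed.,
    // Figure 8-2): split both operands into 32-bit halves and recombine.
    func hmul64u(x, y uint64) uint64 {
        x0, x1 := x&0xFFFFFFFF, x>>32 // ZeroExt32to64 x, I64ShrU x 32
        y0, y1 := y&0xFFFFFFFF, y>>32
        w0 := x0 * y0
        tt := x1*y0 + w0>>32
        w1 := x0*y1 + tt&0xFFFFFFFF // ZeroExt32to64 tt
        w2 := tt >> 32
        return x1*y1 + w2 + w1>>32
    }

    func main() {
        for i := 0; i < 1000; i++ {
            x, y := rand.Uint64(), rand.Uint64()
            if hi, _ := bits.Mul64(x, y); hmul64u(x, y) != hi {
                fmt.Println("mismatch:", x, y)
            }
        }
        fmt.Println("ok")
    }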
diff --git a/src/cmd/compile/internal/ssa/_gen/divmod.rules b/src/cmd/compile/internal/ssa/_gen/divmod.rules
index 21e0a19406..7dd7d245bd 100644
--- a/src/cmd/compile/internal/ssa/_gen/divmod.rules
+++ b/src/cmd/compile/internal/ssa/_gen/divmod.rules
@@ -79,17 +79,9 @@
 // The magic number m for c is ⌈2^k/c⌉, so we can use
 // (m+1)/2 = ⌈2^k/(c/2)⌉ instead.
 //
-// 8. An unsigned divide on systems with an avg instruction.
+// 8. A general unsigned divide using an avg instruction.
 // We noted above that (x*((1<<N)+m))>>N>>s = ((x*m)>>N+x)>>s.
 // Let hi = (x*m)>>N, so we want (hi+x) >> s = avg(hi, x) >> (s-1).
-//
-// 9. Unsigned 64-bit divide by 16-bit constant on 32-bit systems.
-// Use long division with 16-bit digits.
-//
-// Note: All systems have Hmul and Avg except for wasm, and the
-// wasm JITs may well apply all these optimizations already anyway,
-// so it may be worth looking into avoiding this pass entirely on wasm
-// and dropping all the useAvg useHmul uncertainty.
 
 // Case 1. Signed divides where 2N ≤ register size.
 (Div8 x (Const8 [c])) && smagicOK8(c) =>
@@ -112,13 +104,13 @@
     (Rsh64x64 (SignExt32to64 x) (Const64 [63])))
 
 // Case 2. Signed divides where m is even.
-(Div32 x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 && config.useHmul =>
+(Div32 x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 =>
   (Sub32
     (Rsh32x64
       (Hmul32 x (Const32 [int32(smagic32(c).m/2)]))
       (Const64 [smagic32(c).s - 1]))
     (Rsh32x64 x (Const64 [31])))
-(Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul =>
+(Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 =>
   (Sub64
     (Rsh64x64
       (Hmul64 x (Const64 [int64(smagic64(c).m/2)]))
@@ -126,13 +118,13 @@
     (Rsh64x64 x (Const64 [63])))
 
 // Case 3. Signed divides where m is odd.
-(Div32 x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 && config.useHmul =>
+(Div32 x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 =>
   (Sub32
     (Rsh32x64
      (Add32 x (Hmul32 x (Const32 [int32(smagic32(c).m)])))
      (Const64 [smagic32(c).s]))
    (Rsh32x64 x (Const64 [31])))
-(Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul =>
+(Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 =>
   (Sub64
     (Rsh64x64
       (Add64 x (Hmul64 x (Const64 [int64(smagic64(c).m)])))
@@ -149,11 +141,11 @@
   (Rsh64Ux64
     (Mul64 (SignExt32to64 x) (Const64 [int64(smagic32(c).m)]))
     (Const64 [32 + smagic32(c).s]))
-(Div32u x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 4 && config.useHmul =>
+(Div32u x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 4 =>
   (Rsh32Ux64
     (Hmul32u x (Const32 [int32(smagic32(c).m)]))
     (Const64 [smagic32(c).s]))
-(Div64u x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.useHmul =>
+(Div64u x (Const64 [c])) && t.IsSigned() && smagicOK64(c) =>
   (Rsh64Ux64
     (Hmul64u x (Const64 [int64(smagic64(c).m)]))
     (Const64 [smagic64(c).s]))
@@ -181,11 +173,11 @@
     (Rsh64Ux64
       (Mul64 (ZeroExt32to64 x) (Const64 [int64(1<<31 + umagic32(c).m/2)]))
       (Const64 [32 + umagic32(c).s - 1])))
-(Div32u x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4 && config.useHmul =>
+(Div32u x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4 =>
   (Rsh32Ux64
     (Hmul32u x (Const32 [int32(1<<31 + umagic32(c).m/2)]))
     (Const64 [umagic32(c).s - 1]))
-(Div64u x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul =>
+(Div64u x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 =>
   (Rsh64Ux64
     (Hmul64u x (Const64 [int64(1<<63 + umagic64(c).m/2)]))
     (Const64 [umagic64(c).s - 1]))
@@ -205,39 +197,39 @@
       (Rsh64Ux64 (ZeroExt32to64 x) (Const64 [1]))
       (Const64 [int64(1<<31 + (umagic32(c).m+1)/2)]))
     (Const64 [32 + umagic32(c).s - 2])))
-(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && c&1 == 0 && config.useHmul =>
+(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && c&1 == 0 =>
   (Rsh32Ux64
     (Hmul32u
       (Rsh32Ux64 x (Const64 [1]))
       (Const32 [int32(1<<31 + (umagic32(c).m+1)/2)]))
     (Const64 [umagic32(c).s - 2]))
-(Div64u x (Const64 [c])) && umagicOK64(c) && c&1 == 0 && config.useHmul =>
+(Div64u x (Const64 [c])) && umagicOK64(c) && c&1 == 0 =>
   (Rsh64Ux64
     (Hmul64u
       (Rsh64Ux64 x (Const64 [1]))
       (Const64 [int64(1<<63 + (umagic64(c).m+1)/2)]))
     (Const64 [umagic64(c).s - 2]))
 
-// Case 8. Unsigned divide on systems with avg.
-(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && config.useAvg =>
+// Case 8. Unsigned divide using avg.
+(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 =>
   (Trunc32to16
     (Rsh32Ux64
       (Avg32u
         (Lsh32x64 (ZeroExt16to32 x) (Const64 [16]))
         (Mul32 (ZeroExt16to32 x) (Const32 [int32(umagic16(c).m)])))
       (Const64 [16 + umagic16(c).s - 1])))
-(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && config.useAvg =>
+(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 =>
   (Trunc64to32
     (Rsh64Ux64
       (Avg64u
         (Lsh64x64 (ZeroExt32to64 x) (Const64 [32]))
         (Mul64 (ZeroExt32to64 x) (Const64 [int64(umagic32(c).m)])))
       (Const64 [32 + umagic32(c).s - 1])))
-(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && config.useAvg && config.useHmul =>
+(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 =>
   (Rsh32Ux64
     (Avg32u x (Hmul32u x (Const32 [int32(umagic32(c).m)])))
     (Const64 [umagic32(c).s - 1]))
-(Div64u x (Const64 [c])) && umagicOK64(c) && config.useAvg && config.useHmul =>
+(Div64u x (Const64 [c])) && umagicOK64(c) =>
   (Rsh64Ux64
     (Avg64u x (Hmul64u x (Const64 [int64(umagic64(c).m)])))
     (Const64 [umagic64(c).s - 1]))
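To make the now-unconditional case 8 concrete, a worked Go instance for
c = 7 (a sketch: the magic constant is assumed to be the low 64 bits of
⌈2^67/7⌉ with s = 3, per the comment above; the same constant's halves
appear in the div7_uint64 codegen checks later in this CL):

    package main

    import (
        "fmt"
        "math/bits"
    )

    func div7(x uint64) uint64 {
        const m = 0x2492492492492493 // low 64 bits of ⌈2^67/7⌉ (assumed umagic64(7).m)
        hi, _ := bits.Mul64(x, m)    // hi = (x*m)>>64
        return (hi + (x-hi)>>1) >> 2 // avg(hi, x) >> (s-1); x-hi is safe since hi <= x
    }

    func main() {
        for _, x := range []uint64{0, 6, 7, 13, 14, 1<<64 - 1} {
            fmt.Println(x, div7(x), x/7) // the two results always agree
        }
    }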
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index 819d77e420..ec0240941c 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -41,8 +41,6 @@ type Config struct {
     hasGReg   bool      // has hardware g register
     ctxt      *obj.Link // Generic arch information
     optimize  bool      // Do optimization
-    useAvg    bool      // Use optimizations that need Avg* operations
-    useHmul   bool      // Use optimizations that need Hmul* operations
     SoftFloat bool      //
     Race      bool      // race detector enabled
     BigEndian bool      //
@@ -168,8 +166,6 @@ type Frontend interface {
 // NewConfig returns a new configuration object for the given architecture.
 func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat bool) *Config {
     c := &Config{arch: arch, Types: types}
-    c.useAvg = true
-    c.useHmul = true
     switch arch {
     case "amd64":
         c.PtrSize = 8
@@ -359,8 +355,6 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
         c.FPReg = framepointerRegWasm
         c.LinkReg = linkRegWasm
         c.hasGReg = true
-        c.useAvg = false
-        c.useHmul = false
         c.unalignedOK = true
         c.haveCondSelect = true
     default:
diff --git a/src/cmd/compile/internal/ssa/rewriteWasm.go b/src/cmd/compile/internal/ssa/rewriteWasm.go
index a164a6eee5..faba41b3e5 100644
--- a/src/cmd/compile/internal/ssa/rewriteWasm.go
+++ b/src/cmd/compile/internal/ssa/rewriteWasm.go
@@ -48,6 +48,8 @@ func rewriteValueWasm(v *Value) bool {
     case OpAndB:
         v.Op = OpWasmI64And
         return true
+    case OpAvg64u:
+        return rewriteValueWasm_OpAvg64u(v)
     case OpBitLen16:
         return rewriteValueWasm_OpBitLen16(v)
     case OpBitLen32:
@@ -228,6 +230,10 @@ func rewriteValueWasm(v *Value) bool {
     case OpGetClosurePtr:
         v.Op = OpWasmLoweredGetClosurePtr
         return true
+    case OpHmul64:
+        return rewriteValueWasm_OpHmul64(v)
+    case OpHmul64u:
+        return rewriteValueWasm_OpHmul64u(v)
     case OpInterCall:
         v.Op = OpWasmLoweredInterCall
         return true
@@ -239,6 +245,8 @@ func rewriteValueWasm(v *Value) bool {
     case OpIsSliceInBounds:
         v.Op = OpWasmI64LeU
         return true
+    case OpLast:
+        return rewriteValueWasm_OpLast(v)
     case OpLeq16:
         return rewriteValueWasm_OpLeq16(v)
     case OpLeq16U:
@@ -514,6 +522,10 @@ func rewriteValueWasm(v *Value) bool {
     case OpRsh8x64:
         return rewriteValueWasm_OpRsh8x64(v)
     case OpRsh8x8:
         return rewriteValueWasm_OpRsh8x8(v)
+    case OpSelect0:
+        return rewriteValueWasm_OpSelect0(v)
+    case OpSelect1:
+        return rewriteValueWasm_OpSelect1(v)
     case OpSignExt16to32:
         return rewriteValueWasm_OpSignExt16to32(v)
     case OpSignExt16to64:
@@ -684,6 +696,27 @@ func rewriteValueWasm_OpAddr(v *Value) bool {
         return true
     }
 }
+func rewriteValueWasm_OpAvg64u(v *Value) bool {
+    v_1 := v.Args[1]
+    v_0 := v.Args[0]
+    b := v.Block
+    typ := &b.Func.Config.Types
+    // match: (Avg64u x y)
+    // result: (I64Add (I64ShrU (I64Sub x y) (I64Const [1])) y)
+    for {
+        x := v_0
+        y := v_1
+        v.reset(OpWasmI64Add)
+        v0 := b.NewValue0(v.Pos, OpWasmI64ShrU, typ.Int64)
+        v1 := b.NewValue0(v.Pos, OpWasmI64Sub, typ.Int64)
+        v1.AddArg2(x, y)
+        v2 := b.NewValue0(v.Pos, OpWasmI64Const, typ.Int64)
+        v2.AuxInt = int64ToAuxInt(1)
+        v0.AddArg2(v1, v2)
+        v.AddArg2(v0, y)
+        return true
+    }
+}
 func rewriteValueWasm_OpBitLen16(v *Value) bool {
     v_0 := v.Args[0]
     b := v.Block
@@ -1162,6 +1195,108 @@ func rewriteValueWasm_OpEq8(v *Value) bool {
         return true
     }
 }
+func rewriteValueWasm_OpHmul64(v *Value) bool {
+    v_1 := v.Args[1]
+    v_0 := v.Args[0]
+    b := v.Block
+    typ := &b.Func.Config.Types
+    // match: (Hmul64 x y)
+    // result: (Last x0: (ZeroExt32to64 x) x1: (I64ShrS x (I64Const [32])) y0: (ZeroExt32to64 y) y1: (I64ShrS y (I64Const [32])) x0y0: (I64Mul x0 y0) tt: (I64Add (I64Mul x1 y0) (I64ShrU x0y0 (I64Const [32]))) w1: (I64Add (I64Mul x0 y1) (ZeroExt32to64 tt)) w2: (I64ShrS tt (I64Const [32])) (I64Add (I64Add (I64Mul x1 y1) w2) (I64ShrS w1 (I64Const [32]))))
+    for {
+        t := v.Type
+        x := v_0
+        y := v_1
+        v.reset(OpLast)
+        v.Type = t
+        x0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+        x0.AddArg(x)
+        x1 := b.NewValue0(v.Pos, OpWasmI64ShrS, typ.Int64)
+        v2 := b.NewValue0(v.Pos, OpWasmI64Const, typ.Int64)
+        v2.AuxInt = int64ToAuxInt(32)
+        x1.AddArg2(x, v2)
+        y0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+        y0.AddArg(y)
+        y1 := b.NewValue0(v.Pos, OpWasmI64ShrS, typ.Int64)
+        y1.AddArg2(y, v2)
+        x0y0 := b.NewValue0(v.Pos, OpWasmI64Mul, typ.Int64)
+        x0y0.AddArg2(x0, y0)
+        tt := b.NewValue0(v.Pos, OpWasmI64Add, typ.Int64)
+        v7 := b.NewValue0(v.Pos, OpWasmI64Mul, typ.Int64)
+        v7.AddArg2(x1, y0)
+        v8 := b.NewValue0(v.Pos, OpWasmI64ShrU, typ.Int64)
+        v8.AddArg2(x0y0, v2)
+        tt.AddArg2(v7, v8)
+        w1 := b.NewValue0(v.Pos, OpWasmI64Add, typ.Int64)
+        v10 := b.NewValue0(v.Pos, OpWasmI64Mul, typ.Int64)
+        v10.AddArg2(x0, y1)
+        v11 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+        v11.AddArg(tt)
+        w1.AddArg2(v10, v11)
+        w2 := b.NewValue0(v.Pos, OpWasmI64ShrS, typ.Int64)
+        w2.AddArg2(tt, v2)
+        v13 := b.NewValue0(v.Pos, OpWasmI64Add, typ.Int64)
+        v14 := b.NewValue0(v.Pos, OpWasmI64Add, typ.Int64)
+        v15 := b.NewValue0(v.Pos, OpWasmI64Mul, typ.Int64)
+        v15.AddArg2(x1, y1)
+        v14.AddArg2(v15, w2)
+        v16 := b.NewValue0(v.Pos, OpWasmI64ShrS, typ.Int64)
+        v16.AddArg2(w1, v2)
+        v13.AddArg2(v14, v16)
+        v.AddArgs(x0, x1, y0, y1, x0y0, tt, w1, w2, v13)
+        return true
+    }
+}
+func rewriteValueWasm_OpHmul64u(v *Value) bool {
+    v_1 := v.Args[1]
+    v_0 := v.Args[0]
+    b := v.Block
+    typ := &b.Func.Config.Types
+    // match: (Hmul64u x y)
+    // result: (Last x0: (ZeroExt32to64 x) x1: (I64ShrU x (I64Const [32])) y0: (ZeroExt32to64 y) y1: (I64ShrU y (I64Const [32])) w0: (I64Mul x0 y0) tt: (I64Add (I64Mul x1 y0) (I64ShrU w0 (I64Const [32]))) w1: (I64Add (I64Mul x0 y1) (ZeroExt32to64 tt)) w2: (I64ShrU tt (I64Const [32])) hi: (I64Add (I64Add (I64Mul x1 y1) w2) (I64ShrU w1 (I64Const [32]))))
+    for {
+        t := v.Type
+        x := v_0
+        y := v_1
+        v.reset(OpLast)
+        v.Type = t
+        x0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+        x0.AddArg(x)
+        x1 := b.NewValue0(v.Pos, OpWasmI64ShrU, typ.Int64)
+        v2 := b.NewValue0(v.Pos, OpWasmI64Const, typ.Int64)
+        v2.AuxInt = int64ToAuxInt(32)
+        x1.AddArg2(x, v2)
+        y0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+        y0.AddArg(y)
+        y1 := b.NewValue0(v.Pos, OpWasmI64ShrU, typ.Int64)
+        y1.AddArg2(y, v2)
+        w0 := b.NewValue0(v.Pos, OpWasmI64Mul, typ.Int64)
+        w0.AddArg2(x0, y0)
+        tt := b.NewValue0(v.Pos, OpWasmI64Add, typ.Int64)
+        v7 := b.NewValue0(v.Pos, OpWasmI64Mul, typ.Int64)
+        v7.AddArg2(x1, y0)
+        v8 := b.NewValue0(v.Pos, OpWasmI64ShrU, typ.Int64)
+        v8.AddArg2(w0, v2)
+        tt.AddArg2(v7, v8)
+        w1 := b.NewValue0(v.Pos, OpWasmI64Add, typ.Int64)
+        v10 := b.NewValue0(v.Pos, OpWasmI64Mul, typ.Int64)
+        v10.AddArg2(x0, y1)
+        v11 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
+        v11.AddArg(tt)
+        w1.AddArg2(v10, v11)
+        w2 := b.NewValue0(v.Pos, OpWasmI64ShrU, typ.Int64)
+        w2.AddArg2(tt, v2)
+        hi := b.NewValue0(v.Pos, OpWasmI64Add, typ.Int64)
+        v14 := b.NewValue0(v.Pos, OpWasmI64Add, typ.Int64)
+        v15 := b.NewValue0(v.Pos, OpWasmI64Mul, typ.Int64)
+        v15.AddArg2(x1, y1)
+        v14.AddArg2(v15, w2)
+        v16 := b.NewValue0(v.Pos, OpWasmI64ShrU, typ.Int64)
+        v16.AddArg2(w1, v2)
+        hi.AddArg2(v14, v16)
+        v.AddArgs(x0, x1, y0, y1, w0, tt, w1, w2, hi)
+        return true
+    }
+}
 func rewriteValueWasm_OpIsNonNil(v *Value) bool {
     v_0 := v.Args[0]
     b := v.Block
@@ -1177,6 +1312,14 @@ func rewriteValueWasm_OpIsNonNil(v *Value) bool {
         return true
     }
 }
+func rewriteValueWasm_OpLast(v *Value) bool {
+    // match: (Last ___)
+    // result: v.Args[len(v.Args)-1]
+    for {
+        v.copyOf(v.Args[len(v.Args)-1])
+        return true
+    }
+}
 func rewriteValueWasm_OpLeq16(v *Value) bool {
     v_1 := v.Args[1]
     v_0 := v.Args[0]
@@ -3199,6 +3342,40 @@ func rewriteValueWasm_OpRsh8x8(v *Value) bool {
         return true
     }
 }
+func rewriteValueWasm_OpSelect0(v *Value) bool {
+    v_0 := v.Args[0]
+    // match: (Select0 (Mul64uhilo x y))
+    // result: (Hmul64u x y)
+    for {
+        t := v.Type
+        if v_0.Op != OpMul64uhilo {
+            break
+        }
+        y := v_0.Args[1]
+        x := v_0.Args[0]
+        v.reset(OpHmul64u)
+        v.Type = t
+        v.AddArg2(x, y)
+        return true
+    }
+    return false
+}
+func rewriteValueWasm_OpSelect1(v *Value) bool {
+    v_0 := v.Args[0]
+    // match: (Select1 (Mul64uhilo x y))
+    // result: (I64Mul x y)
+    for {
+        if v_0.Op != OpMul64uhilo {
+            break
+        }
+        y := v_0.Args[1]
+        x := v_0.Args[0]
+        v.reset(OpWasmI64Mul)
+        v.AddArg2(x, y)
+        return true
+    }
+    return false
+}
 func rewriteValueWasm_OpSignExt16to32(v *Value) bool {
     v_0 := v.Args[0]
     // match: (SignExt16to32 x:(I64Load16S _ _))
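For the signed variant, a Go transcription of the generated rewrite above
(a sketch for checking the algebra, not the compiler's code). Compared
with the unsigned version shown earlier, x, y, tt, and w1 are shifted
arithmetically; only the shift of the low product x0*y0 stays logical:

    // hmul64 mirrors rewriteValueWasm_OpHmul64 (Hacker's Delight mulhs).
    // Go's signed arithmetic wraps, matching wasm's i64 semantics.
    func hmul64(x, y int64) int64 {
        x0 := int64(uint64(x) & 0xFFFFFFFF) // ZeroExt32to64 x
        x1 := x >> 32                       // I64ShrS x 32
        y0 := int64(uint64(y) & 0xFFFFFFFF)
        y1 := y >> 32
        x0y0 := x0 * y0
        tt := x1*y0 + int64(uint64(x0y0)>>32) // I64ShrU: logical shift
        w1 := x0*y1 + int64(uint64(tt)&0xFFFFFFFF)
        w2 := tt >> 32
        return x1*y1 + w2 + w1>>32
    }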
diff --git a/src/cmd/compile/internal/ssa/rewritedivmod.go b/src/cmd/compile/internal/ssa/rewritedivmod.go
index 02978075a8..ab5cf7d676 100644
--- a/src/cmd/compile/internal/ssa/rewritedivmod.go
+++ b/src/cmd/compile/internal/ssa/rewritedivmod.go
@@ -212,7 +212,7 @@ func rewriteValuedivmod_OpDiv16u(v *Value) bool {
         return true
     }
     // match: (Div16u x (Const16 [c]))
-    // cond: umagicOK16(c) && config.RegSize == 4 && config.useAvg
+    // cond: umagicOK16(c) && config.RegSize == 4
     // result: (Trunc32to16 (Rsh32Ux64 (Avg32u (Lsh32x64 (ZeroExt16to32 x) (Const64 [16])) (Mul32 (ZeroExt16to32 x) (Const32 [int32(umagic16(c).m)]))) (Const64 [16 + umagic16(c).s - 1])))
     for {
         t := v.Type
@@ -221,7 +221,7 @@ func rewriteValuedivmod_OpDiv16u(v *Value) bool {
             break
         }
         c := auxIntToInt16(v_1.AuxInt)
-        if !(umagicOK16(c) && config.RegSize == 4 && config.useAvg) {
+        if !(umagicOK16(c) && config.RegSize == 4) {
             break
         }
         v.reset(OpTrunc32to16)
@@ -315,7 +315,7 @@ func rewriteValuedivmod_OpDiv32(v *Value) bool {
         return true
     }
     // match: (Div32 x (Const32 [c]))
-    // cond: smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 && config.useHmul
+    // cond: smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0
     // result: (Sub32 (Rsh32x64 (Hmul32 x (Const32 [int32(smagic32(c).m/2)])) (Const64 [smagic32(c).s - 1])) (Rsh32x64 x (Const64 [31])))
     for {
         t := v.Type
@@ -324,7 +324,7 @@ func rewriteValuedivmod_OpDiv32(v *Value) bool {
             break
         }
         c := auxIntToInt32(v_1.AuxInt)
-        if !(smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 && config.useHmul) {
+        if !(smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0) {
            break
        }
        v.reset(OpSub32)
@@ -345,7 +345,7 @@ func rewriteValuedivmod_OpDiv32(v *Value) bool {
         return true
     }
     // match: (Div32 x (Const32 [c]))
-    // cond: smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 && config.useHmul
+    // cond: smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0
     // result: (Sub32 (Rsh32x64 (Add32 x (Hmul32 x (Const32 [int32(smagic32(c).m)]))) (Const64 [smagic32(c).s])) (Rsh32x64 x (Const64 [31])))
     for {
         t := v.Type
@@ -354,7 +354,7 @@ func rewriteValuedivmod_OpDiv32(v *Value) bool {
             break
         }
         c := auxIntToInt32(v_1.AuxInt)
-        if !(smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 && config.useHmul) {
+        if !(smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0) {
             break
         }
         v.reset(OpSub32)
@@ -411,7 +411,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
         return true
     }
     // match: (Div32u x (Const32 [c]))
-    // cond: t.IsSigned() && smagicOK32(c) && config.RegSize == 4 && config.useHmul
+    // cond: t.IsSigned() && smagicOK32(c) && config.RegSize == 4
     // result: (Rsh32Ux64 (Hmul32u x (Const32 [int32(smagic32(c).m)])) (Const64 [smagic32(c).s]))
     for {
         t := v.Type
@@ -420,7 +420,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
             break
         }
         c := auxIntToInt32(v_1.AuxInt)
-        if !(t.IsSigned() && smagicOK32(c) && config.RegSize == 4 && config.useHmul) {
+        if !(t.IsSigned() && smagicOK32(c) && config.RegSize == 4) {
             break
         }
         v.reset(OpRsh32Ux64)
@@ -463,7 +463,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
         return true
     }
     // match: (Div32u x (Const32 [c]))
-    // cond: umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4 && config.useHmul
+    // cond: umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4
     // result: (Rsh32Ux64 (Hmul32u x (Const32 [int32(1<<31 + umagic32(c).m/2)])) (Const64 [umagic32(c).s - 1]))
     for {
         t := v.Type
@@ -472,7 +472,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
             break
         }
         c := auxIntToInt32(v_1.AuxInt)
-        if !(umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4 && config.useHmul) {
+        if !(umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4) {
             break
         }
         v.reset(OpRsh32Ux64)
@@ -519,7 +519,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
         return true
     }
     // match: (Div32u x (Const32 [c]))
-    // cond: umagicOK32(c) && config.RegSize == 4 && c&1 == 0 && config.useHmul
+    // cond: umagicOK32(c) && config.RegSize == 4 && c&1 == 0
     // result: (Rsh32Ux64 (Hmul32u (Rsh32Ux64 x (Const64 [1])) (Const32 [int32(1<<31 + (umagic32(c).m+1)/2)])) (Const64 [umagic32(c).s - 2]))
     for {
         t := v.Type
@@ -528,7 +528,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
             break
         }
         c := auxIntToInt32(v_1.AuxInt)
-        if !(umagicOK32(c) && config.RegSize == 4 && c&1 == 0 && config.useHmul) {
+        if !(umagicOK32(c) && config.RegSize == 4 && c&1 == 0) {
             break
         }
         v.reset(OpRsh32Ux64)
@@ -547,7 +547,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
         return true
     }
     // match: (Div32u x (Const32 [c]))
-    // cond: umagicOK32(c) && config.RegSize == 8 && config.useAvg
+    // cond: umagicOK32(c) && config.RegSize == 8
     // result: (Trunc64to32 (Rsh64Ux64 (Avg64u (Lsh64x64 (ZeroExt32to64 x) (Const64 [32])) (Mul64 (ZeroExt32to64 x) (Const64 [int64(umagic32(c).m)]))) (Const64 [32 + umagic32(c).s - 1])))
     for {
         t := v.Type
@@ -556,7 +556,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
             break
         }
         c := auxIntToInt32(v_1.AuxInt)
-        if !(umagicOK32(c) && config.RegSize == 8 && config.useAvg) {
+        if !(umagicOK32(c) && config.RegSize == 8) {
             break
         }
         v.reset(OpTrunc64to32)
@@ -581,7 +581,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
         return true
     }
     // match: (Div32u x (Const32 [c]))
-    // cond: umagicOK32(c) && config.RegSize == 4 && config.useAvg && config.useHmul
+    // cond: umagicOK32(c) && config.RegSize == 4
     // result: (Rsh32Ux64 (Avg32u x (Hmul32u x (Const32 [int32(umagic32(c).m)]))) (Const64 [umagic32(c).s - 1]))
     for {
         t := v.Type
@@ -590,7 +590,7 @@ func rewriteValuedivmod_OpDiv32u(v *Value) bool {
             break
         }
         c := auxIntToInt32(v_1.AuxInt)
-        if !(umagicOK32(c) && config.RegSize == 4 && config.useAvg && config.useHmul) {
+        if !(umagicOK32(c) && config.RegSize == 4) {
             break
         }
         v.reset(OpRsh32Ux64)
@@ -612,7 +612,6 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
     v_1 := v.Args[1]
     v_0 := v.Args[0]
     b := v.Block
-    config := b.Func.Config
     typ := &b.Func.Config.Types
     // match: (Div64 n (Const64 [c]))
     // cond: isPowerOfTwo(c)
@@ -644,7 +643,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
         return true
     }
     // match: (Div64 x (Const64 [c]))
-    // cond: smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul
+    // cond: smagicOK64(c) && smagic64(c).m&1 == 0
     // result: (Sub64 (Rsh64x64 (Hmul64 x (Const64 [int64(smagic64(c).m/2)])) (Const64 [smagic64(c).s - 1])) (Rsh64x64 x (Const64 [63])))
     for {
         t := v.Type
@@ -653,7 +652,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
             break
         }
         c := auxIntToInt64(v_1.AuxInt)
-        if !(smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul) {
+        if !(smagicOK64(c) && smagic64(c).m&1 == 0) {
             break
         }
         v.reset(OpSub64)
@@ -674,7 +673,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
         return true
     }
     // match: (Div64 x (Const64 [c]))
-    // cond: smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul
+    // cond: smagicOK64(c) && smagic64(c).m&1 != 0
     // result: (Sub64 (Rsh64x64 (Add64 x (Hmul64 x (Const64 [int64(smagic64(c).m)]))) (Const64 [smagic64(c).s])) (Rsh64x64 x (Const64 [63])))
     for {
         t := v.Type
@@ -683,7 +682,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
             break
         }
         c := auxIntToInt64(v_1.AuxInt)
-        if !(smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul) {
+        if !(smagicOK64(c) && smagic64(c).m&1 != 0) {
             break
         }
         v.reset(OpSub64)
@@ -711,10 +710,9 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
     v_1 := v.Args[1]
     v_0 := v.Args[0]
     b := v.Block
-    config := b.Func.Config
     typ := &b.Func.Config.Types
     // match: (Div64u x (Const64 [c]))
-    // cond: t.IsSigned() && smagicOK64(c) && config.useHmul
+    // cond: t.IsSigned() && smagicOK64(c)
     // result: (Rsh64Ux64 (Hmul64u x (Const64 [int64(smagic64(c).m)])) (Const64 [smagic64(c).s]))
     for {
         t := v.Type
@@ -723,7 +721,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
             break
         }
         c := auxIntToInt64(v_1.AuxInt)
-        if !(t.IsSigned() && smagicOK64(c) && config.useHmul) {
+        if !(t.IsSigned() && smagicOK64(c)) {
             break
         }
         v.reset(OpRsh64Ux64)
@@ -738,7 +736,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
         return true
     }
     // match: (Div64u x (Const64 [c]))
-    // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul
+    // cond: umagicOK64(c) && umagic64(c).m&1 == 0
     // result: (Rsh64Ux64 (Hmul64u x (Const64 [int64(1<<63 + umagic64(c).m/2)])) (Const64 [umagic64(c).s - 1]))
     for {
         t := v.Type
@@ -747,7 +745,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
             break
         }
         c := auxIntToInt64(v_1.AuxInt)
-        if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul) {
+        if !(umagicOK64(c) && umagic64(c).m&1 == 0) {
             break
         }
         v.reset(OpRsh64Ux64)
@@ -762,7 +760,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
         return true
     }
     // match: (Div64u x (Const64 [c]))
-    // cond: umagicOK64(c) && c&1 == 0 && config.useHmul
+    // cond: umagicOK64(c) && c&1 == 0
     // result: (Rsh64Ux64 (Hmul64u (Rsh64Ux64 x (Const64 [1])) (Const64 [int64(1<<63 + (umagic64(c).m+1)/2)])) (Const64 [umagic64(c).s - 2]))
     for {
         t := v.Type
@@ -771,7 +769,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
             break
         }
         c := auxIntToInt64(v_1.AuxInt)
-        if !(umagicOK64(c) && c&1 == 0 && config.useHmul) {
+        if !(umagicOK64(c) && c&1 == 0) {
             break
         }
         v.reset(OpRsh64Ux64)
@@ -790,7 +788,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
         return true
     }
     // match: (Div64u x (Const64 [c]))
-    // cond: umagicOK64(c) && config.useAvg && config.useHmul
+    // cond: umagicOK64(c)
     // result: (Rsh64Ux64 (Avg64u x (Hmul64u x (Const64 [int64(umagic64(c).m)]))) (Const64 [umagic64(c).s - 1]))
     for {
         t := v.Type
@@ -799,7 +797,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
             break
         }
         c := auxIntToInt64(v_1.AuxInt)
-        if !(umagicOK64(c) && config.useAvg && config.useHmul) {
+        if !(umagicOK64(c)) {
             break
         }
         v.reset(OpRsh64Ux64)
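The intrinsics change below registers math/bits.Mul64 on every
architecture (add with all... instead of an explicit architecture list),
so on wasm a 64x64->128 multiply now compiles, through Mul64uhilo and the
Select0/Select1 rules above, into one Hmul64u plus one I64Mul rather than
a call. For example:

    package main

    import (
        "fmt"
        "math/bits"
    )

    func main() {
        // On wasm after this CL: hi lowers via Hmul64u (Select0 rule),
        // lo lowers to a single I64Mul (Select1 rule).
        hi, lo := bits.Mul64(1<<40+3, 1<<40+5)
        fmt.Println(hi, lo) // 65536 8796093022223
    }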
"OnesCount64", p8...) - addF("math/bits", "Mul64", + add("math/bits", "Mul64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1]) }, - sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.PPC64, sys.S390X, sys.MIPS64, sys.MIPS, sys.RISCV64, sys.Loong64) + all...) alias("math/bits", "Mul", "math/bits", "Mul64", p8...) alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...) addF("math/bits", "Add64", diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go index 782426215c..713adc0e8b 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics_test.go +++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go @@ -1328,6 +1328,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"s390x", "sync/atomic", "SwapUint64"}: struct{}{}, {"s390x", "sync/atomic", "SwapUintptr"}: struct{}{}, {"s390x", "crypto/internal/constanttime", "boolToUint8"}: struct{}{}, + {"wasm", "internal/runtime/math", "Mul64"}: struct{}{}, {"wasm", "internal/runtime/sys", "GetCallerPC"}: struct{}{}, {"wasm", "internal/runtime/sys", "GetCallerSP"}: struct{}{}, {"wasm", "internal/runtime/sys", "GetClosurePtr"}: struct{}{}, @@ -1344,11 +1345,14 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"wasm", "math", "RoundToEven"}: struct{}{}, {"wasm", "math", "Trunc"}: struct{}{}, {"wasm", "math", "sqrt"}: struct{}{}, + {"wasm", "math/big", "mulWW"}: struct{}{}, {"wasm", "math/bits", "Len"}: struct{}{}, {"wasm", "math/bits", "Len16"}: struct{}{}, {"wasm", "math/bits", "Len32"}: struct{}{}, {"wasm", "math/bits", "Len64"}: struct{}{}, {"wasm", "math/bits", "Len8"}: struct{}{}, + {"wasm", "math/bits", "Mul"}: struct{}{}, + {"wasm", "math/bits", "Mul64"}: struct{}{}, {"wasm", "math/bits", "OnesCount"}: struct{}{}, {"wasm", "math/bits", "OnesCount16"}: struct{}{}, {"wasm", "math/bits", "OnesCount32"}: struct{}{}, diff --git a/test/codegen/divmod.go b/test/codegen/divmod.go index 98d0852398..9de091af7a 100644 --- a/test/codegen/divmod.go +++ b/test/codegen/divmod.go @@ -124,6 +124,7 @@ func div7_int8(i int8) int8 { // arm64: "MULW" // arm64: "SBFX [$]10, R[0-9]+, [$]22," // arm64: "SUB R[0-9]+->31," + // wasm: "I64Const [$]147" return i / 7 } @@ -136,6 +137,7 @@ func div7_int16(i int16) int16 { // arm64: "MULW" // arm64: "SBFX [$]18, R[0-9]+, [$]14," // arm64: "SUB R[0-9]+->31," + // wasm: "I64Const [$]37450" return i / 7 } @@ -145,6 +147,7 @@ func div7_int32(i int32) int32 { // arm64: "MUL " // arm64: "ASR [$]34," // arm64: "SUB R[0-9]+->63," + // wasm: "I64Const [$]2454267027" return i / 7 } @@ -160,6 +163,7 @@ func div9_int32(i int32) int32 { // arm64: "MUL " // arm64: "ASR [$]35," // arm64: "SUB R[0-9]+->63," + // wasm: "I64Const [$]3817748708" return i / 9 } @@ -170,6 +174,8 @@ func div7_int64(i int64) int64 { // arm64: "SMULH" // arm64: "ASR [$]1," // arm64: "SUB R[0-9]+->63," + // wasm: "I64Const [$]613566757" + // wasm: "I64Const [$]1227133513" return i / 7 } @@ -185,6 +191,7 @@ func div3_int32(i int32) int32 { // arm64: "MUL" // arm64: "ASR [$]33," // arm64: "SUB R[0-9]+->63," + // wasm: "I64Const [$]2863311531" return i / 3 } @@ -195,6 +202,8 @@ func div3_int64(i int64) int64 { // arm64: "ADD" // arm64: "ASR [$]1," // arm64: "SUB R[0-9]+->63," + // wasm: "I64Const [$]-1431655766" + // wasm: "I64Const [$]2863311531" return i / 3 } @@ -211,6 +220,8 @@ func div7_int16u(i int16) int16 { // arm64: "MULW" // arm64: "UBFX [$]18, 
@@ -211,6 +220,8 @@ func div7_int16u(i int16) int16 {
     // arm64: "MULW"
     // arm64: "UBFX [$]18, R[0-9]+, [$]14,"
     // arm64: -"SUB"
+    // wasm: "I64Const [$]37450"
+    // wasm: -"I64Sub"
     return i / 7
 }
@@ -226,6 +237,8 @@ func div7_int32u(i int32) int32 {
     // arm64: "MUL"
     // arm64: "LSR [$]34,"
     // arm64: -"SUB"
+    // wasm: "I64Const [$]2454267027"
+    // wasm: -"I64Sub"
     return i / 7
 }
@@ -238,6 +251,9 @@ func div7_int64u(i int64) int64 {
     // arm64: "UMULH"
     // arm64: "LSR [$]2,"
     // arm64: -"SUB"
+    // wasm: "I64Const [$]1227133514"
+    // wasm: "I64Const [$]2454267026"
+    // wasm: -"I64Sub"
     return i / 7
 }
@@ -249,6 +265,7 @@ func div7_uint8(i uint8) uint8 {
     // arm64: "MOVD [$]293,"
     // arm64: "MULW"
     // arm64: "UBFX [$]11, R[0-9]+, [$]21,"
+    // wasm: "I64Const [$]293"
     return i / 7
 }
@@ -257,6 +274,7 @@ func div7_uint16(i uint16) uint16 {
     // arm64: "MOVD [$]74899,"
     // arm64: "MUL"
     // arm64: "LSR [$]19,"
+    // wasm: "I64Const [$]74899"
     return i / 7
 }
@@ -267,6 +285,7 @@ func div3_uint16(i uint16) uint16 {
     // arm64: "MOVD [$]87382,"
     // arm64: "MUL"
     // arm64: "LSR [$]18,"
+    // wasm: "I64Const [$]87382"
     return i / 3
 }
@@ -275,6 +294,7 @@ func div3_uint32(i uint32) uint32 {
     // arm64: "MOVD [$]2863311531,"
     // arm64: "MUL"
     // arm64: "LSR [$]33,"
+    // wasm: "I64Const [$]2863311531"
     return i / 3
 }
@@ -286,6 +306,8 @@ func div3_uint64(i uint64) uint64 {
     // arm64: "MOVD [$]-6148914691236517205,"
     // arm64: "UMULH"
     // arm64: "LSR [$]1,"
+    // wasm: "I64Const [$]2863311530"
+    // wasm: "I64Const [$]2863311531"
     return i / 3
 }
@@ -307,6 +329,7 @@ func div14_uint32(i uint32) uint32 {
     // arm64: "MOVD [$]2454267027,"
     // arm64: "MUL"
     // arm64: "LSR [$]34,"
+    // wasm: "I64Const [$]2454267027"
     return i / 14
 }
@@ -318,6 +341,8 @@ func div14_uint64(i uint64) uint64 {
     // arm64: "MOVD [$]-7905747460161236406,"
     // arm64: "UMULH"
     // arm64: "LSR [$]2,"
+    // wasm: "I64Const [$]1227133514"
+    // wasm: "I64Const [$]2454267026"
     return i / 14
 }
@@ -345,6 +370,7 @@ func div7_uint32(i uint32) uint32 {
     // arm64: "SUB"
     // arm64: "ADD R[0-9]+>>1,"
     // arm64: "LSR [$]34,"
+    // wasm: "I64Const [$]613566757"
     return i / 7
 }
@@ -358,6 +384,8 @@ func div7_uint64(i uint64) uint64 {
     // arm64: "SUB"
     // arm64: "ADD R[0-9]+>>1,"
     // arm64: "LSR [$]2,"
+    // wasm: "I64Const [$]613566756"
+    // wasm: "I64Const [$]2454267027"
     return i / 7
 }
@@ -370,6 +398,8 @@ func div12345_uint64(i uint64) uint64 {
     // arm64: "MOVD [$]-6205696892516465602,"
     // arm64: "UMULH"
     // arm64: "LSR [$]13,"
+    // wasm: "I64Const [$]835683390"
+    // wasm: "I64Const [$]2850090894"
     return i / 12345
 }
@@ -480,7 +510,7 @@ func div_divis32_uint8(i uint8) (uint8, bool) {
     // arm64: "UBFX [$]5, R[0-9]+, [$]3"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET EQ"
-    return i/32, i%32 == 0
+    return i / 32, i%32 == 0
 }
@@ -490,7 +520,7 @@ func div_ndivis32_uint8(i uint8) (uint8, bool) {
     // arm64: "UBFX [$]5, R[0-9]+, [$]3"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET NE"
-    return i/32, i%32 != 0
+    return i / 32, i%32 != 0
 }
@@ -500,7 +530,7 @@ func div_divis32_uint16(i uint16) (uint16, bool) {
     // arm64: "UBFX [$]5, R[0-9]+, [$]11"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET EQ"
-    return i/32, i%32 == 0
+    return i / 32, i%32 == 0
 }
@@ -510,7 +540,7 @@ func div_ndivis32_uint16(i uint16) (uint16, bool) {
     // arm64: "UBFX [$]5, R[0-9]+, [$]11,"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET NE"
-    return i/32, i%32 != 0
+    return i / 32, i%32 != 0
 }
@@ -520,7 +550,7 @@ func div_divis32_uint32(i uint32) (uint32, bool) {
     // arm64: "UBFX [$]5, R[0-9]+, [$]27,"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET EQ"
-    return i/32, i%32 == 0
+    return i / 32, i%32 == 0
 }
@@ -530,7 +560,7 @@ func div_ndivis32_uint32(i uint32) (uint32, bool) {
     // arm64: "UBFX [$]5, R[0-9]+, [$]27,"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET NE"
-    return i/32, i%32 != 0
+    return i / 32, i%32 != 0
 }
@@ -541,7 +571,7 @@ func div_divis32_uint64(i uint64) (uint64, bool) {
     // arm64: "LSR [$]5,"
     // arm64: "TST [$]31,"
     // arm64: "CSET EQ"
-    return i/32, i%32 == 0
+    return i / 32, i%32 == 0
 }
@@ -552,7 +582,7 @@ func div_ndivis32_uint64(i uint64) (uint64, bool) {
     // arm64: "LSR [$]5,"
     // arm64: "TST [$]31,"
     // arm64: "CSET NE"
-    return i/32, i%32 != 0
+    return i / 32, i%32 != 0
 }
@@ -566,7 +596,7 @@ func div_divis32_int8(i int8) (int8, bool) {
     // arm64: "SBFX [$]5, R[0-9]+, [$]3,"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET EQ"
-    return i/32, i%32 == 0
+    return i / 32, i%32 == 0
 }
@@ -580,7 +610,7 @@ func div_ndivis32_int8(i int8) (int8, bool) {
     // arm64: "SBFX [$]5, R[0-9]+, [$]3,"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET NE"
-    return i/32, i%32 != 0
+    return i / 32, i%32 != 0
 }
@@ -594,7 +624,7 @@ func div_divis32_int16(i int16) (int16, bool) {
     // arm64: "SBFX [$]5, R[0-9]+, [$]11,"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET EQ"
-    return i/32, i%32 == 0
+    return i / 32, i%32 == 0
 }
@@ -608,7 +638,7 @@ func div_ndivis32_int16(i int16) (int16, bool) {
     // arm64: "SBFX [$]5, R[0-9]+, [$]11,"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET NE"
-    return i/32, i%32 != 0
+    return i / 32, i%32 != 0
 }
@@ -622,7 +652,7 @@ func div_divis32_int32(i int32) (int32, bool) {
     // arm64: "SBFX [$]5, R[0-9]+, [$]27,"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET EQ"
-    return i/32, i%32 == 0
+    return i / 32, i%32 == 0
 }
@@ -636,7 +666,7 @@ func div_ndivis32_int32(i int32) (int32, bool) {
     // arm64: "SBFX [$]5, R[0-9]+, [$]27,"
     // arm64: "TSTW [$]31,"
     // arm64: "CSET NE"
-    return i/32, i%32 != 0
+    return i / 32, i%32 != 0
 }
@@ -651,7 +681,7 @@ func div_divis32_int64(i int64) (int64, bool) {
     // arm64: "ASR [$]5,"
     // arm64: "TST [$]31,"
     // arm64: "CSET EQ"
-    return i/32, i%32 == 0
+    return i / 32, i%32 == 0
 }
@@ -666,7 +696,7 @@ func div_ndivis32_int64(i int64) (int64, bool) {
     // arm64: "ASR [$]5,"
     // arm64: "TST [$]31,"
     // arm64: "CSET NE"
-    return i/32, i%32 != 0
+    return i / 32, i%32 != 0
 }
 
 // Divisibility and non-divisibility by non-power-of-two.
@@ -923,7 +953,7 @@ func div_divis6_uint8(i uint8) (uint8, bool) {
     // arm64: "UBFX [$]11, R[0-9]+, [$]21,"
     // arm64: "CSET EQ"
     // arm64: -"RO[RL]"
-    return i/6, i%6 == 0
+    return i / 6, i%6 == 0
 }
@@ -936,7 +966,7 @@ func div_ndivis6_uint8(i uint8) (uint8, bool) {
     // arm64: "UBFX [$]11, R[0-9]+, [$]21,"
     // arm64: "CSET NE"
     // arm64: -"RO[RL]"
-    return i/6, i%6 != 0
+    return i / 6, i%6 != 0
 }
@@ -950,7 +980,7 @@ func div_divis6_uint16(i uint16) (uint16, bool) {
     // arm64: "LSR [$]19,"
     // arm64: "CSET EQ"
     // arm64: -"RO[RL]"
-    return i/6, i%6 == 0
+    return i / 6, i%6 == 0
 }
@@ -964,7 +994,7 @@ func div_ndivis6_uint16(i uint16) (uint16, bool) {
     // arm64: "LSR [$]19,"
     // arm64: "CSET NE"
     // arm64: -"RO[RL]"
-    return i/6, i%6 != 0
+    return i / 6, i%6 != 0
 }
@@ -978,7 +1008,7 @@ func div_divis6_uint32(i uint32) (uint32, bool) {
     // arm64: "LSR [$]34,"
     // arm64: "CSET EQ"
     // arm64: -"RO[RL]"
-    return i/6, i%6 == 0
+    return i / 6, i%6 == 0
 }
@@ -992,7 +1022,7 @@ func div_ndivis6_uint32(i uint32) (uint32, bool) {
     // arm64: "LSR [$]34,"
     // arm64: "CSET NE"
     // arm64: -"RO[RL]"
-    return i/6, i%6 != 0
+    return i / 6, i%6 != 0
 }
@@ -1009,7 +1039,7 @@ func div_divis6_uint64(i uint64) (uint64, bool) {
     // arm64: "LSR [$]2,"
     // arm64: "CSET EQ"
     // arm64: -"RO[RL]"
-    return i/6, i%6 == 0
+    return i / 6, i%6 == 0
 }
@@ -1026,7 +1056,7 @@ func div_ndivis6_uint64(i uint64) (uint64, bool) {
     // arm64: "LSR [$]2,"
     // arm64: "CSET NE"
     // arm64: -"RO[RL]"
-    return i/6, i%6 != 0
+    return i / 6, i%6 != 0
 }
@@ -1042,7 +1072,7 @@ func div_divis6_int8(i int8) (int8, bool) {
     // arm64: "SUB R[0-9]+->31,"
     // arm64: "CSET EQ"
     // arm64: -"RO[RL]"
-    return i/6, i%6 == 0
+    return i / 6, i%6 == 0
 }
@@ -1058,7 +1088,7 @@ func div_ndivis6_int8(i int8) (int8, bool) {
     // arm64: "SUB R[0-9]+->31,"
     // arm64: "CSET NE"
     // arm64: -"RO[RL]"
-    return i/6, i%6 != 0
+    return i / 6, i%6 != 0
 }
@@ -1074,7 +1104,7 @@ func div_divis6_int16(i int16) (int16, bool) {
     // arm64: "SUB R[0-9]+->31,"
     // arm64: "CSET EQ"
     // arm64: -"RO[RL]"
-    return i/6, i%6 == 0
+    return i / 6, i%6 == 0
 }
@@ -1090,7 +1120,7 @@ func div_ndivis6_int16(i int16) (int16, bool) {
     // arm64: "SUB R[0-9]+->31,"
     // arm64: "CSET NE"
     // arm64: -"RO[RL]"
-    return i/6, i%6 != 0
+    return i / 6, i%6 != 0
 }
@@ -1107,7 +1137,7 @@ func div_divis6_int32(i int32) (int32, bool) {
     // arm64: "SUB R[0-9]+->63,"
     // arm64: "CSET EQ"
     // arm64: -"RO[RL]"
-    return i/6, i%6 == 0
+    return i / 6, i%6 == 0
 }
@@ -1124,7 +1154,7 @@ func div_ndivis6_int32(i int32) (int32, bool) {
     // arm64: "SUB R[0-9]+->63,"
     // arm64: "CSET NE"
     // arm64: -"RO[RL]"
-    return i/6, i%6 != 0
+    return i / 6, i%6 != 0
 }
@@ -1145,7 +1175,7 @@ func div_divis6_int64(i int64) (int64, bool) {
     // arm64: "SUB R[0-9]+->63,"
     // arm64: "CSET EQ"
     // arm64: -"RO[RL]"
-    return i/6, i%6 == 0
+    return i / 6, i%6 == 0
 }
@@ -1166,5 +1196,5 @@ func div_ndivis6_int64(i int64) (int64, bool) {
     // arm64: "SUB R[0-9]+->63,"
     // arm64: "CSET NE"
     // arm64: -"RO[RL]"
-    return i/6, i%6 != 0
+    return i / 6, i%6 != 0
 }