From 192b675f1778039563296cac253aa281d4b13d12 Mon Sep 17 00:00:00 2001
From: erifan01
Date: Mon, 11 Feb 2019 06:37:49 +0000
Subject: [PATCH] cmd/compile: add an optimization rule for
 math/bits.ReverseBytes16 on arm64

On amd64, ReverseBytes16 is lowered to a rotate instruction. However,
arm64 doesn't have a 16-bit rotate instruction, but it does have a
REV16W instruction that can be used for ReverseBytes16. This CL adds a
rule to turn patterns like (x<<8) | (x>>8) (where x is a uint16, and
"|" can also be "^" or "+") into a REV16W instruction.

Code:

func reverseBytes16(i uint16) uint16 {
	return bits.ReverseBytes16(i)
}

Before:

	0x0004 00004 (test.go:6)	MOVHU	"".i(FP), R0
	0x0008 00008 ($GOROOT/src/math/bits/bits.go:262)	UBFX	$8, R0, $8, R1
	0x000c 00012 ($GOROOT/src/math/bits/bits.go:262)	ORR	R0<<8, R1, R0
	0x0010 00016 (test.go:6)	MOVH	R0, "".~r1+8(FP)
	0x0014 00020 (test.go:6)	RET	(R30)

After:

	0x0000 00000 (test.go:6)	MOVHU	"".i(FP), R0
	0x0004 00004 (test.go:6)	REV16W	R0, R0
	0x0008 00008 (test.go:6)	MOVH	R0, "".~r1+8(FP)
	0x000c 00012 (test.go:6)	RET	(R30)

Benchmarks:

name                old time/op       new time/op       delta
ReverseBytes-224    1.000000ns +- 0%  1.000000ns +- 0%      ~     (all equal)
ReverseBytes16-224  1.500000ns +- 0%  1.000000ns +- 0%   -33.33%  (p=0.000 n=9+10)
ReverseBytes32-224  1.000000ns +- 0%  1.000000ns +- 0%      ~     (all equal)
ReverseBytes64-224  1.000000ns +- 0%  1.000000ns +- 0%      ~     (all equal)

Change-Id: I87cd41b2d8e549bf39c601f185d5775bd42d739c
Reviewed-on: https://go-review.googlesource.com/c/157757
Reviewed-by: Cherry Zhang
Run-TryBot: Cherry Zhang
TryBot-Result: Gobot Gobot
---
 src/cmd/compile/internal/ssa/gen/ARM64.rules |   3 +
 src/cmd/compile/internal/ssa/rewriteARM64.go | 113 +++++++++++++++++--
 test/codegen/mathbits.go                     |   1 +
 3 files changed, 107 insertions(+), 10 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index fc806f75a0..3f49a9bcf9 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -1786,6 +1786,9 @@ (CMPconst [64] (SUB (MOVDconst [32]) (ANDconst [31] y)))))
 	&& cc.(Op) == OpARM64LessThanU
 	-> (RORW x y)
 
+// ((x>>8) | (x<<8)) -> (REV16W x), the type of x is uint16, "|" can also be "^" or "+".
+((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [arm64BFAuxInt(8, 8)] x) x) -> (REV16W x)
+
 // Extract from reg pair
 (ADDshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
 ( ORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 05b8b9c697..fe815efb14 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -2304,6 +2304,8 @@ func rewriteValueARM64_OpARM64ADDconst_0(v *Value) bool {
 func rewriteValueARM64_OpARM64ADDshiftLL_0(v *Value) bool {
 	b := v.Block
 	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (ADDshiftLL (MOVDconst [c]) x [d])
 	// cond:
 	// result: (ADDconst [c] (SLLconst x [d]))
@@ -2387,6 +2389,35 @@ func rewriteValueARM64_OpARM64ADDshiftLL_0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (ADDshiftLL [8] (UBFX [arm64BFAuxInt(8, 8)] x) x)
+	// cond:
+	// result: (REV16W x)
+	for {
+		if v.Type != typ.UInt16 {
+			break
+		}
+		if v.AuxInt != 8 {
+			break
+		}
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64UBFX {
+			break
+		}
+		if v_0.Type != typ.UInt16 {
+			break
+		}
+		if v_0.AuxInt != arm64BFAuxInt(8, 8) {
+			break
+		}
+		x := v_0.Args[0]
+		if x != v.Args[1] {
+			break
+		}
+		v.reset(OpARM64REV16W)
+		v.AddArg(x)
+		return true
+	}
 	// match: (ADDshiftLL [c] (SRLconst x [64-c]) x2)
 	// cond:
 	// result: (EXTRconst [64-c] x2 x)
@@ -26504,6 +26535,8 @@ func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
 func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
 	b := v.Block
 	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (ORshiftLL (MOVDconst [c]) x [d])
 	// cond:
 	// result: (ORconst [c] (SLLconst x [d]))
@@ -26610,6 +26643,35 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (ORshiftLL [8] (UBFX [arm64BFAuxInt(8, 8)] x) x)
+	// cond:
+	// result: (REV16W x)
+	for {
+		if v.Type != typ.UInt16 {
+			break
+		}
+		if v.AuxInt != 8 {
+			break
+		}
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64UBFX {
+			break
+		}
+		if v_0.Type != typ.UInt16 {
+			break
+		}
+		if v_0.AuxInt != arm64BFAuxInt(8, 8) {
+			break
+		}
+		x := v_0.Args[0]
+		if x != v.Args[1] {
+			break
+		}
+		v.reset(OpARM64REV16W)
+		v.AddArg(x)
+		return true
+	}
 	// match: (ORshiftLL [c] (SRLconst x [64-c]) x2)
 	// cond:
 	// result: (EXTRconst [64-c] x2 x)
@@ -26739,6 +26801,11 @@
 		v0.AddArg(mem)
 		return true
 	}
+	return false
+}
+func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
+	b := v.Block
+	_ = b
 	// match: (ORshiftLL [8] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
 	// cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
 	// result: @mergePoint(b,x0,x1) (MOVHUloadidx ptr0 idx0 mem)
@@ -26795,11 +26862,6 @@
 		v0.AddArg(mem)
 		return true
 	}
-	return false
-}
-func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
-	b := v.Block
-	_ = b
 	// match: (ORshiftLL [8] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
 	// cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
 	// result: @mergePoint(b,x0,x1) (MOVHUloadidx ptr idx mem)
@@ -27754,6 +27816,11 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
 		v0.AddArg(mem)
 		return true
 	}
+	return false
+}
+func rewriteValueARM64_OpARM64ORshiftLL_20(v *Value) bool {
+	b := v.Block
+	_ = b
 	// match: (ORshiftLL [8] y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
 	// cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
 	// result: @mergePoint(b,x0,x1) (REV16W (MOVHUload [i0] {s} p mem))
@@ -27810,11 +27877,6 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
 		v0.AddArg(v1)
 		return true
 	}
-	return false
-}
-func rewriteValueARM64_OpARM64ORshiftLL_20(v *Value) bool {
-	b := v.Block
-	_ = b
 	// match: (ORshiftLL [8] y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
 	// cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
 	// result: @mergePoint(b,x0,x1) (REV16W (MOVHUloadidx ptr0 idx0 mem))
@@ -31905,6 +31967,8 @@ func rewriteValueARM64_OpARM64XORconst_0(v *Value) bool {
 func rewriteValueARM64_OpARM64XORshiftLL_0(v *Value) bool {
 	b := v.Block
 	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (XORshiftLL (MOVDconst [c]) x [d])
 	// cond:
 	// result: (XORconst [c] (SLLconst x [d]))
@@ -32010,6 +32074,35 @@ func rewriteValueARM64_OpARM64XORshiftLL_0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (XORshiftLL [8] (UBFX [arm64BFAuxInt(8, 8)] x) x)
+	// cond:
+	// result: (REV16W x)
+	for {
+		if v.Type != typ.UInt16 {
+			break
+		}
+		if v.AuxInt != 8 {
+			break
+		}
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64UBFX {
+			break
+		}
+		if v_0.Type != typ.UInt16 {
+			break
+		}
+		if v_0.AuxInt != arm64BFAuxInt(8, 8) {
+			break
+		}
+		x := v_0.Args[0]
+		if x != v.Args[1] {
+			break
+		}
+		v.reset(OpARM64REV16W)
+		v.AddArg(x)
+		return true
+	}
 	// match: (XORshiftLL [c] (SRLconst x [64-c]) x2)
 	// cond:
 	// result: (EXTRconst [64-c] x2 x)
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index d8b1775b0f..b2a8e3ea7a 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -170,6 +170,7 @@ func ReverseBytes32(n uint32) uint32 {
 
 func ReverseBytes16(n uint16) uint16 {
 	// amd64:"ROLW"
+	// arm64:"REV16W",-"UBFX",-"ORR"
 	return bits.ReverseBytes16(n)
 }
 
-- 
2.50.0
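
Because the new rule matches the lowered shift/UBFX shape rather than the
bits.ReverseBytes16 call itself, hand-written byte swaps of the same shape
should benefit as well. The sketch below is not part of the patch; it is a
minimal illustration of the source patterns the commit message describes
("|", "^" or "+" combining x<<8 with x>>8 on a uint16), assuming the
compiler lowers the uint16 right shift to UBFX as in the Before listing
above. The function names are made up for the example.

package main

import "fmt"

// Each of these is the pattern targeted by
// ((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX ...) x) -> (REV16W x),
// so on arm64 they should now compile to a single REV16W.
func swapOr(x uint16) uint16  { return x<<8 | x>>8 }
func swapXor(x uint16) uint16 { return x<<8 ^ x>>8 }
func swapAdd(x uint16) uint16 { return x<<8 + x>>8 }

func main() {
	// All three variants byte-swap 0x1234 into 0x3412.
	fmt.Printf("%#x %#x %#x\n", swapOr(0x1234), swapXor(0x1234), swapAdd(0x1234))
}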