From 0a382e0b7f9abb39644adcfe65013df200989324 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Wed, 11 Jul 2018 01:30:32 +0000 Subject: [PATCH] cmd/compile: optimize ARMv7 code MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit "AND $0xffff0000, Rx" will be encoded to 12 bytes. 1. MOVWload from the constant pool to Rtmp 2. AND Rtmp, Rx 3. a 4-byte item in the constant pool It can be simplified to 8 bytes on ARMv7, since ARMv7 has "MOVW $imm-16, Rx". 1. MOVW $0xffff, Rtmp 2. BIC Rtmp, Rx The above optimization also applies to BICconst, ADDconst and SUBconst. 1. The total size of pkg/android_arm (excluding cmd/compile) decreases about 2KB. 2. The go1 benchmark shows no regression, exlcuding noise. name old time/op new time/op delta BinaryTree17-4 25.5s ± 1% 25.2s ± 1% -0.85% (p=0.000 n=30+30) Fannkuch11-4 13.3s ± 0% 13.3s ± 0% +0.16% (p=0.000 n=24+25) FmtFprintfEmpty-4 397ns ± 0% 394ns ± 0% -0.64% (p=0.000 n=30+30) FmtFprintfString-4 679ns ± 0% 678ns ± 0% ~ (p=0.093 n=30+29) FmtFprintfInt-4 708ns ± 0% 707ns ± 0% -0.19% (p=0.000 n=27+28) FmtFprintfIntInt-4 1.05µs ± 0% 1.05µs ± 0% -0.07% (p=0.001 n=18+30) FmtFprintfPrefixedInt-4 1.16µs ± 0% 1.15µs ± 0% -0.41% (p=0.000 n=29+30) FmtFprintfFloat-4 2.26µs ± 0% 2.23µs ± 1% -1.40% (p=0.000 n=30+30) FmtManyArgs-4 3.96µs ± 0% 3.95µs ± 0% -0.29% (p=0.000 n=29+30) GobDecode-4 52.9ms ± 2% 53.4ms ± 2% +0.92% (p=0.004 n=28+30) GobEncode-4 49.7ms ± 2% 49.8ms ± 2% ~ (p=0.890 n=30+26) Gzip-4 2.61s ± 0% 2.60s ± 0% -0.36% (p=0.000 n=29+29) Gunzip-4 312ms ± 0% 311ms ± 0% -0.13% (p=0.000 n=30+28) HTTPClientServer-4 1.02ms ± 8% 1.00ms ± 7% ~ (p=0.224 n=29+26) JSONEncode-4 125ms ± 1% 124ms ± 3% -1.05% (p=0.000 n=25+30) JSONDecode-4 432ms ± 1% 436ms ± 2% ~ (p=0.277 n=26+30) Mandelbrot200-4 18.4ms ± 0% 18.4ms ± 0% +0.02% (p=0.001 n=28+25) GoParse-4 22.4ms ± 1% 22.3ms ± 1% -0.41% (p=0.000 n=28+28) RegexpMatchEasy0_32-4 697ns ± 0% 706ns ± 0% +1.23% (p=0.000 n=19+30) RegexpMatchEasy0_1K-4 4.27µs ± 0% 4.26µs ± 0% -0.06% (p=0.000 n=30+30) RegexpMatchEasy1_32-4 741ns ± 0% 735ns ± 0% -0.86% (p=0.000 n=26+30) RegexpMatchEasy1_1K-4 5.49µs ± 0% 5.49µs ± 0% -0.03% (p=0.023 n=25+30) RegexpMatchMedium_32-4 1.05µs ± 2% 1.04µs ± 2% ~ (p=0.893 n=30+30) RegexpMatchMedium_1K-4 261µs ± 0% 261µs ± 0% -0.11% (p=0.000 n=29+30) RegexpMatchHard_32-4 14.9µs ± 0% 14.9µs ± 0% -0.36% (p=0.000 n=23+29) RegexpMatchHard_1K-4 446µs ± 0% 445µs ± 0% -0.17% (p=0.000 n=30+29) Revcomp-4 41.6ms ± 1% 41.7ms ± 1% +0.27% (p=0.040 n=28+30) Template-4 531ms ± 0% 532ms ± 1% ~ (p=0.059 n=30+30) TimeParse-4 3.40µs ± 0% 3.33µs ± 0% -2.02% (p=0.000 n=30+30) TimeFormat-4 6.14µs ± 0% 6.11µs ± 0% -0.45% (p=0.000 n=27+29) [Geo mean] 384µs 383µs -0.27% name old speed new speed delta GobDecode-4 14.5MB/s ± 2% 14.4MB/s ± 2% -0.90% (p=0.005 n=28+30) GobEncode-4 15.4MB/s ± 2% 15.4MB/s ± 2% ~ (p=0.741 n=30+25) Gzip-4 7.44MB/s ± 0% 7.47MB/s ± 1% +0.37% (p=0.000 n=25+30) Gunzip-4 62.3MB/s ± 0% 62.4MB/s ± 0% +0.13% (p=0.000 n=30+28) JSONEncode-4 15.5MB/s ± 1% 15.6MB/s ± 3% +1.07% (p=0.000 n=25+30) JSONDecode-4 4.48MB/s ± 0% 4.46MB/s ± 2% ~ (p=0.655 n=23+30) GoParse-4 2.58MB/s ± 1% 2.59MB/s ± 1% +0.42% (p=0.000 n=28+29) RegexpMatchEasy0_32-4 45.9MB/s ± 0% 45.3MB/s ± 0% -1.23% (p=0.000 n=28+30) RegexpMatchEasy0_1K-4 240MB/s ± 0% 240MB/s ± 0% +0.07% (p=0.000 n=30+30) RegexpMatchEasy1_32-4 43.2MB/s ± 0% 43.5MB/s ± 0% +0.85% (p=0.000 n=30+28) RegexpMatchEasy1_1K-4 186MB/s ± 0% 186MB/s ± 0% +0.03% (p=0.026 n=25+30) RegexpMatchMedium_32-4 955kB/s ± 2% 960kB/s ± 2% ~ (p=0.084 n=30+30) RegexpMatchMedium_1K-4 3.92MB/s ± 0% 3.93MB/s ± 0% +0.14% (p=0.000 n=29+30) RegexpMatchHard_32-4 2.14MB/s ± 0% 2.15MB/s ± 0% +0.31% (p=0.000 n=30+26) RegexpMatchHard_1K-4 2.30MB/s ± 0% 2.30MB/s ± 0% ~ (all equal) Revcomp-4 61.1MB/s ± 1% 60.9MB/s ± 1% -0.27% (p=0.039 n=28+30) Template-4 3.66MB/s ± 0% 3.65MB/s ± 1% -0.14% (p=0.045 n=30+30) [Geo mean] 12.8MB/s 12.8MB/s +0.04% Change-Id: I02370e2584b4c041fddd324c97628fd6f0c12183 Reviewed-on: https://go-review.googlesource.com/123179 Run-TryBot: Ben Shi TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/gen/ARM.rules | 4 ++ src/cmd/compile/internal/ssa/rewriteARM.go | 56 ++++++++++++++++++++++ test/codegen/bits.go | 5 ++ 3 files changed, 65 insertions(+) diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules index 2846ef6d2e..e8a3c27c71 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM.rules @@ -812,6 +812,10 @@ (SUBconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(uint32(-c)) -> (ADDconst [int64(int32(-c))] x) (ANDconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) -> (BICconst [int64(int32(^uint32(c)))] x) (BICconst [c] x) && !isARMImmRot(uint32(c)) && isARMImmRot(^uint32(c)) -> (ANDconst [int64(int32(^uint32(c)))] x) +(ADDconst [c] x) && objabi.GOARM==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && uint32(-c)<=0xffff -> (SUBconst [int64(int32(-c))] x) +(SUBconst [c] x) && objabi.GOARM==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && uint32(-c)<=0xffff -> (ANDconst [int64(int32(-c))] x) +(ANDconst [c] x) && objabi.GOARM==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && ^uint32(c)<=0xffff -> (BICconst [int64(int32(^uint32(c)))] x) +(BICconst [c] x) && objabi.GOARM==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && ^uint32(c)<=0xffff -> (ANDconst [int64(int32(^uint32(c)))] x) (ADDconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int32(c+d))]) (ADDconst [c] (ADDconst [d] x)) -> (ADDconst [int64(int32(c+d))] x) (ADDconst [c] (SUBconst [d] x)) -> (ADDconst [int64(int32(c-d))] x) diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go index dc12733279..e463511f17 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM.go +++ b/src/cmd/compile/internal/ssa/rewriteARM.go @@ -2850,6 +2850,20 @@ func rewriteValueARM_OpARMADDconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ADDconst [c] x) + // cond: objabi.GOARM==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && uint32(-c)<=0xffff + // result: (SUBconst [int64(int32(-c))] x) + for { + c := v.AuxInt + x := v.Args[0] + if !(objabi.GOARM == 7 && !isARMImmRot(uint32(c)) && uint32(c) > 0xffff && uint32(-c) <= 0xffff) { + break + } + v.reset(OpARMSUBconst) + v.AuxInt = int64(int32(-c)) + v.AddArg(x) + return true + } // match: (ADDconst [c] (MOVWconst [d])) // cond: // result: (MOVWconst [int64(int32(c+d))]) @@ -3670,6 +3684,20 @@ func rewriteValueARM_OpARMANDconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ANDconst [c] x) + // cond: objabi.GOARM==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && ^uint32(c)<=0xffff + // result: (BICconst [int64(int32(^uint32(c)))] x) + for { + c := v.AuxInt + x := v.Args[0] + if !(objabi.GOARM == 7 && !isARMImmRot(uint32(c)) && uint32(c) > 0xffff && ^uint32(c) <= 0xffff) { + break + } + v.reset(OpARMBICconst) + v.AuxInt = int64(int32(^uint32(c))) + v.AddArg(x) + return true + } // match: (ANDconst [c] (MOVWconst [d])) // cond: // result: (MOVWconst [c&d]) @@ -4243,6 +4271,20 @@ func rewriteValueARM_OpARMBICconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (BICconst [c] x) + // cond: objabi.GOARM==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && ^uint32(c)<=0xffff + // result: (ANDconst [int64(int32(^uint32(c)))] x) + for { + c := v.AuxInt + x := v.Args[0] + if !(objabi.GOARM == 7 && !isARMImmRot(uint32(c)) && uint32(c) > 0xffff && ^uint32(c) <= 0xffff) { + break + } + v.reset(OpARMANDconst) + v.AuxInt = int64(int32(^uint32(c))) + v.AddArg(x) + return true + } // match: (BICconst [c] (MOVWconst [d])) // cond: // result: (MOVWconst [d&^c]) @@ -15243,6 +15285,20 @@ func rewriteValueARM_OpARMSUBconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (SUBconst [c] x) + // cond: objabi.GOARM==7 && !isARMImmRot(uint32(c)) && uint32(c)>0xffff && uint32(-c)<=0xffff + // result: (ANDconst [int64(int32(-c))] x) + for { + c := v.AuxInt + x := v.Args[0] + if !(objabi.GOARM == 7 && !isARMImmRot(uint32(c)) && uint32(c) > 0xffff && uint32(-c) <= 0xffff) { + break + } + v.reset(OpARMANDconst) + v.AuxInt = int64(int32(-c)) + v.AddArg(x) + return true + } // match: (SUBconst [c] (MOVWconst [d])) // cond: // result: (MOVWconst [int64(int32(d-c))]) diff --git a/test/codegen/bits.go b/test/codegen/bits.go index 2d1645b5e3..c46f75845c 100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -284,6 +284,11 @@ func and_mask_2(a uint64) uint64 { return a & (1 << 63) } +func and_mask_3(a uint32) uint32 { + // arm/7:`BIC`,-`AND` + return a & 0xffff0000 +} + // Check generation of arm64 BIC/EON/ORN instructions func op_bic(x, y uint32) uint32 { -- 2.48.1