From d60cf39f8e58211c9d4d507d673131f32623d5cc Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Thu, 27 Sep 2018 13:21:03 +0000
Subject: [PATCH] cmd/compile: optimize arm64's MADD and MSUB
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This CL implements constant folding for MADD/MSUB on arm64.

1. The total size of pkg/android_arm64/ decreases by about 4KB,
excluding cmd/compile/.

2. There is no regression in the go1 benchmark, excluding noise.

name                     old time/op    new time/op    delta
BinaryTree17-4              16.4s ± 1%     16.5s ± 1%  +0.24%  (p=0.008 n=29+29)
Fannkuch11-4                8.73s ± 0%     8.71s ± 0%  -0.15%  (p=0.000 n=29+29)
FmtFprintfEmpty-4           174ns ± 0%     174ns ± 0%    ~     (all equal)
FmtFprintfString-4          370ns ± 0%     372ns ± 2%  +0.53%  (p=0.007 n=24+30)
FmtFprintfInt-4             419ns ± 0%     419ns ± 0%    ~     (all equal)
FmtFprintfIntInt-4          673ns ± 1%     661ns ± 1%  -1.81%  (p=0.000 n=30+27)
FmtFprintfPrefixedInt-4     806ns ± 0%     805ns ± 0%    ~     (p=0.957 n=28+27)
FmtFprintfFloat-4          1.09µs ± 0%    1.09µs ± 0%  -0.04%  (p=0.001 n=22+30)
FmtManyArgs-4              2.67µs ± 0%    2.68µs ± 0%  +0.03%  (p=0.045 n=29+28)
GobDecode-4                33.2ms ± 1%    32.5ms ± 1%  -2.11%  (p=0.000 n=29+29)
GobEncode-4                29.5ms ± 0%    29.2ms ± 0%  -1.04%  (p=0.000 n=28+28)
Gzip-4                      1.39s ± 2%     1.38s ± 1%  -0.48%  (p=0.023 n=30+30)
Gunzip-4                    139ms ± 0%     139ms ± 0%    ~     (p=0.616 n=30+28)
HTTPClientServer-4          766µs ± 4%     758µs ± 3%  -1.03%  (p=0.013 n=28+29)
JSONEncode-4               49.7ms ± 0%    49.6ms ± 0%  -0.24%  (p=0.000 n=30+30)
JSONDecode-4                266ms ± 0%     268ms ± 1%  +1.07%  (p=0.000 n=29+30)
Mandelbrot200-4            16.6ms ± 0%    16.6ms ± 0%    ~     (p=0.248 n=30+29)
GoParse-4                  15.9ms ± 0%    16.0ms ± 0%  +0.76%  (p=0.000 n=29+29)
RegexpMatchEasy0_32-4       381ns ± 0%     380ns ± 0%  -0.14%  (p=0.000 n=30+30)
RegexpMatchEasy0_1K-4      1.18µs ± 0%    1.19µs ± 1%  +0.30%  (p=0.000 n=29+30)
RegexpMatchEasy1_32-4       357ns ± 0%     357ns ± 0%    ~     (all equal)
RegexpMatchEasy1_1K-4      2.04µs ± 0%    2.05µs ± 0%  +0.50%  (p=0.000 n=26+28)
RegexpMatchMedium_32-4      590ns ± 0%     589ns ± 0%  -0.12%  (p=0.000 n=30+23)
RegexpMatchMedium_1K-4      162µs ± 0%     162µs ± 0%    ~     (p=0.318 n=28+25)
RegexpMatchHard_32-4       9.56µs ± 0%    9.56µs ± 0%    ~     (p=0.072 n=30+29)
RegexpMatchHard_1K-4        287µs ± 0%     287µs ± 0%  -0.02%  (p=0.005 n=28+28)
Revcomp-4                   2.50s ± 0%     2.51s ± 0%    ~     (p=0.246 n=29+29)
Template-4                  312ms ± 1%     313ms ± 1%  +0.46%  (p=0.002 n=30+30)
TimeParse-4                1.68µs ± 0%    1.67µs ± 0%  -0.31%  (p=0.000 n=27+29)
TimeFormat-4               1.66µs ± 0%    1.64µs ± 0%  -0.92%  (p=0.000 n=29+26)
[Geo mean]                  247µs          246µs       -0.15%

name                     old speed      new speed      delta
GobDecode-4              23.1MB/s ± 1%  23.6MB/s ± 0%  +2.17%  (p=0.000 n=29+28)
GobEncode-4              26.0MB/s ± 0%  26.3MB/s ± 0%  +1.05%  (p=0.000 n=28+28)
Gzip-4                   14.0MB/s ± 2%  14.1MB/s ± 1%  +0.47%  (p=0.026 n=30+30)
Gunzip-4                  139MB/s ± 0%   139MB/s ± 0%    ~     (p=0.624 n=30+28)
JSONEncode-4             39.1MB/s ± 0%  39.2MB/s ± 0%  +0.24%  (p=0.000 n=30+30)
JSONDecode-4             7.31MB/s ± 0%  7.23MB/s ± 1%  -1.07%  (p=0.000 n=28+30)
GoParse-4                3.65MB/s ± 0%  3.62MB/s ± 0%  -0.77%  (p=0.000 n=29+29)
RegexpMatchEasy0_32-4    84.0MB/s ± 0%  84.1MB/s ± 0%  +0.18%  (p=0.000 n=28+30)
RegexpMatchEasy0_1K-4     864MB/s ± 0%   861MB/s ± 1%  -0.29%  (p=0.000 n=29+30)
RegexpMatchEasy1_32-4    89.5MB/s ± 0%  89.5MB/s ± 0%    ~     (p=0.841 n=28+28)
RegexpMatchEasy1_1K-4     502MB/s ± 0%   500MB/s ± 0%  -0.51%  (p=0.000 n=29+29)
RegexpMatchMedium_32-4   1.69MB/s ± 0%  1.70MB/s ± 0%  +0.41%  (p=0.000 n=26+30)
RegexpMatchMedium_1K-4   6.31MB/s ± 0%  6.30MB/s ± 0%    ~     (p=0.129 n=30+25)
RegexpMatchHard_32-4     3.35MB/s ± 0%  3.35MB/s ± 0%    ~     (p=0.657 n=30+29)
RegexpMatchHard_1K-4     3.57MB/s ± 0%  3.57MB/s ± 0%    ~     (all equal)
Revcomp-4                 102MB/s ± 0%   101MB/s ± 0%    ~     (p=0.213 n=29+29)
Template-4               6.22MB/s ± 1%  6.19MB/s ± 1%  -0.42%  (p=0.005 n=30+29)
[Geo mean]               24.1MB/s       24.2MB/s       +0.08%

Change-Id: I6c02d3c9975f6bd8bc215cb1fc14d29602b45649
Reviewed-on: https://go-review.googlesource.com/138095
Run-TryBot: Ben Shi
TryBot-Result: Gobot Gobot
Reviewed-by: Cherry Zhang
---
 src/cmd/compile/internal/ssa/gen/ARM64.rules |  112 +-
 src/cmd/compile/internal/ssa/rewriteARM64.go | 6776 ++++++++++++------
 test/codegen/arithmetic.go                   |   15 +-
 3 files changed, 4525 insertions(+), 2378 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index 8fb39538c2..3fce018d45 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -1154,15 +1154,15 @@
 (MULW (NEG x) y) -> (MNEGW x y)
 
 // madd/msub
-(ADD a l:(MUL x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
-(SUB a l:(MUL x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
-(ADD a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
-(SUB a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
+(ADD a l:(MUL x y)) && l.Uses==1 && clobber(l) -> (MADD a x y)
+(SUB a l:(MUL x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y)
+(ADD a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y)
+(SUB a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MADD a x y)
 
-(ADD a l:(MULW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
-(SUB a l:(MULW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
-(ADD a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
-(SUB a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
+(ADD a l:(MULW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y)
+(SUB a l:(MULW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
+(ADD a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
+(SUB a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y)
 
 // mul by constant
 (MUL x (MOVDconst [-1])) -> (NEG x)
@@ -1210,6 +1210,94 @@
 (MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (SUBshiftLL x x [3]))
 (MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3])))
+(MADD a x (MOVDconst [-1])) -> (SUB a x)
+(MADD a _ (MOVDconst [0])) -> a
+(MADD a x (MOVDconst [1])) -> (ADD a x)
+(MADD a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
+(MADD a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL x x [log2(c-1)]))
+(MADD a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL x x [log2(c+1)]))
+(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
+(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
+(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
+(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+
+(MADD a (MOVDconst [-1]) x) -> (SUB a x)
+(MADD a (MOVDconst [0]) _) -> a
+(MADD a (MOVDconst [1]) x) -> (ADD a x)
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL x x [log2(c-1)]))
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL x x [log2(c+1)]))
+(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
+(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
+(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
+(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+
+(MADDW a x (MOVDconst [c])) && int32(c)==-1 -> (SUB a x)
+(MADDW a _ (MOVDconst [c])) && int32(c)==0 -> a
+(MADDW a x (MOVDconst [c])) && int32(c)==1 -> (ADD a x)
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL x x [log2(c-1)]))
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL x x [log2(c+1)]))
+(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
+(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
+(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
+(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+
+(MADDW a (MOVDconst [c]) x) && int32(c)==-1 -> (SUB a x)
+(MADDW a (MOVDconst [c]) _) && int32(c)==0 -> a
+(MADDW a (MOVDconst [c]) x) && int32(c)==1 -> (ADD a x)
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL x x [log2(c-1)]))
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL x x [log2(c+1)]))
+(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
+(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
+(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
+(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+
+(MSUB a x (MOVDconst [-1])) -> (ADD a x)
+(MSUB a _ (MOVDconst [0])) -> a
+(MSUB a x (MOVDconst [1])) -> (SUB a x)
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL x x [log2(c-1)]))
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL x x [log2(c+1)]))
+(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
+(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
+(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
+(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+
+(MSUB a (MOVDconst [-1]) x) -> (ADD a x)
+(MSUB a (MOVDconst [0]) _) -> a
+(MSUB a (MOVDconst [1]) x) -> (SUB a x)
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL x x [log2(c-1)]))
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL x x [log2(c+1)]))
+(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
+(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
+(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
+(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+
+(MSUBW a x (MOVDconst [c])) && int32(c)==-1 -> (ADD a x)
+(MSUBW a _ (MOVDconst [c])) && int32(c)==0 -> a
+(MSUBW a x (MOVDconst [c])) && int32(c)==1 -> (SUB a x)
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL x x [log2(c-1)]))
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL x x [log2(c+1)]))
+(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
+(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
+(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
+(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+
+(MSUBW a (MOVDconst [c]) x) && int32(c)==-1 -> (ADD a x)
+(MSUBW a (MOVDconst [c]) _) && int32(c)==0 -> a
+(MSUBW a (MOVDconst [c]) x) && int32(c)==1 -> (SUB a x)
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL x x [log2(c-1)]))
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL x x [log2(c+1)]))
+(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
+(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
+(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
+(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+
 
 // div by constant
 (UDIV x (MOVDconst [1])) -> x
 (UDIV x (MOVDconst [c])) && isPowerOfTwo(c) -> (SRLconst [log2(c)] x)
@@ -1261,6 +1349,14 @@
 (MULW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(int32(c)*int32(d))])
 (MNEG (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-c*d])
 (MNEGW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-int64(int32(c)*int32(d))])
+(MADD (MOVDconst [c]) x y) -> (ADDconst [c] (MUL x y))
+(MADDW (MOVDconst [c]) x y) -> (ADDconst [c] (MULW x y))
+(MSUB (MOVDconst [c]) x y) ->
(ADDconst [c] (MNEG x y)) +(MSUBW (MOVDconst [c]) x y) -> (ADDconst [c] (MNEGW x y)) +(MADD a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [c*d] a) +(MADDW a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [int64(int32(c)*int32(d))] a) +(MSUB a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [c*d] a) +(MSUBW a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [int64(int32(c)*int32(d))] a) (DIV (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c/d]) (UDIV (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(uint64(c)/uint64(d))]) (DIVW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(int32(c)/int32(d))]) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 5bf165df48..f07ab42090 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -139,6 +139,10 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64LessThan_0(v) case OpARM64LessThanU: return rewriteValueARM64_OpARM64LessThanU_0(v) + case OpARM64MADD: + return rewriteValueARM64_OpARM64MADD_0(v) || rewriteValueARM64_OpARM64MADD_10(v) || rewriteValueARM64_OpARM64MADD_20(v) + case OpARM64MADDW: + return rewriteValueARM64_OpARM64MADDW_0(v) || rewriteValueARM64_OpARM64MADDW_10(v) || rewriteValueARM64_OpARM64MADDW_20(v) case OpARM64MNEG: return rewriteValueARM64_OpARM64MNEG_0(v) || rewriteValueARM64_OpARM64MNEG_10(v) || rewriteValueARM64_OpARM64MNEG_20(v) case OpARM64MNEGW: @@ -245,6 +249,10 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v) case OpARM64MOVWstorezeroidx4: return rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v) + case OpARM64MSUB: + return rewriteValueARM64_OpARM64MSUB_0(v) || rewriteValueARM64_OpARM64MSUB_10(v) || rewriteValueARM64_OpARM64MSUB_20(v) + case OpARM64MSUBW: + return rewriteValueARM64_OpARM64MSUBW_0(v) || rewriteValueARM64_OpARM64MSUBW_10(v) || rewriteValueARM64_OpARM64MSUBW_20(v) case OpARM64MUL: return rewriteValueARM64_OpARM64MUL_0(v) || rewriteValueARM64_OpARM64MUL_10(v) || rewriteValueARM64_OpARM64MUL_20(v) case OpARM64MULW: @@ -924,7 +932,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { return true } // match: (ADD a l:(MUL x y)) - // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: l.Uses==1 && clobber(l) // result: (MADD a x y) for { _ = v.Args[1] @@ -936,7 +944,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { _ = l.Args[1] x := l.Args[0] y := l.Args[1] - if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MADD) @@ -946,7 +954,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { return true } // match: (ADD l:(MUL x y) a) - // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: l.Uses==1 && clobber(l) // result: (MADD a x y) for { _ = v.Args[1] @@ -958,7 +966,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { x := l.Args[0] y := l.Args[1] a := v.Args[1] - if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MADD) @@ -968,7 +976,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { return true } // match: (ADD a l:(MNEG x y)) - // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst 
&& clobber(l) + // cond: l.Uses==1 && clobber(l) // result: (MSUB a x y) for { _ = v.Args[1] @@ -980,7 +988,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { _ = l.Args[1] x := l.Args[0] y := l.Args[1] - if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MSUB) @@ -990,7 +998,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { return true } // match: (ADD l:(MNEG x y) a) - // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: l.Uses==1 && clobber(l) // result: (MSUB a x y) for { _ = v.Args[1] @@ -1002,7 +1010,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { x := l.Args[0] y := l.Args[1] a := v.Args[1] - if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MSUB) @@ -1012,7 +1020,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { return true } // match: (ADD a l:(MULW x y)) - // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l) // result: (MADDW a x y) for { _ = v.Args[1] @@ -1024,7 +1032,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { _ = l.Args[1] x := l.Args[0] y := l.Args[1] - if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MADDW) @@ -1034,7 +1042,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { return true } // match: (ADD l:(MULW x y) a) - // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l) // result: (MADDW a x y) for { _ = v.Args[1] @@ -1046,7 +1054,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { x := l.Args[0] y := l.Args[1] a := v.Args[1] - if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MADDW) @@ -1056,7 +1064,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { return true } // match: (ADD a l:(MNEGW x y)) - // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l) // result: (MSUBW a x y) for { _ = v.Args[1] @@ -1068,7 +1076,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { _ = l.Args[1] x := l.Args[0] y := l.Args[1] - if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MSUBW) @@ -1078,7 +1086,7 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { return true } // match: (ADD l:(MNEGW x y) a) - // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l) // result: (MSUBW a x y) for { _ = v.Args[1] @@ -1090,7 +1098,7 @@ func rewriteValueARM64_OpARM64ADD_0(v 
*Value) bool { x := l.Args[0] y := l.Args[1] a := v.Args[1] - if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MSUBW) @@ -6515,192 +6523,229 @@ func rewriteValueARM64_OpARM64LessThanU_0(v *Value) bool { } return false } -func rewriteValueARM64_OpARM64MNEG_0(v *Value) bool { +func rewriteValueARM64_OpARM64MADD_0(v *Value) bool { b := v.Block _ = b - // match: (MNEG x (MOVDconst [-1])) + // match: (MADD a x (MOVDconst [-1])) // cond: - // result: x + // result: (SUB a x) for { - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - if v_1.AuxInt != -1 { + if v_2.AuxInt != -1 { break } - v.reset(OpCopy) - v.Type = x.Type + v.reset(OpARM64SUB) + v.AddArg(a) v.AddArg(x) return true } - // match: (MNEG (MOVDconst [-1]) x) + // match: (MADD a _ (MOVDconst [0])) // cond: - // result: x + // result: a for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - if v_0.AuxInt != -1 { + if v_2.AuxInt != 0 { break } - x := v.Args[1] v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) + v.Type = a.Type + v.AddArg(a) return true } - // match: (MNEG _ (MOVDconst [0])) + // match: (MADD a x (MOVDconst [1])) // cond: - // result: (MOVDconst [0]) + // result: (ADD a x) for { - _ = v.Args[1] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - if v_1.AuxInt != 0 { + if v_2.AuxInt != 1 { break } - v.reset(OpARM64MOVDconst) - v.AuxInt = 0 + v.reset(OpARM64ADD) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MNEG (MOVDconst [0]) _) - // cond: - // result: (MOVDconst [0]) + // match: (MADD a x (MOVDconst [c])) + // cond: isPowerOfTwo(c) + // result: (ADDshiftLL a x [log2(c)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - if v_0.AuxInt != 0 { + c := v_2.AuxInt + if !(isPowerOfTwo(c)) { break } - v.reset(OpARM64MOVDconst) - v.AuxInt = 0 + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MNEG x (MOVDconst [1])) - // cond: - // result: (NEG x) + // match: (MADD a x (MOVDconst [c])) + // cond: isPowerOfTwo(c-1) && c>=3 + // result: (ADD a (ADDshiftLL x x [log2(c-1)])) for { - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - if v_1.AuxInt != 1 { + c := v_2.AuxInt + if !(isPowerOfTwo(c-1) && c >= 3) { break } - v.reset(OpARM64NEG) - v.AddArg(x) + v.reset(OpARM64ADD) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MNEG (MOVDconst [1]) x) - // cond: - // result: (NEG x) + // match: (MADD a x (MOVDconst [c])) + // cond: isPowerOfTwo(c+1) && c>=7 + // result: (SUB a (SUBshiftLL x x [log2(c+1)])) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + 
if v_2.Op != OpARM64MOVDconst { break } - if v_0.AuxInt != 1 { + c := v_2.AuxInt + if !(isPowerOfTwo(c+1) && c >= 7) { break } - x := v.Args[1] - v.reset(OpARM64NEG) - v.AddArg(x) + v.reset(OpARM64SUB) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MNEG x (MOVDconst [c])) - // cond: isPowerOfTwo(c) - // result: (NEG (SLLconst [log2(c)] x)) + // match: (MADD a x (MOVDconst [c])) + // cond: c%3 == 0 && isPowerOfTwo(c/3) + // result: (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)]) for { - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - if !(isPowerOfTwo(c)) { + c := v_2.AuxInt + if !(c%3 == 0 && isPowerOfTwo(c/3)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 3) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEG (MOVDconst [c]) x) - // cond: isPowerOfTwo(c) - // result: (NEG (SLLconst [log2(c)] x)) + // match: (MADD a x (MOVDconst [c])) + // cond: c%5 == 0 && isPowerOfTwo(c/5) + // result: (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(isPowerOfTwo(c)) { + c := v_2.AuxInt + if !(c%5 == 0 && isPowerOfTwo(c/5)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c) + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 5) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEG x (MOVDconst [c])) - // cond: isPowerOfTwo(c-1) && c >= 3 - // result: (NEG (ADDshiftLL x x [log2(c-1)])) + // match: (MADD a x (MOVDconst [c])) + // cond: c%7 == 0 && isPowerOfTwo(c/7) + // result: (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)]) for { - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - if !(isPowerOfTwo(c-1) && c >= 3) { + c := v_2.AuxInt + if !(c%7 == 0 && isPowerOfTwo(c/7)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = log2(c - 1) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 7) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEG (MOVDconst [c]) x) - // cond: isPowerOfTwo(c-1) && c >= 3 - // result: (NEG (ADDshiftLL x x [log2(c-1)])) + // match: (MADD a x (MOVDconst [c])) + // cond: c%9 == 0 && isPowerOfTwo(c/9) + // result: (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(isPowerOfTwo(c-1) && c >= 3) { + c := v_2.AuxInt + if !(c%9 == 0 && isPowerOfTwo(c/9)) { break } - v.reset(OpARM64NEG) + 
v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 9) + v.AddArg(a) v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = log2(c - 1) + v0.AuxInt = 3 v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) @@ -6708,488 +6753,508 @@ func rewriteValueARM64_OpARM64MNEG_0(v *Value) bool { } return false } -func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool { +func rewriteValueARM64_OpARM64MADD_10(v *Value) bool { b := v.Block _ = b - // match: (MNEG x (MOVDconst [c])) - // cond: isPowerOfTwo(c+1) && c >= 7 - // result: (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) + // match: (MADD a (MOVDconst [-1]) x) + // cond: + // result: (SUB a x) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - if !(isPowerOfTwo(c+1) && c >= 7) { + if v_1.AuxInt != -1 { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = log2(c + 1) - v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v1.AddArg(x) - v0.AddArg(v1) - v0.AddArg(x) - v.AddArg(v0) + x := v.Args[2] + v.reset(OpARM64SUB) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MNEG (MOVDconst [c]) x) - // cond: isPowerOfTwo(c+1) && c >= 7 - // result: (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) + // match: (MADD a (MOVDconst [0]) _) + // cond: + // result: a for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(isPowerOfTwo(c+1) && c >= 7) { + if v_1.AuxInt != 0 { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = log2(c + 1) - v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v1.AddArg(x) - v0.AddArg(v1) - v0.AddArg(x) - v.AddArg(v0) + v.reset(OpCopy) + v.Type = a.Type + v.AddArg(a) return true } - // match: (MNEG x (MOVDconst [c])) - // cond: c%3 == 0 && isPowerOfTwo(c/3) - // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) + // match: (MADD a (MOVDconst [1]) x) + // cond: + // result: (ADD a x) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - if !(c%3 == 0 && isPowerOfTwo(c/3)) { + if v_1.AuxInt != 1 { break } - v.reset(OpARM64SLLconst) - v.Type = x.Type - v.AuxInt = log2(c / 3) - v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) - v0.AuxInt = 2 - v0.AddArg(x) - v0.AddArg(x) - v.AddArg(v0) + x := v.Args[2] + v.reset(OpARM64ADD) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MNEG (MOVDconst [c]) x) - // cond: c%3 == 0 && isPowerOfTwo(c/3) - // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) + // match: (MADD a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c) + // result: (ADDshiftLL a x [log2(c)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(c%3 == 0 && isPowerOfTwo(c/3)) { + c := v_1.AuxInt + x := v.Args[2] + if !(isPowerOfTwo(c)) { break } - v.reset(OpARM64SLLconst) - v.Type = x.Type - v.AuxInt = log2(c / 3) - v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) - v0.AuxInt = 2 - v0.AddArg(x) - v0.AddArg(x) - v.AddArg(v0) + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MNEG x (MOVDconst [c])) - // cond: c%5 == 0 && isPowerOfTwo(c/5) - // result: (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) + // 
match: (MADD a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c-1) && c>=3 + // result: (ADD a (ADDshiftLL x x [log2(c-1)])) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - if !(c%5 == 0 && isPowerOfTwo(c/5)) { + x := v.Args[2] + if !(isPowerOfTwo(c-1) && c >= 3) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 5) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 2 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64ADD) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEG (MOVDconst [c]) x) - // cond: c%5 == 0 && isPowerOfTwo(c/5) - // result: (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) + // match: (MADD a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c+1) && c>=7 + // result: (SUB a (SUBshiftLL x x [log2(c+1)])) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(c%5 == 0 && isPowerOfTwo(c/5)) { + c := v_1.AuxInt + x := v.Args[2] + if !(isPowerOfTwo(c+1) && c >= 7) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 5) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 2 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SUB) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEG x (MOVDconst [c])) - // cond: c%7 == 0 && isPowerOfTwo(c/7) - // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) + // match: (MADD a (MOVDconst [c]) x) + // cond: c%3 == 0 && isPowerOfTwo(c/3) + // result: (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)]) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - if !(c%7 == 0 && isPowerOfTwo(c/7)) { + x := v.Args[2] + if !(c%3 == 0 && isPowerOfTwo(c/3)) { break } - v.reset(OpARM64SLLconst) - v.Type = x.Type - v.AuxInt = log2(c / 7) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 3) + v.AddArg(a) v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) - v0.AuxInt = 3 + v0.AuxInt = 2 v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEG (MOVDconst [c]) x) - // cond: c%7 == 0 && isPowerOfTwo(c/7) - // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) + // match: (MADD a (MOVDconst [c]) x) + // cond: c%5 == 0 && isPowerOfTwo(c/5) + // result: (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(c%7 == 0 && isPowerOfTwo(c/7)) { + c := v_1.AuxInt + x := v.Args[2] + if !(c%5 == 0 && isPowerOfTwo(c/5)) { break } - v.reset(OpARM64SLLconst) - v.Type = x.Type - v.AuxInt = log2(c / 7) - v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) - v0.AuxInt = 3 + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 5) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 2 v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEG x (MOVDconst [c])) - // cond: c%9 == 0 && isPowerOfTwo(c/9) - 
// result: (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) + // match: (MADD a (MOVDconst [c]) x) + // cond: c%7 == 0 && isPowerOfTwo(c/7) + // result: (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)]) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - if !(c%9 == 0 && isPowerOfTwo(c/9)) { + x := v.Args[2] + if !(c%7 == 0 && isPowerOfTwo(c/7)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 9) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 3 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 7) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEG (MOVDconst [c]) x) + // match: (MADD a (MOVDconst [c]) x) // cond: c%9 == 0 && isPowerOfTwo(c/9) - // result: (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) + // result: (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] + c := v_1.AuxInt + x := v.Args[2] if !(c%9 == 0 && isPowerOfTwo(c/9)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 9) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 3 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 9) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } return false } -func rewriteValueARM64_OpARM64MNEG_20(v *Value) bool { - // match: (MNEG (MOVDconst [c]) (MOVDconst [d])) +func rewriteValueARM64_OpARM64MADD_20(v *Value) bool { + b := v.Block + _ = b + // match: (MADD (MOVDconst [c]) x y) // cond: - // result: (MOVDconst [-c*d]) + // result: (ADDconst [c] (MUL x y)) for { - _ = v.Args[1] + _ = v.Args[2] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { break } c := v_0.AuxInt - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { - break - } - d := v_1.AuxInt - v.reset(OpARM64MOVDconst) - v.AuxInt = -c * d + x := v.Args[1] + y := v.Args[2] + v.reset(OpARM64ADDconst) + v.AuxInt = c + v0 := b.NewValue0(v.Pos, OpARM64MUL, x.Type) + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) return true } - // match: (MNEG (MOVDconst [d]) (MOVDconst [c])) + // match: (MADD a (MOVDconst [c]) (MOVDconst [d])) // cond: - // result: (MOVDconst [-c*d]) + // result: (ADDconst [c*d] a) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { - break - } - d := v_0.AuxInt + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - v.reset(OpARM64MOVDconst) - v.AuxInt = -c * d + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break + } + d := v_2.AuxInt + v.reset(OpARM64ADDconst) + v.AuxInt = c * d + v.AddArg(a) return true } return false } -func rewriteValueARM64_OpARM64MNEGW_0(v *Value) bool { +func rewriteValueARM64_OpARM64MADDW_0(v *Value) bool { b := v.Block _ = b - // match: (MNEGW x (MOVDconst [c])) + // match: (MADDW a x (MOVDconst [c])) // cond: int32(c)==-1 - // result: x + // result: (SUB a x) for { - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { - break + _ = v.Args[2] + a := v.Args[0] + x 
:= v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break } - c := v_1.AuxInt + c := v_2.AuxInt if !(int32(c) == -1) { break } - v.reset(OpCopy) - v.Type = x.Type + v.reset(OpARM64SUB) + v.AddArg(a) v.AddArg(x) return true } - // match: (MNEGW (MOVDconst [c]) x) - // cond: int32(c)==-1 - // result: x + // match: (MADDW a _ (MOVDconst [c])) + // cond: int32(c)==0 + // result: a for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(int32(c) == -1) { + c := v_2.AuxInt + if !(int32(c) == 0) { break } v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) + v.Type = a.Type + v.AddArg(a) return true } - // match: (MNEGW _ (MOVDconst [c])) - // cond: int32(c)==0 - // result: (MOVDconst [0]) + // match: (MADDW a x (MOVDconst [c])) + // cond: int32(c)==1 + // result: (ADD a x) for { - _ = v.Args[1] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - if !(int32(c) == 0) { + c := v_2.AuxInt + if !(int32(c) == 1) { break } - v.reset(OpARM64MOVDconst) - v.AuxInt = 0 + v.reset(OpARM64ADD) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MNEGW (MOVDconst [c]) _) - // cond: int32(c)==0 - // result: (MOVDconst [0]) + // match: (MADDW a x (MOVDconst [c])) + // cond: isPowerOfTwo(c) + // result: (ADDshiftLL a x [log2(c)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - if !(int32(c) == 0) { + c := v_2.AuxInt + if !(isPowerOfTwo(c)) { break } - v.reset(OpARM64MOVDconst) - v.AuxInt = 0 + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MNEGW x (MOVDconst [c])) - // cond: int32(c)==1 - // result: (NEG x) + // match: (MADDW a x (MOVDconst [c])) + // cond: isPowerOfTwo(c-1) && int32(c)>=3 + // result: (ADD a (ADDshiftLL x x [log2(c-1)])) for { - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - if !(int32(c) == 1) { + c := v_2.AuxInt + if !(isPowerOfTwo(c-1) && int32(c) >= 3) { break } - v.reset(OpARM64NEG) - v.AddArg(x) + v.reset(OpARM64ADD) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MNEGW (MOVDconst [c]) x) - // cond: int32(c)==1 - // result: (NEG x) + // match: (MADDW a x (MOVDconst [c])) + // cond: isPowerOfTwo(c+1) && int32(c)>=7 + // result: (SUB a (SUBshiftLL x x [log2(c+1)])) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(int32(c) == 1) { + c := v_2.AuxInt + if !(isPowerOfTwo(c+1) && int32(c) >= 7) { break } - v.reset(OpARM64NEG) - v.AddArg(x) + v.reset(OpARM64SUB) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MNEGW x (MOVDconst [c])) - // cond: isPowerOfTwo(c) - // result: (NEG (SLLconst [log2(c)] x)) + // match: (MADDW 
a x (MOVDconst [c])) + // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) + // result: (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)]) for { - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - if !(isPowerOfTwo(c)) { + c := v_2.AuxInt + if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 3) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEGW (MOVDconst [c]) x) - // cond: isPowerOfTwo(c) - // result: (NEG (SLLconst [log2(c)] x)) + // match: (MADDW a x (MOVDconst [c])) + // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) + // result: (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(isPowerOfTwo(c)) { + c := v_2.AuxInt + if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c) + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 5) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEGW x (MOVDconst [c])) - // cond: isPowerOfTwo(c-1) && int32(c) >= 3 - // result: (NEG (ADDshiftLL x x [log2(c-1)])) + // match: (MADDW a x (MOVDconst [c])) + // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) + // result: (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)]) for { - _ = v.Args[1] - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - if !(isPowerOfTwo(c-1) && int32(c) >= 3) { + c := v_2.AuxInt + if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = log2(c - 1) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 7) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEGW (MOVDconst [c]) x) - // cond: isPowerOfTwo(c-1) && int32(c) >= 3 - // result: (NEG (ADDshiftLL x x [log2(c-1)])) + // match: (MADDW a x (MOVDconst [c])) + // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) + // result: (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(isPowerOfTwo(c-1) && int32(c) >= 3) { + c := v_2.AuxInt + if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) { break } - v.reset(OpARM64NEG) + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 9) + v.AddArg(a) v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = log2(c - 1) + v0.AuxInt = 3 v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) @@ -7197,196 +7262,205 @@ func rewriteValueARM64_OpARM64MNEGW_0(v *Value) bool { } return false } -func 
rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool { +func rewriteValueARM64_OpARM64MADDW_10(v *Value) bool { b := v.Block _ = b - // match: (MNEGW x (MOVDconst [c])) - // cond: isPowerOfTwo(c+1) && int32(c) >= 7 - // result: (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) + // match: (MADDW a (MOVDconst [c]) x) + // cond: int32(c)==-1 + // result: (SUB a x) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - if !(isPowerOfTwo(c+1) && int32(c) >= 7) { + x := v.Args[2] + if !(int32(c) == -1) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = log2(c + 1) - v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v1.AddArg(x) - v0.AddArg(v1) - v0.AddArg(x) - v.AddArg(v0) + v.reset(OpARM64SUB) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MNEGW (MOVDconst [c]) x) - // cond: isPowerOfTwo(c+1) && int32(c) >= 7 - // result: (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) + // match: (MADDW a (MOVDconst [c]) _) + // cond: int32(c)==0 + // result: a for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(isPowerOfTwo(c+1) && int32(c) >= 7) { + c := v_1.AuxInt + if !(int32(c) == 0) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v0.AuxInt = log2(c + 1) - v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v1.AddArg(x) - v0.AddArg(v1) - v0.AddArg(x) - v.AddArg(v0) + v.reset(OpCopy) + v.Type = a.Type + v.AddArg(a) return true } - // match: (MNEGW x (MOVDconst [c])) - // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) - // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) + // match: (MADDW a (MOVDconst [c]) x) + // cond: int32(c)==1 + // result: (ADD a x) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { + x := v.Args[2] + if !(int32(c) == 1) { break } - v.reset(OpARM64SLLconst) - v.Type = x.Type - v.AuxInt = log2(c / 3) - v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) - v0.AuxInt = 2 - v0.AddArg(x) - v0.AddArg(x) - v.AddArg(v0) + v.reset(OpARM64ADD) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MNEGW (MOVDconst [c]) x) - // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) - // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) + // match: (MADDW a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c) + // result: (ADDshiftLL a x [log2(c)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { + c := v_1.AuxInt + x := v.Args[2] + if !(isPowerOfTwo(c)) { break } - v.reset(OpARM64SLLconst) - v.Type = x.Type - v.AuxInt = log2(c / 3) - v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) - v0.AuxInt = 2 + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c) + v.AddArg(a) + v.AddArg(x) + return true + } + // match: (MADDW a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c-1) && int32(c)>=3 + // result: (ADD a (ADDshiftLL x x [log2(c-1)])) + for { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + x := v.Args[2] + if !(isPowerOfTwo(c-1) && int32(c) >= 3) { + break + } + 
v.reset(OpARM64ADD) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEGW x (MOVDconst [c])) - // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) - // result: (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) + // match: (MADDW a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c+1) && int32(c)>=7 + // result: (SUB a (SUBshiftLL x x [log2(c+1)])) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) { + x := v.Args[2] + if !(isPowerOfTwo(c+1) && int32(c) >= 7) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 5) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 2 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SUB) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEGW (MOVDconst [c]) x) - // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) - // result: (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) + // match: (MADDW a (MOVDconst [c]) x) + // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) + // result: (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := v.Args[1] - if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) { + c := v_1.AuxInt + x := v.Args[2] + if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 5) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 2 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 3) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEGW x (MOVDconst [c])) - // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) - // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) + // match: (MADDW a (MOVDconst [c]) x) + // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) + // result: (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)]) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { + x := v.Args[2] + if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) { break } - v.reset(OpARM64SLLconst) - v.Type = x.Type - v.AuxInt = log2(c / 7) - v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) - v0.AuxInt = 3 + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 5) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 2 v0.AddArg(x) v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEGW (MOVDconst [c]) x) + // match: (MADDW a (MOVDconst [c]) x) // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) - // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) + // result: (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)]) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - x := 
v.Args[1] + c := v_1.AuxInt + x := v.Args[2] if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { break } - v.reset(OpARM64SLLconst) - v.Type = x.Type + v.reset(OpARM64SUBshiftLL) v.AuxInt = log2(c / 7) + v.AddArg(a) v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) v0.AuxInt = 3 v0.AddArg(x) @@ -7394,880 +7468,1832 @@ func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool { v.AddArg(v0) return true } - // match: (MNEGW x (MOVDconst [c])) + // match: (MADDW a (MOVDconst [c]) x) // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) - // result: (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) + // result: (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)]) for { - _ = v.Args[1] - x := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt + x := v.Args[2] if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 9) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 3 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 9) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } - // match: (MNEGW (MOVDconst [c]) x) - // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) - // result: (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) + return false +} +func rewriteValueARM64_OpARM64MADDW_20(v *Value) bool { + b := v.Block + _ = b + // match: (MADDW (MOVDconst [c]) x y) + // cond: + // result: (ADDconst [c] (MULW x y)) for { - _ = v.Args[1] + _ = v.Args[2] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { break } c := v_0.AuxInt x := v.Args[1] - if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) { - break - } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 9) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 3 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + y := v.Args[2] + v.reset(OpARM64ADDconst) + v.AuxInt = c + v0 := b.NewValue0(v.Pos, OpARM64MULW, x.Type) + v0.AddArg(x) + v0.AddArg(y) v.AddArg(v0) return true } - return false -} -func rewriteValueARM64_OpARM64MNEGW_20(v *Value) bool { - // match: (MNEGW (MOVDconst [c]) (MOVDconst [d])) + // match: (MADDW a (MOVDconst [c]) (MOVDconst [d])) // cond: - // result: (MOVDconst [-int64(int32(c)*int32(d))]) + // result: (ADDconst [int64(int32(c)*int32(d))] a) for { - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { - break - } - c := v_0.AuxInt + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } - d := v_1.AuxInt - v.reset(OpARM64MOVDconst) - v.AuxInt = -int64(int32(c) * int32(d)) + c := v_1.AuxInt + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break + } + d := v_2.AuxInt + v.reset(OpARM64ADDconst) + v.AuxInt = int64(int32(c) * int32(d)) + v.AddArg(a) return true } - // match: (MNEGW (MOVDconst [d]) (MOVDconst [c])) + return false +} +func rewriteValueARM64_OpARM64MNEG_0(v *Value) bool { + b := v.Block + _ = b + // match: (MNEG x (MOVDconst [-1])) // cond: - // result: (MOVDconst [-int64(int32(c)*int32(d))]) + // result: x for { _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { - break - } - d := v_0.AuxInt + x := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - v.reset(OpARM64MOVDconst) - v.AuxInt = -int64(int32(c) * int32(d)) + if v_1.AuxInt != -1 { + break + } + 
v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) return true } - return false -} -func rewriteValueARM64_OpARM64MOD_0(v *Value) bool { - // match: (MOD (MOVDconst [c]) (MOVDconst [d])) + // match: (MNEG (MOVDconst [-1]) x) // cond: - // result: (MOVDconst [c%d]) + // result: x for { _ = v.Args[1] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt + if v_0.AuxInt != -1 { + break + } + x := v.Args[1] + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (MNEG _ (MOVDconst [0])) + // cond: + // result: (MOVDconst [0]) + for { + _ = v.Args[1] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } - d := v_1.AuxInt + if v_1.AuxInt != 0 { + break + } v.reset(OpARM64MOVDconst) - v.AuxInt = c % d + v.AuxInt = 0 return true } - return false -} -func rewriteValueARM64_OpARM64MODW_0(v *Value) bool { - // match: (MODW (MOVDconst [c]) (MOVDconst [d])) + // match: (MNEG (MOVDconst [0]) _) // cond: - // result: (MOVDconst [int64(int32(c)%int32(d))]) + // result: (MOVDconst [0]) for { _ = v.Args[1] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + if v_0.AuxInt != 0 { break } - d := v_1.AuxInt v.reset(OpARM64MOVDconst) - v.AuxInt = int64(int32(c) % int32(d)) + v.AuxInt = 0 return true } - return false -} -func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool { - b := v.Block - _ = b - config := b.Func.Config - _ = config - // match: (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVBUload [off1+off2] {sym} ptr mem) + // match: (MNEG x (MOVDconst [1])) + // cond: + // result: (NEG x) for { - off1 := v.AuxInt - sym := v.Aux _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADDconst { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v.Args[1] - if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + if v_1.AuxInt != 1 { break } - v.reset(OpARM64MOVBUload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg(ptr) - v.AddArg(mem) + v.reset(OpARM64NEG) + v.AddArg(x) return true } - // match: (MOVBUload [off] {sym} (ADD ptr idx) mem) - // cond: off == 0 && sym == nil - // result: (MOVBUloadidx ptr idx mem) + // match: (MNEG (MOVDconst [1]) x) + // cond: + // result: (NEG x) for { - off := v.AuxInt - sym := v.Aux _ = v.Args[1] v_0 := v.Args[0] - if v_0.Op != OpARM64ADD { + if v_0.Op != OpARM64MOVDconst { break } - _ = v_0.Args[1] - ptr := v_0.Args[0] - idx := v_0.Args[1] - mem := v.Args[1] - if !(off == 0 && sym == nil) { + if v_0.AuxInt != 1 { break } - v.reset(OpARM64MOVBUloadidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + x := v.Args[1] + v.reset(OpARM64NEG) + v.AddArg(x) return true } - // match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) - // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + // match: (MNEG x (MOVDconst [c])) + // cond: isPowerOfTwo(c) + // result: (NEG (SLLconst [log2(c)] x)) for { - off1 := v.AuxInt - sym1 := v.Aux _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDaddr { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v.Args[1] - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || 
!config.ctxt.Flag_shared)) { + c := v_1.AuxInt + if !(isPowerOfTwo(c)) { break } - v.reset(OpARM64MOVBUload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg(ptr) - v.AddArg(mem) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVDconst [0]) + // match: (MNEG (MOVDconst [c]) x) + // cond: isPowerOfTwo(c) + // result: (NEG (SLLconst [log2(c)] x)) for { - off := v.AuxInt - sym := v.Aux _ = v.Args[1] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVBstorezero { + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux - _ = v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + c := v_0.AuxInt + x := v.Args[1] + if !(isPowerOfTwo(c)) { break } - v.reset(OpARM64MOVDconst) - v.AuxInt = 0 + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueARM64_OpARM64MOVBUloadidx_0(v *Value) bool { - // match: (MOVBUloadidx ptr (MOVDconst [c]) mem) - // cond: - // result: (MOVBUload [c] ptr mem) + // match: (MNEG x (MOVDconst [c])) + // cond: isPowerOfTwo(c-1) && c >= 3 + // result: (NEG (ADDshiftLL x x [log2(c-1)])) for { - _ = v.Args[2] - ptr := v.Args[0] + _ = v.Args[1] + x := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - mem := v.Args[2] - v.reset(OpARM64MOVBUload) - v.AuxInt = c - v.AddArg(ptr) - v.AddArg(mem) + if !(isPowerOfTwo(c-1) && c >= 3) { + break + } + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBUloadidx (MOVDconst [c]) ptr mem) - // cond: - // result: (MOVBUload [c] ptr mem) + // match: (MNEG (MOVDconst [c]) x) + // cond: isPowerOfTwo(c-1) && c >= 3 + // result: (NEG (ADDshiftLL x x [log2(c-1)])) for { - _ = v.Args[2] + _ = v.Args[1] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { break } c := v_0.AuxInt - ptr := v.Args[1] - mem := v.Args[2] - v.reset(OpARM64MOVBUload) - v.AuxInt = c - v.AddArg(ptr) - v.AddArg(mem) - return true - } - // match: (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) - // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) - // result: (MOVDconst [0]) - for { - _ = v.Args[2] - ptr := v.Args[0] - idx := v.Args[1] - v_2 := v.Args[2] - if v_2.Op != OpARM64MOVBstorezeroidx { - break - } - _ = v_2.Args[2] - ptr2 := v_2.Args[0] - idx2 := v_2.Args[1] - if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + x := v.Args[1] + if !(isPowerOfTwo(c-1) && c >= 3) { break } - v.reset(OpARM64MOVDconst) - v.AuxInt = 0 + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } return false } -func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool { - // match: (MOVBUreg x:(MOVBUload _ _)) - // cond: - // result: (MOVDreg x) +func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool { + b := v.Block + _ = b + // match: (MNEG x (MOVDconst [c])) + // cond: isPowerOfTwo(c+1) && c >= 7 + // result: (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) for { + _ = 
v.Args[1] x := v.Args[0] - if x.Op != OpARM64MOVBUload { + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - _ = x.Args[1] - v.reset(OpARM64MOVDreg) - v.AddArg(x) + c := v_1.AuxInt + if !(isPowerOfTwo(c+1) && c >= 7) { + break + } + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) + v1.AddArg(x) + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBUreg x:(MOVBUloadidx _ _ _)) - // cond: - // result: (MOVDreg x) + // match: (MNEG (MOVDconst [c]) x) + // cond: isPowerOfTwo(c+1) && c >= 7 + // result: (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) for { - x := v.Args[0] - if x.Op != OpARM64MOVBUloadidx { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { break } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) - v.AddArg(x) + c := v_0.AuxInt + x := v.Args[1] + if !(isPowerOfTwo(c+1) && c >= 7) { + break + } + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) + v1.AddArg(x) + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBUreg x:(MOVBUreg _)) - // cond: - // result: (MOVDreg x) + // match: (MNEG x (MOVDconst [c])) + // cond: c%3 == 0 && isPowerOfTwo(c/3) + // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) for { + _ = v.Args[1] x := v.Args[0] - if x.Op != OpARM64MOVBUreg { + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVBUreg (ANDconst [c] x)) - // cond: - // result: (ANDconst [c&(1<<8-1)] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpARM64ANDconst { + c := v_1.AuxInt + if !(c%3 == 0 && isPowerOfTwo(c/3)) { break } - c := v_0.AuxInt - x := v_0.Args[0] - v.reset(OpARM64ANDconst) - v.AuxInt = c & (1<<8 - 1) - v.AddArg(x) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 3) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBUreg (MOVDconst [c])) - // cond: - // result: (MOVDconst [int64(uint8(c))]) + // match: (MNEG (MOVDconst [c]) x) + // cond: c%3 == 0 && isPowerOfTwo(c/3) + // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) for { + _ = v.Args[1] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { break } c := v_0.AuxInt - v.reset(OpARM64MOVDconst) - v.AuxInt = int64(uint8(c)) + x := v.Args[1] + if !(c%3 == 0 && isPowerOfTwo(c/3)) { + break + } + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 3) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBUreg x) - // cond: x.Type.IsBoolean() - // result: (MOVDreg x) + // match: (MNEG x (MOVDconst [c])) + // cond: c%5 == 0 && isPowerOfTwo(c/5) + // result: (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) for { + _ = v.Args[1] x := v.Args[0] - if !(x.Type.IsBoolean()) { + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - v.reset(OpARM64MOVDreg) - v.AddArg(x) + c := v_1.AuxInt + if !(c%5 == 0 && isPowerOfTwo(c/5)) { + break + } + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c / 5) + v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v1.AuxInt = 2 + v1.AddArg(x) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) return true } - // match: (MOVBUreg (SLLconst [sc] x)) - // cond: isARM64BFMask(sc, 1<<8-1, sc) - // result: (UBFIZ 
[arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x) + // match: (MNEG (MOVDconst [c]) x) + // cond: c%5 == 0 && isPowerOfTwo(c/5) + // result: (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) for { + _ = v.Args[1] v_0 := v.Args[0] - if v_0.Op != OpARM64SLLconst { + if v_0.Op != OpARM64MOVDconst { break } - sc := v_0.AuxInt - x := v_0.Args[0] - if !(isARM64BFMask(sc, 1<<8-1, sc)) { + c := v_0.AuxInt + x := v.Args[1] + if !(c%5 == 0 && isPowerOfTwo(c/5)) { break } - v.reset(OpARM64UBFIZ) - v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc)) - v.AddArg(x) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c / 5) + v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v1.AuxInt = 2 + v1.AddArg(x) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) return true } - // match: (MOVBUreg (SRLconst [sc] x)) - // cond: isARM64BFMask(sc, 1<<8-1, 0) - // result: (UBFX [arm64BFAuxInt(sc, 8)] x) + // match: (MNEG x (MOVDconst [c])) + // cond: c%7 == 0 && isPowerOfTwo(c/7) + // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) for { - v_0 := v.Args[0] - if v_0.Op != OpARM64SRLconst { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - sc := v_0.AuxInt - x := v_0.Args[0] - if !(isARM64BFMask(sc, 1<<8-1, 0)) { + c := v_1.AuxInt + if !(c%7 == 0 && isPowerOfTwo(c/7)) { break } - v.reset(OpARM64UBFX) - v.AuxInt = arm64BFAuxInt(sc, 8) - v.AddArg(x) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 7) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool { - b := v.Block - _ = b - config := b.Func.Config - _ = config - // match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVBload [off1+off2] {sym} ptr mem) + // match: (MNEG (MOVDconst [c]) x) + // cond: c%7 == 0 && isPowerOfTwo(c/7) + // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) for { - off1 := v.AuxInt - sym := v.Aux _ = v.Args[1] v_0 := v.Args[0] - if v_0.Op != OpARM64ADDconst { + if v_0.Op != OpARM64MOVDconst { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v.Args[1] - if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + c := v_0.AuxInt + x := v.Args[1] + if !(c%7 == 0 && isPowerOfTwo(c/7)) { break } - v.reset(OpARM64MOVBload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg(ptr) - v.AddArg(mem) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 7) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBload [off] {sym} (ADD ptr idx) mem) - // cond: off == 0 && sym == nil - // result: (MOVBloadidx ptr idx mem) + // match: (MNEG x (MOVDconst [c])) + // cond: c%9 == 0 && isPowerOfTwo(c/9) + // result: (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) for { - off := v.AuxInt - sym := v.Aux _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADD { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - _ = v_0.Args[1] - ptr := v_0.Args[0] - idx := v_0.Args[1] - mem := v.Args[1] - if !(off == 0 && sym == nil) { + c := v_1.AuxInt + if !(c%9 == 0 && isPowerOfTwo(c/9)) { break } - v.reset(OpARM64MOVBloadidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c 
/ 9) + v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v1.AuxInt = 3 + v1.AddArg(x) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) return true } - // match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) - // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + // match: (MNEG (MOVDconst [c]) x) + // cond: c%9 == 0 && isPowerOfTwo(c/9) + // result: (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) for { - off1 := v.AuxInt - sym1 := v.Aux _ = v.Args[1] v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDaddr { + if v_0.Op != OpARM64MOVDconst { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v.Args[1] - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + c := v_0.AuxInt + x := v.Args[1] + if !(c%9 == 0 && isPowerOfTwo(c/9)) { break } - v.reset(OpARM64MOVBload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg(ptr) - v.AddArg(mem) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c / 9) + v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v1.AuxInt = 3 + v1.AddArg(x) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) return true } - // match: (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVDconst [0]) + return false +} +func rewriteValueARM64_OpARM64MNEG_20(v *Value) bool { + // match: (MNEG (MOVDconst [c]) (MOVDconst [d])) + // cond: + // result: (MOVDconst [-c*d]) for { - off := v.AuxInt - sym := v.Aux _ = v.Args[1] - ptr := v.Args[0] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt v_1 := v.Args[1] - if v_1.Op != OpARM64MOVBstorezero { + if v_1.Op != OpARM64MOVDconst { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux - _ = v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + d := v_1.AuxInt + v.reset(OpARM64MOVDconst) + v.AuxInt = -c * d + return true + } + // match: (MNEG (MOVDconst [d]) (MOVDconst [c])) + // cond: + // result: (MOVDconst [-c*d]) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + d := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } + c := v_1.AuxInt v.reset(OpARM64MOVDconst) - v.AuxInt = 0 + v.AuxInt = -c * d return true } return false } -func rewriteValueARM64_OpARM64MOVBloadidx_0(v *Value) bool { - // match: (MOVBloadidx ptr (MOVDconst [c]) mem) - // cond: - // result: (MOVBload [c] ptr mem) +func rewriteValueARM64_OpARM64MNEGW_0(v *Value) bool { + b := v.Block + _ = b + // match: (MNEGW x (MOVDconst [c])) + // cond: int32(c)==-1 + // result: x for { - _ = v.Args[2] - ptr := v.Args[0] + _ = v.Args[1] + x := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - mem := v.Args[2] - v.reset(OpARM64MOVBload) - v.AuxInt = c - v.AddArg(ptr) - v.AddArg(mem) + if !(int32(c) == -1) { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) return true } - // match: (MOVBloadidx (MOVDconst [c]) ptr mem) - // cond: - // result: (MOVBload [c] ptr mem) + // match: (MNEGW (MOVDconst [c]) x) + // cond: int32(c)==-1 + // result: x for { - _ = v.Args[2] + _ = v.Args[1] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { break } c := v_0.AuxInt - ptr := v.Args[1] - mem := v.Args[2] - v.reset(OpARM64MOVBload) - v.AuxInt = c - v.AddArg(ptr) - v.AddArg(mem) + x := v.Args[1] 
+ if !(int32(c) == -1) { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) return true } - // match: (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) - // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // match: (MNEGW _ (MOVDconst [c])) + // cond: int32(c)==0 // result: (MOVDconst [0]) for { - _ = v.Args[2] - ptr := v.Args[0] - idx := v.Args[1] - v_2 := v.Args[2] - if v_2.Op != OpARM64MOVBstorezeroidx { + _ = v.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - _ = v_2.Args[2] - ptr2 := v_2.Args[0] - idx2 := v_2.Args[1] - if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + c := v_1.AuxInt + if !(int32(c) == 0) { break } v.reset(OpARM64MOVDconst) v.AuxInt = 0 return true } - return false -} -func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool { - // match: (MOVBreg x:(MOVBload _ _)) - // cond: - // result: (MOVDreg x) + // match: (MNEGW (MOVDconst [c]) _) + // cond: int32(c)==0 + // result: (MOVDconst [0]) for { - x := v.Args[0] - if x.Op != OpARM64MOVBload { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { break } - _ = x.Args[1] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVBreg x:(MOVBloadidx _ _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVBloadidx { + c := v_0.AuxInt + if !(int32(c) == 0) { break } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) - v.AddArg(x) + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 return true } - // match: (MOVBreg x:(MOVBreg _)) - // cond: - // result: (MOVDreg x) + // match: (MNEGW x (MOVDconst [c])) + // cond: int32(c)==1 + // result: (NEG x) for { + _ = v.Args[1] x := v.Args[0] - if x.Op != OpARM64MOVBreg { + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - v.reset(OpARM64MOVDreg) + c := v_1.AuxInt + if !(int32(c) == 1) { + break + } + v.reset(OpARM64NEG) v.AddArg(x) return true } - // match: (MOVBreg (MOVDconst [c])) - // cond: - // result: (MOVDconst [int64(int8(c))]) + // match: (MNEGW (MOVDconst [c]) x) + // cond: int32(c)==1 + // result: (NEG x) for { + _ = v.Args[1] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { break } c := v_0.AuxInt - v.reset(OpARM64MOVDconst) - v.AuxInt = int64(int8(c)) + x := v.Args[1] + if !(int32(c) == 1) { + break + } + v.reset(OpARM64NEG) + v.AddArg(x) return true } - // match: (MOVBreg (SLLconst [lc] x)) - // cond: lc < 8 - // result: (SBFIZ [arm64BFAuxInt(lc, 8-lc)] x) + // match: (MNEGW x (MOVDconst [c])) + // cond: isPowerOfTwo(c) + // result: (NEG (SLLconst [log2(c)] x)) for { - v_0 := v.Args[0] - if v_0.Op != OpARM64SLLconst { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - lc := v_0.AuxInt - x := v_0.Args[0] - if !(lc < 8) { + c := v_1.AuxInt + if !(isPowerOfTwo(c)) { break } - v.reset(OpARM64SBFIZ) - v.AuxInt = arm64BFAuxInt(lc, 8-lc) - v.AddArg(x) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool { - b := v.Block - _ = b - config := b.Func.Config - _ = config - // match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVBstore [off1+off2] {sym} ptr val mem) + // match: (MNEGW (MOVDconst [c]) x) + // cond: isPowerOfTwo(c) + // result: (NEG (SLLconst [log2(c)] x)) for { - 
off1 := v.AuxInt - sym := v.Aux - _ = v.Args[2] + _ = v.Args[1] v_0 := v.Args[0] - if v_0.Op != OpARM64ADDconst { + if v_0.Op != OpARM64MOVDconst { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v.Args[1] - mem := v.Args[2] - if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + c := v_0.AuxInt + x := v.Args[1] + if !(isPowerOfTwo(c)) { break } - v.reset(OpARM64MOVBstore) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg(ptr) - v.AddArg(val) - v.AddArg(mem) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBstore [off] {sym} (ADD ptr idx) val mem) - // cond: off == 0 && sym == nil - // result: (MOVBstoreidx ptr idx val mem) + // match: (MNEGW x (MOVDconst [c])) + // cond: isPowerOfTwo(c-1) && int32(c) >= 3 + // result: (NEG (ADDshiftLL x x [log2(c-1)])) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADD { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - _ = v_0.Args[1] - ptr := v_0.Args[0] - idx := v_0.Args[1] - val := v.Args[1] - mem := v.Args[2] - if !(off == 0 && sym == nil) { + c := v_1.AuxInt + if !(isPowerOfTwo(c-1) && int32(c) >= 3) { break } - v.reset(OpARM64MOVBstoreidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(val) - v.AddArg(mem) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) - // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) + // match: (MNEGW (MOVDconst [c]) x) + // cond: isPowerOfTwo(c-1) && int32(c) >= 3 + // result: (NEG (ADDshiftLL x x [log2(c-1)])) for { - off1 := v.AuxInt - sym1 := v.Aux - _ = v.Args[2] + _ = v.Args[1] v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDaddr { + if v_0.Op != OpARM64MOVDconst { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - val := v.Args[1] - mem := v.Args[2] - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + c := v_0.AuxInt + x := v.Args[1] + if !(isPowerOfTwo(c-1) && int32(c) >= 3) { break } - v.reset(OpARM64MOVBstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg(ptr) - v.AddArg(val) - v.AddArg(mem) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) - // cond: - // result: (MOVBstorezero [off] {sym} ptr mem) + return false +} +func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool { + b := v.Block + _ = b + // match: (MNEGW x (MOVDconst [c])) + // cond: isPowerOfTwo(c+1) && int32(c) >= 7 + // result: (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] - ptr := v.Args[0] + _ = v.Args[1] + x := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } - if v_1.AuxInt != 0 { + c := v_1.AuxInt + if !(isPowerOfTwo(c+1) && int32(c) >= 7) { break } - mem := v.Args[2] - v.reset(OpARM64MOVBstorezero) - v.AuxInt = off - v.Aux = sym - v.AddArg(ptr) - v.AddArg(mem) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v1 := b.NewValue0(v.Pos, 
OpARM64NEG, x.Type) + v1.AddArg(x) + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem) - // cond: - // result: (MOVBstore [off] {sym} ptr x mem) + // match: (MNEGW (MOVDconst [c]) x) + // cond: isPowerOfTwo(c+1) && int32(c) >= 7 + // result: (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVBreg { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { break } - x := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64MOVBstore) - v.AuxInt = off - v.Aux = sym - v.AddArg(ptr) - v.AddArg(x) - v.AddArg(mem) - return true - } - // match: (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) - // cond: - // result: (MOVBstore [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVBUreg { + c := v_0.AuxInt + x := v.Args[1] + if !(isPowerOfTwo(c+1) && int32(c) >= 7) { break } - x := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64MOVBstore) - v.AuxInt = off - v.Aux = sym - v.AddArg(ptr) - v.AddArg(x) - v.AddArg(mem) + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) + v1.AddArg(x) + v0.AddArg(v1) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBstore [off] {sym} ptr (MOVHreg x) mem) - // cond: - // result: (MOVBstore [off] {sym} ptr x mem) + // match: (MNEGW x (MOVDconst [c])) + // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) + // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] - ptr := v.Args[0] + _ = v.Args[1] + x := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpARM64MOVHreg { + if v_1.Op != OpARM64MOVDconst { break } - x := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64MOVBstore) - v.AuxInt = off - v.Aux = sym - v.AddArg(ptr) - v.AddArg(x) - v.AddArg(mem) + c := v_1.AuxInt + if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { + break + } + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 3) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) - // cond: - // result: (MOVBstore [off] {sym} ptr x mem) + // match: (MNEGW (MOVDconst [c]) x) + // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) + // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVHUreg { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { break } - x := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64MOVBstore) - v.AuxInt = off - v.Aux = sym - v.AddArg(ptr) - v.AddArg(x) - v.AddArg(mem) + c := v_0.AuxInt + x := v.Args[1] + if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { + break + } + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 3) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBstore [off] {sym} ptr (MOVWreg x) mem) - // cond: - // result: (MOVBstore [off] {sym} ptr x mem) + // match: (MNEGW x (MOVDconst [c])) + // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) + // result: (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] - ptr := v.Args[0] + _ 
= v.Args[1] + x := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpARM64MOVWreg { + if v_1.Op != OpARM64MOVDconst { break } - x := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64MOVBstore) - v.AuxInt = off - v.Aux = sym - v.AddArg(ptr) - v.AddArg(x) - v.AddArg(mem) + c := v_1.AuxInt + if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) { + break + } + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c / 5) + v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v1.AuxInt = 2 + v1.AddArg(x) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) return true } - // match: (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) - // cond: - // result: (MOVBstore [off] {sym} ptr x mem) + // match: (MNEGW (MOVDconst [c]) x) + // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) + // result: (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) for { - off := v.AuxInt + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + x := v.Args[1] + if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) { + break + } + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c / 5) + v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v1.AuxInt = 2 + v1.AddArg(x) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (MNEGW x (MOVDconst [c])) + // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) + // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { + break + } + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 7) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MNEGW (MOVDconst [c]) x) + // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) + // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + x := v.Args[1] + if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { + break + } + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 7) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MNEGW x (MOVDconst [c])) + // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) + // result: (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) { + break + } + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c / 9) + v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v1.AuxInt = 3 + v1.AddArg(x) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (MNEGW (MOVDconst [c]) x) + // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) + // result: (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + x := v.Args[1] + if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) { + break + } + v.reset(OpARM64NEG) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) + v0.AuxInt = log2(c / 9) + v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v1.AuxInt = 3 + v1.AddArg(x) + v1.AddArg(x) + 
v0.AddArg(v1) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueARM64_OpARM64MNEGW_20(v *Value) bool { + // match: (MNEGW (MOVDconst [c]) (MOVDconst [d])) + // cond: + // result: (MOVDconst [-int64(int32(c)*int32(d))]) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + d := v_1.AuxInt + v.reset(OpARM64MOVDconst) + v.AuxInt = -int64(int32(c) * int32(d)) + return true + } + // match: (MNEGW (MOVDconst [d]) (MOVDconst [c])) + // cond: + // result: (MOVDconst [-int64(int32(c)*int32(d))]) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + d := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + v.reset(OpARM64MOVDconst) + v.AuxInt = -int64(int32(c) * int32(d)) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOD_0(v *Value) bool { + // match: (MOD (MOVDconst [c]) (MOVDconst [d])) + // cond: + // result: (MOVDconst [c%d]) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + d := v_1.AuxInt + v.reset(OpARM64MOVDconst) + v.AuxInt = c % d + return true + } + return false +} +func rewriteValueARM64_OpARM64MODW_0(v *Value) bool { + // match: (MODW (MOVDconst [c]) (MOVDconst [d])) + // cond: + // result: (MOVDconst [int64(int32(c)%int32(d))]) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + d := v_1.AuxInt + v.reset(OpARM64MOVDconst) + v.AuxInt = int64(int32(c) % int32(d)) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVBUload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADDconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVBUload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBUload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVBUloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVBUloadidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + 
v.reset(OpARM64MOVBUload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVDconst [0]) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVBstorezero { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + _ = v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBUloadidx_0(v *Value) bool { + // match: (MOVBUloadidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVBUload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVBUload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBUloadidx (MOVDconst [c]) ptr mem) + // cond: + // result: (MOVBUload [c] ptr mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVBUload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) + // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) + for { + _ = v.Args[2] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVBstorezeroidx { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool { + // match: (MOVBUreg x:(MOVBUload _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUload { + break + } + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVBUreg x:(MOVBUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVBUreg x:(MOVBUreg _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUreg { + break + } + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVBUreg (ANDconst [c] x)) + // cond: + // result: (ANDconst [c&(1<<8-1)] x) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64ANDconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpARM64ANDconst) + v.AuxInt = c & (1<<8 - 1) + v.AddArg(x) + return true + } + // match: (MOVBUreg (MOVDconst [c])) + // cond: + // result: (MOVDconst [int64(uint8(c))]) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + v.reset(OpARM64MOVDconst) + v.AuxInt = int64(uint8(c)) + return true + } + // match: (MOVBUreg x) + // cond: x.Type.IsBoolean() + // result: (MOVDreg x) + for { + x := v.Args[0] + if !(x.Type.IsBoolean()) { + break + } + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVBUreg (SLLconst [sc] x)) + // cond: isARM64BFMask(sc, 1<<8-1, sc) + // result: 
(UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64SLLconst { + break + } + sc := v_0.AuxInt + x := v_0.Args[0] + if !(isARM64BFMask(sc, 1<<8-1, sc)) { + break + } + v.reset(OpARM64UBFIZ) + v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc)) + v.AddArg(x) + return true + } + // match: (MOVBUreg (SRLconst [sc] x)) + // cond: isARM64BFMask(sc, 1<<8-1, 0) + // result: (UBFX [arm64BFAuxInt(sc, 8)] x) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64SRLconst { + break + } + sc := v_0.AuxInt + x := v_0.Args[0] + if !(isARM64BFMask(sc, 1<<8-1, 0)) { + break + } + v.reset(OpARM64UBFX) + v.AuxInt = arm64BFAuxInt(sc, 8) + v.AddArg(x) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVBload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADDconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVBload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVBloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVBloadidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVBload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVDconst [0]) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVBstorezero { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + _ = v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBloadidx_0(v *Value) bool { + // match: (MOVBloadidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVBload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVBload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBloadidx (MOVDconst 
[c]) ptr mem) + // cond: + // result: (MOVBload [c] ptr mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVBload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) + // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) + for { + _ = v.Args[2] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVBstorezeroidx { + break + } + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool { + // match: (MOVBreg x:(MOVBload _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBload { + break + } + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVBreg x:(MOVBloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVBreg x:(MOVBreg _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBreg { + break + } + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVBreg (MOVDconst [c])) + // cond: + // result: (MOVDconst [int64(int8(c))]) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + v.reset(OpARM64MOVDconst) + v.AuxInt = int64(int8(c)) + return true + } + // match: (MOVBreg (SLLconst [lc] x)) + // cond: lc < 8 + // result: (SBFIZ [arm64BFAuxInt(lc, 8-lc)] x) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64SLLconst { + break + } + lc := v_0.AuxInt + x := v_0.Args[0] + if !(lc < 8) { + break + } + v.reset(OpARM64SBFIZ) + v.AuxInt = arm64BFAuxInt(lc, 8-lc) + v.AddArg(x) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVBstore [off1+off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADDconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVBstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off] {sym} (ADD ptr idx) val mem) + // cond: off == 0 && sym == nil + // result: (MOVBstoreidx ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + val := v.Args[1] + mem := v.Args[2] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVBstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) + // cond: 
canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVBstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) + // cond: + // result: (MOVBstorezero [off] {sym} ptr mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + if v_1.AuxInt != 0 { + break + } + mem := v.Args[2] + v.reset(OpARM64MOVBstorezero) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem) + // cond: + // result: (MOVBstore [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVBreg { + break + } + x := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64MOVBstore) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) + // cond: + // result: (MOVBstore [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVBUreg { + break + } + x := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64MOVBstore) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off] {sym} ptr (MOVHreg x) mem) + // cond: + // result: (MOVBstore [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVHreg { + break + } + x := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64MOVBstore) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) + // cond: + // result: (MOVBstore [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVHUreg { + break + } + x := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64MOVBstore) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off] {sym} ptr (MOVWreg x) mem) + // cond: + // result: (MOVBstore [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVWreg { + break + } + x := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64MOVBstore) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) + // cond: + // result: (MOVBstore [off] {sym} ptr x mem) + for { + off := v.AuxInt sym := v.Aux _ = v.Args[2] ptr := v.Args[0] @@ -14508,9 +15534,403 @@ func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool { v.AddArg(mem) return true } - // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem) + // match: (MOVHstorezeroidx (MOVDconst 
[c]) idx mem) + // cond: + // result: (MOVHstorezero [c] idx mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + idx := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVHstorezero) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVHstorezeroidx ptr (SLLconst [1] idx) mem) + // cond: + // result: (MOVHstorezeroidx2 ptr idx mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64SLLconst { + break + } + if v_1.AuxInt != 1 { + break + } + idx := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64MOVHstorezeroidx2) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVHstorezeroidx ptr (ADD idx idx) mem) + // cond: + // result: (MOVHstorezeroidx2 ptr idx mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64ADD { + break + } + _ = v_1.Args[1] + idx := v_1.Args[0] + if idx != v_1.Args[1] { + break + } + mem := v.Args[2] + v.reset(OpARM64MOVHstorezeroidx2) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVHstorezeroidx (SLLconst [1] idx) ptr mem) + // cond: + // result: (MOVHstorezeroidx2 ptr idx mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64SLLconst { + break + } + if v_0.AuxInt != 1 { + break + } + idx := v_0.Args[0] + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVHstorezeroidx2) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVHstorezeroidx (ADD idx idx) ptr mem) + // cond: + // result: (MOVHstorezeroidx2 ptr idx mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + idx := v_0.Args[0] + if idx != v_0.Args[1] { + break + } + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVHstorezeroidx2) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstorezeroidx ptr idx mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64ADDconst { + break + } + if v_1.AuxInt != 2 { + break + } + idx := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVHstorezeroidx { + break + } + _ = x.Args[2] + if ptr != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpARM64MOVWstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVHstorezeroidx2_0(v *Value) bool { + // match: (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVHstorezero [c<<1] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVHstorezero) + v.AuxInt = c << 1 + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVQstorezero [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADDconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if 
!(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVQstorezero) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVQstorezero) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) + // cond: + // result: (FMOVSfpgp val) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64FMOVSstore { + break + } + if v_1.AuxInt != off { + break + } + if v_1.Aux != sym { + break + } + _ = v_1.Args[2] + if ptr != v_1.Args[0] { + break + } + val := v_1.Args[1] + v.reset(OpARM64FMOVSfpgp) + v.AddArg(val) + return true + } + // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVWUload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADDconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVWUload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWUload [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVWUloadidx ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVWUloadidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVWUloadidx4 ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADDshiftLL { + break + } + if v_0.AuxInt != 2 { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVWUloadidx4) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, 
sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVWUload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVDconst [0]) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[1] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVWstorezero { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + _ = v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + break + } + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool { + // match: (MOVWUloadidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVWUload [c] ptr mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVWUload) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWUloadidx (MOVDconst [c]) ptr mem) // cond: - // result: (MOVHstorezero [c] idx mem) + // result: (MOVWUload [c] ptr mem) for { _ = v.Args[2] v_0 := v.Args[0] @@ -14518,17 +15938,17 @@ func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool { break } c := v_0.AuxInt - idx := v.Args[1] + ptr := v.Args[1] mem := v.Args[2] - v.reset(OpARM64MOVHstorezero) + v.reset(OpARM64MOVWUload) v.AuxInt = c - v.AddArg(idx) + v.AddArg(ptr) v.AddArg(mem) return true } - // match: (MOVHstorezeroidx ptr (SLLconst [1] idx) mem) + // match: (MOVWUloadidx ptr (SLLconst [2] idx) mem) // cond: - // result: (MOVHstorezeroidx2 ptr idx mem) + // result: (MOVWUloadidx4 ptr idx mem) for { _ = v.Args[2] ptr := v.Args[0] @@ -14536,231 +15956,321 @@ func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool { if v_1.Op != OpARM64SLLconst { break } - if v_1.AuxInt != 1 { + if v_1.AuxInt != 2 { break } idx := v_1.Args[0] mem := v.Args[2] - v.reset(OpARM64MOVHstorezeroidx2) + v.reset(OpARM64MOVWUloadidx4) v.AddArg(ptr) v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVHstorezeroidx ptr (ADD idx idx) mem) + // match: (MOVWUloadidx (SLLconst [2] idx) ptr mem) // cond: - // result: (MOVHstorezeroidx2 ptr idx mem) + // result: (MOVWUloadidx4 ptr idx mem) for { _ = v.Args[2] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64ADD { + v_0 := v.Args[0] + if v_0.Op != OpARM64SLLconst { break } - _ = v_1.Args[1] - idx := v_1.Args[0] - if idx != v_1.Args[1] { + if v_0.AuxInt != 2 { break } + idx := v_0.Args[0] + ptr := v.Args[1] mem := v.Args[2] - v.reset(OpARM64MOVHstorezeroidx2) + v.reset(OpARM64MOVWUloadidx4) v.AddArg(ptr) v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVHstorezeroidx (SLLconst [1] idx) ptr mem) - // cond: - // result: (MOVHstorezeroidx2 ptr idx mem) + // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) + // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) + // result: (MOVDconst [0]) for { _ = v.Args[2] - v_0 := v.Args[0] - if v_0.Op != OpARM64SLLconst { + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWstorezeroidx { break } - if v_0.AuxInt != 1 { + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { break } - 
idx := v_0.Args[0] - ptr := v.Args[1] - mem := v.Args[2] - v.reset(OpARM64MOVHstorezeroidx2) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 return true } - // match: (MOVHstorezeroidx (ADD idx idx) ptr mem) + return false +} +func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool { + // match: (MOVWUloadidx4 ptr (MOVDconst [c]) mem) // cond: - // result: (MOVHstorezeroidx2 ptr idx mem) + // result: (MOVWUload [c<<2] ptr mem) for { _ = v.Args[2] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADD { - break - } - _ = v_0.Args[1] - idx := v_0.Args[0] - if idx != v_0.Args[1] { + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - ptr := v.Args[1] + c := v_1.AuxInt mem := v.Args[2] - v.reset(OpARM64MOVHstorezeroidx2) + v.reset(OpARM64MOVWUload) + v.AuxInt = c << 2 v.AddArg(ptr) - v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstorezeroidx ptr idx mem) + // match: (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) + // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) + // result: (MOVDconst [0]) for { _ = v.Args[2] ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64ADDconst { + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWstorezeroidx4 { break } - if v_1.AuxInt != 2 { + _ = v_2.Args[2] + ptr2 := v_2.Args[0] + idx2 := v_2.Args[1] + if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) { break } - idx := v_1.Args[0] - x := v.Args[2] - if x.Op != OpARM64MOVHstorezeroidx { + v.reset(OpARM64MOVDconst) + v.AuxInt = 0 + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { + // match: (MOVWUreg x:(MOVBUload _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUload { break } - _ = x.Args[2] - if ptr != x.Args[0] { + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWUreg x:(MOVHUload _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHUload { break } - if idx != x.Args[1] { + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWUreg x:(MOVWUload _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVWUload { break } - mem := x.Args[2] - if !(x.Uses == 1 && clobber(x)) { + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWUreg x:(MOVBUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUloadidx { break } - v.reset(OpARM64MOVWstorezeroidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) return true } - return false -} -func rewriteValueARM64_OpARM64MOVHstorezeroidx2_0(v *Value) bool { - // match: (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) + // match: (MOVWUreg x:(MOVHUloadidx _ _ _)) // cond: - // result: (MOVHstorezero [c<<1] ptr mem) + // result: (MOVDreg x) for { - _ = v.Args[2] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + x := v.Args[0] + if x.Op != OpARM64MOVHUloadidx { break } - c := v_1.AuxInt - mem := v.Args[2] - v.reset(OpARM64MOVHstorezero) - v.AuxInt = c << 1 - v.AddArg(ptr) - v.AddArg(mem) + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) return true } - return false -} -func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool { - b := v.Block - _ = b - config 
:= b.Func.Config - _ = config - // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVQstorezero [off1+off2] {sym} ptr mem) + // match: (MOVWUreg x:(MOVWUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) for { - off1 := v.AuxInt - sym := v.Aux - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADDconst { + x := v.Args[0] + if x.Op != OpARM64MOVWUloadidx { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v.Args[1] - if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWUreg x:(MOVHUloadidx2 _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHUloadidx2 { break } - v.reset(OpARM64MOVQstorezero) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg(ptr) - v.AddArg(mem) + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) return true } - // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) - // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + // match: (MOVWUreg x:(MOVWUloadidx4 _ _ _)) + // cond: + // result: (MOVDreg x) for { - off1 := v.AuxInt - sym1 := v.Aux - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDaddr { + x := v.Args[0] + if x.Op != OpARM64MOVWUloadidx4 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v.Args[1] - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWUreg x:(MOVBUreg _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUreg { break } - v.reset(OpARM64MOVQstorezero) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg(ptr) - v.AddArg(mem) + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWUreg x:(MOVHUreg _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHUreg { + break + } + v.reset(OpARM64MOVDreg) + v.AddArg(x) return true } return false } -func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool { - b := v.Block - _ = b - config := b.Func.Config - _ = config - // match: (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) +func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool { + // match: (MOVWUreg x:(MOVWUreg _)) // cond: - // result: (FMOVSfpgp val) + // result: (MOVDreg x) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[1] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64FMOVSstore { + x := v.Args[0] + if x.Op != OpARM64MOVWUreg { break } - if v_1.AuxInt != off { + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWUreg (ANDconst [c] x)) + // cond: + // result: (ANDconst [c&(1<<32-1)] x) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64ANDconst { break } - if v_1.Aux != sym { + c := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpARM64ANDconst) + v.AuxInt = c & (1<<32 - 1) + v.AddArg(x) + return true + } + // match: (MOVWUreg (MOVDconst [c])) + // cond: + // result: (MOVDconst [int64(uint32(c))]) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { break } - _ = v_1.Args[2] - if ptr != v_1.Args[0] { + c := v_0.AuxInt + v.reset(OpARM64MOVDconst) + v.AuxInt = int64(uint32(c)) + return true + } + // match: 
(MOVWUreg (SLLconst [sc] x)) + // cond: isARM64BFMask(sc, 1<<32-1, sc) + // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64SLLconst { break } - val := v_1.Args[1] - v.reset(OpARM64FMOVSfpgp) - v.AddArg(val) + sc := v_0.AuxInt + x := v_0.Args[0] + if !(isARM64BFMask(sc, 1<<32-1, sc)) { + break + } + v.reset(OpARM64UBFIZ) + v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc)) + v.AddArg(x) return true } - // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem) + // match: (MOVWUreg (SRLconst [sc] x)) + // cond: isARM64BFMask(sc, 1<<32-1, 0) + // result: (UBFX [arm64BFAuxInt(sc, 32)] x) + for { + v_0 := v.Args[0] + if v_0.Op != OpARM64SRLconst { + break + } + sc := v_0.AuxInt + x := v_0.Args[0] + if !(isARM64BFMask(sc, 1<<32-1, 0)) { + break + } + v.reset(OpARM64UBFX) + v.AuxInt = arm64BFAuxInt(sc, 32) + v.AddArg(x) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWUload [off1+off2] {sym} ptr mem) + // result: (MOVWload [off1+off2] {sym} ptr mem) for { off1 := v.AuxInt sym := v.Aux @@ -14775,16 +16285,16 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool { if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(OpARM64MOVWUload) + v.reset(OpARM64MOVWload) v.AuxInt = off1 + off2 v.Aux = sym v.AddArg(ptr) v.AddArg(mem) return true } - // match: (MOVWUload [off] {sym} (ADD ptr idx) mem) + // match: (MOVWload [off] {sym} (ADD ptr idx) mem) // cond: off == 0 && sym == nil - // result: (MOVWUloadidx ptr idx mem) + // result: (MOVWloadidx ptr idx mem) for { off := v.AuxInt sym := v.Aux @@ -14800,15 +16310,15 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool { if !(off == 0 && sym == nil) { break } - v.reset(OpARM64MOVWUloadidx) + v.reset(OpARM64MOVWloadidx) v.AddArg(ptr) v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem) + // match: (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem) // cond: off == 0 && sym == nil - // result: (MOVWUloadidx4 ptr idx mem) + // result: (MOVWloadidx4 ptr idx mem) for { off := v.AuxInt sym := v.Aux @@ -14827,15 +16337,15 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool { if !(off == 0 && sym == nil) { break } - v.reset(OpARM64MOVWUloadidx4) + v.reset(OpARM64MOVWloadidx4) v.AddArg(ptr) v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -14851,14 +16361,14 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool { if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(OpARM64MOVWUload) + v.reset(OpARM64MOVWload) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) v.AddArg(ptr) v.AddArg(mem) return true } - // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) + // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 
_)) // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) // result: (MOVDconst [0]) for { @@ -14883,10 +16393,10 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool { } return false } -func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool { - // match: (MOVWUloadidx ptr (MOVDconst [c]) mem) +func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool { + // match: (MOVWloadidx ptr (MOVDconst [c]) mem) // cond: - // result: (MOVWUload [c] ptr mem) + // result: (MOVWload [c] ptr mem) for { _ = v.Args[2] ptr := v.Args[0] @@ -14896,15 +16406,15 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool { } c := v_1.AuxInt mem := v.Args[2] - v.reset(OpARM64MOVWUload) + v.reset(OpARM64MOVWload) v.AuxInt = c v.AddArg(ptr) v.AddArg(mem) return true } - // match: (MOVWUloadidx (MOVDconst [c]) ptr mem) + // match: (MOVWloadidx (MOVDconst [c]) ptr mem) // cond: - // result: (MOVWUload [c] ptr mem) + // result: (MOVWload [c] ptr mem) for { _ = v.Args[2] v_0 := v.Args[0] @@ -14914,15 +16424,15 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool { c := v_0.AuxInt ptr := v.Args[1] mem := v.Args[2] - v.reset(OpARM64MOVWUload) + v.reset(OpARM64MOVWload) v.AuxInt = c v.AddArg(ptr) v.AddArg(mem) return true } - // match: (MOVWUloadidx ptr (SLLconst [2] idx) mem) + // match: (MOVWloadidx ptr (SLLconst [2] idx) mem) // cond: - // result: (MOVWUloadidx4 ptr idx mem) + // result: (MOVWloadidx4 ptr idx mem) for { _ = v.Args[2] ptr := v.Args[0] @@ -14935,15 +16445,15 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool { } idx := v_1.Args[0] mem := v.Args[2] - v.reset(OpARM64MOVWUloadidx4) + v.reset(OpARM64MOVWloadidx4) v.AddArg(ptr) v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVWUloadidx (SLLconst [2] idx) ptr mem) + // match: (MOVWloadidx (SLLconst [2] idx) ptr mem) // cond: - // result: (MOVWUloadidx4 ptr idx mem) + // result: (MOVWloadidx4 ptr idx mem) for { _ = v.Args[2] v_0 := v.Args[0] @@ -14956,13 +16466,13 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool { idx := v_0.Args[0] ptr := v.Args[1] mem := v.Args[2] - v.reset(OpARM64MOVWUloadidx4) + v.reset(OpARM64MOVWloadidx4) v.AddArg(ptr) v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) + // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) // result: (MOVDconst [0]) for { @@ -14985,10 +16495,10 @@ func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool { } return false } -func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool { - // match: (MOVWUloadidx4 ptr (MOVDconst [c]) mem) +func rewriteValueARM64_OpARM64MOVWloadidx4_0(v *Value) bool { + // match: (MOVWloadidx4 ptr (MOVDconst [c]) mem) // cond: - // result: (MOVWUload [c<<2] ptr mem) + // result: (MOVWload [c<<2] ptr mem) for { _ = v.Args[2] ptr := v.Args[0] @@ -14998,13 +16508,13 @@ func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool { } c := v_1.AuxInt mem := v.Args[2] - v.reset(OpARM64MOVWUload) + v.reset(OpARM64MOVWload) v.AuxInt = c << 2 v.AddArg(ptr) v.AddArg(mem) return true } - // match: (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) + // match: (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) // result: (MOVDconst [0]) for { @@ -15027,8 +16537,21 @@ func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool { } return false } -func 
rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { - // match: (MOVWUreg x:(MOVBUload _ _)) +func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool { + // match: (MOVWreg x:(MOVBload _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBload { + break + } + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVBUload _ _)) // cond: // result: (MOVDreg x) for { @@ -15036,43 +16559,108 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { if x.Op != OpARM64MOVBUload { break } - _ = x.Args[1] + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVHload _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHload { + break + } + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVHUload _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVHUload { + break + } + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVWload _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVWload { + break + } + _ = x.Args[1] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVBloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBloadidx { + break + } + _ = x.Args[2] + v.reset(OpARM64MOVDreg) + v.AddArg(x) + return true + } + // match: (MOVWreg x:(MOVBUloadidx _ _ _)) + // cond: + // result: (MOVDreg x) + for { + x := v.Args[0] + if x.Op != OpARM64MOVBUloadidx { + break + } + _ = x.Args[2] v.reset(OpARM64MOVDreg) v.AddArg(x) return true } - // match: (MOVWUreg x:(MOVHUload _ _)) + // match: (MOVWreg x:(MOVHloadidx _ _ _)) // cond: // result: (MOVDreg x) for { x := v.Args[0] - if x.Op != OpARM64MOVHUload { + if x.Op != OpARM64MOVHloadidx { break } - _ = x.Args[1] + _ = x.Args[2] v.reset(OpARM64MOVDreg) v.AddArg(x) return true } - // match: (MOVWUreg x:(MOVWUload _ _)) + // match: (MOVWreg x:(MOVHUloadidx _ _ _)) // cond: // result: (MOVDreg x) for { x := v.Args[0] - if x.Op != OpARM64MOVWUload { + if x.Op != OpARM64MOVHUloadidx { break } - _ = x.Args[1] + _ = x.Args[2] v.reset(OpARM64MOVDreg) v.AddArg(x) return true } - // match: (MOVWUreg x:(MOVBUloadidx _ _ _)) + // match: (MOVWreg x:(MOVWloadidx _ _ _)) // cond: // result: (MOVDreg x) for { x := v.Args[0] - if x.Op != OpARM64MOVBUloadidx { + if x.Op != OpARM64MOVWloadidx { break } _ = x.Args[2] @@ -15080,12 +16668,15 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { v.AddArg(x) return true } - // match: (MOVWUreg x:(MOVHUloadidx _ _ _)) + return false +} +func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool { + // match: (MOVWreg x:(MOVHloadidx2 _ _ _)) // cond: // result: (MOVDreg x) for { x := v.Args[0] - if x.Op != OpARM64MOVHUloadidx { + if x.Op != OpARM64MOVHloadidx2 { break } _ = x.Args[2] @@ -15093,12 +16684,12 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { v.AddArg(x) return true } - // match: (MOVWUreg x:(MOVWUloadidx _ _ _)) + // match: (MOVWreg x:(MOVHUloadidx2 _ _ _)) // cond: // result: (MOVDreg x) for { x := v.Args[0] - if x.Op != OpARM64MOVWUloadidx { + if x.Op != OpARM64MOVHUloadidx2 { break } _ = x.Args[2] @@ -15106,12 +16697,12 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { v.AddArg(x) return true } - // match: (MOVWUreg x:(MOVHUloadidx2 _ _ _)) + // match: (MOVWreg x:(MOVWloadidx4 _ _ _)) 
// cond: // result: (MOVDreg x) for { x := v.Args[0] - if x.Op != OpARM64MOVHUloadidx2 { + if x.Op != OpARM64MOVWloadidx4 { break } _ = x.Args[2] @@ -15119,20 +16710,19 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { v.AddArg(x) return true } - // match: (MOVWUreg x:(MOVWUloadidx4 _ _ _)) + // match: (MOVWreg x:(MOVBreg _)) // cond: // result: (MOVDreg x) for { x := v.Args[0] - if x.Op != OpARM64MOVWUloadidx4 { + if x.Op != OpARM64MOVBreg { break } - _ = x.Args[2] v.reset(OpARM64MOVDreg) v.AddArg(x) return true } - // match: (MOVWUreg x:(MOVBUreg _)) + // match: (MOVWreg x:(MOVBUreg _)) // cond: // result: (MOVDreg x) for { @@ -15144,51 +16734,45 @@ func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool { v.AddArg(x) return true } - // match: (MOVWUreg x:(MOVHUreg _)) + // match: (MOVWreg x:(MOVHreg _)) // cond: // result: (MOVDreg x) for { x := v.Args[0] - if x.Op != OpARM64MOVHUreg { + if x.Op != OpARM64MOVHreg { break } v.reset(OpARM64MOVDreg) v.AddArg(x) return true } - return false -} -func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool { - // match: (MOVWUreg x:(MOVWUreg _)) + // match: (MOVWreg x:(MOVHreg _)) // cond: // result: (MOVDreg x) for { x := v.Args[0] - if x.Op != OpARM64MOVWUreg { + if x.Op != OpARM64MOVHreg { break } v.reset(OpARM64MOVDreg) v.AddArg(x) return true } - // match: (MOVWUreg (ANDconst [c] x)) + // match: (MOVWreg x:(MOVWreg _)) // cond: - // result: (ANDconst [c&(1<<32-1)] x) + // result: (MOVDreg x) for { - v_0 := v.Args[0] - if v_0.Op != OpARM64ANDconst { + x := v.Args[0] + if x.Op != OpARM64MOVWreg { break } - c := v_0.AuxInt - x := v_0.Args[0] - v.reset(OpARM64ANDconst) - v.AuxInt = c & (1<<32 - 1) + v.reset(OpARM64MOVDreg) v.AddArg(x) return true } - // match: (MOVWUreg (MOVDconst [c])) + // match: (MOVWreg (MOVDconst [c])) // cond: - // result: (MOVDconst [int64(uint32(c))]) + // result: (MOVDconst [int64(int32(c))]) for { v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { @@ -15196,83 +16780,89 @@ func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool { } c := v_0.AuxInt v.reset(OpARM64MOVDconst) - v.AuxInt = int64(uint32(c)) + v.AuxInt = int64(int32(c)) return true } - // match: (MOVWUreg (SLLconst [sc] x)) - // cond: isARM64BFMask(sc, 1<<32-1, sc) - // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x) + // match: (MOVWreg (SLLconst [lc] x)) + // cond: lc < 32 + // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x) for { v_0 := v.Args[0] if v_0.Op != OpARM64SLLconst { break } - sc := v_0.AuxInt - x := v_0.Args[0] - if !(isARM64BFMask(sc, 1<<32-1, sc)) { - break - } - v.reset(OpARM64UBFIZ) - v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc)) - v.AddArg(x) - return true - } - // match: (MOVWUreg (SRLconst [sc] x)) - // cond: isARM64BFMask(sc, 1<<32-1, 0) - // result: (UBFX [arm64BFAuxInt(sc, 32)] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpARM64SRLconst { - break - } - sc := v_0.AuxInt + lc := v_0.AuxInt x := v_0.Args[0] - if !(isARM64BFMask(sc, 1<<32-1, 0)) { + if !(lc < 32) { break } - v.reset(OpARM64UBFX) - v.AuxInt = arm64BFAuxInt(sc, 32) + v.reset(OpARM64SBFIZ) + v.AuxInt = arm64BFAuxInt(lc, 32-lc) v.AddArg(x) return true } return false } -func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool { +func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool { b := v.Block _ = b config := b.Func.Config _ = config - // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) + // match: (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) + // cond: + // result: (FMOVSstore [off] {sym} ptr val mem) + for { + off := 
v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64FMOVSfpgp { + break + } + val := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64FMOVSstore) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWload [off1+off2] {sym} ptr mem) + // result: (MOVWstore [off1+off2] {sym} ptr val mem) for { off1 := v.AuxInt sym := v.Aux - _ = v.Args[1] + _ = v.Args[2] v_0 := v.Args[0] if v_0.Op != OpARM64ADDconst { break } off2 := v_0.AuxInt ptr := v_0.Args[0] - mem := v.Args[1] + val := v.Args[1] + mem := v.Args[2] if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(OpARM64MOVWload) + v.reset(OpARM64MOVWstore) v.AuxInt = off1 + off2 v.Aux = sym v.AddArg(ptr) + v.AddArg(val) v.AddArg(mem) return true } - // match: (MOVWload [off] {sym} (ADD ptr idx) mem) + // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem) // cond: off == 0 && sym == nil - // result: (MOVWloadidx ptr idx mem) + // result: (MOVWstoreidx ptr idx val mem) for { off := v.AuxInt sym := v.Aux - _ = v.Args[1] + _ = v.Args[2] v_0 := v.Args[0] if v_0.Op != OpARM64ADD { break @@ -15280,23 +16870,392 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool { _ = v_0.Args[1] ptr := v_0.Args[0] idx := v_0.Args[1] - mem := v.Args[1] + val := v.Args[1] + mem := v.Args[2] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) + // cond: off == 0 && sym == nil + // result: (MOVWstoreidx4 ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADDshiftLL { + break + } + if v_0.AuxInt != 2 { + break + } + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + val := v.Args[1] + mem := v.Args[2] if !(off == 0 && sym == nil) { break } - v.reset(OpARM64MOVWloadidx) - v.AddArg(ptr) - v.AddArg(idx) + v.reset(OpARM64MOVWstoreidx4) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(OpARM64MOVWstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) + // cond: + // result: (MOVWstorezero [off] {sym} ptr mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { + break + } + if v_1.AuxInt != 0 { + break + } + mem := v.Args[2] + v.reset(OpARM64MOVWstorezero) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem) + // cond: + // result: 
(MOVWstore [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVWreg { + break + } + x := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64MOVWstore) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) + // cond: + // result: (MOVWstore [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVWUreg { + break + } + x := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64MOVWstore) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem)) + // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) + // result: (MOVDstore [i-4] {s} ptr0 w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + ptr0 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 32 { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVWstore { + break + } + if x.AuxInt != i-4 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + ptr1 := x.Args[0] + if w != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) { + break + } + v.reset(OpARM64MOVDstore) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg(ptr0) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVDstoreidx ptr1 idx1 w mem) + for { + if v.AuxInt != 4 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 32 { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVWstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVDstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MOVWstore_10(v *Value) bool { + b := v.Block + _ = b + // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem)) + // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x) + // result: (MOVDstoreidx ptr1 (SLLconst [2] idx1) w mem) + for { + if v.AuxInt != 4 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADDshiftLL { + break + } + if v_0.AuxInt != 2 { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + if v_1.AuxInt != 32 { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVWstoreidx4 { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, 
ptr1) && isSamePtr(idx0, idx1) && clobber(x)) { + break + } + v.reset(OpARM64MOVDstoreidx) + v.AddArg(ptr1) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type) + v0.AuxInt = 2 + v0.AddArg(idx1) + v.AddArg(v0) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem)) + // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) + // result: (MOVDstore [i-4] {s} ptr0 w0 mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + ptr0 := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVWstore { + break + } + if x.AuxInt != i-4 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + ptr1 := x.Args[0] + w0 := x.Args[1] + if w0.Op != OpARM64SRLconst { + break + } + if w0.AuxInt != j-32 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) { + break + } + v.reset(OpARM64MOVDstore) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg(ptr0) + v.AddArg(w0) + v.AddArg(mem) + return true + } + // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem)) + // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) + // result: (MOVDstoreidx ptr1 idx1 w0 mem) + for { + if v.AuxInt != 4 { + break + } + s := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { + break + } + _ = v_0.Args[1] + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVWstoreidx { + break + } + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + w0 := x.Args[2] + if w0.Op != OpARM64SRLconst { + break + } + if w0.AuxInt != j-32 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + break + } + v.reset(OpARM64MOVDstoreidx) + v.AddArg(ptr1) + v.AddArg(idx1) + v.AddArg(w0) v.AddArg(mem) return true } - // match: (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem) - // cond: off == 0 && sym == nil - // result: (MOVWloadidx4 ptr idx mem) + // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem)) + // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x) + // result: (MOVDstoreidx ptr1 (SLLconst [2] idx1) w0 mem) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[1] + if v.AuxInt != 4 { + break + } + s := v.Aux + _ = v.Args[2] v_0 := v.Args[0] if v_0.Op != OpARM64ADDshiftLL { break @@ -15305,110 +17264,93 @@ func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool { break } _ = v_0.Args[1] - ptr := v_0.Args[0] - idx := v_0.Args[1] - mem := v.Args[1] - if !(off == 0 && sym == nil) { + ptr0 := v_0.Args[0] + idx0 := v_0.Args[1] + v_1 := v.Args[1] + if v_1.Op != OpARM64SRLconst { break } - v.reset(OpARM64MOVWloadidx4) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) - return true - } - // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) - // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} 
ptr mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDaddr { + j := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVWstoreidx4 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v.Args[1] - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + _ = x.Args[3] + ptr1 := x.Args[0] + idx1 := x.Args[1] + w0 := x.Args[2] + if w0.Op != OpARM64SRLconst { break } - v.reset(OpARM64MOVWload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg(ptr) - v.AddArg(mem) - return true - } - // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVDconst [0]) - for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[1] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVWstorezero { + if w0.AuxInt != j-32 { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux - _ = v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + if w != w0.Args[0] { break } - v.reset(OpARM64MOVDconst) - v.AuxInt = 0 + mem := x.Args[3] + if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) { + break + } + v.reset(OpARM64MOVDstoreidx) + v.AddArg(ptr1) + v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type) + v0.AuxInt = 2 + v0.AddArg(idx1) + v.AddArg(v0) + v.AddArg(w0) + v.AddArg(mem) return true } return false } -func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool { - // match: (MOVWloadidx ptr (MOVDconst [c]) mem) +func rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool { + // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem) // cond: - // result: (MOVWload [c] ptr mem) + // result: (MOVWstore [c] ptr val mem) for { - _ = v.Args[2] + _ = v.Args[3] ptr := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - mem := v.Args[2] - v.reset(OpARM64MOVWload) + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVWstore) v.AuxInt = c v.AddArg(ptr) + v.AddArg(val) v.AddArg(mem) return true } - // match: (MOVWloadidx (MOVDconst [c]) ptr mem) + // match: (MOVWstoreidx (MOVDconst [c]) idx val mem) // cond: - // result: (MOVWload [c] ptr mem) + // result: (MOVWstore [c] idx val mem) for { - _ = v.Args[2] + _ = v.Args[3] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDconst { break } c := v_0.AuxInt - ptr := v.Args[1] - mem := v.Args[2] - v.reset(OpARM64MOVWload) + idx := v.Args[1] + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVWstore) v.AuxInt = c - v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(val) v.AddArg(mem) return true } - // match: (MOVWloadidx ptr (SLLconst [2] idx) mem) + // match: (MOVWstoreidx ptr (SLLconst [2] idx) val mem) // cond: - // result: (MOVWloadidx4 ptr idx mem) + // result: (MOVWstoreidx4 ptr idx val mem) for { - _ = v.Args[2] + _ = v.Args[3] ptr := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64SLLconst { @@ -15418,18 +17360,20 @@ func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool { break } idx := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64MOVWloadidx4) + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVWstoreidx4) v.AddArg(ptr) v.AddArg(idx) + v.AddArg(val) v.AddArg(mem) return true } - // match: (MOVWloadidx (SLLconst [2] idx) ptr mem) + // match: (MOVWstoreidx (SLLconst [2] idx) ptr val mem) // cond: - // result: (MOVWloadidx4 ptr idx mem) + // result: (MOVWstoreidx4 ptr idx val mem) for { - _ = v.Args[2] + _ = 
v.Args[3] v_0 := v.Args[0] if v_0.Op != OpARM64SLLconst { break @@ -15439,459 +17383,245 @@ func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool { } idx := v_0.Args[0] ptr := v.Args[1] - mem := v.Args[2] - v.reset(OpARM64MOVWloadidx4) + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVWstoreidx4) v.AddArg(ptr) v.AddArg(idx) + v.AddArg(val) v.AddArg(mem) return true } - // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _)) - // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) - // result: (MOVDconst [0]) + // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem) + // cond: + // result: (MOVWstorezeroidx ptr idx mem) for { - _ = v.Args[2] + _ = v.Args[3] ptr := v.Args[0] idx := v.Args[1] v_2 := v.Args[2] - if v_2.Op != OpARM64MOVWstorezeroidx { - break - } - _ = v_2.Args[2] - ptr2 := v_2.Args[0] - idx2 := v_2.Args[1] - if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { + if v_2.Op != OpARM64MOVDconst { break } - v.reset(OpARM64MOVDconst) - v.AuxInt = 0 - return true - } - return false -} -func rewriteValueARM64_OpARM64MOVWloadidx4_0(v *Value) bool { - // match: (MOVWloadidx4 ptr (MOVDconst [c]) mem) - // cond: - // result: (MOVWload [c<<2] ptr mem) - for { - _ = v.Args[2] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + if v_2.AuxInt != 0 { break } - c := v_1.AuxInt - mem := v.Args[2] - v.reset(OpARM64MOVWload) - v.AuxInt = c << 2 + mem := v.Args[3] + v.reset(OpARM64MOVWstorezeroidx) v.AddArg(ptr) + v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) - // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) - // result: (MOVDconst [0]) + // match: (MOVWstoreidx ptr idx (MOVWreg x) mem) + // cond: + // result: (MOVWstoreidx ptr idx x mem) for { - _ = v.Args[2] + _ = v.Args[3] ptr := v.Args[0] idx := v.Args[1] v_2 := v.Args[2] - if v_2.Op != OpARM64MOVWstorezeroidx4 { - break - } - _ = v_2.Args[2] - ptr2 := v_2.Args[0] - idx2 := v_2.Args[1] - if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) { - break - } - v.reset(OpARM64MOVDconst) - v.AuxInt = 0 - return true - } - return false -} -func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool { - // match: (MOVWreg x:(MOVBload _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVBload { - break - } - _ = x.Args[1] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVBUload _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVBUload { - break - } - _ = x.Args[1] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVHload _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVHload { - break - } - _ = x.Args[1] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVHUload _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVHUload { - break - } - _ = x.Args[1] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVWload _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVWload { - break - } - _ = x.Args[1] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVBloadidx _ _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVBloadidx { - break - } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) 
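// Aside on the MOVWreg extension-elimination cases being relocated here:
// every (MOVWreg x:(MOV{B,BU,H,HU}load...)) -> (MOVDreg x) rewrite relies on
// the fact that sign-extending to 32 bits a value that is already a correctly
// extended 8- or 16-bit quantity changes nothing, so the extension can be
// dropped. A minimal, self-contained check of that invariant (illustrative
// sketch only, not part of the generated rewriter):

package main

import "fmt"

func main() {
	for i := -128; i <= 127; i++ {
		b := int8(i)
		viaMOVWreg := int64(int32(b)) // MOVBload + MOVWreg: extend 8->32, then 32->64
		direct := int64(b)            // MOVDreg: the loaded value is already correct in 64 bits
		if viaMOVWreg != direct {
			fmt.Println("mismatch at", i)
		}
	}
	fmt.Println("ok: MOVWreg over a sign-extended byte is a no-op")
}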
- v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVBUloadidx _ _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVBUloadidx { - break - } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVHloadidx _ _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVHloadidx { - break - } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVHUloadidx _ _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVHUloadidx { - break - } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVWloadidx _ _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVWloadidx { - break - } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - return false -} -func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool { - // match: (MOVWreg x:(MOVHloadidx2 _ _ _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVHloadidx2 { + if v_2.Op != OpARM64MOVWreg { break } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr) + v.AddArg(idx) v.AddArg(x) + v.AddArg(mem) return true } - // match: (MOVWreg x:(MOVHUloadidx2 _ _ _)) + // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem) // cond: - // result: (MOVDreg x) + // result: (MOVWstoreidx ptr idx x mem) for { - x := v.Args[0] - if x.Op != OpARM64MOVHUloadidx2 { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWUreg { break } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVWstoreidx) + v.AddArg(ptr) + v.AddArg(idx) v.AddArg(x) + v.AddArg(mem) return true } - // match: (MOVWreg x:(MOVWloadidx4 _ _ _)) - // cond: - // result: (MOVDreg x) + // match: (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVDstoreidx ptr idx w mem) for { - x := v.Args[0] - if x.Op != OpARM64MOVWloadidx4 { + _ = v.Args[3] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64ADDconst { break } - _ = x.Args[2] - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVBreg _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVBreg { + if v_1.AuxInt != 4 { break } - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVBUreg _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVBUreg { + idx := v_1.Args[0] + v_2 := v.Args[2] + if v_2.Op != OpARM64SRLconst { break } - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVHreg _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVHreg { + if v_2.AuxInt != 32 { break } - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVHreg _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVHreg { + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpARM64MOVWstoreidx { break } - v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg x:(MOVWreg _)) - // cond: - // result: (MOVDreg x) - for { - x := v.Args[0] - if x.Op != OpARM64MOVWreg { + _ = x.Args[3] + if ptr != x.Args[0] { break } - 
v.reset(OpARM64MOVDreg) - v.AddArg(x) - return true - } - // match: (MOVWreg (MOVDconst [c])) - // cond: - // result: (MOVDconst [int64(int32(c))]) - for { - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + if idx != x.Args[1] { break } - c := v_0.AuxInt - v.reset(OpARM64MOVDconst) - v.AuxInt = int64(int32(c)) - return true - } - // match: (MOVWreg (SLLconst [lc] x)) - // cond: lc < 32 - // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpARM64SLLconst { + if w != x.Args[2] { break } - lc := v_0.AuxInt - x := v_0.Args[0] - if !(lc < 32) { + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpARM64SBFIZ) - v.AuxInt = arm64BFAuxInt(lc, 32-lc) - v.AddArg(x) + v.reset(OpARM64MOVDstoreidx) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) return true } return false } -func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool { - b := v.Block - _ = b - config := b.Func.Config - _ = config - // match: (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) +func rewriteValueARM64_OpARM64MOVWstoreidx4_0(v *Value) bool { + // match: (MOVWstoreidx4 ptr (MOVDconst [c]) val mem) // cond: - // result: (FMOVSstore [off] {sym} ptr val mem) + // result: (MOVWstore [c<<2] ptr val mem) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] + _ = v.Args[3] ptr := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpARM64FMOVSfpgp { - break - } - val := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64FMOVSstore) - v.AuxInt = off - v.Aux = sym + if v_1.Op != OpARM64MOVDconst { + break + } + c := v_1.AuxInt + val := v.Args[2] + mem := v.Args[3] + v.reset(OpARM64MOVWstore) + v.AuxInt = c << 2 v.AddArg(ptr) v.AddArg(val) v.AddArg(mem) return true } - // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWstore [off1+off2] {sym} ptr val mem) + // match: (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) + // cond: + // result: (MOVWstorezeroidx4 ptr idx mem) for { - off1 := v.AuxInt - sym := v.Aux - _ = v.Args[2] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADDconst { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v.Args[1] - mem := v.Args[2] - if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + if v_2.AuxInt != 0 { break } - v.reset(OpARM64MOVWstore) - v.AuxInt = off1 + off2 - v.Aux = sym + mem := v.Args[3] + v.reset(OpARM64MOVWstorezeroidx4) v.AddArg(ptr) - v.AddArg(val) + v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem) - // cond: off == 0 && sym == nil - // result: (MOVWstoreidx ptr idx val mem) + // match: (MOVWstoreidx4 ptr idx (MOVWreg x) mem) + // cond: + // result: (MOVWstoreidx4 ptr idx x mem) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADD { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVWreg { break } - _ = v_0.Args[1] - ptr := v_0.Args[0] - idx := v_0.Args[1] - val := v.Args[1] - mem := v.Args[2] - if !(off == 0 && sym == nil) { + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVWstoreidx4) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVWstoreidx4 ptr idx (MOVWUreg x) mem) + // cond: + // result: (MOVWstoreidx4 ptr idx x mem) + for { + _ = v.Args[3] + ptr := v.Args[0] + idx := v.Args[1] + v_2 := 
v.Args[2] + if v_2.Op != OpARM64MOVWUreg { break } - v.reset(OpARM64MOVWstoreidx) + x := v_2.Args[0] + mem := v.Args[3] + v.reset(OpARM64MOVWstoreidx4) v.AddArg(ptr) v.AddArg(idx) - v.AddArg(val) + v.AddArg(x) v.AddArg(mem) return true } - // match: (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) - // cond: off == 0 && sym == nil - // result: (MOVWstoreidx4 ptr idx val mem) + return false +} +func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVWstorezero [off1+off2] {sym} ptr mem) for { - off := v.AuxInt + off1 := v.AuxInt sym := v.Aux - _ = v.Args[2] + _ = v.Args[1] v_0 := v.Args[0] - if v_0.Op != OpARM64ADDshiftLL { - break - } - if v_0.AuxInt != 2 { + if v_0.Op != OpARM64ADDconst { break } - _ = v_0.Args[1] + off2 := v_0.AuxInt ptr := v_0.Args[0] - idx := v_0.Args[1] - val := v.Args[1] - mem := v.Args[2] - if !(off == 0 && sym == nil) { + mem := v.Args[1] + if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(OpARM64MOVWstoreidx4) + v.reset(OpARM64MOVWstorezero) + v.AuxInt = off1 + off2 + v.Aux = sym v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(val) v.AddArg(mem) return true } - // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) + // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) + // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) for { off1 := v.AuxInt sym1 := v.Aux - _ = v.Args[2] + _ = v.Args[1] v_0 := v.Args[0] if v_0.Op != OpARM64MOVDaddr { break @@ -15899,138 +17629,106 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool { off2 := v_0.AuxInt sym2 := v_0.Aux ptr := v_0.Args[0] - val := v.Args[1] - mem := v.Args[2] + mem := v.Args[1] if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(OpARM64MOVWstore) + v.reset(OpARM64MOVWstorezero) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) v.AddArg(ptr) - v.AddArg(val) v.AddArg(mem) return true } - // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) - // cond: - // result: (MOVWstorezero [off] {sym} ptr mem) + // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVWstorezeroidx ptr idx mem) for { off := v.AuxInt sym := v.Aux - _ = v.Args[2] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARM64ADD { break } - if v_1.AuxInt != 0 { + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { break } - mem := v.Args[2] - v.reset(OpARM64MOVWstorezero) - v.AuxInt = off - v.Aux = sym + v.reset(OpARM64MOVWstorezeroidx) v.AddArg(ptr) + v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem) - // cond: - // result: (MOVWstore [off] {sym} ptr x mem) + // match: (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (MOVWstorezeroidx4 ptr idx mem) for { off := v.AuxInt sym := v.Aux - _ = v.Args[2] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVWreg { + _ = v.Args[1] + v_0 := 
v.Args[0] + if v_0.Op != OpARM64ADDshiftLL { break } - x := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64MOVWstore) - v.AuxInt = off - v.Aux = sym - v.AddArg(ptr) - v.AddArg(x) - v.AddArg(mem) - return true - } - // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) - // cond: - // result: (MOVWstore [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[2] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVWUreg { + if v_0.AuxInt != 2 { break } - x := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64MOVWstore) - v.AuxInt = off - v.Aux = sym + _ = v_0.Args[1] + ptr := v_0.Args[0] + idx := v_0.Args[1] + mem := v.Args[1] + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64MOVWstorezeroidx4) v.AddArg(ptr) - v.AddArg(x) + v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem)) - // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) - // result: (MOVDstore [i-4] {s} ptr0 w mem) + // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem)) + // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x) + // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem) for { i := v.AuxInt s := v.Aux - _ = v.Args[2] + _ = v.Args[1] ptr0 := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64SRLconst { - break - } - if v_1.AuxInt != 32 { - break - } - w := v_1.Args[0] - x := v.Args[2] - if x.Op != OpARM64MOVWstore { - break - } - if x.AuxInt != i-4 { + x := v.Args[1] + if x.Op != OpARM64MOVWstorezero { break } + j := x.AuxInt if x.Aux != s { break } - _ = x.Args[2] + _ = x.Args[1] ptr1 := x.Args[0] - if w != x.Args[1] { - break - } - mem := x.Args[2] - if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) { + mem := x.Args[1] + if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) { break } - v.reset(OpARM64MOVDstore) - v.AuxInt = i - 4 + v.reset(OpARM64MOVDstorezero) + v.AuxInt = min(i, j) v.Aux = s v.AddArg(ptr0) - v.AddArg(w) v.AddArg(mem) return true } - // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem)) + // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem)) // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) - // result: (MOVDstoreidx ptr1 idx1 w mem) + // result: (MOVDstorezeroidx ptr1 idx1 mem) for { if v.AuxInt != 4 { break } s := v.Aux - _ = v.Args[2] + _ = v.Args[1] v_0 := v.Args[0] if v_0.Op != OpARM64ADD { break @@ -16038,49 +17736,32 @@ func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool { _ = v_0.Args[1] ptr0 := v_0.Args[0] idx0 := v_0.Args[1] - v_1 := v.Args[1] - if v_1.Op != OpARM64SRLconst { - break - } - if v_1.AuxInt != 32 { - break - } - w := v_1.Args[0] - x := v.Args[2] - if x.Op != OpARM64MOVWstoreidx { + x := v.Args[1] + if x.Op != OpARM64MOVWstorezeroidx { break } - _ = x.Args[3] + _ = x.Args[2] ptr1 := x.Args[0] idx1 := x.Args[1] - if w != x.Args[2] { - break - } - mem := x.Args[3] + mem := x.Args[2] if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { break } - v.reset(OpARM64MOVDstoreidx) + v.reset(OpARM64MOVDstorezeroidx) v.AddArg(ptr1) v.AddArg(idx1) - v.AddArg(w) v.AddArg(mem) return true } - return false -} -func rewriteValueARM64_OpARM64MOVWstore_10(v *Value) bool { 
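// The function being removed below (and its rewritten copy earlier in this
// diff) merges a store of the low word of w with a store of its high word,
// (SRLconst [32] w), at the adjacent offset into a single MOVDstore. On a
// little-endian target the two forms write exactly the same eight bytes. A
// minimal sketch of that equivalence (illustrative only; buffer offsets 0
// and 4 stand in for [i-4] and [i]):

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	w := uint64(0x1122334455667788)

	pair := make([]byte, 8)
	binary.LittleEndian.PutUint32(pair[0:], uint32(w))     // MOVWstore [i-4] {s} ptr w
	binary.LittleEndian.PutUint32(pair[4:], uint32(w>>32)) // MOVWstore [i] {s} ptr (SRLconst [32] w)

	single := make([]byte, 8)
	binary.LittleEndian.PutUint64(single, w) // MOVDstore [i-4] {s} ptr w

	fmt.Println("same bytes:", bytes.Equal(pair, single))
}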
- b := v.Block - _ = b - // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem)) + // match: (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem)) // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x) - // result: (MOVDstoreidx ptr1 (SLLconst [2] idx1) w mem) + // result: (MOVDstorezeroidx ptr1 (SLLconst [2] idx1) mem) for { if v.AuxInt != 4 { break } s := v.Aux - _ = v.Args[2] + _ = v.Args[1] v_0 := v.Args[0] if v_0.Op != OpARM64ADDshiftLL { break @@ -16091,816 +17772,1179 @@ func rewriteValueARM64_OpARM64MOVWstore_10(v *Value) bool { _ = v_0.Args[1] ptr0 := v_0.Args[0] idx0 := v_0.Args[1] - v_1 := v.Args[1] - if v_1.Op != OpARM64SRLconst { - break - } - if v_1.AuxInt != 32 { - break - } - w := v_1.Args[0] - x := v.Args[2] - if x.Op != OpARM64MOVWstoreidx4 { + x := v.Args[1] + if x.Op != OpARM64MOVWstorezeroidx4 { break } - _ = x.Args[3] + _ = x.Args[2] ptr1 := x.Args[0] idx1 := x.Args[1] - if w != x.Args[2] { - break - } - mem := x.Args[3] + mem := x.Args[2] if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) { break } - v.reset(OpARM64MOVDstoreidx) + v.reset(OpARM64MOVDstorezeroidx) v.AddArg(ptr1) v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type) v0.AuxInt = 2 v0.AddArg(idx1) v.AddArg(v0) - v.AddArg(w) v.AddArg(mem) return true } - // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem)) - // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x) - // result: (MOVDstore [i-4] {s} ptr0 w0 mem) + return false +} +func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool { + // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVWstorezero [c] ptr mem) for { - i := v.AuxInt - s := v.Aux _ = v.Args[2] - ptr0 := v.Args[0] + ptr := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpARM64SRLconst { + if v_1.Op != OpARM64MOVDconst { break } - j := v_1.AuxInt - w := v_1.Args[0] - x := v.Args[2] - if x.Op != OpARM64MOVWstore { + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVWstorezero) + v.AuxInt = c + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem) + // cond: + // result: (MOVWstorezero [c] idx mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { break } - if x.AuxInt != i-4 { + c := v_0.AuxInt + idx := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVWstorezero) + v.AuxInt = c + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVWstorezeroidx ptr (SLLconst [2] idx) mem) + // cond: + // result: (MOVWstorezeroidx4 ptr idx mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64SLLconst { break } - if x.Aux != s { + if v_1.AuxInt != 2 { break } - _ = x.Args[2] - ptr1 := x.Args[0] - w0 := x.Args[1] - if w0.Op != OpARM64SRLconst { + idx := v_1.Args[0] + mem := v.Args[2] + v.reset(OpARM64MOVWstorezeroidx4) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVWstorezeroidx (SLLconst [2] idx) ptr mem) + // cond: + // result: (MOVWstorezeroidx4 ptr idx mem) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64SLLconst { break } - if w0.AuxInt != j-32 { + if v_0.AuxInt != 2 { break } - if w != w0.Args[0] { + idx := v_0.Args[0] + ptr := v.Args[1] + mem := v.Args[2] + v.reset(OpARM64MOVWstorezeroidx4) + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: 
(MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVDstorezeroidx ptr idx mem) + for { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64ADDconst { + break + } + if v_1.AuxInt != 4 { + break + } + idx := v_1.Args[0] + x := v.Args[2] + if x.Op != OpARM64MOVWstorezeroidx { + break + } + _ = x.Args[2] + if ptr != x.Args[0] { + break + } + if idx != x.Args[1] { break } mem := x.Args[2] - if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) { + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpARM64MOVDstore) - v.AuxInt = i - 4 - v.Aux = s - v.AddArg(ptr0) - v.AddArg(w0) + v.reset(OpARM64MOVDstorezeroidx) + v.AddArg(ptr) + v.AddArg(idx) v.AddArg(mem) return true } - // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem)) - // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) - // result: (MOVDstoreidx ptr1 idx1 w0 mem) + return false +} +func rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v *Value) bool { + // match: (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) + // cond: + // result: (MOVWstorezero [c<<2] ptr mem) for { - if v.AuxInt != 4 { + _ = v.Args[2] + ptr := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - s := v.Aux + c := v_1.AuxInt + mem := v.Args[2] + v.reset(OpARM64MOVWstorezero) + v.AuxInt = c << 2 + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64MSUB_0(v *Value) bool { + b := v.Block + _ = b + // match: (MSUB a x (MOVDconst [-1])) + // cond: + // result: (ADD a x) + for { _ = v.Args[2] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADD { + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - _ = v_0.Args[1] - ptr0 := v_0.Args[0] - idx0 := v_0.Args[1] - v_1 := v.Args[1] - if v_1.Op != OpARM64SRLconst { + if v_2.AuxInt != -1 { break } - j := v_1.AuxInt - w := v_1.Args[0] - x := v.Args[2] - if x.Op != OpARM64MOVWstoreidx { + v.reset(OpARM64ADD) + v.AddArg(a) + v.AddArg(x) + return true + } + // match: (MSUB a _ (MOVDconst [0])) + // cond: + // result: a + for { + _ = v.Args[2] + a := v.Args[0] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - _ = x.Args[3] - ptr1 := x.Args[0] - idx1 := x.Args[1] - w0 := x.Args[2] - if w0.Op != OpARM64SRLconst { + if v_2.AuxInt != 0 { break } - if w0.AuxInt != j-32 { + v.reset(OpCopy) + v.Type = a.Type + v.AddArg(a) + return true + } + // match: (MSUB a x (MOVDconst [1])) + // cond: + // result: (SUB a x) + for { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break + } + if v_2.AuxInt != 1 { + break + } + v.reset(OpARM64SUB) + v.AddArg(a) + v.AddArg(x) + return true + } + // match: (MSUB a x (MOVDconst [c])) + // cond: isPowerOfTwo(c) + // result: (SUBshiftLL a x [log2(c)]) + for { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break + } + c := v_2.AuxInt + if !(isPowerOfTwo(c)) { break } - if w != w0.Args[0] { + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c) + v.AddArg(a) + v.AddArg(x) + return true + } + // match: (MSUB a x (MOVDconst [c])) + // cond: isPowerOfTwo(c-1) && c>=3 + // result: (SUB a (ADDshiftLL x x [log2(c-1)])) + for { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - mem := 
x.Args[3] - if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + c := v_2.AuxInt + if !(isPowerOfTwo(c-1) && c >= 3) { break } - v.reset(OpARM64MOVDstoreidx) - v.AddArg(ptr1) - v.AddArg(idx1) - v.AddArg(w0) - v.AddArg(mem) + v.reset(OpARM64SUB) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem)) - // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x) - // result: (MOVDstoreidx ptr1 (SLLconst [2] idx1) w0 mem) + // match: (MSUB a x (MOVDconst [c])) + // cond: isPowerOfTwo(c+1) && c>=7 + // result: (ADD a (SUBshiftLL x x [log2(c+1)])) for { - if v.AuxInt != 4 { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - s := v.Aux + c := v_2.AuxInt + if !(isPowerOfTwo(c+1) && c >= 7) { + break + } + v.reset(OpARM64ADD) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MSUB a x (MOVDconst [c])) + // cond: c%3 == 0 && isPowerOfTwo(c/3) + // result: (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)]) + for { _ = v.Args[2] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADDshiftLL { + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - if v_0.AuxInt != 2 { + c := v_2.AuxInt + if !(c%3 == 0 && isPowerOfTwo(c/3)) { break } - _ = v_0.Args[1] - ptr0 := v_0.Args[0] - idx0 := v_0.Args[1] - v_1 := v.Args[1] - if v_1.Op != OpARM64SRLconst { + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 3) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MSUB a x (MOVDconst [c])) + // cond: c%5 == 0 && isPowerOfTwo(c/5) + // result: (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)]) + for { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - j := v_1.AuxInt - w := v_1.Args[0] - x := v.Args[2] - if x.Op != OpARM64MOVWstoreidx4 { + c := v_2.AuxInt + if !(c%5 == 0 && isPowerOfTwo(c/5)) { break } - _ = x.Args[3] - ptr1 := x.Args[0] - idx1 := x.Args[1] - w0 := x.Args[2] - if w0.Op != OpARM64SRLconst { + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 5) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MSUB a x (MOVDconst [c])) + // cond: c%7 == 0 && isPowerOfTwo(c/7) + // result: (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)]) + for { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - if w0.AuxInt != j-32 { + c := v_2.AuxInt + if !(c%7 == 0 && isPowerOfTwo(c/7)) { break } - if w != w0.Args[0] { + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 7) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MSUB a x (MOVDconst [c])) + // cond: c%9 == 0 && isPowerOfTwo(c/9) + // result: (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)]) + for { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op 
!= OpARM64MOVDconst { break } - mem := x.Args[3] - if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) { + c := v_2.AuxInt + if !(c%9 == 0 && isPowerOfTwo(c/9)) { break } - v.reset(OpARM64MOVDstoreidx) - v.AddArg(ptr1) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type) - v0.AuxInt = 2 - v0.AddArg(idx1) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 9) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) - v.AddArg(w0) - v.AddArg(mem) return true } return false } -func rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool { - // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem) +func rewriteValueARM64_OpARM64MSUB_10(v *Value) bool { + b := v.Block + _ = b + // match: (MSUB a (MOVDconst [-1]) x) // cond: - // result: (MOVWstore [c] ptr val mem) + // result: (ADD a x) for { - _ = v.Args[3] - ptr := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - val := v.Args[2] - mem := v.Args[3] - v.reset(OpARM64MOVWstore) - v.AuxInt = c - v.AddArg(ptr) - v.AddArg(val) - v.AddArg(mem) + if v_1.AuxInt != -1 { + break + } + x := v.Args[2] + v.reset(OpARM64ADD) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MOVWstoreidx (MOVDconst [c]) idx val mem) + // match: (MSUB a (MOVDconst [0]) _) // cond: - // result: (MOVWstore [c] idx val mem) + // result: a for { - _ = v.Args[3] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - idx := v.Args[1] - val := v.Args[2] - mem := v.Args[3] - v.reset(OpARM64MOVWstore) - v.AuxInt = c - v.AddArg(idx) - v.AddArg(val) - v.AddArg(mem) + if v_1.AuxInt != 0 { + break + } + v.reset(OpCopy) + v.Type = a.Type + v.AddArg(a) return true } - // match: (MOVWstoreidx ptr (SLLconst [2] idx) val mem) + // match: (MSUB a (MOVDconst [1]) x) // cond: - // result: (MOVWstoreidx4 ptr idx val mem) + // result: (SUB a x) for { - _ = v.Args[3] - ptr := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpARM64SLLconst { + if v_1.Op != OpARM64MOVDconst { break } - if v_1.AuxInt != 2 { + if v_1.AuxInt != 1 { break } - idx := v_1.Args[0] - val := v.Args[2] - mem := v.Args[3] - v.reset(OpARM64MOVWstoreidx4) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(val) - v.AddArg(mem) + x := v.Args[2] + v.reset(OpARM64SUB) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MOVWstoreidx (SLLconst [2] idx) ptr val mem) - // cond: - // result: (MOVWstoreidx4 ptr idx val mem) + // match: (MSUB a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c) + // result: (SUBshiftLL a x [log2(c)]) for { - _ = v.Args[3] - v_0 := v.Args[0] - if v_0.Op != OpARM64SLLconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - if v_0.AuxInt != 2 { + c := v_1.AuxInt + x := v.Args[2] + if !(isPowerOfTwo(c)) { break } - idx := v_0.Args[0] - ptr := v.Args[1] - val := v.Args[2] - mem := v.Args[3] - v.reset(OpARM64MOVWstoreidx4) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(val) - v.AddArg(mem) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem) - // cond: - // result: (MOVWstorezeroidx ptr idx mem) + // match: (MSUB a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c-1) && c>=3 + // result: (SUB a (ADDshiftLL x x [log2(c-1)])) for { - _ = v.Args[3] - ptr := v.Args[0] - idx := 
v.Args[1] - v_2 := v.Args[2] - if v_2.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - if v_2.AuxInt != 0 { + c := v_1.AuxInt + x := v.Args[2] + if !(isPowerOfTwo(c-1) && c >= 3) { break } - mem := v.Args[3] - v.reset(OpARM64MOVWstorezeroidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + v.reset(OpARM64SUB) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstoreidx ptr idx (MOVWreg x) mem) - // cond: - // result: (MOVWstoreidx ptr idx x mem) + // match: (MSUB a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c+1) && c>=7 + // result: (ADD a (SUBshiftLL x x [log2(c+1)])) for { - _ = v.Args[3] - ptr := v.Args[0] - idx := v.Args[1] - v_2 := v.Args[2] - if v_2.Op != OpARM64MOVWreg { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - x := v_2.Args[0] - mem := v.Args[3] - v.reset(OpARM64MOVWstoreidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(x) - v.AddArg(mem) + c := v_1.AuxInt + x := v.Args[2] + if !(isPowerOfTwo(c+1) && c >= 7) { + break + } + v.reset(OpARM64ADD) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem) - // cond: - // result: (MOVWstoreidx ptr idx x mem) + // match: (MSUB a (MOVDconst [c]) x) + // cond: c%3 == 0 && isPowerOfTwo(c/3) + // result: (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)]) for { - _ = v.Args[3] - ptr := v.Args[0] - idx := v.Args[1] - v_2 := v.Args[2] - if v_2.Op != OpARM64MOVWUreg { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - x := v_2.Args[0] - mem := v.Args[3] - v.reset(OpARM64MOVWstoreidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(x) - v.AddArg(mem) + c := v_1.AuxInt + x := v.Args[2] + if !(c%3 == 0 && isPowerOfTwo(c/3)) { + break + } + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 3) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVDstoreidx ptr idx w mem) + // match: (MSUB a (MOVDconst [c]) x) + // cond: c%5 == 0 && isPowerOfTwo(c/5) + // result: (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)]) for { - _ = v.Args[3] - ptr := v.Args[0] + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpARM64ADDconst { + if v_1.Op != OpARM64MOVDconst { break } - if v_1.AuxInt != 4 { + c := v_1.AuxInt + x := v.Args[2] + if !(c%5 == 0 && isPowerOfTwo(c/5)) { break } - idx := v_1.Args[0] - v_2 := v.Args[2] - if v_2.Op != OpARM64SRLconst { + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 5) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MSUB a (MOVDconst [c]) x) + // cond: c%7 == 0 && isPowerOfTwo(c/7) + // result: (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)]) + for { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - if v_2.AuxInt != 32 { + c := v_1.AuxInt + x := v.Args[2] + if !(c%7 == 0 && isPowerOfTwo(c/7)) { break } - w := v_2.Args[0] - x := v.Args[3] - if x.Op != OpARM64MOVWstoreidx { + v.reset(OpARM64ADDshiftLL) + 
v.AuxInt = log2(c / 7) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MSUB a (MOVDconst [c]) x) + // cond: c%9 == 0 && isPowerOfTwo(c/9) + // result: (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)]) + for { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - _ = x.Args[3] - if ptr != x.Args[0] { + c := v_1.AuxInt + x := v.Args[2] + if !(c%9 == 0 && isPowerOfTwo(c/9)) { break } - if idx != x.Args[1] { + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 9) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueARM64_OpARM64MSUB_20(v *Value) bool { + b := v.Block + _ = b + // match: (MSUB (MOVDconst [c]) x y) + // cond: + // result: (ADDconst [c] (MNEG x y)) + for { + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { break } - if w != x.Args[2] { + c := v_0.AuxInt + x := v.Args[1] + y := v.Args[2] + v.reset(OpARM64ADDconst) + v.AuxInt = c + v0 := b.NewValue0(v.Pos, OpARM64MNEG, x.Type) + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + // match: (MSUB a (MOVDconst [c]) (MOVDconst [d])) + // cond: + // result: (SUBconst [c*d] a) + for { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - mem := x.Args[3] - if !(x.Uses == 1 && clobber(x)) { + c := v_1.AuxInt + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - v.reset(OpARM64MOVDstoreidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(w) - v.AddArg(mem) + d := v_2.AuxInt + v.reset(OpARM64SUBconst) + v.AuxInt = c * d + v.AddArg(a) return true } return false } -func rewriteValueARM64_OpARM64MOVWstoreidx4_0(v *Value) bool { - // match: (MOVWstoreidx4 ptr (MOVDconst [c]) val mem) - // cond: - // result: (MOVWstore [c<<2] ptr val mem) +func rewriteValueARM64_OpARM64MSUBW_0(v *Value) bool { + b := v.Block + _ = b + // match: (MSUBW a x (MOVDconst [c])) + // cond: int32(c)==-1 + // result: (ADD a x) for { - _ = v.Args[3] - ptr := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpARM64MOVDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt - val := v.Args[2] - mem := v.Args[3] - v.reset(OpARM64MOVWstore) - v.AuxInt = c << 2 - v.AddArg(ptr) - v.AddArg(val) - v.AddArg(mem) + c := v_2.AuxInt + if !(int32(c) == -1) { + break + } + v.reset(OpARM64ADD) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) - // cond: - // result: (MOVWstorezeroidx4 ptr idx mem) + // match: (MSUBW a _ (MOVDconst [c])) + // cond: int32(c)==0 + // result: a for { - _ = v.Args[3] - ptr := v.Args[0] - idx := v.Args[1] + _ = v.Args[2] + a := v.Args[0] v_2 := v.Args[2] if v_2.Op != OpARM64MOVDconst { break } - if v_2.AuxInt != 0 { + c := v_2.AuxInt + if !(int32(c) == 0) { break } - mem := v.Args[3] - v.reset(OpARM64MOVWstorezeroidx4) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + v.reset(OpCopy) + v.Type = a.Type + v.AddArg(a) return true } - // match: (MOVWstoreidx4 ptr idx (MOVWreg x) mem) - // cond: - // result: (MOVWstoreidx4 ptr idx x mem) + // match: (MSUBW a x (MOVDconst [c])) + // cond: int32(c)==1 + // result: (SUB a x) for { - _ = v.Args[3] - ptr := v.Args[0] - idx := v.Args[1] + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] v_2 := v.Args[2] - if v_2.Op != OpARM64MOVWreg { + if v_2.Op != 
OpARM64MOVDconst { break } - x := v_2.Args[0] - mem := v.Args[3] - v.reset(OpARM64MOVWstoreidx4) - v.AddArg(ptr) - v.AddArg(idx) + c := v_2.AuxInt + if !(int32(c) == 1) { + break + } + v.reset(OpARM64SUB) + v.AddArg(a) v.AddArg(x) - v.AddArg(mem) return true } - // match: (MOVWstoreidx4 ptr idx (MOVWUreg x) mem) - // cond: - // result: (MOVWstoreidx4 ptr idx x mem) + // match: (MSUBW a x (MOVDconst [c])) + // cond: isPowerOfTwo(c) + // result: (SUBshiftLL a x [log2(c)]) for { - _ = v.Args[3] - ptr := v.Args[0] - idx := v.Args[1] + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] v_2 := v.Args[2] - if v_2.Op != OpARM64MOVWUreg { + if v_2.Op != OpARM64MOVDconst { break } - x := v_2.Args[0] - mem := v.Args[3] - v.reset(OpARM64MOVWstoreidx4) - v.AddArg(ptr) - v.AddArg(idx) + c := v_2.AuxInt + if !(isPowerOfTwo(c)) { + break + } + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c) + v.AddArg(a) v.AddArg(x) - v.AddArg(mem) return true } - return false -} -func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool { - b := v.Block - _ = b - config := b.Func.Config - _ = config - // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWstorezero [off1+off2] {sym} ptr mem) + // match: (MSUBW a x (MOVDconst [c])) + // cond: isPowerOfTwo(c-1) && int32(c)>=3 + // result: (SUB a (ADDshiftLL x x [log2(c-1)])) for { - off1 := v.AuxInt - sym := v.Aux - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADDconst { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v.Args[1] - if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + c := v_2.AuxInt + if !(isPowerOfTwo(c-1) && int32(c) >= 3) { break } - v.reset(OpARM64MOVWstorezero) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg(ptr) - v.AddArg(mem) + v.reset(OpARM64SUB) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) - // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + // match: (MSUBW a x (MOVDconst [c])) + // cond: isPowerOfTwo(c+1) && int32(c)>=7 + // result: (ADD a (SUBshiftLL x x [log2(c+1)])) for { - off1 := v.AuxInt - sym1 := v.Aux - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDaddr { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v.Args[1] - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + c := v_2.AuxInt + if !(isPowerOfTwo(c+1) && int32(c) >= 7) { break } - v.reset(OpARM64MOVWstorezero) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg(ptr) - v.AddArg(mem) + v.reset(OpARM64ADD) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem) - // cond: off == 0 && sym == nil - // result: (MOVWstorezeroidx ptr idx mem) + // match: (MSUBW a x (MOVDconst [c])) + // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) + // result: (ADDshiftLL a (SUBshiftLL x x 
[2]) [log2(c/3)]) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADD { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - _ = v_0.Args[1] - ptr := v_0.Args[0] - idx := v_0.Args[1] - mem := v.Args[1] - if !(off == 0 && sym == nil) { + c := v_2.AuxInt + if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { break } - v.reset(OpARM64MOVWstorezeroidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 3) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) - // cond: off == 0 && sym == nil - // result: (MOVWstorezeroidx4 ptr idx mem) + // match: (MSUBW a x (MOVDconst [c])) + // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) + // result: (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)]) for { - off := v.AuxInt - sym := v.Aux - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADDshiftLL { - break - } - if v_0.AuxInt != 2 { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - _ = v_0.Args[1] - ptr := v_0.Args[0] - idx := v_0.Args[1] - mem := v.Args[1] - if !(off == 0 && sym == nil) { + c := v_2.AuxInt + if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) { break } - v.reset(OpARM64MOVWstorezeroidx4) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 5) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem)) - // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x) - // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem) + // match: (MSUBW a x (MOVDconst [c])) + // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) + // result: (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)]) for { - i := v.AuxInt - s := v.Aux - _ = v.Args[1] - ptr0 := v.Args[0] + _ = v.Args[2] + a := v.Args[0] x := v.Args[1] - if x.Op != OpARM64MOVWstorezero { - break - } - j := x.AuxInt - if x.Aux != s { + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - _ = x.Args[1] - ptr1 := x.Args[0] - mem := x.Args[1] - if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) { + c := v_2.AuxInt + if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { break } - v.reset(OpARM64MOVDstorezero) - v.AuxInt = min(i, j) - v.Aux = s - v.AddArg(ptr0) - v.AddArg(mem) + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 7) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem)) - // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x) - // result: (MOVDstorezeroidx ptr1 idx1 mem) + // match: (MSUBW a x (MOVDconst [c])) + // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) + // result: (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)]) for { - if v.AuxInt != 4 { + _ = v.Args[2] + a := v.Args[0] + x := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { break } - s := v.Aux - _ = v.Args[1] - v_0 
:= v.Args[0] - if v_0.Op != OpARM64ADD { + c := v_2.AuxInt + if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) { break } - _ = v_0.Args[1] - ptr0 := v_0.Args[0] - idx0 := v_0.Args[1] - x := v.Args[1] - if x.Op != OpARM64MOVWstorezeroidx { + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 9) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueARM64_OpARM64MSUBW_10(v *Value) bool { + b := v.Block + _ = b + // match: (MSUBW a (MOVDconst [c]) x) + // cond: int32(c)==-1 + // result: (ADD a x) + for { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - _ = x.Args[2] - ptr1 := x.Args[0] - idx1 := x.Args[1] - mem := x.Args[2] - if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { + c := v_1.AuxInt + x := v.Args[2] + if !(int32(c) == -1) { break } - v.reset(OpARM64MOVDstorezeroidx) - v.AddArg(ptr1) - v.AddArg(idx1) - v.AddArg(mem) + v.reset(OpARM64ADD) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem)) - // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x) - // result: (MOVDstorezeroidx ptr1 (SLLconst [2] idx1) mem) + // match: (MSUBW a (MOVDconst [c]) _) + // cond: int32(c)==0 + // result: a for { - if v.AuxInt != 4 { - break - } - s := v.Aux - _ = v.Args[1] - v_0 := v.Args[0] - if v_0.Op != OpARM64ADDshiftLL { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - if v_0.AuxInt != 2 { + c := v_1.AuxInt + if !(int32(c) == 0) { break } - _ = v_0.Args[1] - ptr0 := v_0.Args[0] - idx0 := v_0.Args[1] - x := v.Args[1] - if x.Op != OpARM64MOVWstorezeroidx4 { + v.reset(OpCopy) + v.Type = a.Type + v.AddArg(a) + return true + } + // match: (MSUBW a (MOVDconst [c]) x) + // cond: int32(c)==1 + // result: (SUB a x) + for { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - _ = x.Args[2] - ptr1 := x.Args[0] - idx1 := x.Args[1] - mem := x.Args[2] - if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) { + c := v_1.AuxInt + x := v.Args[2] + if !(int32(c) == 1) { break } - v.reset(OpARM64MOVDstorezeroidx) - v.AddArg(ptr1) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type) - v0.AuxInt = 2 - v0.AddArg(idx1) - v.AddArg(v0) - v.AddArg(mem) + v.reset(OpARM64SUB) + v.AddArg(a) + v.AddArg(x) return true } - return false -} -func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool { - // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem) - // cond: - // result: (MOVWstorezero [c] ptr mem) + // match: (MSUBW a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c) + // result: (SUBshiftLL a x [log2(c)]) for { _ = v.Args[2] - ptr := v.Args[0] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - mem := v.Args[2] - v.reset(OpARM64MOVWstorezero) - v.AuxInt = c - v.AddArg(ptr) - v.AddArg(mem) + x := v.Args[2] + if !(isPowerOfTwo(c)) { + break + } + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c) + v.AddArg(a) + v.AddArg(x) return true } - // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem) - // cond: - // result: (MOVWstorezero [c] idx mem) + // match: (MSUBW a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c-1) && int32(c)>=3 + // result: (SUB a (ADDshiftLL x x 
[log2(c-1)])) for { _ = v.Args[2] - v_0 := v.Args[0] - if v_0.Op != OpARM64MOVDconst { + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - c := v_0.AuxInt - idx := v.Args[1] - mem := v.Args[2] - v.reset(OpARM64MOVWstorezero) - v.AuxInt = c - v.AddArg(idx) - v.AddArg(mem) + c := v_1.AuxInt + x := v.Args[2] + if !(isPowerOfTwo(c-1) && int32(c) >= 3) { + break + } + v.reset(OpARM64SUB) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = log2(c - 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstorezeroidx ptr (SLLconst [2] idx) mem) - // cond: - // result: (MOVWstorezeroidx4 ptr idx mem) + // match: (MSUBW a (MOVDconst [c]) x) + // cond: isPowerOfTwo(c+1) && int32(c)>=7 + // result: (ADD a (SUBshiftLL x x [log2(c+1)])) for { _ = v.Args[2] - ptr := v.Args[0] + a := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpARM64SLLconst { + if v_1.Op != OpARM64MOVDconst { break } - if v_1.AuxInt != 2 { + c := v_1.AuxInt + x := v.Args[2] + if !(isPowerOfTwo(c+1) && int32(c) >= 7) { break } - idx := v_1.Args[0] - mem := v.Args[2] - v.reset(OpARM64MOVWstorezeroidx4) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + v.reset(OpARM64ADD) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = log2(c + 1) + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstorezeroidx (SLLconst [2] idx) ptr mem) - // cond: - // result: (MOVWstorezeroidx4 ptr idx mem) + // match: (MSUBW a (MOVDconst [c]) x) + // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) + // result: (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)]) for { _ = v.Args[2] - v_0 := v.Args[0] - if v_0.Op != OpARM64SLLconst { + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - if v_0.AuxInt != 2 { + c := v_1.AuxInt + x := v.Args[2] + if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { break } - idx := v_0.Args[0] - ptr := v.Args[1] - mem := v.Args[2] - v.reset(OpARM64MOVWstorezeroidx4) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 3) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVDstorezeroidx ptr idx mem) + // match: (MSUBW a (MOVDconst [c]) x) + // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) + // result: (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)]) for { _ = v.Args[2] - ptr := v.Args[0] + a := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpARM64ADDconst { + if v_1.Op != OpARM64MOVDconst { break } - if v_1.AuxInt != 4 { + c := v_1.AuxInt + x := v.Args[2] + if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) { break } - idx := v_1.Args[0] - x := v.Args[2] - if x.Op != OpARM64MOVWstorezeroidx { + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 5) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MSUBW a (MOVDconst [c]) x) + // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) + // result: (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)]) + for { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - _ = x.Args[2] - if ptr != x.Args[0] { + c := v_1.AuxInt + x := v.Args[2] + if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { break } - if idx != x.Args[1] { + 
v.reset(OpARM64ADDshiftLL) + v.AuxInt = log2(c / 7) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (MSUBW a (MOVDconst [c]) x) + // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) + // result: (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)]) + for { + _ = v.Args[2] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARM64MOVDconst { break } - mem := x.Args[2] - if !(x.Uses == 1 && clobber(x)) { + c := v_1.AuxInt + x := v.Args[2] + if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) { break } - v.reset(OpARM64MOVDstorezeroidx) - v.AddArg(ptr) - v.AddArg(idx) - v.AddArg(mem) + v.reset(OpARM64SUBshiftLL) + v.AuxInt = log2(c / 9) + v.AddArg(a) + v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) + v.AddArg(v0) return true } return false } -func rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v *Value) bool { - // match: (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) +func rewriteValueARM64_OpARM64MSUBW_20(v *Value) bool { + b := v.Block + _ = b + // match: (MSUBW (MOVDconst [c]) x y) // cond: - // result: (MOVWstorezero [c<<2] ptr mem) + // result: (ADDconst [c] (MNEGW x y)) for { _ = v.Args[2] - ptr := v.Args[0] + v_0 := v.Args[0] + if v_0.Op != OpARM64MOVDconst { + break + } + c := v_0.AuxInt + x := v.Args[1] + y := v.Args[2] + v.reset(OpARM64ADDconst) + v.AuxInt = c + v0 := b.NewValue0(v.Pos, OpARM64MNEGW, x.Type) + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + // match: (MSUBW a (MOVDconst [c]) (MOVDconst [d])) + // cond: + // result: (SUBconst [int64(int32(c)*int32(d))] a) + for { + _ = v.Args[2] + a := v.Args[0] v_1 := v.Args[1] if v_1.Op != OpARM64MOVDconst { break } c := v_1.AuxInt - mem := v.Args[2] - v.reset(OpARM64MOVWstorezero) - v.AuxInt = c << 2 - v.AddArg(ptr) - v.AddArg(mem) + v_2 := v.Args[2] + if v_2.Op != OpARM64MOVDconst { + break + } + d := v_2.AuxInt + v.reset(OpARM64SUBconst) + v.AuxInt = int64(int32(c) * int32(d)) + v.AddArg(a) return true } return false @@ -26956,7 +29000,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { return true } // match: (SUB a l:(MUL x y)) - // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: l.Uses==1 && clobber(l) // result: (MSUB a x y) for { _ = v.Args[1] @@ -26968,7 +29012,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { _ = l.Args[1] x := l.Args[0] y := l.Args[1] - if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MSUB) @@ -26978,7 +29022,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { return true } // match: (SUB a l:(MNEG x y)) - // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: l.Uses==1 && clobber(l) // result: (MADD a x y) for { _ = v.Args[1] @@ -26990,7 +29034,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { _ = l.Args[1] x := l.Args[0] y := l.Args[1] - if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MADD) @@ -27000,7 +29044,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { return true } // match: (SUB a l:(MULW x y)) - // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && 
a.Op!=OpARM64MOVDconst && clobber(l) + // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l) // result: (MSUBW a x y) for { _ = v.Args[1] @@ -27012,7 +29056,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { _ = l.Args[1] x := l.Args[0] y := l.Args[1] - if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MSUBW) @@ -27022,7 +29066,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { return true } // match: (SUB a l:(MNEGW x y)) - // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) + // cond: a.Type.Size() != 8 && l.Uses==1 && clobber(l) // result: (MADDW a x y) for { _ = v.Args[1] @@ -27034,7 +29078,7 @@ func rewriteValueARM64_OpARM64SUB_0(v *Value) bool { _ = l.Args[1] x := l.Args[0] y := l.Args[1] - if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) { + if !(a.Type.Size() != 8 && l.Uses == 1 && clobber(l)) { break } v.reset(OpARM64MADDW) @@ -39865,4 +41909,4 @@ func rewriteBlockARM64(b *Block) bool { } } return false -} +} \ No newline at end of file diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index c0539256d5..05a28695d4 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -206,8 +206,15 @@ func AddMul(x int) int { return 2*x + 1 } -func MULA(a, b, c uint32) uint32 { - // arm:`MULA` - // arm64:`MADDW` - return a*b + c +func MULA(a, b, c uint32) (uint32, uint32, uint32) { + // arm:`MULA`,-`MUL\s` + // arm64:`MADDW`,-`MULW` + r0 := a*b + c + // arm:`MULA`,-`MUL\s` + // arm64:`MADDW`,-`MULW` + r1 := c*79 + a + // arm:`ADD`,-`MULA`,-`MUL\s` + // arm64:`ADD`,-`MADD`,-`MULW` + r2 := b*64 + c + return r0, r1, r2 } -- 2.50.0
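Note (not part of the CL itself): below is a minimal standalone sketch of the strength reduction that the new MSUB constant-folding rules encode. The helpers isPowerOfTwo and log2 are stand-ins with assumed semantics matching the SSA rule helpers; msub mirrors the (MSUB a x (MOVDconst [c])) rules above, computing a - x*c with shifts and adds instead of a multiply.

package main

import "fmt"

// isPowerOfTwo and log2 mimic the helpers used in the rule conditions
// (assumption: only positive values count as powers of two).
func isPowerOfTwo(c int64) bool { return c > 0 && c&(c-1) == 0 }

func log2(c int64) int64 {
	var n int64
	for c > 1 {
		c >>= 1
		n++
	}
	return n
}

// msub computes a - x*c the way the rewritten code would.
func msub(a, x, c int64) int64 {
	switch {
	case c == -1: // (MSUB a x (MOVDconst [-1])) -> (ADD a x)
		return a + x
	case c == 0: // (MSUB a _ (MOVDconst [0])) -> a
		return a
	case c == 1: // (MSUB a x (MOVDconst [1])) -> (SUB a x)
		return a - x
	case isPowerOfTwo(c): // -> (SUBshiftLL a x [log2(c)])
		return a - x<<uint(log2(c))
	case isPowerOfTwo(c-1) && c >= 3: // -> (SUB a (ADDshiftLL x x [log2(c-1)]))
		return a - (x + x<<uint(log2(c-1)))
	case isPowerOfTwo(c+1) && c >= 7: // -> (ADD a (SUBshiftLL x x [log2(c+1)]))
		return a + (x - x<<uint(log2(c+1)))
	case c%3 == 0 && isPowerOfTwo(c/3): // -> (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
		return a + (x-x<<2)<<uint(log2(c/3))
	default: // no folding applies; the three-operand MSUB instruction is kept
		return a - x*c
	}
}

func main() {
	// Every folded form must agree with the plain multiply.
	for _, c := range []int64{-1, 0, 1, 4, 5, 7, 12, 79} {
		fmt.Println(c, msub(100, 3, c), 100-3*c)
	}
}

The MSUBW rules follow the same pattern, with the equality and range checks evaluated on the low 32 bits (int32(c)) and an extra is32Bit(c) guard on the divisibility cases.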