cmd/compile: add some LEAL{1,2,4,8} rewrite rules for AMD64
author    Josh Bleecher Snyder <josharian@gmail.com>
          Mon, 26 Feb 2018 15:05:19 +0000 (07:05 -0800)
committer Josh Bleecher Snyder <josharian@gmail.com>
          Mon, 7 May 2018 22:09:18 +0000 (22:09 +0000)
This should improve some 32-bit arithmetic operations.
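
As an illustration (the function names below are hypothetical, not taken from this CL), these are the kinds of 32-bit expressions that the new LEAL rules can lower to a single LEAL or a short LEAL chain instead of separate shift/add or IMUL instructions, mirroring the existing 64-bit LEAQ rules:

    package demo

    // With the new rule (ADDL x (SHLLconst [2] y)) -> (LEAL4 x y),
    // this add+shift can become one LEAL4.
    func addScaled(x, y int32) int32 {
            return x + 4*y
    }

    // With the new rule (MULLconst [11] x) -> (LEAL2 x (LEAL4 x x)),
    // i.e. x + 2*(x + 4*x), this multiply can avoid an IMUL.
    func times11(x int32) int32 {
            return 11 * x
    }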

During make.bash, this increases the number of
rules firing by 15518:

$ wc -l rulelog-*
 13490514 rulelog-head
 13474996 rulelog-master
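(The 15518 figure is the difference between the two line counts above: 13490514 - 13474996 = 15518 additional rule firings.)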

compress/flate benchmarks:

name                             old time/op    new time/op    delta
Decode/Digits/Huffman/1e4-8         103µs ± 4%     102µs ± 0%  -0.95%  (p=0.000 n=30+27)
Decode/Digits/Huffman/1e5-8         962µs ± 2%     954µs ± 1%  -0.80%  (p=0.000 n=25+25)
Decode/Digits/Huffman/1e6-8        9.55ms ± 1%    9.50ms ± 1%  -0.57%  (p=0.000 n=29+29)
Decode/Digits/Speed/1e4-8           110µs ± 2%     110µs ± 2%  -0.41%  (p=0.003 n=28+30)
Decode/Digits/Speed/1e5-8          1.15ms ± 1%    1.14ms ± 1%  -0.85%  (p=0.000 n=29+28)
Decode/Digits/Speed/1e6-8          11.5ms ± 2%    11.4ms ± 1%  -1.26%  (p=0.000 n=28+27)
Decode/Digits/Default/1e4-8         113µs ± 1%     112µs ± 1%  -0.49%  (p=0.001 n=27+30)
Decode/Digits/Default/1e5-8        1.13ms ± 0%    1.12ms ± 1%  -0.75%  (p=0.000 n=26+24)
Decode/Digits/Default/1e6-8        11.1ms ± 1%    11.1ms ± 1%  -0.47%  (p=0.000 n=28+27)
Decode/Digits/Compression/1e4-8     113µs ± 1%     112µs ± 1%  -0.70%  (p=0.000 n=28+29)
Decode/Digits/Compression/1e5-8    1.13ms ± 2%    1.12ms ± 1%  -1.41%  (p=0.000 n=28+26)
Decode/Digits/Compression/1e6-8    11.1ms ± 1%    11.1ms ± 1%  -0.33%  (p=0.002 n=29+27)
Decode/Twain/Huffman/1e4-8          115µs ± 1%     115µs ± 1%  -0.40%  (p=0.000 n=28+26)
Decode/Twain/Huffman/1e5-8         1.05ms ± 1%    1.04ms ± 0%  -0.41%  (p=0.000 n=27+25)
Decode/Twain/Huffman/1e6-8         10.4ms ± 1%    10.4ms ± 1%    ~     (p=0.993 n=28+24)
Decode/Twain/Speed/1e4-8            118µs ± 2%     116µs ± 1%  -1.08%  (p=0.000 n=27+29)
Decode/Twain/Speed/1e5-8           1.07ms ± 1%    1.07ms ± 1%  -0.23%  (p=0.041 n=26+27)
Decode/Twain/Speed/1e6-8           10.6ms ± 1%    10.5ms ± 0%  -0.68%  (p=0.000 n=29+27)
Decode/Twain/Default/1e4-8          110µs ± 1%     109µs ± 0%  -0.49%  (p=0.000 n=29+26)
Decode/Twain/Default/1e5-8          906µs ± 1%     902µs ± 1%  -0.48%  (p=0.000 n=27+28)
Decode/Twain/Default/1e6-8         8.75ms ± 1%    8.68ms ± 2%  -0.73%  (p=0.000 n=28+28)
Decode/Twain/Compression/1e4-8      110µs ± 1%     109µs ± 1%  -0.80%  (p=0.000 n=27+28)
Decode/Twain/Compression/1e5-8      905µs ± 1%     906µs ± 5%    ~     (p=0.065 n=27+29)
Decode/Twain/Compression/1e6-8     8.75ms ± 2%    8.68ms ± 1%  -0.76%  (p=0.000 n=26+26)
Encode/Digits/Huffman/1e4-8        31.8µs ± 1%    32.3µs ± 2%  +1.43%  (p=0.000 n=28+27)
Encode/Digits/Huffman/1e5-8         299µs ± 2%     296µs ± 1%  -1.05%  (p=0.000 n=29+29)
Encode/Digits/Huffman/1e6-8        2.99ms ± 3%    2.96ms ± 1%  -1.00%  (p=0.000 n=29+28)
Encode/Digits/Speed/1e4-8           149µs ± 1%     152µs ± 4%  +2.18%  (p=0.000 n=30+30)
Encode/Digits/Speed/1e5-8          1.39ms ± 1%    1.40ms ± 2%  +1.02%  (p=0.000 n=27+27)
Encode/Digits/Speed/1e6-8          13.7ms ± 0%    13.8ms ± 1%  +0.81%  (p=0.000 n=27+27)
Encode/Digits/Default/1e4-8         297µs ± 7%     297µs ± 7%    ~     (p=1.000 n=30+30)
Encode/Digits/Default/1e5-8        4.51ms ± 1%    4.42ms ± 1%  -2.06%  (p=0.000 n=29+29)
Encode/Digits/Default/1e6-8        47.5ms ± 1%    46.6ms ± 1%  -1.90%  (p=0.000 n=27+25)
Encode/Digits/Compression/1e4-8     302µs ± 7%     303µs ± 9%    ~     (p=0.854 n=30+30)
Encode/Digits/Compression/1e5-8    4.52ms ± 1%    4.43ms ± 2%  -1.91%  (p=0.000 n=26+25)
Encode/Digits/Compression/1e6-8    47.5ms ± 1%    46.7ms ± 1%  -1.70%  (p=0.000 n=26+27)
Encode/Twain/Huffman/1e4-8         46.6µs ± 2%    46.8µs ± 2%    ~     (p=0.114 n=30+30)
Encode/Twain/Huffman/1e5-8          357µs ± 3%     352µs ± 2%  -1.13%  (p=0.000 n=29+28)
Encode/Twain/Huffman/1e6-8         3.58ms ± 4%    3.52ms ± 1%  -1.43%  (p=0.003 n=30+28)
Encode/Twain/Speed/1e4-8            173µs ± 1%     174µs ± 1%  +0.65%  (p=0.000 n=27+28)
Encode/Twain/Speed/1e5-8           1.39ms ± 1%    1.40ms ± 1%  +0.92%  (p=0.000 n=28+27)
Encode/Twain/Speed/1e6-8           13.6ms ± 1%    13.7ms ± 1%  +0.51%  (p=0.000 n=25+26)
Encode/Twain/Default/1e4-8          364µs ± 5%     361µs ± 5%    ~     (p=0.219 n=30+30)
Encode/Twain/Default/1e5-8         5.41ms ± 1%    5.43ms ± 5%    ~     (p=0.655 n=27+27)
Encode/Twain/Default/1e6-8         57.2ms ± 1%    58.4ms ± 4%  +2.15%  (p=0.000 n=22+28)
Encode/Twain/Compression/1e4-8      371µs ± 9%     373µs ± 6%    ~     (p=0.503 n=30+29)
Encode/Twain/Compression/1e5-8     5.97ms ± 2%    5.92ms ± 1%  -0.75%  (p=0.000 n=28+26)
Encode/Twain/Compression/1e6-8     64.0ms ± 1%    63.8ms ± 1%  -0.36%  (p=0.036 n=27+25)
[Geo mean]                         1.37ms         1.36ms       -0.38%
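(The table is in benchstat format: n gives the number of samples kept for the old and new binaries, p is the significance level of the measured delta, and "~" marks changes that are not statistically significant.)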

Change-Id: I3df4de63f06eaf121c38821bd889453a8de1b199
Reviewed-on: https://go-review.googlesource.com/101276
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/rewriteAMD64.go

src/cmd/compile/internal/ssa/gen/AMD64.rules
index bd36e60f6eea3790414c944d242662f237aace87..45c82a0cd7f160335d2ee51d6539a490148b120d 100644 (file)
 (CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
 
 // Using MOVZX instead of AND is cheaper.
-(ANDLconst [0xFF] x) -> (MOVBQZX x)
-(ANDLconst [0xFFFF] x) -> (MOVWQZX x)
-(ANDQconst [0xFF] x) -> (MOVBQZX x)
-(ANDQconst [0xFFFF] x) -> (MOVWQZX x)
+(AND(Q|L)const [  0xFF] x) -> (MOVBQZX x)
+(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
 (ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)
 
 // strength reduction
 // which can require a register-register move
 // to preserve the original value,
 // so it must be used with care.
-(MULQconst [-9] x) -> (NEGQ (LEAQ8 <v.Type> x x))
-(MULQconst [-5] x) -> (NEGQ (LEAQ4 <v.Type> x x))
-(MULQconst [-3] x) -> (NEGQ (LEAQ2 <v.Type> x x))
-(MULQconst [-1] x) -> (NEGQ x)
-(MULQconst [0] _) -> (MOVQconst [0])
-(MULQconst [1] x) -> x
-(MULQconst [3] x) -> (LEAQ2 x x)
-(MULQconst [5] x) -> (LEAQ4 x x)
-(MULQconst [7] x) -> (LEAQ2 x (LEAQ2 <v.Type> x x))
-(MULQconst [9] x) -> (LEAQ8 x x)
-(MULQconst [11] x) -> (LEAQ2 x (LEAQ4 <v.Type> x x))
-(MULQconst [13] x) -> (LEAQ4 x (LEAQ2 <v.Type> x x))
-(MULQconst [19] x) -> (LEAQ2 x (LEAQ8 <v.Type> x x))
-(MULQconst [21] x) -> (LEAQ4 x (LEAQ4 <v.Type> x x))
-(MULQconst [25] x) -> (LEAQ8 x (LEAQ2 <v.Type> x x))
-(MULQconst [27] x) -> (LEAQ8 (LEAQ2 <v.Type> x x) (LEAQ2 <v.Type> x x))
-(MULQconst [37] x) -> (LEAQ4 x (LEAQ8 <v.Type> x x))
-(MULQconst [41] x) -> (LEAQ8 x (LEAQ4 <v.Type> x x))
-(MULQconst [45] x) -> (LEAQ8 (LEAQ4 <v.Type> x x) (LEAQ4 <v.Type> x x))
-(MULQconst [73] x) -> (LEAQ8 x (LEAQ8 <v.Type> x x))
-(MULQconst [81] x) -> (LEAQ8 (LEAQ8 <v.Type> x x) (LEAQ8 <v.Type> x x))
-
-(MULQconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
-(MULQconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
-(MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
-(MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
-(MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
-(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
-(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
-(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
-
-// combine add/shift into LEAQ
-(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
-(ADDQ x (SHLQconst [2] y)) -> (LEAQ4 x y)
-(ADDQ x (SHLQconst [1] y)) -> (LEAQ2 x y)
-(ADDQ x (ADDQ y y)) -> (LEAQ2 x y)
-(ADDQ x (ADDQ x y)) -> (LEAQ2 y x)
-
-// combine ADDQ/ADDQconst into LEAQ1
-(ADDQconst [c] (ADDQ x y)) -> (LEAQ1 [c] x y)
-(ADDQ (ADDQconst [c] x) y) -> (LEAQ1 [c] x y)
+(MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [-1] x) -> (NEG(Q|L) x)
+(MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0])
+(MUL(Q|L)const [ 1] x) -> x
+(MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x)
+(MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x)
+(MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x)
+(MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
+
+(MUL(Q|L)const [c] x) && isPowerOfTwo(c+1) && c >=  15 -> (SUB(Q|L)  (SHL(Q|L)const <v.Type> [log2(c+1)] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >=  17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >=  34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >=  68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x)
+(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x))
+
+// combine add/shift into LEAQ/LEAL
+(ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y)
+(ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y)
+(ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y)
+(ADD(L|Q) x (ADD(L|Q) y y))        -> (LEA(L|Q)2 x y)
+(ADD(L|Q) x (ADD(L|Q) x y))        -> (LEA(L|Q)2 y x)
+
+// combine ADDQ/ADDQconst into LEAQ1/LEAL1
+(ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y)
+(ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y)
 (ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x)
 
-// fold ADDQ into LEAQ
-(ADDQconst [c] (LEAQ [d] {s} x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
-(LEAQ [c] {s} (ADDQconst [d] x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
-(LEAQ [c] {s} (ADDQ x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
-(ADDQ x (LEAQ [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
-
-// fold ADDQconst into LEAQx
-(ADDQconst [c] (LEAQ1 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ1 [c+d] {s} x y)
-(ADDQconst [c] (LEAQ2 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ2 [c+d] {s} x y)
-(ADDQconst [c] (LEAQ4 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ4 [c+d] {s} x y)
-(ADDQconst [c] (LEAQ8 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ8 [c+d] {s} x y)
-(LEAQ1 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ1 [c+d] {s} x y)
-(LEAQ2 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ2 [c+d] {s} x y)
-(LEAQ2 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAQ2 [c+2*d] {s} x y)
-(LEAQ4 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ4 [c+d] {s} x y)
-(LEAQ4 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEAQ4 [c+4*d] {s} x y)
-(LEAQ8 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEAQ8 [c+d] {s} x y)
-(LEAQ8 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEAQ8 [c+8*d] {s} x y)
-
-// fold shifts into LEAQx
-(LEAQ1 [c] {s} x (SHLQconst [1] y)) -> (LEAQ2 [c] {s} x y)
-(LEAQ1 [c] {s} x (SHLQconst [2] y)) -> (LEAQ4 [c] {s} x y)
-(LEAQ1 [c] {s} x (SHLQconst [3] y)) -> (LEAQ8 [c] {s} x y)
-(LEAQ2 [c] {s} x (SHLQconst [1] y)) -> (LEAQ4 [c] {s} x y)
-(LEAQ2 [c] {s} x (SHLQconst [2] y)) -> (LEAQ8 [c] {s} x y)
-(LEAQ4 [c] {s} x (SHLQconst [1] y)) -> (LEAQ8 [c] {s} x y)
+// fold ADDQ/ADDL into LEAQ/LEAL
+(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
+(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
+(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
+(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
+
+// fold ADDQconst/ADDLconst into LEAQx/LEALx
+(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y)
+(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x y)
+(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y)
+(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y)
+(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y)
+(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y)
+(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d)   && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y)
+(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y)
+
+// fold shifts into LEAQx/LEALx
+(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y)
+(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y)
+(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y)
+(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y)
+(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y)
+(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y)
 
 // reverse ordering of compare instruction
 (SETL (InvertFlags x)) -> (SETG x)
   && clobber(mem2)
   -> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
 
-// amd64p32 rules
-// same as the rules above, but with 32 instead of 64 bit pointer arithmetic.
-// LEAQ,ADDQ -> LEAL,ADDL
-(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
-(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
-
 (MOVQload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
        (MOVQload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
 (MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
 (MOVLi2f <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
 
 // LEAQ is rematerializeable, so this helps to avoid register spill.
-// See isuue 22947 for details
-(ADDQconst [off] x:(SP)) -> (LEAQ [off] x)
+// See issue 22947 for details
+(ADD(Q|L)const [off] x:(SP)) -> (LEA(Q|L) [off] x)
 
 // Fold loads into compares
 // Note: these may be undone by the flagalloc pass.
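
A quick standalone check of the arithmetic behind the strength-reduction and add/shift rules above (an illustrative sketch, not compiler code; the lea* helpers merely model the LEAL base + scale*index address arithmetic):

    package main

    import "fmt"

    // lea2, lea4, lea8 model LEAL2/LEAL4/LEAL8 address arithmetic: base + scale*index.
    func lea2(base, index int32) int32 { return base + 2*index }
    func lea4(base, index int32) int32 { return base + 4*index }
    func lea8(base, index int32) int32 { return base + 8*index }

    func main() {
            for _, x := range []int32{-7, 0, 3, 123456} {
                    y := 3 * x // arbitrary second operand
                    // (ADDL x (SHLLconst [2] y)) -> (LEAL4 x y): x + y<<2 == x + 4*y
                    fmt.Println(x+(y<<2) == lea4(x, y))
                    // (MULLconst [11] x) -> (LEAL2 x (LEAL4 x x)): 11*x == x + 2*(x + 4*x)
                    fmt.Println(11*x == lea2(x, lea4(x, x)))
                    // (MULLconst [45] x) -> (LEAL8 (LEAL4 x x) (LEAL4 x x)): 45*x == 5*x + 8*(5*x)
                    fmt.Println(45*x == lea8(lea4(x, x), lea4(x, x)))
            }
    }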
src/cmd/compile/internal/ssa/rewriteAMD64.go
index 2fce1e2221435f2a0df05bc3b22003ce149ed24e..3ff7e48765afba886a1346d5375eb22a49ddca7e 100644 (file)
@@ -16,9 +16,9 @@ var _ = types.TypeMem // in case not otherwise used
 func rewriteValueAMD64(v *Value) bool {
        switch v.Op {
        case OpAMD64ADDL:
-               return rewriteValueAMD64_OpAMD64ADDL_0(v) || rewriteValueAMD64_OpAMD64ADDL_10(v)
+               return rewriteValueAMD64_OpAMD64ADDL_0(v) || rewriteValueAMD64_OpAMD64ADDL_10(v) || rewriteValueAMD64_OpAMD64ADDL_20(v)
        case OpAMD64ADDLconst:
-               return rewriteValueAMD64_OpAMD64ADDLconst_0(v)
+               return rewriteValueAMD64_OpAMD64ADDLconst_0(v) || rewriteValueAMD64_OpAMD64ADDLconst_10(v)
        case OpAMD64ADDLconstmem:
                return rewriteValueAMD64_OpAMD64ADDLconstmem_0(v)
        case OpAMD64ADDLmem:
@@ -155,6 +155,14 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v)
        case OpAMD64LEAL:
                return rewriteValueAMD64_OpAMD64LEAL_0(v)
+       case OpAMD64LEAL1:
+               return rewriteValueAMD64_OpAMD64LEAL1_0(v)
+       case OpAMD64LEAL2:
+               return rewriteValueAMD64_OpAMD64LEAL2_0(v)
+       case OpAMD64LEAL4:
+               return rewriteValueAMD64_OpAMD64LEAL4_0(v)
+       case OpAMD64LEAL8:
+               return rewriteValueAMD64_OpAMD64LEAL8_0(v)
        case OpAMD64LEAQ:
                return rewriteValueAMD64_OpAMD64LEAQ_0(v)
        case OpAMD64LEAQ1:
@@ -296,7 +304,7 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAMD64MULL:
                return rewriteValueAMD64_OpAMD64MULL_0(v)
        case OpAMD64MULLconst:
-               return rewriteValueAMD64_OpAMD64MULLconst_0(v)
+               return rewriteValueAMD64_OpAMD64MULLconst_0(v) || rewriteValueAMD64_OpAMD64MULLconst_10(v) || rewriteValueAMD64_OpAMD64MULLconst_20(v) || rewriteValueAMD64_OpAMD64MULLconst_30(v)
        case OpAMD64MULQ:
                return rewriteValueAMD64_OpAMD64MULQ_0(v)
        case OpAMD64MULQconst:
@@ -1239,34 +1247,40 @@ func rewriteValueAMD64_OpAMD64ADDL_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ADDL x (NEGL y))
+       // match: (ADDL x (SHLLconst [3] y))
        // cond:
-       // result: (SUBL x y)
+       // result: (LEAL8 x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGL {
+               if v_1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if v_1.AuxInt != 3 {
                        break
                }
                y := v_1.Args[0]
-               v.reset(OpAMD64SUBL)
+               v.reset(OpAMD64LEAL8)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ADDL (NEGL y) x)
+       // match: (ADDL (SHLLconst [3] y) x)
        // cond:
-       // result: (SUBL x y)
+       // result: (LEAL8 x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NEGL {
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if v_0.AuxInt != 3 {
                        break
                }
                y := v_0.Args[0]
                x := v.Args[1]
-               v.reset(OpAMD64SUBL)
+               v.reset(OpAMD64LEAL8)
                v.AddArg(x)
                v.AddArg(y)
                return true
@@ -1274,66 +1288,68 @@ func rewriteValueAMD64_OpAMD64ADDL_0(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64ADDL_10(v *Value) bool {
-       // match: (ADDL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ADDLmem x [off] {sym} ptr mem)
+       // match: (ADDL x (SHLLconst [2] y))
+       // cond:
+       // result: (LEAL4 x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               v.reset(OpAMD64ADDLmem)
-               v.AuxInt = off
-               v.Aux = sym
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAL4)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDL l:(MOVLload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ADDLmem x [off] {sym} ptr mem)
+       // match: (ADDL (SHLLconst [2] y) x)
+       // cond:
+       // result: (LEAL4 x y)
        for {
                _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               y := v_0.Args[0]
                x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v.reset(OpAMD64LEAL4)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDL x (SHLLconst [1] y))
+       // cond:
+       // result: (LEAL2 x y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               v.reset(OpAMD64ADDLmem)
-               v.AuxInt = off
-               v.Aux = sym
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAL2)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ADDLconst_0(v *Value) bool {
-       // match: (ADDLconst [c] (SHLLconst [1] x))
+       // match: (ADDL (SHLLconst [1] y) x)
        // cond:
-       // result: (LEAL1 [c] x x)
+       // result: (LEAL2 x y)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64SHLLconst {
                        break
@@ -1341,1164 +1357,1154 @@ func rewriteValueAMD64_OpAMD64ADDLconst_0(v *Value) bool {
                if v_0.AuxInt != 1 {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64LEAL1)
-               v.AuxInt = c
-               v.AddArg(x)
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAL2)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDLconst [c] x)
-       // cond: int32(c)==0
-       // result: x
+       // match: (ADDL x (ADDL y y))
+       // cond:
+       // result: (LEAL2 x y)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                x := v.Args[0]
-               if !(int32(c) == 0) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDL {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64LEAL2)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDLconst [c] (MOVLconst [d]))
+       // match: (ADDL (ADDL y y) x)
        // cond:
-       // result: (MOVLconst [int64(int32(c+d))])
+       // result: (LEAL2 x y)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64ADDL {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int64(int32(c + d))
-               return true
-       }
-       // match: (ADDLconst [c] (ADDLconst [d] x))
-       // cond:
-       // result: (ADDLconst [int64(int32(c+d))] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               if y != v_0.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ADDLconst)
-               v.AuxInt = int64(int32(c + d))
+               x := v.Args[1]
+               v.reset(OpAMD64LEAL2)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDLconst [c] (LEAL [d] {s} x))
-       // cond: is32Bit(c+d)
-       // result: (LEAL [c+d] {s} x)
+       // match: (ADDL x (ADDL x y))
+       // cond:
+       // result: (LEAL2 y x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDL {
                        break
                }
-               d := v_0.AuxInt
-               s := v_0.Aux
-               x := v_0.Args[0]
-               if !(is32Bit(c + d)) {
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64LEAL)
-               v.AuxInt = c + d
-               v.Aux = s
+               y := v_1.Args[1]
+               v.reset(OpAMD64LEAL2)
+               v.AddArg(y)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ADDLconstmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (ADDLconstmem [valoff1] {sym} (ADDQconst [off2] base) mem)
-       // cond: ValAndOff(valoff1).canAdd(off2)
-       // result: (ADDLconstmem [ValAndOff(valoff1).add(off2)] {sym} base mem)
+       // match: (ADDL x (ADDL y x))
+       // cond:
+       // result: (LEAL2 y x)
        for {
-               valoff1 := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDL {
                        break
                }
-               off2 := v_0.AuxInt
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(valoff1).canAdd(off2)) {
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               if x != v_1.Args[1] {
                        break
                }
-               v.reset(OpAMD64ADDLconstmem)
-               v.AuxInt = ValAndOff(valoff1).add(off2)
-               v.Aux = sym
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAL2)
+               v.AddArg(y)
+               v.AddArg(x)
                return true
        }
-       // match: (ADDLconstmem [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
-       // result: (ADDLconstmem [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+       // match: (ADDL (ADDL x y) x)
+       // cond:
+       // result: (LEAL2 y x)
        for {
-               valoff1 := v.AuxInt
-               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64ADDL {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
-               v.reset(OpAMD64ADDLconstmem)
-               v.AuxInt = ValAndOff(valoff1).add(off2)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAL2)
+               v.AddArg(y)
+               v.AddArg(x)
                return true
        }
-       // match: (ADDLconstmem [valOff] {sym} ptr (MOVSSstore [ValAndOff(valOff).Off()] {sym} ptr x _))
+       // match: (ADDL (ADDL y x) x)
        // cond:
-       // result: (ADDLconst [ValAndOff(valOff).Val()] (MOVLf2i x))
+       // result: (LEAL2 y x)
        for {
-               valOff := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVSSstore {
-                       break
-               }
-               if v_1.AuxInt != ValAndOff(valOff).Off() {
-                       break
-               }
-               if v_1.Aux != sym {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDL {
                        break
                }
-               _ = v_1.Args[2]
-               if ptr != v_1.Args[0] {
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
-               x := v_1.Args[1]
-               v.reset(OpAMD64ADDLconst)
-               v.AuxInt = ValAndOff(valOff).Val()
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64LEAL2)
+               v.AddArg(y)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ADDLmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (ADDLmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (ADDLmem [off1+off2] {sym} val base mem)
+func rewriteValueAMD64_OpAMD64ADDL_20(v *Value) bool {
+       // match: (ADDL (ADDLconst [c] x) y)
+       // cond:
+       // result: (LEAL1 [c] x y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64ADDLmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ADDLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (ADDLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64ADDLmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64LEAL1)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+       // match: (ADDL y (ADDLconst [c] x))
        // cond:
-       // result: (ADDL x (MOVLf2i y))
+       // result: (LEAL1 [c] x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSSstore {
-                       break
-               }
-               if v_2.AuxInt != off {
-                       break
-               }
-               if v_2.Aux != sym {
-                       break
-               }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64ADDL)
+               c := v_1.AuxInt
+               x := v_1.Args[0]
+               v.reset(OpAMD64LEAL1)
+               v.AuxInt = c
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ADDQ_0(v *Value) bool {
-       // match: (ADDQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (ADDQconst [c] x)
+       // match: (ADDL x (LEAL [c] {s} y))
+       // cond: x.Op != OpSB && y.Op != OpSB
+       // result: (LEAL1 [c] {s} x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64LEAL {
                        break
                }
                c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               s := v_1.Aux
+               y := v_1.Args[0]
+               if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
+               v.reset(OpAMD64LEAL1)
                v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (ADDQconst [c] x)
+       // match: (ADDL (LEAL [c] {s} y) x)
+       // cond: x.Op != OpSB && y.Op != OpSB
+       // result: (LEAL1 [c] {s} x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
                c := v_0.AuxInt
+               s := v_0.Aux
+               y := v_0.Args[0]
                x := v.Args[1]
-               if !(is32Bit(c)) {
+               if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
+               v.reset(OpAMD64LEAL1)
                v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDQ (SHLQconst x [c]) (SHRQconst x [d]))
-       // cond: d==64-c
-       // result: (ROLQconst x [c])
+       // match: (ADDL x (NEGL y))
+       // cond:
+       // result: (SUBL x y)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 64-c) {
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
+               y := v_1.Args[0]
+               v.reset(OpAMD64SUBL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDQ (SHRQconst x [d]) (SHLQconst x [c]))
-       // cond: d==64-c
-       // result: (ROLQconst x [c])
+       // match: (ADDL (NEGL y) x)
+       // cond:
+       // result: (SUBL x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 64-c) {
+               if v_0.Op != OpAMD64NEGL {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64SUBL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDQ x (SHLQconst [3] y))
-       // cond:
-       // result: (LEAQ8 x y)
+       // match: (ADDL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDLmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               if v_1.AuxInt != 3 {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
+               v.reset(OpAMD64ADDLmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
-               v.AddArg(y)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (ADDQ (SHLQconst [3] y) x)
-       // cond:
-       // result: (LEAQ8 x y)
+       // match: (ADDL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDLmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               if v_0.AuxInt != 3 {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ8)
+               v.reset(OpAMD64ADDLmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
-               v.AddArg(y)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (ADDQ x (SHLQconst [2] y))
+       return false
+}
+func rewriteValueAMD64_OpAMD64ADDLconst_0(v *Value) bool {
+       // match: (ADDLconst [c] (ADDL x y))
        // cond:
-       // result: (LEAQ4 x y)
+       // result: (LEAL1 [c] x y)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_1.AuxInt != 2 {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDL {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ4)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64LEAL1)
+               v.AuxInt = c
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ADDQ (SHLQconst [2] y) x)
+       // match: (ADDLconst [c] (SHLLconst [1] x))
        // cond:
-       // result: (LEAQ4 x y)
+       // result: (LEAL1 [c] x x)
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               if v_0.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               if v_0.AuxInt != 1 {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ4)
+               x := v_0.Args[0]
+               v.reset(OpAMD64LEAL1)
+               v.AuxInt = c
+               v.AddArg(x)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ADDQ x (SHLQconst [1] y))
-       // cond:
-       // result: (LEAQ2 x y)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+       // match: (ADDLconst [c] (LEAL [d] {s} x))
+       // cond: is32Bit(c+d)
+       // result: (LEAL [c+d] {s} x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               if v_1.AuxInt != 1 {
+               d := v_0.AuxInt
+               s := v_0.Aux
+               x := v_0.Args[0]
+               if !(is32Bit(c + d)) {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ2)
+               v.reset(OpAMD64LEAL)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ADDQ (SHLQconst [1] y) x)
-       // cond:
-       // result: (LEAQ2 x y)
+       // match: (ADDLconst [c] (LEAL1 [d] {s} x y))
+       // cond: is32Bit(c+d)
+       // result: (LEAL1 [c+d] {s} x y)
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               if v_0.Op != OpAMD64LEAL1 {
                        break
                }
-               if v_0.AuxInt != 1 {
+               d := v_0.AuxInt
+               s := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(c + d)) {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ2)
+               v.reset(OpAMD64LEAL1)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ADDQ_10(v *Value) bool {
-       // match: (ADDQ x (ADDQ y y))
-       // cond:
-       // result: (LEAQ2 x y)
+       // match: (ADDLconst [c] (LEAL2 [d] {s} x y))
+       // cond: is32Bit(c+d)
+       // result: (LEAL2 [c+d] {s} x y)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQ {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL2 {
                        break
                }
-               _ = v_1.Args[1]
-               y := v_1.Args[0]
-               if y != v_1.Args[1] {
+               d := v_0.AuxInt
+               s := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
+               v.reset(OpAMD64LEAL2)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ADDQ (ADDQ y y) x)
-       // cond:
-       // result: (LEAQ2 x y)
+       // match: (ADDLconst [c] (LEAL4 [d] {s} x y))
+       // cond: is32Bit(c+d)
+       // result: (LEAL4 [c+d] {s} x y)
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if v_0.Op != OpAMD64LEAL4 {
                        break
                }
+               d := v_0.AuxInt
+               s := v_0.Aux
                _ = v_0.Args[1]
-               y := v_0.Args[0]
-               if y != v_0.Args[1] {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(c + d)) {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ2)
+               v.reset(OpAMD64LEAL4)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ADDQ x (ADDQ x y))
-       // cond:
-       // result: (LEAQ2 y x)
+       // match: (ADDLconst [c] (LEAL8 [d] {s} x y))
+       // cond: is32Bit(c+d)
+       // result: (LEAL8 [c+d] {s} x y)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQ {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL8 {
                        break
                }
-               _ = v_1.Args[1]
-               if x != v_1.Args[0] {
+               d := v_0.AuxInt
+               s := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(c + d)) {
                        break
                }
-               y := v_1.Args[1]
-               v.reset(OpAMD64LEAQ2)
-               v.AddArg(y)
+               v.reset(OpAMD64LEAL8)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
-               return true
-       }
-       // match: (ADDQ x (ADDQ y x))
-       // cond:
-       // result: (LEAQ2 y x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               y := v_1.Args[0]
-               if x != v_1.Args[1] {
-                       break
-               }
-               v.reset(OpAMD64LEAQ2)
                v.AddArg(y)
-               v.AddArg(x)
                return true
        }
-       // match: (ADDQ (ADDQ x y) x)
-       // cond:
-       // result: (LEAQ2 y x)
+       // match: (ADDLconst [c] x)
+       // cond: int32(c)==0
+       // result: x
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if x != v.Args[1] {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == 0) {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AddArg(y)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (ADDQ (ADDQ y x) x)
+       // match: (ADDLconst [c] (MOVLconst [d]))
        // cond:
-       // result: (LEAQ2 y x)
+       // result: (MOVLconst [int64(int32(c+d))])
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               y := v_0.Args[0]
-               x := v_0.Args[1]
-               if x != v.Args[1] {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AddArg(y)
-               v.AddArg(x)
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int64(int32(c + d))
                return true
        }
-       // match: (ADDQ (ADDQconst [c] x) y)
+       // match: (ADDLconst [c] (ADDLconst [d] x))
        // cond:
-       // result: (LEAQ1 [c] x y)
+       // result: (ADDLconst [int64(int32(c+d))] x)
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c
+               v.reset(OpAMD64ADDLconst)
+               v.AuxInt = int64(int32(c + d))
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ADDQ y (ADDQconst [c] x))
+       return false
+}
+func rewriteValueAMD64_OpAMD64ADDLconst_10(v *Value) bool {
+       // match: (ADDLconst [off] x:(SP))
        // cond:
-       // result: (LEAQ1 [c] x y)
+       // result: (LEAL [off] x)
        for {
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               off := v.AuxInt
+               x := v.Args[0]
+               if x.Op != OpSP {
                        break
                }
-               c := v_1.AuxInt
-               x := v_1.Args[0]
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c
+               v.reset(OpAMD64LEAL)
+               v.AuxInt = off
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ADDQ x (LEAQ [c] {s} y))
-       // cond: x.Op != OpSB && y.Op != OpSB
-       // result: (LEAQ1 [c] {s} x y)
+       return false
+}
+func rewriteValueAMD64_OpAMD64ADDLconstmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (ADDLconstmem [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2)
+       // result: (ADDLconstmem [ValAndOff(valoff1).add(off2)] {sym} base mem)
        for {
+               valoff1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1.AuxInt
-               s := v_1.Aux
-               y := v_1.Args[0]
-               if !(x.Op != OpSB && y.Op != OpSB) {
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2)) {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64ADDLconstmem)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (ADDQ (LEAQ [c] {s} y) x)
-       // cond: x.Op != OpSB && y.Op != OpSB
-       // result: (LEAQ1 [c] {s} x y)
+       // match: (ADDLconstmem [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+       // result: (ADDLconstmem [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
        for {
+               valoff1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_0.AuxInt
-               s := v_0.Aux
-               y := v_0.Args[0]
-               x := v.Args[1]
-               if !(x.Op != OpSB && y.Op != OpSB) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ADDLconstmem)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ADDQ_20(v *Value) bool {
-       // match: (ADDQ x (NEGQ y))
+       // match: (ADDLconstmem [valOff] {sym} ptr (MOVSSstore [ValAndOff(valOff).Off()] {sym} ptr x _))
        // cond:
-       // result: (SUBQ x y)
+       // result: (ADDLconst [ValAndOff(valOff).Val()] (MOVLf2i x))
        for {
+               valOff := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGQ {
+               if v_1.Op != OpAMD64MOVSSstore {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64SUBQ)
-               v.AddArg(x)
-               v.AddArg(y)
+               if v_1.AuxInt != ValAndOff(valOff).Off() {
+                       break
+               }
+               if v_1.Aux != sym {
+                       break
+               }
+               _ = v_1.Args[2]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               x := v_1.Args[1]
+               v.reset(OpAMD64ADDLconst)
+               v.AuxInt = ValAndOff(valOff).Val()
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (ADDQ (NEGQ y) x)
-       // cond:
-       // result: (SUBQ x y)
+       return false
+}
+func rewriteValueAMD64_OpAMD64ADDLmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (ADDLmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ADDLmem [off1+off2] {sym} val base mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NEGQ {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64SUBQ)
-               v.AddArg(x)
-               v.AddArg(y)
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64ADDLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (ADDQ x l:(MOVQload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ADDQmem x [off] {sym} ptr mem)
+       // match: (ADDLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ADDLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64ADDQmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
+               v.reset(OpAMD64ADDLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (ADDQ l:(MOVQload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ADDQmem x [off] {sym} ptr mem)
+       // match: (ADDLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+       // cond:
+       // result: (ADDL x (MOVLf2i y))
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSSstore {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if v_2.AuxInt != off {
                        break
                }
-               v.reset(OpAMD64ADDQmem)
-               v.AuxInt = off
-               v.Aux = sym
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64ADDL)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ADDQconst_0(v *Value) bool {
-       // match: (ADDQconst [c] (ADDQ x y))
-       // cond:
-       // result: (LEAQ1 [c] x y)
+func rewriteValueAMD64_OpAMD64ADDQ_0(v *Value) bool {
+       // match: (ADDQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (ADDQconst [c] x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64LEAQ1)
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
                v.AuxInt = c
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ADDQconst [c] (SHLQconst [1] x))
-       // cond:
-       // result: (LEAQ1 [c] x x)
+       // match: (ADDQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (ADDQconst [c] x)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               if v_0.AuxInt != 1 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64LEAQ1)
+               v.reset(OpAMD64ADDQconst)
                v.AuxInt = c
                v.AddArg(x)
-               v.AddArg(x)
                return true
        }
-       // match: (ADDQconst [c] (LEAQ [d] {s} x))
-       // cond: is32Bit(c+d)
-       // result: (LEAQ [c+d] {s} x)
+       // match: (ADDQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               s := v_0.Aux
+               c := v_0.AuxInt
                x := v_0.Args[0]
-               if !(is32Bit(c + d)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.AuxInt = c + d
-               v.Aux = s
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ADDQconst [c] (LEAQ1 [d] {s} x y))
-       // cond: is32Bit(c+d)
-       // result: (LEAQ1 [c+d] {s} x y)
+       // match: (ADDQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if v_0.Op != OpAMD64SHRQconst {
                        break
                }
                d := v_0.AuxInt
-               s := v_0.Aux
-               _ = v_0.Args[1]
                x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(c + d)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c + d
-               v.Aux = s
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ADDQconst [c] (LEAQ2 [d] {s} x y))
-       // cond: is32Bit(c+d)
-       // result: (LEAQ2 [c+d] {s} x y)
+       // match: (ADDQ x (SHLQconst [3] y))
+       // cond:
+       // result: (LEAQ8 x y)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               s := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(c + d)) {
+               if v_1.AuxInt != 3 {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c + d
-               v.Aux = s
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ADDQconst [c] (LEAQ4 [d] {s} x y))
-       // cond: is32Bit(c+d)
-       // result: (LEAQ4 [c+d] {s} x y)
+       // match: (ADDQ (SHLQconst [3] y) x)
+       // cond:
+       // result: (LEAQ8 x y)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               s := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(c + d)) {
+               if v_0.AuxInt != 3 {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c + d
-               v.Aux = s
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ8)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ADDQconst [c] (LEAQ8 [d] {s} x y))
-       // cond: is32Bit(c+d)
-       // result: (LEAQ8 [c+d] {s} x y)
+       // match: (ADDQ x (SHLQconst [2] y))
+       // cond:
+       // result: (LEAQ4 x y)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               s := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(c + d)) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c + d
-               v.Aux = s
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ4)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ADDQconst [0] x)
+       // match: (ADDQ (SHLQconst [2] y) x)
        // cond:
-       // result: x
+       // result: (LEAQ4 x y)
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ4)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDQconst [c] (MOVQconst [d]))
+       // match: (ADDQ x (SHLQconst [1] y))
        // cond:
-       // result: (MOVQconst [c+d])
+       // result: (LEAQ2 x y)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = c + d
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDQconst [c] (ADDQconst [d] x))
-       // cond: is32Bit(c+d)
-       // result: (ADDQconst [c+d] x)
+       // match: (ADDQ (SHLQconst [1] y) x)
+       // cond:
+       // result: (LEAQ2 x y)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(is32Bit(c + d)) {
+               if v_0.AuxInt != 1 {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = c + d
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ2)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
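
Several of the ADDQ rules above fold a constant left shift into an addressing mode: a shift by 3, 2, or 1 becomes a LEAQ8, LEAQ4, or LEAQ2 with the other addend as the base. A minimal Go sketch of the kind of expression this targets (names are illustrative, not taken from this change):

// x + y<<3 is built as (ADDQ x (SHLQconst [3] y)); the rule above
// rewrites that to (LEAQ8 x y), a single LEAQ with scale 8 instead
// of a shift followed by an add.
func addScaled(x, y int64) int64 {
	return x + y<<3
}
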
-func rewriteValueAMD64_OpAMD64ADDQconst_10(v *Value) bool {
-       // match: (ADDQconst [off] x:(SP))
+func rewriteValueAMD64_OpAMD64ADDQ_10(v *Value) bool {
+       // match: (ADDQ x (ADDQ y y))
        // cond:
-       // result: (LEAQ [off] x)
+       // result: (LEAQ2 x y)
        for {
-               off := v.AuxInt
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpSP {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQ {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.AuxInt = off
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64LEAQ2)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ADDQconstmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (ADDQconstmem [valoff1] {sym} (ADDQconst [off2] base) mem)
-       // cond: ValAndOff(valoff1).canAdd(off2)
-       // result: (ADDQconstmem [ValAndOff(valoff1).add(off2)] {sym} base mem)
+       // match: (ADDQ (ADDQ y y) x)
+       // cond:
+       // result: (LEAQ2 x y)
        for {
-               valoff1 := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               off2 := v_0.AuxInt
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(valoff1).canAdd(off2)) {
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               if y != v_0.Args[1] {
                        break
                }
-               v.reset(OpAMD64ADDQconstmem)
-               v.AuxInt = ValAndOff(valoff1).add(off2)
-               v.Aux = sym
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDQconstmem [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
-       // result: (ADDQconstmem [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+       // match: (ADDQ x (ADDQ x y))
+       // cond:
+       // result: (LEAQ2 y x)
        for {
-               valoff1 := v.AuxInt
-               sym1 := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQ {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64ADDQconstmem)
-               v.AuxInt = ValAndOff(valoff1).add(off2)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
+               y := v_1.Args[1]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(y)
+               v.AddArg(x)
                return true
        }
-       // match: (ADDQconstmem [valOff] {sym} ptr (MOVSDstore [ValAndOff(valOff).Off()] {sym} ptr x _))
+       // match: (ADDQ x (ADDQ y x))
        // cond:
-       // result: (ADDQconst [ValAndOff(valOff).Val()] (MOVQf2i x))
+       // result: (LEAQ2 y x)
        for {
-               valOff := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVSDstore {
-                       break
-               }
-               if v_1.AuxInt != ValAndOff(valOff).Off() {
-                       break
-               }
-               if v_1.Aux != sym {
+               if v_1.Op != OpAMD64ADDQ {
                        break
                }
-               _ = v_1.Args[2]
-               if ptr != v_1.Args[0] {
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               if x != v_1.Args[1] {
                        break
                }
-               x := v_1.Args[1]
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = ValAndOff(valOff).Val()
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(y)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ADDQmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (ADDQmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (ADDQmem [off1+off2] {sym} val base mem)
+       // match: (ADDQ (ADDQ x y) x)
+       // cond:
+       // result: (LEAQ2 y x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
-               v.reset(OpAMD64ADDQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(y)
+               v.AddArg(x)
                return true
        }
-       // match: (ADDQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (ADDQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (ADDQ (ADDQ y x) x)
+       // cond:
+       // result: (LEAQ2 y x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               if x != v.Args[1] {
                        break
                }
-               v.reset(OpAMD64ADDQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(y)
+               v.AddArg(x)
                return true
        }
-       // match: (ADDQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
+       // match: (ADDQ (ADDQconst [c] x) y)
        // cond:
-       // result: (ADDQ x (MOVQf2i y))
+       // result: (LEAQ1 [c] x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDQ y (ADDQconst [c] x))
+       // cond:
+       // result: (LEAQ1 [c] x y)
+       for {
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDQ x (LEAQ [c] {s} y))
+       // cond: x.Op != OpSB && y.Op != OpSB
+       // result: (LEAQ1 [c] {s} x y)
+       for {
+               _ = v.Args[1]
                x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSDstore {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               if v_2.AuxInt != off {
+               c := v_1.AuxInt
+               s := v_1.Aux
+               y := v_1.Args[0]
+               if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
-               if v_2.Aux != sym {
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDQ (LEAQ [c] {s} y) x)
+       // cond: x.Op != OpSB && y.Op != OpSB
+       // result: (LEAQ1 [c] {s} x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               c := v_0.AuxInt
+               s := v_0.Aux
+               y := v_0.Args[0]
+               x := v.Args[1]
+               if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64ADDQ)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ADDSD_0(v *Value) bool {
-       // match: (ADDSD x l:(MOVSDload [off] {sym} ptr mem))
+func rewriteValueAMD64_OpAMD64ADDQ_20(v *Value) bool {
+       // match: (ADDQ x (NEGQ y))
+       // cond:
+       // result: (SUBQ x y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64SUBQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDQ (NEGQ y) x)
+       // cond:
+       // result: (SUBQ x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64SUBQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADDQ x l:(MOVQload [off] {sym} ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ADDSDmem x [off] {sym} ptr mem)
+       // result: (ADDQmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                l := v.Args[1]
-               if l.Op != OpAMD64MOVSDload {
+               if l.Op != OpAMD64MOVQload {
                        break
                }
                off := l.AuxInt
@@ -2509,7 +2515,7 @@ func rewriteValueAMD64_OpAMD64ADDSD_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64ADDSDmem)
+               v.reset(OpAMD64ADDQmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(x)
@@ -2517,13 +2523,13 @@ func rewriteValueAMD64_OpAMD64ADDSD_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (ADDSD l:(MOVSDload [off] {sym} ptr mem) x)
+       // match: (ADDQ l:(MOVQload [off] {sym} ptr mem) x)
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ADDSDmem x [off] {sym} ptr mem)
+       // result: (ADDQmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                l := v.Args[0]
-               if l.Op != OpAMD64MOVSDload {
+               if l.Op != OpAMD64MOVQload {
                        break
                }
                off := l.AuxInt
@@ -2535,7 +2541,7 @@ func rewriteValueAMD64_OpAMD64ADDSD_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64ADDSDmem)
+               v.reset(OpAMD64ADDQmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(x)
@@ -2545,402 +2551,412 @@ func rewriteValueAMD64_OpAMD64ADDSD_0(v *Value) bool {
        }
        return false
 }
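
The last two rules above use canMergeLoad/clobber to fold a memory operand into the add: when a MOVQload feeds only the addition, (ADDQ x l:(MOVQload ...)) becomes (ADDQmem x [off] {sym} ptr mem), an add that reads one operand straight from memory. A small illustrative Go sketch of code where this typically fires (not taken from this change):

// The load p[i] feeds only the addition, so it can be clobbered and
// merged into an ADDQmem, saving a separate load instruction.
func sumInt64(p []int64, x int64) int64 {
	for i := range p {
		x += p[i]
	}
	return x
}
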
-func rewriteValueAMD64_OpAMD64ADDSDmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (ADDSDmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (ADDSDmem [off1+off2] {sym} val base mem)
+func rewriteValueAMD64_OpAMD64ADDQconst_0(v *Value) bool {
+       // match: (ADDQconst [c] (ADDQ x y))
+       // cond:
+       // result: (LEAQ1 [c] x y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               v.reset(OpAMD64ADDSDmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDSDmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (ADDSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (ADDQconst [c] (SHLQconst [1] x))
+       // cond:
+       // result: (LEAQ1 [c] x x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if v_0.AuxInt != 1 {
                        break
                }
-               v.reset(OpAMD64ADDSDmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(x)
                return true
        }
-       // match: (ADDSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
-       // cond:
-       // result: (ADDSD x (MOVQi2f y))
+       // match: (ADDQconst [c] (LEAQ [d] {s} x))
+       // cond: is32Bit(c+d)
+       // result: (LEAQ [c+d] {s} x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVQstore {
-                       break
-               }
-               if v_2.AuxInt != off {
-                       break
-               }
-               if v_2.Aux != sym {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               d := v_0.AuxInt
+               s := v_0.Aux
+               x := v_0.Args[0]
+               if !(is32Bit(c + d)) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64ADDSD)
+               v.reset(OpAMD64LEAQ)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQi2f, typ.Float64)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ADDSS_0(v *Value) bool {
-       // match: (ADDSS x l:(MOVSSload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ADDSSmem x [off] {sym} ptr mem)
+       // match: (ADDQconst [c] (LEAQ1 [d] {s} x y))
+       // cond: is32Bit(c+d)
+       // result: (LEAQ1 [c+d] {s} x y)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVSSload {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               d := v_0.AuxInt
+               s := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64ADDSSmem)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDSS l:(MOVSSload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ADDSSmem x [off] {sym} ptr mem)
+       // match: (ADDQconst [c] (LEAQ2 [d] {s} x y))
+       // cond: is32Bit(c+d)
+       // result: (LEAQ2 [c+d] {s} x y)
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVSSload {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ2 {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               d := v_0.AuxInt
+               s := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64ADDSSmem)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ADDSSmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (ADDSSmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (ADDSSmem [off1+off2] {sym} val base mem)
+       // match: (ADDQconst [c] (LEAQ4 [d] {s} x y))
+       // cond: is32Bit(c+d)
+       // result: (LEAQ4 [c+d] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               d := v_0.AuxInt
+               s := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64ADDSSmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDSSmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (ADDSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (ADDQconst [c] (LEAQ8 [d] {s} x y))
+       // cond: is32Bit(c+d)
+       // result: (LEAQ8 [c+d] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ8 {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               d := v_0.AuxInt
+               s := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64ADDSSmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ADDSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
+       // match: (ADDQconst [0] x)
        // cond:
-       // result: (ADDSS x (MOVLi2f y))
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVLstore {
-                       break
-               }
-               if v_2.AuxInt != off {
-                       break
-               }
-               if v_2.Aux != sym {
-                       break
-               }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               if v.AuxInt != 0 {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64ADDSS)
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLi2f, typ.Float32)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ANDL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (ANDL (NOTL (SHLL (MOVLconst [1]) y)) x)
-       // cond: !config.nacl
-       // result: (BTRL x y)
+       // match: (ADDQconst [c] (MOVQconst [d]))
+       // cond:
+       // result: (MOVQconst [c+d])
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NOTL {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_0.Args[1]
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 1 {
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = c + d
+               return true
+       }
+       // match: (ADDQconst [c] (ADDQconst [d] x))
+       // cond: is32Bit(c+d)
+       // result: (ADDQconst [c+d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               y := v_0_0.Args[1]
-               x := v.Args[1]
-               if !(!config.nacl) {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64BTRL)
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = c + d
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ANDL x (NOTL (SHLL (MOVLconst [1]) y)))
-       // cond: !config.nacl
-       // result: (BTRL x y)
+       return false
+}
+func rewriteValueAMD64_OpAMD64ADDQconst_10(v *Value) bool {
+       // match: (ADDQconst [off] x:(SP))
+       // cond:
+       // result: (LEAQ [off] x)
        for {
-               _ = v.Args[1]
+               off := v.AuxInt
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NOTL {
-                       break
-               }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVLconst {
+               if x.Op != OpSP {
                        break
                }
-               if v_1_0_0.AuxInt != 1 {
+               v.reset(OpAMD64LEAQ)
+               v.AuxInt = off
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ADDQconstmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (ADDQconstmem [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2)
+       // result: (ADDQconstmem [ValAndOff(valoff1).add(off2)] {sym} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               y := v_1_0.Args[1]
-               if !(!config.nacl) {
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2)) {
                        break
                }
-               v.reset(OpAMD64BTRL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64ADDQconstmem)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (ANDL (MOVLconst [c]) x)
-       // cond: isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-       // result: (BTRLconst [log2uint32(^c)] x)
+       // match: (ADDQconstmem [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+       // result: (ADDQconstmem [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
        for {
+               valoff1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = log2uint32(^c)
-               v.AddArg(x)
+               v.reset(OpAMD64ADDQconstmem)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (ANDL x (MOVLconst [c]))
-       // cond: isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-       // result: (BTRLconst [log2uint32(^c)] x)
+       // match: (ADDQconstmem [valOff] {sym} ptr (MOVSDstore [ValAndOff(valOff).Off()] {sym} ptr x _))
+       // cond:
+       // result: (ADDQconst [ValAndOff(valOff).Val()] (MOVQf2i x))
        for {
+               valOff := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64MOVSDstore {
                        break
                }
-               c := v_1.AuxInt
-               if !(isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
+               if v_1.AuxInt != ValAndOff(valOff).Off() {
                        break
                }
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = log2uint32(^c)
-               v.AddArg(x)
+               if v_1.Aux != sym {
+                       break
+               }
+               _ = v_1.Args[2]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               x := v_1.Args[1]
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = ValAndOff(valOff).Val()
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (ANDL x (MOVLconst [c]))
-       // cond:
-       // result: (ANDLconst [c] x)
+       return false
+}
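
The ADDQconstmem rules above fold an address offset into an AuxInt that already carries a constant value, guarded by ValAndOff(...).canAdd(off2). As a rough sketch of the packing these helpers rely on (the real definitions live in cmd/compile/internal/ssa and may differ in detail): the constant sits in the high 32 bits and the offset in the low 32 bits, so folding must check that the combined offset still fits in 32 bits.

// Simplified, assumption-laden sketch of a ValAndOff-style AuxInt;
// not the compiler's actual implementation.
type valAndOff int64

func (x valAndOff) val() int64 { return int64(x) >> 32 }  // constant operand
func (x valAndOff) off() int64 { return int64(int32(x)) } // address offset

func (x valAndOff) canAdd(off int64) bool {
	sum := x.off() + off
	return sum == int64(int32(sum)) // new offset must still fit in 32 bits
}

func (x valAndOff) add(off int64) int64 {
	return x.val()<<32 | int64(uint32(x.off()+off))
}
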
+func rewriteValueAMD64_OpAMD64ADDQmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (ADDQmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ADDQmem [off1+off2] {sym} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (ANDL (MOVLconst [c]) x)
-       // cond:
-       // result: (ANDLconst [c] x)
+       // match: (ADDQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ADDQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (ANDL x x)
+       // match: (ADDQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
        // cond:
-       // result: x
+       // result: (ADDQ x (MOVQf2i y))
        for {
-               _ = v.Args[1]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                x := v.Args[0]
-               if x != v.Args[1] {
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSDstore {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               if v_2.AuxInt != off {
+                       break
+               }
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64ADDQ)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (ANDL x l:(MOVLload [off] {sym} ptr mem))
+       return false
+}
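
The last ADDQmem rule above removes a store/load round trip: if the memory operand being added was just written by a MOVSDstore to the same [off] {sym} ptr, the reload is replaced by (MOVQf2i y), moving the stored float64 bits directly from the XMM register into an integer register. A hedged Go sketch of one way such a pattern can arise (whether the compiler folds this exact function is not guaranteed):

import "unsafe"

// v is stored as a float64 and the same eight bytes are immediately
// read back as an int64; the rewrite lets the read come from the
// register holding v instead of going through memory.
func addStoredBits(p *float64, v float64, x int64) int64 {
	*p = v
	return x + *(*int64)(unsafe.Pointer(p))
}
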
+func rewriteValueAMD64_OpAMD64ADDSD_0(v *Value) bool {
+       // match: (ADDSD x l:(MOVSDload [off] {sym} ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ANDLmem x [off] {sym} ptr mem)
+       // result: (ADDSDmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               if l.Op != OpAMD64MOVSDload {
                        break
                }
                off := l.AuxInt
@@ -2951,7 +2967,7 @@ func rewriteValueAMD64_OpAMD64ANDL_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64ANDLmem)
+               v.reset(OpAMD64ADDSDmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(x)
@@ -2959,13 +2975,13 @@ func rewriteValueAMD64_OpAMD64ANDL_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (ANDL l:(MOVLload [off] {sym} ptr mem) x)
+       // match: (ADDSD l:(MOVSDload [off] {sym} ptr mem) x)
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ANDLmem x [off] {sym} ptr mem)
+       // result: (ADDSDmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               if l.Op != OpAMD64MOVSDload {
                        break
                }
                off := l.AuxInt
@@ -2977,7 +2993,7 @@ func rewriteValueAMD64_OpAMD64ANDL_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64ANDLmem)
+               v.reset(OpAMD64ADDSDmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(x)
@@ -2987,115 +3003,160 @@ func rewriteValueAMD64_OpAMD64ANDL_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64ADDSDmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (ANDLconst [c] x)
-       // cond: isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-       // result: (BTRLconst [log2uint32(^c)] x)
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (ADDSDmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ADDSDmem [off1+off2] {sym} val base mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = log2uint32(^c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDLconst [c] (ANDLconst [d] x))
-       // cond:
-       // result: (ANDLconst [c & d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & d
-               v.AddArg(x)
+               v.reset(OpAMD64ADDSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (ANDLconst [0xFF] x)
-       // cond:
-       // result: (MOVBQZX x)
+       // match: (ADDSDmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ADDSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               if v.AuxInt != 0xFF {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64MOVBQZX)
-               v.AddArg(x)
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ADDSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (ANDLconst [0xFFFF] x)
+       // match: (ADDSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
        // cond:
-       // result: (MOVWQZX x)
+       // result: (ADDSD x (MOVQi2f y))
        for {
-               if v.AuxInt != 0xFFFF {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVQstore {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64MOVWQZX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDLconst [c] _)
-       // cond: int32(c)==0
-       // result: (MOVLconst [0])
-       for {
-               c := v.AuxInt
-               if !(int32(c) == 0) {
+               if v_2.AuxInt != off {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64ADDSD)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQi2f, typ.Float64)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (ANDLconst [c] x)
-       // cond: int32(c)==-1
-       // result: x
+       return false
+}
+func rewriteValueAMD64_OpAMD64ADDSS_0(v *Value) bool {
+       // match: (ADDSS x l:(MOVSSload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDSSmem x [off] {sym} ptr mem)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                x := v.Args[0]
-               if !(int32(c) == -1) {
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVSSload {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ADDSSmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (ANDLconst [c] (MOVLconst [d]))
-       // cond:
-       // result: (MOVLconst [c&d])
+       // match: (ADDSS l:(MOVSSload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDSSmem x [off] {sym} ptr mem)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVSSload {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = c & d
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ADDSSmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ANDLmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64ADDSSmem_0(v *Value) bool {
        b := v.Block
        _ = b
        typ := &b.Func.Config.Types
        _ = typ
-       // match: (ANDLmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // match: (ADDSSmem [off1] {sym} val (ADDQconst [off2] base) mem)
        // cond: is32Bit(off1+off2)
-       // result: (ANDLmem [off1+off2] {sym} val base mem)
+       // result: (ADDSSmem [off1+off2] {sym} val base mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -3111,7 +3172,7 @@ func rewriteValueAMD64_OpAMD64ANDLmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64ANDLmem)
+               v.reset(OpAMD64ADDSSmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(val)
@@ -3119,9 +3180,9 @@ func rewriteValueAMD64_OpAMD64ANDLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (ANDLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // match: (ADDSSmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (ANDLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // result: (ADDSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -3138,7 +3199,7 @@ func rewriteValueAMD64_OpAMD64ANDLmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64ANDLmem)
+               v.reset(OpAMD64ADDSSmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(val)
@@ -3146,9 +3207,9 @@ func rewriteValueAMD64_OpAMD64ANDLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (ANDLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+       // match: (ADDSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
        // cond:
-       // result: (ANDL x (MOVLf2i y))
+       // result: (ADDSS x (MOVLi2f y))
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -3156,7 +3217,7 @@ func rewriteValueAMD64_OpAMD64ANDLmem_0(v *Value) bool {
                x := v.Args[0]
                ptr := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSSstore {
+               if v_2.Op != OpAMD64MOVLstore {
                        break
                }
                if v_2.AuxInt != off {
@@ -3170,36 +3231,36 @@ func rewriteValueAMD64_OpAMD64ANDLmem_0(v *Value) bool {
                        break
                }
                y := v_2.Args[1]
-               v.reset(OpAMD64ANDL)
+               v.reset(OpAMD64ADDSS)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLi2f, typ.Float32)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64ANDL_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (ANDQ (NOTQ (SHLQ (MOVQconst [1]) y)) x)
+       // match: (ANDL (NOTL (SHLL (MOVLconst [1]) y)) x)
        // cond: !config.nacl
-       // result: (BTRQ x y)
+       // result: (BTRL x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NOTQ {
+               if v_0.Op != OpAMD64NOTL {
                        break
                }
                v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLQ {
+               if v_0_0.Op != OpAMD64SHLL {
                        break
                }
                _ = v_0_0.Args[1]
                v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVQconst {
+               if v_0_0_0.Op != OpAMD64MOVLconst {
                        break
                }
                if v_0_0_0.AuxInt != 1 {
@@ -3210,28 +3271,28 @@ func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
                if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64BTRQ)
+               v.reset(OpAMD64BTRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ANDQ x (NOTQ (SHLQ (MOVQconst [1]) y)))
+       // match: (ANDL x (NOTL (SHLL (MOVLconst [1]) y)))
        // cond: !config.nacl
-       // result: (BTRQ x y)
+       // result: (BTRL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NOTQ {
+               if v_1.Op != OpAMD64NOTL {
                        break
                }
                v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLQ {
+               if v_1_0.Op != OpAMD64SHLL {
                        break
                }
                _ = v_1_0.Args[1]
                v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVQconst {
+               if v_1_0_0.Op != OpAMD64MOVLconst {
                        break
                }
                if v_1_0_0.AuxInt != 1 {
@@ -3241,88 +3302,82 @@ func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
                if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64BTRQ)
+               v.reset(OpAMD64BTRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ANDQ (MOVQconst [c]) x)
-       // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-       // result: (BTRQconst [log2(^c)] x)
+       // match: (ANDL (MOVLconst [c]) x)
+       // cond: isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
+       // result: (BTRLconst [log2uint32(^c)] x)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
+               if !(isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
                        break
                }
-               v.reset(OpAMD64BTRQconst)
-               v.AuxInt = log2(^c)
+               v.reset(OpAMD64BTRLconst)
+               v.AuxInt = log2uint32(^c)
                v.AddArg(x)
                return true
        }
-       // match: (ANDQ x (MOVQconst [c]))
-       // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-       // result: (BTRQconst [log2(^c)] x)
+       // match: (ANDL x (MOVLconst [c]))
+       // cond: isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
+       // result: (BTRLconst [log2uint32(^c)] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
+               if !(isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
                        break
                }
-               v.reset(OpAMD64BTRQconst)
-               v.AuxInt = log2(^c)
+               v.reset(OpAMD64BTRLconst)
+               v.AuxInt = log2uint32(^c)
                v.AddArg(x)
                return true
        }
-       // match: (ANDQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (ANDQconst [c] x)
+       // match: (ANDL x (MOVLconst [c]))
+       // cond:
+       // result: (ANDLconst [c] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQconst)
+               v.reset(OpAMD64ANDLconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ANDQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (ANDQconst [c] x)
+       // match: (ANDL (MOVLconst [c]) x)
+       // cond:
+       // result: (ANDLconst [c] x)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQconst)
+               v.reset(OpAMD64ANDLconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (ANDQ x x)
+       // match: (ANDL x x)
        // cond:
        // result: x
        for {
@@ -3336,14 +3391,14 @@ func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ANDQ x l:(MOVQload [off] {sym} ptr mem))
+       // match: (ANDL x l:(MOVLload [off] {sym} ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ANDQmem x [off] {sym} ptr mem)
+       // result: (ANDLmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               if l.Op != OpAMD64MOVLload {
                        break
                }
                off := l.AuxInt
@@ -3354,7 +3409,7 @@ func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64ANDQmem)
+               v.reset(OpAMD64ANDLmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(x)
@@ -3362,13 +3417,13 @@ func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (ANDQ l:(MOVQload [off] {sym} ptr mem) x)
+       // match: (ANDL l:(MOVLload [off] {sym} ptr mem) x)
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ANDQmem x [off] {sym} ptr mem)
+       // result: (ANDLmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               if l.Op != OpAMD64MOVLload {
                        break
                }
                off := l.AuxInt
@@ -3380,7 +3435,7 @@ func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64ANDQmem)
+               v.reset(OpAMD64ANDLmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(x)
@@ -3390,42 +3445,42 @@ func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (ANDQconst [c] x)
-       // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-       // result: (BTRQconst [log2(^c)] x)
+       // match: (ANDLconst [c] x)
+       // cond: isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
+       // result: (BTRLconst [log2uint32(^c)] x)
        for {
                c := v.AuxInt
                x := v.Args[0]
-               if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
+               if !(isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
                        break
                }
-               v.reset(OpAMD64BTRQconst)
-               v.AuxInt = log2(^c)
+               v.reset(OpAMD64BTRLconst)
+               v.AuxInt = log2uint32(^c)
                v.AddArg(x)
                return true
        }
-       // match: (ANDQconst [c] (ANDQconst [d] x))
+       // match: (ANDLconst [c] (ANDLconst [d] x))
        // cond:
-       // result: (ANDQconst [c & d] x)
+       // result: (ANDLconst [c & d] x)
        for {
                c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQconst {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
                d := v_0.AuxInt
                x := v_0.Args[0]
-               v.reset(OpAMD64ANDQconst)
+               v.reset(OpAMD64ANDLconst)
                v.AuxInt = c & d
                v.AddArg(x)
                return true
        }
-       // match: (ANDQconst [0xFF] x)
+       // match: (ANDLconst [ 0xFF] x)
        // cond:
        // result: (MOVBQZX x)
        for {
@@ -3437,7 +3492,7 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ANDQconst [0xFFFF] x)
+       // match: (ANDLconst [0xFFFF] x)
        // cond:
        // result: (MOVWQZX x)
        for {
@@ -3449,66 +3504,56 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ANDQconst [0xFFFFFFFF] x)
-       // cond:
-       // result: (MOVLQZX x)
-       for {
-               if v.AuxInt != 0xFFFFFFFF {
-                       break
-               }
-               x := v.Args[0]
-               v.reset(OpAMD64MOVLQZX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDQconst [0] _)
-       // cond:
-       // result: (MOVQconst [0])
+       // match: (ANDLconst [c] _)
+       // cond: int32(c)==0
+       // result: (MOVLconst [0])
        for {
-               if v.AuxInt != 0 {
+               c := v.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
+               v.reset(OpAMD64MOVLconst)
                v.AuxInt = 0
                return true
        }
-       // match: (ANDQconst [-1] x)
-       // cond:
+       // match: (ANDLconst [c] x)
+       // cond: int32(c)==-1
        // result: x
        for {
-               if v.AuxInt != -1 {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == -1) {
                        break
                }
-               x := v.Args[0]
                v.reset(OpCopy)
                v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (ANDQconst [c] (MOVQconst [d]))
+       // match: (ANDLconst [c] (MOVLconst [d]))
        // cond:
-       // result: (MOVQconst [c&d])
+       // result: (MOVLconst [c&d])
        for {
                c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                d := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
+               v.reset(OpAMD64MOVLconst)
                v.AuxInt = c & d
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ANDQmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64ANDLmem_0(v *Value) bool {
        b := v.Block
        _ = b
        typ := &b.Func.Config.Types
        _ = typ
-       // match: (ANDQmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // match: (ANDLmem [off1] {sym} val (ADDQconst [off2] base) mem)
        // cond: is32Bit(off1+off2)
-       // result: (ANDQmem [off1+off2] {sym} val base mem)
+       // result: (ANDLmem [off1+off2] {sym} val base mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -3524,7 +3569,7 @@ func rewriteValueAMD64_OpAMD64ANDQmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64ANDQmem)
+               v.reset(OpAMD64ANDLmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(val)
@@ -3532,9 +3577,9 @@ func rewriteValueAMD64_OpAMD64ANDQmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (ANDQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // match: (ANDLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (ANDQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // result: (ANDLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -3551,7 +3596,7 @@ func rewriteValueAMD64_OpAMD64ANDQmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64ANDQmem)
+               v.reset(OpAMD64ANDLmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(val)
@@ -3559,9 +3604,9 @@ func rewriteValueAMD64_OpAMD64ANDQmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (ANDQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
+       // match: (ANDLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
        // cond:
-       // result: (ANDQ x (MOVQf2i y))
+       // result: (ANDL x (MOVLf2i y))
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -3569,7 +3614,7 @@ func rewriteValueAMD64_OpAMD64ANDQmem_0(v *Value) bool {
                x := v.Args[0]
                ptr := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSDstore {
+               if v_2.Op != OpAMD64MOVSSstore {
                        break
                }
                if v_2.AuxInt != off {
@@ -3583,789 +3628,818 @@ func rewriteValueAMD64_OpAMD64ANDQmem_0(v *Value) bool {
                        break
                }
                y := v_2.Args[1]
-               v.reset(OpAMD64ANDQ)
+               v.reset(OpAMD64ANDL)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64BSFQ_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (BSFQ (ORQconst <t> [1<<8] (MOVBQZX x)))
-       // cond:
-       // result: (BSFQ (ORQconst <t> [1<<8] x))
+       config := b.Func.Config
+       _ = config
+       // match: (ANDQ (NOTQ (SHLQ (MOVQconst [1]) y)) x)
+       // cond: !config.nacl
+       // result: (BTRQ x y)
        for {
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ORQconst {
+               if v_0.Op != OpAMD64NOTQ {
                        break
                }
-               t := v_0.Type
-               if v_0.AuxInt != 1<<8 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLQ {
                        break
                }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64MOVBQZX {
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0_0.Args[0]
-               v.reset(OpAMD64BSFQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQconst, t)
-               v0.AuxInt = 1 << 8
-               v0.AddArg(x)
-               v.AddArg(v0)
+               if v_0_0_0.AuxInt != 1 {
+                       break
+               }
+               y := v_0_0.Args[1]
+               x := v.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64BTRQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (BSFQ (ORQconst <t> [1<<16] (MOVWQZX x)))
-       // cond:
-       // result: (BSFQ (ORQconst <t> [1<<16] x))
+       // match: (ANDQ x (NOTQ (SHLQ (MOVQconst [1]) y)))
+       // cond: !config.nacl
+       // result: (BTRQ x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ORQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NOTQ {
                        break
                }
-               t := v_0.Type
-               if v_0.AuxInt != 1<<16 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLQ {
                        break
                }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64MOVWQZX {
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0_0.Args[0]
-               v.reset(OpAMD64BSFQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQconst, t)
-               v0.AuxInt = 1 << 16
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64BTLconst_0(v *Value) bool {
-       // match: (BTLconst [c] (SHRQconst [d] x))
-       // cond: (c+d)<64
-       // result: (BTQconst [c+d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
+               if v_1_0_0.AuxInt != 1 {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !((c + d) < 64) {
+               y := v_1_0.Args[1]
+               if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64BTQconst)
-               v.AuxInt = c + d
+               v.reset(OpAMD64BTRQ)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (BTLconst [c] (SHLQconst [d] x))
-       // cond: c>d
-       // result: (BTLconst [c-d] x)
+       // match: (ANDQ (MOVQconst [c]) x)
+       // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
+       // result: (BTRQconst [log2(^c)] x)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c > d) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
                        break
                }
-               v.reset(OpAMD64BTLconst)
-               v.AuxInt = c - d
+               v.reset(OpAMD64BTRQconst)
+               v.AuxInt = log2(^c)
                v.AddArg(x)
                return true
        }
-       // match: (BTLconst [0] s:(SHRQ x y))
-       // cond:
-       // result: (BTQ y x)
+       // match: (ANDQ x (MOVQconst [c]))
+       // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
+       // result: (BTRQconst [log2(^c)] x)
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               s := v.Args[0]
-               if s.Op != OpAMD64SHRQ {
+               c := v_1.AuxInt
+               if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
                        break
                }
-               _ = s.Args[1]
-               x := s.Args[0]
-               y := s.Args[1]
-               v.reset(OpAMD64BTQ)
-               v.AddArg(y)
+               v.reset(OpAMD64BTRQconst)
+               v.AuxInt = log2(^c)
                v.AddArg(x)
                return true
        }
-       // match: (BTLconst [c] (SHRLconst [d] x))
-       // cond: (c+d)<32
-       // result: (BTLconst [c+d] x)
+       // match: (ANDQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (ANDQconst [c] x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRLconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !((c + d) < 32) {
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64BTLconst)
-               v.AuxInt = c + d
+               v.reset(OpAMD64ANDQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (BTLconst [c] (SHLLconst [d] x))
-       // cond: c>d
-       // result: (BTLconst [c-d] x)
+       // match: (ANDQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (ANDQconst [c] x)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c > d) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64BTLconst)
-               v.AuxInt = c - d
+               v.reset(OpAMD64ANDQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (BTLconst [0] s:(SHRL x y))
+       // match: (ANDQ x x)
        // cond:
-       // result: (BTL y x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               s := v.Args[0]
-               if s.Op != OpAMD64SHRL {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               _ = s.Args[1]
-               x := s.Args[0]
-               y := s.Args[1]
-               v.reset(OpAMD64BTL)
-               v.AddArg(y)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64BTQconst_0(v *Value) bool {
-       // match: (BTQconst [c] (SHRQconst [d] x))
-       // cond: (c+d)<64
-       // result: (BTQconst [c+d] x)
+       // match: (ANDQ x l:(MOVQload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ANDQmem x [off] {sym} ptr mem)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !((c + d) < 64) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64BTQconst)
-               v.AuxInt = c + d
+               v.reset(OpAMD64ANDQmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (BTQconst [c] (SHLQconst [d] x))
-       // cond: c>d
-       // result: (BTQconst [c-d] x)
+       // match: (ANDQ l:(MOVQload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ANDQmem x [off] {sym} ptr mem)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c > d) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64BTQconst)
-               v.AuxInt = c - d
+               v.reset(OpAMD64ANDQmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (BTQconst [0] s:(SHRQ x y))
-       // cond:
-       // result: (BTQ y x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (ANDQconst [c] x)
+       // cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
+       // result: (BTRQconst [log2(^c)] x)
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               s := v.Args[0]
-               if s.Op != OpAMD64SHRQ {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl) {
                        break
                }
-               _ = s.Args[1]
-               x := s.Args[0]
-               y := s.Args[1]
-               v.reset(OpAMD64BTQ)
-               v.AddArg(y)
+               v.reset(OpAMD64BTRQconst)
+               v.AuxInt = log2(^c)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64BTRLconst_0(v *Value) bool {
-       // match: (BTRLconst [c] (BTSLconst [c] x))
+       // match: (ANDQconst [c] (ANDQconst [d] x))
        // cond:
-       // result: (BTRLconst [c] x)
+       // result: (ANDQconst [c & d] x)
        for {
                c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64BTSLconst {
-                       break
-               }
-               if v_0.AuxInt != c {
+               if v_0.Op != OpAMD64ANDQconst {
                        break
                }
+               d := v_0.AuxInt
                x := v_0.Args[0]
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = c
+               v.reset(OpAMD64ANDQconst)
+               v.AuxInt = c & d
                v.AddArg(x)
                return true
        }
-       // match: (BTRLconst [c] (BTCLconst [c] x))
+       // match: (ANDQconst [ 0xFF] x)
        // cond:
-       // result: (BTRLconst [c] x)
+       // result: (MOVBQZX x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64BTCLconst {
-                       break
-               }
-               if v_0.AuxInt != c {
+               if v.AuxInt != 0xFF {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTRLconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               v.reset(OpAMD64MOVBQZX)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64BTRQconst_0(v *Value) bool {
-       // match: (BTRQconst [c] (BTSQconst [c] x))
+       // match: (ANDQconst [0xFFFF] x)
        // cond:
-       // result: (BTRQconst [c] x)
+       // result: (MOVWQZX x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64BTSQconst {
-                       break
-               }
-               if v_0.AuxInt != c {
+               if v.AuxInt != 0xFFFF {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTRQconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               v.reset(OpAMD64MOVWQZX)
                v.AddArg(x)
                return true
        }
-       // match: (BTRQconst [c] (BTCQconst [c] x))
+       // match: (ANDQconst [0xFFFFFFFF] x)
        // cond:
-       // result: (BTRQconst [c] x)
+       // result: (MOVLQZX x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64BTCQconst {
-                       break
-               }
-               if v_0.AuxInt != c {
+               if v.AuxInt != 0xFFFFFFFF {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTRQconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               v.reset(OpAMD64MOVLQZX)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64BTSLconst_0(v *Value) bool {
-       // match: (BTSLconst [c] (BTRLconst [c] x))
+       // match: (ANDQconst [0] _)
        // cond:
-       // result: (BTSLconst [c] x)
+       // result: (MOVQconst [0])
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64BTRLconst {
-                       break
-               }
-               if v_0.AuxInt != c {
+               if v.AuxInt != 0 {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTSLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (BTSLconst [c] (BTCLconst [c] x))
+       // match: (ANDQconst [-1] x)
        // cond:
-       // result: (BTSLconst [c] x)
+       // result: x
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64BTCLconst {
+               if v.AuxInt != -1 {
                        break
                }
-               if v_0.AuxInt != c {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTSLconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64BTSQconst_0(v *Value) bool {
-       // match: (BTSQconst [c] (BTRQconst [c] x))
+       // match: (ANDQconst [c] (MOVQconst [d]))
        // cond:
-       // result: (BTSQconst [c] x)
+       // result: (MOVQconst [c&d])
        for {
                c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64BTRQconst {
-                       break
-               }
-               if v_0.AuxInt != c {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTSQconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = c & d
                return true
        }
-       // match: (BTSQconst [c] (BTCQconst [c] x))
-       // cond:
-       // result: (BTSQconst [c] x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64ANDQmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (ANDQmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ANDQmem [off1+off2] {sym} val base mem)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64BTCQconst {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if v_0.AuxInt != c {
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64BTSQconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64ANDQmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMOVLCC_0(v *Value) bool {
-       // match: (CMOVLCC x y (InvertFlags cond))
-       // cond:
-       // result: (CMOVLLS x y cond)
+       // match: (ANDQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ANDQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[2]
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64InvertFlags {
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLLS)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cond)
-               return true
-       }
-       // match: (CMOVLCC _ x (FlagEQ))
-       // cond:
-       // result: x
-       for {
-               _ = v.Args[2]
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagEQ {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.reset(OpAMD64ANDQmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (CMOVLCC _ x (FlagGT_UGT))
+       // match: (ANDQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
        // cond:
-       // result: x
+       // result: (ANDQ x (MOVQf2i y))
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               x := v.Args[1]
+               x := v.Args[0]
+               ptr := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagGT_UGT {
+               if v_2.Op != OpAMD64MOVSDstore {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               if v_2.AuxInt != off {
+                       break
+               }
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64ANDQ)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (CMOVLCC y _ (FlagGT_ULT))
+       return false
+}
+func rewriteValueAMD64_OpAMD64BSFQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (BSFQ (ORQconst <t> [1<<8] (MOVBQZX x)))
        // cond:
-       // result: y
+       // result: (BSFQ (ORQconst <t> [1<<8] x))
        for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagGT_ULT {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ORQconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               t := v_0.Type
+               if v_0.AuxInt != 1<<8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64MOVBQZX {
+                       break
+               }
+               x := v_0_0.Args[0]
+               v.reset(OpAMD64BSFQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQconst, t)
+               v0.AuxInt = 1 << 8
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (CMOVLCC y _ (FlagLT_ULT))
+       // match: (BSFQ (ORQconst <t> [1<<16] (MOVWQZX x)))
        // cond:
-       // result: y
+       // result: (BSFQ (ORQconst <t> [1<<16] x))
        for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagLT_ULT {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ORQconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               t := v_0.Type
+               if v_0.AuxInt != 1<<16 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64MOVWQZX {
+                       break
+               }
+               x := v_0_0.Args[0]
+               v.reset(OpAMD64BSFQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQconst, t)
+               v0.AuxInt = 1 << 16
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (CMOVLCC _ x (FlagLT_UGT))
-       // cond:
-       // result: x
+       return false
+}
+func rewriteValueAMD64_OpAMD64BTLconst_0(v *Value) bool {
+       // match: (BTLconst [c] (SHRQconst [d] x))
+       // cond: (c+d)<64
+       // result: (BTQconst [c+d] x)
        for {
-               _ = v.Args[2]
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagLT_UGT {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !((c + d) < 64) {
+                       break
+               }
+               v.reset(OpAMD64BTQconst)
+               v.AuxInt = c + d
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMOVLCS_0(v *Value) bool {
-       // match: (CMOVLCS x y (InvertFlags cond))
-       // cond:
-       // result: (CMOVLHI x y cond)
+       // match: (BTLconst [c] (SHLQconst [d] x))
+       // cond: c>d
+       // result: (BTLconst [c-d] x)
        for {
-               _ = v.Args[2]
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64InvertFlags {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLHI)
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c > d) {
+                       break
+               }
+               v.reset(OpAMD64BTLconst)
+               v.AuxInt = c - d
                v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cond)
                return true
        }
-       // match: (CMOVLCS y _ (FlagEQ))
+       // match: (BTLconst [0] s:(SHRQ x y))
        // cond:
-       // result: y
+       // result: (BTQ y x)
        for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagEQ {
+               if v.AuxInt != 0 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       // match: (CMOVLCS y _ (FlagGT_UGT))
-       // cond:
-       // result: y
-       for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagGT_UGT {
+               s := v.Args[0]
+               if s.Op != OpAMD64SHRQ {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
+               _ = s.Args[1]
+               x := s.Args[0]
+               y := s.Args[1]
+               v.reset(OpAMD64BTQ)
                v.AddArg(y)
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLCS _ x (FlagGT_ULT))
-       // cond:
-       // result: x
+       // match: (BTLconst [c] (SHRLconst [d] x))
+       // cond: (c+d)<32
+       // result: (BTLconst [c+d] x)
        for {
-               _ = v.Args[2]
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagGT_ULT {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !((c + d) < 32) {
+                       break
+               }
+               v.reset(OpAMD64BTLconst)
+               v.AuxInt = c + d
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLCS _ x (FlagLT_ULT))
-       // cond:
-       // result: x
+       // match: (BTLconst [c] (SHLLconst [d] x))
+       // cond: c>d
+       // result: (BTLconst [c-d] x)
        for {
-               _ = v.Args[2]
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagLT_ULT {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c > d) {
+                       break
+               }
+               v.reset(OpAMD64BTLconst)
+               v.AuxInt = c - d
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLCS y _ (FlagLT_UGT))
+       // match: (BTLconst [0] s:(SHRL x y))
        // cond:
-       // result: y
+       // result: (BTL y x)
        for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagLT_UGT {
+               if v.AuxInt != 0 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
+               s := v.Args[0]
+               if s.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = s.Args[1]
+               x := s.Args[0]
+               y := s.Args[1]
+               v.reset(OpAMD64BTL)
                v.AddArg(y)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVLEQ_0(v *Value) bool {
-       // match: (CMOVLEQ x y (InvertFlags cond))
-       // cond:
-       // result: (CMOVLEQ x y cond)
+func rewriteValueAMD64_OpAMD64BTQconst_0(v *Value) bool {
+       // match: (BTQconst [c] (SHRQconst [d] x))
+       // cond: (c+d)<64
+       // result: (BTQconst [c+d] x)
        for {
-               _ = v.Args[2]
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64InvertFlags {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLEQ)
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !((c + d) < 64) {
+                       break
+               }
+               v.reset(OpAMD64BTQconst)
+               v.AuxInt = c + d
                v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cond)
                return true
        }
-       // match: (CMOVLEQ _ x (FlagEQ))
-       // cond:
-       // result: x
+       // match: (BTQconst [c] (SHLQconst [d] x))
+       // cond: c>d
+       // result: (BTQconst [c-d] x)
        for {
-               _ = v.Args[2]
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagEQ {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c > d) {
+                       break
+               }
+               v.reset(OpAMD64BTQconst)
+               v.AuxInt = c - d
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLEQ y _ (FlagGT_UGT))
+       // match: (BTQconst [0] s:(SHRQ x y))
        // cond:
-       // result: y
+       // result: (BTQ y x)
        for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagGT_UGT {
+               if v.AuxInt != 0 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       // match: (CMOVLEQ y _ (FlagGT_ULT))
-       // cond:
-       // result: y
-       for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagGT_ULT {
+               s := v.Args[0]
+               if s.Op != OpAMD64SHRQ {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
+               _ = s.Args[1]
+               x := s.Args[0]
+               y := s.Args[1]
+               v.reset(OpAMD64BTQ)
                v.AddArg(y)
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLEQ y _ (FlagLT_ULT))
+       return false
+}
+func rewriteValueAMD64_OpAMD64BTRLconst_0(v *Value) bool {
+       // match: (BTRLconst [c] (BTSLconst [c] x))
        // cond:
-       // result: y
+       // result: (BTRLconst [c] x)
        for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagLT_ULT {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64BTSLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               if v_0.AuxInt != c {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64BTRLconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLEQ y _ (FlagLT_UGT))
+       // match: (BTRLconst [c] (BTCLconst [c] x))
        // cond:
-       // result: y
+       // result: (BTRLconst [c] x)
        for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagLT_UGT {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64BTCLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               if v_0.AuxInt != c {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64BTRLconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVLGE_0(v *Value) bool {
-       // match: (CMOVLGE x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64BTRQconst_0(v *Value) bool {
+       // match: (BTRQconst [c] (BTSQconst [c] x))
        // cond:
-       // result: (CMOVLLE x y cond)
+       // result: (BTRQconst [c] x)
        for {
-               _ = v.Args[2]
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64InvertFlags {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64BTSQconst {
                        break
                }
-               cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLLE)
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cond)
-               return true
-       }
-       // match: (CMOVLGE _ x (FlagEQ))
-       // cond:
-       // result: x
-       for {
-               _ = v.Args[2]
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagEQ {
+               if v_0.AuxInt != c {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               x := v_0.Args[0]
+               v.reset(OpAMD64BTRQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLGE _ x (FlagGT_UGT))
+       // match: (BTRQconst [c] (BTCQconst [c] x))
        // cond:
-       // result: x
+       // result: (BTRQconst [c] x)
        for {
-               _ = v.Args[2]
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagGT_UGT {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64BTCQconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               if v_0.AuxInt != c {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64BTRQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLGE _ x (FlagGT_ULT))
+       return false
+}
+func rewriteValueAMD64_OpAMD64BTSLconst_0(v *Value) bool {
+       // match: (BTSLconst [c] (BTRLconst [c] x))
        // cond:
-       // result: x
+       // result: (BTSLconst [c] x)
        for {
-               _ = v.Args[2]
-               x := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagGT_ULT {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64BTRLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               if v_0.AuxInt != c {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64BTSLconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLGE y _ (FlagLT_ULT))
+       // match: (BTSLconst [c] (BTCLconst [c] x))
        // cond:
-       // result: y
+       // result: (BTSLconst [c] x)
        for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagLT_ULT {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64BTCLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               if v_0.AuxInt != c {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64BTSLconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLGE y _ (FlagLT_UGT))
+       return false
+}
+func rewriteValueAMD64_OpAMD64BTSQconst_0(v *Value) bool {
+       // match: (BTSQconst [c] (BTRQconst [c] x))
        // cond:
-       // result: y
+       // result: (BTSQconst [c] x)
        for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagLT_UGT {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64BTRQconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               if v_0.AuxInt != c {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64BTSQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (BTSQconst [c] (BTCQconst [c] x))
+       // cond:
+       // result: (BTSQconst [c] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64BTCQconst {
+                       break
+               }
+               if v_0.AuxInt != c {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64BTSQconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVLGT_0(v *Value) bool {
-       // match: (CMOVLGT x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLCC_0(v *Value) bool {
+       // match: (CMOVLCC x y (InvertFlags cond))
        // cond:
-       // result: (CMOVLLT x y cond)
+       // result: (CMOVLLS x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -4375,28 +4449,28 @@ func rewriteValueAMD64_OpAMD64CMOVLGT_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLLT)
+               v.reset(OpAMD64CMOVLLS)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVLGT y _ (FlagEQ))
+       // match: (CMOVLCC _ x (FlagEQ))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLGT _ x (FlagGT_UGT))
+       // match: (CMOVLCC _ x (FlagGT_UGT))
        // cond:
        // result: x
        for {
@@ -4411,22 +4485,22 @@ func rewriteValueAMD64_OpAMD64CMOVLGT_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLGT _ x (FlagGT_ULT))
+       // match: (CMOVLCC y _ (FlagGT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVLGT y _ (FlagLT_ULT))
+       // match: (CMOVLCC y _ (FlagLT_ULT))
        // cond:
        // result: y
        for {
@@ -4441,27 +4515,27 @@ func rewriteValueAMD64_OpAMD64CMOVLGT_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVLGT y _ (FlagLT_UGT))
+       // match: (CMOVLCC _ x (FlagLT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVLHI_0(v *Value) bool {
-       // match: (CMOVLHI x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLCS_0(v *Value) bool {
+       // match: (CMOVLCS x y (InvertFlags cond))
        // cond:
-       // result: (CMOVLCS x y cond)
+       // result: (CMOVLHI x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -4471,13 +4545,13 @@ func rewriteValueAMD64_OpAMD64CMOVLHI_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLCS)
+               v.reset(OpAMD64CMOVLHI)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVLHI y _ (FlagEQ))
+       // match: (CMOVLCS y _ (FlagEQ))
        // cond:
        // result: y
        for {
@@ -4492,72 +4566,72 @@ func rewriteValueAMD64_OpAMD64CMOVLHI_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVLHI _ x (FlagGT_UGT))
+       // match: (CMOVLCS y _ (FlagGT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVLHI y _ (FlagGT_ULT))
+       // match: (CMOVLCS _ x (FlagGT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLHI y _ (FlagLT_ULT))
+       // match: (CMOVLCS _ x (FlagLT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLHI _ x (FlagLT_UGT))
+       // match: (CMOVLCS y _ (FlagLT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVLLE_0(v *Value) bool {
-       // match: (CMOVLLE x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLEQ_0(v *Value) bool {
+       // match: (CMOVLEQ x y (InvertFlags cond))
        // cond:
-       // result: (CMOVLGE x y cond)
+       // result: (CMOVLEQ x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -4567,13 +4641,13 @@ func rewriteValueAMD64_OpAMD64CMOVLLE_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLGE)
+               v.reset(OpAMD64CMOVLEQ)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVLLE _ x (FlagEQ))
+       // match: (CMOVLEQ _ x (FlagEQ))
        // cond:
        // result: x
        for {
@@ -4588,7 +4662,7 @@ func rewriteValueAMD64_OpAMD64CMOVLLE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLLE y _ (FlagGT_UGT))
+       // match: (CMOVLEQ y _ (FlagGT_UGT))
        // cond:
        // result: y
        for {
@@ -4603,7 +4677,7 @@ func rewriteValueAMD64_OpAMD64CMOVLLE_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVLLE y _ (FlagGT_ULT))
+       // match: (CMOVLEQ y _ (FlagGT_ULT))
        // cond:
        // result: y
        for {
@@ -4618,42 +4692,42 @@ func rewriteValueAMD64_OpAMD64CMOVLLE_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVLLE _ x (FlagLT_ULT))
+       // match: (CMOVLEQ y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVLLE _ x (FlagLT_UGT))
+       // match: (CMOVLEQ y _ (FlagLT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVLLS_0(v *Value) bool {
-       // match: (CMOVLLS x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLGE_0(v *Value) bool {
+       // match: (CMOVLGE x y (InvertFlags cond))
        // cond:
-       // result: (CMOVLCC x y cond)
+       // result: (CMOVLLE x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -4663,13 +4737,13 @@ func rewriteValueAMD64_OpAMD64CMOVLLS_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLCC)
+               v.reset(OpAMD64CMOVLLE)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVLLS _ x (FlagEQ))
+       // match: (CMOVLGE _ x (FlagEQ))
        // cond:
        // result: x
        for {
@@ -4684,22 +4758,22 @@ func rewriteValueAMD64_OpAMD64CMOVLLS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLLS y _ (FlagGT_UGT))
+       // match: (CMOVLGE _ x (FlagGT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLLS _ x (FlagGT_ULT))
+       // match: (CMOVLGE _ x (FlagGT_ULT))
        // cond:
        // result: x
        for {
@@ -4714,22 +4788,22 @@ func rewriteValueAMD64_OpAMD64CMOVLLS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLLS _ x (FlagLT_ULT))
+       // match: (CMOVLGE y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
        }
-       // match: (CMOVLLS y _ (FlagLT_UGT))
+       // match: (CMOVLGE y _ (FlagLT_UGT))
        // cond:
        // result: y
        for {
@@ -4746,10 +4820,10 @@ func rewriteValueAMD64_OpAMD64CMOVLLS_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVLLT_0(v *Value) bool {
-       // match: (CMOVLLT x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLGT_0(v *Value) bool {
+       // match: (CMOVLGT x y (InvertFlags cond))
        // cond:
-       // result: (CMOVLGT x y cond)
+       // result: (CMOVLLT x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -4759,13 +4833,13 @@ func rewriteValueAMD64_OpAMD64CMOVLLT_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLGT)
+               v.reset(OpAMD64CMOVLLT)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVLLT y _ (FlagEQ))
+       // match: (CMOVLGT y _ (FlagEQ))
        // cond:
        // result: y
        for {
@@ -4780,72 +4854,72 @@ func rewriteValueAMD64_OpAMD64CMOVLLT_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVLLT y _ (FlagGT_UGT))
+       // match: (CMOVLGT _ x (FlagGT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLLT y _ (FlagGT_ULT))
+       // match: (CMOVLGT _ x (FlagGT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVLLT _ x (FlagLT_ULT))
+       // match: (CMOVLGT y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVLLT _ x (FlagLT_UGT))
+       // match: (CMOVLGT y _ (FlagLT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVLNE_0(v *Value) bool {
-       // match: (CMOVLNE x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLHI_0(v *Value) bool {
+       // match: (CMOVLHI x y (InvertFlags cond))
        // cond:
-       // result: (CMOVLNE x y cond)
+       // result: (CMOVLCS x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -4855,13 +4929,13 @@ func rewriteValueAMD64_OpAMD64CMOVLNE_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVLNE)
+               v.reset(OpAMD64CMOVLCS)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVLNE y _ (FlagEQ))
+       // match: (CMOVLHI y _ (FlagEQ))
        // cond:
        // result: y
        for {
@@ -4876,7 +4950,7 @@ func rewriteValueAMD64_OpAMD64CMOVLNE_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVLNE _ x (FlagGT_UGT))
+       // match: (CMOVLHI _ x (FlagGT_UGT))
        // cond:
        // result: x
        for {
@@ -4891,37 +4965,37 @@ func rewriteValueAMD64_OpAMD64CMOVLNE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVLNE _ x (FlagGT_ULT))
+       // match: (CMOVLHI y _ (FlagGT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVLNE _ x (FlagLT_ULT))
+       // match: (CMOVLHI y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVLNE _ x (FlagLT_UGT))
+       // match: (CMOVLHI _ x (FlagLT_UGT))
        // cond:
        // result: x
        for {
@@ -4938,10 +5012,10 @@ func rewriteValueAMD64_OpAMD64CMOVLNE_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQCC_0(v *Value) bool {
-       // match: (CMOVQCC x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLLE_0(v *Value) bool {
+       // match: (CMOVLLE x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQLS x y cond)
+       // result: (CMOVLGE x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -4951,13 +5025,13 @@ func rewriteValueAMD64_OpAMD64CMOVQCC_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQLS)
+               v.reset(OpAMD64CMOVLGE)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQCC _ x (FlagEQ))
+       // match: (CMOVLLE _ x (FlagEQ))
        // cond:
        // result: x
        for {
@@ -4972,22 +5046,22 @@ func rewriteValueAMD64_OpAMD64CMOVQCC_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQCC _ x (FlagGT_UGT))
+       // match: (CMOVLLE y _ (FlagGT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQCC y _ (FlagGT_ULT))
+       // match: (CMOVLLE y _ (FlagGT_ULT))
        // cond:
        // result: y
        for {
@@ -5002,22 +5076,22 @@ func rewriteValueAMD64_OpAMD64CMOVQCC_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQCC y _ (FlagLT_ULT))
+       // match: (CMOVLLE _ x (FlagLT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQCC _ x (FlagLT_UGT))
+       // match: (CMOVLLE _ x (FlagLT_UGT))
        // cond:
        // result: x
        for {
@@ -5034,10 +5108,10 @@ func rewriteValueAMD64_OpAMD64CMOVQCC_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQCS_0(v *Value) bool {
-       // match: (CMOVQCS x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLLS_0(v *Value) bool {
+       // match: (CMOVLLS x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQHI x y cond)
+       // result: (CMOVLCC x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5047,28 +5121,28 @@ func rewriteValueAMD64_OpAMD64CMOVQCS_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQHI)
+               v.reset(OpAMD64CMOVLCC)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQCS y _ (FlagEQ))
+       // match: (CMOVLLS _ x (FlagEQ))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQCS y _ (FlagGT_UGT))
+       // match: (CMOVLLS y _ (FlagGT_UGT))
        // cond:
        // result: y
        for {
@@ -5083,7 +5157,7 @@ func rewriteValueAMD64_OpAMD64CMOVQCS_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQCS _ x (FlagGT_ULT))
+       // match: (CMOVLLS _ x (FlagGT_ULT))
        // cond:
        // result: x
        for {
@@ -5098,7 +5172,7 @@ func rewriteValueAMD64_OpAMD64CMOVQCS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQCS _ x (FlagLT_ULT))
+       // match: (CMOVLLS _ x (FlagLT_ULT))
        // cond:
        // result: x
        for {
@@ -5113,7 +5187,7 @@ func rewriteValueAMD64_OpAMD64CMOVQCS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQCS y _ (FlagLT_UGT))
+       // match: (CMOVLLS y _ (FlagLT_UGT))
        // cond:
        // result: y
        for {
@@ -5130,10 +5204,10 @@ func rewriteValueAMD64_OpAMD64CMOVQCS_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
-       // match: (CMOVQEQ x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLLT_0(v *Value) bool {
+       // match: (CMOVLLT x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQEQ x y cond)
+       // result: (CMOVLGT x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5143,28 +5217,28 @@ func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQEQ)
+               v.reset(OpAMD64CMOVLGT)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQEQ _ x (FlagEQ))
+       // match: (CMOVLLT y _ (FlagEQ))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQEQ y _ (FlagGT_UGT))
+       // match: (CMOVLLT y _ (FlagGT_UGT))
        // cond:
        // result: y
        for {
@@ -5179,7 +5253,7 @@ func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQEQ y _ (FlagGT_ULT))
+       // match: (CMOVLLT y _ (FlagGT_ULT))
        // cond:
        // result: y
        for {
@@ -5194,56 +5268,29 @@ func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQEQ y _ (FlagLT_ULT))
+       // match: (CMOVLLT _ x (FlagLT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQEQ y _ (FlagLT_UGT))
+       // match: (CMOVLLT _ x (FlagLT_UGT))
        // cond:
-       // result: y
-       for {
-               _ = v.Args[2]
-               y := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64FlagLT_UGT {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       // match: (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _))))
-       // cond: c != 0
        // result: x
        for {
                _ = v.Args[2]
-               x := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpSelect1 {
-                       break
-               }
-               v_2_0 := v_2.Args[0]
-               if v_2_0.Op != OpAMD64BSFQ {
-                       break
-               }
-               v_2_0_0 := v_2_0.Args[0]
-               if v_2_0_0.Op != OpAMD64ORQconst {
-                       break
-               }
-               c := v_2_0_0.AuxInt
-               if !(c != 0) {
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
@@ -5253,10 +5300,10 @@ func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQGE_0(v *Value) bool {
-       // match: (CMOVQGE x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVLNE_0(v *Value) bool {
+       // match: (CMOVLNE x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQLE x y cond)
+       // result: (CMOVLNE x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5266,28 +5313,28 @@ func rewriteValueAMD64_OpAMD64CMOVQGE_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQLE)
+               v.reset(OpAMD64CMOVLNE)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQGE _ x (FlagEQ))
+       // match: (CMOVLNE y _ (FlagEQ))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQGE _ x (FlagGT_UGT))
+       // match: (CMOVLNE _ x (FlagGT_UGT))
        // cond:
        // result: x
        for {
@@ -5302,7 +5349,7 @@ func rewriteValueAMD64_OpAMD64CMOVQGE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQGE _ x (FlagGT_ULT))
+       // match: (CMOVLNE _ x (FlagGT_ULT))
        // cond:
        // result: x
        for {
@@ -5317,42 +5364,42 @@ func rewriteValueAMD64_OpAMD64CMOVQGE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQGE y _ (FlagLT_ULT))
+       // match: (CMOVLNE _ x (FlagLT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQGE y _ (FlagLT_UGT))
+       // match: (CMOVLNE _ x (FlagLT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQGT_0(v *Value) bool {
-       // match: (CMOVQGT x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQCC_0(v *Value) bool {
+       // match: (CMOVQCC x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQLT x y cond)
+       // result: (CMOVQLS x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5362,28 +5409,28 @@ func rewriteValueAMD64_OpAMD64CMOVQGT_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQLT)
+               v.reset(OpAMD64CMOVQLS)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQGT y _ (FlagEQ))
+       // match: (CMOVQCC _ x (FlagEQ))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQGT _ x (FlagGT_UGT))
+       // match: (CMOVQCC _ x (FlagGT_UGT))
        // cond:
        // result: x
        for {
@@ -5398,22 +5445,22 @@ func rewriteValueAMD64_OpAMD64CMOVQGT_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQGT _ x (FlagGT_ULT))
+       // match: (CMOVQCC y _ (FlagGT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQGT y _ (FlagLT_ULT))
+       // match: (CMOVQCC y _ (FlagLT_ULT))
        // cond:
        // result: y
        for {
@@ -5428,27 +5475,27 @@ func rewriteValueAMD64_OpAMD64CMOVQGT_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQGT y _ (FlagLT_UGT))
+       // match: (CMOVQCC _ x (FlagLT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQHI_0(v *Value) bool {
-       // match: (CMOVQHI x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQCS_0(v *Value) bool {
+       // match: (CMOVQCS x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQCS x y cond)
+       // result: (CMOVQHI x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5458,13 +5505,13 @@ func rewriteValueAMD64_OpAMD64CMOVQHI_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQCS)
+               v.reset(OpAMD64CMOVQHI)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQHI y _ (FlagEQ))
+       // match: (CMOVQCS y _ (FlagEQ))
        // cond:
        // result: y
        for {
@@ -5479,72 +5526,72 @@ func rewriteValueAMD64_OpAMD64CMOVQHI_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQHI _ x (FlagGT_UGT))
+       // match: (CMOVQCS y _ (FlagGT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQHI y _ (FlagGT_ULT))
+       // match: (CMOVQCS _ x (FlagGT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQHI y _ (FlagLT_ULT))
+       // match: (CMOVQCS _ x (FlagLT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQHI _ x (FlagLT_UGT))
+       // match: (CMOVQCS y _ (FlagLT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQLE_0(v *Value) bool {
-       // match: (CMOVQLE x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
+       // match: (CMOVQEQ x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQGE x y cond)
+       // result: (CMOVQEQ x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5554,13 +5601,13 @@ func rewriteValueAMD64_OpAMD64CMOVQLE_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQGE)
+               v.reset(OpAMD64CMOVQEQ)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQLE _ x (FlagEQ))
+       // match: (CMOVQEQ _ x (FlagEQ))
        // cond:
        // result: x
        for {
@@ -5575,7 +5622,7 @@ func rewriteValueAMD64_OpAMD64CMOVQLE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQLE y _ (FlagGT_UGT))
+       // match: (CMOVQEQ y _ (FlagGT_UGT))
        // cond:
        // result: y
        for {
@@ -5590,7 +5637,7 @@ func rewriteValueAMD64_OpAMD64CMOVQLE_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQLE y _ (FlagGT_ULT))
+       // match: (CMOVQEQ y _ (FlagGT_ULT))
        // cond:
        // result: y
        for {
@@ -5605,42 +5652,69 @@ func rewriteValueAMD64_OpAMD64CMOVQLE_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQLE _ x (FlagLT_ULT))
+       // match: (CMOVQEQ y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQLE _ x (FlagLT_UGT))
+       // match: (CMOVQEQ y _ (FlagLT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _))))
+       // cond: c != 0
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpSelect1 {
+                       break
+               }
+               v_2_0 := v_2.Args[0]
+               if v_2_0.Op != OpAMD64BSFQ {
+                       break
+               }
+               v_2_0_0 := v_2_0.Args[0]
+               if v_2_0_0.Op != OpAMD64ORQconst {
+                       break
+               }
+               c := v_2_0_0.AuxInt
+               if !(c != 0) {
+                       break
+               }
+               v.reset(OpCopy)
                v.Type = x.Type
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQLS_0(v *Value) bool {
-       // match: (CMOVQLS x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQGE_0(v *Value) bool {
+       // match: (CMOVQGE x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQCC x y cond)
+       // result: (CMOVQLE x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5650,13 +5724,13 @@ func rewriteValueAMD64_OpAMD64CMOVQLS_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQCC)
+               v.reset(OpAMD64CMOVQLE)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQLS _ x (FlagEQ))
+       // match: (CMOVQGE _ x (FlagEQ))
        // cond:
        // result: x
        for {
@@ -5671,22 +5745,22 @@ func rewriteValueAMD64_OpAMD64CMOVQLS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQLS y _ (FlagGT_UGT))
+       // match: (CMOVQGE _ x (FlagGT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQLS _ x (FlagGT_ULT))
+       // match: (CMOVQGE _ x (FlagGT_ULT))
        // cond:
        // result: x
        for {
@@ -5701,22 +5775,22 @@ func rewriteValueAMD64_OpAMD64CMOVQLS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQLS _ x (FlagLT_ULT))
+       // match: (CMOVQGE y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQLS y _ (FlagLT_UGT))
+       // match: (CMOVQGE y _ (FlagLT_UGT))
        // cond:
        // result: y
        for {
@@ -5733,10 +5807,10 @@ func rewriteValueAMD64_OpAMD64CMOVQLS_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQLT_0(v *Value) bool {
-       // match: (CMOVQLT x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQGT_0(v *Value) bool {
+       // match: (CMOVQGT x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQGT x y cond)
+       // result: (CMOVQLT x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5746,13 +5820,13 @@ func rewriteValueAMD64_OpAMD64CMOVQLT_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQGT)
+               v.reset(OpAMD64CMOVQLT)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQLT y _ (FlagEQ))
+       // match: (CMOVQGT y _ (FlagEQ))
        // cond:
        // result: y
        for {
@@ -5767,72 +5841,72 @@ func rewriteValueAMD64_OpAMD64CMOVQLT_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQLT y _ (FlagGT_UGT))
+       // match: (CMOVQGT _ x (FlagGT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQLT y _ (FlagGT_ULT))
+       // match: (CMOVQGT _ x (FlagGT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVQLT _ x (FlagLT_ULT))
+       // match: (CMOVQGT y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQLT _ x (FlagLT_UGT))
+       // match: (CMOVQGT y _ (FlagLT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQNE_0(v *Value) bool {
-       // match: (CMOVQNE x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQHI_0(v *Value) bool {
+       // match: (CMOVQHI x y (InvertFlags cond))
        // cond:
-       // result: (CMOVQNE x y cond)
+       // result: (CMOVQCS x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5842,13 +5916,13 @@ func rewriteValueAMD64_OpAMD64CMOVQNE_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVQNE)
+               v.reset(OpAMD64CMOVQCS)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVQNE y _ (FlagEQ))
+       // match: (CMOVQHI y _ (FlagEQ))
        // cond:
        // result: y
        for {
@@ -5863,7 +5937,7 @@ func rewriteValueAMD64_OpAMD64CMOVQNE_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVQNE _ x (FlagGT_UGT))
+       // match: (CMOVQHI _ x (FlagGT_UGT))
        // cond:
        // result: x
        for {
@@ -5878,37 +5952,37 @@ func rewriteValueAMD64_OpAMD64CMOVQNE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVQNE _ x (FlagGT_ULT))
+       // match: (CMOVQHI y _ (FlagGT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQNE _ x (FlagLT_ULT))
+       // match: (CMOVQHI y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVQNE _ x (FlagLT_UGT))
+       // match: (CMOVQHI _ x (FlagLT_UGT))
        // cond:
        // result: x
        for {
@@ -5925,10 +5999,10 @@ func rewriteValueAMD64_OpAMD64CMOVQNE_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWCC_0(v *Value) bool {
-       // match: (CMOVWCC x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQLE_0(v *Value) bool {
+       // match: (CMOVQLE x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWLS x y cond)
+       // result: (CMOVQGE x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -5938,13 +6012,13 @@ func rewriteValueAMD64_OpAMD64CMOVWCC_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWLS)
+               v.reset(OpAMD64CMOVQGE)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWCC _ x (FlagEQ))
+       // match: (CMOVQLE _ x (FlagEQ))
        // cond:
        // result: x
        for {
@@ -5959,22 +6033,22 @@ func rewriteValueAMD64_OpAMD64CMOVWCC_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWCC _ x (FlagGT_UGT))
+       // match: (CMOVQLE y _ (FlagGT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWCC y _ (FlagGT_ULT))
+       // match: (CMOVQLE y _ (FlagGT_ULT))
        // cond:
        // result: y
        for {
@@ -5989,22 +6063,22 @@ func rewriteValueAMD64_OpAMD64CMOVWCC_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWCC y _ (FlagLT_ULT))
+       // match: (CMOVQLE _ x (FlagLT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWCC _ x (FlagLT_UGT))
+       // match: (CMOVQLE _ x (FlagLT_UGT))
        // cond:
        // result: x
        for {
@@ -6021,10 +6095,10 @@ func rewriteValueAMD64_OpAMD64CMOVWCC_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWCS_0(v *Value) bool {
-       // match: (CMOVWCS x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQLS_0(v *Value) bool {
+       // match: (CMOVQLS x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWHI x y cond)
+       // result: (CMOVQCC x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -6034,28 +6108,28 @@ func rewriteValueAMD64_OpAMD64CMOVWCS_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWHI)
+               v.reset(OpAMD64CMOVQCC)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWCS y _ (FlagEQ))
+       // match: (CMOVQLS _ x (FlagEQ))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWCS y _ (FlagGT_UGT))
+       // match: (CMOVQLS y _ (FlagGT_UGT))
        // cond:
        // result: y
        for {
@@ -6070,7 +6144,7 @@ func rewriteValueAMD64_OpAMD64CMOVWCS_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWCS _ x (FlagGT_ULT))
+       // match: (CMOVQLS _ x (FlagGT_ULT))
        // cond:
        // result: x
        for {
@@ -6085,7 +6159,7 @@ func rewriteValueAMD64_OpAMD64CMOVWCS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWCS _ x (FlagLT_ULT))
+       // match: (CMOVQLS _ x (FlagLT_ULT))
        // cond:
        // result: x
        for {
@@ -6100,7 +6174,7 @@ func rewriteValueAMD64_OpAMD64CMOVWCS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWCS y _ (FlagLT_UGT))
+       // match: (CMOVQLS y _ (FlagLT_UGT))
        // cond:
        // result: y
        for {
@@ -6117,10 +6191,10 @@ func rewriteValueAMD64_OpAMD64CMOVWCS_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWEQ_0(v *Value) bool {
-       // match: (CMOVWEQ x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQLT_0(v *Value) bool {
+       // match: (CMOVQLT x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWEQ x y cond)
+       // result: (CMOVQGT x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -6130,28 +6204,28 @@ func rewriteValueAMD64_OpAMD64CMOVWEQ_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWEQ)
+               v.reset(OpAMD64CMOVQGT)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWEQ _ x (FlagEQ))
+       // match: (CMOVQLT y _ (FlagEQ))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWEQ y _ (FlagGT_UGT))
+       // match: (CMOVQLT y _ (FlagGT_UGT))
        // cond:
        // result: y
        for {
@@ -6166,7 +6240,7 @@ func rewriteValueAMD64_OpAMD64CMOVWEQ_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWEQ y _ (FlagGT_ULT))
+       // match: (CMOVQLT y _ (FlagGT_ULT))
        // cond:
        // result: y
        for {
@@ -6181,42 +6255,42 @@ func rewriteValueAMD64_OpAMD64CMOVWEQ_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWEQ y _ (FlagLT_ULT))
+       // match: (CMOVQLT _ x (FlagLT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWEQ y _ (FlagLT_UGT))
+       // match: (CMOVQLT _ x (FlagLT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWGE_0(v *Value) bool {
-       // match: (CMOVWGE x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVQNE_0(v *Value) bool {
+       // match: (CMOVQNE x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWLE x y cond)
+       // result: (CMOVQNE x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -6226,28 +6300,28 @@ func rewriteValueAMD64_OpAMD64CMOVWGE_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWLE)
+               v.reset(OpAMD64CMOVQNE)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWGE _ x (FlagEQ))
+       // match: (CMOVQNE y _ (FlagEQ))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWGE _ x (FlagGT_UGT))
+       // match: (CMOVQNE _ x (FlagGT_UGT))
        // cond:
        // result: x
        for {
@@ -6262,7 +6336,7 @@ func rewriteValueAMD64_OpAMD64CMOVWGE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWGE _ x (FlagGT_ULT))
+       // match: (CMOVQNE _ x (FlagGT_ULT))
        // cond:
        // result: x
        for {
@@ -6277,42 +6351,42 @@ func rewriteValueAMD64_OpAMD64CMOVWGE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWGE y _ (FlagLT_ULT))
+       // match: (CMOVQNE _ x (FlagLT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWGE y _ (FlagLT_UGT))
+       // match: (CMOVQNE _ x (FlagLT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWGT_0(v *Value) bool {
-       // match: (CMOVWGT x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVWCC_0(v *Value) bool {
+       // match: (CMOVWCC x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWLT x y cond)
+       // result: (CMOVWLS x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -6322,28 +6396,28 @@ func rewriteValueAMD64_OpAMD64CMOVWGT_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWLT)
+               v.reset(OpAMD64CMOVWLS)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWGT y _ (FlagEQ))
+       // match: (CMOVWCC _ x (FlagEQ))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWGT _ x (FlagGT_UGT))
+       // match: (CMOVWCC _ x (FlagGT_UGT))
        // cond:
        // result: x
        for {
@@ -6358,22 +6432,22 @@ func rewriteValueAMD64_OpAMD64CMOVWGT_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWGT _ x (FlagGT_ULT))
+       // match: (CMOVWCC y _ (FlagGT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWGT y _ (FlagLT_ULT))
+       // match: (CMOVWCC y _ (FlagLT_ULT))
        // cond:
        // result: y
        for {
@@ -6388,27 +6462,27 @@ func rewriteValueAMD64_OpAMD64CMOVWGT_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWGT y _ (FlagLT_UGT))
+       // match: (CMOVWCC _ x (FlagLT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWHI_0(v *Value) bool {
-       // match: (CMOVWHI x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVWCS_0(v *Value) bool {
+       // match: (CMOVWCS x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWCS x y cond)
+       // result: (CMOVWHI x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -6418,13 +6492,13 @@ func rewriteValueAMD64_OpAMD64CMOVWHI_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWCS)
+               v.reset(OpAMD64CMOVWHI)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWHI y _ (FlagEQ))
+       // match: (CMOVWCS y _ (FlagEQ))
        // cond:
        // result: y
        for {
@@ -6439,72 +6513,72 @@ func rewriteValueAMD64_OpAMD64CMOVWHI_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWHI _ x (FlagGT_UGT))
+       // match: (CMOVWCS y _ (FlagGT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWHI y _ (FlagGT_ULT))
+       // match: (CMOVWCS _ x (FlagGT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWHI y _ (FlagLT_ULT))
+       // match: (CMOVWCS _ x (FlagLT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWHI _ x (FlagLT_UGT))
+       // match: (CMOVWCS y _ (FlagLT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWLE_0(v *Value) bool {
-       // match: (CMOVWLE x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVWEQ_0(v *Value) bool {
+       // match: (CMOVWEQ x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWGE x y cond)
+       // result: (CMOVWEQ x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -6514,13 +6588,13 @@ func rewriteValueAMD64_OpAMD64CMOVWLE_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWGE)
+               v.reset(OpAMD64CMOVWEQ)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWLE _ x (FlagEQ))
+       // match: (CMOVWEQ _ x (FlagEQ))
        // cond:
        // result: x
        for {
@@ -6535,7 +6609,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWLE y _ (FlagGT_UGT))
+       // match: (CMOVWEQ y _ (FlagGT_UGT))
        // cond:
        // result: y
        for {
@@ -6550,7 +6624,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLE_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWLE y _ (FlagGT_ULT))
+       // match: (CMOVWEQ y _ (FlagGT_ULT))
        // cond:
        // result: y
        for {
@@ -6565,42 +6639,42 @@ func rewriteValueAMD64_OpAMD64CMOVWLE_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWLE _ x (FlagLT_ULT))
+       // match: (CMOVWEQ y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWLE _ x (FlagLT_UGT))
+       // match: (CMOVWEQ y _ (FlagLT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWLS_0(v *Value) bool {
-       // match: (CMOVWLS x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVWGE_0(v *Value) bool {
+       // match: (CMOVWGE x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWCC x y cond)
+       // result: (CMOVWLE x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -6610,13 +6684,13 @@ func rewriteValueAMD64_OpAMD64CMOVWLS_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWCC)
+               v.reset(OpAMD64CMOVWLE)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWLS _ x (FlagEQ))
+       // match: (CMOVWGE _ x (FlagEQ))
        // cond:
        // result: x
        for {
@@ -6631,22 +6705,22 @@ func rewriteValueAMD64_OpAMD64CMOVWLS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWLS y _ (FlagGT_UGT))
+       // match: (CMOVWGE _ x (FlagGT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWLS _ x (FlagGT_ULT))
+       // match: (CMOVWGE _ x (FlagGT_ULT))
        // cond:
        // result: x
        for {
@@ -6661,22 +6735,22 @@ func rewriteValueAMD64_OpAMD64CMOVWLS_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWLS _ x (FlagLT_ULT))
+       // match: (CMOVWGE y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWLS y _ (FlagLT_UGT))
+       // match: (CMOVWGE y _ (FlagLT_UGT))
        // cond:
        // result: y
        for {
@@ -6693,10 +6767,10 @@ func rewriteValueAMD64_OpAMD64CMOVWLS_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWLT_0(v *Value) bool {
-       // match: (CMOVWLT x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVWGT_0(v *Value) bool {
+       // match: (CMOVWGT x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWGT x y cond)
+       // result: (CMOVWLT x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -6706,13 +6780,13 @@ func rewriteValueAMD64_OpAMD64CMOVWLT_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWGT)
+               v.reset(OpAMD64CMOVWLT)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWLT y _ (FlagEQ))
+       // match: (CMOVWGT y _ (FlagEQ))
        // cond:
        // result: y
        for {
@@ -6727,72 +6801,72 @@ func rewriteValueAMD64_OpAMD64CMOVWLT_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWLT y _ (FlagGT_UGT))
+       // match: (CMOVWGT _ x (FlagGT_UGT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWLT y _ (FlagGT_ULT))
+       // match: (CMOVWGT _ x (FlagGT_ULT))
        // cond:
-       // result: y
+       // result: x
        for {
                _ = v.Args[2]
-               y := v.Args[0]
+               x := v.Args[1]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMOVWLT _ x (FlagLT_ULT))
+       // match: (CMOVWGT y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWLT _ x (FlagLT_UGT))
+       // match: (CMOVWGT y _ (FlagLT_UGT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVWNE_0(v *Value) bool {
-       // match: (CMOVWNE x y (InvertFlags cond))
+func rewriteValueAMD64_OpAMD64CMOVWHI_0(v *Value) bool {
+       // match: (CMOVWHI x y (InvertFlags cond))
        // cond:
-       // result: (CMOVWNE x y cond)
+       // result: (CMOVWCS x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
@@ -6802,13 +6876,13 @@ func rewriteValueAMD64_OpAMD64CMOVWNE_0(v *Value) bool {
                        break
                }
                cond := v_2.Args[0]
-               v.reset(OpAMD64CMOVWNE)
+               v.reset(OpAMD64CMOVWCS)
                v.AddArg(x)
                v.AddArg(y)
                v.AddArg(cond)
                return true
        }
-       // match: (CMOVWNE y _ (FlagEQ))
+       // match: (CMOVWHI y _ (FlagEQ))
        // cond:
        // result: y
        for {
@@ -6823,7 +6897,7 @@ func rewriteValueAMD64_OpAMD64CMOVWNE_0(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       // match: (CMOVWNE _ x (FlagGT_UGT))
+       // match: (CMOVWHI _ x (FlagGT_UGT))
        // cond:
        // result: x
        for {
@@ -6838,37 +6912,37 @@ func rewriteValueAMD64_OpAMD64CMOVWNE_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (CMOVWNE _ x (FlagGT_ULT))
+       // match: (CMOVWHI y _ (FlagGT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWNE _ x (FlagLT_ULT))
+       // match: (CMOVWHI y _ (FlagLT_ULT))
        // cond:
-       // result: x
+       // result: y
        for {
                _ = v.Args[2]
-               x := v.Args[1]
+               y := v.Args[0]
                v_2 := v.Args[2]
                if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMOVWNE _ x (FlagLT_UGT))
+       // match: (CMOVWHI _ x (FlagLT_UGT))
        // cond:
        // result: x
        for {
@@ -6885,310 +6959,396 @@ func rewriteValueAMD64_OpAMD64CMOVWNE_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPB_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPB x (MOVLconst [c]))
+func rewriteValueAMD64_OpAMD64CMOVWLE_0(v *Value) bool {
+       // match: (CMOVWLE x y (InvertFlags cond))
        // cond:
-       // result: (CMPBconst x [int64(int8(c))])
+       // result: (CMOVWGE x y cond)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64CMPBconst)
-               v.AuxInt = int64(int8(c))
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWGE)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPB (MOVLconst [c]) x)
+       // match: (CMOVWLE _ x (FlagEQ))
        // cond:
-       // result: (InvertFlags (CMPBconst x [int64(int8(c))]))
+       // result: x
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v0.AuxInt = int64(int8(c))
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPBmem {sym} [off] ptr x mem)
+       // match: (CMOVWLE y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVBload {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWLE y _ (FlagGT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWLE _ x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
                x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64CMPBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (CMPB x l:(MOVBload {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPBmem {sym} [off] ptr x mem))
+       // match: (CMOVWLE _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVBload {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBmem, types.TypeFlags)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(x)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPBconst_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)==int8(y)
-       // result: (FlagEQ)
+func rewriteValueAMD64_OpAMD64CMOVWLS_0(v *Value) bool {
+       // match: (CMOVWLS x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWCC x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) == int8(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagEQ)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWCC)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)<int8(y) && uint8(x)<uint8(y)
-       // result: (FlagLT_ULT)
+       // match: (CMOVWLS _ x (FlagEQ))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) < int8(y) && uint8(x) < uint8(y)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)<int8(y) && uint8(x)>uint8(y)
-       // result: (FlagLT_UGT)
+       // match: (CMOVWLS y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) < int8(y) && uint8(x) > uint8(y)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagLT_UGT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)>int8(y) && uint8(x)<uint8(y)
-       // result: (FlagGT_ULT)
+       // match: (CMOVWLS _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) > int8(y) && uint8(x) < uint8(y)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagGT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)>int8(y) && uint8(x)>uint8(y)
-       // result: (FlagGT_UGT)
+       // match: (CMOVWLS _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) > int8(y) && uint8(x) > uint8(y)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagGT_UGT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPBconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= int8(m) && int8(m) < int8(n)
-       // result: (FlagLT_ULT)
+       // match: (CMOVWLS y _ (FlagLT_UGT))
+       // cond:
+       // result: y
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               m := v_0.AuxInt
-               if !(0 <= int8(m) && int8(m) < int8(n)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst (ANDL x y) [0])
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWLT_0(v *Value) bool {
+       // match: (CMOVWLT x y (InvertFlags cond))
        // cond:
-       // result: (TESTB x y)
+       // result: (CMOVWGT x y cond)
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64TESTB)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWGT)
                v.AddArg(x)
                v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (ANDLconst [c] x) [0])
+       // match: (CMOVWLT y _ (FlagEQ))
        // cond:
-       // result: (TESTBconst [int64(int8(c))] x)
+       // result: y
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64TESTBconst)
-               v.AuxInt = int64(int8(c))
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst x [0])
+       // match: (CMOVWLT y _ (FlagGT_UGT))
        // cond:
-       // result: (TESTB x x)
+       // result: y
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64TESTB)
-               v.AddArg(x)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst l:(MOVBload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVWLT y _ (FlagGT_ULT))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(c, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPBmem_0(v *Value) bool {
-       // match: (CMPBmem {sym} [off] ptr (MOVLconst [c]) mem)
-       // cond: validValAndOff(int64(int8(c)),off)
-       // result: (CMPBconstmem {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
+       // match: (CMOVWLT _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(int64(int8(c)), off)) {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64CMPBconstmem)
-               v.AuxInt = makeValAndOff(int64(int8(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWLT _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64CMOVWNE_0(v *Value) bool {
+       // match: (CMOVWNE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWNE x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWNE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVWNE y _ (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWNE _ x (FlagGT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWNE _ x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWNE _ x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWNE _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPB_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (CMPL x (MOVLconst [c]))
+       // match: (CMPB x (MOVLconst [c]))
        // cond:
-       // result: (CMPLconst x [c])
+       // result: (CMPBconst x [int64(int8(c))])
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -7197,14 +7357,14 @@ func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64CMPLconst)
-               v.AuxInt = c
+               v.reset(OpAMD64CMPBconst)
+               v.AuxInt = int64(int8(c))
                v.AddArg(x)
                return true
        }
-       // match: (CMPL (MOVLconst [c]) x)
+       // match: (CMPB (MOVLconst [c]) x)
        // cond:
-       // result: (InvertFlags (CMPLconst x [c]))
+       // result: (InvertFlags (CMPBconst x [int64(int8(c))]))
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
@@ -7214,19 +7374,19 @@ func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
                c := v_0.AuxInt
                x := v.Args[1]
                v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v0.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v0.AuxInt = int64(int8(c))
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
+       // match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPLmem {sym} [off] ptr x mem)
+       // result: (CMPBmem {sym} [off] ptr x mem)
        for {
                _ = v.Args[1]
                l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               if l.Op != OpAMD64MOVBload {
                        break
                }
                off := l.AuxInt
@@ -7238,7 +7398,7 @@ func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64CMPLmem)
+               v.reset(OpAMD64CMPBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -7246,14 +7406,14 @@ func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (CMPL x l:(MOVLload {sym} [off] ptr mem))
+       // match: (CMPB x l:(MOVBload {sym} [off] ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPLmem {sym} [off] ptr x mem))
+       // result: (InvertFlags (CMPBmem {sym} [off] ptr x mem))
        for {
                _ = v.Args[1]
                x := v.Args[0]
                l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               if l.Op != OpAMD64MOVBload {
                        break
                }
                off := l.AuxInt
@@ -7265,7 +7425,7 @@ func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
                        break
                }
                v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLmem, types.TypeFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBmem, types.TypeFlags)
                v0.AuxInt = off
                v0.Aux = sym
                v0.AddArg(ptr)
@@ -7276,9 +7436,11 @@ func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)==int32(y)
+func rewriteValueAMD64_OpAMD64CMPBconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)==int8(y)
        // result: (FlagEQ)
        for {
                y := v.AuxInt
@@ -7287,14 +7449,14 @@ func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int32(x) == int32(y)) {
+               if !(int8(x) == int8(y)) {
                        break
                }
                v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)<int8(y) && uint8(x)<uint8(y)
        // result: (FlagLT_ULT)
        for {
                y := v.AuxInt
@@ -7303,14 +7465,14 @@ func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int32(x) < int32(y) && uint32(x) < uint32(y)) {
+               if !(int8(x) < int8(y) && uint8(x) < uint8(y)) {
                        break
                }
                v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)<int32(y) && uint32(x)>uint32(y)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)<int8(y) && uint8(x)>uint8(y)
        // result: (FlagLT_UGT)
        for {
                y := v.AuxInt
@@ -7319,14 +7481,14 @@ func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int32(x) < int32(y) && uint32(x) > uint32(y)) {
+               if !(int8(x) < int8(y) && uint8(x) > uint8(y)) {
                        break
                }
                v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)>int32(y) && uint32(x)<uint32(y)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)>int8(y) && uint8(x)<uint8(y)
        // result: (FlagGT_ULT)
        for {
                y := v.AuxInt
@@ -7335,14 +7497,14 @@ func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int32(x) > int32(y) && uint32(x) < uint32(y)) {
+               if !(int8(x) > int8(y) && uint8(x) < uint8(y)) {
                        break
                }
                v.reset(OpAMD64FlagGT_ULT)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)>int32(y) && uint32(x)>uint32(y)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)>int8(y) && uint8(x)>uint8(y)
        // result: (FlagGT_UGT)
        for {
                y := v.AuxInt
@@ -7351,30 +7513,14 @@ func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int32(x) > int32(y) && uint32(x) > uint32(y)) {
+               if !(int8(x) > int8(y) && uint8(x) > uint8(y)) {
                        break
                }
                v.reset(OpAMD64FlagGT_UGT)
                return true
        }
-       // match: (CMPLconst (SHRLconst _ [c]) [n])
-       // cond: 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)
-       // result: (FlagLT_ULT)
-       for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               if !(0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)) {
-                       break
-               }
-               v.reset(OpAMD64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPLconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= int32(m) && int32(m) < int32(n)
+       // match: (CMPBconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= int8(m) && int8(m) < int8(n)
        // result: (FlagLT_ULT)
        for {
                n := v.AuxInt
@@ -7383,15 +7529,15 @@ func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
                        break
                }
                m := v_0.AuxInt
-               if !(0 <= int32(m) && int32(m) < int32(n)) {
+               if !(0 <= int8(m) && int8(m) < int8(n)) {
                        break
                }
                v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPLconst (ANDL x y) [0])
+       // match: (CMPBconst (ANDL x y) [0])
        // cond:
-       // result: (TESTL x y)
+       // result: (TESTB x y)
        for {
                if v.AuxInt != 0 {
                        break
@@ -7403,14 +7549,14 @@ func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
                _ = v_0.Args[1]
                x := v_0.Args[0]
                y := v_0.Args[1]
-               v.reset(OpAMD64TESTL)
+               v.reset(OpAMD64TESTB)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (CMPLconst (ANDLconst [c] x) [0])
+       // match: (CMPBconst (ANDLconst [c] x) [0])
        // cond:
-       // result: (TESTLconst [c] x)
+       // result: (TESTBconst [int64(int8(c))] x)
        for {
                if v.AuxInt != 0 {
                        break
@@ -7421,36 +7567,31 @@ func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v_0.Args[0]
-               v.reset(OpAMD64TESTLconst)
-               v.AuxInt = c
+               v.reset(OpAMD64TESTBconst)
+               v.AuxInt = int64(int8(c))
                v.AddArg(x)
                return true
        }
-       // match: (CMPLconst x [0])
+       // match: (CMPBconst x [0])
        // cond:
-       // result: (TESTL x x)
+       // result: (TESTB x x)
        for {
                if v.AuxInt != 0 {
                        break
                }
                x := v.Args[0]
-               v.reset(OpAMD64TESTL)
+               v.reset(OpAMD64TESTB)
                v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPLconst_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPLconst l:(MOVLload {sym} [off] ptr mem) [c])
+       // match: (CMPBconst l:(MOVBload {sym} [off] ptr mem) [c])
        // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                c := v.AuxInt
                l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               if l.Op != OpAMD64MOVBload {
                        break
                }
                off := l.AuxInt
@@ -7462,7 +7603,7 @@ func rewriteValueAMD64_OpAMD64CMPLconst_10(v *Value) bool {
                        break
                }
                b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AuxInt = makeValAndOff(c, off)
@@ -7473,10 +7614,10 @@ func rewriteValueAMD64_OpAMD64CMPLconst_10(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPLmem_0(v *Value) bool {
-       // match: (CMPLmem {sym} [off] ptr (MOVLconst [c]) mem)
-       // cond: validValAndOff(c,off)
-       // result: (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+func rewriteValueAMD64_OpAMD64CMPBmem_0(v *Value) bool {
+       // match: (CMPBmem {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(int64(int8(c)),off)
+       // result: (CMPBconstmem {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -7488,11 +7629,11 @@ func rewriteValueAMD64_OpAMD64CMPLmem_0(v *Value) bool {
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               if !(validValAndOff(c, off)) {
+               if !(validValAndOff(int64(int8(c)), off)) {
                        break
                }
-               v.reset(OpAMD64CMPLconstmem)
-               v.AuxInt = makeValAndOff(c, off)
+               v.reset(OpAMD64CMPBconstmem)
+               v.AuxInt = makeValAndOff(int64(int8(c)), off)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
@@ -7500,56 +7641,50 @@ func rewriteValueAMD64_OpAMD64CMPLmem_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (CMPQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (CMPQconst x [c])
+       // match: (CMPL x (MOVLconst [c]))
+       // cond:
+       // result: (CMPLconst x [c])
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64CMPQconst)
+               v.reset(OpAMD64CMPLconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (CMPQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (InvertFlags (CMPQconst x [c]))
+       // match: (CMPL (MOVLconst [c]) x)
+       // cond:
+       // result: (InvertFlags (CMPLconst x [c]))
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(is32Bit(c)) {
-                       break
-               }
                v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
                v0.AuxInt = c
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x)
+       // match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPQmem {sym} [off] ptr x mem)
+       // result: (CMPLmem {sym} [off] ptr x mem)
        for {
                _ = v.Args[1]
                l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               if l.Op != OpAMD64MOVLload {
                        break
                }
                off := l.AuxInt
@@ -7561,7 +7696,7 @@ func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64CMPQmem)
+               v.reset(OpAMD64CMPLmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -7569,14 +7704,14 @@ func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (CMPQ x l:(MOVQload {sym} [off] ptr mem))
+       // match: (CMPL x l:(MOVLload {sym} [off] ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPQmem {sym} [off] ptr x mem))
+       // result: (InvertFlags (CMPLmem {sym} [off] ptr x mem))
        for {
                _ = v.Args[1]
                x := v.Args[0]
                l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               if l.Op != OpAMD64MOVLload {
                        break
                }
                off := l.AuxInt
@@ -7588,7 +7723,7 @@ func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
                        break
                }
                v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQmem, types.TypeFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLmem, types.TypeFlags)
                v0.AuxInt = off
                v0.Aux = sym
                v0.AddArg(ptr)
@@ -7599,227 +7734,105 @@ func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPQconst_0(v *Value) bool {
-       // match: (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32])
-       // cond:
-       // result: (FlagLT_ULT)
-       for {
-               if v.AuxInt != 32 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               if v_0_0.AuxInt != -16 {
-                       break
-               }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 15 {
-                       break
-               }
-               v.reset(OpAMD64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32])
-       // cond:
-       // result: (FlagLT_ULT)
-       for {
-               if v.AuxInt != 32 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               if v_0_0.AuxInt != -8 {
-                       break
-               }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 7 {
-                       break
-               }
-               v.reset(OpAMD64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x==y
+func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)==int32(y)
        // result: (FlagEQ)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                x := v_0.AuxInt
-               if !(x == y) {
+               if !(int32(x) == int32(y)) {
                        break
                }
                v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x<y && uint64(x)<uint64(y)
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
        // result: (FlagLT_ULT)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                x := v_0.AuxInt
-               if !(x < y && uint64(x) < uint64(y)) {
+               if !(int32(x) < int32(y) && uint32(x) < uint32(y)) {
                        break
                }
                v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x<y && uint64(x)>uint64(y)
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)<int32(y) && uint32(x)>uint32(y)
        // result: (FlagLT_UGT)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                x := v_0.AuxInt
-               if !(x < y && uint64(x) > uint64(y)) {
+               if !(int32(x) < int32(y) && uint32(x) > uint32(y)) {
                        break
                }
                v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x>y && uint64(x)<uint64(y)
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)>int32(y) && uint32(x)<uint32(y)
        // result: (FlagGT_ULT)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                x := v_0.AuxInt
-               if !(x > y && uint64(x) < uint64(y)) {
+               if !(int32(x) > int32(y) && uint32(x) < uint32(y)) {
                        break
                }
                v.reset(OpAMD64FlagGT_ULT)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x>y && uint64(x)>uint64(y)
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)>int32(y) && uint32(x)>uint32(y)
        // result: (FlagGT_UGT)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                x := v_0.AuxInt
-               if !(x > y && uint64(x) > uint64(y)) {
+               if !(int32(x) > int32(y) && uint32(x) > uint32(y)) {
                        break
                }
                v.reset(OpAMD64FlagGT_UGT)
                return true
        }
-       // match: (CMPQconst (MOVBQZX _) [c])
-       // cond: 0xFF < c
-       // result: (FlagLT_ULT)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQZX {
-                       break
-               }
-               if !(0xFF < c) {
-                       break
-               }
-               v.reset(OpAMD64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPQconst (MOVWQZX _) [c])
-       // cond: 0xFFFF < c
-       // result: (FlagLT_ULT)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVWQZX {
-                       break
-               }
-               if !(0xFFFF < c) {
-                       break
-               }
-               v.reset(OpAMD64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPQconst (MOVLQZX _) [c])
-       // cond: 0xFFFFFFFF < c
-       // result: (FlagLT_ULT)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLQZX {
-                       break
-               }
-               if !(0xFFFFFFFF < c) {
-                       break
-               }
-               v.reset(OpAMD64FlagLT_ULT)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPQconst_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPQconst (SHRQconst _ [c]) [n])
-       // cond: 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)
+       // match: (CMPLconst (SHRLconst _ [c]) [n])
+       // cond: 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)
        // result: (FlagLT_ULT)
        for {
                n := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
+               if v_0.Op != OpAMD64SHRLconst {
                        break
                }
                c := v_0.AuxInt
-               if !(0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)) {
-                       break
-               }
-               v.reset(OpAMD64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPQconst (ANDQconst _ [m]) [n])
-       // cond: 0 <= m && m < n
-       // result: (FlagLT_ULT)
-       for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               m := v_0.AuxInt
-               if !(0 <= m && m < n) {
+               if !(0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)) {
                        break
                }
                v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPQconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= m && m < n
+       // match: (CMPLconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= int32(m) && int32(m) < int32(n)
        // result: (FlagLT_ULT)
        for {
                n := v.AuxInt
@@ -7828,69 +7841,74 @@ func rewriteValueAMD64_OpAMD64CMPQconst_10(v *Value) bool {
                        break
                }
                m := v_0.AuxInt
-               if !(0 <= m && m < n) {
+               if !(0 <= int32(m) && int32(m) < int32(n)) {
                        break
                }
                v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPQconst (ANDQ x y) [0])
+       // match: (CMPLconst (ANDL x y) [0])
        // cond:
-       // result: (TESTQ x y)
+       // result: (TESTL x y)
        for {
                if v.AuxInt != 0 {
                        break
                }
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQ {
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
                _ = v_0.Args[1]
                x := v_0.Args[0]
                y := v_0.Args[1]
-               v.reset(OpAMD64TESTQ)
+               v.reset(OpAMD64TESTL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (ANDQconst [c] x) [0])
+       // match: (CMPLconst (ANDLconst [c] x) [0])
        // cond:
-       // result: (TESTQconst [c] x)
+       // result: (TESTLconst [c] x)
        for {
                if v.AuxInt != 0 {
                        break
                }
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQconst {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
                c := v_0.AuxInt
                x := v_0.Args[0]
-               v.reset(OpAMD64TESTQconst)
+               v.reset(OpAMD64TESTLconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (CMPQconst x [0])
+       // match: (CMPLconst x [0])
        // cond:
-       // result: (TESTQ x x)
+       // result: (TESTL x x)
        for {
                if v.AuxInt != 0 {
                        break
                }
                x := v.Args[0]
-               v.reset(OpAMD64TESTQ)
+               v.reset(OpAMD64TESTL)
                v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       // match: (CMPQconst l:(MOVQload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
-       for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPLconst_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPLconst l:(MOVLload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
                off := l.AuxInt
@@ -7902,7 +7920,7 @@ func rewriteValueAMD64_OpAMD64CMPQconst_10(v *Value) bool {
                        break
                }
                b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AuxInt = makeValAndOff(c, off)
@@ -7913,17 +7931,17 @@ func rewriteValueAMD64_OpAMD64CMPQconst_10(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPQmem_0(v *Value) bool {
-       // match: (CMPQmem {sym} [off] ptr (MOVQconst [c]) mem)
+func rewriteValueAMD64_OpAMD64CMPLmem_0(v *Value) bool {
+       // match: (CMPLmem {sym} [off] ptr (MOVLconst [c]) mem)
        // cond: validValAndOff(c,off)
-       // result: (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // result: (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
@@ -7931,7 +7949,7 @@ func rewriteValueAMD64_OpAMD64CMPQmem_0(v *Value) bool {
                if !(validValAndOff(c, off)) {
                        break
                }
-               v.reset(OpAMD64CMPQconstmem)
+               v.reset(OpAMD64CMPLconstmem)
                v.AuxInt = makeValAndOff(c, off)
                v.Aux = sym
                v.AddArg(ptr)
@@ -7940,50 +7958,56 @@ func rewriteValueAMD64_OpAMD64CMPQmem_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (CMPW x (MOVLconst [c]))
-       // cond:
-       // result: (CMPWconst x [int64(int16(c))])
+       // match: (CMPQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (CMPQconst x [c])
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64CMPWconst)
-               v.AuxInt = int64(int16(c))
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64CMPQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (CMPW (MOVLconst [c]) x)
-       // cond:
-       // result: (InvertFlags (CMPWconst x [int64(int16(c))]))
+       // match: (CMPQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (InvertFlags (CMPQconst x [c]))
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
                c := v_0.AuxInt
                x := v.Args[1]
+               if !(is32Bit(c)) {
+                       break
+               }
                v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v0.AuxInt = int64(int16(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = c
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
+       // match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x)
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPWmem {sym} [off] ptr x mem)
+       // result: (CMPQmem {sym} [off] ptr x mem)
        for {
                _ = v.Args[1]
                l := v.Args[0]
-               if l.Op != OpAMD64MOVWload {
+               if l.Op != OpAMD64MOVQload {
                        break
                }
                off := l.AuxInt
@@ -7995,7 +8019,7 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64CMPWmem)
+               v.reset(OpAMD64CMPQmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -8003,14 +8027,14 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (CMPW x l:(MOVWload {sym} [off] ptr mem))
+       // match: (CMPQ x l:(MOVQload {sym} [off] ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPWmem {sym} [off] ptr x mem))
+       // result: (InvertFlags (CMPQmem {sym} [off] ptr x mem))
        for {
                _ = v.Args[1]
                x := v.Args[0]
                l := v.Args[1]
-               if l.Op != OpAMD64MOVWload {
+               if l.Op != OpAMD64MOVQload {
                        break
                }
                off := l.AuxInt
@@ -8022,7 +8046,7 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
                        break
                }
                v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWmem, types.TypeFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQmem, types.TypeFlags)
                v0.AuxInt = off
                v0.Aux = sym
                v0.AddArg(ptr)
@@ -8033,162 +8057,298 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)==int16(y)
+func rewriteValueAMD64_OpAMD64CMPQconst_0(v *Value) bool {
+       // match: (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32])
+       // cond:
+       // result: (FlagLT_ULT)
+       for {
+               if v.AuxInt != 32 {
+                       break
+               }
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_0_0.AuxInt != -16 {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 15 {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32])
+       // cond:
+       // result: (FlagLT_ULT)
+       for {
+               if v.AuxInt != 32 {
+                       break
+               }
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_0_0.AuxInt != -8 {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 7 {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x==y
        // result: (FlagEQ)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) == int16(y)) {
+               if !(x == y) {
                        break
                }
                v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)<int16(y) && uint16(x)<uint16(y)
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x<y && uint64(x)<uint64(y)
        // result: (FlagLT_ULT)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) < int16(y) && uint16(x) < uint16(y)) {
+               if !(x < y && uint64(x) < uint64(y)) {
                        break
                }
                v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)<int16(y) && uint16(x)>uint16(y)
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x<y && uint64(x)>uint64(y)
        // result: (FlagLT_UGT)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) < int16(y) && uint16(x) > uint16(y)) {
+               if !(x < y && uint64(x) > uint64(y)) {
                        break
                }
                v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)>int16(y) && uint16(x)<uint16(y)
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x>y && uint64(x)<uint64(y)
        // result: (FlagGT_ULT)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) > int16(y) && uint16(x) < uint16(y)) {
+               if !(x > y && uint64(x) < uint64(y)) {
                        break
                }
                v.reset(OpAMD64FlagGT_ULT)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)>int16(y) && uint16(x)>uint16(y)
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x>y && uint64(x)>uint64(y)
        // result: (FlagGT_UGT)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) > int16(y) && uint16(x) > uint16(y)) {
+               if !(x > y && uint64(x) > uint64(y)) {
                        break
                }
                v.reset(OpAMD64FlagGT_UGT)
                return true
        }
-       // match: (CMPWconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= int16(m) && int16(m) < int16(n)
+       // match: (CMPQconst (MOVBQZX _) [c])
+       // cond: 0xFF < c
        // result: (FlagLT_ULT)
        for {
-               n := v.AuxInt
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               if v_0.Op != OpAMD64MOVBQZX {
                        break
                }
-               m := v_0.AuxInt
-               if !(0 <= int16(m) && int16(m) < int16(n)) {
+               if !(0xFF < c) {
                        break
                }
                v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPWconst (ANDL x y) [0])
-       // cond:
-       // result: (TESTW x y)
+       // match: (CMPQconst (MOVWQZX _) [c])
+       // cond: 0xFFFF < c
+       // result: (FlagLT_ULT)
        for {
-               if v.AuxInt != 0 {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVWQZX {
                        break
                }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               if !(0xFFFF < c) {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64TESTW)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPWconst (ANDLconst [c] x) [0])
-       // cond:
-       // result: (TESTWconst [int64(int16(c))] x)
+       // match: (CMPQconst (MOVLQZX _) [c])
+       // cond: 0xFFFFFFFF < c
+       // result: (FlagLT_ULT)
        for {
-               if v.AuxInt != 0 {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLQZX {
                        break
                }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               if !(0xFFFFFFFF < c) {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64TESTWconst)
-               v.AuxInt = int64(int16(c))
-               v.AddArg(x)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPWconst x [0])
-       // cond:
-       // result: (TESTW x x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPQconst_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPQconst (SHRQconst _ [c]) [n])
+       // cond: 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)
+       // result: (FlagLT_ULT)
        for {
-               if v.AuxInt != 0 {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64TESTW)
-               v.AddArg(x)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               if !(0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)) {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPWconst l:(MOVWload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMPQconst (ANDQconst _ [m]) [n])
+       // cond: 0 <= m && m < n
+       // result: (FlagLT_ULT)
        for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVWload {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               m := v_0.AuxInt
+               if !(0 <= m && m < n) {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= m && m < n
+       // result: (FlagLT_ULT)
+       for {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               m := v_0.AuxInt
+               if !(0 <= m && m < n) {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (ANDQ x y) [0])
+       // cond:
+       // result: (TESTQ x y)
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64TESTQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMPQconst (ANDQconst [c] x) [0])
+       // cond:
+       // result: (TESTQconst [c] x)
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64TESTQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPQconst x [0])
+       // cond:
+       // result: (TESTQ x x)
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64TESTQ)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPQconst l:(MOVQload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
                off := l.AuxInt
@@ -8200,7 +8360,7 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                        break
                }
                b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AuxInt = makeValAndOff(c, off)
@@ -8211,26 +8371,26 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPWmem_0(v *Value) bool {
-       // match: (CMPWmem {sym} [off] ptr (MOVLconst [c]) mem)
-       // cond: validValAndOff(int64(int16(c)),off)
-       // result: (CMPWconstmem {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
+func rewriteValueAMD64_OpAMD64CMPQmem_0(v *Value) bool {
+       // match: (CMPQmem {sym} [off] ptr (MOVQconst [c]) mem)
+       // cond: validValAndOff(c,off)
+       // result: (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               if !(validValAndOff(int64(int16(c)), off)) {
+               if !(validValAndOff(c, off)) {
                        break
                }
-               v.reset(OpAMD64CMPWconstmem)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.reset(OpAMD64CMPQconstmem)
+               v.AuxInt = makeValAndOff(c, off)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
@@ -8238,510 +8398,612 @@ func rewriteValueAMD64_OpAMD64CMPWmem_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPXCHGLlock_0(v *Value) bool {
-       // match: (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
-       // cond: is32Bit(off1+off2)
-       // result: (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)
+func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPW x (MOVLconst [c]))
+       // cond:
+       // result: (CMPWconst x [int64(int16(c))])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(off1 + off2)) {
+               c := v_1.AuxInt
+               v.reset(OpAMD64CMPWconst)
+               v.AuxInt = int64(int16(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPW (MOVLconst [c]) x)
+       // cond:
+       // result: (InvertFlags (CMPWconst x [int64(int16(c))]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64CMPXCHGLlock)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
-               v.AddArg(mem)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v0.AuxInt = int64(int16(c))
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v *Value) bool {
-       // match: (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
-       // cond: is32Bit(off1+off2)
-       // result: (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
+       // match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPWmem {sym} [off] ptr x mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(off1 + off2)) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64CMPXCHGQlock)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64CMPWmem)
+               v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64LEAL_0(v *Value) bool {
-       // match: (LEAL [c] {s} (ADDLconst [d] x))
-       // cond: is32Bit(c+d)
-       // result: (LEAL [c+d] {s} x)
+       // match: (CMPW x l:(MOVWload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPWmem {sym} [off] ptr x mem))
        for {
-               c := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(is32Bit(c + d)) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64LEAL)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWmem, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64LEAQ_0(v *Value) bool {
-       // match: (LEAQ [c] {s} (ADDQconst [d] x))
-       // cond: is32Bit(c+d)
-       // result: (LEAQ [c+d] {s} x)
+func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)==int16(y)
+       // result: (FlagEQ)
        for {
-               c := v.AuxInt
-               s := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(is32Bit(c + d)) {
+               x := v_0.AuxInt
+               if !(int16(x) == int16(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (LEAQ [c] {s} (ADDQ x y))
-       // cond: x.Op != OpSB && y.Op != OpSB
-       // result: (LEAQ1 [c] {s} x y)
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)<int16(y) && uint16(x)<uint16(y)
+       // result: (FlagLT_ULT)
        for {
-               c := v.AuxInt
-               s := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(x.Op != OpSB && y.Op != OpSB) {
+               x := v_0.AuxInt
+               if !(int16(x) < int16(y) && uint16(x) < uint16(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)<int16(y) && uint16(x)>uint16(y)
+       // result: (FlagLT_UGT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.AuxInt
+               if !(int16(x) < int16(y) && uint16(x) > uint16(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
+               v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)>int16(y) && uint16(x)<uint16(y)
+       // result: (FlagGT_ULT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.AuxInt
+               if !(int16(x) > int16(y) && uint16(x) < uint16(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagGT_ULT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)>int16(y) && uint16(x)>uint16(y)
+       // result: (FlagGT_UGT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.AuxInt
+               if !(int16(x) > int16(y) && uint16(x) > uint16(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagGT_UGT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPWconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= int16(m) && int16(m) < int16(n)
+       // result: (FlagLT_ULT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               n := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               m := v_0.AuxInt
+               if !(0 <= int16(m) && int16(m) < int16(n)) {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPWconst (ANDL x y) [0])
+       // cond:
+       // result: (TESTW x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               if v.AuxInt != 0 {
+                       break
+               }
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
                _ = v_0.Args[1]
                x := v_0.Args[0]
                y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpAMD64TESTW)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64LEAQ1_0(v *Value) bool {
-       // match: (LEAQ1 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ1 [c+d] {s} x y)
+       // match: (CMPWconst (ANDLconst [c] x) [0])
+       // cond:
+       // result: (TESTWconst [int64(int16(c))] x)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v.AuxInt != 0 {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c + d
-               v.Aux = s
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64TESTWconst)
+               v.AuxInt = int64(int16(c))
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} y (ADDQconst [d] x))
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ1 [c+d] {s} x y)
+       // match: (CMPWconst x [0])
+       // cond:
+       // result: (TESTW x x)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               x := v_1.Args[0]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               if v.AuxInt != 0 {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c + d
-               v.Aux = s
+               x := v.Args[0]
+               v.reset(OpAMD64TESTW)
+               v.AddArg(x)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
-       // cond:
-       // result: (LEAQ2 [c] {s} x y)
+       // match: (CMPWconst l:(MOVWload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               if v_1.AuxInt != 1 {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [1] y) x)
-       // cond:
-       // result: (LEAQ2 [c] {s} x y)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPWmem_0(v *Value) bool {
+       // match: (CMPWmem {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(int64(int16(c)),off)
+       // result: (CMPWconstmem {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               if v_0.AuxInt != 1 {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(int64(int16(c)), off)) {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ2)
+               v.reset(OpAMD64CMPWconstmem)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPXCHGLlock_0(v *Value) bool {
+       // match: (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
+       // cond: is32Bit(off1+off2)
+       // result: (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64CMPXCHGLlock)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v *Value) bool {
+       // match: (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
+       // cond: is32Bit(off1+off2)
+       // result: (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64CMPXCHGQlock)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
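+// Both CMPXCHG lock rules are plain offset folding: for a hypothetical
+// (CMPXCHGQlock [16] {sym} (ADDQconst [8] ptr) old new_ mem) the constant is
+// absorbed into the displacement, giving (CMPXCHGQlock [24] {sym} ptr old new_ mem),
+// provided the combined offset still fits in a signed 32-bit immediate.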
+func rewriteValueAMD64_OpAMD64LEAL_0(v *Value) bool {
+       // match: (LEAL [c] {s} (ADDLconst [d] x))
+       // cond: is32Bit(c+d)
+       // result: (LEAL [c+d] {s} x)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64LEAL)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               return true
+       }
+       // match: (LEAL [c] {s} (ADDL x y))
+       // cond: x.Op != OpSB && y.Op != OpSB
+       // result: (LEAL1 [c] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(x.Op != OpSB && y.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64LEAL1)
                v.AuxInt = c
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (SHLQconst [2] y))
+       return false
+}
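+// These are the 32-bit analogues of the existing LEAQ rules. For hypothetical
+// operands, (LEAL [4] {s} (ADDLconst [8] x)) folds to (LEAL [12] {s} x), and
+// (LEAL [c] {s} (ADDL x y)) becomes (LEAL1 [c] {s} x y), so an x+y+c address
+// computation is done by a single LEAL with a two-register addressing mode
+// (as long as neither operand is the SB pseudo-register).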
+func rewriteValueAMD64_OpAMD64LEAL1_0(v *Value) bool {
+       // match: (LEAL1 [c] {s} (ADDLconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAL1 [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64LEAL1)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAL1 [c] {s} y (ADDLconst [d] x))
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAL1 [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               x := v_1.Args[0]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64LEAL1)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAL1 [c] {s} x (SHLLconst [1] y))
        // cond:
-       // result: (LEAQ4 [c] {s} x y)
+       // result: (LEAL2 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if v_1.AuxInt != 1 {
                        break
                }
                y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ4)
+               v.reset(OpAMD64LEAL2)
                v.AuxInt = c
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [2] y) x)
+       // match: (LEAL1 [c] {s} (SHLLconst [1] y) x)
        // cond:
-       // result: (LEAQ4 [c] {s} x y)
+       // result: (LEAL2 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               if v_0.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               if v_0.AuxInt != 1 {
                        break
                }
                y := v_0.Args[0]
                x := v.Args[1]
-               v.reset(OpAMD64LEAQ4)
+               v.reset(OpAMD64LEAL2)
                v.AuxInt = c
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (SHLQconst [3] y))
+       // match: (LEAL1 [c] {s} x (SHLLconst [2] y))
        // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // result: (LEAL4 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_1.AuxInt != 3 {
+               if v_1.AuxInt != 2 {
                        break
                }
                y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
+               v.reset(OpAMD64LEAL4)
                v.AuxInt = c
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [3] y) x)
+       // match: (LEAL1 [c] {s} (SHLLconst [2] y) x)
        // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // result: (LEAL4 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               if v_0.Op != OpAMD64SHLLconst {
                        break
                }
-               if v_0.AuxInt != 3 {
+               if v_0.AuxInt != 2 {
                        break
                }
                y := v_0.Args[0]
                x := v.Args[1]
-               v.reset(OpAMD64LEAQ8)
+               v.reset(OpAMD64LEAL4)
                v.AuxInt = c
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (LEAL1 [c] {s} x (SHLLconst [3] y))
+       // cond:
+       // result: (LEAL8 [c] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               c := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               if v_1.AuxInt != 3 {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAL8)
+               v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [off1] {sym1} y (LEAQ [off2] {sym2} x))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (LEAL1 [c] {s} (SHLLconst [3] y) x)
+       // cond:
+       // result: (LEAL8 [c] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               c := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               x := v_1.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               if v_0.AuxInt != 3 {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAL8)
+               v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
        return false
 }
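+// The shift rules above strengthen the addressing mode: in, say,
+// (LEAL1 [c] {s} x (SHLLconst [2] y)) the index is y*4, so the value becomes
+// (LEAL4 [c] {s} x y); shift counts of 1 and 3 map to LEAL2 and LEAL8 in the
+// same way, and the constant-offset rules fold an ADDLconst on either operand
+// into the displacement.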
-func rewriteValueAMD64_OpAMD64LEAQ2_0(v *Value) bool {
-       // match: (LEAQ2 [c] {s} (ADDQconst [d] x) y)
+func rewriteValueAMD64_OpAMD64LEAL2_0(v *Value) bool {
+       // match: (LEAL2 [c] {s} (ADDLconst [d] x) y)
        // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ2 [c+d] {s} x y)
+       // result: (LEAL2 [c+d] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
                d := v_0.AuxInt
@@ -8750,23 +9012,23 @@ func rewriteValueAMD64_OpAMD64LEAQ2_0(v *Value) bool {
                if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
+               v.reset(OpAMD64LEAL2)
                v.AuxInt = c + d
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ2 [c] {s} x (ADDQconst [d] y))
+       // match: (LEAL2 [c] {s} x (ADDLconst [d] y))
        // cond: is32Bit(c+2*d) && y.Op != OpSB
-       // result: (LEAQ2 [c+2*d] {s} x y)
+       // result: (LEAL2 [c+2*d] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64ADDLconst {
                        break
                }
                d := v_1.AuxInt
@@ -8774,96 +9036,71 @@ func rewriteValueAMD64_OpAMD64LEAQ2_0(v *Value) bool {
                if !(is32Bit(c+2*d) && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
+               v.reset(OpAMD64LEAL2)
                v.AuxInt = c + 2*d
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
+       // match: (LEAL2 [c] {s} x (SHLLconst [1] y))
        // cond:
-       // result: (LEAQ4 [c] {s} x y)
+       // result: (LEAL4 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
                if v_1.AuxInt != 1 {
                        break
                }
                y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ4)
+               v.reset(OpAMD64LEAL4)
                v.AuxInt = c
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ2 [c] {s} x (SHLQconst [2] y))
+       // match: (LEAL2 [c] {s} x (SHLLconst [2] y))
        // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // result: (LEAL8 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
                if v_1.AuxInt != 2 {
                        break
                }
                y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
+               v.reset(OpAMD64LEAL8)
                v.AuxInt = c
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
        return false
 }
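+// Note the scaled displacement in the ADDLconst-on-index rules: for a
+// hypothetical (LEAL2 [c] {s} x (ADDLconst [d] y)) the address is
+// x + 2*(y+d) + c = x + 2*y + (c+2*d), hence the new AuxInt c+2*d; LEAL4 and
+// LEAL8 below use c+4*d and c+8*d for the same reason.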
-func rewriteValueAMD64_OpAMD64LEAQ4_0(v *Value) bool {
-       // match: (LEAQ4 [c] {s} (ADDQconst [d] x) y)
+func rewriteValueAMD64_OpAMD64LEAL4_0(v *Value) bool {
+       // match: (LEAL4 [c] {s} (ADDLconst [d] x) y)
        // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ4 [c+d] {s} x y)
+       // result: (LEAL4 [c+d] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
                d := v_0.AuxInt
@@ -8872,23 +9109,23 @@ func rewriteValueAMD64_OpAMD64LEAQ4_0(v *Value) bool {
                if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
+               v.reset(OpAMD64LEAL4)
                v.AuxInt = c + d
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ4 [c] {s} x (ADDQconst [d] y))
+       // match: (LEAL4 [c] {s} x (ADDLconst [d] y))
        // cond: is32Bit(c+4*d) && y.Op != OpSB
-       // result: (LEAQ4 [c+4*d] {s} x y)
+       // result: (LEAL4 [c+4*d] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64ADDLconst {
                        break
                }
                d := v_1.AuxInt
@@ -8896,119 +9133,140 @@ func rewriteValueAMD64_OpAMD64LEAQ4_0(v *Value) bool {
                if !(is32Bit(c+4*d) && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
+               v.reset(OpAMD64LEAL4)
                v.AuxInt = c + 4*d
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
+       // match: (LEAL4 [c] {s} x (SHLLconst [1] y))
        // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // result: (LEAL8 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
                if v_1.AuxInt != 1 {
                        break
                }
                y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
+               v.reset(OpAMD64LEAL8)
                v.AuxInt = c
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       return false
+}
+func rewriteValueAMD64_OpAMD64LEAL8_0(v *Value) bool {
+       // match: (LEAL8 [c] {s} (ADDLconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAL8 [c+d] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               c := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
+               d := v_0.AuxInt
                x := v_0.Args[0]
                y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpAMD64LEAL8)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAL8 [c] {s} x (ADDLconst [d] y))
+       // cond: is32Bit(c+8*d) && y.Op != OpSB
+       // result: (LEAL8 [c+8*d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is32Bit(c+8*d) && y.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64LEAL8)
+               v.AuxInt = c + 8*d
+               v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64LEAQ8_0(v *Value) bool {
-       // match: (LEAQ8 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ8 [c+d] {s} x y)
+func rewriteValueAMD64_OpAMD64LEAQ_0(v *Value) bool {
+       // match: (LEAQ [c] {s} (ADDQconst [d] x))
+       // cond: is32Bit(c+d)
+       // result: (LEAQ [c+d] {s} x)
        for {
                c := v.AuxInt
                s := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                d := v_0.AuxInt
                x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
+               v.reset(OpAMD64LEAQ)
                v.AuxInt = c + d
                v.Aux = s
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ8 [c] {s} x (ADDQconst [d] y))
-       // cond: is32Bit(c+8*d) && y.Op != OpSB
-       // result: (LEAQ8 [c+8*d] {s} x y)
+       // match: (LEAQ [c] {s} (ADDQ x y))
+       // cond: x.Op != OpSB && y.Op != OpSB
+       // result: (LEAQ1 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+8*d) && y.Op != OpSB) {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c + 8*d
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c
                v.Aux = s
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ {
                        break
@@ -9016,1094 +9274,1091 @@ func rewriteValueAMD64_OpAMD64LEAQ8_0(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
+               v.reset(OpAMD64LEAQ)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBQSX_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBQSX x:(MOVBload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVBload {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ2 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
-                       break
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ8 {
+                       break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSX (ANDLconst [c] x))
-       // cond: c & 0x80 == 0
-       // result: (ANDLconst [c & 0x7f] x)
+       return false
+}
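+// A LEAQ whose operand is another LEAQ/LEAQ1/2/4/8 merges the two
+// displacements and symbols into the inner, scaled form, e.g. a hypothetical
+// (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) becomes
+// (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y) when the combined offset fits
+// in 32 bits and the symbols can be merged.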
+func rewriteValueAMD64_OpAMD64LEAQ1_0(v *Value) bool {
+       // match: (LEAQ1 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ1 [c+d] {s} x y)
        for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                x := v_0.Args[0]
-               if !(c&0x80 == 0) {
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0x7f
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSX (MOVBQSX x))
-       // cond:
-       // result: (MOVBQSX x)
+       // match: (LEAQ1 [c] {s} y (ADDQconst [d] x))
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ1 [c+d] {s} x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQSX {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQSX)
+               d := v_1.AuxInt
+               x := v_1.Args[0]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBQSXload_0(v *Value) bool {
-       // match: (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVBQSX x)
+       // match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
+       // cond:
+       // result: (LEAQ2 [c] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               c := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBstore {
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAMD64MOVBQSX)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (LEAQ1 [c] {s} (SHLQconst [1] y) x)
+       // cond:
+       // result: (LEAQ2 [c] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               c := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if v_0.AuxInt != 1 {
                        break
                }
-               v.reset(OpAMD64MOVBQSXload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBQZX_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBQZX x:(MOVBload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       // match: (LEAQ1 [c] {s} x (SHLQconst [2] y))
+       // cond:
+       // result: (LEAQ4 [c] {s} x y)
        for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpAMD64MOVBload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQZX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       // match: (LEAQ1 [c] {s} (SHLQconst [2] y) x)
+       // cond:
+       // result: (LEAQ4 [c] {s} x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if v_0.AuxInt != 2 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       // match: (LEAQ1 [c] {s} x (SHLQconst [3] y))
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
        for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if v_1.AuxInt != 3 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       // match: (LEAQ1 [c] {s} (SHLQconst [3] y) x)
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVBloadidx1 {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if v_0.AuxInt != 3 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQZX (ANDLconst [c] x))
-       // cond:
-       // result: (ANDLconst [c & 0xff] x)
+       // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_0.AuxInt
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
                x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0xff
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVBQZX (MOVBQZX x))
-       // cond:
-       // result: (MOVBQZX x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQZX {
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQZX)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBload_0(v *Value) bool {
-       // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVBQZX x)
+       // match: (LEAQ1 [off1] {sym1} y (LEAQ [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBstore {
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
                off2 := v_1.AuxInt
                sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               x := v_1.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBQZX)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64LEAQ2_0(v *Value) bool {
+       // match: (LEAQ2 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ2 [c+d] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
+               c := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (LEAQ2 [c] {s} x (ADDQconst [d] y))
+       // cond: is32Bit(c+2*d) && y.Op != OpSB
+       // result: (LEAQ2 [c+2*d] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               c := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is32Bit(c+2*d) && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c + 2*d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
+       // cond:
+       // result: (LEAQ4 [c] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               c := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVBloadidx1 [off] {sym} ptr idx mem)
+       // match: (LEAQ2 [c] {s} x (SHLQconst [2] y))
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               c := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBload)
+               v.reset(OpAMD64LEAQ2)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVBload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBloadidx1_0(v *Value) bool {
-       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+func rewriteValueAMD64_OpAMD64LEAQ4_0(v *Value) bool {
+       // match: (LEAQ4 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ4 [c+d] {s} x y)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
+               v.reset(OpAMD64LEAQ4)
                v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (LEAQ4 [c] {s} x (ADDQconst [d] y))
+       // cond: is32Bit(c+4*d) && y.Op != OpSB
+       // result: (LEAQ4 [c+4*d] {s} x y)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               idx := v.Args[0]
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
                d := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               y := v_1.Args[0]
+               if !(is32Bit(c+4*d) && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c + 4*d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
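
A minimal sketch (not part of the generated file) of the address algebra behind the LEAQ4 rules above, assuming LEAQ4 [c] x y computes x + 4*y + c and LEAQ8 [c] x y computes x + 8*y + c:

// Explanatory sketch, not part of the commit: models the addresses the LEAQ
// forms compute, to show why the constants fold as the rules above state.
package main

import "fmt"

// leaq4 and leaq8 model LEAQ4 [c] x y and LEAQ8 [c] x y respectively.
func leaq4(c, x, y int64) int64 { return x + 4*y + c }
func leaq8(c, x, y int64) int64 { return x + 8*y + c }

func main() {
	c, d, x, y := int64(16), int64(3), int64(0x1000), int64(7)

	// (LEAQ4 [c] (ADDQconst [d] x) y) == (LEAQ4 [c+d] x y)
	fmt.Println(leaq4(c, x+d, y) == leaq4(c+d, x, y)) // true

	// (LEAQ4 [c] x (ADDQconst [d] y)) == (LEAQ4 [c+4*d] x y): the constant is scaled by 4.
	fmt.Println(leaq4(c, x, y+d) == leaq4(c+4*d, x, y)) // true

	// (LEAQ4 [c] x (SHLQconst [1] y)) == (LEAQ8 [c] x y): doubling the index bumps the scale.
	fmt.Println(leaq4(c, x, y<<1) == leaq8(c, x, y)) // true
}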
+func rewriteValueAMD64_OpAMD64LEAQ8_0(v *Value) bool {
+       // match: (LEAQ8 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ8 [c+d] {s} x y)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                d := v_0.AuxInt
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
+               v.reset(OpAMD64LEAQ8)
                v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVBload [i+c] {s} p mem)
+       // match: (LEAQ8 [c] {s} x (ADDQconst [d] y))
+       // cond: is32Bit(c+8*d) && y.Op != OpSB
+       // result: (LEAQ8 [c+8*d] {s} x y)
        for {
-               i := v.AuxInt
+               c := v.AuxInt
                s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(is32Bit(i + c)) {
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is32Bit(c+8*d) && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = i + c
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c + 8*d
                v.Aux = s
-               v.AddArg(p)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBloadidx1 [i] {s} (MOVQconst [c]) p mem)
-       // cond: is32Bit(i+c)
-       // result: (MOVBload [i+c] {s} p mem)
+       // match: (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_0.AuxInt
-               p := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(i + c)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = i + c
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBstore_0(v *Value) bool {
-       // match: (MOVBstore [off] {sym} ptr y:(SETL x) mem)
-       // cond: y.Uses == 1
-       // result: (SETLmem [off] {sym} ptr x mem)
+func rewriteValueAMD64_OpAMD64MOVBQSX_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBQSX x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETL {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBload {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64SETLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETLE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETLEmem [off] {sym} ptr x mem)
+       // match: (MOVBQSX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETLE {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64SETLEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETG x) mem)
-       // cond: y.Uses == 1
-       // result: (SETGmem [off] {sym} ptr x mem)
+       // match: (MOVBQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETG {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64SETGmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETGE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETGEmem [off] {sym} ptr x mem)
+       // match: (MOVBQSX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETGE {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64SETGEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETEQ x) mem)
-       // cond: y.Uses == 1
-       // result: (SETEQmem [off] {sym} ptr x mem)
+       // match: (MOVBQSX (ANDLconst [c] x))
+       // cond: c & 0x80 == 0
+       // result: (ANDLconst [c & 0x7f] x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETEQ {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x80 == 0) {
                        break
                }
-               v.reset(OpAMD64SETEQmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0x7f
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETNE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETNEmem [off] {sym} ptr x mem)
+       // match: (MOVBQSX (MOVBQSX x))
+       // cond:
+       // result: (MOVBQSX x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETNE {
-                       break
-               }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQSX {
                        break
                }
-               v.reset(OpAMD64SETNEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQSX)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETB x) mem)
-       // cond: y.Uses == 1
-       // result: (SETBmem [off] {sym} ptr x mem)
+       return false
+}
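
A small sketch (not part of the commit) of why the MOVBQSX rules above may fold a wider single-use load into MOVBQSXload: on little-endian AMD64, sign-extending the low byte of a wider load reads the same byte as a 1-byte sign-extending load from the same address.

// Explanatory sketch, not part of the generated file.
package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	mem := []byte{0x85, 0x12, 0x34, 0x56}  // bytes at address ptr
	wide := binary.LittleEndian.Uint32(mem) // MOVLload ptr
	viaWideLoad := int64(int8(uint8(wide))) // MOVBQSX of the wide load's low byte
	viaByteLoad := int64(int8(mem[0]))      // MOVBQSXload ptr
	fmt.Println(viaWideLoad == viaByteLoad) // true
}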
+func rewriteValueAMD64_OpAMD64MOVBQSXload_0(v *Value) bool {
+       // match: (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVBQSX x)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETB {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBstore {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64MOVBQSX)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETBE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETBEmem [off] {sym} ptr x mem)
+       // match: (MOVBQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETBE {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETBEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVBQSXload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETA x) mem)
-       // cond: y.Uses == 1
-       // result: (SETAmem [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETA {
+       return false
+}
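
The first MOVBQSXload rule above is store-to-load forwarding; a minimal sketch (not part of the commit) of the equivalence it relies on:

// Explanatory sketch, not part of the generated file: reloading a byte that was
// just stored at the same address and sign-extending it equals sign-extending
// the stored value directly.
package main

import "fmt"

func main() {
	var mem [1]byte
	x := int32(-100)
	mem[0] = byte(x)                // MOVBstore ptr x
	reloaded := int64(int8(mem[0])) // MOVBQSXload ptr after the store
	direct := int64(int8(x))        // MOVBQSX x, skipping the memory round trip
	fmt.Println(reloaded == direct) // true
}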
+func rewriteValueAMD64_OpAMD64MOVBQZX_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBQZX x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBload {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64SETAmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETAE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETAEmem [off] {sym} ptr x mem)
+       // match: (MOVBQZX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETAE {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBstore [off] {sym} ptr (MOVBQSX x) mem)
+       // match: (MOVBQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX (ANDLconst [c] x))
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (ANDLconst [c & 0xff] x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBQSX {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0xff
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVBQZX x) mem)
+       // match: (MOVBQZX (MOVBQZX x))
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (MOVBQZX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQZX {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQZX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
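
A short sketch (not part of the commit) of the masking identity behind the MOVBQZX/ANDLconst rule above: zero-extending the low byte of an already-masked value is the same as masking with the constant's low byte, since AMD64 32-bit operations zero the upper half of the destination register.

// Explanatory sketch, not part of the generated file.
package main

import "fmt"

func main() {
	x := uint32(0xdeadbeef)
	c := uint32(0x1f3)
	before := uint64(uint8(x & c))  // MOVBQZX (ANDLconst [c] x)
	after := uint64(x & (c & 0xff)) // ANDLconst [c & 0xff] x
	fmt.Println(before == after)    // true
}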
+func rewriteValueAMD64_OpAMD64MOVBload_0(v *Value) bool {
+       // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVBQZX x)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBQZX {
+               if v_1.Op != OpAMD64MOVBstore {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBQZX)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVBload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVBstore)
+               v.reset(OpAMD64MOVBload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+       // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVBstoreconst)
-               v.AuxInt = makeValAndOff(int64(int8(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVBstore)
+               v.reset(OpAMD64MOVBload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVBload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ1 {
                        break
@@ -10113,27 +10368,25 @@ func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreidx1)
+               v.reset(OpAMD64MOVBloadidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} (ADDQ ptr idx) val mem)
+       // match: (MOVBload [off] {sym} (ADDQ ptr idx) mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem)
+       // result: (MOVBloadidx1 [off] {sym} ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQ {
                        break
@@ -10141,107 +10394,746 @@ func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreidx1)
+               v.reset(OpAMD64MOVBloadidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
-       // cond: x0.Uses == 1 && clobber(x0)
-       // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
+       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpAMD64MOVBstore {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               if x0.AuxInt != i-1 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               if x0.Aux != s {
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRWconst {
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVBloadidx1_0(v *Value) bool {
+       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if x0_1.AuxInt != 8 {
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               if w != x0_1.Args[0] {
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := x0.Args[2]
-               if !(x0.Uses == 1 && clobber(x0)) {
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
-               v0.AuxInt = 8
-               v0.AddArg(w)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
+       // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
+               c := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
-               w := v.Args[1]
-               x2 := v.Args[2]
-               if x2.Op != OpAMD64MOVBstore {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if x2.AuxInt != i-1 {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               if x2.Aux != s {
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = x2.Args[2]
-               if p != x2.Args[0] {
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpAMD64SHRLconst {
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem)
+       // cond: is32Bit(i+c)
+       // result: (MOVBload [i+c] {s} p mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               if x2_1.AuxInt != 8 {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(is32Bit(i + c)) {
                        break
                }
-               if w != x2_1.Args[0] {
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = i + c
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx1 [i] {s} (MOVQconst [c]) p mem)
+       // cond: is32Bit(i+c)
+       // result: (MOVBload [i+c] {s} p mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               x1 := x2.Args[2]
-               if x1.Op != OpAMD64MOVBstore {
+               c := v_0.AuxInt
+               p := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(i + c)) {
                        break
                }
-               if x1.AuxInt != i-2 {
-                       break
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = i + c
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
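
A minimal sketch (not part of the commit) of the last two MOVBloadidx1 rules above: when the index of a 1-byte indexed load is a constant, it can be absorbed into the displacement and the load demoted to a plain MOVBload, because ptr + 1*c + i equals ptr + (i+c).

// Explanatory sketch, not part of the generated file.
package main

import "fmt"

func main() {
	i, c, p := int64(5), int64(40), int64(0x3000)
	indexed := p + c + i       // MOVBloadidx1 [i] p (MOVQconst [c]): base + index + disp
	plain := p + (i + c)       // MOVBload [i+c] p
	fmt.Println(indexed == plain) // true
}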
+func rewriteValueAMD64_OpAMD64MOVBstore_0(v *Value) bool {
+       // match: (MOVBstore [off] {sym} ptr y:(SETL x) mem)
+       // cond: y.Uses == 1
+       // result: (SETLmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETL {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETLE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETLEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETLE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETLEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETG x) mem)
+       // cond: y.Uses == 1
+       // result: (SETGmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETG {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETGmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETGE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETGEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETGE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETGEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETEQ x) mem)
+       // cond: y.Uses == 1
+       // result: (SETEQmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETEQ {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETEQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETNE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETNEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETNE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETNEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETB x) mem)
+       // cond: y.Uses == 1
+       // result: (SETBmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETB {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETBE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETBEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETBE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETBEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETA x) mem)
+       // cond: y.Uses == 1
+       // result: (SETAmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETA {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETAmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETAE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETAEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETAE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
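
A small sketch (not part of the commit) of what the SETcc rules above fuse: a flag-materializing SETcc feeding a single byte store becomes the memory-destination form (e.g. SETLmem), and both shapes write the same 0/1 byte derived from the comparison.

// Explanatory sketch, not part of the generated file; setl is a stand-in for SETL.
package main

import "fmt"

// setl models SETL: turn the "signed less than" outcome into a 0/1 byte.
func setl(less bool) byte {
	if less {
		return 1
	}
	return 0
}

func main() {
	a, b := int64(3), int64(7)

	var mem1 [1]byte
	flag := setl(a < b) // SETL x into a register
	mem1[0] = flag      // MOVBstore ptr flag mem

	var mem2 [1]byte
	mem2[0] = setl(a < b) // SETLmem ptr x mem: store the flag directly

	fmt.Println(mem1[0] == mem2[0]) // true
}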
+func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBstore [off] {sym} ptr (MOVBQSX x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBQSX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVBQZX x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBQZX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstoreconst)
+               v.AuxInt = makeValAndOff(int64(int8(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
+       // cond: x0.Uses == 1 && clobber(x0)
+       // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpAMD64SHRWconst {
+                       break
+               }
+               if x0_1.AuxInt != 8 {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               mem := x0.Args[2]
+               if !(x0.Uses == 1 && clobber(x0)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
+               v0.AuxInt = 8
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
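+       // The four byte stores place w>>24 .. w&0xff at i-3 .. i, i.e. w in big-endian order;
+       // a single little-endian MOVLstore of BSWAPL(w) at i-3 produces the identical memory image.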
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x2 := v.Args[2]
+               if x2.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x2.AuxInt != i-1 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[2]
+               if p != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x2_1.AuxInt != 8 {
+                       break
+               }
+               if w != x2_1.Args[0] {
+                       break
+               }
+               x1 := x2.Args[2]
+               if x1.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
                }
                if x1.Aux != s {
                        break
@@ -19623,7 +20515,113 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
        }
        // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
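+       // Two adjacent 16-bit stores (w at i-2, w>>16 at i) merge into one 32-bit store;
+       // MOVWstoreidx2 scales idx by 2 while MOVLstoreidx1 scales by 1, so idx is pre-doubled with SHLQconst [1].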
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx2 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 16 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx2 {
+                       break
+               }
+               if x.AuxInt != i-2 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
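+       // Same merge for two halves taken from the middle of w: the stores hold w>>(j-16) and w>>j,
+       // i.e. 32 consecutive bits of w, so one MOVL store of w0 = w>>(j-16) covers both.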
        for {
                i := v.AuxInt
                s := v.Aux
@@ -19631,12 +20629,10 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
                p := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst {
-                       break
-               }
-               if v_2.AuxInt != 16 {
+               if v_2.Op != OpAMD64SHRQconst {
                        break
                }
+               j := v_2.AuxInt
                w := v_2.Args[0]
                x := v.Args[3]
                if x.Op != OpAMD64MOVWstoreidx2 {
@@ -19655,7 +20651,14 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
                if idx != x.Args[1] {
                        break
                }
-               if w != x.Args[2] {
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
                        break
                }
                mem := x.Args[3]
@@ -19670,201 +20673,562 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
                v0.AuxInt = 1
                v0.AddArg(idx)
                v.AddArg(v0)
-               v.AddArg(w)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
+       // match: (MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem)
+       // cond: is32Bit(i+2*c)
+       // result: (MOVWstore [i+2*c] {s} p w mem)
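+       // A constant index folds into the displacement: the address is i + p + 2*c, so a plain
+       // MOVWstore at offset i+2*c suffices; is32Bit keeps the folded offset encodable.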
        for {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[3]
                p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               if v_2.AuxInt != 16 {
+               c := v_1.AuxInt
+               w := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(i + 2*c)) {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx2 {
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i + 2*c
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULL_0(v *Value) bool {
+       // match: (MULL x (MOVLconst [c]))
+       // cond:
+       // result: (MULLconst [c] x)
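+       // Fold a constant multiplier into the immediate form so the strength-reduction rules below can apply.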
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               c := v_1.AuxInt
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULL (MOVLconst [c]) x)
+       // cond:
+       // result: (MULLconst [c] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               if x.Aux != s {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULLconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULLconst [c] (MULLconst [d] x))
+       // cond:
+       // result: (MULLconst [int64(int32(c * d))] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MULLconst {
                        break
                }
-               _ = x.Args[3]
-               if p != x.Args[0] {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = int64(int32(c * d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [-9] x)
+       // cond:
+       // result: (NEGL (LEAL8 <v.Type> x x))
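+       // -9*x = -(x + 8*x): one LEAL8 plus a NEGL instead of a multiply.
+       // The -5 and -3 cases below follow the same pattern with LEAL4 and LEAL2.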
+       for {
+               if v.AuxInt != -9 {
                        break
                }
-               if idx != x.Args[1] {
+               x := v.Args[0]
+               v.reset(OpAMD64NEGL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [-5] x)
+       // cond:
+       // result: (NEGL (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != -5 {
                        break
                }
-               if w != x.Args[2] {
+               x := v.Args[0]
+               v.reset(OpAMD64NEGL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [-3] x)
+       // cond:
+       // result: (NEGL (LEAL2 <v.Type> x x))
+       for {
+               if v.AuxInt != -3 {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := v.Args[0]
+               v.reset(OpAMD64NEGL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [-1] x)
+       // cond:
+       // result: (NEGL x)
+       for {
+               if v.AuxInt != -1 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
+               x := v.Args[0]
+               v.reset(OpAMD64NEGL)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [ 0] _)
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULLconst [ 1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [ 3] x)
+       // cond:
+       // result: (LEAL2 x x)
+       for {
+               if v.AuxInt != 3 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL2)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [ 5] x)
+       // cond:
+       // result: (LEAL4 x x)
+       for {
+               if v.AuxInt != 5 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL4)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [ 7] x)
+       // cond:
+       // result: (LEAL2 x (LEAL2 <v.Type> x x))
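+       // 7*x = x + 2*(x + 2*x) = x + 2*(3*x): two LEAL2 instructions.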
+       for {
+               if v.AuxInt != 7 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL2)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULLconst_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULLconst [ 9] x)
+       // cond:
+       // result: (LEAL8 x x)
+       for {
+               if v.AuxInt != 9 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL8)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [11] x)
+       // cond:
+       // result: (LEAL2 x (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 11 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL2)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [13] x)
+       // cond:
+       // result: (LEAL4 x (LEAL2 <v.Type> x x))
+       for {
+               if v.AuxInt != 13 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [19] x)
+       // cond:
+       // result: (LEAL2 x (LEAL8 <v.Type> x x))
+       for {
+               if v.AuxInt != 19 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL2)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [21] x)
+       // cond:
+       // result: (LEAL4 x (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 21 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
                v.AddArg(v0)
-               v.AddArg(w)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
+       // match: (MULLconst [25] x)
+       // cond:
+       // result: (LEAL8 x (LEAL2 <v.Type> x x))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               if v.AuxInt != 25 {
                        break
                }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx2 {
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [27] x)
+       // cond:
+       // result: (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x))
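+       // 27*x = 3*x + 8*(3*x): both LEAL8 operands are the 3*x value built with LEAL2.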
+       for {
+               if v.AuxInt != 27 {
                        break
                }
-               if x.AuxInt != i-2 {
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
+       // match: (MULLconst [37] x)
+       // cond:
+       // result: (LEAL4 x (LEAL8 <v.Type> x x))
+       for {
+               if v.AuxInt != 37 {
                        break
                }
-               if x.Aux != s {
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [41] x)
+       // cond:
+       // result: (LEAL8 x (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 41 {
                        break
                }
-               _ = x.Args[3]
-               if p != x.Args[0] {
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [45] x)
+       // cond:
+       // result: (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x))
+       for {
+               if v.AuxInt != 45 {
                        break
                }
-               if idx != x.Args[1] {
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULLconst_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULLconst [73] x)
+       // cond:
+       // result: (LEAL8 x (LEAL8 <v.Type> x x))
+       for {
+               if v.AuxInt != 73 {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULLconst [81] x)
+       // cond:
+       // result: (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x))
+       for {
+               if v.AuxInt != 81 {
                        break
                }
-               if w0.AuxInt != j-16 {
+               x := v.Args[0]
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v.AddArg(v1)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
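+       // For c one below a power of two (e.g. 15, 31, 63): c*x = (c+1)*x - x = (x << log2(c+1)) - x.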
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpAMD64SUBL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
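+       // For c one above a power of two (e.g. 17, 33, 65): c*x = (x << log2(c-1)) + x, done with LEAL1.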
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               v.reset(OpAMD64LEAL1)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-2) && c >= 34
+       // result: (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-2) && c >= 34) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
+               v.reset(OpAMD64LEAL2)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 2)
+               v0.AddArg(x)
                v.AddArg(v0)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem)
-       // cond: is32Bit(i+2*c)
-       // result: (MOVWstore [i+2*c] {s} p w mem)
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-4) && c >= 68
+       // result: (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-4) && c >= 68) {
                        break
                }
-               c := v_1.AuxInt
-               w := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(i + 2*c)) {
+               v.reset(OpAMD64LEAL4)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 4)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULLconst [c] x)
+       // cond: isPowerOfTwo(c-8) && c >= 136
+       // result: (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-8) && c >= 136) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i + 2*c
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAL8)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v0.AuxInt = log2(c - 8)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULL_0(v *Value) bool {
-       // match: (MULL x (MOVLconst [c]))
-       // cond:
-       // result: (MULLconst [c] x)
+       // match: (MULLconst [c] x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
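+       // For multiples of 3 times a power of two: c*x = (3*x) << log2(c/3), e.g. 24*x = (3*x) << 3;
+       // the c%5 and c%9 cases below use LEAL4 and LEAL8 the same way.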
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MULL (MOVLconst [c]) x)
-       // cond:
-       // result: (MULLconst [c] x)
+       // match: (MULLconst [c] x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULLconst_0(v *Value) bool {
-       // match: (MULLconst [c] (MULLconst [d] x))
-       // cond:
-       // result: (MULLconst [int64(int32(c * d))] x)
+       // match: (MULLconst [c] x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
        for {
                c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MULLconst {
+               x := v.Args[0]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = int64(int32(c * d))
-               v.AddArg(x)
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULLconst_30(v *Value) bool {
        // match: (MULLconst [c] (MOVLconst [d]))
        // cond:
        // result: (MOVLconst [int64(int32(c*d))])
@@ -20001,7 +21365,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MULQconst [0] _)
+       // match: (MULQconst [ 0] _)
        // cond:
        // result: (MOVQconst [0])
        for {
@@ -20012,7 +21376,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (MULQconst [1] x)
+       // match: (MULQconst [ 1] x)
        // cond:
        // result: x
        for {
@@ -20025,7 +21389,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MULQconst [3] x)
+       // match: (MULQconst [ 3] x)
        // cond:
        // result: (LEAQ2 x x)
        for {
@@ -20038,7 +21402,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MULQconst [5] x)
+       // match: (MULQconst [ 5] x)
        // cond:
        // result: (LEAQ4 x x)
        for {
@@ -20051,7 +21415,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MULQconst [7] x)
+       // match: (MULQconst [ 7] x)
        // cond:
        // result: (LEAQ2 x (LEAQ2 <v.Type> x x))
        for {
@@ -20072,7 +21436,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool {
 func rewriteValueAMD64_OpAMD64MULQconst_10(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MULQconst [9] x)
+       // match: (MULQconst [ 9] x)
        // cond:
        // result: (LEAQ8 x x)
        for {