Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: implement non-constant rotates
author Keith Randall <khr@golang.org>
Wed, 29 Mar 2017 17:04:17 +0000 (10:04 -0700)
committer Keith Randall <khr@golang.org>
Mon, 17 Apr 2017 23:19:45 +0000 (23:19 +0000)
Makes math/bits.Rotate{Left,Right} fast on amd64.

name              old time/op  new time/op  delta
RotateLeft-12     7.42ns ± 6%  5.45ns ± 6%  -26.54%   (p=0.000 n=9+10)
RotateLeft8-12    4.77ns ± 5%  3.42ns ± 7%  -28.25%   (p=0.000 n=8+10)
RotateLeft16-12   4.82ns ± 8%  3.40ns ± 7%  -29.36%  (p=0.000 n=10+10)
RotateLeft32-12   4.87ns ± 7%  3.48ns ± 7%  -28.51%    (p=0.000 n=8+9)
RotateLeft64-12   5.23ns ±10%  3.35ns ± 6%  -35.97%   (p=0.000 n=9+10)
RotateRight-12    7.59ns ± 8%  5.71ns ± 1%  -24.72%   (p=0.000 n=10+8)
RotateRight8-12   4.98ns ± 7%  3.36ns ± 9%  -32.55%  (p=0.000 n=10+10)
RotateRight16-12  5.12ns ± 2%  3.45ns ± 5%  -32.62%  (p=0.000 n=10+10)
RotateRight32-12  4.80ns ± 6%  3.42ns ±16%  -28.68%  (p=0.000 n=10+10)
RotateRight64-12  4.78ns ± 6%  3.42ns ± 6%  -28.50%  (p=0.000 n=10+10)

Update #18940

Change-Id: Ie79fb5581c489ed4d3b859314c5e669a134c119b
Reviewed-on: https://go-review.googlesource.com/39711
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/gc/asm_test.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go

index 4faad77a65240ae0dc0f9eda021eca357dd482da..f4be875ab2cdc5a60de2dd30118d78c281b14e25 100644 (file)
@@ -189,6 +189,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
                ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
                ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
+               ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
+               ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
                ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
                ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
                ssa.OpAMD64PXOR:
index e1b582dbf6d550a0eb1a1457113a8b57272c18d5..d6174a9b190c3ecdf783272d8faff4991d94b430 100644 (file)
@@ -800,6 +800,63 @@ var linuxAMD64Tests = []*asmTest{
                }`,
                []string{"\tCMPQ\t[A-Z]"},
        },
+       // Non-constant rotate
+       {
+               `func rot64l(x uint64, y int) uint64 {
+                       z := uint(y & 63)
+                       return x << z | x >> (64-z)
+               }`,
+               []string{"\tROLQ\t"},
+       },
+       {
+               `func rot64r(x uint64, y int) uint64 {
+                       z := uint(y & 63)
+                       return x >> z | x << (64-z)
+               }`,
+               []string{"\tRORQ\t"},
+       },
+       {
+               `func rot32l(x uint32, y int) uint32 {
+                       z := uint(y & 31)
+                       return x << z | x >> (32-z)
+               }`,
+               []string{"\tROLL\t"},
+       },
+       {
+               `func rot32r(x uint32, y int) uint32 {
+                       z := uint(y & 31)
+                       return x >> z | x << (32-z)
+               }`,
+               []string{"\tRORL\t"},
+       },
+       {
+               `func rot16l(x uint16, y int) uint16 {
+                       z := uint(y & 15)
+                       return x << z | x >> (16-z)
+               }`,
+               []string{"\tROLW\t"},
+       },
+       {
+               `func rot16r(x uint16, y int) uint16 {
+                       z := uint(y & 15)
+                       return x >> z | x << (16-z)
+               }`,
+               []string{"\tRORW\t"},
+       },
+       {
+               `func rot8l(x uint8, y int) uint8 {
+                       z := uint(y & 7)
+                       return x << z | x >> (8-z)
+               }`,
+               []string{"\tROLB\t"},
+       },
+       {
+               `func rot8r(x uint8, y int) uint8 {
+                       z := uint(y & 7)
+                       return x >> z | x << (8-z)
+               }`,
+               []string{"\tRORB\t"},
+       },
 }
 
 var linux386Tests = []*asmTest{
index b7cbe37472d55e3abbbaa58f5597e92053c7bc9d..011bf683ff7c638fff7437fab1cac4092f29f86c 100644 (file)
 (SARB x (MOVQconst [c])) -> (SARBconst [min(c&31,7)] x)
 (SARB x (MOVLconst [c])) -> (SARBconst [min(c&31,7)] x)
 
-(SARL x (ANDLconst [31] y)) -> (SARL x y)
-(SARQ x (ANDQconst [63] y)) -> (SARQ x y)
-
-(SHLL x (ANDLconst [31] y)) -> (SHLL x y)
-(SHLQ x (ANDQconst [63] y)) -> (SHLQ x y)
-(SHLQ x (ANDLconst [63] y)) -> (SHLQ x y)
-
-(SHRL x (ANDLconst [31] y)) -> (SHRL x y)
-(SHRQ x (ANDQconst [63] y)) -> (SHRQ x y)
-(SHRQ x (ANDLconst [63] y)) -> (SHRQ x y)
-
-// Rotate instructions
-
+// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
+(SHLQ x (ADDQconst [c] y)) && c & 63 == 0 -> (SHLQ x y)
+(SHRQ x (ADDQconst [c] y)) && c & 63 == 0 -> (SHRQ x y)
+(SARQ x (ADDQconst [c] y)) && c & 63 == 0 -> (SARQ x y)
+(SHLQ x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 -> (SHLQ x (NEGQ <t> y))
+(SHRQ x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 -> (SHRQ x (NEGQ <t> y))
+(SARQ x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 -> (SARQ x (NEGQ <t> y))
+(SHLQ x (ANDQconst [c] y)) && c & 63 == 63 -> (SHLQ x y)
+(SHRQ x (ANDQconst [c] y)) && c & 63 == 63 -> (SHRQ x y)
+(SARQ x (ANDQconst [c] y)) && c & 63 == 63 -> (SARQ x y)
+(SHLQ x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 -> (SHLQ x (NEGQ <t> y))
+(SHRQ x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 -> (SHRQ x (NEGQ <t> y))
+(SARQ x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 -> (SARQ x (NEGQ <t> y))
+
+(SHLL x (ADDQconst [c] y)) && c & 31 == 0 -> (SHLL x y)
+(SHRL x (ADDQconst [c] y)) && c & 31 == 0 -> (SHRL x y)
+(SARL x (ADDQconst [c] y)) && c & 31 == 0 -> (SARL x y)
+(SHLL x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 -> (SHLL x (NEGQ <t> y))
+(SHRL x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 -> (SHRL x (NEGQ <t> y))
+(SARL x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 -> (SARL x (NEGQ <t> y))
+(SHLL x (ANDQconst [c] y)) && c & 31 == 31 -> (SHLL x y)
+(SHRL x (ANDQconst [c] y)) && c & 31 == 31 -> (SHRL x y)
+(SARL x (ANDQconst [c] y)) && c & 31 == 31 -> (SARL x y)
+(SHLL x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 -> (SHLL x (NEGQ <t> y))
+(SHRL x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 -> (SHRL x (NEGQ <t> y))
+(SARL x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 -> (SARL x (NEGQ <t> y))
+
+(SHLQ x (ADDLconst [c] y)) && c & 63 == 0 -> (SHLQ x y)
+(SHRQ x (ADDLconst [c] y)) && c & 63 == 0 -> (SHRQ x y)
+(SARQ x (ADDLconst [c] y)) && c & 63 == 0 -> (SARQ x y)
+(SHLQ x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 -> (SHLQ x (NEGL <t> y))
+(SHRQ x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 -> (SHRQ x (NEGL <t> y))
+(SARQ x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 -> (SARQ x (NEGL <t> y))
+(SHLQ x (ANDLconst [c] y)) && c & 63 == 63 -> (SHLQ x y)
+(SHRQ x (ANDLconst [c] y)) && c & 63 == 63 -> (SHRQ x y)
+(SARQ x (ANDLconst [c] y)) && c & 63 == 63 -> (SARQ x y)
+(SHLQ x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 -> (SHLQ x (NEGL <t> y))
+(SHRQ x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 -> (SHRQ x (NEGL <t> y))
+(SARQ x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 -> (SARQ x (NEGL <t> y))
+
+(SHLL x (ADDLconst [c] y)) && c & 31 == 0 -> (SHLL x y)
+(SHRL x (ADDLconst [c] y)) && c & 31 == 0 -> (SHRL x y)
+(SARL x (ADDLconst [c] y)) && c & 31 == 0 -> (SARL x y)
+(SHLL x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 -> (SHLL x (NEGL <t> y))
+(SHRL x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 -> (SHRL x (NEGL <t> y))
+(SARL x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 -> (SARL x (NEGL <t> y))
+(SHLL x (ANDLconst [c] y)) && c & 31 == 31 -> (SHLL x y)
+(SHRL x (ANDLconst [c] y)) && c & 31 == 31 -> (SHRL x y)
+(SARL x (ANDLconst [c] y)) && c & 31 == 31 -> (SARL x y)
+(SHLL x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 -> (SHLL x (NEGL <t> y))
+(SHRL x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 -> (SHRL x (NEGL <t> y))
+(SARL x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 -> (SARL x (NEGL <t> y))
+
+// Constant rotate instructions
 (ADDQ (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
 ( ORQ (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
 (XORQ (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
 (ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
 (ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)
 
-// TODO: non-constant rotates if shift amount is known to be bounded (shift & 63 or something).
+// Non-constant rotates.
+// We want to issue a rotate when the Go source contains code like
+//     y &= 63
+//     x << y | x >> (64-y)
+// The shift rules above convert << to SHLx and >> to SHRx.
+// SHRx converts its shift argument from 64-y to -y.
+// A tricky situation occurs when y==0. Then the original code would be:
+//     x << 0 | x >> 64
+// But x >> 64 is 0, not x. So there's an additional mask that is ANDed in
+// to force the second term to 0. We don't need that mask, but we must match
+// it in order to strip it out.
+(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64])))) -> (ROLQ x y)
+(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64])))) -> (ROLQ x y)
+(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64])))) -> (RORQ x y)
+(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64])))) -> (RORQ x y)
+
+(ORL (SHLL x y) (ANDL (SHRL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])))) -> (ROLL x y)
+(ORL (SHLL x y) (ANDL (SHRL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])))) -> (ROLL x y)
+(ORL (SHRL x y) (ANDL (SHLL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])))) -> (RORL x y)
+(ORL (SHRL x y) (ANDL (SHLL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])))) -> (RORL x y)
+
+// Help with rotate detection
+(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) -> (FlagLT_ULT)
+(CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst  [7] _))) [32]) -> (FlagLT_ULT)
+
+(ORL (SHLL x (ANDQconst y [15]))
+     (ANDL (SHRW x (NEGQ (ADDQconst (ANDQconst y [15]) [-16])))
+           (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [15]) [-16])) [16]))))
+  && v.Type.Size() == 2
+  -> (ROLW x y)
+(ORL (SHLL x (ANDLconst y [15]))
+     (ANDL (SHRW x (NEGL (ADDLconst (ANDLconst y [15]) [-16])))
+           (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [15]) [-16])) [16]))))
+  && v.Type.Size() == 2
+  -> (ROLW x y)
+(ORL (SHRW x (ANDQconst y [15]))
+     (SHLL x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))))
+  && v.Type.Size() == 2
+  -> (RORW x y)
+(ORL (SHRW x (ANDLconst y [15]))
+     (SHLL x (NEGL (ADDLconst (ANDLconst y [15]) [-16]))))
+  && v.Type.Size() == 2
+  -> (RORW x y)
+
+(ORL (SHLL x (ANDQconst y [ 7]))
+     (ANDL (SHRB x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])))
+           (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])) [ 8]))))
+  && v.Type.Size() == 1
+  -> (ROLB x y)
+(ORL (SHLL x (ANDLconst y [ 7]))
+     (ANDL (SHRB x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])))
+           (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])) [ 8]))))
+  && v.Type.Size() == 1
+  -> (ROLB x y)
+(ORL (SHRB x (ANDQconst y [ 7]))
+     (SHLL x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))))
+  && v.Type.Size() == 1
+  -> (RORB x y)
+(ORL (SHRB x (ANDLconst y [ 7]))
+     (SHLL x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8]))))
+  && v.Type.Size() == 1
+  -> (RORB x y)
+
+// rotate left negative = rotate right
+(ROLQ x (NEGQ y)) -> (RORQ x y)
+(ROLQ x (NEGL y)) -> (RORQ x y)
+(ROLL x (NEGQ y)) -> (RORL x y)
+(ROLL x (NEGL y)) -> (RORL x y)
+(ROLW x (NEGQ y)) -> (RORW x y)
+(ROLW x (NEGL y)) -> (RORW x y)
+(ROLB x (NEGQ y)) -> (RORB x y)
+(ROLB x (NEGL y)) -> (RORB x y)
+
+// rotate right negative = rotate left
+(RORQ x (NEGQ y)) -> (ROLQ x y)
+(RORQ x (NEGL y)) -> (ROLQ x y)
+(RORL x (NEGQ y)) -> (ROLL x y)
+(RORL x (NEGL y)) -> (ROLL x y)
+(RORW x (NEGQ y)) -> (ROLW x y)
+(RORW x (NEGL y)) -> (ROLW x y)
+(RORB x (NEGQ y)) -> (ROLB x y)
+(RORB x (NEGL y)) -> (ROLB x y)
+
+// rotate by constants
+(ROLQ x (MOVQconst [c])) -> (ROLQconst [c&63] x)
+(ROLQ x (MOVLconst [c])) -> (ROLQconst [c&63] x)
+(ROLL x (MOVQconst [c])) -> (ROLLconst [c&31] x)
+(ROLL x (MOVLconst [c])) -> (ROLLconst [c&31] x)
+(ROLW x (MOVQconst [c])) -> (ROLWconst [c&15] x)
+(ROLW x (MOVLconst [c])) -> (ROLWconst [c&15] x)
+(ROLB x (MOVQconst [c])) -> (ROLBconst [c&7 ] x)
+(ROLB x (MOVLconst [c])) -> (ROLBconst [c&7 ] x)
+
+(RORQ x (MOVQconst [c])) -> (ROLQconst [(-c)&63] x)
+(RORQ x (MOVLconst [c])) -> (ROLQconst [(-c)&63] x)
+(RORL x (MOVQconst [c])) -> (ROLLconst [(-c)&31] x)
+(RORL x (MOVLconst [c])) -> (ROLLconst [(-c)&31] x)
+(RORW x (MOVQconst [c])) -> (ROLWconst [(-c)&15] x)
+(RORW x (MOVLconst [c])) -> (ROLWconst [(-c)&15] x)
+(RORB x (MOVQconst [c])) -> (ROLBconst [(-c)&7 ] x)
+(RORB x (MOVLconst [c])) -> (ROLBconst [(-c)&7 ] x)
 
 // Constant shift simplifications
-
 (SHLQconst x [0]) -> x
 (SHRQconst x [0]) -> x
 (SARQconst x [0]) -> x
 (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT)
 (CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT)
 (CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < int8(n) -> (FlagLT_ULT)
+
 // TODO: DIVxU also.
 
 // Absorb flag constants into SBB ops.
index d9e5fd5b7a5453169e8b12ae0db956afea41d75f..76e8273a40b84b2b26d5cff118537992c4920e13 100644 (file)
@@ -284,6 +284,14 @@ func init() {
                {name: "SARWconst", argLength: 1, reg: gp11, asm: "SARW", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-15
                {name: "SARBconst", argLength: 1, reg: gp11, asm: "SARB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-7
 
+               {name: "ROLQ", argLength: 2, reg: gp21shift, asm: "ROLQ", resultInArg0: true, clobberFlags: true},              // arg0 rotate left arg1 bits.
+               {name: "ROLL", argLength: 2, reg: gp21shift, asm: "ROLL", resultInArg0: true, clobberFlags: true},              // arg0 rotate left arg1 bits.
+               {name: "ROLW", argLength: 2, reg: gp21shift, asm: "ROLW", resultInArg0: true, clobberFlags: true},              // arg0 rotate left arg1 bits.
+               {name: "ROLB", argLength: 2, reg: gp21shift, asm: "ROLB", resultInArg0: true, clobberFlags: true},              // arg0 rotate left arg1 bits.
+               {name: "RORQ", argLength: 2, reg: gp21shift, asm: "RORQ", resultInArg0: true, clobberFlags: true},              // arg0 rotate right arg1 bits.
+               {name: "RORL", argLength: 2, reg: gp21shift, asm: "RORL", resultInArg0: true, clobberFlags: true},              // arg0 rotate right arg1 bits.
+               {name: "RORW", argLength: 2, reg: gp21shift, asm: "RORW", resultInArg0: true, clobberFlags: true},              // arg0 rotate right arg1 bits.
+               {name: "RORB", argLength: 2, reg: gp21shift, asm: "RORB", resultInArg0: true, clobberFlags: true},              // arg0 rotate right arg1 bits.
                {name: "ROLQconst", argLength: 1, reg: gp11, asm: "ROLQ", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-63
                {name: "ROLLconst", argLength: 1, reg: gp11, asm: "ROLL", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-31
                {name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
index 48bc15773e4250c18ad4a2d8cd7246e3721223ac..db86a05717e9054ebb6187c7c776d02b6d1b6573 100644 (file)
@@ -512,6 +512,14 @@ const (
        OpAMD64SARLconst
        OpAMD64SARWconst
        OpAMD64SARBconst
+       OpAMD64ROLQ
+       OpAMD64ROLL
+       OpAMD64ROLW
+       OpAMD64ROLB
+       OpAMD64RORQ
+       OpAMD64RORL
+       OpAMD64RORW
+       OpAMD64RORB
        OpAMD64ROLQconst
        OpAMD64ROLLconst
        OpAMD64ROLWconst
@@ -5944,6 +5952,134 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "ROLQ",
+               argLen:       2,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.AROLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 2},     // CX
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "ROLL",
+               argLen:       2,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.AROLL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 2},     // CX
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "ROLW",
+               argLen:       2,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.AROLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 2},     // CX
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "ROLB",
+               argLen:       2,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.AROLB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 2},     // CX
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "RORQ",
+               argLen:       2,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.ARORQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 2},     // CX
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "RORL",
+               argLen:       2,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.ARORL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 2},     // CX
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "RORW",
+               argLen:       2,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.ARORW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 2},     // CX
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "RORB",
+               argLen:       2,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.ARORB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 2},     // CX
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
        {
                name:         "ROLQconst",
                auxType:      auxInt8,
index df72064b7686098fe3f48bfd8065f7c61d2d36fa..b6a54239bd6cba8872d18eda7b73556bdf379aa3 100644 (file)
@@ -212,14 +212,30 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ORQ(v)
        case OpAMD64ORQconst:
                return rewriteValueAMD64_OpAMD64ORQconst(v)
+       case OpAMD64ROLB:
+               return rewriteValueAMD64_OpAMD64ROLB(v)
        case OpAMD64ROLBconst:
                return rewriteValueAMD64_OpAMD64ROLBconst(v)
+       case OpAMD64ROLL:
+               return rewriteValueAMD64_OpAMD64ROLL(v)
        case OpAMD64ROLLconst:
                return rewriteValueAMD64_OpAMD64ROLLconst(v)
+       case OpAMD64ROLQ:
+               return rewriteValueAMD64_OpAMD64ROLQ(v)
        case OpAMD64ROLQconst:
                return rewriteValueAMD64_OpAMD64ROLQconst(v)
+       case OpAMD64ROLW:
+               return rewriteValueAMD64_OpAMD64ROLW(v)
        case OpAMD64ROLWconst:
                return rewriteValueAMD64_OpAMD64ROLWconst(v)
+       case OpAMD64RORB:
+               return rewriteValueAMD64_OpAMD64RORB(v)
+       case OpAMD64RORL:
+               return rewriteValueAMD64_OpAMD64RORL(v)
+       case OpAMD64RORQ:
+               return rewriteValueAMD64_OpAMD64RORQ(v)
+       case OpAMD64RORW:
+               return rewriteValueAMD64_OpAMD64RORW(v)
        case OpAMD64SARB:
                return rewriteValueAMD64_OpAMD64SARB(v)
        case OpAMD64SARBconst:
@@ -2819,6 +2835,62 @@ func rewriteValueAMD64_OpAMD64CMPQ(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64CMPQconst(v *Value) bool {
+       // match: (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32])
+       // cond:
+       // result: (FlagLT_ULT)
+       for {
+               if v.AuxInt != 32 {
+                       break
+               }
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_0_0.AuxInt != -16 {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 15 {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32])
+       // cond:
+       // result: (FlagLT_ULT)
+       for {
+               if v.AuxInt != 32 {
+                       break
+               }
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_0_0.AuxInt != -8 {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 7 {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
        // match: (CMPQconst (MOVQconst [x]) [y])
        // cond: x==y
        // result: (FlagEQ)
@@ -12637,3770 +12709,2928 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (ORL x x)
+       // match: (ORL (SHLL x y) (ANDL (SHRL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))))
        // cond:
-       // result: x
+       // result: (ROLL x y)
        for {
-               x := v.Args[0]
-               if x != v.Args[1] {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBload {
-                       break
-               }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               if sh.AuxInt != 8 {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x != v_1_0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x1.Args[1] {
+               if y != v_1_0_1.Args[0] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if sh.AuxInt != 8 {
+               if v_1_1_0.AuxInt != 32 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVBload {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_1_1_0_0_0.AuxInt != -32 {
                        break
                }
-               if p != x0.Args[0] {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if mem != x0.Args[1] {
+               if v_1_1_0_0_0_0.AuxInt != 31 {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if y != v_1_1_0_0_0_0.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       // match: (ORL (SHLL x y) (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHRL x (NEGQ y))))
+       // cond:
+       // result: (ROLL x y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWload {
-                       break
-               }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               if sh.AuxInt != 16 {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if p != x1.Args[0] {
+               if v_1_0_0.AuxInt != 32 {
                        break
                }
-               if mem != x1.Args[1] {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)) x0:(MOVWload [i0] {s} p mem))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               if v_1_0_0_0_0.AuxInt != -32 {
                        break
                }
-               if sh.AuxInt != 16 {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if v_1_0_0_0_0_0.AuxInt != 31 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWload {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRL {
                        break
                }
-               if p != x0.Args[0] {
+               if x != v_1_1.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if y != v_1_1_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       // match: (ORL (ANDL (SHRL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))) (SHLL x y))
+       // cond:
+       // result: (ROLL x y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_0_1_0.AuxInt != 32 {
                        break
                }
-               if p != x0.Args[0] {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x0.Args[1] {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_0_1_0_0_0.AuxInt != -32 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
-       for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if v_0_1_0_0_0_0.AuxInt != 31 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if x != v_1.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if y != v_1.Args[1] {
                        break
                }
-               if p != x0.Args[0] {
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHRL x (NEGQ y))) (SHLL x y))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               if mem != x0.Args[1] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y) s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               if v_0_0_0.AuxInt != 32 {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               if v_0_0_0_0_0.AuxInt != -32 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if v_0_0_0_0_0_0.AuxInt != 31 {
                        break
                }
-               if p != x1.Args[0] {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRL {
                        break
                }
-               if mem != x1.Args[1] {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if y != v_0_1_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))) s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               if x != v_1.Args[0] {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if y != v_1.Args[1] {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHLL x y) (ANDL (SHRL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRL {
                        break
                }
-               if p != x1.Args[0] {
+               if x != v_1_0.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGL {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if y != v_1_0_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if sh.AuxInt != 8 {
+               if v_1_1_0.AuxInt != 32 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if p != x1.Args[0] {
+               if v_1_1_0_0_0.AuxInt != -32 {
                        break
                }
-               if idx != x1.Args[1] {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if v_1_1_0_0_0_0.AuxInt != 31 {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if y != v_1_1_0_0_0_0.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL (SHLL x y) (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHRL x (NEGL y))))
+       // cond:
+       // result: (ROLL x y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               if sh.AuxInt != 8 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if v_1_0_0.AuxInt != 32 {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if idx != x1.Args[1] {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if v_1_0_0_0_0.AuxInt != -32 {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               if v_1_0_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_0_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRL {
+                       break
+               }
+               if x != v_1_1.Args[0] {
+                       break
+               }
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_1_1_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL (ANDL (SHRL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))) (SHLL x y))
+       // cond:
+       // result: (ROLL x y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRL {
                        break
                }
-               if sh.AuxInt != 8 {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if idx != x1.Args[0] {
+               if v_0_1_0.AuxInt != 32 {
                        break
                }
-               if p != x1.Args[1] {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if mem != x1.Args[2] {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if v_0_1_0_0_0.AuxInt != -32 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               if v_0_1_0_0_0_0.AuxInt != 31 {
                        break
                }
-               if sh.AuxInt != 8 {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x != v_1.Args[0] {
                        break
                }
-               if idx != x1.Args[0] {
+               if y != v_1.Args[1] {
                        break
                }
-               if p != x1.Args[1] {
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHRL x (NEGL y))) (SHLL x y))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               if mem != x1.Args[2] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               if v_0_0_0.AuxInt != 32 {
                        break
                }
-               if sh.AuxInt != 8 {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_0_0_0_0.AuxInt != -32 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if p != x0.Args[0] {
+               if v_0_0_0_0_0_0.AuxInt != 31 {
                        break
                }
-               if idx != x0.Args[1] {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRL {
                        break
                }
-               if mem != x0.Args[2] {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGL {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if y != v_0_1_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               if sh.AuxInt != 8 {
+               if x != v_1.Args[0] {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if y != v_1.Args[1] {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHRL x y) (ANDL (SHLL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))))
+       // cond:
+       // result: (RORL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRL {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               if p != x0.Args[0] {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLL {
                        break
                }
-               if idx != x0.Args[1] {
+               if x != v_1_0.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if y != v_1_0_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if sh.AuxInt != 8 {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if v_1_1_0.AuxInt != 32 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if idx != x0.Args[0] {
+               if v_1_1_0_0_0.AuxInt != -32 {
                        break
                }
-               if p != x0.Args[1] {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if mem != x0.Args[2] {
+               if v_1_1_0_0_0_0.AuxInt != 31 {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if y != v_1_1_0_0_0_0.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL (SHRL x y) (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHLL x (NEGQ y))))
+       // cond:
+       // result: (RORL x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRL {
                        break
                }
-               if sh.AuxInt != 8 {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_1_0_0.AuxInt != 32 {
                        break
                }
-               if idx != x0.Args[0] {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if p != x0.Args[1] {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if mem != x0.Args[2] {
+               if v_1_0_0_0_0.AuxInt != -32 {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if v_1_0_0_0_0_0.AuxInt != 31 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               if sh.AuxInt != 16 {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x != v_1_1.Args[0] {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if p != x1.Args[0] {
+               if y != v_1_1_1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SHLL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))) (SHRL x y))
+       // cond:
+       // result: (RORL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               if mem != x1.Args[2] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLL {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               if v_0_1_0.AuxInt != 32 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if p != x1.Args[0] {
+               if v_0_1_0_0_0.AuxInt != -32 {
                        break
                }
-               if idx != x1.Args[1] {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if v_0_1_0_0_0_0.AuxInt != 31 {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               if x != v_1.Args[0] {
                        break
                }
-               if sh.AuxInt != 16 {
+               if y != v_1.Args[1] {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHLL x (NEGQ y))) (SHRL x y))
+       // cond:
+       // result: (RORL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if idx != x1.Args[0] {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if p != x1.Args[1] {
+               if v_0_0_0.AuxInt != 32 {
                        break
                }
-               if mem != x1.Args[2] {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if v_0_0_0_0_0.AuxInt != -32 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               if v_0_0_0_0_0_0.AuxInt != 31 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if idx != x1.Args[0] {
+               if y != v_0_1_1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRL {
                        break
                }
-               if mem != x1.Args[2] {
+               if x != v_1.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if y != v_1.Args[1] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL (SHRL x y) (ANDL (SHLL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))))
+       // cond:
+       // result: (RORL x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRL {
                        break
                }
-               if sh.AuxInt != 16 {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if x != v_1_0.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGL {
                        break
                }
-               if p != x0.Args[0] {
+               if y != v_1_0_1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if mem != x0.Args[2] {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if v_1_1_0.AuxInt != 32 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if sh.AuxInt != 16 {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if v_1_1_0_0_0.AuxInt != -32 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_1_1_0_0_0_0.AuxInt != 31 {
                        break
                }
-               if p != x0.Args[0] {
+               if y != v_1_1_0_0_0_0.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHRL x y) (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHLL x (NEGL y))))
+       // cond:
+       // result: (RORL x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRL {
                        break
                }
-               if mem != x0.Args[2] {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               if v_1_0_0.AuxInt != 32 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_1_0_0_0_0.AuxInt != -32 {
                        break
                }
-               if idx != x0.Args[0] {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if p != x0.Args[1] {
+               if v_1_0_0_0_0_0.AuxInt != 31 {
                        break
                }
-               if mem != x0.Args[2] {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLL {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               if x != v_1_1.Args[0] {
+                       break
+               }
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_1_1_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL (ANDL (SHLL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))) (SHRL x y))
+       // cond:
+       // result: (RORL x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               if sh.AuxInt != 16 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if idx != x0.Args[0] {
+               if v_0_1_0.AuxInt != 32 {
                        break
                }
-               if p != x0.Args[1] {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if mem != x0.Args[2] {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if v_0_1_0_0_0.AuxInt != -32 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               if v_0_1_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_0_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRL {
+                       break
+               }
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHLL x (NEGL y))) (SHRL x y))
+       // cond:
+       // result: (RORL x y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               if v_0_0_0.AuxInt != 32 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if p != x0.Args[0] {
+               if v_0_0_0_0_0.AuxInt != -32 {
                        break
                }
-               if idx != x0.Args[1] {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if mem != x0.Args[2] {
+               if v_0_0_0_0_0_0.AuxInt != 31 {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_0_1_1.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRL {
+                       break
+               }
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL (SHLL x (ANDQconst y [15])) (ANDL (SHRW x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [15]) [-16])) [16]))))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDQconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               if v_0_1.AuxInt != 15 {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRW {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if x != v_1_0.Args[0] {
                        break
                }
-               if p != x0.Args[0] {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               if idx != x0.Args[1] {
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if mem != x0.Args[2] {
+               if v_1_0_1_0.AuxInt != -16 {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1_0_1_0_0 := v_1_0_1_0.Args[0]
+               if v_1_0_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               if v_1_0_1_0_0.AuxInt != 15 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if y != v_1_0_1_0_0.Args[0] {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_1_1_0.AuxInt != 16 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if idx != x0.Args[0] {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if p != x0.Args[1] {
+               if v_1_1_0_0_0.AuxInt != -16 {
                        break
                }
-               if mem != x0.Args[2] {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_1_0_0_0_0.AuxInt != 15 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if y != v_1_1_0_0_0_0.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL (SHLL x (ANDQconst y [15])) (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [15]) [-16])) [16])) (SHRW x (NEGQ (ADDQconst (ANDQconst y [15]) [-16])))))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDQconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               if v_0_1.AuxInt != 15 {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if idx != x0.Args[0] {
+               if v_1_0_0.AuxInt != 16 {
                        break
                }
-               if p != x0.Args[1] {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x0.Args[2] {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_0_0_0_0.AuxInt != -16 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if v_1_0_0_0_0_0.AuxInt != 15 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRW {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if x != v_1_1.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if p != x0.Args[0] {
+               v_1_1_1_0 := v_1_1_1.Args[0]
+               if v_1_1_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if idx != x0.Args[1] {
+               if v_1_1_1_0.AuxInt != -16 {
                        break
                }
-               if mem != x0.Args[2] {
+               v_1_1_1_0_0 := v_1_1_1_0.Args[0]
+               if v_1_1_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_1_1_0_0.AuxInt != 15 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if y != v_1_1_1_0_0.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL (ANDL (SHRW x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [15]) [-16])) [16]))) (SHLL x (ANDQconst y [15])))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRW {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_0_1_0.AuxInt != -16 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_0_1_0_0 := v_0_0_1_0.Args[0]
+               if v_0_0_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if p != x0.Args[0] {
+               if v_0_0_1_0_0.AuxInt != 15 {
                        break
                }
-               if idx != x0.Args[1] {
+               y := v_0_0_1_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if mem != x0.Args[2] {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_0_1_0.AuxInt != 16 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               if v_0_1_0_0_0.AuxInt != -16 {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1_0_0_0_0.AuxInt != 15 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               if idx != x0.Args[0] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               if p != x0.Args[1] {
+               if x != v_1.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDQconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_1.AuxInt != 15 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [15]) [-16])) [16])) (SHRW x (NEGQ (ADDQconst (ANDQconst y [15]) [-16])))) (SHLL x (ANDQconst y [15])))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               if v_0_0_0.AuxInt != 16 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if idx != x0.Args[0] {
+               if v_0_0_0_0_0.AuxInt != -16 {
                        break
                }
-               if p != x0.Args[1] {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if mem != x0.Args[2] {
+               if v_0_0_0_0_0_0.AuxInt != 15 {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRW {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               v_0_1_1_0 := v_0_1_1.Args[0]
+               if v_0_1_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1_1_0.AuxInt != -16 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0_1_1_0_0 := v_0_1_1_0.Args[0]
+               if v_0_1_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1_1_0_0.AuxInt != 15 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if y != v_0_1_1_0_0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               if idx != x1.Args[1] {
+               if x != v_1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDQconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_1.AuxInt != 15 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL (SHLL x (ANDLconst y [15])) (ANDL (SHRW x (NEGL (ADDLconst (ANDLconst y [15]) [-16]))) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [15]) [-16])) [16]))))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDLconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1.AuxInt != 15 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRW {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x != v_1_0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGL {
                        break
                }
-               if idx != x1.Args[1] {
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if v_1_0_1_0.AuxInt != -16 {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1_0_1_0_0 := v_1_0_1_0.Args[0]
+               if v_1_0_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               if v_1_0_1_0_0.AuxInt != 15 {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               if y != v_1_0_1_0_0.Args[0] {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if v_1_1_0.AuxInt != 16 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if idx != x1.Args[1] {
+               if v_1_1_0_0_0.AuxInt != -16 {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_1_0_0_0_0.AuxInt != 15 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if y != v_1_1_0_0_0_0.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL (SHLL x (ANDLconst y [15])) (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [15]) [-16])) [16])) (SHRW x (NEGL (ADDLconst (ANDLconst y [15]) [-16])))))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDLconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1.AuxInt != 15 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if p != x1.Args[0] {
+               if v_1_0_0.AuxInt != 16 {
                        break
                }
-               if idx != x1.Args[1] {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_0_0_0_0.AuxInt != -16 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               if v_1_0_0_0_0_0.AuxInt != 15 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRW {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if x != v_1_1.Args[0] {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGL {
                        break
                }
-               if idx != x1.Args[0] {
+               v_1_1_1_0 := v_1_1_1.Args[0]
+               if v_1_1_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if p != x1.Args[1] {
+               if v_1_1_1_0.AuxInt != -16 {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_1_1_0_0 := v_1_1_1_0.Args[0]
+               if v_1_1_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_1_1_0_0.AuxInt != 15 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if y != v_1_1_1_0_0.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL (ANDL (SHRW x (NEGL (ADDLconst (ANDLconst y [15]) [-16]))) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [15]) [-16])) [16]))) (SHLL x (ANDLconst y [15])))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRW {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if v_0_0_1_0.AuxInt != -16 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_0_0_1_0_0 := v_0_0_1_0.Args[0]
+               if v_0_0_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if idx != x1.Args[0] {
+               if v_0_0_1_0_0.AuxInt != 15 {
                        break
                }
-               if p != x1.Args[1] {
+               y := v_0_0_1_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if mem != x1.Args[2] {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_0_1_0.AuxInt != 16 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1_0_0_0.AuxInt != -16 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1_0_0_0_0.AuxInt != 15 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               if idx != x1.Args[0] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               if p != x1.Args[1] {
+               if x != v_1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDLconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_1.AuxInt != 15 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [15]) [-16])) [16])) (SHRW x (NEGL (ADDLconst (ANDLconst y [15]) [-16])))) (SHLL x (ANDLconst y [15])))
+       // cond: v.Type.Size() == 2
+       // result: (ROLW x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               if v_0_0_0.AuxInt != 16 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if idx != x1.Args[0] {
+               if v_0_0_0_0_0.AuxInt != -16 {
                        break
                }
-               if p != x1.Args[1] {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if v_0_0_0_0_0_0.AuxInt != 15 {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRW {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-       for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0_1_1_0 := v_0_1_1.Args[0]
+               if v_0_1_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if sh.AuxInt != 8 {
+               if v_0_1_1_0.AuxInt != -16 {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               v_0_1_1_0_0 := v_0_1_1_0.Args[0]
+               if v_0_1_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_0_1_1_0_0.AuxInt != 15 {
                        break
                }
-               if p != x0.Args[0] {
+               if y != v_0_1_1_0_0.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if x != v_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDLconst {
+                       break
+               }
+               if v_1_1.AuxInt != 15 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)) x1:(MOVBload [i1] {s} p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       // match: (ORL (SHRW x (ANDQconst y [15])) (SHLL x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))))
+       // cond: v.Type.Size() == 2
+       // result: (RORW x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRW {
                        break
                }
-               if sh.AuxInt != 8 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDQconst {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if v_0_1.AuxInt != 15 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBload {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x != v_1.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x1.Args[1] {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if v_1_1_0.AuxInt != -16 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_1_1_0_0.AuxInt != 15 {
+                       break
+               }
+               if y != v_1_1_0_0.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64RORW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       // match: (ORL (SHLL x (NEGQ (ADDQconst (ANDQconst y [15]) [-16]))) (SHRW x (ANDQconst y [15])))
+       // cond: v.Type.Size() == 2
+       // result: (RORW x y)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               if r1.AuxInt != 8 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               if v_0_1_0.AuxInt != -16 {
                        break
                }
-               if sh.AuxInt != 16 {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               if v_0_1_0_0.AuxInt != 15 {
                        break
                }
-               if r0.AuxInt != 8 {
+               y := v_0_1_0_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRW {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if x != v_1.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDQconst {
                        break
                }
-               if p != x0.Args[0] {
+               if v_1_1.AuxInt != 15 {
                        break
                }
-               if mem != x0.Args[1] {
+               if y != v_1_1.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(v.Type.Size() == 2) {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpAMD64RORW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       // match: (ORL (SHRW x (ANDLconst y [15])) (SHLL x (NEGL (ADDLconst (ANDLconst y [15]) [-16]))))
+       // cond: v.Type.Size() == 2
+       // result: (RORW x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRW {
                        break
                }
-               if sh.AuxInt != 16 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDLconst {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               if v_0_1.AuxInt != 15 {
                        break
                }
-               if r0.AuxInt != 8 {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if x != v_1.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64NEGL {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if v_1_1_0.AuxInt != -16 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if p != x1.Args[0] {
+               if v_1_1_0_0.AuxInt != 15 {
                        break
                }
-               if mem != x1.Args[1] {
+               if y != v_1_1_0_0.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(v.Type.Size() == 2) {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpAMD64RORW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       // match: (ORL (SHLL x (NEGL (ADDLconst (ANDLconst y [15]) [-16]))) (SHRW x (ANDLconst y [15])))
+       // cond: v.Type.Size() == 2
+       // result: (RORW x y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64NEGL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               if v_0_1_0.AuxInt != -16 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if v_0_1_0_0.AuxInt != 15 {
                        break
                }
-               if p != x1.Args[0] {
+               y := v_0_1_0_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRW {
                        break
                }
-               if mem != x1.Args[1] {
+               if x != v_1.Args[0] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDLconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if v_1_1.AuxInt != 15 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64RORW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       // match: (ORL (SHLL x (ANDQconst y [ 7])) (ANDL (SHRB x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])) [ 8]))))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDQconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               if v_0_1.AuxInt != 7 {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRB {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x != v_1_0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x1.Args[1] {
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_0_1_0.AuxInt != -8 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y) s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               v_1_0_1_0_0 := v_1_0_1_0.Args[0]
+               if v_1_0_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               if v_1_0_1_0_0.AuxInt != 7 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if y != v_1_0_1_0_0.Args[0] {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_1_1_0.AuxInt != 8 {
                        break
                }
-               if p != x0.Args[0] {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x0.Args[1] {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_1_0_0_0.AuxInt != -8 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_1_1_0_0_0_0.AuxInt != 7 {
+                       break
+               }
+               if y != v_1_1_0_0_0_0.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLB)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))) s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       // match: (ORL (SHLL x (ANDQconst y [ 7])) (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])) [ 8])) (SHRB x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])))))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDQconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if v_0_1.AuxInt != 7 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if p != x0.Args[0] {
+               if v_1_0_0.AuxInt != 8 {
                        break
                }
-               if mem != x0.Args[1] {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
-       for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if v_1_0_0_0_0.AuxInt != -8 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if sh.AuxInt != 8 {
+               if v_1_0_0_0_0_0.AuxInt != 7 {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRB {
                        break
                }
-               if p != x0.Args[0] {
+               if x != v_1_1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x0.Args[2] {
+               v_1_1_1_0 := v_1_1_1.Args[0]
+               if v_1_1_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if v_1_1_1_0.AuxInt != -8 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v_1_1_1_0_0 := v_1_1_1_0.Args[0]
+               if v_1_1_1_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_1_1_1_0_0.AuxInt != 7 {
+                       break
+               }
+               if y != v_1_1_1_0_0.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLB)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // match: (ORL (ANDL (SHRB x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])) [ 8]))) (SHLL x (ANDQconst y [ 7])))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
        for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRB {
                        break
                }
-               if sh.AuxInt != 8 {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_0_0_1_0.AuxInt != -8 {
                        break
                }
-               if p != x0.Args[0] {
+               v_0_0_1_0_0 := v_0_0_1_0.Args[0]
+               if v_0_0_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if idx != x0.Args[1] {
+               if v_0_0_1_0_0.AuxInt != 7 {
                        break
                }
-               if mem != x0.Args[2] {
+               y := v_0_0_1_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
-       for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1_0.AuxInt != 8 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if sh.AuxInt != 8 {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1_0_0_0.AuxInt != -8 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if idx != x0.Args[0] {
+               if v_0_1_0_0_0_0.AuxInt != 7 {
                        break
                }
-               if p != x0.Args[1] {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if x != v_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_1_1.AuxInt != 7 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLB)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // match: (ORL (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])) [ 8])) (SHRB x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8])))) (SHLL x (ANDQconst y [ 7])))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
        for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if sh.AuxInt != 8 {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_0_0.AuxInt != 8 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if idx != x0.Args[0] {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if p != x0.Args[1] {
+               if v_0_0_0_0_0.AuxInt != -8 {
                        break
                }
-               if mem != x0.Args[2] {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if v_0_0_0_0_0_0.AuxInt != 7 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRB {
                        break
                }
-               if sh.AuxInt != 8 {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_0_1_1_0 := v_0_1_1.Args[0]
+               if v_0_1_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1_1_0.AuxInt != -8 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_0_1_1_0_0 := v_0_1_1_0.Args[0]
+               if v_0_1_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if p != x1.Args[0] {
+               if v_0_1_1_0_0.AuxInt != 7 {
                        break
                }
-               if idx != x1.Args[1] {
+               if y != v_0_1_1_0_0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if x != v_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_1_1.AuxInt != 7 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLB)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // match: (ORL (SHLL x (ANDLconst y [ 7])) (ANDL (SHRB x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])) [ 8]))))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               if sh.AuxInt != 8 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDLconst {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1.AuxInt != 7 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRB {
                        break
                }
-               if p != x1.Args[0] {
+               if x != v_1_0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGL {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if v_1_0_1_0.AuxInt != -8 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1_0_1_0_0 := v_1_0_1_0.Args[0]
+               if v_1_0_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if sh.AuxInt != 8 {
+               if v_1_0_1_0_0.AuxInt != 7 {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if y != v_1_0_1_0_0.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if idx != x1.Args[0] {
+               if v_1_1_0.AuxInt != 8 {
                        break
                }
-               if p != x1.Args[1] {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if v_1_1_0_0_0.AuxInt != -8 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               if v_1_1_0_0_0_0.AuxInt != 7 {
+                       break
+               }
+               if y != v_1_1_0_0_0_0.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLB)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // match: (ORL (SHLL x (ANDLconst y [ 7])) (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])) [ 8])) (SHRB x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])))))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               if sh.AuxInt != 8 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDLconst {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if v_0_1.AuxInt != 7 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if idx != x1.Args[0] {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if p != x1.Args[1] {
+               if v_1_0_0.AuxInt != 8 {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
-       for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               if v_1_0_0_0_0.AuxInt != -8 {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if v_1_0_0_0_0_0.AuxInt != 7 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               if sh.AuxInt != 16 {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRB {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               if x != v_1_1.Args[0] {
                        break
                }
-               if r0.AuxInt != 8 {
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGL {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_1_1_1_0 := v_1_1_1.Args[0]
+               if v_1_1_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_1_1_1_0.AuxInt != -8 {
                        break
                }
-               if p != x0.Args[0] {
+               v_1_1_1_0_0 := v_1_1_1_0.Args[0]
+               if v_1_1_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if idx != x0.Args[1] {
+               if v_1_1_1_0_0.AuxInt != 7 {
                        break
                }
-               if mem != x0.Args[2] {
+               if y != v_1_1_1_0_0.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(v.Type.Size() == 1) {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpAMD64ROLB)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORL (ANDL (SHRB x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8]))) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])) [ 8]))) (SHLL x (ANDLconst y [ 7])))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRB {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               if v_0_0_1_0.AuxInt != -8 {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               v_0_0_1_0_0 := v_0_0_1_0.Args[0]
+               if v_0_0_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if r0.AuxInt != 8 {
+               if v_0_0_1_0_0.AuxInt != 7 {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               y := v_0_0_1_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if p != x0.Args[0] {
+               if v_0_1_0.AuxInt != 8 {
                        break
                }
-               if idx != x0.Args[1] {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if mem != x0.Args[2] {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if v_0_1_0_0_0.AuxInt != -8 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
-       for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if r1.AuxInt != 8 {
+               if v_0_1_0_0_0_0.AuxInt != 7 {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               if sh.AuxInt != 16 {
+               if x != v_1.Args[0] {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDLconst {
                        break
                }
-               if r0.AuxInt != 8 {
+               if v_1_1.AuxInt != 7 {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if y != v_1_1.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if !(v.Type.Size() == 1) {
                        break
                }
-               if idx != x0.Args[0] {
+               v.reset(OpAMD64ROLB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])) [ 8])) (SHRB x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8])))) (SHLL x (ANDLconst y [ 7])))
+       // cond: v.Type.Size() == 1
+       // result: (ROLB x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               if p != x0.Args[1] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
                        break
                }
-               if mem != x0.Args[2] {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if v_0_0_0.AuxInt != 8 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
-       for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if v_0_0_0_0_0.AuxInt != -8 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               if v_0_0_0_0_0_0.AuxInt != 7 {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRB {
                        break
                }
-               if r0.AuxInt != 8 {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGL {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_0_1_1_0 := v_0_1_1.Args[0]
+               if v_0_1_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_0_1_1_0.AuxInt != -8 {
                        break
                }
-               if idx != x0.Args[0] {
+               v_0_1_1_0_0 := v_0_1_1_0.Args[0]
+               if v_0_1_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if p != x0.Args[1] {
+               if v_0_1_1_0_0.AuxInt != 7 {
                        break
                }
-               if mem != x0.Args[2] {
+               if y != v_0_1_1_0_0.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               if x != v_1.Args[0] {
                        break
                }
-               if sh.AuxInt != 16 {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDLconst {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               if v_1_1.AuxInt != 7 {
                        break
                }
-               if r0.AuxInt != 8 {
+               if y != v_1_1.Args[0] {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if !(v.Type.Size() == 1) {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
+               v.reset(OpAMD64ROLB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHRB x (ANDQconst y [ 7])) (SHLL x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))))
+       // cond: v.Type.Size() == 1
+       // result: (RORB x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRB {
                        break
                }
-               if r1.AuxInt != 8 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if v_0_1.AuxInt != 7 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               if p != x1.Args[0] {
+               if x != v_1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if v_1_1_0.AuxInt != -8 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               if v_1_1_0_0.AuxInt != 7 {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               if y != v_1_1_0_0.Args[0] {
                        break
                }
-               if r0.AuxInt != 8 {
+               if !(v.Type.Size() == 1) {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v.reset(OpAMD64RORB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHLL x (NEGQ (ADDQconst (ANDQconst y [ 7]) [ -8]))) (SHRB x (ANDQconst y [ 7])))
+       // cond: v.Type.Size() == 1
+       // result: (RORB x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if v_0_1_0.AuxInt != -8 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if p != x1.Args[0] {
+               if v_0_1_0_0.AuxInt != 7 {
                        break
                }
-               if idx != x1.Args[1] {
+               y := v_0_1_0_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRB {
                        break
                }
-               if mem != x1.Args[2] {
+               if x != v_1.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               if v_1_1.AuxInt != 7 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(v.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64RORB)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORL (SHRB x (ANDLconst y [ 7])) (SHLL x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8]))))
+       // cond: v.Type.Size() == 1
+       // result: (RORB x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
-                       break
-               }
-               if sh.AuxInt != 16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRB {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64ANDLconst {
                        break
                }
-               if r0.AuxInt != 8 {
+               if v_0_1.AuxInt != 7 {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
+               if x != v_1.Args[0] {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64NEGL {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if v_1_1_0.AuxInt != -8 {
                        break
                }
-               if idx != x1.Args[0] {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if p != x1.Args[1] {
+               if v_1_1_0_0.AuxInt != 7 {
                        break
                }
-               if mem != x1.Args[2] {
+               if y != v_1_1_0_0.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(v.Type.Size() == 1) {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpAMD64RORB)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORL (SHLL x (NEGL (ADDLconst (ANDLconst y [ 7]) [ -8]))) (SHRB x (ANDLconst y [ 7])))
+       // cond: v.Type.Size() == 1
+       // result: (RORB x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
                        break
                }
-               if sh.AuxInt != 16 {
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64NEGL {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if r0.AuxInt != 8 {
+               if v_0_1_0.AuxInt != -8 {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
+               if v_0_1_0_0.AuxInt != 7 {
                        break
                }
-               if r1.AuxInt != 8 {
+               y := v_0_1_0_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRB {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x != v_1.Args[0] {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64ANDLconst {
                        break
                }
-               if idx != x1.Args[0] {
+               if v_1_1.AuxInt != 7 {
                        break
                }
-               if p != x1.Args[1] {
+               if y != v_1_1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if !(v.Type.Size() == 1) {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               v.reset(OpAMD64RORB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL x x)
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
@@ -16410,286 +15640,202 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if p != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
-                       break
-               }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
-                       break
-               }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
-               if x1.Aux != s {
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               if p != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
-                       break
-               }
-               if p != x1.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)) x0:(MOVWload [i0] {s} p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
-                       break
-               }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
                i1 := x1.AuxInt
-               if x1.Aux != s {
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
-               if idx != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
                or := v.Args[1]
                if or.Op != OpAMD64ORL {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               if p != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -16697,67 +15843,59 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLLconst {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
                or := v.Args[1]
                if or.Op != OpAMD64ORL {
                        break
                }
                y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               if p != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -16765,67 +15903,59 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y) s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
        for {
-               s0 := v.Args[0]
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
                if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
                if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
-                       break
-               }
-               if p != x1.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -16833,67 +15963,59 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))) s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
        for {
-               s0 := v.Args[0]
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
                if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORL {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
                if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
-                       break
-               }
-               if p != x1.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -16901,254 +16023,242 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
-                       break
-               }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
-                       break
-               }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               if x1.Aux != s {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               if x0.Aux != s {
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if p != x0.Args[0] {
+               if sh.AuxInt != 8 {
                        break
                }
-               if idx != x0.Args[1] {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if mem != x0.Args[2] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
+               x1 := sh.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
+               p := x1.Args[0]
+               idx := x1.Args[1]
                mem := x1.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
+               x0 := v.Args[1]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -17165,58 +16275,41 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
+               x1 := sh.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
+               idx := x1.Args[0]
+               p := x1.Args[1]
                mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
+               x0 := v.Args[1]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -17224,67 +16317,50 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
+               x1 := sh.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
+               p := x1.Args[0]
+               idx := x1.Args[1]
                mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
+               x0 := v.Args[1]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -17301,58 +16377,41 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
+               x1 := sh.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
+               idx := x1.Args[0]
+               p := x1.Args[1]
                mem := x1.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
+               x0 := v.Args[1]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -17369,363 +16428,246 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORL {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLLconst {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLLconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ORLmem x [off] {sym} ptr mem)
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               v.reset(OpAMD64ORLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (ORL l:(MOVLload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ORLmem x [off] {sym} ptr mem)
-       for {
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               if sh.AuxInt != 16 {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               v.reset(OpAMD64ORLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
-       // match: (ORLconst [c] x)
-       // cond: int32(c)==0
-       // result: x
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(int32(c) == 0) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORLconst [c] _)
-       // cond: int32(c)==-1
-       // result: (MOVLconst [-1])
-       for {
-               c := v.AuxInt
-               if !(int32(c) == -1) {
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = -1
-               return true
-       }
-       // match: (ORLconst [c] (MOVLconst [d]))
-       // cond:
-       // result: (MOVLconst [c|d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if idx != x1.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = c | d
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
-       b := v.Block
-       _ = b
-       types := &b.Func.Config.Types
-       _ = types
-       // match: (ORQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (ORQconst [c] x)
-       for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if mem != x1.Args[2] {
                        break
                }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
-               v.reset(OpAMD64ORQconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (ORQconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64ORQconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ (SHLQconst x [c]) (SHRQconst x [d]))
-       // cond: d==64-c
-       // result: (ROLQconst x [c])
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if sh.AuxInt != 16 {
                        break
                }
-               if !(d == 64-c) {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ (SHRQconst x [d]) (SHLQconst x [c]))
-       // cond: d==64-c
-       // result: (ROLQconst x [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if idx != x1.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if p != x1.Args[1] {
                        break
                }
-               if !(d == 64-c) {
+               if mem != x1.Args[2] {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORQ x x)
-       // cond:
-       // result: x
-       for {
-               x := v.Args[0]
-               if x != v.Args[1] {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       // match: (ORL x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
                x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
                sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 8 {
+               if sh.AuxInt != 16 {
                        break
                }
                x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
                sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 8 {
+               if sh.AuxInt != 16 {
                        break
                }
                x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
                p := x1.Args[0]
-               mem := x1.Args[1]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
                x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVBload {
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -17735,234 +16677,208 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x0.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
        // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWload {
-                       break
-               }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
                if sh.AuxInt != 16 {
                        break
                }
                x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
-               if x1.Aux != s {
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if p != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if mem != x1.Args[1] {
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
                if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)) x0:(MOVWload [i0] {s} p mem))
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
        // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
                sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
                if sh.AuxInt != 16 {
                        break
                }
                x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
                p := x1.Args[0]
-               mem := x1.Args[1]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
                x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWload {
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
                if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+       // match: (ORL sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVLload {
-                       break
-               }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 16 {
                        break
                }
                x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLload {
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
-               if x1.Aux != s {
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if p != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x0.Args[0] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if p != x0.Args[1] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)) x0:(MOVLload [i0] {s} p mem))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 32 {
-                       break
-               }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLload {
-                       break
-               }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVLload {
-                       break
-               }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
-                       break
-               }
-               if p != x0.Args[0] {
-                       break
-               }
-               if mem != x0.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AuxInt = i0
                v0.Aux = s
                v0.AddArg(p)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
        // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
                s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
                p := x1.Args[0]
-               mem := x1.Args[1]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
                or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               if or.Op != OpAMD64ORL {
                        break
                }
                s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -17972,7 +16888,10 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x0.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
                y := or.Args[1]
@@ -17980,50 +16899,51 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
                v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
                v2.AuxInt = i0
                v2.Aux = s
                v2.AddArg(p)
+               v2.AddArg(idx)
                v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))))
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
        // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
                s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
                or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               if or.Op != OpAMD64ORL {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -18033,176 +16953,193 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x0.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
                        break
                }
+               y := or.Args[1]
                if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
                v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
                v2.AuxInt = i0
                v2.Aux = s
                v2.AddArg(p)
+               v2.AddArg(idx)
                v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y) s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
        // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
                        break
                }
                s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if x0.Aux != s {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
+               y := or.Args[1]
                if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
                v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
                v2.AuxInt = i0
                v2.Aux = s
                v2.AddArg(p)
+               v2.AddArg(idx)
                v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))) s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
        // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if x0.Aux != s {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
+               y := or.Args[1]
                if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
                v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
                v2.AuxInt = i0
                v2.Aux = s
                v2.AddArg(p)
+               v2.AddArg(idx)
                v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
                s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
                p := x1.Args[0]
-               mem := x1.Args[1]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
                or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               if or.Op != OpAMD64ORL {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -18212,58 +17149,62 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x0.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
                v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
                v2.AuxInt = i0
                v2.Aux = s
                v2.AddArg(p)
+               v2.AddArg(idx)
                v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
                s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
                or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               if or.Op != OpAMD64ORL {
                        break
                }
                y := or.Args[0]
                s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -18273,153 +17214,176 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x0.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
                v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
                v2.AuxInt = i0
                v2.Aux = s
                v2.AddArg(p)
+               v2.AddArg(idx)
                v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y) s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if x0.Aux != s {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
                v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
                v2.AuxInt = i0
                v2.Aux = s
                v2.AddArg(p)
+               v2.AddArg(idx)
                v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))) s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+       // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
                        break
                }
                y := or.Args[0]
                s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if x0.Aux != s {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
                v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
                v2.AuxInt = i0
                v2.Aux = s
                v2.AddArg(p)
+               v2.AddArg(idx)
                v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               x0 := v.Args[0]
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -18428,14 +17392,13 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 8 {
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               x1 := sh.Args[0]
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -18452,25 +17415,40 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               x0 := v.Args[0]
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -18479,14 +17457,13 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 8 {
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               x1 := sh.Args[0]
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -18503,41 +17480,55 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 8 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               x1 := sh.Args[0]
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -18545,34 +17536,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               x0 := v.Args[0]
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -18581,14 +17588,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 8 {
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               x1 := sh.Args[0]
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
@@ -18596,306 +17601,365 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 8 {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               if x0.Aux != s {
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               if p != x0.Args[0] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if idx != x0.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if mem != x0.Args[2] {
+               if idx != x1.Args[0] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 8 {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               if x0.Aux != s {
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               if p != x0.Args[0] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if idx != x0.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if mem != x0.Args[2] {
+               if idx != x1.Args[0] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               if sh.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if idx != x0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x1.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               if p != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       // match: (ORL or:(ORL y s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 8 {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               if x0.Aux != s {
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               if idx != x0.Args[0] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if p != x0.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if mem != x0.Args[2] {
+               if idx != x1.Args[0] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
                sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 16 {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if sh.AuxInt != 8 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               if p != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)) x1:(MOVBload [i1] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 8 {
                        break
                }
-               if sh.AuxInt != 16 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
@@ -18905,751 +17969,846 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
-                       break
-               }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if r1.AuxInt != 8 {
                        break
                }
-               if sh.AuxInt != 16 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if sh.AuxInt != 16 {
                        break
                }
-               if idx != x1.Args[0] {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               if p != x1.Args[1] {
+               if r0.AuxInt != 8 {
                        break
                }
-               if mem != x1.Args[2] {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               if r1.AuxInt != 8 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
-                       break
-               }
-               if p != x1.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               if p != x0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               if p != x0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y) s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               if sh.AuxInt != 16 {
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
                p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if mem != x0.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
-                       break
-               }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))) s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
                        break
                }
-               if sh.AuxInt != 16 {
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               if idx != x0.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x0.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
                sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
                sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
                sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
-       for {
-               x0 := v.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+       // match: (ORL x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
                sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)) x0:(MOVLloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
                sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)) x0:(MOVLloadidx1 [i0] {s} p idx mem))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
                sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)) x0:(MOVLloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
                sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)) x0:(MOVLloadidx1 [i0] {s} idx p mem))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       // match: (ORL sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
        for {
                sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               x1 := sh.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               x0 := v.Args[1]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = i0
-               v0.Aux = s
-               v0.AddArg(p)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -19657,17 +18816,22 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -19683,38 +18847,35 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -19722,17 +18883,22 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -19748,38 +18914,35 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -19787,17 +18950,22 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -19813,38 +18981,35 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -19852,17 +19017,22 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -19878,297 +19048,296 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 16 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if r0.AuxInt != 8 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               if x0.Aux != s {
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if p != x0.Args[0] {
+               if r1.AuxInt != 8 {
                        break
                }
-               if idx != x0.Args[1] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x0.Args[2] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 16 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if r0.AuxInt != 8 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               if x0.Aux != s {
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if p != x0.Args[0] {
+               if r1.AuxInt != 8 {
                        break
                }
-               if idx != x0.Args[1] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x0.Args[2] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 16 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if r0.AuxInt != 8 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               if x0.Aux != s {
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if idx != x0.Args[0] {
+               if r1.AuxInt != 8 {
                        break
                }
-               if p != x0.Args[1] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if mem != x0.Args[2] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLLconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 16 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if r0.AuxInt != 8 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               if x0.Aux != s {
-                       break
-               }
-               if idx != x0.Args[0] {
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
                        break
                }
-               if p != x0.Args[1] {
+               if r1.AuxInt != 8 {
                        break
                }
-               if mem != x0.Args[2] {
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
@@ -20181,9 +19350,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
@@ -20204,36 +19376,36 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
@@ -20246,9 +19418,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
@@ -20269,37 +19444,36 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
@@ -20312,8 +19486,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
@@ -20325,46 +19503,45 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
@@ -20377,8 +19554,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
@@ -20390,45 +19571,45 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
@@ -20441,9 +19622,13 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
@@ -20455,45 +19640,44 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
@@ -20506,9 +19690,13 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
@@ -20520,46 +19708,44 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
@@ -20572,8 +19758,13 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
@@ -20594,37 +19785,35 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
@@ -20637,8 +19826,13 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
@@ -20659,37 +19853,44 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -20697,17 +19898,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -20723,38 +19921,44 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -20762,17 +19966,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -20788,38 +19989,45 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -20827,64 +20035,67 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -20892,64 +20103,66 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -20957,64 +20170,67 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -21022,64 +20238,68 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
+                       break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -21087,18 +20307,13 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -21114,37 +20329,45 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL or:(ORL y s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
        for {
-               s1 := v.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORL {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLLconst {
                        break
                }
                j1 := s1.AuxInt
                x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -21152,18 +20375,13 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLLconst {
                        break
                }
                j0 := s0.AuxInt
                x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -21179,1557 +20397,1280 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORLmem x [off] {sym} ptr mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               v.reset(OpAMD64ORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORLmem x [off] {sym} ptr mem)
+       for {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v.reset(OpAMD64ORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool {
+       // match: (ORLconst [c] x)
+       // cond: int32(c)==0
+       // result: x
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == 0) {
                        break
                }
-               if p != x1.Args[0] {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORLconst [c] _)
+       // cond: int32(c)==-1
+       // result: (MOVLconst [-1])
+       for {
+               c := v.AuxInt
+               if !(int32(c) == -1) {
                        break
                }
-               if idx != x1.Args[1] {
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (ORLconst [c] (MOVLconst [d]))
+       // cond:
+       // result: (MOVLconst [c|d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               if mem != x1.Args[2] {
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = c | d
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
+       b := v.Block
+       _ = b
+       types := &b.Func.Config.Types
+       _ = types
+       // match: (ORQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (ORQconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v.reset(OpAMD64ORQconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (ORQconst [c] x)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               v.reset(OpAMD64ORQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if !(d == 64-c) {
                        break
                }
-               if idx != x1.Args[1] {
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               if mem != x1.Args[2] {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORQ (SHLQ x y) (ANDQ (SHRQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64]))))
+       // cond:
+       // result: (ROLQ x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQ {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQ {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRQ {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if x != v_1_0.Args[0] {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if y != v_1_0_1.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               if idx != x1.Args[1] {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if v_1_1_0.AuxInt != 64 {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if v_1_1_0_0_0.AuxInt != -64 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if v_1_1_0_0_0_0.AuxInt != 63 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if y != v_1_1_0_0_0_0.Args[0] {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v.reset(OpAMD64ROLQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORQ (SHLQ x y) (ANDQ (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64])) (SHRQ x (NEGQ y))))
+       // cond:
+       // result: (ROLQ x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQ {
                        break
                }
-               if p != x1.Args[0] {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQ {
                        break
                }
-               if idx != x1.Args[1] {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_0_0.AuxInt != 64 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if v_1_0_0_0_0.AuxInt != -64 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if v_1_0_0_0_0_0.AuxInt != 63 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               if idx != x1.Args[0] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRQ {
                        break
                }
-               if p != x1.Args[1] {
+               if x != v_1_1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if y != v_1_1_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v.reset(OpAMD64ROLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       // match: (ORQ (ANDQ (SHRQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64]))) (SHLQ x y))
+       // cond:
+       // result: (ROLQ x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               s0 := or.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRQ {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               y := or.Args[1]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if v_0_1_0.AuxInt != 64 {
                        break
                }
-               if idx != x1.Args[0] {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if p != x1.Args[1] {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if v_0_1_0_0_0.AuxInt != -64 {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               if v_0_1_0_0_0_0.AuxInt != 63 {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQ {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if x != v_1.Args[0] {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if y != v_1.Args[1] {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v.reset(OpAMD64ROLQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORQ (ANDQ (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64])) (SHRQ x (NEGQ y))) (SHLQ x y))
+       // cond:
+       // result: (ROLQ x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               if idx != x1.Args[0] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               if p != x1.Args[1] {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if v_0_0_0.AuxInt != 64 {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               y := or.Args[0]
-               s0 := or.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if v_0_0_0_0_0.AuxInt != -64 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               s1 := v.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if v_0_0_0_0_0_0.AuxInt != 63 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRQ {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if idx != x1.Args[0] {
+               if y != v_0_1_1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQ {
                        break
                }
-               if mem != x1.Args[2] {
+               if x != v_1.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if y != v_1.Args[1] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j0
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v2.AuxInt = i0
-               v2.Aux = s
-               v2.AddArg(p)
-               v2.AddArg(idx)
-               v2.AddArg(mem)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v.reset(OpAMD64ROLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       // match: (ORQ (SHLQ x y) (ANDQ (SHRQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64]))))
+       // cond:
+       // result: (ROLQ x y)
        for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBload {
-                       break
-               }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQ {
                        break
                }
-               if sh.AuxInt != 8 {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQ {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRQ {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if x != v_1_0.Args[0] {
                        break
                }
-               if p != x0.Args[0] {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGL {
                        break
                }
-               if mem != x0.Args[1] {
+               if y != v_1_0_1.Args[0] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)) x1:(MOVBload [i1] {s} p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if sh.AuxInt != 8 {
+               if v_1_1_0.AuxInt != 64 {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBload {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if v_1_1_0_0_0.AuxInt != -64 {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if mem != x1.Args[1] {
+               if v_1_1_0_0_0_0.AuxInt != 63 {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if y != v_1_1_0_0_0_0.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpAMD64ROLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       // match: (ORQ (SHLQ x y) (ANDQ (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64])) (SHRQ x (NEGL y))))
+       // cond:
+       // result: (ROLQ x y)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQ {
                        break
                }
-               if r1.AuxInt != 8 {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQ {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               if v_1_0_0.AuxInt != 64 {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if r0.AuxInt != 8 {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if v_1_0_0_0_0.AuxInt != -64 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if p != x0.Args[0] {
+               if v_1_0_0_0_0_0.AuxInt != 63 {
                        break
                }
-               if mem != x0.Args[1] {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRQ {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               if x != v_1_1.Args[0] {
+                       break
+               }
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_1_1_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64ROLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       // match: (ORQ (ANDQ (SHRQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64]))) (SHLQ x y))
+       // cond:
+       // result: (ROLQ x y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               if sh.AuxInt != 16 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRQ {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGL {
                        break
                }
-               if r0.AuxInt != 8 {
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
+               if v_0_1_0.AuxInt != 64 {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if v_0_1_0_0_0.AuxInt != -64 {
                        break
                }
-               if p != x1.Args[0] {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if mem != x1.Args[1] {
+               if v_0_1_0_0_0_0.AuxInt != 63 {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
-       for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64BSWAPL {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQ {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLload {
+               if x != v_1.Args[0] {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if y != v_1.Args[1] {
                        break
                }
-               if sh.AuxInt != 32 {
+               v.reset(OpAMD64ROLQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORQ (ANDQ (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64])) (SHRQ x (NEGL y))) (SHLQ x y))
+       // cond:
+       // result: (ROLQ x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLload {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_0_0_0.AuxInt != 64 {
                        break
                }
-               if p != x0.Args[0] {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if mem != x0.Args[1] {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if v_0_0_0_0_0.AuxInt != -64 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))) r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
-       for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if v_0_0_0_0_0_0.AuxInt != 63 {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRQ {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLload {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGL {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64BSWAPL {
+               if y != v_0_1_1.Args[0] {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQ {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x != v_1.Args[0] {
                        break
                }
-               if p != x1.Args[0] {
+               if y != v_1.Args[1] {
                        break
                }
-               if mem != x1.Args[1] {
+               v.reset(OpAMD64ROLQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORQ (SHRQ x y) (ANDQ (SHLQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64]))))
+       // cond:
+       // result: (RORQ x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQ {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQ {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-       for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLQ {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if x != v_1_0.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               if y != v_1_0_1.Args[0] {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if p != x1.Args[0] {
+               if v_1_1_0.AuxInt != 64 {
                        break
                }
-               if mem != x1.Args[1] {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-       for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               if v_1_1_0_0_0.AuxInt != -64 {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               if v_1_1_0_0_0_0.AuxInt != 63 {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if y != v_1_1_0_0_0_0.Args[0] {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORQ (SHRQ x y) (ANDQ (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64])) (SHLQ x (NEGQ y))))
+       // cond:
+       // result: (RORQ x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQ {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQ {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               if mem != x1.Args[1] {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_1_0_0.AuxInt != 64 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y) s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-       for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               if v_1_0_0_0_0.AuxInt != -64 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               if v_1_0_0_0_0_0.AuxInt != 63 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               if p != x0.Args[0] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLQ {
                        break
                }
-               if mem != x0.Args[1] {
+               if x != v_1_1.Args[0] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if y != v_1_1_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))) s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       // match: (ORQ (ANDQ (SHLQ x (NEGQ y)) (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64]))) (SHRQ x y))
+       // cond:
+       // result: (RORQ x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLQ {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBload {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGQ {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBload {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPQconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_0_1_0.AuxInt != 64 {
                        break
                }
-               if p != x0.Args[0] {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x0.Args[1] {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if v_0_1_0_0_0.AuxInt != -64 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_1_0_0_0_0.AuxInt != 63 {
+                       break
+               }
+               if y != v_0_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQ {
+                       break
+               }
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       // match: (ORQ (ANDQ (SBBQcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [63]) [-64])) [64])) (SHLQ x (NEGQ y))) (SHRQ x y))
+       // cond:
+       // result: (RORQ x y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               if r0.AuxInt != 8 {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if v_0_0_0.AuxInt != 64 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               if v_0_0_0_0_0.AuxInt != -64 {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if v_0_0_0_0_0_0.AuxInt != 63 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
                        break
                }
-               if p != x1.Args[0] {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGQ {
                        break
                }
-               if mem != x1.Args[1] {
+               if y != v_0_1_1.Args[0] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQ {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       // match: (ORQ (SHRQ x y) (ANDQ (SHLQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64]))))
+       // cond:
+       // result: (RORQ x y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQ {
                        break
                }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLQ {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if x != v_1_0.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGL {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if y != v_1_0_1.Args[0] {
                        break
                }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if v_1_1_0.AuxInt != 64 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if p != x1.Args[0] {
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if mem != x1.Args[1] {
+               if v_1_1_0_0_0.AuxInt != -64 {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               if v_1_1_0_0_0_0.AuxInt != 63 {
+                       break
+               }
+               if y != v_1_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       // match: (ORQ (SHRQ x y) (ANDQ (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64])) (SHLQ x (NEGL y))))
+       // cond:
+       // result: (RORQ x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQ {
                        break
                }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               if r1.AuxInt != 8 {
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               if v_1_0_0.AuxInt != 64 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if r0.AuxInt != 8 {
+               if v_1_0_0_0_0.AuxInt != -64 {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if v_1_0_0_0_0_0.AuxInt != 63 {
                        break
                }
-               if p != x0.Args[0] {
+               if y != v_1_0_0_0_0_0.Args[0] {
                        break
                }
-               if mem != x0.Args[1] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLQ {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if x != v_1_1.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_1_1_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       // match: (ORQ (ANDQ (SHLQ x (NEGL y)) (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64]))) (SHRQ x y))
+       // cond:
+       // result: (RORQ x y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLQ {
                        break
                }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGL {
                        break
                }
-               if r1.AuxInt != 8 {
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWload {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPLconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               mem := x1.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if v_0_1_0.AuxInt != 64 {
                        break
                }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if r0.AuxInt != 8 {
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWload {
+               if v_0_1_0_0_0.AuxInt != -64 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if p != x0.Args[0] {
+               if v_0_1_0_0_0_0.AuxInt != 63 {
                        break
                }
-               if mem != x0.Args[1] {
+               if y != v_0_1_0_0_0_0.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQ {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
-               return true
-       }
-       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
-       for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if x != v_1.Args[0] {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if y != v_1.Args[1] {
                        break
                }
-               if sh.AuxInt != 8 {
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORQ (ANDQ (SBBQcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [63]) [-64])) [64])) (SHLQ x (NEGL y))) (SHRQ x y))
+       // cond:
+       // result: (RORQ x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBQcarrymask {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPLconst {
                        break
                }
-               if p != x0.Args[0] {
+               if v_0_0_0.AuxInt != 64 {
                        break
                }
-               if idx != x0.Args[1] {
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGL {
                        break
                }
-               if mem != x0.Args[2] {
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if v_0_0_0_0_0.AuxInt != -64 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
-       for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDLconst {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if v_0_0_0_0_0_0.AuxInt != 63 {
                        break
                }
-               if sh.AuxInt != 8 {
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGL {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if y != v_0_1_1.Args[0] {
                        break
                }
-               if p != x0.Args[0] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQ {
                        break
                }
-               if idx != x0.Args[1] {
+               if x != v_1.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               if y != v_1.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORQ x x)
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
        // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
        for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
                sh := v.Args[1]
                if sh.Op != OpAMD64SHLQconst {
                        break
@@ -22737,116 +21678,100 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if sh.AuxInt != 8 {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
-                       break
-               }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               if idx != x0.Args[0] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
                if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
        // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
        for {
-               x1 := v.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
-                       break
-               }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
+               sh := v.Args[0]
                if sh.Op != OpAMD64SHLQconst {
                        break
                }
                if sh.AuxInt != 8 {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
-               if idx != x0.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x0.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
                if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
-               if sh.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 16 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
                i1 := x1.AuxInt
@@ -22856,227 +21781,188 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
-                       break
-               }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)) x0:(MOVWload [i0] {s} p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
        for {
                sh := v.Args[0]
                if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if sh.AuxInt != 8 {
-                       break
-               }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 16 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
                i1 := x1.AuxInt
-               if x1.Aux != s {
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
-               if p != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLload {
                        break
                }
-               if sh.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 32 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLload {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
-                       break
-               }
-               if p != x1.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
-       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)) x0:(MOVLload [i0] {s} p mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
        for {
                sh := v.Args[0]
                if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if sh.AuxInt != 8 {
-                       break
-               }
-               x0 := sh.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 32 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLload {
                        break
                }
                i1 := x1.AuxInt
-               if x1.Aux != s {
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLload {
                        break
                }
-               if idx != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AuxInt = 8
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
                p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 16 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
                i0 := x0.AuxInt
@@ -23086,64 +21972,58 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x0.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
-                       break
-               }
-               if mem != x0.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 16 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
                i0 := x0.AuxInt
@@ -23153,265 +22033,297 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x0.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
-                       break
-               }
-               if mem != x0.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y) s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r1.AuxInt != 8 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
+               if x1.Aux != s {
                        break
                }
-               if sh.AuxInt != 16 {
+               if p != x1.Args[0] {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               if mem != x1.Args[1] {
                        break
                }
-               if r0.AuxInt != 8 {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))) s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
                        break
                }
                i0 := x0.AuxInt
-               if x0.Aux != s {
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if idx != x0.Args[0] {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
-               if p != x0.Args[1] {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if mem != x0.Args[2] {
+               if p != x1.Args[0] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
                i1 := x1.AuxInt
                s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 16 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
-                       break
-               }
-               if p != x0.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
                i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
-                       break
-               }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
-                       break
-               }
-               if p != x1.Args[0] {
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y) s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 16 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
                i1 := x1.AuxInt
@@ -23421,294 +22333,305 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if p != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
-                       break
-               }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))) s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 16 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
                p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
+               mem := x0.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
-                       break
-               }
-               if p != x1.Args[1] {
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
                v0.AddArg(v1)
+               v0.AddArg(y)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               sh := v.Args[0]
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
                if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if sh.AuxInt != 16 {
+               if sh.AuxInt != 8 {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if r0.AuxInt != 8 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if p != x1.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64ROLWconst {
+               if idx != x1.Args[1] {
                        break
                }
-               if r1.AuxInt != 8 {
+               if mem != x1.Args[2] {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
+       for {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64BSWAPL {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
                sh := v.Args[1]
                if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if sh.AuxInt != 32 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
+               if sh.AuxInt != 8 {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ x0:(MOVBloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64BSWAPL {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
                sh := v.Args[1]
                if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if sh.AuxInt != 32 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
+               if sh.AuxInt != 8 {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64BSWAPL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -23716,60 +22639,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 32 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
-                       break
-               }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
-               r1 := v.Args[0]
-               if r1.Op != OpAMD64BSWAPL {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -23777,189 +22690,145 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               sh := v.Args[1]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 32 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
-                       break
-               }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
                sh := v.Args[0]
                if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64BSWAPL {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
-                       break
-               }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
        for {
                sh := v.Args[0]
                if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               if sh.AuxInt != 32 {
+               if sh.AuxInt != 8 {
                        break
                }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64BSWAPL {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
-                       break
-               }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 32 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
-                       break
-               }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -23967,60 +22836,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64BSWAPL {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)))
-       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
-       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               sh := v.Args[0]
-               if sh.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if sh.AuxInt != 32 {
-                       break
-               }
-               r0 := sh.Args[0]
-               if r0.Op != OpAMD64BSWAPL {
-                       break
-               }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVLloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -24028,54 +22887,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               r1 := v.Args[1]
-               if r1.Op != OpAMD64BSWAPL {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVLloadidx1 {
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -24083,67 +22938,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ x0:(MOVWloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -24151,290 +22989,270 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 16 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               if sh.AuxInt != 16 {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               if sh.AuxInt != 16 {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               if x0.Aux != s {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if idx != x0.Args[0] {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
                i1 := x1.AuxInt
-               if x1.Aux != s {
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if p != x1.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[1] {
+               if idx != x0.Args[0] {
                        break
                }
-               if mem != x1.Args[2] {
+               if p != x0.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
+               p := x0.Args[0]
+               idx := x0.Args[1]
                mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 32 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -24450,127 +23268,93 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
+               idx := x0.Args[0]
+               p := x0.Args[1]
                mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 32 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
+               p := x0.Args[0]
+               idx := x0.Args[1]
                mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 32 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -24586,44 +23370,84 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ x0:(MOVLloadidx1 [i0] {s} idx p mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               x0 := v.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)) x0:(MOVLloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -24631,14 +23455,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -24654,44 +23472,33 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)) x0:(MOVLloadidx1 [i0] {s} p idx mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 32 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -24699,14 +23506,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -24722,45 +23523,33 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)) x0:(MOVLloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 32 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -24768,67 +23557,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} idx p mem)) x0:(MOVLloadidx1 [i0] {s} idx p mem))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
                        break
                }
-               y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               if sh.AuxInt != 32 {
                        break
                }
-               j1 := s1.AuxInt
-               x1 := s1.Args[0]
-               if x1.Op != OpAMD64MOVBloadidx1 {
+               x1 := sh.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -24836,60 +23608,42 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               x0 := s0.Args[0]
-               if x0.Op != OpAMD64MOVBloadidx1 {
+               x0 := v.Args[1]
+               if x0.Op != OpAMD64MOVLloadidx1 {
                        break
                }
                i0 := x0.AuxInt
                if x0.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
                        break
                }
                b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v0.AuxInt = i0
+               v0.Aux = s
+               v0.AddArg(p)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s1 := or.Args[0]
+               s1 := v.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
@@ -24903,8 +23657,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
                if s0.Op != OpAMD64SHLQconst {
                        break
                }
@@ -24917,16 +23674,17 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -24934,30 +23692,23 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s1 := or.Args[0]
+               s1 := v.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
@@ -24971,8 +23722,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
                if s0.Op != OpAMD64SHLQconst {
                        break
                }
@@ -24985,16 +23739,17 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25002,31 +23757,23 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               s1 := v.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
@@ -25040,7 +23787,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               s0 := v.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
                if s0.Op != OpAMD64SHLQconst {
                        break
                }
@@ -25062,7 +23813,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25070,31 +23822,23 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
-       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               s1 := v.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
@@ -25108,7 +23852,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               s0 := v.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
                if s0.Op != OpAMD64SHLQconst {
                        break
                }
@@ -25130,7 +23878,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25138,81 +23887,64 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
-               v2.AuxInt = 8
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r0.AuxInt != 8 {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
                or := v.Args[1]
                if or.Op != OpAMD64ORQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25220,80 +23952,64 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r0.AuxInt != 8 {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
                or := v.Args[1]
                if or.Op != OpAMD64ORQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if p != x1.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if idx != x1.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25301,80 +24017,64 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r0.AuxInt != 8 {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               p := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
                or := v.Args[1]
                if or.Op != OpAMD64ORQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25382,80 +24082,64 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r0.AuxInt != 8 {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               idx := x0.Args[0]
-               p := x0.Args[1]
-               mem := x0.Args[2]
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
                or := v.Args[1]
                if or.Op != OpAMD64ORQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if idx != x0.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if p != x0.Args[1] {
                        break
                }
-               if mem != x1.Args[2] {
+               if mem != x0.Args[2] {
                        break
                }
-               y := or.Args[1]
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25463,38 +24147,33 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
-       for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -25502,25 +24181,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
                j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -25536,7 +24204,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25544,38 +24212,33 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -25583,25 +24246,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               y := or.Args[1]
+               s1 := v.Args[1]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
                j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
@@ -25617,7 +24269,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25625,38 +24277,34 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -25664,41 +24312,29 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               s1 := v.Args[1]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
                j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25706,38 +24342,34 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
-               s0 := v.Args[0]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i0 := x0.AuxInt
@@ -25745,41 +24377,29 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x0.Args[0]
                p := x0.Args[1]
                mem := x0.Args[2]
-               or := v.Args[1]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               s1 := v.Args[1]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
                j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
                if x1.Aux != s {
                        break
                }
-               if idx != x1.Args[0] {
+               if p != x1.Args[0] {
                        break
                }
-               if p != x1.Args[1] {
+               if idx != x1.Args[1] {
                        break
                }
                if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25787,80 +24407,64 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
                or := v.Args[0]
                if or.Op != OpAMD64ORQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
-                       break
-               }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
                j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if r0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25868,80 +24472,64 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
                or := v.Args[0]
                if or.Op != OpAMD64ORQ {
                        break
                }
-               s1 := or.Args[0]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
-                       break
-               }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
+               s0 := or.Args[0]
                if s0.Op != OpAMD64SHLQconst {
                        break
                }
                j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if r0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -25949,80 +24537,64 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
                or := v.Args[0]
                if or.Op != OpAMD64ORQ {
                        break
                }
                y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
-                       break
-               }
-               i1 := x1.AuxInt
-               s := x1.Aux
-               p := x1.Args[0]
-               idx := x1.Args[1]
-               mem := x1.Args[2]
-               s0 := v.Args[1]
+               s0 := or.Args[1]
                if s0.Op != OpAMD64SHLQconst {
                        break
                }
                j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if r0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if p != x0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -26030,80 +24602,64 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0+8   && j0 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
        for {
                or := v.Args[0]
                if or.Op != OpAMD64ORQ {
                        break
                }
                y := or.Args[0]
-               s1 := or.Args[1]
-               if s1.Op != OpAMD64SHLQconst {
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if r1.AuxInt != 8 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := r1.Args[0]
-               if x1.Op != OpAMD64MOVWloadidx1 {
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
                        break
                }
                i1 := x1.AuxInt
-               s := x1.Aux
-               idx := x1.Args[0]
-               p := x1.Args[1]
-               mem := x1.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
+               if x1.Aux != s {
                        break
                }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r0.AuxInt != 8 {
-                       break
-               }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
-                       break
-               }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
-                       break
-               }
-               if p != x0.Args[0] {
+               if idx != x1.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if p != x1.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -26111,41 +24667,28 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s1 := or.Args[0]
+               s1 := v.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
                j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
@@ -26154,20 +24697,16 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
@@ -26175,16 +24714,17 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -26192,41 +24732,28 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               s1 := or.Args[0]
+               s1 := v.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
                j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
@@ -26235,20 +24762,16 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               y := or.Args[1]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
@@ -26256,16 +24779,17 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if x0.Aux != s {
                        break
                }
-               if idx != x0.Args[0] {
+               if p != x0.Args[0] {
                        break
                }
-               if p != x0.Args[1] {
+               if idx != x0.Args[1] {
                        break
                }
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -26273,42 +24797,28 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               s1 := v.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
                j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
@@ -26317,19 +24827,16 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                p := x1.Args[0]
                idx := x1.Args[1]
                mem := x1.Args[2]
-               s0 := v.Args[1]
-               if s0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
                        break
                }
-               if r0.AuxInt != 8 {
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
                        break
                }
-               x0 := r0.Args[0]
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
                if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
@@ -26346,7 +24853,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                if mem != x0.Args[2] {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
                        break
                }
                b = mergePoint(b, x0, x1)
@@ -26354,42 +24862,28 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
                v1.AddArg(v2)
                v0.AddArg(v1)
                v0.AddArg(y)
                return true
        }
-       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
-       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
-       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
        for {
-               or := v.Args[0]
-               if or.Op != OpAMD64ORQ {
-                       break
-               }
-               y := or.Args[0]
-               s1 := or.Args[1]
+               s1 := v.Args[0]
                if s1.Op != OpAMD64SHLQconst {
                        break
                }
                j1 := s1.AuxInt
-               r1 := s1.Args[0]
-               if r1.Op != OpAMD64ROLWconst {
-                       break
-               }
-               if r1.AuxInt != 8 {
-                       break
-               }
-               x1 := r1.Args[0]
+               x1 := s1.Args[0]
                if x1.Op != OpAMD64MOVWloadidx1 {
                        break
                }
@@ -26398,271 +24892,6281 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
                idx := x1.Args[0]
                p := x1.Args[1]
                mem := x1.Args[2]
-               s0 := v.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
                if s0.Op != OpAMD64SHLQconst {
                        break
                }
                j0 := s0.AuxInt
-               r0 := s0.Args[0]
-               if r0.Op != OpAMD64ROLWconst {
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               if r0.AuxInt != 8 {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               x0 := r0.Args[0]
-               if x0.Op != OpAMD64MOVWloadidx1 {
+               if idx != x0.Args[0] {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)) or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               s1 := v.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem)) y) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s0 := or.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               y := or.Args[1]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} idx p mem))) s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && j1 == j0+16   && j0 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s0 := or.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               s1 := v.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j0
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v2.AuxInt = i0
+               v2.Aux = s
+               v2.AddArg(p)
+               v2.AddArg(idx)
+               v2.AddArg(mem)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)) x1:(MOVBload [i1] {s} p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))) r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQload, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y) s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))) s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWload, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLload [i0] {s} p mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWload {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               mem := x1.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWload {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLload, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ x1:(MOVBloadidx1 [i1] {s} idx p mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               x1 := v.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} p idx mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} idx p mem)) x1:(MOVBloadidx1 [i1] {s} idx p mem))
+       // cond: i1 == i0+1   && x0.Uses == 1   && x1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 8 {
+                       break
+               }
+               x0 := sh.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = 8
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORQ sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+2   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 16 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of BSWAPL(MOVLloadidx1 [i1]) with SHLQconst [32] of
+       // BSWAPL(MOVLloadidx1 [i0]), where i1 == i0+4 and both loads share
+       // s, p, idx and mem, becomes one BSWAPQ(MOVQloadidx1 [i0]).
+       // This variant takes r1 from v.Args[0] and sh from v.Args[1], with the
+       // address operands of x1 in (p, idx) order and those of x0 in (p, idx) order.
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               // x1 supplies the bindings for s, p, idx and mem; x0 is checked against them below.
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new BSWAPQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of BSWAPL(MOVLloadidx1 [i1]) with SHLQconst [32] of
+       // BSWAPL(MOVLloadidx1 [i0]), where i1 == i0+4 and both loads share
+       // s, p, idx and mem, becomes one BSWAPQ(MOVQloadidx1 [i0]).
+       // This variant takes r1 from v.Args[0] and sh from v.Args[1], with the
+       // address operands of x1 in (idx, p) order and those of x0 in (p, idx) order.
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               // x1 supplies the bindings for s, idx, p and mem; x0 is checked against them below.
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new BSWAPQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of BSWAPL(MOVLloadidx1 [i1]) with SHLQconst [32] of
+       // BSWAPL(MOVLloadidx1 [i0]), where i1 == i0+4 and both loads share
+       // s, p, idx and mem, becomes one BSWAPQ(MOVQloadidx1 [i0]).
+       // This variant takes r1 from v.Args[0] and sh from v.Args[1], with the
+       // address operands of x1 in (p, idx) order and those of x0 in (idx, p) order.
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               // x1 supplies the bindings for s, p, idx and mem; x0 is checked against them below.
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               // x0 carries its address operands in the swapped (idx, p) order in this variant.
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new BSWAPQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of BSWAPL(MOVLloadidx1 [i1]) with SHLQconst [32] of
+       // BSWAPL(MOVLloadidx1 [i0]), where i1 == i0+4 and both loads share
+       // s, p, idx and mem, becomes one BSWAPQ(MOVQloadidx1 [i0]).
+       // This variant takes r1 from v.Args[0] and sh from v.Args[1], with the
+       // address operands of both x1 and x0 in (idx, p) order.
+       // match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               r1 := v.Args[0]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               // x1 supplies the bindings for s, idx, p and mem; x0 is checked against them below.
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               sh := v.Args[1]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new BSWAPQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of SHLQconst [32] of BSWAPL(MOVLloadidx1 [i0]) with
+       // BSWAPL(MOVLloadidx1 [i1]), where i1 == i0+4 and both loads share
+       // s, p, idx and mem, becomes one BSWAPQ(MOVQloadidx1 [i0]).
+       // This variant takes sh from v.Args[0] and r1 from v.Args[1], with the
+       // address operands of both x0 and x1 in (p, idx) order.
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               // x0 supplies the bindings for s, p, idx and mem; x1 is checked against them below.
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new BSWAPQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of SHLQconst [32] of BSWAPL(MOVLloadidx1 [i0]) with
+       // BSWAPL(MOVLloadidx1 [i1]), where i1 == i0+4 and both loads share
+       // s, p, idx and mem, becomes one BSWAPQ(MOVQloadidx1 [i0]).
+       // This variant takes sh from v.Args[0] and r1 from v.Args[1], with the
+       // address operands of x0 in (idx, p) order and those of x1 in (p, idx) order.
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               // x0 supplies the bindings for s, idx, p and mem; x1 is checked against them below.
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new BSWAPQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of SHLQconst [32] of BSWAPL(MOVLloadidx1 [i0]) with
+       // BSWAPL(MOVLloadidx1 [i1]), where i1 == i0+4 and both loads share
+       // s, p, idx and mem, becomes one BSWAPQ(MOVQloadidx1 [i0]).
+       // This variant takes sh from v.Args[0] and r1 from v.Args[1], with the
+       // address operands of x0 in (p, idx) order and those of x1 in (idx, p) order.
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               // x0 supplies the bindings for s, p, idx and mem; x1 is checked against them below.
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               // x1 carries its address operands in the swapped (idx, p) order in this variant.
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new BSWAPQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of SHLQconst [32] of BSWAPL(MOVLloadidx1 [i0]) with
+       // BSWAPL(MOVLloadidx1 [i1]), where i1 == i0+4 and both loads share
+       // s, p, idx and mem, becomes one BSWAPQ(MOVQloadidx1 [i0]).
+       // This variant takes sh from v.Args[0] and r1 from v.Args[1], with the
+       // address operands of both x0 and x1 in (idx, p) order.
+       // match: (ORQ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} idx p mem))) r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} idx p mem)))
+       // cond: i1 == i0+4   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && sh.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(sh)
+       // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))
+       for {
+               sh := v.Args[0]
+               if sh.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if sh.AuxInt != 32 {
+                       break
+               }
+               r0 := sh.Args[0]
+               if r0.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               // x0 supplies the bindings for s, idx, p and mem; x1 is checked against them below.
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               r1 := v.Args[1]
+               if r1.Op != OpAMD64BSWAPL {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(sh)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new BSWAPQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, types.UInt64)
+               v1.AuxInt = i0
+               v1.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v1.AddArg(p)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of SHLQconst [j0] (MOVBloadidx1 [i0]) with an inner ORQ that holds
+       // SHLQconst [j1] (MOVBloadidx1 [i1]) and some other operand y, where
+       // i1 == i0+1, j1 == j0-8 and j1%16 == 0. The two byte loads are replaced by
+       // SHLQconst [j1] (ROLWconst [8] (MOVWloadidx1 [i0])), which is OR'ed with y.
+       // This variant takes s1 from or.Args[0] and y from or.Args[1], with the
+       // address operands of both x0 and x1 in (p, idx) order.
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               // x0 supplies the bindings for s, p, idx and mem; x1 is checked against them below.
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               // y is the inner ORQ's other operand; it is passed through to the result unchanged.
+               y := or.Args[1]
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new ORQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               // The combined value is shifted by j1, the smaller of the two matched shift amounts.
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of SHLQconst [j0] (MOVBloadidx1 [i0]) with an inner ORQ that holds
+       // SHLQconst [j1] (MOVBloadidx1 [i1]) and some other operand y, where
+       // i1 == i0+1, j1 == j0-8 and j1%16 == 0. The two byte loads are replaced by
+       // SHLQconst [j1] (ROLWconst [8] (MOVWloadidx1 [i0])), which is OR'ed with y.
+       // This variant takes s1 from or.Args[0] and y from or.Args[1], with the
+       // address operands of x0 in (idx, p) order and those of x1 in (p, idx) order.
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               // x0 supplies the bindings for s, idx, p and mem; x1 is checked against them below.
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               // y is the inner ORQ's other operand; it is passed through to the result unchanged.
+               y := or.Args[1]
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new ORQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               // The combined value is shifted by j1, the smaller of the two matched shift amounts.
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of SHLQconst [j0] (MOVBloadidx1 [i0]) with an inner ORQ that holds
+       // SHLQconst [j1] (MOVBloadidx1 [i1]) and some other operand y, where
+       // i1 == i0+1, j1 == j0-8 and j1%16 == 0. The two byte loads are replaced by
+       // SHLQconst [j1] (ROLWconst [8] (MOVWloadidx1 [i0])), which is OR'ed with y.
+       // This variant takes s1 from or.Args[0] and y from or.Args[1], with the
+       // address operands of x0 in (p, idx) order and those of x1 in (idx, p) order.
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               // x0 supplies the bindings for s, p, idx and mem; x1 is checked against them below.
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               // x1 carries its address operands in the swapped (idx, p) order in this variant.
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               // y is the inner ORQ's other operand; it is passed through to the result unchanged.
+               y := or.Args[1]
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new ORQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               // The combined value is shifted by j1, the smaller of the two matched shift amounts.
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of SHLQconst [j0] (MOVBloadidx1 [i0]) with an inner ORQ that holds
+       // SHLQconst [j1] (MOVBloadidx1 [i1]) and some other operand y, where
+       // i1 == i0+1, j1 == j0-8 and j1%16 == 0. The two byte loads are replaced by
+       // SHLQconst [j1] (ROLWconst [8] (MOVWloadidx1 [i0])), which is OR'ed with y.
+       // This variant takes s1 from or.Args[0] and y from or.Args[1], with the
+       // address operands of both x0 and x1 in (idx, p) order.
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               // x0 supplies the bindings for s, idx, p and mem; x1 is checked against them below.
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               // y is the inner ORQ's other operand; it is passed through to the result unchanged.
+               y := or.Args[1]
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new ORQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               // The combined value is shifted by j1, the smaller of the two matched shift amounts.
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // Rewrite rule (one of several commuted variants of the same pattern):
+       // an ORQ of SHLQconst [j0] (MOVBloadidx1 [i0]) with an inner ORQ that holds
+       // some other operand y and SHLQconst [j1] (MOVBloadidx1 [i1]), where
+       // i1 == i0+1, j1 == j0-8 and j1%16 == 0. The two byte loads are replaced by
+       // SHLQconst [j1] (ROLWconst [8] (MOVWloadidx1 [i0])), which is OR'ed with y.
+       // This variant takes y from or.Args[0] and s1 from or.Args[1], with the
+       // address operands of both x0 and x1 in (p, idx) order.
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               // x0 supplies the bindings for s, p, idx and mem; x1 is checked against them below.
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               // In this variant y is the inner ORQ's first operand; it is passed through to the result unchanged.
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               // && short-circuits, so the clobber calls run only after every other condition holds.
+               // NOTE(review): mergePoint and clobber are defined outside this excerpt; confirm their effects there.
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               // Emit the replacement into the block returned by mergePoint and turn v into a copy of the new ORQ.
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               // The combined value is shifted by j1, the smaller of the two matched shift amounts.
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               // The merged load always takes its operands as (p, idx, mem), whatever order was matched.
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)) or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem)) y) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} idx p mem))) s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} idx p mem)))
+       // cond: i1 == i0+1   && j1 == j0-8   && j1 % 16 == 0   && x0.Uses == 1   && x1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <types.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               x1 := s1.Args[0]
+               if x1.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               x0 := s0.Args[0]
+               if x0.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, types.UInt16)
+               v2.AuxInt = 8
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, types.UInt16)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               y := or.Args[1]
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               p := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))) or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               s0 := v.Args[0]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               idx := x0.Args[0]
+               p := x0.Args[1]
+               mem := x0.Args[2]
+               or := v.Args[1]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               if idx != x1.Args[0] {
+                       break
+               }
+               if p != x1.Args[1] {
+                       break
+               }
+               if mem != x1.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem))) y) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               s1 := or.Args[0]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               y := or.Args[1]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               p := x1.Args[0]
+               idx := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ or:(ORQ y s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} idx p mem)))) s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} idx p mem))))
+       // cond: i1 == i0+2   && j1 == j0-16   && j1 % 32 == 0   && x0.Uses == 1   && x1.Uses == 1   && r0.Uses == 1   && r1.Uses == 1   && s0.Uses == 1   && s1.Uses == 1   && or.Uses == 1   && mergePoint(b,x0,x1) != nil   && clobber(x0)   && clobber(x1)   && clobber(r0)   && clobber(r1)   && clobber(s0)   && clobber(s1)   && clobber(or)
+       // result: @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <types.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)
+       for {
+               or := v.Args[0]
+               if or.Op != OpAMD64ORQ {
+                       break
+               }
+               y := or.Args[0]
+               s1 := or.Args[1]
+               if s1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j1 := s1.AuxInt
+               r1 := s1.Args[0]
+               if r1.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r1.AuxInt != 8 {
+                       break
+               }
+               x1 := r1.Args[0]
+               if x1.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i1 := x1.AuxInt
+               s := x1.Aux
+               idx := x1.Args[0]
+               p := x1.Args[1]
+               mem := x1.Args[2]
+               s0 := v.Args[1]
+               if s0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               j0 := s0.AuxInt
+               r0 := s0.Args[0]
+               if r0.Op != OpAMD64ROLWconst {
+                       break
+               }
+               if r0.AuxInt != 8 {
+                       break
+               }
+               x0 := r0.Args[0]
+               if x0.Op != OpAMD64MOVWloadidx1 {
+                       break
+               }
+               i0 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               if idx != x0.Args[0] {
+                       break
+               }
+               if p != x0.Args[1] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
+               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v1.AuxInt = j1
+               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
+               v3.AuxInt = i0
+               v3.Aux = s
+               v3.AddArg(p)
+               v3.AddArg(idx)
+               v3.AddArg(mem)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v0.AddArg(y)
+               return true
+       }
+       // match: (ORQ x l:(MOVQload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORQmem x [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORQ l:(MOVQload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORQmem x [off] {sym} ptr mem)
+       for {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
+       // match: (ORQconst [0] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORQconst [-1] _)
+       // cond:
+       // result: (MOVQconst [-1])
+       for {
+               if v.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (ORQconst [c] (MOVQconst [d]))
+       // cond:
+       // result: (MOVQconst [c|d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = c | d
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ROLB(v *Value) bool {
+       // match: (ROLB x (NEGQ y))
+       // cond:
+       // result: (RORB x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64RORB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ROLB x (NEGL y))
+       // cond:
+       // result: (RORB x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64RORB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ROLB x (MOVQconst [c]))
+       // cond:
+       // result: (ROLBconst [c&7 ] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c & 7
+               v.AddArg(x)
+               return true
+       }
+       // match: (ROLB x (MOVLconst [c]))
+       // cond:
+       // result: (ROLBconst [c&7 ] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c & 7
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ROLBconst(v *Value) bool {
+       // match: (ROLBconst [c] (ROLBconst [d] x))
+       // cond:
+       // result: (ROLBconst [(c+d)& 7] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ROLBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = (c + d) & 7
+               v.AddArg(x)
+               return true
+       }
+       // match: (ROLBconst x [0])
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ROLL(v *Value) bool {
+       // match: (ROLL x (NEGQ y))
+       // cond:
+       // result: (RORL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ROLL x (NEGL y))
+       // cond:
+       // result: (RORL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ROLL x (MOVQconst [c]))
+       // cond:
+       // result: (ROLLconst [c&31] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
+               return true
+       }
+       // match: (ROLL x (MOVLconst [c]))
+       // cond:
+       // result: (ROLLconst [c&31] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ROLLconst(v *Value) bool {
+       // match: (ROLLconst [c] (ROLLconst [d] x))
+       // cond:
+       // result: (ROLLconst [(c+d)&31] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ROLLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = (c + d) & 31
+               v.AddArg(x)
+               return true
+       }
+       // match: (ROLLconst x [0])
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ROLQ(v *Value) bool {
+       // match: (ROLQ x (NEGQ y))
+       // cond:
+       // result: (RORQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ROLQ x (NEGL y))
+       // cond:
+       // result: (RORQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64RORQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ROLQ x (MOVQconst [c]))
+       // cond:
+       // result: (ROLQconst [c&63] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
+               return true
+       }
+       // match: (ROLQ x (MOVLconst [c]))
+       // cond:
+       // result: (ROLQconst [c&63] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ROLQconst(v *Value) bool {
+       // match: (ROLQconst [c] (ROLQconst [d] x))
+       // cond:
+       // result: (ROLQconst [(c+d)&63] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ROLQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = (c + d) & 63
+               v.AddArg(x)
+               return true
+       }
+       // match: (ROLQconst x [0])
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ROLW(v *Value) bool {
+       // match: (ROLW x (NEGQ y))
+       // cond:
+       // result: (RORW x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64RORW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ROLW x (NEGL y))
+       // cond:
+       // result: (RORW x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               if idx != x0.Args[0] {
+               y := v_1.Args[0]
+               v.reset(OpAMD64RORW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ROLW x (MOVQconst [c]))
+       // cond:
+       // result: (ROLWconst [c&15] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               if p != x0.Args[1] {
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c & 15
+               v.AddArg(x)
+               return true
+       }
+       // match: (ROLW x (MOVLconst [c]))
+       // cond:
+       // result: (ROLWconst [c&15] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               if mem != x0.Args[2] {
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c & 15
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64ROLWconst(v *Value) bool {
+       // match: (ROLWconst [c] (ROLWconst [d] x))
+       // cond:
+       // result: (ROLWconst [(c+d)&15] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ROLWconst {
                        break
                }
-               if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(r0) && clobber(r1) && clobber(s0) && clobber(s1) && clobber(or)) {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = (c + d) & 15
+               v.AddArg(x)
+               return true
+       }
+       // match: (ROLWconst x [0])
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
+               x := v.Args[0]
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v1.AuxInt = j1
-               v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, types.UInt32)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, types.UInt32)
-               v3.AuxInt = i0
-               v3.Aux = s
-               v3.AddArg(p)
-               v3.AddArg(idx)
-               v3.AddArg(mem)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v0.AddArg(y)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (ORQ x l:(MOVQload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ORQmem x [off] {sym} ptr mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64RORB(v *Value) bool {
+       // match: (RORB x (NEGQ y))
+       // cond:
+       // result: (ROLB x y)
        for {
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               y := v_1.Args[0]
+               v.reset(OpAMD64ROLB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (RORB x (NEGL y))
+       // cond:
+       // result: (ROLB x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               v.reset(OpAMD64ORQmem)
-               v.AuxInt = off
-               v.Aux = sym
+               y := v_1.Args[0]
+               v.reset(OpAMD64ROLB)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQ l:(MOVQload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (ORQmem x [off] {sym} ptr mem)
+       // match: (RORB x (MOVQconst [c]))
+       // cond:
+       // result: (ROLBconst [(-c)&7 ] x)
        for {
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = (-c) & 7
+               v.AddArg(x)
+               return true
+       }
+       // match: (RORB x (MOVLconst [c]))
+       // cond:
+       // result: (ROLBconst [(-c)&7 ] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64ORQmem)
-               v.AuxInt = off
-               v.Aux = sym
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = (-c) & 7
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool {
-       // match: (ORQconst [0] x)
+func rewriteValueAMD64_OpAMD64RORL(v *Value) bool {
+       // match: (RORL x (NEGQ y))
        // cond:
-       // result: x
+       // result: (ROLL x y)
        for {
-               if v.AuxInt != 0 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
+               y := v_1.Args[0]
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (RORL x (NEGL y))
+       // cond:
+       // result: (ROLL x y)
+       for {
                x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64ROLL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ORQconst [-1] _)
+       // match: (RORL x (MOVQconst [c]))
        // cond:
-       // result: (MOVQconst [-1])
+       // result: (ROLLconst [(-c)&31] x)
        for {
-               if v.AuxInt != -1 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = -1
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = (-c) & 31
+               v.AddArg(x)
                return true
        }
-       // match: (ORQconst [c] (MOVQconst [d]))
+       // match: (RORL x (MOVLconst [c]))
        // cond:
-       // result: (MOVQconst [c|d])
+       // result: (ROLLconst [(-c)&31] x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = c | d
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = (-c) & 31
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ROLBconst(v *Value) bool {
-       // match: (ROLBconst [c] (ROLBconst [d] x))
+func rewriteValueAMD64_OpAMD64RORQ(v *Value) bool {
+       // match: (RORQ x (NEGQ y))
        // cond:
-       // result: (ROLBconst [(c+d)& 7] x)
+       // result: (ROLQ x y)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ROLBconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ROLBconst)
-               v.AuxInt = (c + d) & 7
+               y := v_1.Args[0]
+               v.reset(OpAMD64ROLQ)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ROLBconst x [0])
+       // match: (RORQ x (NEGL y))
        // cond:
-       // result: x
+       // result: (ROLQ x y)
        for {
-               if v.AuxInt != 0 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               y := v_1.Args[0]
+               v.reset(OpAMD64ROLQ)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ROLLconst(v *Value) bool {
-       // match: (ROLLconst [c] (ROLLconst [d] x))
+       // match: (RORQ x (MOVQconst [c]))
        // cond:
-       // result: (ROLLconst [(c+d)&31] x)
+       // result: (ROLQconst [(-c)&63] x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ROLLconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ROLLconst)
-               v.AuxInt = (c + d) & 31
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = (-c) & 63
                v.AddArg(x)
                return true
        }
-       // match: (ROLLconst x [0])
+       // match: (RORQ x (MOVLconst [c]))
        // cond:
-       // result: x
+       // result: (ROLQconst [(-c)&63] x)
        for {
-               if v.AuxInt != 0 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = (-c) & 63
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ROLQconst(v *Value) bool {
-       // match: (ROLQconst [c] (ROLQconst [d] x))
+func rewriteValueAMD64_OpAMD64RORW(v *Value) bool {
+       // match: (RORW x (NEGQ y))
        // cond:
-       // result: (ROLQconst [(c+d)&63] x)
+       // result: (ROLW x y)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ROLQconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = (c + d) & 63
+               y := v_1.Args[0]
+               v.reset(OpAMD64ROLW)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (ROLQconst x [0])
+       // match: (RORW x (NEGL y))
        // cond:
-       // result: x
+       // result: (ROLW x y)
        for {
-               if v.AuxInt != 0 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               y := v_1.Args[0]
+               v.reset(OpAMD64ROLW)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ROLWconst(v *Value) bool {
-       // match: (ROLWconst [c] (ROLWconst [d] x))
+       // match: (RORW x (MOVQconst [c]))
        // cond:
-       // result: (ROLWconst [(c+d)&15] x)
+       // result: (ROLWconst [(-c)&15] x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ROLWconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
+               c := v_1.AuxInt
                v.reset(OpAMD64ROLWconst)
-               v.AuxInt = (c + d) & 15
+               v.AuxInt = (-c) & 15
                v.AddArg(x)
                return true
        }
-       // match: (ROLWconst x [0])
+       // match: (RORW x (MOVLconst [c]))
        // cond:
-       // result: x
+       // result: (ROLWconst [(-c)&15] x)
        for {
-               if v.AuxInt != 0 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               c := v_1.AuxInt
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = (-c) & 15
                v.AddArg(x)
                return true
        }
@@ -26732,6 +31236,8 @@ func rewriteValueAMD64_OpAMD64SARBconst(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64SARL(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (SARL x (MOVQconst [c]))
        // cond:
        // result: (SARLconst [c&31] x)
@@ -26762,24 +31268,186 @@ func rewriteValueAMD64_OpAMD64SARL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (SARL x (ANDLconst [31] y))
-       // cond:
+       // match: (SARL x (ADDQconst [c] y))
+       // cond: c & 31 == 0
        // result: (SARL x y)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDLconst {
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SARL x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SARL x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SARL x (ANDQconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SARL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SARL x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SARL x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SARL x (ADDLconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SARL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SARL x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SARL x (NEGL <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
                        break
                }
-               if v_1.AuxInt != 31 {
+               v.reset(OpAMD64SARL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SARL x (ANDLconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SARL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDLconst {
                        break
                }
+               c := v_1.AuxInt
                y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
                v.reset(OpAMD64SARL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
+       // match: (SARL x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SARL x (NEGL <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SARLconst(v *Value) bool {
@@ -26813,52 +31481,216 @@ func rewriteValueAMD64_OpAMD64SARLconst(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64SARQ(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (SARQ x (MOVQconst [c]))
        // cond:
        // result: (SARQconst [c&63] x)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64SARQconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
+               return true
+       }
+       // match: (SARQ x (MOVLconst [c]))
+       // cond:
+       // result: (SARQconst [c&63] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64SARQconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
+               return true
+       }
+       // match: (SARQ x (ADDQconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SARQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SARQ x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SARQ x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SARQ x (ANDQconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SARQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SARQ x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SARQ x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SARQ x (ADDLconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SARQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SARQ x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SARQ x (NEGL <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64SARQconst)
-               v.AuxInt = c & 63
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SARQ x (MOVLconst [c]))
-       // cond:
-       // result: (SARQconst [c&63] x)
+       // match: (SARQ x (ANDLconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SARQ x y)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64ANDLconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SARQconst)
-               v.AuxInt = c & 63
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SARQ x (ANDQconst [63] y))
-       // cond:
-       // result: (SARQ x y)
+       // match: (SARQ x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SARQ x (NEGL <t> y))
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDQconst {
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               if v_1.AuxInt != 63 {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               y := v_1.Args[0]
                v.reset(OpAMD64SARQ)
                v.AddArg(x)
-               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
        return false
@@ -28275,6 +33107,8 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (SHLL x (MOVQconst [c]))
        // cond:
        // result: (SHLLconst [c&31] x)
@@ -28305,24 +33139,186 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (SHLL x (ANDLconst [31] y))
-       // cond:
+       // match: (SHLL x (ADDQconst [c] y))
+       // cond: c & 31 == 0
        // result: (SHLL x y)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDLconst {
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLL x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHLL x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHLL x (ANDQconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHLL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLL x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHLL x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHLL x (ADDLconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHLL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLL x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHLL x (NEGL <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
                        break
                }
-               if v_1.AuxInt != 31 {
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHLL x (ANDLconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHLL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDLconst {
                        break
                }
+               c := v_1.AuxInt
                y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
                v.reset(OpAMD64SHLL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
+       // match: (SHLL x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHLL x (NEGL <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
@@ -28342,6 +33338,8 @@ func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (SHLQ x (MOVQconst [c]))
        // cond:
        // result: (SHLQconst [c&63] x)
@@ -28357,55 +33355,199 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (SHLQ x (MOVLconst [c]))
-       // cond:
-       // result: (SHLQconst [c&63] x)
+       // match: (SHLQ x (MOVLconst [c]))
+       // cond:
+       // result: (SHLQconst [c&63] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
+               return true
+       }
+       // match: (SHLQ x (ADDQconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHLQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLQ x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHLQ x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHLQ x (ANDQconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SHLQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLQ x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHLQ x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHLQ x (ADDLconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHLQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLQ x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHLQ x (NEGL <t> y))
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = c & 63
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SHLQ x (ANDQconst [63] y))
-       // cond:
+       // match: (SHLQ x (ANDLconst [c] y))
+       // cond: c & 63 == 63
        // result: (SHLQ x y)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDQconst {
+               if v_1.Op != OpAMD64ANDLconst {
                        break
                }
-               if v_1.AuxInt != 63 {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               y := v_1.Args[0]
                v.reset(OpAMD64SHLQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLQ x (ANDLconst [63] y))
-       // cond:
-       // result: (SHLQ x y)
+       // match: (SHLQ x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHLQ x (NEGL <t> y))
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDLconst {
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               if v_1.AuxInt != 63 {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               y := v_1.Args[0]
                v.reset(OpAMD64SHLQ)
                v.AddArg(x)
-               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
        return false
@@ -28514,6 +33656,8 @@ func rewriteValueAMD64_OpAMD64SHRBconst(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (SHRL x (MOVQconst [c]))
        // cond:
        // result: (SHRLconst [c&31] x)
@@ -28544,24 +33688,186 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (SHRL x (ANDLconst [31] y))
-       // cond:
+       // match: (SHRL x (ADDQconst [c] y))
+       // cond: c & 31 == 0
        // result: (SHRL x y)
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDLconst {
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHRL x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHRL x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHRL x (ANDQconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHRL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHRL x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHRL x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHRL x (ADDLconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHRL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHRL x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHRL x (NEGL <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
                        break
                }
-               if v_1.AuxInt != 31 {
+               v.reset(OpAMD64SHRL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHRL x (ANDLconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHRL x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDLconst {
                        break
                }
+               c := v_1.AuxInt
                y := v_1.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
                v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
+       // match: (SHRL x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHRL x (NEGL <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
@@ -28581,6 +33887,8 @@ func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool {
        return false
 }
 func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (SHRQ x (MOVQconst [c]))
        // cond:
        // result: (SHRQconst [c&63] x)
@@ -28611,8 +33919,53 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       // match: (SHRQ x (ANDQconst [63] y))
-       // cond:
+       // match: (SHRQ x (ADDQconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHRQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHRQ x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHRQ x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHRQ x (ANDQconst [c] y))
+       // cond: c & 63 == 63
        // result: (SHRQ x y)
        for {
                x := v.Args[0]
@@ -28620,17 +33973,89 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool {
                if v_1.Op != OpAMD64ANDQconst {
                        break
                }
-               if v_1.AuxInt != 63 {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHRQ x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHRQ x (NEGQ <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHRQ x (ADDLconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHRQ x y)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
                        break
                }
+               c := v_1.AuxInt
                y := v_1.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
                v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHRQ x (ANDLconst [63] y))
-       // cond:
+       // match: (SHRQ x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHRQ x (NEGL <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SHRQ x (ANDLconst [c] y))
+       // cond: c & 63 == 63
        // result: (SHRQ x y)
        for {
                x := v.Args[0]
@@ -28638,15 +34063,42 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool {
                if v_1.Op != OpAMD64ANDLconst {
                        break
                }
-               if v_1.AuxInt != 63 {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               y := v_1.Args[0]
                v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
+       // match: (SHRQ x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHRQ x (NEGL <t> y))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
+                       break
+               }
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool {