]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: add MOVBE index load/store
authorWayne Zuo <wdvxdr1123@gmail.com>
Thu, 24 Mar 2022 14:53:41 +0000 (22:53 +0800)
committerKeith Randall <khr@golang.org>
Mon, 11 Apr 2022 15:41:56 +0000 (15:41 +0000)
Fixes #51724

Change-Id: I94e650a7482dc4c479d597f0162a6a89d779708d
Reviewed-on: https://go-review.googlesource.com/c/go/+/395474
Reviewed-by: Keith Randall <khr@golang.org>
Trust: Cherry Mui <cherryyz@google.com>

src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/addressingmodes.go
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
test/codegen/memcombine.go

index d34fdc611b6a0183dc576769d03e3f73132f1ff8..9628ce5644022b14171d5c1f14f2513c097e448c 100644 (file)
@@ -786,7 +786,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
        case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
-               ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2:
+               ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
+               ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
                p := s.Prog(v.Op.Asm())
                memIdx(&p.From, v)
                ssagen.AddAux(&p.From, v)
@@ -808,7 +809,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
                ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
                ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
-               ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8:
+               ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
+               ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[2].Reg()
index 28fa86cd643c79a893326669dd1468832a3d4ab2..d600e31666eb71069e9656c9f1fec37cd057c757 100644 (file)
@@ -356,6 +356,26 @@ var combine = map[[2]Op]Op{
        [2]Op{OpAMD64SHRXQload, OpAMD64LEAQ1}: OpAMD64SHRXQloadidx1,
        [2]Op{OpAMD64SHRXQload, OpAMD64LEAQ8}: OpAMD64SHRXQloadidx8,
 
+       // amd64/v3
+       [2]Op{OpAMD64MOVBELload, OpAMD64ADDQ}:  OpAMD64MOVBELloadidx1,
+       [2]Op{OpAMD64MOVBEQload, OpAMD64ADDQ}:  OpAMD64MOVBEQloadidx1,
+       [2]Op{OpAMD64MOVBELload, OpAMD64LEAQ1}: OpAMD64MOVBELloadidx1,
+       [2]Op{OpAMD64MOVBELload, OpAMD64LEAQ4}: OpAMD64MOVBELloadidx4,
+       [2]Op{OpAMD64MOVBELload, OpAMD64LEAQ8}: OpAMD64MOVBELloadidx8,
+       [2]Op{OpAMD64MOVBEQload, OpAMD64LEAQ1}: OpAMD64MOVBEQloadidx1,
+       [2]Op{OpAMD64MOVBEQload, OpAMD64LEAQ8}: OpAMD64MOVBEQloadidx8,
+
+       [2]Op{OpAMD64MOVBEWstore, OpAMD64ADDQ}:  OpAMD64MOVBEWstoreidx1,
+       [2]Op{OpAMD64MOVBELstore, OpAMD64ADDQ}:  OpAMD64MOVBELstoreidx1,
+       [2]Op{OpAMD64MOVBEQstore, OpAMD64ADDQ}:  OpAMD64MOVBEQstoreidx1,
+       [2]Op{OpAMD64MOVBEWstore, OpAMD64LEAQ1}: OpAMD64MOVBEWstoreidx1,
+       [2]Op{OpAMD64MOVBEWstore, OpAMD64LEAQ2}: OpAMD64MOVBEWstoreidx2,
+       [2]Op{OpAMD64MOVBELstore, OpAMD64LEAQ1}: OpAMD64MOVBELstoreidx1,
+       [2]Op{OpAMD64MOVBELstore, OpAMD64LEAQ4}: OpAMD64MOVBELstoreidx4,
+       [2]Op{OpAMD64MOVBELstore, OpAMD64LEAQ8}: OpAMD64MOVBELstoreidx8,
+       [2]Op{OpAMD64MOVBEQstore, OpAMD64LEAQ1}: OpAMD64MOVBEQstoreidx1,
+       [2]Op{OpAMD64MOVBEQstore, OpAMD64LEAQ8}: OpAMD64MOVBEQstoreidx8,
+
        // 386
        [2]Op{Op386MOVBload, Op386ADDL}:  Op386MOVBloadidx1,
        [2]Op{Op386MOVWload, Op386ADDL}:  Op386MOVWloadidx1,
index d760d7d79e34055bf736f88c854fa507112feaf7..ab84504d1ab99b7590e61e4ca53ff8cf4fb83755 100644 (file)
@@ -937,6 +937,20 @@ func init() {
                {name: "MOVBELstore", argLength: 3, reg: gpstore, asm: "MOVBEL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
                {name: "MOVBEQload", argLength: 2, reg: gpload, asm: "MOVBEQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load and swap 8 bytes from arg0+auxint+aux. arg1=mem
                {name: "MOVBEQstore", argLength: 3, reg: gpstore, asm: "MOVBEQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
+               // indexed MOVBE loads
+               {name: "MOVBELloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBEL", scale: 1, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load and swap 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
+               {name: "MOVBELloadidx4", argLength: 3, reg: gploadidx, asm: "MOVBEL", scale: 4, aux: "SymOff", typ: "UInt32", symEffect: "Read"},                    // load and swap 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem. Zero extend.
+               {name: "MOVBELloadidx8", argLength: 3, reg: gploadidx, asm: "MOVBEL", scale: 8, aux: "SymOff", typ: "UInt32", symEffect: "Read"},                    // load and swap 4 bytes from arg0+8*arg1+auxint+aux. arg2=mem. Zero extend.
+               {name: "MOVBEQloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBEQ", scale: 1, aux: "SymOff", typ: "UInt64", symEffect: "Read"}, // load and swap 8 bytes from arg0+arg1+auxint+aux. arg2=mem
+               {name: "MOVBEQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVBEQ", scale: 8, aux: "SymOff", typ: "UInt64", symEffect: "Read"},                    // load and swap 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
+               // indexed MOVBE stores
+               {name: "MOVBEWstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVBEW", scale: 1, aux: "SymOff", symEffect: "Write"}, // swap and store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVBEWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVBEW", scale: 2, aux: "SymOff", symEffect: "Write"},                    // swap and store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
+               {name: "MOVBELstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVBEL", scale: 1, aux: "SymOff", symEffect: "Write"}, // swap and store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVBELstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVBEL", scale: 4, aux: "SymOff", symEffect: "Write"},                    // swap and store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
+               {name: "MOVBELstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVBEL", scale: 8, aux: "SymOff", symEffect: "Write"},                    // swap and store 4 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
+               {name: "MOVBEQstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVBEQ", scale: 1, aux: "SymOff", symEffect: "Write"}, // swap and store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
+               {name: "MOVBEQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVBEQ", scale: 8, aux: "SymOff", symEffect: "Write"},                    // swap and store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
 
                // CPUID feature: BMI2.
                {name: "SHLXLload", argLength: 3, reg: gp21shxload, asm: "SHLXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 32
index 005a033a409d57bfd10dca7c3d87586ddb961c2c..1c941e84e17f6b23e0578e170c112c85e98f29a2 100644 (file)
@@ -1050,6 +1050,18 @@ const (
        OpAMD64MOVBELstore
        OpAMD64MOVBEQload
        OpAMD64MOVBEQstore
+       OpAMD64MOVBELloadidx1
+       OpAMD64MOVBELloadidx4
+       OpAMD64MOVBELloadidx8
+       OpAMD64MOVBEQloadidx1
+       OpAMD64MOVBEQloadidx8
+       OpAMD64MOVBEWstoreidx1
+       OpAMD64MOVBEWstoreidx2
+       OpAMD64MOVBELstoreidx1
+       OpAMD64MOVBELstoreidx4
+       OpAMD64MOVBELstoreidx8
+       OpAMD64MOVBEQstoreidx1
+       OpAMD64MOVBEQstoreidx8
        OpAMD64SHLXLload
        OpAMD64SHLXQload
        OpAMD64SHRXLload
@@ -13910,6 +13922,201 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:        "MOVBELloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVBEL,
+               scale:       1,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:      "MOVBELloadidx4",
+               auxType:   auxSymOff,
+               argLen:    3,
+               symEffect: SymRead,
+               asm:       x86.AMOVBEL,
+               scale:     4,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:      "MOVBELloadidx8",
+               auxType:   auxSymOff,
+               argLen:    3,
+               symEffect: SymRead,
+               asm:       x86.AMOVBEL,
+               scale:     8,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:        "MOVBEQloadidx1",
+               auxType:     auxSymOff,
+               argLen:      3,
+               commutative: true,
+               symEffect:   SymRead,
+               asm:         x86.AMOVBEQ,
+               scale:       1,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:      "MOVBEQloadidx8",
+               auxType:   auxSymOff,
+               argLen:    3,
+               symEffect: SymRead,
+               asm:       x86.AMOVBEQ,
+               scale:     8,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:        "MOVBEWstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVBEW,
+               scale:       1,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {2, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+               },
+       },
+       {
+               name:      "MOVBEWstoreidx2",
+               auxType:   auxSymOff,
+               argLen:    4,
+               symEffect: SymWrite,
+               asm:       x86.AMOVBEW,
+               scale:     2,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {2, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+               },
+       },
+       {
+               name:        "MOVBELstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVBEL,
+               scale:       1,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {2, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+               },
+       },
+       {
+               name:      "MOVBELstoreidx4",
+               auxType:   auxSymOff,
+               argLen:    4,
+               symEffect: SymWrite,
+               asm:       x86.AMOVBEL,
+               scale:     4,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {2, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+               },
+       },
+       {
+               name:      "MOVBELstoreidx8",
+               auxType:   auxSymOff,
+               argLen:    4,
+               symEffect: SymWrite,
+               asm:       x86.AMOVBEL,
+               scale:     8,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {2, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+               },
+       },
+       {
+               name:        "MOVBEQstoreidx1",
+               auxType:     auxSymOff,
+               argLen:      4,
+               commutative: true,
+               symEffect:   SymWrite,
+               asm:         x86.AMOVBEQ,
+               scale:       1,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {2, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+               },
+       },
+       {
+               name:      "MOVBEQstoreidx8",
+               auxType:   auxSymOff,
+               argLen:    4,
+               symEffect: SymWrite,
+               asm:       x86.AMOVBEQ,
+               scale:     8,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {2, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+               },
+       },
        {
                name:           "SHLXLload",
                auxType:        auxSymOff,
index 0d4e96f862ccc35087c6d7f800f0c9e2cb1e5738..0292d7f0f3eb4c4528065b06c4bd50adf0980a37 100644 (file)
@@ -76,7 +76,7 @@ func load_be64(b []byte) uint64 {
 
 func load_be64_idx(b []byte, idx int) uint64 {
        // amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
-       // amd64/v3: `MOVBEQ`
+       // amd64/v3: `MOVBEQ\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
        // s390x:`MOVD\s\(.*\)\(.*\*1\),`
        // arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
        // ppc64le:`MOVDBR`,-`MOV[BHW]Z`
@@ -94,7 +94,7 @@ func load_be32(b []byte) uint32 {
 
 func load_be32_idx(b []byte, idx int) uint32 {
        // amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
-       // amd64/v3: `MOVBEL`
+       // amd64/v3: `MOVBEL\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
        // s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
        // arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
        // ppc64le:`MOVWBR`,-`MOV[BH]Z`
@@ -421,7 +421,7 @@ func store_be64(b []byte, x uint64) {
 
 func store_be64_idx(b []byte, x uint64, idx int) {
        // amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
-       // amd64/v3:`MOVBEQ`
+       // amd64/v3:`MOVBEQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
        // arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
        // ppc64le:`MOVDBR`
        // s390x:`MOVD\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
@@ -451,7 +451,7 @@ func store_be32_load(b, x *[8]byte) {
 
 func store_be32_idx(b []byte, x uint32, idx int) {
        // amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
-       // amd64/v3:`MOVBEL`
+       // amd64/v3:`MOVBEL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
        // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
        // ppc64le:`MOVWBR`
        // s390x:`MOVW\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
@@ -469,7 +469,7 @@ func store_be16(b []byte, x uint16) {
 
 func store_be16_idx(b []byte, x uint16, idx int) {
        // amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
-       // amd64/v3: `MOVBEW`
+       // amd64/v3:`MOVBEW\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
        // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
        // ppc64le:`MOVHBR`
        // s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`