]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: add MOVBEWstore support for GOAMD64>=3
authorWayne Zuo <wdvxdr@golangcn.org>
Wed, 30 Mar 2022 11:27:21 +0000 (19:27 +0800)
committerEmmanuel Odeke <emmanuel@orijtech.com>
Sun, 3 Apr 2022 23:48:16 +0000 (23:48 +0000)
This CL add MOVBE support for 16-bit version, but MOVBEWload is
excluded because it does not satisfy zero extented.

For #51724

Change-Id: I3fadf20bcbb9b423f6355e6a1e340107e8e621ac
Reviewed-on: https://go-review.googlesource.com/c/go/+/396617
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Trust: Emmanuel Odeke <emmanuel@orijtech.com>

src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/memcombine.go

index 0a3f80191bf812f373d5e5d04bebeb50c61ef93e..84d90760f2c1ee128f4b2b30848c7b2f179d7825 100644 (file)
@@ -778,7 +778,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
        case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
                ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
                ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
-               ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore:
+               ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[1].Reg()
index 87fe0fbee13d1a6b8c6c88a21227171d2dff5351..0eb5c616126eb1057dccccaa63fefc2a1e19ea3a 100644 (file)
 (BSWAP(Q|L) x:(MOV(Q|L)load [i] {s} p mem))    && x.Uses == 1 && buildcfg.GOAMD64 >= 3 => (MOVBE(Q|L)load [i] {s} p mem)
 (BSWAP(Q|L) (MOVBE(Q|L)load [i] {s} p m))    => (MOV(Q|L)load [i] {s} p m)
 (MOVBE(Q|L)store [i] {s} p (BSWAP(Q|L) x) m) => (MOV(Q|L)store [i] {s} p x m)
+(MOVWstore [i] {s} p x:(ROLWconst [8] w) mem)   && x.Uses == 1 && buildcfg.GOAMD64 >= 3 => (MOVBEWstore [i] {s} p w mem)
+(MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem) && x.Uses == 1 => (MOVWstore [i] {s} p w mem)
 
 (ORQ                   x0:(MOVBELload [i0] {s} p mem)
     sh:(SHLQconst [32] x1:(MOVBELload [i1] {s} p mem)))
index 50e23871ddf66792fdb2e2a848ac50c875fea796..b2dfcd561a221f60474d6f6eed8eeb4239276541 100644 (file)
@@ -924,6 +924,8 @@ func init() {
                {name: "TZCNTL", argLength: 1, reg: gp11, asm: "TZCNTL", clobberFlags: true},
 
                // CPUID feature: MOVBE
+               // MOVBEWload does not satisfy zero extended, so only use MOVBEWstore
+               {name: "MOVBEWstore", argLength: 3, reg: gpstore, asm: "MOVBEW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
                {name: "MOVBELload", argLength: 2, reg: gpload, asm: "MOVBEL", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load and swap 4 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
                {name: "MOVBELstore", argLength: 3, reg: gpstore, asm: "MOVBEL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
                {name: "MOVBEQload", argLength: 2, reg: gpload, asm: "MOVBEQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load and swap 8 bytes from arg0+auxint+aux. arg1=mem
index 0830cf5f7cc6e7c7d91fd32adc39d7118e54cd92..6b6e037e5a444fca671400762bef60f1962ae17d 100644 (file)
@@ -1043,6 +1043,7 @@ const (
        OpAMD64BLSRL
        OpAMD64TZCNTQ
        OpAMD64TZCNTL
+       OpAMD64MOVBEWstore
        OpAMD64MOVBELload
        OpAMD64MOVBELstore
        OpAMD64MOVBEQload
@@ -13791,6 +13792,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "MOVBEWstore",
+               auxType:        auxSymOff,
+               argLen:         3,
+               faultOnNilArg0: true,
+               symEffect:      SymWrite,
+               asm:            x86.AMOVBEW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49151},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
+                       },
+               },
+       },
        {
                name:           "MOVBELload",
                auxType:        auxSymOff,
index c17d8b03e20dd169318668d4a0ff9dd691c97868..8dab76db8f54d6966538f742fb99e6ec9963f635 100644 (file)
@@ -226,6 +226,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64MOVBELstore(v)
        case OpAMD64MOVBEQstore:
                return rewriteValueAMD64_OpAMD64MOVBEQstore(v)
+       case OpAMD64MOVBEWstore:
+               return rewriteValueAMD64_OpAMD64MOVBEWstore(v)
        case OpAMD64MOVBQSX:
                return rewriteValueAMD64_OpAMD64MOVBQSX(v)
        case OpAMD64MOVBQSXload:
@@ -9542,6 +9544,34 @@ func rewriteValueAMD64_OpAMD64MOVBEQstore(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64MOVBEWstore(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem)
+       // cond: x.Uses == 1
+       // result: (MOVWstore [i] {s} p w mem)
+       for {
+               i := auxIntToInt32(v.AuxInt)
+               s := auxToSym(v.Aux)
+               p := v_0
+               x := v_1
+               if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 {
+                       break
+               }
+               w := x.Args[0]
+               mem := v_2
+               if !(x.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = int32ToAuxInt(i)
+               v.Aux = symToAux(s)
+               v.AddArg3(p, w, mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64MOVBQSX(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
@@ -14466,6 +14496,28 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
                v.AddArg3(p, v0, mem)
                return true
        }
+       // match: (MOVWstore [i] {s} p x:(ROLWconst [8] w) mem)
+       // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3
+       // result: (MOVBEWstore [i] {s} p w mem)
+       for {
+               i := auxIntToInt32(v.AuxInt)
+               s := auxToSym(v.Aux)
+               p := v_0
+               x := v_1
+               if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 {
+                       break
+               }
+               w := x.Args[0]
+               mem := v_2
+               if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64MOVBEWstore)
+               v.AuxInt = int32ToAuxInt(i)
+               v.Aux = symToAux(s)
+               v.AddArg3(p, w, mem)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
index 97e1d4bdfb7ad6eff579801f1b23362c9a75e6de..ad42538dcdba878a62373fd2c9f3489a0428cb9f 100644 (file)
@@ -105,20 +105,20 @@ func load_be32_idx(b []byte, idx int) {
        sink32 = binary.BigEndian.Uint32(b[idx:])
 }
 
-func load_be16(b []byte) {
+func load_be16(b []byte) uint16 {
        // amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
        // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
        // ppc64le:`MOVHBR`
        // s390x:`MOVHZ\s\(.*\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
-       sink16 = binary.BigEndian.Uint16(b)
+       return binary.BigEndian.Uint16(b)
 }
 
-func load_be16_idx(b []byte, idx int) {
+func load_be16_idx(b []byte, idx int) uint16 {
        // amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
        // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
        // ppc64le:`MOVHBR`
        // s390x:`MOVHZ\s\(.*\)\(.*\*1\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
-       sink16 = binary.BigEndian.Uint16(b[idx:])
+       return binary.BigEndian.Uint16(b[idx:])
 }
 
 func load_le_byte2_uint16(s []byte) uint16 {
@@ -463,7 +463,8 @@ func store_be32_idx(b []byte, idx int) {
 }
 
 func store_be16(b []byte) {
-       // amd64:`ROLW\s\$8`,-`SHR.`
+       // amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
+       // amd64/v3:`MOVBEW`,-`ROLW`
        // arm64:`MOVH`,`REV16W`,-`MOVB`
        // ppc64le:`MOVHBR`
        // s390x:`MOVH\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
@@ -471,7 +472,8 @@ func store_be16(b []byte) {
 }
 
 func store_be16_idx(b []byte, idx int) {
-       // amd64:`ROLW\s\$8`,-`SHR.`
+       // amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
+       // amd64/v3: `MOVBEW`
        // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
        // ppc64le:`MOVHBR`
        // s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
@@ -511,7 +513,8 @@ func store_le_byte_8(b []byte, val uint64) {
 func store_be_byte_2(b []byte, val uint16) {
        _ = b[2]
        // arm64:`REV16W`,`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
-       // amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+       // amd64/v1,amd64/v2:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+       // amd64/v3: `MOVBEW`
        b[1], b[2] = byte(val>>8), byte(val)
 }