From: Wayne Zuo Date: Wed, 30 Mar 2022 11:27:21 +0000 (+0800) Subject: cmd/compile: add MOVBEWstore support for GOAMD64>=3 X-Git-Tag: go1.19beta1~790 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=ba6df85c7c94c7b26d4979e92fdb9ec7fa4cc1e4;p=gostls13.git cmd/compile: add MOVBEWstore support for GOAMD64>=3 This CL add MOVBE support for 16-bit version, but MOVBEWload is excluded because it does not satisfy zero extented. For #51724 Change-Id: I3fadf20bcbb9b423f6355e6a1e340107e8e621ac Reviewed-on: https://go-review.googlesource.com/c/go/+/396617 Reviewed-by: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Gopher Robot Trust: Emmanuel Odeke --- diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 0a3f80191b..84d90760f2 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -778,7 +778,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore, ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify, ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify, - ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore: + ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 87fe0fbee1..0eb5c61612 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -2225,6 +2225,8 @@ (BSWAP(Q|L) x:(MOV(Q|L)load [i] {s} p mem)) && x.Uses == 1 && buildcfg.GOAMD64 >= 3 => (MOVBE(Q|L)load [i] {s} p mem) (BSWAP(Q|L) (MOVBE(Q|L)load [i] {s} p m)) => (MOV(Q|L)load [i] {s} p m) (MOVBE(Q|L)store [i] {s} p (BSWAP(Q|L) x) m) => (MOV(Q|L)store [i] {s} p x m) +(MOVWstore [i] {s} p x:(ROLWconst [8] w) mem) && x.Uses == 1 && buildcfg.GOAMD64 >= 3 => (MOVBEWstore [i] {s} p w mem) +(MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem) && x.Uses == 1 => (MOVWstore [i] {s} p w mem) (ORQ x0:(MOVBELload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVBELload [i1] {s} p mem))) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 50e23871dd..b2dfcd561a 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -924,6 +924,8 @@ func init() { {name: "TZCNTL", argLength: 1, reg: gp11, asm: "TZCNTL", clobberFlags: true}, // CPUID feature: MOVBE + // MOVBEWload does not satisfy zero extended, so only use MOVBEWstore + {name: "MOVBEWstore", argLength: 3, reg: gpstore, asm: "MOVBEW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVBELload", argLength: 2, reg: gpload, asm: "MOVBEL", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load and swap 4 bytes from arg0+auxint+aux. arg1=mem. Zero extend. {name: "MOVBELstore", argLength: 3, reg: gpstore, asm: "MOVBEL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem {name: "MOVBEQload", argLength: 2, reg: gpload, asm: "MOVBEQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load and swap 8 bytes from arg0+auxint+aux. arg1=mem diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 0830cf5f7c..6b6e037e5a 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1043,6 +1043,7 @@ const ( OpAMD64BLSRL OpAMD64TZCNTQ OpAMD64TZCNTL + OpAMD64MOVBEWstore OpAMD64MOVBELload OpAMD64MOVBELstore OpAMD64MOVBEQload @@ -13791,6 +13792,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MOVBEWstore", + auxType: auxSymOff, + argLen: 3, + faultOnNilArg0: true, + symEffect: SymWrite, + asm: x86.AMOVBEW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB + }, + }, + }, { name: "MOVBELload", auxType: auxSymOff, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index c17d8b03e2..8dab76db8f 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -226,6 +226,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64MOVBELstore(v) case OpAMD64MOVBEQstore: return rewriteValueAMD64_OpAMD64MOVBEQstore(v) + case OpAMD64MOVBEWstore: + return rewriteValueAMD64_OpAMD64MOVBEWstore(v) case OpAMD64MOVBQSX: return rewriteValueAMD64_OpAMD64MOVBQSX(v) case OpAMD64MOVBQSXload: @@ -9542,6 +9544,34 @@ func rewriteValueAMD64_OpAMD64MOVBEQstore(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64MOVBEWstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem) + // cond: x.Uses == 1 + // result: (MOVWstore [i] {s} p w mem) + for { + i := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + p := v_0 + x := v_1 + if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 { + break + } + w := x.Args[0] + mem := v_2 + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = int32ToAuxInt(i) + v.Aux = symToAux(s) + v.AddArg3(p, w, mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64MOVBQSX(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -14466,6 +14496,28 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { v.AddArg3(p, v0, mem) return true } + // match: (MOVWstore [i] {s} p x:(ROLWconst [8] w) mem) + // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3 + // result: (MOVBEWstore [i] {s} p w mem) + for { + i := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + p := v_0 + x := v_1 + if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 { + break + } + w := x.Args[0] + mem := v_2 + if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64MOVBEWstore) + v.AuxInt = int32ToAuxInt(i) + v.Aux = symToAux(s) + v.AddArg3(p, w, mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index 97e1d4bdfb..ad42538dcd 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -105,20 +105,20 @@ func load_be32_idx(b []byte, idx int) { sink32 = binary.BigEndian.Uint32(b[idx:]) } -func load_be16(b []byte) { +func load_be16(b []byte) uint16 { // amd64:`ROLW\s\$8`,-`MOVB`,-`OR` // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB` // ppc64le:`MOVHBR` // s390x:`MOVHZ\s\(.*\),`,-`OR`,-`ORW`,-`SLD`,-`SLW` - sink16 = binary.BigEndian.Uint16(b) + return binary.BigEndian.Uint16(b) } -func load_be16_idx(b []byte, idx int) { +func load_be16_idx(b []byte, idx int) uint16 { // amd64:`ROLW\s\$8`,-`MOVB`,-`OR` // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB` // ppc64le:`MOVHBR` // s390x:`MOVHZ\s\(.*\)\(.*\*1\),`,-`OR`,-`ORW`,-`SLD`,-`SLW` - sink16 = binary.BigEndian.Uint16(b[idx:]) + return binary.BigEndian.Uint16(b[idx:]) } func load_le_byte2_uint16(s []byte) uint16 { @@ -463,7 +463,8 @@ func store_be32_idx(b []byte, idx int) { } func store_be16(b []byte) { - // amd64:`ROLW\s\$8`,-`SHR.` + // amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.` + // amd64/v3:`MOVBEW`,-`ROLW` // arm64:`MOVH`,`REV16W`,-`MOVB` // ppc64le:`MOVHBR` // s390x:`MOVH\s.*\(.*\)$`,-`SRW\s`,-`SRD\s` @@ -471,7 +472,8 @@ func store_be16(b []byte) { } func store_be16_idx(b []byte, idx int) { - // amd64:`ROLW\s\$8`,-`SHR.` + // amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.` + // amd64/v3: `MOVBEW` // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB` // ppc64le:`MOVHBR` // s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s` @@ -511,7 +513,8 @@ func store_le_byte_8(b []byte, val uint64) { func store_be_byte_2(b []byte, val uint16) { _ = b[2] // arm64:`REV16W`,`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB` - // amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB` + // amd64/v1,amd64/v2:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB` + // amd64/v3: `MOVBEW` b[1], b[2] = byte(val>>8), byte(val) }