Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize 386's comparison
author Ben Shi <powerman1st@163.com>
Sun, 29 Jul 2018 12:50:50 +0000 (12:50 +0000)
committer Ben Shi <powerman1st@163.com>
Mon, 20 Aug 2018 14:23:22 +0000 (14:23 +0000)
CMPL/CMPW/CMPB can take a memory operand on 386, and this CL
implements that optimization.

1. The total size of pkg/linux_386 decreases by about 45KB, excluding
cmd/compile.

2. The go1 benchmark shows a slight improvement:
name                     old time/op    new time/op    delta
BinaryTree17-4              3.36s ± 2%     3.37s ± 3%    ~     (p=0.537 n=40+40)
Fannkuch11-4                3.59s ± 1%     3.53s ± 2%  -1.58%  (p=0.000 n=40+40)
FmtFprintfEmpty-4          46.0ns ± 3%    45.8ns ± 3%    ~     (p=0.249 n=40+40)
FmtFprintfString-4         80.0ns ± 4%    78.8ns ± 3%  -1.49%  (p=0.001 n=40+40)
FmtFprintfInt-4            89.7ns ± 2%    90.3ns ± 2%  +0.74%  (p=0.003 n=40+40)
FmtFprintfIntInt-4          144ns ± 3%     143ns ± 3%  -0.95%  (p=0.003 n=40+40)
FmtFprintfPrefixedInt-4     181ns ± 4%     180ns ± 2%    ~     (p=0.103 n=40+40)
FmtFprintfFloat-4           412ns ± 3%     408ns ± 4%  -0.97%  (p=0.018 n=40+40)
FmtManyArgs-4               607ns ± 4%     605ns ± 4%    ~     (p=0.148 n=40+40)
GobDecode-4                7.19ms ± 4%    7.24ms ± 5%    ~     (p=0.340 n=40+40)
GobEncode-4                7.04ms ± 9%    6.99ms ± 9%    ~     (p=0.289 n=40+40)
Gzip-4                      400ms ± 6%     398ms ± 5%    ~     (p=0.168 n=40+40)
Gunzip-4                   41.2ms ± 3%    41.7ms ± 3%  +1.40%  (p=0.001 n=40+40)
HTTPClientServer-4         62.5µs ± 1%    62.1µs ± 2%  -0.61%  (p=0.000 n=37+37)
JSONEncode-4               20.7ms ± 4%    20.4ms ± 3%  -1.60%  (p=0.000 n=40+40)
JSONDecode-4               69.4ms ± 4%    69.2ms ± 6%    ~     (p=0.177 n=40+40)
Mandelbrot200-4            5.22ms ± 6%    5.21ms ± 3%    ~     (p=0.531 n=40+40)
GoParse-4                  3.29ms ± 3%    3.28ms ± 3%    ~     (p=0.321 n=40+39)
RegexpMatchEasy0_32-4       104ns ± 4%     103ns ± 7%  -0.89%  (p=0.040 n=40+40)
RegexpMatchEasy0_1K-4       852ns ± 3%     853ns ± 2%    ~     (p=0.357 n=40+40)
RegexpMatchEasy1_32-4       113ns ± 8%     113ns ± 3%    ~     (p=0.906 n=40+40)
RegexpMatchEasy1_1K-4      1.03µs ± 4%    1.03µs ± 5%    ~     (p=0.326 n=40+40)
RegexpMatchMedium_32-4      136ns ± 3%     133ns ± 3%  -2.31%  (p=0.000 n=40+40)
RegexpMatchMedium_1K-4     44.0µs ± 3%    43.7µs ± 3%    ~     (p=0.053 n=40+40)
RegexpMatchHard_32-4       2.27µs ± 3%    2.26µs ± 4%    ~     (p=0.391 n=40+40)
RegexpMatchHard_1K-4       68.0µs ± 3%    68.9µs ± 3%  +1.28%  (p=0.000 n=40+40)
Revcomp-4                   1.86s ± 5%     1.86s ± 2%    ~     (p=0.950 n=40+40)
Template-4                 73.4ms ± 4%    69.9ms ± 7%  -4.78%  (p=0.000 n=40+40)
TimeParse-4                 449ns ± 4%     441ns ± 5%  -1.76%  (p=0.000 n=40+40)
TimeFormat-4                416ns ± 3%     417ns ± 4%    ~     (p=0.304 n=40+40)
[Geo mean]                 67.7µs         67.3µs       -0.55%

name                     old speed      new speed      delta
GobDecode-4               107MB/s ± 4%   106MB/s ± 5%    ~     (p=0.336 n=40+40)
GobEncode-4               109MB/s ± 5%   110MB/s ± 9%    ~     (p=0.142 n=38+40)
Gzip-4                   48.5MB/s ± 5%  48.8MB/s ± 5%    ~     (p=0.172 n=40+40)
Gunzip-4                  472MB/s ± 3%   465MB/s ± 3%  -1.39%  (p=0.001 n=40+40)
JSONEncode-4             93.6MB/s ± 4%  95.1MB/s ± 3%  +1.61%  (p=0.000 n=40+40)
JSONDecode-4             28.0MB/s ± 3%  28.1MB/s ± 6%    ~     (p=0.181 n=40+40)
GoParse-4                17.6MB/s ± 3%  17.7MB/s ± 3%    ~     (p=0.350 n=40+39)
RegexpMatchEasy0_32-4     308MB/s ± 4%   311MB/s ± 6%  +0.96%  (p=0.025 n=40+40)
RegexpMatchEasy0_1K-4    1.20GB/s ± 3%  1.20GB/s ± 2%    ~     (p=0.317 n=40+40)
RegexpMatchEasy1_32-4     282MB/s ± 7%   282MB/s ± 3%    ~     (p=0.516 n=40+40)
RegexpMatchEasy1_1K-4     994MB/s ± 4%   991MB/s ± 5%    ~     (p=0.319 n=40+40)
RegexpMatchMedium_32-4   7.31MB/s ± 3%  7.49MB/s ± 3%  +2.46%  (p=0.000 n=40+40)
RegexpMatchMedium_1K-4   23.3MB/s ± 3%  23.4MB/s ± 3%    ~     (p=0.052 n=40+40)
RegexpMatchHard_32-4     14.1MB/s ± 3%  14.1MB/s ± 4%    ~     (p=0.391 n=40+40)
RegexpMatchHard_1K-4     15.1MB/s ± 3%  14.9MB/s ± 3%  -1.27%  (p=0.000 n=40+40)
Revcomp-4                 137MB/s ± 5%   137MB/s ± 2%    ~     (p=0.942 n=40+40)
Template-4               26.5MB/s ± 4%  27.8MB/s ± 7%  +5.03%  (p=0.000 n=40+40)
[Geo mean]               78.6MB/s       79.0MB/s       +0.57%

Change-Id: Idcacc6881ef57cd7dc33aa87b711282842b72a53
Reviewed-on: https://go-review.googlesource.com/126618
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/flagalloc.go
src/cmd/compile/internal/ssa/gen/386.rules
src/cmd/compile/internal/ssa/gen/386Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite386.go
src/cmd/compile/internal/x86/ssa.go
test/codegen/comparisons.go

index 050595ff8df31527a7f37b182f37388a56d02b43..56c12e320adfae129bc60f52c444ce28ac034855 100644 (file)
@@ -4,6 +4,30 @@
 
 package ssa
 
+// When breaking up a combined load-compare into separate load and compare operations,
+// opLoad specifies the load operation, and opCmp specifies the compare operation.
+type typeCmdLoadMap struct {
+       opLoad Op
+       opCmp  Op
+}
+
+var opCmpLoadMap = map[Op]typeCmdLoadMap{
+       OpAMD64CMPQload:      {OpAMD64MOVQload, OpAMD64CMPQ},
+       OpAMD64CMPLload:      {OpAMD64MOVLload, OpAMD64CMPL},
+       OpAMD64CMPWload:      {OpAMD64MOVWload, OpAMD64CMPW},
+       OpAMD64CMPBload:      {OpAMD64MOVBload, OpAMD64CMPB},
+       Op386CMPLload:        {Op386MOVLload, Op386CMPL},
+       Op386CMPWload:        {Op386MOVWload, Op386CMPW},
+       Op386CMPBload:        {Op386MOVBload, Op386CMPB},
+       OpAMD64CMPQconstload: {OpAMD64MOVQload, OpAMD64CMPQconst},
+       OpAMD64CMPLconstload: {OpAMD64MOVLload, OpAMD64CMPLconst},
+       OpAMD64CMPWconstload: {OpAMD64MOVWload, OpAMD64CMPWconst},
+       OpAMD64CMPBconstload: {OpAMD64MOVBload, OpAMD64CMPBconst},
+       Op386CMPLconstload:   {Op386MOVLload, Op386CMPLconst},
+       Op386CMPWconstload:   {Op386MOVWload, Op386CMPWconst},
+       Op386CMPBconstload:   {Op386MOVBload, Op386CMPBconst},
+}
+
 // flagalloc allocates the flag register among all the flag-generating
 // instructions. Flag values are recomputed if they need to be
 // spilled/restored.
@@ -122,55 +146,55 @@ func flagalloc(f *Func) {
                        if spill[v.ID] && v.MemoryArg() != nil {
                                switch v.Op {
                                case OpAMD64CMPQload:
-                                       load := b.NewValue2IA(v.Pos, OpAMD64MOVQload, f.Config.Types.UInt64, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
-                                       v.Op = OpAMD64CMPQ
+                                       load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt64, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
+                                       v.Op = opCmpLoadMap[v.Op].opCmp
                                        v.AuxInt = 0
                                        v.Aux = nil
                                        v.SetArgs2(load, v.Args[1])
-                               case OpAMD64CMPLload:
-                                       load := b.NewValue2IA(v.Pos, OpAMD64MOVLload, f.Config.Types.UInt32, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
-                                       v.Op = OpAMD64CMPL
+                               case OpAMD64CMPLload, Op386CMPLload:
+                                       load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt32, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
+                                       v.Op = opCmpLoadMap[v.Op].opCmp
                                        v.AuxInt = 0
                                        v.Aux = nil
                                        v.SetArgs2(load, v.Args[1])
-                               case OpAMD64CMPWload:
-                                       load := b.NewValue2IA(v.Pos, OpAMD64MOVWload, f.Config.Types.UInt16, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
-                                       v.Op = OpAMD64CMPW
+                               case OpAMD64CMPWload, Op386CMPWload:
+                                       load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt16, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
+                                       v.Op = opCmpLoadMap[v.Op].opCmp
                                        v.AuxInt = 0
                                        v.Aux = nil
                                        v.SetArgs2(load, v.Args[1])
-                               case OpAMD64CMPBload:
-                                       load := b.NewValue2IA(v.Pos, OpAMD64MOVBload, f.Config.Types.UInt8, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
-                                       v.Op = OpAMD64CMPB
+                               case OpAMD64CMPBload, Op386CMPBload:
+                                       load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt8, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
+                                       v.Op = opCmpLoadMap[v.Op].opCmp
                                        v.AuxInt = 0
                                        v.Aux = nil
                                        v.SetArgs2(load, v.Args[1])
 
                                case OpAMD64CMPQconstload:
                                        vo := v.AuxValAndOff()
-                                       load := b.NewValue2IA(v.Pos, OpAMD64MOVQload, f.Config.Types.UInt64, vo.Off(), v.Aux, v.Args[0], v.Args[1])
-                                       v.Op = OpAMD64CMPQconst
+                                       load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt64, vo.Off(), v.Aux, v.Args[0], v.Args[1])
+                                       v.Op = opCmpLoadMap[v.Op].opCmp
                                        v.AuxInt = vo.Val()
                                        v.Aux = nil
                                        v.SetArgs1(load)
-                               case OpAMD64CMPLconstload:
+                               case OpAMD64CMPLconstload, Op386CMPLconstload:
                                        vo := v.AuxValAndOff()
-                                       load := b.NewValue2IA(v.Pos, OpAMD64MOVLload, f.Config.Types.UInt32, vo.Off(), v.Aux, v.Args[0], v.Args[1])
-                                       v.Op = OpAMD64CMPLconst
+                                       load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt32, vo.Off(), v.Aux, v.Args[0], v.Args[1])
+                                       v.Op = opCmpLoadMap[v.Op].opCmp
                                        v.AuxInt = vo.Val()
                                        v.Aux = nil
                                        v.SetArgs1(load)
-                               case OpAMD64CMPWconstload:
+                               case OpAMD64CMPWconstload, Op386CMPWconstload:
                                        vo := v.AuxValAndOff()
-                                       load := b.NewValue2IA(v.Pos, OpAMD64MOVWload, f.Config.Types.UInt16, vo.Off(), v.Aux, v.Args[0], v.Args[1])
-                                       v.Op = OpAMD64CMPWconst
+                                       load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt16, vo.Off(), v.Aux, v.Args[0], v.Args[1])
+                                       v.Op = opCmpLoadMap[v.Op].opCmp
                                        v.AuxInt = vo.Val()
                                        v.Aux = nil
                                        v.SetArgs1(load)
-                               case OpAMD64CMPBconstload:
+                               case OpAMD64CMPBconstload, Op386CMPBconstload:
                                        vo := v.AuxValAndOff()
-                                       load := b.NewValue2IA(v.Pos, OpAMD64MOVBload, f.Config.Types.UInt8, vo.Off(), v.Aux, v.Args[0], v.Args[1])
-                                       v.Op = OpAMD64CMPBconst
+                                       load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt8, vo.Off(), v.Aux, v.Args[0], v.Args[1])
+                                       v.Op = opCmpLoadMap[v.Op].opCmp
                                        v.AuxInt = vo.Val()
                                        v.Aux = nil
                                        v.SetArgs1(load)
index 65ac53268976a1d32d84e11eeb7c622431d2eeca..94f24a81ef50785193ef4ea1a6f35706e9b09712 100644 (file)
 // a register to use for holding the address of the constant pool entry.
 (MOVSSconst [c]) && config.ctxt.Flag_shared -> (MOVSSconst2 (MOVSSconst1 [c]))
 (MOVSDconst [c]) && config.ctxt.Flag_shared -> (MOVSDconst2 (MOVSDconst1 [c]))
+
+(CMP(L|W|B) l:(MOV(L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (CMP(L|W|B)load {sym} [off] ptr x mem)
+(CMP(L|W|B) x l:(MOV(L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (InvertFlags (CMP(L|W|B)load {sym} [off] ptr x mem))
+
+(CMP(L|W|B)const l:(MOV(L|W|B)load {sym} [off] ptr mem) [c])
+       && l.Uses == 1
+       && validValAndOff(c, off)
+       && clobber(l) ->
+  @l.Block (CMP(L|W|B)constload {sym} [makeValAndOff(c,off)] ptr mem)
+
+(CMPLload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int32(c)),off) -> (CMPLconstload {sym} [makeValAndOff(int64(int32(c)),off)] ptr mem)
+(CMPWload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)),off) -> (CMPWconstload {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
+(CMPBload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),off) -> (CMPBconstload {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
index 0a77776b52e77795d36f30b3b2f490de0b013f32..7a274269d26aa713d6bf8955d07cfc2ad67e0a1a 100644 (file)
@@ -117,9 +117,11 @@ func init() {
                gp11mod   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax}
                gp21mul   = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}
 
-               gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}}
-               gp1flags = regInfo{inputs: []regMask{gpsp}}
-               flagsgp  = regInfo{inputs: nil, outputs: gponly}
+               gp2flags     = regInfo{inputs: []regMask{gpsp, gpsp}}
+               gp1flags     = regInfo{inputs: []regMask{gpsp}}
+               gp0flagsLoad = regInfo{inputs: []regMask{gpspsb, 0}}
+               gp1flagsLoad = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
+               flagsgp      = regInfo{inputs: nil, outputs: gponly}
 
                readflags = regInfo{inputs: nil, outputs: gponly}
                flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
@@ -235,6 +237,16 @@ func init() {
                {name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int16"}, // arg0 compare to auxint
                {name: "CMPBconst", argLength: 1, reg: gp1flags, asm: "CMPB", typ: "Flags", aux: "Int8"},  // arg0 compare to auxint
 
+               // compare *(arg0+auxint+aux) to arg1 (in that order). arg2=mem.
+               {name: "CMPLload", argLength: 3, reg: gp1flagsLoad, asm: "CMPL", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
+               {name: "CMPWload", argLength: 3, reg: gp1flagsLoad, asm: "CMPW", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
+               {name: "CMPBload", argLength: 3, reg: gp1flagsLoad, asm: "CMPB", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
+
+               // compare *(arg0+ValAndOff(AuxInt).Off()+aux) to ValAndOff(AuxInt).Val() (in that order). arg1=mem.
+               {name: "CMPLconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPL", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
+               {name: "CMPWconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPW", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
+               {name: "CMPBconstload", argLength: 2, reg: gp0flagsLoad, asm: "CMPB", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
+
                {name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags", usesScratch: true}, // arg0 compare to arg1, f32
                {name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags", usesScratch: true}, // arg0 compare to arg1, f64
 
index 6a14ee080178b744f0eba68cdddcce5032952fc7..34b1d8a4bba222bd1a33867c4f543fb5eadfcdee 100644 (file)
@@ -300,6 +300,12 @@ const (
        Op386CMPLconst
        Op386CMPWconst
        Op386CMPBconst
+       Op386CMPLload
+       Op386CMPWload
+       Op386CMPBload
+       Op386CMPLconstload
+       Op386CMPWconstload
+       Op386CMPBconstload
        Op386UCOMISS
        Op386UCOMISD
        Op386TESTL
@@ -3329,6 +3335,87 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "CMPLload",
+               auxType:        auxSymOff,
+               argLen:         3,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ACMPL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 255},   // AX CX DX BX SP BP SI DI
+                               {0, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+               },
+       },
+       {
+               name:           "CMPWload",
+               auxType:        auxSymOff,
+               argLen:         3,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ACMPW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 255},   // AX CX DX BX SP BP SI DI
+                               {0, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+               },
+       },
+       {
+               name:           "CMPBload",
+               auxType:        auxSymOff,
+               argLen:         3,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ACMPB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 255},   // AX CX DX BX SP BP SI DI
+                               {0, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+               },
+       },
+       {
+               name:           "CMPLconstload",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ACMPL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+               },
+       },
+       {
+               name:           "CMPWconstload",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ACMPW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+               },
+       },
+       {
+               name:           "CMPBconstload",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               faultOnNilArg0: true,
+               symEffect:      SymRead,
+               asm:            x86.ACMPB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+               },
+       },
        {
                name:        "UCOMISS",
                argLen:      2,
index a204d48d073bb90fcb5c1ea28e4511ed982e91b4..db2c62089d6de6b1505e71280f1c10c09755306f 100644 (file)
@@ -47,14 +47,20 @@ func rewriteValue386(v *Value) bool {
                return rewriteValue386_Op386CMPB_0(v)
        case Op386CMPBconst:
                return rewriteValue386_Op386CMPBconst_0(v)
+       case Op386CMPBload:
+               return rewriteValue386_Op386CMPBload_0(v)
        case Op386CMPL:
                return rewriteValue386_Op386CMPL_0(v)
        case Op386CMPLconst:
-               return rewriteValue386_Op386CMPLconst_0(v)
+               return rewriteValue386_Op386CMPLconst_0(v) || rewriteValue386_Op386CMPLconst_10(v)
+       case Op386CMPLload:
+               return rewriteValue386_Op386CMPLload_0(v)
        case Op386CMPW:
                return rewriteValue386_Op386CMPW_0(v)
        case Op386CMPWconst:
                return rewriteValue386_Op386CMPWconst_0(v)
+       case Op386CMPWload:
+               return rewriteValue386_Op386CMPWload_0(v)
        case Op386LEAL:
                return rewriteValue386_Op386LEAL_0(v)
        case Op386LEAL1:
@@ -2216,9 +2222,65 @@ func rewriteValue386_Op386CMPB_0(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPBload {sym} [off] ptr x mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != Op386MOVBload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386CMPBload)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (CMPB x l:(MOVBload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPBload {sym} [off] ptr x mem))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != Op386MOVBload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386InvertFlags)
+               v0 := b.NewValue0(v.Pos, Op386CMPBload, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386CMPBconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (CMPBconst (MOVLconst [x]) [y])
        // cond: int8(x)==int8(y)
        // result: (FlagEQ)
@@ -2365,6 +2427,60 @@ func rewriteValue386_Op386CMPBconst_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (CMPBconst l:(MOVBload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPBconstload {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != Op386MOVBload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+                       break
+               }
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, Op386CMPBconstload, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386CMPBload_0(v *Value) bool {
+       // match: (CMPBload {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(int64(int8(c)),off)
+       // result: (CMPBconstload {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(int64(int8(c)), off)) {
+                       break
+               }
+               v.reset(Op386CMPBconstload)
+               v.AuxInt = makeValAndOff(int64(int8(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386CMPL_0(v *Value) bool {
@@ -2404,6 +2520,60 @@ func rewriteValue386_Op386CMPL_0(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPLload {sym} [off] ptr x mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386CMPLload)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (CMPL x l:(MOVLload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPLload {sym} [off] ptr x mem))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386InvertFlags)
+               v0 := b.NewValue0(v.Pos, Op386CMPLload, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386CMPLconst_0(v *Value) bool {
@@ -2571,6 +2741,65 @@ func rewriteValue386_Op386CMPLconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValue386_Op386CMPLconst_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPLconst l:(MOVLload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+                       break
+               }
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, Op386CMPLconstload, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386CMPLload_0(v *Value) bool {
+       // match: (CMPLload {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(int64(int32(c)),off)
+       // result: (CMPLconstload {sym} [makeValAndOff(int64(int32(c)),off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(int64(int32(c)), off)) {
+                       break
+               }
+               v.reset(Op386CMPLconstload)
+               v.AuxInt = makeValAndOff(int64(int32(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValue386_Op386CMPW_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -2608,9 +2837,65 @@ func rewriteValue386_Op386CMPW_0(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPWload {sym} [off] ptr x mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != Op386MOVWload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386CMPWload)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (CMPW x l:(MOVWload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPWload {sym} [off] ptr x mem))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != Op386MOVWload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386InvertFlags)
+               v0 := b.NewValue0(v.Pos, Op386CMPWload, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386CMPWconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (CMPWconst (MOVLconst [x]) [y])
        // cond: int16(x)==int16(y)
        // result: (FlagEQ)
@@ -2757,6 +3042,60 @@ func rewriteValue386_Op386CMPWconst_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (CMPWconst l:(MOVWload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPWconstload {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != Op386MOVWload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+                       break
+               }
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, Op386CMPWconstload, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386CMPWload_0(v *Value) bool {
+       // match: (CMPWload {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(int64(int16(c)),off)
+       // result: (CMPWconstload {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(int64(int16(c)), off)) {
+                       break
+               }
+               v.reset(Op386CMPWconstload)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValue386_Op386LEAL_0(v *Value) bool {
index b781d957258a7923b3280c3605d33112002cf34e..d75a55c5659a228583a45dc8c33f509b49d5a2ab 100644 (file)
@@ -417,6 +417,21 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Offset = v.AuxInt
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Args[0].Reg()
+       case ssa.Op386CMPLload, ssa.Op386CMPWload, ssa.Op386CMPBload:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = v.Args[0].Reg()
+               gc.AddAux(&p.From, v)
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Args[1].Reg()
+       case ssa.Op386CMPLconstload, ssa.Op386CMPWconstload, ssa.Op386CMPBconstload:
+               sc := v.AuxValAndOff()
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = v.Args[0].Reg()
+               gc.AddAux2(&p.From, v, sc.Off())
+               p.To.Type = obj.TYPE_CONST
+               p.To.Offset = sc.Val()
        case ssa.Op386MOVLconst:
                x := v.Reg()
 
index 2f010bcbaefc632fdf0e55bfe4765e5ee81b821c..ebd75d85d9178960d4f364ef8bcb55370d48c101 100644 (file)
@@ -122,6 +122,16 @@ func CmpMem5(p **int) {
        *p = nil
 }
 
+func CmpMem6(a []int) int {
+       // 386:`CMPL\s8\([A-Z]+\),`
+       // amd64:`CMPQ\s16\([A-Z]+\),`
+       if a[1] > a[2] {
+               return 1
+       } else {
+               return 2
+       }
+}
+
 // Check tbz/tbnz are generated when comparing against zero on arm64
 
 func CmpZero1(a int32, ptr *int) {