Cypherpunks repositories - gostls13.git/commitdiff
[dev.ssa] cmd/compile: use INC/DEC instead of add when we can
author Ilya Tocar <ilya.tocar@intel.com>
Fri, 5 Feb 2016 16:24:53 +0000 (19:24 +0300)
committer David Chase <drchase@google.com>
Mon, 8 Feb 2016 22:02:58 +0000 (22:02 +0000)
INC/DEC produces slightly faster and smaller code.

Change-Id: I329d9bdb01b90041be45e053d9df640818bf0c2d
Reviewed-on: https://go-review.googlesource.com/19238
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/gc/ssa.go

index b7019d68b76860c668c289f6e54db6f8f90d9434..35a492923fa326472e2d8c8093d52165d6ceb7f9 100644 (file)
@@ -3985,12 +3985,47 @@ func (s *genState) genValue(v *ssa.Value) {
                r := regnum(v)
                a := regnum(v.Args[0])
                if r == a {
-                       p := Prog(v.Op.Asm())
-                       p.From.Type = obj.TYPE_CONST
-                       p.From.Offset = v.AuxInt
-                       p.To.Type = obj.TYPE_REG
-                       p.To.Reg = r
-                       return
+                       if v.AuxInt == 1 {
+                               var asm int
+                               switch v.Op {
+                               // Software optimization manual recommends add $1,reg.
+                               // But inc/dec is 1 byte smaller. ICC always uses inc
+                               // Clang/GCC choose depending on flags, but prefer add.
+                               // Experiments show that inc/dec is both a little faster
+                               // and makes the binary a little smaller.
+                               case ssa.OpAMD64ADDQconst:
+                                       asm = x86.AINCQ
+                               case ssa.OpAMD64ADDLconst:
+                                       asm = x86.AINCL
+                               case ssa.OpAMD64ADDWconst:
+                                       asm = x86.AINCW
+                               }
+                               p := Prog(asm)
+                               p.To.Type = obj.TYPE_REG
+                               p.To.Reg = r
+                               return
+                       } else if v.AuxInt == -1 {
+                               var asm int
+                               switch v.Op {
+                               case ssa.OpAMD64ADDQconst:
+                                       asm = x86.ADECQ
+                               case ssa.OpAMD64ADDLconst:
+                                       asm = x86.ADECL
+                               case ssa.OpAMD64ADDWconst:
+                                       asm = x86.ADECW
+                               }
+                               p := Prog(asm)
+                               p.To.Type = obj.TYPE_REG
+                               p.To.Reg = r
+                               return
+                       } else {
+                               p := Prog(v.Op.Asm())
+                               p.From.Type = obj.TYPE_CONST
+                               p.From.Offset = v.AuxInt
+                               p.To.Type = obj.TYPE_REG
+                               p.To.Reg = r
+                               return
+                       }
                }
                var asm int
                switch v.Op {
@@ -4027,15 +4062,83 @@ func (s *genState) genValue(v *ssa.Value) {
                //p.From3 = new(obj.Addr)
                //p.From3.Type = obj.TYPE_REG
                //p.From3.Reg = regnum(v.Args[0])
+       case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, ssa.OpAMD64SUBWconst:
+               x := regnum(v.Args[0])
+               r := regnum(v)
+               // We have 3-op add (lea), so transforming a = b - const into
+               // a = b + (-const) saves us 1 instruction. We can't fit
+               // -(-1 << 31) into the 4-byte offset of lea.
+               // We handle 2-address just fine below.
+               if v.AuxInt == -1<<31 || x == r {
+                       if x != r {
+                               // This code compensates for the fact that the register allocator
+                               // doesn't understand 2-address instructions yet.  TODO: fix that.
+                               p := Prog(moveByType(v.Type))
+                               p.From.Type = obj.TYPE_REG
+                               p.From.Reg = x
+                               p.To.Type = obj.TYPE_REG
+                               p.To.Reg = r
+                       }
+                       p := Prog(v.Op.Asm())
+                       p.From.Type = obj.TYPE_CONST
+                       p.From.Offset = v.AuxInt
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = r
+               } else if x == r && v.AuxInt == -1 {
+                       var asm int
+                       // x = x - (-1) is the same as x++
+                       // See OpAMD64ADDQconst comments about inc vs add $1,reg
+                       switch v.Op {
+                       case ssa.OpAMD64SUBQconst:
+                               asm = x86.AINCQ
+                       case ssa.OpAMD64SUBLconst:
+                               asm = x86.AINCL
+                       case ssa.OpAMD64SUBWconst:
+                               asm = x86.AINCW
+                       }
+                       p := Prog(asm)
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = r
+               } else if x == r && v.AuxInt == 1 {
+                       var asm int
+                       switch v.Op {
+                       case ssa.OpAMD64SUBQconst:
+                               asm = x86.ADECQ
+                       case ssa.OpAMD64SUBLconst:
+                               asm = x86.ADECL
+                       case ssa.OpAMD64SUBWconst:
+                               asm = x86.ADECW
+                       }
+                       p := Prog(asm)
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = r
+               } else {
+                       var asm int
+                       switch v.Op {
+                       case ssa.OpAMD64SUBQconst:
+                               asm = x86.ALEAQ
+                       case ssa.OpAMD64SUBLconst:
+                               asm = x86.ALEAL
+                       case ssa.OpAMD64SUBWconst:
+                               asm = x86.ALEAW
+                       }
+                       p := Prog(asm)
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Reg = x
+                       p.From.Offset = -v.AuxInt
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = r
+               }
+
        case ssa.OpAMD64ADDBconst,
                ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst, ssa.OpAMD64ANDWconst, ssa.OpAMD64ANDBconst,
                ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64ORWconst, ssa.OpAMD64ORBconst,
                ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64XORWconst, ssa.OpAMD64XORBconst,
-               ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, ssa.OpAMD64SUBWconst, ssa.OpAMD64SUBBconst,
-               ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHLWconst, ssa.OpAMD64SHLBconst,
-               ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
-               ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
-               ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
+               ssa.OpAMD64SUBBconst, ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHLWconst,
+               ssa.OpAMD64SHLBconst, ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst,
+               ssa.OpAMD64SHRBconst, ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst,
+               ssa.OpAMD64SARBconst, ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst,
+               ssa.OpAMD64ROLBconst:
                // This code compensates for the fact that the register allocator
                // doesn't understand 2-address instructions yet.  TODO: fix that.
                x := regnum(v.Args[0])