From a0da2d242c0830daf9de469f2db7f1b85523bf05 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 4 Feb 2016 15:08:47 -0800 Subject: [PATCH] [dev.ssa] cmd/compile: Use ADD instead of LEA when we can MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If the output register is one of the input registers, we can use a real add instead of LEA. Change-Id: Ide58f1536afb077c0b939d3a8c7555807fd1c5e3 Reviewed-on: https://go-review.googlesource.com/19234 Reviewed-by: Alexandru Moșoi --- src/cmd/compile/internal/gc/ssa.go | 75 +++++++++++++++++++----------- src/cmd/compile/internal/ssa/TODO | 2 - 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 8ae02bd4ca..7b85b2fc8a 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -3689,31 +3689,41 @@ func opregreg(op int, dest, src int16) *obj.Prog { func (s *genState) genValue(v *ssa.Value) { lineno = v.Line switch v.Op { - case ssa.OpAMD64ADDQ: - // TODO: use addq instead of leaq if target is in the right register. - p := Prog(x86.ALEAQ) - p.From.Type = obj.TYPE_MEM - p.From.Reg = regnum(v.Args[0]) - p.From.Scale = 1 - p.From.Index = regnum(v.Args[1]) - p.To.Type = obj.TYPE_REG - p.To.Reg = regnum(v) - case ssa.OpAMD64ADDL: - p := Prog(x86.ALEAL) - p.From.Type = obj.TYPE_MEM - p.From.Reg = regnum(v.Args[0]) - p.From.Scale = 1 - p.From.Index = regnum(v.Args[1]) - p.To.Type = obj.TYPE_REG - p.To.Reg = regnum(v) - case ssa.OpAMD64ADDW: - p := Prog(x86.ALEAW) - p.From.Type = obj.TYPE_MEM - p.From.Reg = regnum(v.Args[0]) - p.From.Scale = 1 - p.From.Index = regnum(v.Args[1]) - p.To.Type = obj.TYPE_REG - p.To.Reg = regnum(v) + case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL, ssa.OpAMD64ADDW: + r := regnum(v) + r1 := regnum(v.Args[0]) + r2 := regnum(v.Args[1]) + switch { + case r == r1: + p := Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r2 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + case r == r2: + p := Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + default: + var asm int + switch v.Op { + case ssa.OpAMD64ADDQ: + asm = x86.ALEAQ + case ssa.OpAMD64ADDL: + asm = x86.ALEAL + case ssa.OpAMD64ADDW: + asm = x86.ALEAW + } + p := Prog(asm) + p.From.Type = obj.TYPE_MEM + p.From.Reg = r1 + p.From.Scale = 1 + p.From.Index = r2 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + } // 2-address opcode arithmetic, symmetric case ssa.OpAMD64ADDB, ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ANDW, ssa.OpAMD64ANDB, @@ -3903,7 +3913,16 @@ func (s *genState) genValue(v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst, ssa.OpAMD64ADDWconst: - // TODO: use addq instead of leaq if target is in the right register. + r := regnum(v) + a := regnum(v.Args[0]) + if r == a { + p := Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt + p.To.Type = obj.TYPE_REG + p.To.Reg = r + return + } var asm int switch v.Op { case ssa.OpAMD64ADDQconst: @@ -3915,10 +3934,10 @@ func (s *genState) genValue(v *ssa.Value) { } p := Prog(asm) p.From.Type = obj.TYPE_MEM - p.From.Reg = regnum(v.Args[0]) + p.From.Reg = a p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG - p.To.Reg = regnum(v) + p.To.Reg = r case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst: r := regnum(v) x := regnum(v.Args[0]) diff --git a/src/cmd/compile/internal/ssa/TODO b/src/cmd/compile/internal/ssa/TODO index 3191670a0e..73396c7637 100644 --- a/src/cmd/compile/internal/ssa/TODO +++ b/src/cmd/compile/internal/ssa/TODO @@ -21,7 +21,6 @@ Optimizations (better compiled code) - Add a value range propagation pass (for bounds elim & bitwidth reduction) - Make dead store pass inter-block - (x86) More combining address arithmetic into loads/stores -- (x86) use ADDQ instead of LEAQ when we can - redundant CMP in sequences like this: SUBQ $8, AX CMP AX, $0 @@ -37,7 +36,6 @@ Optimizations (better compiled code) Same for interfaces? - boolean logic: movb/xorb$1/testb/jeq -> movb/testb/jne - (ADDQconst (SUBQconst x)) and vice-versa -- combine LEAQs - store followed by load to same address - (CMPconst [0] (AND x y)) -> (TEST x y) - more (LOAD (ADDQ )) -> LOADIDX -- 2.48.1