case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
// Flag condition: ZERO && !PARITY
// Generate:
- // MOV SRC,AX
- // CMOV*NE DST,AX
- // CMOV*PC AX,DST
+ // MOV SRC,TMP
+ // CMOV*NE DST,TMP
+ // CMOV*PC TMP,DST
//
// TODO(rasky): we could generate:
// CMOV*NE DST,SRC
// CMOV*PC SRC,DST
// But this requires a way for regalloc to know that SRC might be
// clobbered by this instruction.
- if v.Args[1].Reg() != x86.REG_AX {
- opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
- }
+ t := v.RegTmp()
+ opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
+
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Reg()
p.To.Type = obj.TYPE_REG
- p.To.Reg = x86.REG_AX
+ p.To.Reg = t
var q *obj.Prog
if v.Op == ssa.OpAMD64CMOVQEQF {
	q = s.Prog(x86.ACMOVQPC)
} else if v.Op == ssa.OpAMD64CMOVLEQF {
	q = s.Prog(x86.ACMOVLPC)
} else {
	q = s.Prog(x86.ACMOVWPC)
}
q.From.Type = obj.TYPE_REG
- q.From.Reg = x86.REG_AX
+ q.From.Reg = t
q.To.Type = obj.TYPE_REG
q.To.Reg = v.Reg()
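For a concrete sense of what this case emits, consider a float-equality select such as the hypothetical function below; on amd64 the branch can be eliminated into a CMOVQEQF, whose scratch register is now whatever free GP register the allocator picked instead of a hardwired AX.

	// Hypothetical example, not from this CL.
	func sel(a, b float64, x, y int64) int64 {
		r := y
		if a == b { // ordered equality: false if either operand is NaN
			r = x
		}
		return r
	}

	// Schematic lowering, writing TMP for v.RegTmp():
	//   UCOMISD b, a       // compare: unordered (NaN) sets ZF=PF=CF=1
	//   MOVQ    SRC, TMP   // TMP = SRC
	//   CMOVQNE DST, TMP   // not equal (ZF=0): TMP = DST
	//   CMOVQPC TMP, DST   // ordered (PF=0):   DST = TMP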
case ssa.OpAMD64SETNEF:
+ t := v.RegTmp()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
q := s.Prog(x86.ASETPS)
q.To.Type = obj.TYPE_REG
- q.To.Reg = x86.REG_AX
+ q.To.Reg = t
// ORL avoids partial register write and is smaller than ORQ, used by old compiler
- opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
+ opregreg(s, x86.AORL, v.Reg(), t)
case ssa.OpAMD64SETEQF:
+ t := v.RegTmp()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
q := s.Prog(x86.ASETPC)
q.To.Type = obj.TYPE_REG
- q.To.Reg = x86.REG_AX
+ q.To.Reg = t
// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
- opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
+ opregreg(s, x86.AANDL, v.Reg(), t)
case ssa.OpAMD64InvertFlags:
v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
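In boolean form, the SETEQF/SETNEF lowerings above perform the same NaN repair: UCOMISD reports unordered operands as ZF=PF=CF=1, so a bare SETEQ would claim NaN == NaN. The temporary holds the parity bit and the logical op folds it in; schematically, with t = v.RegTmp():

	// SETEQF:                            SETNEF:
	//   SETEQ DST    // DST = ZF           SETNE DST    // DST = !ZF
	//   SETPC t      // t   = !PF          SETPS t      // t   = PF
	//   ANDL  t,DST  // ZF && !PF          ORL   t,DST  // !ZF || PF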
gp1flags1flags = regInfo{inputs: []regMask{gp, 0}, outputs: []regMask{gp, 0}}
readflags = regInfo{inputs: nil, outputs: gponly}
- flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
gpload = regInfo{inputs: []regMask{gpspsbg, 0}, outputs: gponly}
gp21load = regInfo{inputs: []regMask{gp, gpspsbg, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}, outputs: gponly}
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsbg, gpsp, 0}, outputs: gponly}
- gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
gp21shxload = regInfo{inputs: []regMask{gpspsbg, gp, 0}, outputs: gponly}
gp21shxloadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, gp, 0}, outputs: gponly}
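For readers new to these tables: a regMask carries one bit per machine register, so the two deleted regInfos carved AX out of the usable set and declared it clobbered. The idiom, schematically (ax and gp are masks built earlier in this file; comments illustrative):

	// ax = buildReg("AX")  // one-bit mask selecting AX
	// gp &^ ax             // every allocatable GP register except AX
	// clobbers: ax         // the instruction destroys AX's contents

With needIntTemp the allocator hands out a scratch register instead, so no op has to monopolize AX.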
// CMOV with floating point instructions. We need separate pseudo-ops to handle
// InvertFlags correctly, and to generate special code that handles NaN (unordered flag).
// NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See
// code generation in amd64/ssa.go.
- {name: "CMOVQEQF", argLength: 3, reg: gp21pax, asm: "CMOVQNE", resultInArg0: true},
+ {name: "CMOVQEQF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true, needIntTemp: true},
{name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
{name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
- {name: "CMOVLEQF", argLength: 3, reg: gp21pax, asm: "CMOVLNE", resultInArg0: true},
+ {name: "CMOVLEQF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true, needIntTemp: true},
{name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
{name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
- {name: "CMOVWEQF", argLength: 3, reg: gp21pax, asm: "CMOVWNE", resultInArg0: true},
+ {name: "CMOVWEQF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true, needIntTemp: true},
{name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
{name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
// Need different opcodes for floating point conditions because
// any comparison involving a NaN is always FALSE and thus
// the patterns for inverting conditions cannot be used.
- {name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ", clobberFlags: true}, // extract == condition from arg0
- {name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE", clobberFlags: true}, // extract != condition from arg0
- {name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0
- {name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0
+ {name: "SETEQF", argLength: 1, reg: flagsgp, asm: "SETEQ", clobberFlags: true, needIntTemp: true}, // extract == condition from arg0
+ {name: "SETNEF", argLength: 1, reg: flagsgp, asm: "SETNE", clobberFlags: true, needIntTemp: true}, // extract != condition from arg0
+ {name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0
+ {name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0
{name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"}, // extract floating > condition from arg0
{name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0
resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
resultNotInArgs bool // outputs must not be allocated to the same registers as inputs
clobberFlags bool // this op clobbers flags register
+ needIntTemp bool // need a temporary free integer register
call bool // is a function call
tailCall bool // is a tail call
nilCheck bool // this op is a nil check on arg0
if v.clobberFlags {
fmt.Fprintln(w, "clobberFlags: true,")
}
+ if v.needIntTemp {
+ fmt.Fprintln(w, "needIntTemp: true,")
+ }
if v.call {
fmt.Fprintln(w, "call: true,")
}
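Regenerating opGen.go materializes the flag in each marked op's opInfo literal:

	// Emitted into each marked entry:
	//   needIntTemp: true,

And because these ops no longer pin AX, their generated register masks gain the AX bit (49134 becomes 49135, a difference of exactly AX's bit) and lose their clobbers line, as the entries below show.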
// when register allocation is done, maps value ids to locations
RegAlloc []Location
+ // temporary registers allocated to rare instructions
+ tempRegs map[ID]*Register
+
// map from LocalSlot to set of Values that we want to store in that slot.
NamedValues map[LocalSlot][]*Value
// Names is a copy of NamedValues.Keys. We keep a separate list
if int(v.ID) < len(r) && r[v.ID] != nil {
s += " : " + html.EscapeString(r[v.ID].String())
}
+ if reg := v.Block.Func.tempRegs[v.ID]; reg != nil {
+ s += " tmp=" + reg.String()
+ }
var names []string
for name, values := range v.Block.Func.NamedValues {
for _, value := range values {
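In a GOSSAFUNC ssa.html dump, the annotation added above appears right after the value's result register; a hypothetical line:

	v7 = CMOVQEQF <int64> v4 v5 v6 : CX tmp=DX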
resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
resultNotInArgs bool // outputs must not be allocated to the same registers as inputs
clobberFlags bool // this op clobbers flags register
+ needIntTemp bool // need a temporary free integer register
call bool // is a function call
tailCall bool // is a tail call
nilCheck bool // this op is a nil check on arg0
name: "CMOVQEQF",
argLen: 3,
resultInArg0: true,
+ needIntTemp: true,
asm: x86.ACMOVQNE,
reg: regInfo{
inputs: []inputInfo{
- {0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
- clobbers: 1, // AX
outputs: []outputInfo{
- {0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
name: "CMOVLEQF",
argLen: 3,
resultInArg0: true,
+ needIntTemp: true,
asm: x86.ACMOVLNE,
reg: regInfo{
inputs: []inputInfo{
- {0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
- clobbers: 1, // AX
outputs: []outputInfo{
- {0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
name: "CMOVWEQF",
argLen: 3,
resultInArg0: true,
+ needIntTemp: true,
asm: x86.ACMOVWNE,
reg: regInfo{
inputs: []inputInfo{
- {0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
- clobbers: 1, // AX
outputs: []outputInfo{
- {0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
name: "SETEQF",
argLen: 1,
clobberFlags: true,
+ needIntTemp: true,
asm: x86.ASETEQ,
reg: regInfo{
- clobbers: 1, // AX
outputs: []outputInfo{
- {0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
name: "SETNEF",
argLen: 1,
clobberFlags: true,
+ needIntTemp: true,
asm: x86.ASETNE,
reg: regInfo{
- clobbers: 1, // AX
outputs: []outputInfo{
- {0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
return s.f.Config.hasGReg && s.GReg == r
}
+// Dummy value used to represent the value being held in a temporary register.
+var tmpVal Value
+
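The placeholder exists because the allocator's bookkeeping is value-centric: allocReg takes the *Value it is allocating on behalf of (used, for example, in its "no register available" fatal message), and each occupied register records an occupant. One shared dummy suffices because a temp never outlives its instruction.

	// Signature as in regalloc.go:
	//   func (s *regAllocState) allocReg(mask regMask, v *Value) register
	// Temps have no SSA value of their own, hence the shared stand-in:
	//   tmpReg = s.allocReg(m, &tmpVal)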
func (s *regAllocState) regalloc(f *Func) {
regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
defer f.retSparseSet(regValLiveSet)
// Process all the non-phi values.
for idx, v := range oldSched {
+ tmpReg := noRegister
if s.f.pass.debug > regDebug {
fmt.Printf(" processing %s\n", v.LongString())
}
}
ok:
+ // Pick a temporary register if needed.
+ // It should be distinct from all the input registers, so we
+ // allocate it after all the input registers, but before
+ // the input registers are freed via advanceUses below.
+ // (Not all instructions need that distinct part, but it is conservative.)
+ if opcodeTable[v.Op].needIntTemp {
+ m := s.allocatable & s.f.Config.gpRegMask
+ if m&^desired.avoid != 0 {
+ m &^= desired.avoid
+ }
+ tmpReg = s.allocReg(m, &tmpVal)
+ s.nospill |= regMask(1) << tmpReg
+ }
+
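The desired.avoid step is a preference, not a requirement: steer the temp away from registers that nearby values want, but only when something else remains. With illustrative masks:

	// m = AX|CX|DX, desired.avoid = AX|CX:
	//   m &^ desired.avoid = DX, nonzero  -> shrink m to DX; temp gets DX
	// m = AX, desired.avoid = AX|CX:
	//   m &^ desired.avoid = 0            -> keep m; a disliked register beats none

The nospill bit then marks the temp as off-limits for reallocation while the rest of the instruction is assembled.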
// Now that all args are in regs, we're ready to issue the value itself.
// Before we pick a register for the output value, allow input registers
// to be deallocated. We do this here so that the output can use the
outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
maxOutIdx := -1
var used regMask
+ if tmpReg != noRegister {
+ // Ensure output registers are distinct from the temporary register.
+ // (Not all instructions need that distinct part, but it is conservative.)
+ used |= regMask(1) << tmpReg
+ }
for _, out := range regspec.outputs {
mask := out.regs & s.allocatable &^ used
if mask == 0 {
s.assignReg(r, v, v)
}
}
+ if tmpReg != noRegister {
+ // Remember the temp register allocation, if any.
+ if s.f.tempRegs == nil {
+ s.f.tempRegs = map[ID]*Register{}
+ }
+ s.f.tempRegs[v.ID] = &s.registers[tmpReg]
+ }
}
// deallocate dead args, if we have not done so
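After regalloc finishes, the recorded *Register reaches the code generators through Value.RegTmp (added in value.go below); the amd64 lowering at the top of this CL is the first caller:

	t := v.RegTmp()
	opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())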
for _, a := range v.Args {
s += fmt.Sprintf(" %v", a)
}
- var r []Location
- if v.Block != nil {
- r = v.Block.Func.RegAlloc
+ if v.Block == nil {
+ return s
}
+ r := v.Block.Func.RegAlloc
if int(v.ID) < len(r) && r[v.ID] != nil {
s += " : " + r[v.ID].String()
}
+ if reg := v.Block.Func.tempRegs[v.ID]; reg != nil {
+ s += " tmp=" + reg.String()
+ }
var names []string
- if v.Block != nil {
- for name, values := range v.Block.Func.NamedValues {
- for _, value := range values {
- if value == v {
- names = append(names, name.String())
- break // drop duplicates.
- }
- }
+ for name, values := range v.Block.Func.NamedValues {
+ for _, value := range values {
+ if value == v {
+ names = append(names, name.String())
+ break // drop duplicates.
}
}
}
return reg.(*Register).objNum
}
+// RegTmp returns the temporary register assigned to v, in cmd/internal/obj/$ARCH numbering.
+func (v *Value) RegTmp() int16 {
+ reg := v.Block.Func.tempRegs[v.ID]
+ if reg == nil {
+ v.Fatalf("nil tmp register for value: %s\n%s\n", v.LongString(), v.Block.Func)
+ }
+ return reg.objNum
+}
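Note that the map lookup above is safe even before any temp has been recorded: indexing a nil map in Go yields the zero value, which is also why the unguarded tempRegs access in html.go works. A two-line illustration:

	var m map[ID]*Register // nil until the first needIntTemp op is seen
	_ = m[7] == nil        // true: a read from a nil map returns the zero value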
+
func (v *Value) RegName() string {
reg := v.Block.Func.RegAlloc[v.ID]
if reg == nil {