ScorePhi = iota // towards top of block
ScoreArg
ScoreNilCheck
+ ScoreCarryChainTail
ScoreReadTuple
ScoreVarDef
ScoreMemory
// this value is already live. This also removes its
// false dependency on the other part of the tuple.
// Also ensures tuple is never spilled.
- score[v.ID] = ScoreReadTuple
+ if (v.Op == OpSelect1 || v.Op == OpSelect0) && v.Args[0].Op.isCarry() {
+ // Score tuple ops of carry ops later to ensure they do not
+ // delay scheduling the tuple-generating op. If such tuple ops
+ // are not placed more readily, unrelated carry-clobbering ops
+ // may be placed between two carry-dependent operations.
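+ // For example (operand names are placeholders), in
+ //   t = ADDC x y
+ //   u = ADDE a b (Select1 t)
+ // an unrelated carry-clobbering op placed between t and u would
+ // destroy the carry before u consumes it.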
+ score[v.ID] = ScoreFlags
+ } else {
+ score[v.ID] = ScoreReadTuple
+ }
+ case v.Op.isCarry():
+ if w := v.getCarryProducer(); w != nil {
+ // The producing op is not the final user of the carry bit. Its
+ // current score is one of unscored, Flags, or CarryChainTail.
+ // These occur if the producer has not been scored, another user
+ // of the producer's carry flag was scored (there are >1 users of
+ // the carry-out flag), or it was visited earlier and already
+ // scored CarryChainTail (which proves w is not a tail).
+ score[w.ID] = ScoreFlags
+ }
+ // Check whether v has already been rescored to ScoreFlags by one of
+ // its users. If not, v may be the final (tail) operation in a carry
+ // chain. If it is not a tail, v will be rescored above when its
+ // carry-using op is scored. When scoring is done, only tail
+ // operations retain the CarryChainTail score.
+ if score[v.ID] != ScoreFlags {
+ // Score the tail of carry chain operations to a lower (earlier in the
+ // block) priority. This creates a priority inversion which allows only
+ // one chain to be scheduled at a time, if possible.
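+ // For illustration (ops and operands are hypothetical), in a chain
+ //   t0 = ADDC x0 y0
+ //   t1 = ADDE x1 y1 (Select1 t0)
+ //   t2 = ADDE x2 y2 (Select1 t1)
+ // t0 and t1 are rescored to ScoreFlags by their carry users, and
+ // only the tail t2 keeps ScoreCarryChainTail.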
+ score[v.ID] = ScoreCarryChainTail
+ }
case v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags():
// Schedule flag register generation as late as possible.
// This makes sure that we only have one live flags
// value at a time.
v := heap.Pop(priq).(*Value)
+ if f.pass.debug > 1 && score[v.ID] == ScoreCarryChainTail && v.Op.isCarry() {
+ // Add some debugging noise if the chain of carry ops will likely not
+ // be scheduled without potential carry flag clobbers.
+ if !isCarryChainReady(v, uses) {
+ f.Warnl(v.Pos, "carry chain ending with %v not ready", v)
+ }
+ }
+
// Add it to the schedule.
// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
// TODO: maybe remove ReadTuple score above, if it does not help performance.
return order
}
+// Return whether all dependent carry ops can be scheduled after this.
+func isCarryChainReady(v *Value, uses []int32) bool {
+ // A chain can be scheduled in its entirety if
+ // the use count of each dependent op is 1. If none,
+ // schedule the first.
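+ // For example, in a hypothetical three-op chain t0 -> t1 -> t2
+ // (t2 being the tail), uses[t2.ID] == 0 when t2 is popped, and
+ // uses[t0.ID] == uses[t1.ID] == 1 when each carry output has a
+ // single consumer, giving j = 1 + (0-1) + (1-1) + (1-1) == 0.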
+ j := 1 // For the first op (v), uses[v.ID] == 0. Dependent ops are always >= 1.
+ for k := v; k != nil; k = k.getCarryProducer() {
+ j += int(uses[k.ID]) - 1
+ }
+ return j == 0
+}
+
+// Return whether op is an operation which produces a carry bit value but does not consume one.
+func (op Op) isCarryCreator() bool {
+ switch op {
+ case OpPPC64SUBC, OpPPC64ADDC, OpPPC64SUBCconst, OpPPC64ADDCconst:
+ return true
+ }
+ return false
+}
+
+// Return whether op consumes or creates a carry bit value.
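+// For example, OpPPC64ADDE both consumes a carry bit and produces one,
+// while OpPPC64ADDC (an isCarryCreator op) only produces one.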
+func (op Op) isCarry() bool {
+ switch op {
+ case OpPPC64SUBE, OpPPC64ADDE, OpPPC64SUBZEzero, OpPPC64ADDZEzero:
+ return true
+ }
+ return op.isCarryCreator()
+}
+
+// Return the *Value which produces the carry bit consumed by this op, or nil if none.
+func (v *Value) getCarryProducer() *Value {
+ if v.Op.isCarry() && !v.Op.isCarryCreator() {
+ // PPC64 carry dependencies are conveyed through their final argument.
+ // As such, there is always an OpSelect1 between the producer and consumer.
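+ // For example, if v = (ADDE x y (Select1 (ADDC a b))), with x, y, a,
+ // and b placeholders, the final argument is the Select1 and its
+ // Args[0] is the producing ADDC.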
+ return v.Args[len(v.Args)-1].Args[0]
+ }
+ return nil
+}
+
type bySourcePos []*Value
func (s bySourcePos) Len() int { return len(s) }