From 9f8f8c27dca1b27e9567df4f3aa4e7d8c31f3ec2 Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder
Date: Sat, 22 Aug 2015 19:38:12 -0700
Subject: [PATCH] [dev.ssa] cmd/compile: support spilling and loading flags

This CL takes a simple approach to spilling and loading flags:
We never spill. When a load is needed, we recalculate,
loading the arguments as needed.

This is simple and architecture-independent.
It is not very efficient, but as of this CL,
there are fewer than 200 flag spills during make.bash.

This was tested by manually reverting CLs 13813 and 13843,
causing SETcc, MOV, and LEA instructions to clobber flags,
which dramatically increases the number of flag spills.
With that done, all stdlib tests that used to pass still pass.

For future reference, here are some other, more efficient
amd64-only schemes that we could adapt in the future if needed.

(1) Spill exactly the flags needed.

For example, if we know that the flags will be needed by a SETcc
or Jcc op later, we could use SETcc to extract just the relevant
flag. When needed, we could use TESTB and change the op to
JNE/SETNE. (Alternatively, we could leave the op unaltered and
prepare an appropriate CMPB instruction to produce the desired
flag.)

However, this requires separate handling for every instruction
that uses the flags register, including (say) SBBQcarrymask.
We could enable this on an ad hoc basis for common cases and
fall back to recalculation for other cases.

(2) Spill all flags with PUSHF and POPF.

This modifies SP, which the runtime won't like. It also requires
coordination with stackalloc to make sure that we have a stack
slot ready for use.

(3) Spill almost all flags with LAHF, SETO, and SAHF.

See http://blog.freearrow.com/archives/396 for details.
This would handle all the flags we currently use.
However, LAHF and SAHF are not universally available,
and this scheme requires arranging for AX to be free.

Change-Id: Ie36600fd8e807ef2bee83e2e2ae3685112a7f276
Reviewed-on: https://go-review.googlesource.com/13844
Reviewed-by: Keith Randall
---
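Editor's sketch of the recalculation strategy, placed here between the
"---" marker and the diff so that git am ignores it. This is a toy
model under assumed names: value, materialize, and the op strings are
illustrative stand-ins for the SSA types and opcodes in package ssa,
not the compiler's actual API.

package main

import "fmt"

// value is a toy stand-in for an SSA value: an opcode, arguments,
// and whether the result lives in the flags register.
type value struct {
	op      string
	args    []*value
	isFlags bool
}

// materialize makes a value's result available again after its
// register was lost. An ordinary value is reloaded from its spill
// slot; a flags value is instead recalculated by re-issuing the
// instruction that produced it, after unwrapping any copies, loads,
// and stores around its arguments.
func materialize(v *value) *value {
	if !v.isFlags {
		// Normal case: load from the spill slot.
		return &value{op: "LoadReg", args: []*value{v}}
	}
	args := make([]*value, len(v.args))
	for i, a := range v.args {
		// Walk back to the original argument.
		for a.op == "LoadReg" || a.op == "StoreReg" || a.op == "Copy" {
			a = a.args[0]
		}
		// Put it in a register, spilling and loading as needed.
		args[i] = materialize(a)
	}
	// Re-issue the flag-producing instruction (e.g. CMPQ).
	return &value{op: v.op, args: args, isFlags: true}
}

func main() {
	x := &value{op: "Const"}
	cmp := &value{op: "CMPQ", args: []*value{x, x}, isFlags: true}
	fmt.Println(materialize(cmp).op) // prints CMPQ again, not a load
}

The real change below additionally saves and restores s.nospill and
copies Aux/AuxInt onto the recalculated value.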
 src/cmd/compile/internal/ssa/regalloc.go | 52 ++++++++++++++++++++----
 1 file changed, 44 insertions(+), 8 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index d593faf95b..b62f9042b6 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -38,6 +38,12 @@
 // x3 can then be used wherever x is referenced again.
 // If the spill (x2) is never used, it will be removed at the end of regalloc.
 //
+// Flags values are special. Instead of attempting to spill and restore the flags
+// register, we recalculate it if needed.
+// There are more efficient schemes (see the discussion in CL 13844),
+// but flag restoration is empirically rare, and this approach is simple
+// and architecture-independent.
+//
 // Phi values are special, as always. We define two kinds of phis, those
 // where the merge happens in a register (a "register" phi) and those where
 // the merge happens in a stack location (a "stack" phi).
@@ -391,17 +397,45 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool) *Val
 		}
 		c = s.curBlock.NewValue1(v.Line, OpCopy, v.Type, s.regs[r2].c)
 	} else {
+		switch {
+		// It is difficult to spill and reload flags on many architectures.
+		// Instead, we regenerate the flags register by issuing the same instruction again.
+		// This requires (possibly) spilling and reloading that instruction's args.
+		case v.Type.IsFlags():
+			ns := s.nospill
+			// Place v's arguments in registers, spilling and loading as needed
+			args := make([]*Value, 0, len(v.Args))
+			regspec := opcodeTable[v.Op].reg
+			for _, i := range regspec.inputs {
+				a := v.Args[i.idx]
+				// Extract the original arguments to v
+				for a.Op == OpLoadReg || a.Op == OpStoreReg || a.Op == OpCopy {
+					a = a.Args[0]
+				}
+				if a.Type.IsFlags() {
+					s.f.Fatalf("cannot load flags value with flags arg: %v has unwrapped arg %v", v.LongString(), a.LongString())
+				}
+				cc := s.allocValToReg(a, i.regs, true)
+				args = append(args, cc)
+			}
+			s.nospill = ns
+			// Recalculate v
+			c = s.curBlock.NewValue0(v.Line, v.Op, v.Type)
+			c.Aux = v.Aux
+			c.AuxInt = v.AuxInt
+			c.resetArgs()
+			c.AddArgs(args...)
+
 		// Load v from its spill location.
 		// TODO: rematerialize if we can.
-		if vi.spill2 != nil {
+		case vi.spill2 != nil:
 			c = s.curBlock.NewValue1(v.Line, OpLoadReg, v.Type, vi.spill2)
 			vi.spill2used = true
-		} else {
+		case vi.spill != nil:
 			c = s.curBlock.NewValue1(v.Line, OpLoadReg, v.Type, vi.spill)
 			vi.spillUsed = true
-		}
-		if v.Type.IsFlags() {
-			v.Unimplementedf("spill of flags not implemented yet")
+		default:
+			s.f.Fatalf("attempt to load unspilled value %v", v.LongString())
 		}
 	}
 	s.assignReg(r, v, c)
@@ -716,9 +750,11 @@ func (s *regAllocState) regalloc(f *Func) {
 
 			// Issue a spill for this value. We issue spills unconditionally,
 			// then at the end of regalloc delete the ones we never use.
-			spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v)
-			s.values[v.ID].spill = spill
-			s.values[v.ID].spillUsed = false
+			if !v.Type.IsFlags() {
+				spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v)
+				s.values[v.ID].spill = spill
+				s.values[v.ID].spillUsed = false
+			}
 
 			// Increment pc for next Value.
 			pc++
-- 
2.48.1
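Editor's sketch of scheme (1) from the commit message, kept below the
signature delimiter so git am likewise ignores it. The inst type and
preserveZF are made-up names, not compiler code; the instruction
sequence is the SETcc/TESTB/JNE rewrite described above, shown here
for the zero flag and a JEQ consumer.

package main

import "fmt"

// inst is a toy instruction: an opcode and its operands.
type inst struct {
	op   string
	args []string
}

// preserveZF shows the idea behind scheme (1): rather than saving the
// whole flags register, capture only the zero flag into a byte
// register with SETEQ, let the clobbering instructions run, then
// rederive a usable flag with TESTB. The original JEQ consumer
// becomes a JNE, because TESTB BL, BL sets ZF exactly when BL == 0,
// i.e. when the original ZF was clear.
func preserveZF(clobbers []inst) []inst {
	out := []inst{{"SETEQ", []string{"BL"}}} // BL = 1 iff ZF was set
	out = append(out, clobbers...)           // these overwrite the flags
	out = append(out,
		inst{"TESTB", []string{"BL", "BL"}}, // ZF = (BL == 0)
		inst{"JNE", []string{"target"}},     // jumps iff the original JEQ would have
	)
	return out
}

func main() {
	for _, i := range preserveZF([]inst{{"LEAQ", []string{"8(SP)", "AX"}}}) {
		fmt.Println(i.op, i.args)
	}
}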