Cypherpunks repositories - gostls13.git/commitdiff
[dev.ssa] cmd/compile: add FP comparison ops
authorDavid Chase <drchase@google.com>
Tue, 18 Aug 2015 18:39:26 +0000 (14:39 -0400)
committerDavid Chase <drchase@google.com>
Tue, 25 Aug 2015 15:29:22 +0000 (15:29 +0000)
Basic ops, no particular optimization in the pattern
matching yet (e.g. x!=x for Nan detection, x cmp constant,
etc.)

Change-Id: I0043564081d6dc0eede876c4a9eb3c33cbd1521c
Reviewed-on: https://go-review.googlesource.com/13704
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/gc/testdata/fp_ssa.go
src/cmd/compile/internal/ssa/fuse.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/gen/main.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go

index 8e44ede318a28a0ea0037a568370e686e732a07b..676de231150389eb2aca92d3f4f2a153607b26f5 100644 (file)
@@ -848,6 +848,8 @@ var opToSSA = map[opAndType]ssa.Op{
        opAndType{OEQ, TCHAN}:      ssa.OpEqPtr,
        opAndType{OEQ, TUINTPTR}:   ssa.OpEqPtr,
        opAndType{OEQ, TUNSAFEPTR}: ssa.OpEqPtr,
+       opAndType{OEQ, TFLOAT64}:   ssa.OpEq64F,
+       opAndType{OEQ, TFLOAT32}:   ssa.OpEq32F,
 
        opAndType{ONE, TBOOL}:      ssa.OpNeq8,
        opAndType{ONE, TINT8}:      ssa.OpNeq8,
@@ -866,42 +868,52 @@ var opToSSA = map[opAndType]ssa.Op{
        opAndType{ONE, TCHAN}:      ssa.OpNeqPtr,
        opAndType{ONE, TUINTPTR}:   ssa.OpNeqPtr,
        opAndType{ONE, TUNSAFEPTR}: ssa.OpNeqPtr,
-
-       opAndType{OLT, TINT8}:   ssa.OpLess8,
-       opAndType{OLT, TUINT8}:  ssa.OpLess8U,
-       opAndType{OLT, TINT16}:  ssa.OpLess16,
-       opAndType{OLT, TUINT16}: ssa.OpLess16U,
-       opAndType{OLT, TINT32}:  ssa.OpLess32,
-       opAndType{OLT, TUINT32}: ssa.OpLess32U,
-       opAndType{OLT, TINT64}:  ssa.OpLess64,
-       opAndType{OLT, TUINT64}: ssa.OpLess64U,
-
-       opAndType{OGT, TINT8}:   ssa.OpGreater8,
-       opAndType{OGT, TUINT8}:  ssa.OpGreater8U,
-       opAndType{OGT, TINT16}:  ssa.OpGreater16,
-       opAndType{OGT, TUINT16}: ssa.OpGreater16U,
-       opAndType{OGT, TINT32}:  ssa.OpGreater32,
-       opAndType{OGT, TUINT32}: ssa.OpGreater32U,
-       opAndType{OGT, TINT64}:  ssa.OpGreater64,
-       opAndType{OGT, TUINT64}: ssa.OpGreater64U,
-
-       opAndType{OLE, TINT8}:   ssa.OpLeq8,
-       opAndType{OLE, TUINT8}:  ssa.OpLeq8U,
-       opAndType{OLE, TINT16}:  ssa.OpLeq16,
-       opAndType{OLE, TUINT16}: ssa.OpLeq16U,
-       opAndType{OLE, TINT32}:  ssa.OpLeq32,
-       opAndType{OLE, TUINT32}: ssa.OpLeq32U,
-       opAndType{OLE, TINT64}:  ssa.OpLeq64,
-       opAndType{OLE, TUINT64}: ssa.OpLeq64U,
-
-       opAndType{OGE, TINT8}:   ssa.OpGeq8,
-       opAndType{OGE, TUINT8}:  ssa.OpGeq8U,
-       opAndType{OGE, TINT16}:  ssa.OpGeq16,
-       opAndType{OGE, TUINT16}: ssa.OpGeq16U,
-       opAndType{OGE, TINT32}:  ssa.OpGeq32,
-       opAndType{OGE, TUINT32}: ssa.OpGeq32U,
-       opAndType{OGE, TINT64}:  ssa.OpGeq64,
-       opAndType{OGE, TUINT64}: ssa.OpGeq64U,
+       opAndType{ONE, TFLOAT64}:   ssa.OpNeq64F,
+       opAndType{ONE, TFLOAT32}:   ssa.OpNeq32F,
+
+       opAndType{OLT, TINT8}:    ssa.OpLess8,
+       opAndType{OLT, TUINT8}:   ssa.OpLess8U,
+       opAndType{OLT, TINT16}:   ssa.OpLess16,
+       opAndType{OLT, TUINT16}:  ssa.OpLess16U,
+       opAndType{OLT, TINT32}:   ssa.OpLess32,
+       opAndType{OLT, TUINT32}:  ssa.OpLess32U,
+       opAndType{OLT, TINT64}:   ssa.OpLess64,
+       opAndType{OLT, TUINT64}:  ssa.OpLess64U,
+       opAndType{OLT, TFLOAT64}: ssa.OpLess64F,
+       opAndType{OLT, TFLOAT32}: ssa.OpLess32F,
+
+       opAndType{OGT, TINT8}:    ssa.OpGreater8,
+       opAndType{OGT, TUINT8}:   ssa.OpGreater8U,
+       opAndType{OGT, TINT16}:   ssa.OpGreater16,
+       opAndType{OGT, TUINT16}:  ssa.OpGreater16U,
+       opAndType{OGT, TINT32}:   ssa.OpGreater32,
+       opAndType{OGT, TUINT32}:  ssa.OpGreater32U,
+       opAndType{OGT, TINT64}:   ssa.OpGreater64,
+       opAndType{OGT, TUINT64}:  ssa.OpGreater64U,
+       opAndType{OGT, TFLOAT64}: ssa.OpGreater64F,
+       opAndType{OGT, TFLOAT32}: ssa.OpGreater32F,
+
+       opAndType{OLE, TINT8}:    ssa.OpLeq8,
+       opAndType{OLE, TUINT8}:   ssa.OpLeq8U,
+       opAndType{OLE, TINT16}:   ssa.OpLeq16,
+       opAndType{OLE, TUINT16}:  ssa.OpLeq16U,
+       opAndType{OLE, TINT32}:   ssa.OpLeq32,
+       opAndType{OLE, TUINT32}:  ssa.OpLeq32U,
+       opAndType{OLE, TINT64}:   ssa.OpLeq64,
+       opAndType{OLE, TUINT64}:  ssa.OpLeq64U,
+       opAndType{OLE, TFLOAT64}: ssa.OpLeq64F,
+       opAndType{OLE, TFLOAT32}: ssa.OpLeq32F,
+
+       opAndType{OGE, TINT8}:    ssa.OpGeq8,
+       opAndType{OGE, TUINT8}:   ssa.OpGeq8U,
+       opAndType{OGE, TINT16}:   ssa.OpGeq16,
+       opAndType{OGE, TUINT16}:  ssa.OpGeq16U,
+       opAndType{OGE, TINT32}:   ssa.OpGeq32,
+       opAndType{OGE, TUINT32}:  ssa.OpGeq32U,
+       opAndType{OGE, TINT64}:   ssa.OpGeq64,
+       opAndType{OGE, TUINT64}:  ssa.OpGeq64U,
+       opAndType{OGE, TFLOAT64}: ssa.OpGeq64F,
+       opAndType{OGE, TFLOAT32}: ssa.OpGeq32F,
 
        opAndType{OLROT, TUINT8}:  ssa.OpLrot8,
        opAndType{OLROT, TUINT16}: ssa.OpLrot16,
@@ -2198,7 +2210,7 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
 }
 
 // opregreg emits instructions for
-//     dest := dest op src
+//     dest := dest(To) op src(From)
 // and also returns the created obj.Prog so it
 // may be further adjusted (offset, scale, etc).
 func opregreg(op int, dest, src int16) *obj.Prog {
@@ -2522,11 +2534,11 @@ func genValue(v *ssa.Value) {
                p.To.Reg = regnum(v)
        case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
                ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
-               p := Prog(v.Op.Asm())
-               p.From.Type = obj.TYPE_REG
-               p.From.Reg = regnum(v.Args[0])
-               p.To.Type = obj.TYPE_REG
-               p.To.Reg = regnum(v.Args[1])
+               opregreg(v.Op.Asm(), regnum(v.Args[1]), regnum(v.Args[0]))
+       case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
+               // Go assembler has swapped operands for UCOMISx relative to CMP,
+               // must account for that right here.
+               opregreg(v.Op.Asm(), regnum(v.Args[0]), regnum(v.Args[1]))
        case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst,
                ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
                p := Prog(v.Op.Asm())
@@ -2763,11 +2775,34 @@ func genValue(v *ssa.Value) {
        case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
                ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
                ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
+               ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
                ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
+               ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
                ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
                p := Prog(v.Op.Asm())
                p.To.Type = obj.TYPE_REG
                p.To.Reg = regnum(v)
+
+       case ssa.OpAMD64SETNEF:
+               p := Prog(v.Op.Asm())
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = regnum(v)
+               q := Prog(x86.ASETPS)
+               q.To.Type = obj.TYPE_REG
+               q.To.Reg = x86.REG_AX
+               // TODO AORQ copied from old code generator, why not AORB?
+               opregreg(x86.AORQ, regnum(v), x86.REG_AX)
+
+       case ssa.OpAMD64SETEQF:
+               p := Prog(v.Op.Asm())
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = regnum(v)
+               q := Prog(x86.ASETPC)
+               q.To.Type = obj.TYPE_REG
+               q.To.Reg = x86.REG_AX
+               // TODO AANDQ copied from old code generator, why not AANDB?
+               opregreg(x86.AANDQ, regnum(v), x86.REG_AX)
+
        case ssa.OpAMD64InvertFlags:
                v.Fatalf("InvertFlags should never make it to codegen %v", v)
        case ssa.OpAMD64REPSTOSQ:
@@ -2808,7 +2843,9 @@ func movZero(as int, width int64, nbytes int64, offset int64, regnum int16) (nle
        return nleft, offset
 }
 
-var blockJump = [...]struct{ asm, invasm int }{
+var blockJump = [...]struct {
+       asm, invasm int
+}{
        ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
        ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
        ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
@@ -2819,6 +2856,63 @@ var blockJump = [...]struct{ asm, invasm int }{
        ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
        ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
        ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
+       ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
+       ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
+}
+
+type floatingEQNEJump struct {
+       jump, index int
+}
+
+var eqfJumps = [2][2]floatingEQNEJump{
+       {{x86.AJNE, 1}, {x86.AJPS, 1}}, // next == b.Succs[0]
+       {{x86.AJNE, 1}, {x86.AJPC, 0}}, // next == b.Succs[1]
+}
+var nefJumps = [2][2]floatingEQNEJump{
+       {{x86.AJNE, 0}, {x86.AJPC, 1}}, // next == b.Succs[0]
+       {{x86.AJNE, 0}, {x86.AJPS, 0}}, // next == b.Succs[1]
+}
+
+func oneFPJump(b *ssa.Block, jumps *floatingEQNEJump, likely ssa.BranchPrediction, branches []branch) []branch {
+       p := Prog(jumps.jump)
+       p.To.Type = obj.TYPE_BRANCH
+       to := jumps.index
+       branches = append(branches, branch{p, b.Succs[to]})
+       if to == 1 {
+               likely = -likely
+       }
+       // liblink reorders the instruction stream as it sees fit.
+       // Pass along what we know so liblink can make use of it.
+       // TODO: Once we've fully switched to SSA,
+       // make liblink leave our output alone.
+       switch likely {
+       case ssa.BranchUnlikely:
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = 0
+       case ssa.BranchLikely:
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = 1
+       }
+       return branches
+}
+
+func genFPJump(b, next *ssa.Block, jumps *[2][2]floatingEQNEJump, branches []branch) []branch {
+       likely := b.Likely
+       switch next {
+       case b.Succs[0]:
+               branches = oneFPJump(b, &jumps[0][0], likely, branches)
+               branches = oneFPJump(b, &jumps[0][1], likely, branches)
+       case b.Succs[1]:
+               branches = oneFPJump(b, &jumps[1][0], likely, branches)
+               branches = oneFPJump(b, &jumps[1][1], likely, branches)
+       default:
+               branches = oneFPJump(b, &jumps[1][0], likely, branches)
+               branches = oneFPJump(b, &jumps[1][1], likely, branches)
+               q := Prog(obj.AJMP)
+               q.To.Type = obj.TYPE_BRANCH
+               branches = append(branches, branch{q, b.Succs[1]})
+       }
+       return branches
 }
 
 func genBlock(b, next *ssa.Block, branches []branch) []branch {
@@ -2849,12 +2943,18 @@ func genBlock(b, next *ssa.Block, branches []branch) []branch {
                        p.To.Type = obj.TYPE_BRANCH
                        branches = append(branches, branch{p, b.Succs[0]})
                }
+
+       case ssa.BlockAMD64EQF:
+               branches = genFPJump(b, next, &eqfJumps, branches)
+
+       case ssa.BlockAMD64NEF:
+               branches = genFPJump(b, next, &nefJumps, branches)
+
        case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
                ssa.BlockAMD64LT, ssa.BlockAMD64GE,
                ssa.BlockAMD64LE, ssa.BlockAMD64GT,
                ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
                ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
-
                jmp := blockJump[b.Kind]
                likely := b.Likely
                var p *obj.Prog
index 1a52100d6b99ad3dffcd2f03f7432c449d49d205..95e3cf9196ad9e3eeec5fd43ce823cd66125b8c2 100644 (file)
@@ -10,64 +10,6 @@ package main
 
 import "fmt"
 
-func fail64(s string, f func(a, b float64) float64, a, b, e float64) int {
-       d := f(a, b)
-       if d != e {
-               fmt.Printf("For (float64) %v %v %v, expected %v, got %v\n", a, s, b, e, d)
-               return 1
-       }
-       return 0
-}
-
-func fail32(s string, f func(a, b float32) float32, a, b, e float32) int {
-       d := f(a, b)
-       if d != e {
-               fmt.Printf("For (float32) %v %v %v, expected %v, got %v\n", a, s, b, e, d)
-               return 1
-       }
-       return 0
-}
-
-func expect64(s string, x, expected float64) int {
-       if x != expected {
-               println("Expected", expected, "for", s, ", got", x)
-               return 1
-       }
-       return 0
-}
-
-func expect32(s string, x, expected float32) int {
-       if x != expected {
-               println("Expected", expected, "for", s, ", got", x)
-               return 1
-       }
-       return 0
-}
-
-func expectAll64(s string, expected, a, b, c, d, e, f, g, h, i float64) int {
-       fails := 0
-       fails += expect64(s+":a", a, expected)
-       fails += expect64(s+":b", b, expected)
-       fails += expect64(s+":c", c, expected)
-       fails += expect64(s+":d", d, expected)
-       fails += expect64(s+":e", e, expected)
-       fails += expect64(s+":f", f, expected)
-       fails += expect64(s+":g", g, expected)
-       return fails
-}
-
-func expectAll32(s string, expected, a, b, c, d, e, f, g, h, i float32) int {
-       fails := 0
-       fails += expect32(s+":a", a, expected)
-       fails += expect32(s+":b", b, expected)
-       fails += expect32(s+":c", c, expected)
-       fails += expect32(s+":d", d, expected)
-       fails += expect32(s+":e", e, expected)
-       fails += expect32(s+":f", f, expected)
-       fails += expect32(s+":g", g, expected)
-       return fails
-}
-
 // manysub_ssa is designed to tickle bugs that depend on register
 // pressure or unfriendly operand ordering in registers (and at
 // least once it succeeded in this).
@@ -245,6 +187,952 @@ func integer2floatConversions() int {
        return fails
 }
 
+const (
+       aa = 0x1000000000000000
+       ab = 0x100000000000000
+       ac = 0x10000000000000
+       ad = 0x1000000000000
+       ba = 0x100000000000
+       bb = 0x10000000000
+       bc = 0x1000000000
+       bd = 0x100000000
+       ca = 0x10000000
+       cb = 0x1000000
+       cc = 0x100000
+       cd = 0x10000
+       da = 0x1000
+       db = 0x100
+       dc = 0x10
+       dd = 0x1
+)
+
+func compares64_ssa(a, b, c, d float64) (lt, le, eq, ne, ge, gt uint64) {
+
+       switch {
+       }
+
+       if a < a {
+               lt += aa
+       }
+       if a < b {
+               lt += ab
+       }
+       if a < c {
+               lt += ac
+       }
+       if a < d {
+               lt += ad
+       }
+
+       if b < a {
+               lt += ba
+       }
+       if b < b {
+               lt += bb
+       }
+       if b < c {
+               lt += bc
+       }
+       if b < d {
+               lt += bd
+       }
+
+       if c < a {
+               lt += ca
+       }
+       if c < b {
+               lt += cb
+       }
+       if c < c {
+               lt += cc
+       }
+       if c < d {
+               lt += cd
+       }
+
+       if d < a {
+               lt += da
+       }
+       if d < b {
+               lt += db
+       }
+       if d < c {
+               lt += dc
+       }
+       if d < d {
+               lt += dd
+       }
+
+       if a <= a {
+               le += aa
+       }
+       if a <= b {
+               le += ab
+       }
+       if a <= c {
+               le += ac
+       }
+       if a <= d {
+               le += ad
+       }
+
+       if b <= a {
+               le += ba
+       }
+       if b <= b {
+               le += bb
+       }
+       if b <= c {
+               le += bc
+       }
+       if b <= d {
+               le += bd
+       }
+
+       if c <= a {
+               le += ca
+       }
+       if c <= b {
+               le += cb
+       }
+       if c <= c {
+               le += cc
+       }
+       if c <= d {
+               le += cd
+       }
+
+       if d <= a {
+               le += da
+       }
+       if d <= b {
+               le += db
+       }
+       if d <= c {
+               le += dc
+       }
+       if d <= d {
+               le += dd
+       }
+
+       if a == a {
+               eq += aa
+       }
+       if a == b {
+               eq += ab
+       }
+       if a == c {
+               eq += ac
+       }
+       if a == d {
+               eq += ad
+       }
+
+       if b == a {
+               eq += ba
+       }
+       if b == b {
+               eq += bb
+       }
+       if b == c {
+               eq += bc
+       }
+       if b == d {
+               eq += bd
+       }
+
+       if c == a {
+               eq += ca
+       }
+       if c == b {
+               eq += cb
+       }
+       if c == c {
+               eq += cc
+       }
+       if c == d {
+               eq += cd
+       }
+
+       if d == a {
+               eq += da
+       }
+       if d == b {
+               eq += db
+       }
+       if d == c {
+               eq += dc
+       }
+       if d == d {
+               eq += dd
+       }
+
+       if a != a {
+               ne += aa
+       }
+       if a != b {
+               ne += ab
+       }
+       if a != c {
+               ne += ac
+       }
+       if a != d {
+               ne += ad
+       }
+
+       if b != a {
+               ne += ba
+       }
+       if b != b {
+               ne += bb
+       }
+       if b != c {
+               ne += bc
+       }
+       if b != d {
+               ne += bd
+       }
+
+       if c != a {
+               ne += ca
+       }
+       if c != b {
+               ne += cb
+       }
+       if c != c {
+               ne += cc
+       }
+       if c != d {
+               ne += cd
+       }
+
+       if d != a {
+               ne += da
+       }
+       if d != b {
+               ne += db
+       }
+       if d != c {
+               ne += dc
+       }
+       if d != d {
+               ne += dd
+       }
+
+       if a >= a {
+               ge += aa
+       }
+       if a >= b {
+               ge += ab
+       }
+       if a >= c {
+               ge += ac
+       }
+       if a >= d {
+               ge += ad
+       }
+
+       if b >= a {
+               ge += ba
+       }
+       if b >= b {
+               ge += bb
+       }
+       if b >= c {
+               ge += bc
+       }
+       if b >= d {
+               ge += bd
+       }
+
+       if c >= a {
+               ge += ca
+       }
+       if c >= b {
+               ge += cb
+       }
+       if c >= c {
+               ge += cc
+       }
+       if c >= d {
+               ge += cd
+       }
+
+       if d >= a {
+               ge += da
+       }
+       if d >= b {
+               ge += db
+       }
+       if d >= c {
+               ge += dc
+       }
+       if d >= d {
+               ge += dd
+       }
+
+       if a > a {
+               gt += aa
+       }
+       if a > b {
+               gt += ab
+       }
+       if a > c {
+               gt += ac
+       }
+       if a > d {
+               gt += ad
+       }
+
+       if b > a {
+               gt += ba
+       }
+       if b > b {
+               gt += bb
+       }
+       if b > c {
+               gt += bc
+       }
+       if b > d {
+               gt += bd
+       }
+
+       if c > a {
+               gt += ca
+       }
+       if c > b {
+               gt += cb
+       }
+       if c > c {
+               gt += cc
+       }
+       if c > d {
+               gt += cd
+       }
+
+       if d > a {
+               gt += da
+       }
+       if d > b {
+               gt += db
+       }
+       if d > c {
+               gt += dc
+       }
+       if d > d {
+               gt += dd
+       }
+
+       return
+}
+
+func compares32_ssa(a, b, c, d float32) (lt, le, eq, ne, ge, gt uint64) {
+
+       switch {
+       }
+
+       if a < a {
+               lt += aa
+       }
+       if a < b {
+               lt += ab
+       }
+       if a < c {
+               lt += ac
+       }
+       if a < d {
+               lt += ad
+       }
+
+       if b < a {
+               lt += ba
+       }
+       if b < b {
+               lt += bb
+       }
+       if b < c {
+               lt += bc
+       }
+       if b < d {
+               lt += bd
+       }
+
+       if c < a {
+               lt += ca
+       }
+       if c < b {
+               lt += cb
+       }
+       if c < c {
+               lt += cc
+       }
+       if c < d {
+               lt += cd
+       }
+
+       if d < a {
+               lt += da
+       }
+       if d < b {
+               lt += db
+       }
+       if d < c {
+               lt += dc
+       }
+       if d < d {
+               lt += dd
+       }
+
+       if a <= a {
+               le += aa
+       }
+       if a <= b {
+               le += ab
+       }
+       if a <= c {
+               le += ac
+       }
+       if a <= d {
+               le += ad
+       }
+
+       if b <= a {
+               le += ba
+       }
+       if b <= b {
+               le += bb
+       }
+       if b <= c {
+               le += bc
+       }
+       if b <= d {
+               le += bd
+       }
+
+       if c <= a {
+               le += ca
+       }
+       if c <= b {
+               le += cb
+       }
+       if c <= c {
+               le += cc
+       }
+       if c <= d {
+               le += cd
+       }
+
+       if d <= a {
+               le += da
+       }
+       if d <= b {
+               le += db
+       }
+       if d <= c {
+               le += dc
+       }
+       if d <= d {
+               le += dd
+       }
+
+       if a == a {
+               eq += aa
+       }
+       if a == b {
+               eq += ab
+       }
+       if a == c {
+               eq += ac
+       }
+       if a == d {
+               eq += ad
+       }
+
+       if b == a {
+               eq += ba
+       }
+       if b == b {
+               eq += bb
+       }
+       if b == c {
+               eq += bc
+       }
+       if b == d {
+               eq += bd
+       }
+
+       if c == a {
+               eq += ca
+       }
+       if c == b {
+               eq += cb
+       }
+       if c == c {
+               eq += cc
+       }
+       if c == d {
+               eq += cd
+       }
+
+       if d == a {
+               eq += da
+       }
+       if d == b {
+               eq += db
+       }
+       if d == c {
+               eq += dc
+       }
+       if d == d {
+               eq += dd
+       }
+
+       if a != a {
+               ne += aa
+       }
+       if a != b {
+               ne += ab
+       }
+       if a != c {
+               ne += ac
+       }
+       if a != d {
+               ne += ad
+       }
+
+       if b != a {
+               ne += ba
+       }
+       if b != b {
+               ne += bb
+       }
+       if b != c {
+               ne += bc
+       }
+       if b != d {
+               ne += bd
+       }
+
+       if c != a {
+               ne += ca
+       }
+       if c != b {
+               ne += cb
+       }
+       if c != c {
+               ne += cc
+       }
+       if c != d {
+               ne += cd
+       }
+
+       if d != a {
+               ne += da
+       }
+       if d != b {
+               ne += db
+       }
+       if d != c {
+               ne += dc
+       }
+       if d != d {
+               ne += dd
+       }
+
+       if a >= a {
+               ge += aa
+       }
+       if a >= b {
+               ge += ab
+       }
+       if a >= c {
+               ge += ac
+       }
+       if a >= d {
+               ge += ad
+       }
+
+       if b >= a {
+               ge += ba
+       }
+       if b >= b {
+               ge += bb
+       }
+       if b >= c {
+               ge += bc
+       }
+       if b >= d {
+               ge += bd
+       }
+
+       if c >= a {
+               ge += ca
+       }
+       if c >= b {
+               ge += cb
+       }
+       if c >= c {
+               ge += cc
+       }
+       if c >= d {
+               ge += cd
+       }
+
+       if d >= a {
+               ge += da
+       }
+       if d >= b {
+               ge += db
+       }
+       if d >= c {
+               ge += dc
+       }
+       if d >= d {
+               ge += dd
+       }
+
+       if a > a {
+               gt += aa
+       }
+       if a > b {
+               gt += ab
+       }
+       if a > c {
+               gt += ac
+       }
+       if a > d {
+               gt += ad
+       }
+
+       if b > a {
+               gt += ba
+       }
+       if b > b {
+               gt += bb
+       }
+       if b > c {
+               gt += bc
+       }
+       if b > d {
+               gt += bd
+       }
+
+       if c > a {
+               gt += ca
+       }
+       if c > b {
+               gt += cb
+       }
+       if c > c {
+               gt += cc
+       }
+       if c > d {
+               gt += cd
+       }
+
+       if d > a {
+               gt += da
+       }
+       if d > b {
+               gt += db
+       }
+       if d > c {
+               gt += dc
+       }
+       if d > d {
+               gt += dd
+       }
+
+       return
+}
+
+func le64_ssa(x, y float64) bool {
+       switch {
+       }
+       return x <= y
+}
+func ge64_ssa(x, y float64) bool {
+       switch {
+       }
+       return x >= y
+}
+func lt64_ssa(x, y float64) bool {
+       switch {
+       }
+       return x < y
+}
+func gt64_ssa(x, y float64) bool {
+       switch {
+       }
+       return x > y
+}
+func eq64_ssa(x, y float64) bool {
+       switch {
+       }
+       return x == y
+}
+func ne64_ssa(x, y float64) bool {
+       switch {
+       }
+       return x != y
+}
+
+func eqbr64_ssa(x, y float64) float64 {
+       switch {
+       }
+       if x == y {
+               return 17
+       }
+       return 42
+}
+func nebr64_ssa(x, y float64) float64 {
+       switch {
+       }
+       if x != y {
+               return 17
+       }
+       return 42
+}
+func gebr64_ssa(x, y float64) float64 {
+       switch {
+       }
+       if x >= y {
+               return 17
+       }
+       return 42
+}
+func lebr64_ssa(x, y float64) float64 {
+       switch {
+       }
+       if x <= y {
+               return 17
+       }
+       return 42
+}
+func ltbr64_ssa(x, y float64) float64 {
+       switch {
+       }
+       if x < y {
+               return 17
+       }
+       return 42
+}
+func gtbr64_ssa(x, y float64) float64 {
+       switch {
+       }
+       if x > y {
+               return 17
+       }
+       return 42
+}
+
+func le32_ssa(x, y float32) bool {
+       switch {
+       }
+       return x <= y
+}
+func ge32_ssa(x, y float32) bool {
+       switch {
+       }
+       return x >= y
+}
+func lt32_ssa(x, y float32) bool {
+       switch {
+       }
+       return x < y
+}
+func gt32_ssa(x, y float32) bool {
+       switch {
+       }
+       return x > y
+}
+func eq32_ssa(x, y float32) bool {
+       switch {
+       }
+       return x == y
+}
+func ne32_ssa(x, y float32) bool {
+       switch {
+       }
+       return x != y
+}
+
+func eqbr32_ssa(x, y float32) float32 {
+       switch {
+       }
+       if x == y {
+               return 17
+       }
+       return 42
+}
+func nebr32_ssa(x, y float32) float32 {
+       switch {
+       }
+       if x != y {
+               return 17
+       }
+       return 42
+}
+func gebr32_ssa(x, y float32) float32 {
+       switch {
+       }
+       if x >= y {
+               return 17
+       }
+       return 42
+}
+func lebr32_ssa(x, y float32) float32 {
+       switch {
+       }
+       if x <= y {
+               return 17
+       }
+       return 42
+}
+func ltbr32_ssa(x, y float32) float32 {
+       switch {
+       }
+       if x < y {
+               return 17
+       }
+       return 42
+}
+func gtbr32_ssa(x, y float32) float32 {
+       switch {
+       }
+       if x > y {
+               return 17
+       }
+       return 42
+}
+
+func fail64(s string, f func(a, b float64) float64, a, b, e float64) int {
+       d := f(a, b)
+       if d != e {
+               fmt.Printf("For (float64) %v %v %v, expected %v, got %v\n", a, s, b, e, d)
+               return 1
+       }
+       return 0
+}
+
+func fail64bool(s string, f func(a, b float64) bool, a, b float64, e bool) int {
+       d := f(a, b)
+       if d != e {
+               fmt.Printf("For (float64) %v %v %v, expected %v, got %v\n", a, s, b, e, d)
+               return 1
+       }
+       return 0
+}
+
+func fail32(s string, f func(a, b float32) float32, a, b, e float32) int {
+       d := f(a, b)
+       if d != e {
+               fmt.Printf("For (float32) %v %v %v, expected %v, got %v\n", a, s, b, e, d)
+               return 1
+       }
+       return 0
+}
+
+func fail32bool(s string, f func(a, b float32) bool, a, b float32, e bool) int {
+       d := f(a, b)
+       if d != e {
+               fmt.Printf("For (float32) %v %v %v, expected %v, got %v\n", a, s, b, e, d)
+               return 1
+       }
+       return 0
+}
+
+func expect64(s string, x, expected float64) int {
+       if x != expected {
+               println("Expected", expected, "for", s, ", got", x)
+               return 1
+       }
+       return 0
+}
+
+func expect32(s string, x, expected float32) int {
+       if x != expected {
+               println("Expected", expected, "for", s, ", got", x)
+               return 1
+       }
+       return 0
+}
+
+func expectUint64(s string, x, expected uint64) int {
+       if x != expected {
+               fmt.Printf("Expected 0x%016x for %s, got 0x%016x\n", expected, s, x)
+               return 1
+       }
+       return 0
+}
+
+func expectAll64(s string, expected, a, b, c, d, e, f, g, h, i float64) int {
+       fails := 0
+       fails += expect64(s+":a", a, expected)
+       fails += expect64(s+":b", b, expected)
+       fails += expect64(s+":c", c, expected)
+       fails += expect64(s+":d", d, expected)
+       fails += expect64(s+":e", e, expected)
+       fails += expect64(s+":f", f, expected)
+       fails += expect64(s+":g", g, expected)
+       return fails
+}
+
+func expectAll32(s string, expected, a, b, c, d, e, f, g, h, i float32) int {
+       fails := 0
+       fails += expect32(s+":a", a, expected)
+       fails += expect32(s+":b", b, expected)
+       fails += expect32(s+":c", c, expected)
+       fails += expect32(s+":d", d, expected)
+       fails += expect32(s+":e", e, expected)
+       fails += expect32(s+":f", f, expected)
+       fails += expect32(s+":g", g, expected)
+       return fails
+}
+
+var ev64 [2]float64 = [2]float64{42.0, 17.0}
+var ev32 [2]float32 = [2]float32{42.0, 17.0}
+
+func cmpOpTest(s string,
+       f func(a, b float64) bool,
+       g func(a, b float64) float64,
+       ff func(a, b float32) bool,
+       gg func(a, b float32) float32,
+       zero, one, inf, nan float64, result uint) int {
+       fails := 0
+       fails += fail64bool(s, f, zero, zero, result>>16&1 == 1)
+       fails += fail64bool(s, f, zero, one, result>>12&1 == 1)
+       fails += fail64bool(s, f, zero, inf, result>>8&1 == 1)
+       fails += fail64bool(s, f, zero, nan, result>>4&1 == 1)
+       fails += fail64bool(s, f, nan, nan, result&1 == 1)
+
+       fails += fail64(s, g, zero, zero, ev64[result>>16&1])
+       fails += fail64(s, g, zero, one, ev64[result>>12&1])
+       fails += fail64(s, g, zero, inf, ev64[result>>8&1])
+       fails += fail64(s, g, zero, nan, ev64[result>>4&1])
+       fails += fail64(s, g, nan, nan, ev64[result>>0&1])
+
+       {
+               zero := float32(zero)
+               one := float32(one)
+               inf := float32(inf)
+               nan := float32(nan)
+               fails += fail32bool(s, ff, zero, zero, (result>>16)&1 == 1)
+               fails += fail32bool(s, ff, zero, one, (result>>12)&1 == 1)
+               fails += fail32bool(s, ff, zero, inf, (result>>8)&1 == 1)
+               fails += fail32bool(s, ff, zero, nan, (result>>4)&1 == 1)
+               fails += fail32bool(s, ff, nan, nan, result&1 == 1)
+
+               fails += fail32(s, gg, zero, zero, ev32[(result>>16)&1])
+               fails += fail32(s, gg, zero, one, ev32[(result>>12)&1])
+               fails += fail32(s, gg, zero, inf, ev32[(result>>8)&1])
+               fails += fail32(s, gg, zero, nan, ev32[(result>>4)&1])
+               fails += fail32(s, gg, nan, nan, ev32[(result>>0)&1])
+       }
+
+       return fails
+}
+
 func main() {
 
        a := 3.0
@@ -273,6 +1161,8 @@ func main() {
        // but should not underflow in float and in fact is exactly representable.
        fails += fail64("*", mul64_ssa, dtiny, dtiny, 1.9636373861190906e-90)
 
+       // Intended to create register pressure which forces
+       // asymmetric op into different code paths.
        aa, ab, ac, ad, ba, bb, bc, bd, ca, cb, cc, cd, da, db, dc, dd := manysub_ssa(1000.0, 100.0, 10.0, 1.0)
 
        fails += expect64("aa", aa, 11.0)
@@ -297,6 +1187,39 @@ func main() {
 
        fails += integer2floatConversions()
 
+       var zero64 float64 = 0.0
+       var one64 float64 = 1.0
+       var inf64 float64 = 1.0 / zero64
+       var nan64 float64 = sub64_ssa(inf64, inf64)
+
+       fails += cmpOpTest("!=", ne64_ssa, nebr64_ssa, ne32_ssa, nebr32_ssa, zero64, one64, inf64, nan64, 0x01111)
+       fails += cmpOpTest("==", eq64_ssa, eqbr64_ssa, eq32_ssa, eqbr32_ssa, zero64, one64, inf64, nan64, 0x10000)
+       fails += cmpOpTest("<=", le64_ssa, lebr64_ssa, le32_ssa, lebr32_ssa, zero64, one64, inf64, nan64, 0x11100)
+       fails += cmpOpTest("<", lt64_ssa, ltbr64_ssa, lt32_ssa, ltbr32_ssa, zero64, one64, inf64, nan64, 0x01100)
+       fails += cmpOpTest(">", gt64_ssa, gtbr64_ssa, gt32_ssa, gtbr32_ssa, zero64, one64, inf64, nan64, 0x00000)
+       fails += cmpOpTest(">=", ge64_ssa, gebr64_ssa, ge32_ssa, gebr32_ssa, zero64, one64, inf64, nan64, 0x10000)
+
+       {
+               lt, le, eq, ne, ge, gt := compares64_ssa(0.0, 1.0, inf64, nan64)
+               fails += expectUint64("lt", lt, 0x0110001000000000)
+               fails += expectUint64("le", le, 0x1110011000100000)
+               fails += expectUint64("eq", eq, 0x1000010000100000)
+               fails += expectUint64("ne", ne, 0x0111101111011111)
+               fails += expectUint64("ge", ge, 0x1000110011100000)
+               fails += expectUint64("gt", gt, 0x0000100011000000)
+               // fmt.Printf("lt=0x%016x, le=0x%016x, eq=0x%016x, ne=0x%016x, ge=0x%016x, gt=0x%016x\n",
+               //      lt, le, eq, ne, ge, gt)
+       }
+       {
+               lt, le, eq, ne, ge, gt := compares32_ssa(0.0, 1.0, float32(inf64), float32(nan64))
+               fails += expectUint64("lt", lt, 0x0110001000000000)
+               fails += expectUint64("le", le, 0x1110011000100000)
+               fails += expectUint64("eq", eq, 0x1000010000100000)
+               fails += expectUint64("ne", ne, 0x0111101111011111)
+               fails += expectUint64("ge", ge, 0x1000110011100000)
+               fails += expectUint64("gt", gt, 0x0000100011000000)
+       }
+
        if fails > 0 {
                fmt.Printf("Saw %v failures\n", fails)
                panic("Failed.")
index e6bd44d57307703beda153e6e86c4492603d229e..e390fc4998825130d9dfcbdfbadefa05eedb55fe 100644 (file)
@@ -35,7 +35,7 @@ func fuse(f *Func) {
                }
 
                // trash b, just in case
-               b.Kind = blockInvalid
+               b.Kind = BlockInvalid
                b.Values = nil
                b.Preds = nil
                b.Succs = nil
index 86b443c10d516f0bc1c59ce2fe5abe76569ec4f3..ff89a7e899835e058e5e57304596b083b78788a8 100644 (file)
 (Less32 x y) -> (SETL (CMPL <TypeFlags> x y))
 (Less16 x y) -> (SETL (CMPW <TypeFlags> x y))
 (Less8  x y) -> (SETL (CMPB <TypeFlags> x y))
-
 (Less64U x y) -> (SETB (CMPQ <TypeFlags> x y))
 (Less32U x y) -> (SETB (CMPL <TypeFlags> x y))
 (Less16U x y) -> (SETB (CMPW <TypeFlags> x y))
 (Less8U  x y) -> (SETB (CMPB <TypeFlags> x y))
+// Use SETGF with reversed operands to dodge NaN case
+(Less64F x y) -> (SETGF (UCOMISD <TypeFlags> y x))
+(Less32F x y) -> (SETGF (UCOMISS <TypeFlags> y x))
 
 (Leq64 x y) -> (SETLE (CMPQ <TypeFlags> x y))
 (Leq32 x y) -> (SETLE (CMPL <TypeFlags> x y))
 (Leq16 x y) -> (SETLE (CMPW <TypeFlags> x y))
 (Leq8  x y) -> (SETLE (CMPB <TypeFlags> x y))
-
 (Leq64U x y) -> (SETBE (CMPQ <TypeFlags> x y))
 (Leq32U x y) -> (SETBE (CMPL <TypeFlags> x y))
 (Leq16U x y) -> (SETBE (CMPW <TypeFlags> x y))
 (Leq8U  x y) -> (SETBE (CMPB <TypeFlags> x y))
+// Use SETGEF with reversed operands to dodge NaN case
+(Leq64F x y) -> (SETGEF (UCOMISD <TypeFlags> y x))
+(Leq32F x y) -> (SETGEF (UCOMISS <TypeFlags> y x))
 
 (Greater64 x y) -> (SETG (CMPQ <TypeFlags> x y))
 (Greater32 x y) -> (SETG (CMPL <TypeFlags> x y))
 (Greater16 x y) -> (SETG (CMPW <TypeFlags> x y))
 (Greater8  x y) -> (SETG (CMPB <TypeFlags> x y))
-
 (Greater64U x y) -> (SETA (CMPQ <TypeFlags> x y))
 (Greater32U x y) -> (SETA (CMPL <TypeFlags> x y))
 (Greater16U x y) -> (SETA (CMPW <TypeFlags> x y))
 (Greater8U  x y) -> (SETA (CMPB <TypeFlags> x y))
+// Note Go assembler gets UCOMISx operand order wrong, but it is right here
+// Bug is accommodated at generation of assembly language.
+(Greater64F x y) -> (SETGF (UCOMISD <TypeFlags> x y))
+(Greater32F x y) -> (SETGF (UCOMISS <TypeFlags> x y))
 
 (Geq64 x y) -> (SETGE (CMPQ <TypeFlags> x y))
 (Geq32 x y) -> (SETGE (CMPL <TypeFlags> x y))
 (Geq16 x y) -> (SETGE (CMPW <TypeFlags> x y))
 (Geq8  x y) -> (SETGE (CMPB <TypeFlags> x y))
-
 (Geq64U x y) -> (SETAE (CMPQ <TypeFlags> x y))
 (Geq32U x y) -> (SETAE (CMPL <TypeFlags> x y))
 (Geq16U x y) -> (SETAE (CMPW <TypeFlags> x y))
 (Geq8U  x y) -> (SETAE (CMPB <TypeFlags> x y))
+// Note Go assembler gets UCOMISx operand order wrong, but it is right here
+// Bug is accommodated at generation of assembly language.
+(Geq64F x y) -> (SETGEF (UCOMISD <TypeFlags> x y))
+(Geq32F x y) -> (SETGEF (UCOMISS <TypeFlags> x y))
 
 (Eq64 x y) -> (SETEQ (CMPQ <TypeFlags> x y))
 (Eq32 x y) -> (SETEQ (CMPL <TypeFlags> x y))
 (Eq16 x y) -> (SETEQ (CMPW <TypeFlags> x y))
 (Eq8 x y) -> (SETEQ (CMPB <TypeFlags> x y))
 (EqPtr x y) -> (SETEQ (CMPQ <TypeFlags> x y))
+(Eq64F x y) -> (SETEQF (UCOMISD <TypeFlags> x y))
+(Eq32F x y) -> (SETEQF (UCOMISS <TypeFlags> x y))
 
 (Neq64 x y) -> (SETNE (CMPQ <TypeFlags> x y))
 (Neq32 x y) -> (SETNE (CMPL <TypeFlags> x y))
 (Neq16 x y) -> (SETNE (CMPW <TypeFlags> x y))
 (Neq8 x y) -> (SETNE (CMPB <TypeFlags> x y))
 (NeqPtr x y) -> (SETNE (CMPQ <TypeFlags> x y))
+(Neq64F x y) -> (SETNEF (UCOMISD <TypeFlags> x y))
+(Neq32F x y) -> (SETNEF (UCOMISS <TypeFlags> x y))
 
 (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload ptr mem)
 (Load <t> ptr mem) && is32BitInt(t) -> (MOVLload ptr mem)
 (If (SETA  cmp) yes no) -> (UGT cmp yes no)
 (If (SETAE cmp) yes no) -> (UGE cmp yes no)
 
+// Special case for floating point - LF/LEF not generated
+(If (SETGF  cmp) yes no) -> (UGT  cmp yes no)
+(If (SETGEF cmp) yes no) -> (UGE  cmp yes no)
+(If (SETEQF cmp) yes no) -> (EQF  cmp yes no)
+(If (SETNEF cmp) yes no) -> (EQF  cmp yes no)
+
 (If cond yes no) -> (NE (TESTB <TypeFlags> cond cond) yes no)
 
 (NE (TESTB (SETL  cmp)) yes no) -> (LT  cmp yes no)
 (NE (TESTB (SETA  cmp)) yes no) -> (UGT cmp yes no)
 (NE (TESTB (SETAE cmp)) yes no) -> (UGE cmp yes no)
 
+// Special case for floating point - LF/LEF not generated
+(NE (TESTB (SETGF  cmp)) yes no) -> (UGT  cmp yes no)
+(NE (TESTB (SETGEF cmp)) yes no) -> (UGE  cmp yes no)
+(NE (TESTB (SETEQF cmp)) yes no) -> (EQF  cmp yes no)
+(NE (TESTB (SETNEF cmp)) yes no) -> (NEF  cmp yes no)
+
+// Disabled because it interferes with the pattern match above and makes worse code.
+// (SETNEF x) -> (ORQ (SETNE <config.Frontend().TypeInt8()> x) (SETNAN <config.Frontend().TypeInt8()> x))
+// (SETEQF x) -> (ANDQ (SETEQ <config.Frontend().TypeInt8()> x) (SETORD <config.Frontend().TypeInt8()> x))
+
 (StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
 (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
 
 (MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
        (MOVSDstoreidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
 
-
 (ADDQconst [0] x) -> x
 
 // lower Zero instructions with word sizes
index 8b8da225d136ddecdbe01b75869e338bb52c7e53..e610458c927e80e8646227dcd6bb3ffaca66b00a 100644 (file)
@@ -83,7 +83,6 @@ func init() {
                flags      = buildReg("FLAGS")
                callerSave = gp | fp | flags
        )
-
        // Common slices of register masks
        var (
                gponly    = []regMask{gp}
@@ -110,8 +109,9 @@ func init() {
 
                gp2flags  = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
                gp1flags  = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
-               flagsgp   = regInfo{inputs: flagsonly, outputs: gponly, clobbers: flags}
+               flagsgp   = regInfo{inputs: flagsonly, outputs: gponly}
                readflags = regInfo{inputs: flagsonly, outputs: gponly}
+               flagsgpax = regInfo{inputs: flagsonly, clobbers: ax, outputs: []regMask{gp &^ ax}}
 
                gpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
                gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
@@ -124,10 +124,11 @@ func init() {
                fp21    = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
                fp21x15 = regInfo{inputs: []regMask{fp &^ x15, fp &^ x15},
                        clobbers: x15, outputs: []regMask{fp &^ x15}}
-
-               fpgp = regInfo{inputs: fponly, outputs: gponly}
-               gpfp = regInfo{inputs: gponly, outputs: fponly}
-               fp11 = regInfo{inputs: fponly, outputs: fponly}
+               fpgp     = regInfo{inputs: fponly, outputs: gponly}
+               gpfp     = regInfo{inputs: gponly, outputs: fponly}
+               fp11     = regInfo{inputs: fponly, outputs: fponly}
+               fp2flags = regInfo{inputs: []regMask{fp, fp}, outputs: flagsonly}
+               // fp1flags = regInfo{inputs: fponly, outputs: flagsonly}
 
                fpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly}
                fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly}
@@ -249,6 +250,9 @@ func init() {
                {name: "CMPWconst", reg: gp1flags, asm: "CMPW"}, // arg0 compare to auxint
                {name: "CMPBconst", reg: gp1flags, asm: "CMPB"}, // arg0 compare to auxint
 
+               {name: "UCOMISS", reg: fp2flags, asm: "UCOMISS"}, // arg0 compare to arg1, f32
+               {name: "UCOMISD", reg: fp2flags, asm: "UCOMISD"}, // arg0 compare to arg1, f64
+
                {name: "TESTQ", reg: gp2flags, asm: "TESTQ"},      // (arg0 & arg1) compare to 0
                {name: "TESTL", reg: gp2flags, asm: "TESTL"},      // (arg0 & arg1) compare to 0
                {name: "TESTW", reg: gp2flags, asm: "TESTW"},      // (arg0 & arg1) compare to 0
@@ -316,6 +320,16 @@ func init() {
                {name: "SETBE", reg: readflags, asm: "SETLS"}, // extract unsigned <= condition from arg0
                {name: "SETA", reg: readflags, asm: "SETHI"},  // extract unsigned > condition from arg0
                {name: "SETAE", reg: readflags, asm: "SETCC"}, // extract unsigned >= condition from arg0
+               // Need different opcodes for floating point conditions because
+               // any comparison involving a NaN is always FALSE and thus
+               // the patterns for inverting conditions cannot be used.
+               {name: "SETEQF", reg: flagsgpax, asm: "SETEQ"}, // extract == condition from arg0
+               {name: "SETNEF", reg: flagsgpax, asm: "SETNE"}, // extract != condition from arg0
+               {name: "SETORD", reg: flagsgp, asm: "SETPC"},   // extract "ordered" (No Nan present) condition from arg0
+               {name: "SETNAN", reg: flagsgp, asm: "SETPS"},   // extract "unordered" (Nan present) condition from arg0
+
+               {name: "SETGF", reg: flagsgp, asm: "SETHI"},  // extract floating > condition from arg0
+               {name: "SETGEF", reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0
 
                {name: "MOVBQSX", reg: gp11nf, asm: "MOVBQSX"}, // sign extend arg0 from int8 to int64
                {name: "MOVBQZX", reg: gp11nf, asm: "MOVBQZX"}, // zero extend arg0 from int8 to int64
@@ -395,6 +409,10 @@ func init() {
                {name: "ULE"},
                {name: "UGT"},
                {name: "UGE"},
+               {name: "EQF"},
+               {name: "NEF"},
+               {name: "ORD"}, // FP, ordered comparison (parity zero)
+               {name: "NAN"}, // FP, unordered comparison (parity one)
        }
 
        archs = append(archs, arch{"AMD64", AMD64ops, AMD64blocks, regNamesAMD64})
index 4a65a87ea8699bef49ca3841f9ccbaa1effada88..a0040d3017f048d16672775e7b9c9bfe9d4b8f05 100644 (file)
@@ -161,6 +161,8 @@ var genericOps = []opData{
        {name: "Eq64"},
        {name: "EqPtr"},
        {name: "EqFat"}, // slice/interface; arg0 or arg1 is nil; other cases handled by frontend
+       {name: "Eq32F"},
+       {name: "Eq64F"},
 
        {name: "Neq8"}, // arg0 != arg1
        {name: "Neq16"},
@@ -168,6 +170,8 @@ var genericOps = []opData{
        {name: "Neq64"},
        {name: "NeqPtr"},
        {name: "NeqFat"}, // slice/interface; arg0 or arg1 is nil; other cases handled by frontend
+       {name: "Neq32F"},
+       {name: "Neq64F"},
 
        {name: "Less8"}, // arg0 < arg1
        {name: "Less8U"},
@@ -177,6 +181,8 @@ var genericOps = []opData{
        {name: "Less32U"},
        {name: "Less64"},
        {name: "Less64U"},
+       {name: "Less32F"},
+       {name: "Less64F"},
 
        {name: "Leq8"}, // arg0 <= arg1
        {name: "Leq8U"},
@@ -186,6 +192,8 @@ var genericOps = []opData{
        {name: "Leq32U"},
        {name: "Leq64"},
        {name: "Leq64U"},
+       {name: "Leq32F"},
+       {name: "Leq64F"},
 
        {name: "Greater8"}, // arg0 > arg1
        {name: "Greater8U"},
@@ -195,6 +203,8 @@ var genericOps = []opData{
        {name: "Greater32U"},
        {name: "Greater64"},
        {name: "Greater64U"},
+       {name: "Greater32F"},
+       {name: "Greater64F"},
 
        {name: "Geq8"}, // arg0 >= arg1
        {name: "Geq8U"},
@@ -204,6 +214,8 @@ var genericOps = []opData{
        {name: "Geq32U"},
        {name: "Geq64"},
        {name: "Geq64U"},
+       {name: "Geq32F"},
+       {name: "Geq64F"},
 
        // 1-input ops
        {name: "Not"}, // !arg0
index 6620c0a1d005e305b1691cd442ceb8416dc6bb37..1383566e3a720b8f8925874df71aa3acff269246 100644 (file)
@@ -76,7 +76,7 @@ func genOp() {
 
        // generate Block* declarations
        fmt.Fprintln(w, "const (")
-       fmt.Fprintln(w, "blockInvalid BlockKind = iota")
+       fmt.Fprintln(w, "BlockInvalid BlockKind = iota")
        for _, a := range archs {
                fmt.Fprintln(w)
                for _, d := range a.blocks {
@@ -87,7 +87,7 @@ func genOp() {
 
        // generate block kind string method
        fmt.Fprintln(w, "var blockString = [...]string{")
-       fmt.Fprintln(w, "blockInvalid:\"BlockInvalid\",")
+       fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",")
        for _, a := range archs {
                fmt.Fprintln(w)
                for _, b := range a.blocks {
index aa51cbc3010f769fb6f80f760ab1afac1d4045c3..4eccb463da2024168f70a1d50ae3a3fb0050ff39 100644 (file)
@@ -5,7 +5,7 @@ package ssa
 import "cmd/internal/obj/x86"
 
 const (
-       blockInvalid BlockKind = iota
+       BlockInvalid BlockKind = iota
 
        BlockAMD64EQ
        BlockAMD64NE
@@ -17,6 +17,10 @@ const (
        BlockAMD64ULE
        BlockAMD64UGT
        BlockAMD64UGE
+       BlockAMD64EQF
+       BlockAMD64NEF
+       BlockAMD64ORD
+       BlockAMD64NAN
 
        BlockExit
        BlockDead
@@ -26,7 +30,7 @@ const (
 )
 
 var blockString = [...]string{
-       blockInvalid: "BlockInvalid",
+       BlockInvalid: "BlockInvalid",
 
        BlockAMD64EQ:  "EQ",
        BlockAMD64NE:  "NE",
@@ -38,6 +42,10 @@ var blockString = [...]string{
        BlockAMD64ULE: "ULE",
        BlockAMD64UGT: "UGT",
        BlockAMD64UGE: "UGE",
+       BlockAMD64EQF: "EQF",
+       BlockAMD64NEF: "NEF",
+       BlockAMD64ORD: "ORD",
+       BlockAMD64NAN: "NAN",
 
        BlockExit:  "Exit",
        BlockDead:  "Dead",
@@ -143,6 +151,8 @@ const (
        OpAMD64CMPLconst
        OpAMD64CMPWconst
        OpAMD64CMPBconst
+       OpAMD64UCOMISS
+       OpAMD64UCOMISD
        OpAMD64TESTQ
        OpAMD64TESTL
        OpAMD64TESTW
@@ -199,6 +209,12 @@ const (
        OpAMD64SETBE
        OpAMD64SETA
        OpAMD64SETAE
+       OpAMD64SETEQF
+       OpAMD64SETNEF
+       OpAMD64SETORD
+       OpAMD64SETNAN
+       OpAMD64SETGF
+       OpAMD64SETGEF
        OpAMD64MOVBQSX
        OpAMD64MOVBQZX
        OpAMD64MOVWQSX
@@ -361,12 +377,16 @@ const (
        OpEq64
        OpEqPtr
        OpEqFat
+       OpEq32F
+       OpEq64F
        OpNeq8
        OpNeq16
        OpNeq32
        OpNeq64
        OpNeqPtr
        OpNeqFat
+       OpNeq32F
+       OpNeq64F
        OpLess8
        OpLess8U
        OpLess16
@@ -375,6 +395,8 @@ const (
        OpLess32U
        OpLess64
        OpLess64U
+       OpLess32F
+       OpLess64F
        OpLeq8
        OpLeq8U
        OpLeq16
@@ -383,6 +405,8 @@ const (
        OpLeq32U
        OpLeq64
        OpLeq64U
+       OpLeq32F
+       OpLeq64F
        OpGreater8
        OpGreater8U
        OpGreater16
@@ -391,6 +415,8 @@ const (
        OpGreater32U
        OpGreater64
        OpGreater64U
+       OpGreater32F
+       OpGreater64F
        OpGeq8
        OpGeq8U
        OpGeq16
@@ -399,6 +425,8 @@ const (
        OpGeq32U
        OpGeq64
        OpGeq64U
+       OpGeq32F
+       OpGeq64F
        OpNot
        OpNeg8
        OpNeg16
@@ -1707,6 +1735,32 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name: "UCOMISS",
+               asm:  x86.AUCOMISS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // .X0 .X1 .X2 .X3 .X4 .X5 .X6 .X7 .X8 .X9 .X10 .X11 .X12 .X13 .X14 .X15
+                               {1, 4294901760}, // .X0 .X1 .X2 .X3 .X4 .X5 .X6 .X7 .X8 .X9 .X10 .X11 .X12 .X13 .X14 .X15
+                       },
+                       outputs: []regMask{
+                               8589934592, // .FLAGS
+                       },
+               },
+       },
+       {
+               name: "UCOMISD",
+               asm:  x86.AUCOMISD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // .X0 .X1 .X2 .X3 .X4 .X5 .X6 .X7 .X8 .X9 .X10 .X11 .X12 .X13 .X14 .X15
+                               {1, 4294901760}, // .X0 .X1 .X2 .X3 .X4 .X5 .X6 .X7 .X8 .X9 .X10 .X11 .X12 .X13 .X14 .X15
+                       },
+                       outputs: []regMask{
+                               8589934592, // .FLAGS
+                       },
+               },
+       },
        {
                name: "TESTQ",
                asm:  x86.ATESTQ,
@@ -2432,6 +2486,84 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name: "SETEQF",
+               asm:  x86.ASETEQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 8589934592}, // .FLAGS
+                       },
+                       clobbers: 8589934593, // .AX .FLAGS
+                       outputs: []regMask{
+                               65518, // .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+                       },
+               },
+       },
+       {
+               name: "SETNEF",
+               asm:  x86.ASETNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 8589934592}, // .FLAGS
+                       },
+                       clobbers: 8589934593, // .AX .FLAGS
+                       outputs: []regMask{
+                               65518, // .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+                       },
+               },
+       },
+       {
+               name: "SETORD",
+               asm:  x86.ASETPC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 8589934592}, // .FLAGS
+                       },
+                       clobbers: 8589934592, // .FLAGS
+                       outputs: []regMask{
+                               65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+                       },
+               },
+       },
+       {
+               name: "SETNAN",
+               asm:  x86.ASETPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 8589934592}, // .FLAGS
+                       },
+                       clobbers: 8589934592, // .FLAGS
+                       outputs: []regMask{
+                               65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+                       },
+               },
+       },
+       {
+               name: "SETGF",
+               asm:  x86.ASETHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 8589934592}, // .FLAGS
+                       },
+                       clobbers: 8589934592, // .FLAGS
+                       outputs: []regMask{
+                               65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+                       },
+               },
+       },
+       {
+               name: "SETGEF",
+               asm:  x86.ASETCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 8589934592}, // .FLAGS
+                       },
+                       clobbers: 8589934592, // .FLAGS
+                       outputs: []regMask{
+                               65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+                       },
+               },
+       },
        {
                name: "MOVBQSX",
                asm:  x86.AMOVBQSX,
@@ -3386,6 +3518,14 @@ var opcodeTable = [...]opInfo{
                name:    "EqFat",
                generic: true,
        },
+       {
+               name:    "Eq32F",
+               generic: true,
+       },
+       {
+               name:    "Eq64F",
+               generic: true,
+       },
        {
                name:    "Neq8",
                generic: true,
@@ -3410,6 +3550,14 @@ var opcodeTable = [...]opInfo{
                name:    "NeqFat",
                generic: true,
        },
+       {
+               name:    "Neq32F",
+               generic: true,
+       },
+       {
+               name:    "Neq64F",
+               generic: true,
+       },
        {
                name:    "Less8",
                generic: true,
@@ -3442,6 +3590,14 @@ var opcodeTable = [...]opInfo{
                name:    "Less64U",
                generic: true,
        },
+       {
+               name:    "Less32F",
+               generic: true,
+       },
+       {
+               name:    "Less64F",
+               generic: true,
+       },
        {
                name:    "Leq8",
                generic: true,
@@ -3474,6 +3630,14 @@ var opcodeTable = [...]opInfo{
                name:    "Leq64U",
                generic: true,
        },
+       {
+               name:    "Leq32F",
+               generic: true,
+       },
+       {
+               name:    "Leq64F",
+               generic: true,
+       },
        {
                name:    "Greater8",
                generic: true,
@@ -3506,6 +3670,14 @@ var opcodeTable = [...]opInfo{
                name:    "Greater64U",
                generic: true,
        },
+       {
+               name:    "Greater32F",
+               generic: true,
+       },
+       {
+               name:    "Greater64F",
+               generic: true,
+       },
        {
                name:    "Geq8",
                generic: true,
@@ -3538,6 +3710,14 @@ var opcodeTable = [...]opInfo{
                name:    "Geq64U",
                generic: true,
        },
+       {
+               name:    "Geq32F",
+               generic: true,
+       },
+       {
+               name:    "Geq64F",
+               generic: true,
+       },
        {
                name:    "Not",
                generic: true,
index b50fecda2e35501a2af16d8f45f8846590093b38..dc6dce995bf397e0256aab30f9abba762148da32 100644 (file)
@@ -2082,6 +2082,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto end4d77d0b016f93817fd6e5f60fa0e7ef2
        end4d77d0b016f93817fd6e5f60fa0e7ef2:
                ;
+       case OpEq32F:
+               // match: (Eq32F x y)
+               // cond:
+               // result: (SETEQF (UCOMISS <TypeFlags> x y))
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETEQF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end034925b03df528b1ffec9fafdcd56c8e
+       end034925b03df528b1ffec9fafdcd56c8e:
+               ;
        case OpEq64:
                // match: (Eq64 x y)
                // cond:
@@ -2103,6 +2124,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto endae6c62e4e20b4f62694b6ee40dbd9211
        endae6c62e4e20b4f62694b6ee40dbd9211:
                ;
+       case OpEq64F:
+               // match: (Eq64F x y)
+               // cond:
+               // result: (SETEQF (UCOMISD <TypeFlags> x y))
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETEQF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end62b2fb60187571e6ab0c53696ef7d030
+       end62b2fb60187571e6ab0c53696ef7d030:
+               ;
        case OpEq8:
                // match: (Eq8 x y)
                // cond:
@@ -2208,6 +2250,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto end713c3dfa0f7247dcc232bcfc916fb044
        end713c3dfa0f7247dcc232bcfc916fb044:
                ;
+       case OpGeq32F:
+               // match: (Geq32F x y)
+               // cond:
+               // result: (SETGEF (UCOMISS <TypeFlags> x y))
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETGEF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end5847ac7f2e264fba4c408ebb60c1e8a5
+       end5847ac7f2e264fba4c408ebb60c1e8a5:
+               ;
        case OpGeq32U:
                // match: (Geq32U x y)
                // cond:
@@ -2250,6 +2313,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto end63f44e3fec8d92723b5bde42d6d7eea0
        end63f44e3fec8d92723b5bde42d6d7eea0:
                ;
+       case OpGeq64F:
+               // match: (Geq64F x y)
+               // cond:
+               // result: (SETGEF (UCOMISD <TypeFlags> x y))
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETGEF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto endb40fbc46a8fc04fef95182771e2933c2
+       endb40fbc46a8fc04fef95182771e2933c2:
+               ;
        case OpGeq64U:
                // match: (Geq64U x y)
                // cond:
@@ -2390,6 +2474,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto endbf0b2b1368aadff48969a7386eee5795
        endbf0b2b1368aadff48969a7386eee5795:
                ;
+       case OpGreater32F:
+               // match: (Greater32F x y)
+               // cond:
+               // result: (SETGF (UCOMISS <TypeFlags> x y))
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETGF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto endb65b042358784f18002ae59ea6f2c51a
+       endb65b042358784f18002ae59ea6f2c51a:
+               ;
        case OpGreater32U:
                // match: (Greater32U x y)
                // cond:
@@ -2432,6 +2537,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto endaef0cfa5e27e23cf5e527061cf251069
        endaef0cfa5e27e23cf5e527061cf251069:
                ;
+       case OpGreater64F:
+               // match: (Greater64F x y)
+               // cond:
+               // result: (SETGF (UCOMISD <TypeFlags> x y))
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETGF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end1a6ca23bbb3e885473865e3b3ea501e7
+       end1a6ca23bbb3e885473865e3b3ea501e7:
+               ;
        case OpGreater64U:
                // match: (Greater64U x y)
                // cond:
@@ -2728,6 +2854,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto endf422ecc8da0033e22242de9c67112537
        endf422ecc8da0033e22242de9c67112537:
                ;
+       case OpLeq32F:
+               // match: (Leq32F x y)
+               // cond:
+               // result: (SETGEF (UCOMISS <TypeFlags> y x))
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETGEF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(y)
+                       v0.AddArg(x)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end98f7b2e6e15ce282d044c812454fe77f
+       end98f7b2e6e15ce282d044c812454fe77f:
+               ;
        case OpLeq32U:
                // match: (Leq32U x y)
                // cond:
@@ -2770,6 +2917,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto endf03da5e28dccdb4797671f39e824fb10
        endf03da5e28dccdb4797671f39e824fb10:
                ;
+       case OpLeq64F:
+               // match: (Leq64F x y)
+               // cond:
+               // result: (SETGEF (UCOMISD <TypeFlags> y x))
+               // Swapped operands: x <= y becomes y >= x (64-bit analogue of the
+               // Leq32F rule, using UCOMISD instead of UCOMISS).
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETGEF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(y)
+                       v0.AddArg(x)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end7efa164f4e4f5a395f547b1885b7eef4
+       end7efa164f4e4f5a395f547b1885b7eef4:
+               ;
        case OpLeq64U:
                // match: (Leq64U x y)
                // cond:
@@ -2896,6 +3064,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto end8da8d2030c0a323a84503c1240c566ae
        end8da8d2030c0a323a84503c1240c566ae:
                ;
+       case OpLess32F:
+               // match: (Less32F x y)
+               // cond:
+               // result: (SETGF (UCOMISS <TypeFlags> y x))
+               // Swapped operands: x < y is lowered as y > x (SETGF over
+               // (UCOMISS y x)).
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETGF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(y)
+                       v0.AddArg(x)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end54f94ce87c18a1ed2beb8d0161bea907
+       end54f94ce87c18a1ed2beb8d0161bea907:
+               ;
        case OpLess32U:
                // match: (Less32U x y)
                // cond:
@@ -2938,6 +3127,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto endf8e7a24c25692045bbcfd2c9356d1a8c
        endf8e7a24c25692045bbcfd2c9356d1a8c:
                ;
+       case OpLess64F:
+               // match: (Less64F x y)
+               // cond:
+               // result: (SETGF (UCOMISD <TypeFlags> y x))
+               // Swapped operands: x < y is lowered as y > x (64-bit analogue of
+               // the Less32F rule).
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETGF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(y)
+                       v0.AddArg(x)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end92720155a95cbfae47ea469583c4d3c7
+       end92720155a95cbfae47ea469583c4d3c7:
+               ;
        case OpLess64U:
                // match: (Less64U x y)
                // cond:
@@ -5902,6 +6112,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto end39c4bf6d063f8a0b6f0064c96ce25173
        end39c4bf6d063f8a0b6f0064c96ce25173:
                ;
+       case OpNeq32F:
+               // match: (Neq32F x y)
+               // cond:
+               // result: (SETNEF (UCOMISS <TypeFlags> x y))
+               // Operands in source order; SETNEF is a dedicated float-not-equal
+               // setter (distinct from SETNE), presumably to account for the
+               // unordered/NaN flag result -- see the op definitions.
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETNEF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end4eb0af70b64b789e55d83c15e426b0c5
+       end4eb0af70b64b789e55d83c15e426b0c5:
+               ;
        case OpNeq64:
                // match: (Neq64 x y)
                // cond:
@@ -5923,6 +6154,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                goto end8ab0bcb910c0d3213dd8726fbcc4848e
        end8ab0bcb910c0d3213dd8726fbcc4848e:
                ;
+       case OpNeq64F:
+               // match: (Neq64F x y)
+               // cond:
+               // result: (SETNEF (UCOMISD <TypeFlags> x y))
+               // 64-bit analogue of the Neq32F rule: operands in source order,
+               // UCOMISD instead of UCOMISS.
+               {
+                       x := v.Args[0]
+                       y := v.Args[1]
+                       v.Op = OpAMD64SETNEF
+                       v.AuxInt = 0
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid)
+                       v0.Type = TypeFlags
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       v.AddArg(v0)
+                       return true
+               }
+               goto end73beb54a015a226bc2e83bdd39e7ee46
+       end73beb54a015a226bc2e83bdd39e7ee46:
+               ;
        case OpNeq8:
                // match: (Neq8 x y)
                // cond:
@@ -10358,6 +10610,86 @@ func rewriteBlockAMD64(b *Block) bool {
                }
                goto end9bea9963c3c5dfb97249a5feb8287f94
        end9bea9963c3c5dfb97249a5feb8287f94:
+               ;
+               // The next three rules branch directly on the FP comparison flags
+               // instead of first materializing a boolean with SETxF: the block
+               // kind is changed and the UCOMIS* value becomes the control.
+               // match: (If (SETGF  cmp) yes no)
+               // cond:
+               // result: (UGT  cmp yes no)
+               {
+                       v := b.Control
+                       if v.Op != OpAMD64SETGF {
+                               goto enda72d68674cfa26b5982a43756bca6767
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGT
+                       b.Control = cmp
+                       b.Succs[0] = yes
+                       b.Succs[1] = no
+                       return true
+               }
+               goto enda72d68674cfa26b5982a43756bca6767
+       enda72d68674cfa26b5982a43756bca6767:
+               ;
+               // match: (If (SETGEF cmp) yes no)
+               // cond:
+               // result: (UGE  cmp yes no)
+               {
+                       v := b.Control
+                       if v.Op != OpAMD64SETGEF {
+                               goto endccc171c1d66dd60ac0275d1f78259315
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGE
+                       b.Control = cmp
+                       b.Succs[0] = yes
+                       b.Succs[1] = no
+                       return true
+               }
+               goto endccc171c1d66dd60ac0275d1f78259315
+       endccc171c1d66dd60ac0275d1f78259315:
+               ;
+               // match: (If (SETEQF cmp) yes no)
+               // cond:
+               // result: (EQF  cmp yes no)
+               // EQF is a dedicated float-equality block kind (distinct from EQ),
+               // presumably to handle the unordered/NaN flag combination -- confirm
+               // against the EQF lowering.
+               {
+                       v := b.Control
+                       if v.Op != OpAMD64SETEQF {
+                               goto end58cb74d05266a79003ebdd733afb66fa
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64EQF
+                       b.Control = cmp
+                       b.Succs[0] = yes
+                       b.Succs[1] = no
+                       return true
+               }
+               goto end58cb74d05266a79003ebdd733afb66fa
+       end58cb74d05266a79003ebdd733afb66fa:
+               ;
+               // match: (If (SETNEF cmp) yes no)
+               // cond:
+               // result: (EQF  cmp yes no)
+               {
+                       v := b.Control
+                       if v.Op != OpAMD64SETNEF {
+                               goto endfe25939ca97349543bc2d2ce4f97ba41
+                       }
+                       cmp := v.Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64EQF
+                       b.Control = cmp
+                       b.Succs[0] = yes
+                       b.Succs[1] = no
+                       return true
+               }
+               goto endfe25939ca97349543bc2d2ce4f97ba41
+       endfe25939ca97349543bc2d2ce4f97ba41:
                ;
                // match: (If cond yes no)
                // cond:
@@ -10652,6 +10984,98 @@ func rewriteBlockAMD64(b *Block) bool {
                }
                goto endbd122fd599aeb9e60881a0fa735e2fde
        endbd122fd599aeb9e60881a0fa735e2fde:
+               ;
+               // The next four rules collapse a materialized FP boolean that is
+               // immediately re-tested (SETxF -> TESTB -> NE branch) back into a
+               // single flags-based conditional block.
+               // match: (NE (TESTB (SETGF  cmp)) yes no)
+               // cond:
+               // result: (UGT  cmp yes no)
+               {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               goto endb2499521f7f351e24757f8c918c3598e
+                       }
+                       if v.Args[0].Op != OpAMD64SETGF {
+                               goto endb2499521f7f351e24757f8c918c3598e
+                       }
+                       cmp := v.Args[0].Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGT
+                       b.Control = cmp
+                       b.Succs[0] = yes
+                       b.Succs[1] = no
+                       return true
+               }
+               goto endb2499521f7f351e24757f8c918c3598e
+       endb2499521f7f351e24757f8c918c3598e:
+               ;
+               // match: (NE (TESTB (SETGEF cmp)) yes no)
+               // cond:
+               // result: (UGE  cmp yes no)
+               {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               goto end20461774babea665c4ca7c4f790a7209
+                       }
+                       if v.Args[0].Op != OpAMD64SETGEF {
+                               goto end20461774babea665c4ca7c4f790a7209
+                       }
+                       cmp := v.Args[0].Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64UGE
+                       b.Control = cmp
+                       b.Succs[0] = yes
+                       b.Succs[1] = no
+                       return true
+               }
+               goto end20461774babea665c4ca7c4f790a7209
+       end20461774babea665c4ca7c4f790a7209:
+               ;
+               // match: (NE (TESTB (SETEQF cmp)) yes no)
+               // cond:
+               // result: (EQF  cmp yes no)
+               {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               goto end236616ef13d489b78736cda7bcc1d168
+                       }
+                       if v.Args[0].Op != OpAMD64SETEQF {
+                               goto end236616ef13d489b78736cda7bcc1d168
+                       }
+                       cmp := v.Args[0].Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64EQF
+                       b.Control = cmp
+                       b.Succs[0] = yes
+                       b.Succs[1] = no
+                       return true
+               }
+               goto end236616ef13d489b78736cda7bcc1d168
+       end236616ef13d489b78736cda7bcc1d168:
+               ;
+               // match: (NE (TESTB (SETNEF cmp)) yes no)
+               // cond:
+               // result: (NEF  cmp yes no)
+               // Note: SETNEF correctly maps to the NEF block kind here.
+               {
+                       v := b.Control
+                       if v.Op != OpAMD64TESTB {
+                               goto endc992f3c266b16cb5f6aa98faa8f55600
+                       }
+                       if v.Args[0].Op != OpAMD64SETNEF {
+                               goto endc992f3c266b16cb5f6aa98faa8f55600
+                       }
+                       cmp := v.Args[0].Args[0]
+                       yes := b.Succs[0]
+                       no := b.Succs[1]
+                       b.Kind = BlockAMD64NEF
+                       b.Control = cmp
+                       b.Succs[0] = yes
+                       b.Succs[1] = no
+                       return true
+               }
+               goto endc992f3c266b16cb5f6aa98faa8f55600
+       endc992f3c266b16cb5f6aa98faa8f55600:
                ;
                // match: (NE (InvertFlags cmp) yes no)
                // cond: