]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/gc, cmd/6g: generate boolean values without jumps
authorJosh Bleecher Snyder <josharian@gmail.com>
Wed, 8 Apr 2015 16:54:15 +0000 (09:54 -0700)
committerJosh Bleecher Snyder <josharian@gmail.com>
Fri, 17 Apr 2015 20:12:18 +0000 (20:12 +0000)
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.

For example, given

func f(i, j int) bool {
return i == j
}

Before

"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20

After

"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET

regexp benchmarks, best of 12 runs:

benchmark                                 old ns/op      new ns/op      delta
BenchmarkNotOnePassShortB                 782            733            -6.27%
BenchmarkLiteral                          180            171            -5.00%
BenchmarkNotLiteral                       2855           2721           -4.69%
BenchmarkMatchHard_32                     2672           2557           -4.30%
BenchmarkMatchHard_1K                     80182          76732          -4.30%
BenchmarkMatchEasy1_32M                   76440180       73304748       -4.10%
BenchmarkMatchEasy1_32K                   68798          66350          -3.56%
BenchmarkAnchoredLongMatch                482            465            -3.53%
BenchmarkMatchEasy1_1M                    2373042        2292692        -3.39%
BenchmarkReplaceAll                       2776           2690           -3.10%
BenchmarkNotOnePassShortA                 1397           1360           -2.65%
BenchmarkMatchClass_InRange               3842           3742           -2.60%
BenchmarkMatchEasy0_32                    125            122            -2.40%
BenchmarkMatchEasy0_32K                   11414          11164          -2.19%
BenchmarkMatchEasy0_1K                    668            654            -2.10%
BenchmarkAnchoredShortMatch               260            255            -1.92%
BenchmarkAnchoredLiteralShortNonMatch     164            161            -1.83%
BenchmarkOnePassShortB                    623            612            -1.77%
BenchmarkOnePassShortA                    801            788            -1.62%
BenchmarkMatchClass                       4094           4033           -1.49%
BenchmarkMatchEasy0_32M                   14078800       13890704       -1.34%
BenchmarkMatchHard_32K                    4095844        4045820        -1.22%
BenchmarkMatchEasy1_1K                    1663           1643           -1.20%
BenchmarkMatchHard_1M                     131261708      129708215      -1.18%
BenchmarkMatchHard_32M                    4210112412     4169292003     -0.97%
BenchmarkMatchMedium_32K                  2460752        2438611        -0.90%
BenchmarkMatchEasy0_1M                    422914         419672         -0.77%
BenchmarkMatchMedium_1M                   78581121       78040160       -0.69%
BenchmarkMatchMedium_32M                  2515287278     2498464906     -0.67%
BenchmarkMatchMedium_32                   1754           1746           -0.46%
BenchmarkMatchMedium_1K                   52105          52106          +0.00%
BenchmarkAnchoredLiteralLongNonMatch      185            185            +0.00%
BenchmarkMatchEasy1_32                    107            107            +0.00%
BenchmarkOnePassLongNotPrefix             505            505            +0.00%
BenchmarkOnePassLongPrefix                147            147            +0.00%

The godoc binary is ~0.12% smaller after this CL.

Updates #5729.

toolstash -cmp passes for all architectures other than amd64 and amd64p32.

Other architectures can be done in follow-up CLs.

Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
src/cmd/6g/galign.go
src/cmd/6g/gsubr.go
src/cmd/6g/prog.go
src/cmd/internal/gc/cgen.go
src/cmd/internal/gc/cplx.go
src/cmd/internal/gc/go.go
src/cmd/internal/gc/syntax.go

index 8ba8dd434eb3a2c76495901e3abf4276f323c797..5b41a670ec2801394912e1f8816f3a80fb4c1423 100644 (file)
@@ -10,11 +10,11 @@ import (
        "cmd/internal/obj/x86"
 )
 
-var thechar int = '6'
-
-var thestring string = "amd64"
-
-var thelinkarch *obj.LinkArch = &x86.Linkamd64
+var (
+       thechar     int           = '6'
+       thestring   string        = "amd64"
+       thelinkarch *obj.LinkArch = &x86.Linkamd64
+)
 
 func linkarchinit() {
        if obj.Getgoarch() == "amd64p32" {
@@ -27,13 +27,12 @@ func linkarchinit() {
 
 var MAXWIDTH int64 = 1 << 50
 
-var addptr int = x86.AADDQ
-
-var movptr int = x86.AMOVQ
-
-var leaptr int = x86.ALEAQ
-
-var cmpptr int = x86.ACMPQ
+var (
+       addptr int = x86.AADDQ
+       movptr int = x86.AMOVQ
+       leaptr int = x86.ALEAQ
+       cmpptr int = x86.ACMPQ
+)
 
 /*
  * go declares several platform-specific type aliases:
@@ -101,6 +100,7 @@ func main() {
        gc.Thearch.Expandchecks = expandchecks
        gc.Thearch.Getg = getg
        gc.Thearch.Gins = gins
+       gc.Thearch.Ginsboolval = ginsboolval
        gc.Thearch.Ginscon = ginscon
        gc.Thearch.Ginsnop = ginsnop
        gc.Thearch.Gmove = gmove
index 0ac103e853389944ce2036b49e17326fd48165b4..53d0f038d91a649c058a7f8deecec8ee5763afc0 100644 (file)
@@ -99,6 +99,10 @@ func ginscon(as int, c int64, n2 *gc.Node) {
        gins(as, &n1, n2)
 }
 
+func ginsboolval(a int, n *gc.Node) {
+       gins(jmptoset(a), nil, n)
+}
+
 /*
  * set up nodes representing 2^63
  */
@@ -698,6 +702,21 @@ func optoas(op int, t *gc.Type) int {
                gc.OPS<<16 | gc.TFLOAT64:
                a = x86.AJPS
 
+       case gc.OPC<<16 | gc.TBOOL,
+               gc.OPC<<16 | gc.TINT8,
+               gc.OPC<<16 | gc.TUINT8,
+               gc.OPC<<16 | gc.TINT16,
+               gc.OPC<<16 | gc.TUINT16,
+               gc.OPC<<16 | gc.TINT32,
+               gc.OPC<<16 | gc.TUINT32,
+               gc.OPC<<16 | gc.TINT64,
+               gc.OPC<<16 | gc.TUINT64,
+               gc.OPC<<16 | gc.TPTR32,
+               gc.OPC<<16 | gc.TPTR64,
+               gc.OPC<<16 | gc.TFLOAT32,
+               gc.OPC<<16 | gc.TFLOAT64:
+               a = x86.AJPC
+
        case gc.OLT<<16 | gc.TINT8,
                gc.OLT<<16 | gc.TINT16,
                gc.OLT<<16 | gc.TINT32,
@@ -902,7 +921,8 @@ func optoas(op int, t *gc.Type) int {
                gc.OMINUS<<16 | gc.TPTR64:
                a = x86.ANEGQ
 
-       case gc.OAND<<16 | gc.TINT8,
+       case gc.OAND<<16 | gc.TBOOL,
+               gc.OAND<<16 | gc.TINT8,
                gc.OAND<<16 | gc.TUINT8:
                a = x86.AANDB
 
@@ -920,7 +940,8 @@ func optoas(op int, t *gc.Type) int {
                gc.OAND<<16 | gc.TPTR64:
                a = x86.AANDQ
 
-       case gc.OOR<<16 | gc.TINT8,
+       case gc.OOR<<16 | gc.TBOOL,
+               gc.OOR<<16 | gc.TINT8,
                gc.OOR<<16 | gc.TUINT8:
                a = x86.AORB
 
@@ -1134,6 +1155,46 @@ func optoas(op int, t *gc.Type) int {
        return a
 }
 
+// jmptoset returns ASETxx for AJxx.
+func jmptoset(jmp int) int {
+       switch jmp {
+       case x86.AJEQ:
+               return x86.ASETEQ
+       case x86.AJNE:
+               return x86.ASETNE
+       case x86.AJLT:
+               return x86.ASETLT
+       case x86.AJCS:
+               return x86.ASETCS
+       case x86.AJLE:
+               return x86.ASETLE
+       case x86.AJLS:
+               return x86.ASETLS
+       case x86.AJGT:
+               return x86.ASETGT
+       case x86.AJHI:
+               return x86.ASETHI
+       case x86.AJGE:
+               return x86.ASETGE
+       case x86.AJCC:
+               return x86.ASETCC
+       case x86.AJMI:
+               return x86.ASETMI
+       case x86.AJOC:
+               return x86.ASETOC
+       case x86.AJOS:
+               return x86.ASETOS
+       case x86.AJPC:
+               return x86.ASETPC
+       case x86.AJPL:
+               return x86.ASETPL
+       case x86.AJPS:
+               return x86.ASETPS
+       }
+       gc.Fatal("jmptoset: no entry for %v", gc.Oconv(jmp, 0))
+       panic("unreachable")
+}
+
 const (
        ODynam   = 1 << 0
        OAddable = 1 << 1
index fe9f0138518429a6bd2980d83650368af83798dc..4c41d87ccbd44d317f2b2e8463b4880e8562a16f 100644 (file)
@@ -196,6 +196,22 @@ var progtable = [x86.ALAST]obj.ProgInfo{
        x86.ASBBL:     {gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
        x86.ASBBQ:     {gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
        x86.ASBBW:     {gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
+       x86.ASETCC:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETCS:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETEQ:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETGE:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETGT:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETHI:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETLE:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETLS:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETLT:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETMI:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETNE:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETOC:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETOS:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETPC:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETPL:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
+       x86.ASETPS:    {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
        x86.ASHLB:     {gc.SizeB | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
        x86.ASHLL:     {gc.SizeL | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
        x86.ASHLQ:     {gc.SizeQ | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
index 3a3a4c6076192f823cf9bf5d8a34839f3fea7111..b6691efef538727e9c3f65acce8774b8f671b158 100644 (file)
@@ -345,20 +345,12 @@ func Cgen(n *Node, res *Node) {
                Dump("cgen-res", res)
                Fatal("cgen: unknown op %v", Nconv(n, obj.FmtShort|obj.FmtSign))
 
-       // these call bgen to get a bool value
        case OOROR, OANDAND,
                OEQ, ONE,
                OLT, OLE,
                OGE, OGT,
                ONOT:
-               p1 := Gbranch(obj.AJMP, nil, 0)
-               p2 := Pc
-               Thearch.Gmove(Nodbool(true), res)
-               p3 := Gbranch(obj.AJMP, nil, 0)
-               Patch(p1, Pc)
-               Bgen(n, true, 0, p2)
-               Thearch.Gmove(Nodbool(false), res)
-               Patch(p3, Pc)
+               Bvgen(n, res, true)
                return
 
        case OPLUS:
@@ -1640,11 +1632,56 @@ func Igen(n *Node, a *Node, res *Node) {
 //             goto to
 //     }
 func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
+       bgenx(n, nil, wantTrue, likely, to)
+}
+
+// Bvgen generates code for calculating boolean values:
+//     res = n == wantTrue
+func Bvgen(n, res *Node, wantTrue bool) {
+       if Thearch.Ginsboolval == nil {
+               // Direct value generation not implemented for this architecture.
+               // Implement using jumps.
+               bvgenjump(n, res, wantTrue, true)
+               return
+       }
+       bgenx(n, res, wantTrue, 0, nil)
+}
+
+// bvgenjump implements boolean value generation using jumps:
+//     if n == wantTrue {
+//             res = 1
+//     } else {
+//             res = 0
+//     }
+// geninit controls whether n's Ninit is generated.
+func bvgenjump(n, res *Node, wantTrue, geninit bool) {
+       init := n.Ninit
+       if !geninit {
+               n.Ninit = nil
+       }
+       p1 := Gbranch(obj.AJMP, nil, 0)
+       p2 := Pc
+       Thearch.Gmove(Nodbool(true), res)
+       p3 := Gbranch(obj.AJMP, nil, 0)
+       Patch(p1, Pc)
+       Bgen(n, wantTrue, 0, p2)
+       Thearch.Gmove(Nodbool(false), res)
+       Patch(p3, Pc)
+       n.Ninit = init
+}
+
+// bgenx is the backend for Bgen and Bvgen.
+// If res is nil, it generates a branch.
+// Otherwise, it generates a boolean value.
+func bgenx(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
        if Debug['g'] != 0 {
-               fmt.Printf("\nbgen wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to)
-               Dump("bgen", n)
+               fmt.Printf("\nbgenx wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to)
+               Dump("n", n)
+               Dump("res", res)
        }
 
+       genval := res != nil
+
        if n == nil {
                n = Nodbool(true)
        }
@@ -1659,9 +1696,7 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
        }
 
        if n.Type.Etype != TBOOL {
-               Yyerror("cgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0))
-               Patch(Thearch.Gins(obj.AEND, nil, nil), to)
-               return
+               Fatal("bgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0))
        }
 
        for n.Op == OCONVNOP {
@@ -1670,44 +1705,90 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
        }
 
        if Thearch.Bgen_float != nil && n.Left != nil && Isfloat[n.Left.Type.Etype] {
+               if genval {
+                       bvgenjump(n, res, wantTrue, false)
+                       return
+               }
                Thearch.Bgen_float(n, wantTrue, likely, to)
                return
        }
 
        switch n.Op {
        default:
+               if genval {
+                       Cgen(n, res)
+                       if !wantTrue {
+                               Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
+                       }
+                       return
+               }
+
                var tmp Node
                Regalloc(&tmp, n.Type, nil)
                Cgen(n, &tmp)
-               bgenNonZero(&tmp, wantTrue, likely, to)
+               bgenNonZero(&tmp, nil, wantTrue, likely, to)
                Regfree(&tmp)
                return
 
        case ONAME:
+               if genval {
+                       // 5g, 7g, and 9g might need a temporary or other help here,
+                       // but they don't support direct generation of a bool value yet.
+                       // We can fix that as we go.
+                       switch Ctxt.Arch.Thechar {
+                       case '5', '7', '9':
+                               Fatal("genval 5g, 7g, 9g ONAMES not fully implemented")
+                       }
+                       Cgen(n, res)
+                       if !wantTrue {
+                               Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
+                       }
+                       return
+               }
+
                if n.Addable && Ctxt.Arch.Thechar != '5' && Ctxt.Arch.Thechar != '7' && Ctxt.Arch.Thechar != '9' {
                        // no need for a temporary
-                       bgenNonZero(n, wantTrue, likely, to)
+                       bgenNonZero(n, nil, wantTrue, likely, to)
                        return
                }
                var tmp Node
                Regalloc(&tmp, n.Type, nil)
                Cgen(n, &tmp)
-               bgenNonZero(&tmp, wantTrue, likely, to)
+               bgenNonZero(&tmp, nil, wantTrue, likely, to)
                Regfree(&tmp)
                return
 
        case OLITERAL:
-               // n is a constant. If n == wantTrue, jump; otherwise do nothing.
+               // n is a constant.
                if !Isconst(n, CTBOOL) {
                        Fatal("bgen: non-bool const %v\n", Nconv(n, obj.FmtLong))
                }
+               if genval {
+                       Cgen(Nodbool(wantTrue == n.Val.U.Bval), res)
+                       return
+               }
+               // If n == wantTrue, jump; otherwise do nothing.
                if wantTrue == n.Val.U.Bval {
                        Patch(Gbranch(obj.AJMP, nil, likely), to)
                }
                return
 
        case OANDAND, OOROR:
-               if (n.Op == OANDAND) == wantTrue {
+               and := (n.Op == OANDAND) == wantTrue
+               if genval {
+                       p1 := Gbranch(obj.AJMP, nil, 0)
+                       p2 := Gbranch(obj.AJMP, nil, 0)
+                       Patch(p2, Pc)
+                       Cgen(Nodbool(!and), res)
+                       p3 := Gbranch(obj.AJMP, nil, 0)
+                       Patch(p1, Pc)
+                       Bgen(n.Left, wantTrue != and, 0, p2)
+                       Bvgen(n.Right, res, wantTrue)
+                       Patch(p3, Pc)
+                       return
+               }
+
+               if and {
                        p1 := Gbranch(obj.AJMP, nil, 0)
                        p2 := Gbranch(obj.AJMP, nil, 0)
                        Patch(p1, Pc)
@@ -1726,7 +1807,7 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
                if n.Left == nil || n.Left.Type == nil {
                        return
                }
-               Bgen(n.Left, !wantTrue, likely, to)
+               bgenx(n.Left, res, !wantTrue, likely, to)
                return
 
        case OEQ, ONE, OLT, OGT, OLE, OGE:
@@ -1743,15 +1824,21 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
        if !wantTrue {
                if Isfloat[nr.Type.Etype] {
                        // Brcom is not valid on floats when NaN is involved.
+                       ll := n.Ninit // avoid re-genning Ninit
+                       n.Ninit = nil
+                       if genval {
+                               bgenx(n, res, true, likely, to)
+                               Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res) // res = !res
+                               n.Ninit = ll
+                               return
+                       }
                        p1 := Gbranch(obj.AJMP, nil, 0)
                        p2 := Gbranch(obj.AJMP, nil, 0)
                        Patch(p1, Pc)
-                       ll := n.Ninit // avoid re-genning Ninit
-                       n.Ninit = nil
-                       Bgen(n, true, -likely, p2)
-                       n.Ninit = ll
+                       bgenx(n, res, true, -likely, p2)
                        Patch(Gbranch(obj.AJMP, nil, 0), to)
                        Patch(p2, Pc)
+                       n.Ninit = ll
                        return
                }
 
@@ -1786,17 +1873,22 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
                Regalloc(&tmp, ptr.Type, &ptr)
                Cgen(&ptr, &tmp)
                Regfree(&ptr)
-               bgenNonZero(&tmp, a == OEQ != wantTrue, likely, to)
+               bgenNonZero(&tmp, res, a == OEQ != wantTrue, likely, to)
                Regfree(&tmp)
                return
        }
 
        if Iscomplex[nl.Type.Etype] {
-               complexbool(a, nl, nr, wantTrue, likely, to)
+               complexbool(a, nl, nr, res, wantTrue, likely, to)
                return
        }
 
        if Ctxt.Arch.Regsize == 4 && Is64(nr.Type) {
+               if genval {
+                       // TODO: Teach Cmp64 to generate boolean values and remove this.
+                       bvgenjump(n, res, wantTrue, false)
+                       return
+               }
                if !nl.Addable || Isconst(nl, CTINT) {
                        nl = CgenTemp(nl)
                }
@@ -1838,7 +1930,7 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
 
                if Smallintconst(nr) && Ctxt.Arch.Thechar != '9' {
                        Thearch.Gins(Thearch.Optoas(OCMP, nr.Type), nl, nr)
-                       Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to)
+                       bins(nr.Type, res, a, likely, to)
                        return
                }
 
@@ -1869,6 +1961,9 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
        if Isfloat[nl.Type.Etype] {
                switch Ctxt.Arch.Thechar {
                case '5':
+                       if genval {
+                               Fatal("genval 5g Isfloat special cases not implemented")
+                       }
                        switch n.Op {
                        case ONE:
                                Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nr.Type, likely), to)
@@ -1883,19 +1978,40 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
                        switch n.Op {
                        case OEQ:
                                // neither NE nor P
-                               p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely)
-                               p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely)
-                               Patch(Gbranch(obj.AJMP, nil, 0), to)
-                               Patch(p1, Pc)
-                               Patch(p2, Pc)
+                               if genval {
+                                       var reg Node
+                                       Regalloc(&reg, Types[TBOOL], nil)
+                                       Thearch.Ginsboolval(Thearch.Optoas(OEQ, nr.Type), &reg)
+                                       Thearch.Ginsboolval(Thearch.Optoas(OPC, nr.Type), res)
+                                       Thearch.Gins(Thearch.Optoas(OAND, Types[TBOOL]), &reg, res)
+                                       Regfree(&reg)
+                               } else {
+                                       p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely)
+                                       p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely)
+                                       Patch(Gbranch(obj.AJMP, nil, 0), to)
+                                       Patch(p1, Pc)
+                                       Patch(p2, Pc)
+                               }
                                return
                        case ONE:
                                // either NE or P
-                               Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to)
-                               Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to)
+                               if genval {
+                                       var reg Node
+                                       Regalloc(&reg, Types[TBOOL], nil)
+                                       Thearch.Ginsboolval(Thearch.Optoas(ONE, nr.Type), &reg)
+                                       Thearch.Ginsboolval(Thearch.Optoas(OPS, nr.Type), res)
+                                       Thearch.Gins(Thearch.Optoas(OOR, Types[TBOOL]), &reg, res)
+                                       Regfree(&reg)
+                               } else {
+                                       Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to)
+                                       Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to)
+                               }
                                return
                        }
                case '7', '9':
+                       if genval {
+                               Fatal("genval 7g, 9g Isfloat special cases not implemented")
+                       }
                        switch n.Op {
                        // On arm64 and ppc64, <= and >= mishandle NaN. Must decompose into < or > and =.
                        // TODO(josh): Convert a <= b to b > a instead?
@@ -1912,11 +2028,11 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
                }
        }
 
-       // Not a special case. Insert an appropriate conditional jump.
-       Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to)
+       // Not a special case. Insert the conditional jump or value gen.
+       bins(nr.Type, res, a, likely, to)
 }
 
-func bgenNonZero(n *Node, wantTrue bool, likely int, to *obj.Prog) {
+func bgenNonZero(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
        // TODO: Optimize on systems that can compare to zero easily.
        a := ONE
        if !wantTrue {
@@ -1925,7 +2041,21 @@ func bgenNonZero(n *Node, wantTrue bool, likely int, to *obj.Prog) {
        var zero Node
        Nodconst(&zero, n.Type, 0)
        Thearch.Gins(Thearch.Optoas(OCMP, n.Type), n, &zero)
-       Patch(Gbranch(Thearch.Optoas(a, n.Type), n.Type, likely), to)
+       bins(n.Type, res, a, likely, to)
+}
+
+// bins inserts an instruction to handle the result of a compare.
+// If res is non-nil, it inserts appropriate value generation instructions.
+// If res is nil, it inserts a branch to to.
+func bins(typ *Type, res *Node, a, likely int, to *obj.Prog) {
+       a = Thearch.Optoas(a, typ)
+       if res != nil {
+               // value gen
+               Thearch.Ginsboolval(a, res)
+       } else {
+               // jump
+               Patch(Gbranch(a, typ, likely), to)
+       }
 }
 
 /*
index c457bbf924d3fd0f71af77de092e8d2c927bde03..cf48c922d7c90159879f9b1cf1c5cef2c329be9b 100644 (file)
@@ -14,7 +14,7 @@ func overlap_cplx(f *Node, t *Node) bool {
        return f.Op == OINDREG && t.Op == OINDREG && f.Xoffset+f.Type.Width >= t.Xoffset && t.Xoffset+t.Type.Width >= f.Xoffset
 }
 
-func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) {
+func complexbool(op int, nl, nr, res *Node, wantTrue bool, likely int, to *obj.Prog) {
        // make both sides addable in ullman order
        if nr != nil {
                if nl.Ullman > nr.Ullman && !nl.Addable {
@@ -35,7 +35,10 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog)
        subnode(&rreal, &rimag, nr)
 
        // build tree
-       // real(l) == real(r) && imag(l) == imag(r)
+       // if branching:
+       //      real(l) == real(r) && imag(l) == imag(r)
+       // if generating a value, use a branch-free version:
+       //      real(l) == real(r) & imag(l) == imag(r)
        realeq := Node{
                Op:    OEQ,
                Left:  &lreal,
@@ -55,6 +58,19 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog)
                Type:  Types[TBOOL],
        }
 
+       if res != nil {
+               // generating a value
+               and.Op = OAND
+               if op == ONE {
+                       and.Op = OOR
+                       realeq.Op = ONE
+                       imageq.Op = ONE
+               }
+               Bvgen(&and, res, true)
+               return
+       }
+
+       // generating a branch
        if op == ONE {
                wantTrue = !wantTrue
        }
index c0ec7b5ef41faf4cd72bcf6c7f1ff2964c6ca46a..d399ebbd36d510948419187159eed238f0d411b8 100644 (file)
@@ -791,6 +791,13 @@ type Arch struct {
        Expandchecks func(*obj.Prog)
        Getg         func(*Node)
        Gins         func(int, *Node, *Node) *obj.Prog
+       // Ginsboolval inserts instructions to convert the result
+       // of a just-completed comparison to a boolean value.
+       // The first argument is the conditional jump instruction
+       // corresponding to the desired value.
+       // The second argument is the destination.
+       // If not present, Ginsboolval will be emulated with jumps.
+       Ginsboolval  func(int, *Node)
        Ginscon      func(int, int64, *Node)
        Ginsnop      func()
        Gmove        func(*Node, *Node)
index 1012c66c45e050606318041035e68e9168fed3ae..d448188a550e1115b1ae35ee8eeef6781f6a74ce 100644 (file)
@@ -306,6 +306,7 @@ const (
        ORROTC  // right rotate-carry: ARCR.
        ORETJMP // return to other function
        OPS     // compare parity set (for x86 NaN check)
+       OPC     // compare parity clear (for x86 NaN check)
        OSQRT   // sqrt(float64), on systems that have hw support
        OGETG   // runtime.getg() (read g pointer)