From: Josh Bleecher Snyder Date: Wed, 8 Apr 2015 16:54:15 +0000 (-0700) Subject: cmd/internal/gc, cmd/6g: generate boolean values without jumps X-Git-Tag: go1.5beta1~1036 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=13cb62c764dceefe9bbf16817200629781f9564c;p=gostls13.git cmd/internal/gc, cmd/6g: generate boolean values without jumps Use SETcc instructions instead of Jcc to generate boolean values. This generates shorter, jump-free code, which may in turn enable other peephole optimizations. For example, given func f(i, j int) bool { return i == j } Before "".f t=1 size=32 value=0 args=0x18 locals=0x0 0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24 0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB) 0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX 0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP 0x000a 00010 (x.go:4) CMPQ BX, BP 0x000d 00013 (x.go:4) JEQ 21 0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP) 0x0014 00020 (x.go:4) RET 0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP) 0x001a 00026 (x.go:4) JMP 20 After "".f t=1 size=32 value=0 args=0x18 locals=0x0 0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24 0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB) 0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX 0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP 0x000a 00010 (x.go:4) CMPQ BX, BP 0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP) 0x0012 00018 (x.go:4) RET regexp benchmarks, best of 12 runs: benchmark old ns/op new ns/op delta BenchmarkNotOnePassShortB 782 733 -6.27% BenchmarkLiteral 180 171 -5.00% BenchmarkNotLiteral 2855 2721 -4.69% BenchmarkMatchHard_32 2672 2557 -4.30% BenchmarkMatchHard_1K 80182 76732 -4.30% BenchmarkMatchEasy1_32M 76440180 73304748 -4.10% BenchmarkMatchEasy1_32K 68798 66350 -3.56% BenchmarkAnchoredLongMatch 482 465 -3.53% BenchmarkMatchEasy1_1M 2373042 2292692 -3.39% BenchmarkReplaceAll 2776 2690 -3.10% BenchmarkNotOnePassShortA 1397 1360 -2.65% BenchmarkMatchClass_InRange 3842 3742 -2.60% BenchmarkMatchEasy0_32 125 122 -2.40% BenchmarkMatchEasy0_32K 11414 11164 -2.19% BenchmarkMatchEasy0_1K 668 654 -2.10% BenchmarkAnchoredShortMatch 260 255 -1.92% BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83% BenchmarkOnePassShortB 623 612 -1.77% BenchmarkOnePassShortA 801 788 -1.62% BenchmarkMatchClass 4094 4033 -1.49% BenchmarkMatchEasy0_32M 14078800 13890704 -1.34% BenchmarkMatchHard_32K 4095844 4045820 -1.22% BenchmarkMatchEasy1_1K 1663 1643 -1.20% BenchmarkMatchHard_1M 131261708 129708215 -1.18% BenchmarkMatchHard_32M 4210112412 4169292003 -0.97% BenchmarkMatchMedium_32K 2460752 2438611 -0.90% BenchmarkMatchEasy0_1M 422914 419672 -0.77% BenchmarkMatchMedium_1M 78581121 78040160 -0.69% BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67% BenchmarkMatchMedium_32 1754 1746 -0.46% BenchmarkMatchMedium_1K 52105 52106 +0.00% BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00% BenchmarkMatchEasy1_32 107 107 +0.00% BenchmarkOnePassLongNotPrefix 505 505 +0.00% BenchmarkOnePassLongPrefix 147 147 +0.00% The godoc binary is ~0.12% smaller after this CL. Updates #5729. toolstash -cmp passes for all architectures other than amd64 and amd64p32. Other architectures can be done in follow-up CLs. Change-Id: I0e167e259274b722958567fc0af83a17ca002da7 Reviewed-on: https://go-review.googlesource.com/2284 Reviewed-by: Russ Cox --- diff --git a/src/cmd/6g/galign.go b/src/cmd/6g/galign.go index 8ba8dd434e..5b41a670ec 100644 --- a/src/cmd/6g/galign.go +++ b/src/cmd/6g/galign.go @@ -10,11 +10,11 @@ import ( "cmd/internal/obj/x86" ) -var thechar int = '6' - -var thestring string = "amd64" - -var thelinkarch *obj.LinkArch = &x86.Linkamd64 +var ( + thechar int = '6' + thestring string = "amd64" + thelinkarch *obj.LinkArch = &x86.Linkamd64 +) func linkarchinit() { if obj.Getgoarch() == "amd64p32" { @@ -27,13 +27,12 @@ func linkarchinit() { var MAXWIDTH int64 = 1 << 50 -var addptr int = x86.AADDQ - -var movptr int = x86.AMOVQ - -var leaptr int = x86.ALEAQ - -var cmpptr int = x86.ACMPQ +var ( + addptr int = x86.AADDQ + movptr int = x86.AMOVQ + leaptr int = x86.ALEAQ + cmpptr int = x86.ACMPQ +) /* * go declares several platform-specific type aliases: @@ -101,6 +100,7 @@ func main() { gc.Thearch.Expandchecks = expandchecks gc.Thearch.Getg = getg gc.Thearch.Gins = gins + gc.Thearch.Ginsboolval = ginsboolval gc.Thearch.Ginscon = ginscon gc.Thearch.Ginsnop = ginsnop gc.Thearch.Gmove = gmove diff --git a/src/cmd/6g/gsubr.go b/src/cmd/6g/gsubr.go index 0ac103e853..53d0f038d9 100644 --- a/src/cmd/6g/gsubr.go +++ b/src/cmd/6g/gsubr.go @@ -99,6 +99,10 @@ func ginscon(as int, c int64, n2 *gc.Node) { gins(as, &n1, n2) } +func ginsboolval(a int, n *gc.Node) { + gins(jmptoset(a), nil, n) +} + /* * set up nodes representing 2^63 */ @@ -698,6 +702,21 @@ func optoas(op int, t *gc.Type) int { gc.OPS<<16 | gc.TFLOAT64: a = x86.AJPS + case gc.OPC<<16 | gc.TBOOL, + gc.OPC<<16 | gc.TINT8, + gc.OPC<<16 | gc.TUINT8, + gc.OPC<<16 | gc.TINT16, + gc.OPC<<16 | gc.TUINT16, + gc.OPC<<16 | gc.TINT32, + gc.OPC<<16 | gc.TUINT32, + gc.OPC<<16 | gc.TINT64, + gc.OPC<<16 | gc.TUINT64, + gc.OPC<<16 | gc.TPTR32, + gc.OPC<<16 | gc.TPTR64, + gc.OPC<<16 | gc.TFLOAT32, + gc.OPC<<16 | gc.TFLOAT64: + a = x86.AJPC + case gc.OLT<<16 | gc.TINT8, gc.OLT<<16 | gc.TINT16, gc.OLT<<16 | gc.TINT32, @@ -902,7 +921,8 @@ func optoas(op int, t *gc.Type) int { gc.OMINUS<<16 | gc.TPTR64: a = x86.ANEGQ - case gc.OAND<<16 | gc.TINT8, + case gc.OAND<<16 | gc.TBOOL, + gc.OAND<<16 | gc.TINT8, gc.OAND<<16 | gc.TUINT8: a = x86.AANDB @@ -920,7 +940,8 @@ func optoas(op int, t *gc.Type) int { gc.OAND<<16 | gc.TPTR64: a = x86.AANDQ - case gc.OOR<<16 | gc.TINT8, + case gc.OOR<<16 | gc.TBOOL, + gc.OOR<<16 | gc.TINT8, gc.OOR<<16 | gc.TUINT8: a = x86.AORB @@ -1134,6 +1155,46 @@ func optoas(op int, t *gc.Type) int { return a } +// jmptoset returns ASETxx for AJxx. +func jmptoset(jmp int) int { + switch jmp { + case x86.AJEQ: + return x86.ASETEQ + case x86.AJNE: + return x86.ASETNE + case x86.AJLT: + return x86.ASETLT + case x86.AJCS: + return x86.ASETCS + case x86.AJLE: + return x86.ASETLE + case x86.AJLS: + return x86.ASETLS + case x86.AJGT: + return x86.ASETGT + case x86.AJHI: + return x86.ASETHI + case x86.AJGE: + return x86.ASETGE + case x86.AJCC: + return x86.ASETCC + case x86.AJMI: + return x86.ASETMI + case x86.AJOC: + return x86.ASETOC + case x86.AJOS: + return x86.ASETOS + case x86.AJPC: + return x86.ASETPC + case x86.AJPL: + return x86.ASETPL + case x86.AJPS: + return x86.ASETPS + } + gc.Fatal("jmptoset: no entry for %v", gc.Oconv(jmp, 0)) + panic("unreachable") +} + const ( ODynam = 1 << 0 OAddable = 1 << 1 diff --git a/src/cmd/6g/prog.go b/src/cmd/6g/prog.go index fe9f013851..4c41d87ccb 100644 --- a/src/cmd/6g/prog.go +++ b/src/cmd/6g/prog.go @@ -196,6 +196,22 @@ var progtable = [x86.ALAST]obj.ProgInfo{ x86.ASBBL: {gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0}, x86.ASBBQ: {gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0}, x86.ASBBW: {gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0}, + x86.ASETCC: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETCS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETEQ: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETGE: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETGT: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETHI: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETLE: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETLS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETLT: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETMI: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETNE: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETOC: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETOS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETPC: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETPL: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, + x86.ASETPS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0}, x86.ASHLB: {gc.SizeB | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0}, x86.ASHLL: {gc.SizeL | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0}, x86.ASHLQ: {gc.SizeQ | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0}, diff --git a/src/cmd/internal/gc/cgen.go b/src/cmd/internal/gc/cgen.go index 3a3a4c6076..b6691efef5 100644 --- a/src/cmd/internal/gc/cgen.go +++ b/src/cmd/internal/gc/cgen.go @@ -345,20 +345,12 @@ func Cgen(n *Node, res *Node) { Dump("cgen-res", res) Fatal("cgen: unknown op %v", Nconv(n, obj.FmtShort|obj.FmtSign)) - // these call bgen to get a bool value case OOROR, OANDAND, OEQ, ONE, OLT, OLE, OGE, OGT, ONOT: - p1 := Gbranch(obj.AJMP, nil, 0) - p2 := Pc - Thearch.Gmove(Nodbool(true), res) - p3 := Gbranch(obj.AJMP, nil, 0) - Patch(p1, Pc) - Bgen(n, true, 0, p2) - Thearch.Gmove(Nodbool(false), res) - Patch(p3, Pc) + Bvgen(n, res, true) return case OPLUS: @@ -1640,11 +1632,56 @@ func Igen(n *Node, a *Node, res *Node) { // goto to // } func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { + bgenx(n, nil, wantTrue, likely, to) +} + +// Bvgen generates code for calculating boolean values: +// res = n == wantTrue +func Bvgen(n, res *Node, wantTrue bool) { + if Thearch.Ginsboolval == nil { + // Direct value generation not implemented for this architecture. + // Implement using jumps. + bvgenjump(n, res, wantTrue, true) + return + } + bgenx(n, res, wantTrue, 0, nil) +} + +// bvgenjump implements boolean value generation using jumps: +// if n == wantTrue { +// res = 1 +// } else { +// res = 0 +// } +// geninit controls whether n's Ninit is generated. +func bvgenjump(n, res *Node, wantTrue, geninit bool) { + init := n.Ninit + if !geninit { + n.Ninit = nil + } + p1 := Gbranch(obj.AJMP, nil, 0) + p2 := Pc + Thearch.Gmove(Nodbool(true), res) + p3 := Gbranch(obj.AJMP, nil, 0) + Patch(p1, Pc) + Bgen(n, wantTrue, 0, p2) + Thearch.Gmove(Nodbool(false), res) + Patch(p3, Pc) + n.Ninit = init +} + +// bgenx is the backend for Bgen and Bvgen. +// If res is nil, it generates a branch. +// Otherwise, it generates a boolean value. +func bgenx(n, res *Node, wantTrue bool, likely int, to *obj.Prog) { if Debug['g'] != 0 { - fmt.Printf("\nbgen wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to) - Dump("bgen", n) + fmt.Printf("\nbgenx wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to) + Dump("n", n) + Dump("res", res) } + genval := res != nil + if n == nil { n = Nodbool(true) } @@ -1659,9 +1696,7 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { } if n.Type.Etype != TBOOL { - Yyerror("cgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0)) - Patch(Thearch.Gins(obj.AEND, nil, nil), to) - return + Fatal("bgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0)) } for n.Op == OCONVNOP { @@ -1670,44 +1705,90 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { } if Thearch.Bgen_float != nil && n.Left != nil && Isfloat[n.Left.Type.Etype] { + if genval { + bvgenjump(n, res, wantTrue, false) + return + } Thearch.Bgen_float(n, wantTrue, likely, to) return } switch n.Op { default: + if genval { + Cgen(n, res) + if !wantTrue { + Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res) + } + return + } + var tmp Node Regalloc(&tmp, n.Type, nil) Cgen(n, &tmp) - bgenNonZero(&tmp, wantTrue, likely, to) + bgenNonZero(&tmp, nil, wantTrue, likely, to) Regfree(&tmp) return case ONAME: + if genval { + // 5g, 7g, and 9g might need a temporary or other help here, + // but they don't support direct generation of a bool value yet. + // We can fix that as we go. + switch Ctxt.Arch.Thechar { + case '5', '7', '9': + Fatal("genval 5g, 7g, 9g ONAMES not fully implemented") + } + Cgen(n, res) + if !wantTrue { + Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res) + } + return + } + if n.Addable && Ctxt.Arch.Thechar != '5' && Ctxt.Arch.Thechar != '7' && Ctxt.Arch.Thechar != '9' { // no need for a temporary - bgenNonZero(n, wantTrue, likely, to) + bgenNonZero(n, nil, wantTrue, likely, to) return } var tmp Node Regalloc(&tmp, n.Type, nil) Cgen(n, &tmp) - bgenNonZero(&tmp, wantTrue, likely, to) + bgenNonZero(&tmp, nil, wantTrue, likely, to) Regfree(&tmp) return case OLITERAL: - // n is a constant. If n == wantTrue, jump; otherwise do nothing. + // n is a constant. if !Isconst(n, CTBOOL) { Fatal("bgen: non-bool const %v\n", Nconv(n, obj.FmtLong)) } + if genval { + Cgen(Nodbool(wantTrue == n.Val.U.Bval), res) + return + } + // If n == wantTrue, jump; otherwise do nothing. if wantTrue == n.Val.U.Bval { Patch(Gbranch(obj.AJMP, nil, likely), to) } return case OANDAND, OOROR: - if (n.Op == OANDAND) == wantTrue { + and := (n.Op == OANDAND) == wantTrue + if genval { + p1 := Gbranch(obj.AJMP, nil, 0) + p2 := Gbranch(obj.AJMP, nil, 0) + Patch(p2, Pc) + Cgen(Nodbool(!and), res) + p3 := Gbranch(obj.AJMP, nil, 0) + Patch(p1, Pc) + Bgen(n.Left, wantTrue != and, 0, p2) + Bvgen(n.Right, res, wantTrue) + Patch(p3, Pc) + return + } + + if and { p1 := Gbranch(obj.AJMP, nil, 0) p2 := Gbranch(obj.AJMP, nil, 0) Patch(p1, Pc) @@ -1726,7 +1807,7 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { if n.Left == nil || n.Left.Type == nil { return } - Bgen(n.Left, !wantTrue, likely, to) + bgenx(n.Left, res, !wantTrue, likely, to) return case OEQ, ONE, OLT, OGT, OLE, OGE: @@ -1743,15 +1824,21 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { if !wantTrue { if Isfloat[nr.Type.Etype] { // Brcom is not valid on floats when NaN is involved. + ll := n.Ninit // avoid re-genning Ninit + n.Ninit = nil + if genval { + bgenx(n, res, true, likely, to) + Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res) // res = !res + n.Ninit = ll + return + } p1 := Gbranch(obj.AJMP, nil, 0) p2 := Gbranch(obj.AJMP, nil, 0) Patch(p1, Pc) - ll := n.Ninit // avoid re-genning Ninit - n.Ninit = nil - Bgen(n, true, -likely, p2) - n.Ninit = ll + bgenx(n, res, true, -likely, p2) Patch(Gbranch(obj.AJMP, nil, 0), to) Patch(p2, Pc) + n.Ninit = ll return } @@ -1786,17 +1873,22 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { Regalloc(&tmp, ptr.Type, &ptr) Cgen(&ptr, &tmp) Regfree(&ptr) - bgenNonZero(&tmp, a == OEQ != wantTrue, likely, to) + bgenNonZero(&tmp, res, a == OEQ != wantTrue, likely, to) Regfree(&tmp) return } if Iscomplex[nl.Type.Etype] { - complexbool(a, nl, nr, wantTrue, likely, to) + complexbool(a, nl, nr, res, wantTrue, likely, to) return } if Ctxt.Arch.Regsize == 4 && Is64(nr.Type) { + if genval { + // TODO: Teach Cmp64 to generate boolean values and remove this. + bvgenjump(n, res, wantTrue, false) + return + } if !nl.Addable || Isconst(nl, CTINT) { nl = CgenTemp(nl) } @@ -1838,7 +1930,7 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { if Smallintconst(nr) && Ctxt.Arch.Thechar != '9' { Thearch.Gins(Thearch.Optoas(OCMP, nr.Type), nl, nr) - Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to) + bins(nr.Type, res, a, likely, to) return } @@ -1869,6 +1961,9 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { if Isfloat[nl.Type.Etype] { switch Ctxt.Arch.Thechar { case '5': + if genval { + Fatal("genval 5g Isfloat special cases not implemented") + } switch n.Op { case ONE: Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nr.Type, likely), to) @@ -1883,19 +1978,40 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { switch n.Op { case OEQ: // neither NE nor P - p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely) - p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely) - Patch(Gbranch(obj.AJMP, nil, 0), to) - Patch(p1, Pc) - Patch(p2, Pc) + if genval { + var reg Node + Regalloc(®, Types[TBOOL], nil) + Thearch.Ginsboolval(Thearch.Optoas(OEQ, nr.Type), ®) + Thearch.Ginsboolval(Thearch.Optoas(OPC, nr.Type), res) + Thearch.Gins(Thearch.Optoas(OAND, Types[TBOOL]), ®, res) + Regfree(®) + } else { + p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely) + p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely) + Patch(Gbranch(obj.AJMP, nil, 0), to) + Patch(p1, Pc) + Patch(p2, Pc) + } return case ONE: // either NE or P - Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to) - Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to) + if genval { + var reg Node + Regalloc(®, Types[TBOOL], nil) + Thearch.Ginsboolval(Thearch.Optoas(ONE, nr.Type), ®) + Thearch.Ginsboolval(Thearch.Optoas(OPS, nr.Type), res) + Thearch.Gins(Thearch.Optoas(OOR, Types[TBOOL]), ®, res) + Regfree(®) + } else { + Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to) + Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to) + } return } case '7', '9': + if genval { + Fatal("genval 7g, 9g Isfloat special cases not implemented") + } switch n.Op { // On arm64 and ppc64, <= and >= mishandle NaN. Must decompose into < or > and =. // TODO(josh): Convert a <= b to b > a instead? @@ -1912,11 +2028,11 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) { } } - // Not a special case. Insert an appropriate conditional jump. - Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to) + // Not a special case. Insert the conditional jump or value gen. + bins(nr.Type, res, a, likely, to) } -func bgenNonZero(n *Node, wantTrue bool, likely int, to *obj.Prog) { +func bgenNonZero(n, res *Node, wantTrue bool, likely int, to *obj.Prog) { // TODO: Optimize on systems that can compare to zero easily. a := ONE if !wantTrue { @@ -1925,7 +2041,21 @@ func bgenNonZero(n *Node, wantTrue bool, likely int, to *obj.Prog) { var zero Node Nodconst(&zero, n.Type, 0) Thearch.Gins(Thearch.Optoas(OCMP, n.Type), n, &zero) - Patch(Gbranch(Thearch.Optoas(a, n.Type), n.Type, likely), to) + bins(n.Type, res, a, likely, to) +} + +// bins inserts an instruction to handle the result of a compare. +// If res is non-nil, it inserts appropriate value generation instructions. +// If res is nil, it inserts a branch to to. +func bins(typ *Type, res *Node, a, likely int, to *obj.Prog) { + a = Thearch.Optoas(a, typ) + if res != nil { + // value gen + Thearch.Ginsboolval(a, res) + } else { + // jump + Patch(Gbranch(a, typ, likely), to) + } } /* diff --git a/src/cmd/internal/gc/cplx.go b/src/cmd/internal/gc/cplx.go index c457bbf924..cf48c922d7 100644 --- a/src/cmd/internal/gc/cplx.go +++ b/src/cmd/internal/gc/cplx.go @@ -14,7 +14,7 @@ func overlap_cplx(f *Node, t *Node) bool { return f.Op == OINDREG && t.Op == OINDREG && f.Xoffset+f.Type.Width >= t.Xoffset && t.Xoffset+t.Type.Width >= f.Xoffset } -func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) { +func complexbool(op int, nl, nr, res *Node, wantTrue bool, likely int, to *obj.Prog) { // make both sides addable in ullman order if nr != nil { if nl.Ullman > nr.Ullman && !nl.Addable { @@ -35,7 +35,10 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) subnode(&rreal, &rimag, nr) // build tree - // real(l) == real(r) && imag(l) == imag(r) + // if branching: + // real(l) == real(r) && imag(l) == imag(r) + // if generating a value, use a branch-free version: + // real(l) == real(r) & imag(l) == imag(r) realeq := Node{ Op: OEQ, Left: &lreal, @@ -55,6 +58,19 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) Type: Types[TBOOL], } + if res != nil { + // generating a value + and.Op = OAND + if op == ONE { + and.Op = OOR + realeq.Op = ONE + imageq.Op = ONE + } + Bvgen(&and, res, true) + return + } + + // generating a branch if op == ONE { wantTrue = !wantTrue } diff --git a/src/cmd/internal/gc/go.go b/src/cmd/internal/gc/go.go index c0ec7b5ef4..d399ebbd36 100644 --- a/src/cmd/internal/gc/go.go +++ b/src/cmd/internal/gc/go.go @@ -791,6 +791,13 @@ type Arch struct { Expandchecks func(*obj.Prog) Getg func(*Node) Gins func(int, *Node, *Node) *obj.Prog + // Ginsboolval inserts instructions to convert the result + // of a just-completed comparison to a boolean value. + // The first argument is the conditional jump instruction + // corresponding to the desired value. + // The second argument is the destination. + // If not present, Ginsboolval will be emulated with jumps. + Ginsboolval func(int, *Node) Ginscon func(int, int64, *Node) Ginsnop func() Gmove func(*Node, *Node) diff --git a/src/cmd/internal/gc/syntax.go b/src/cmd/internal/gc/syntax.go index 1012c66c45..d448188a55 100644 --- a/src/cmd/internal/gc/syntax.go +++ b/src/cmd/internal/gc/syntax.go @@ -306,6 +306,7 @@ const ( ORROTC // right rotate-carry: ARCR. ORETJMP // return to other function OPS // compare parity set (for x86 NaN check) + OPC // compare parity clear (for x86 NaN check) OSQRT // sqrt(float64), on systems that have hw support OGETG // runtime.getg() (read g pointer)