From: Josh Bleecher Snyder Date: Sat, 25 Jun 2016 20:40:43 +0000 (-0700) Subject: cmd/compile: unroll comparisons to short constant strings X-Git-Tag: go1.8beta1~1300 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=c9fd997524ce7d531579500218f11b528bab4c88;p=gostls13.git cmd/compile: unroll comparisons to short constant strings Unroll s == "ab" to len(s) == 2 && s[0] == 'a' && s[1] == 'b' This generates faster and shorter code by avoiding a runtime call. Do something similar for !=. The cutoff length is 6. This was chosen empirically by examining binary sizes on arm, arm64, 386, and amd64 using the SSA backend. For all architectures examined, 4, 5, and 6 were the ideal cutoff, with identical binary sizes. The distribution of constant string equality sizes during 'go build -a std' is: 40.81% 622 len 0 14.11% 215 len 4 9.45% 144 len 1 7.81% 119 len 3 7.48% 114 len 5 5.12% 78 len 7 4.13% 63 len 2 3.54% 54 len 8 2.69% 41 len 6 1.18% 18 len 10 0.85% 13 len 9 0.66% 10 len 14 0.59% 9 len 17 0.46% 7 len 11 0.26% 4 len 12 0.20% 3 len 19 0.13% 2 len 13 0.13% 2 len 15 0.13% 2 len 16 0.07% 1 len 20 0.07% 1 len 23 0.07% 1 len 33 0.07% 1 len 36 A cutoff of length 6 covers most of the cases. Benchmarks on amd64 comparing a string to a constant of length 3: Cmp/1same-8 4.78ns ± 6% 0.94ns ± 9% -80.26% (p=0.000 n=20+20) Cmp/1diffbytes-8 6.43ns ± 6% 0.96ns ±11% -85.13% (p=0.000 n=20+20) Cmp/3same-8 4.71ns ± 5% 1.28ns ± 5% -72.90% (p=0.000 n=20+20) Cmp/3difffirstbyte-8 6.33ns ± 7% 1.27ns ± 7% -79.90% (p=0.000 n=20+20) Cmp/3difflastbyte-8 6.34ns ± 8% 1.26ns ± 9% -80.13% (p=0.000 n=20+20) The change to the prove test preserves the existing intent of the test. When the string was short, there was a new "proved in bounds" report that referred to individual byte comparisons. Change-Id: I593ac303b0d11f275672090c5c786ea0c6b8da13 Reviewed-on: https://go-review.googlesource.com/26758 Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go index e53fd7ac97..2c873b8163 100644 --- a/src/cmd/compile/internal/gc/walk.go +++ b/src/cmd/compile/internal/gc/walk.go @@ -1371,20 +1371,7 @@ opswitch: n = callnew(n.Type.Elem()) } - // If one argument to the comparison is an empty string, - // comparing the lengths instead will yield the same result - // without the function call. case OCMPSTR: - if (Isconst(n.Left, CTSTR) && len(n.Left.Val().U.(string)) == 0) || (Isconst(n.Right, CTSTR) && len(n.Right.Val().U.(string)) == 0) { - // TODO(marvin): Fix Node.EType type union. - r := Nod(Op(n.Etype), Nod(OLEN, n.Left, nil), Nod(OLEN, n.Right, nil)) - r = typecheck(r, Erv) - r = walkexpr(r, init) - r.Type = n.Type - n = r - break - } - // s + "badgerbadgerbadger" == "badgerbadgerbadger" if (Op(n.Etype) == OEQ || Op(n.Etype) == ONE) && Isconst(n.Right, CTSTR) && n.Left.Op == OADDSTR && n.Left.List.Len() == 2 && Isconst(n.Left.List.Second(), CTSTR) && strlit(n.Right) == strlit(n.Left.List.Second()) { // TODO(marvin): Fix Node.EType type union. @@ -1396,12 +1383,61 @@ opswitch: break } + // Rewrite comparisons to short constant strings as length+byte-wise comparisons. + var cs, ncs *Node // const string, non-const string + switch { + case Isconst(n.Left, CTSTR) && Isconst(n.Right, CTSTR): + // ignore; will be constant evaluated + case Isconst(n.Left, CTSTR): + cs = n.Left + ncs = n.Right + case Isconst(n.Right, CTSTR): + cs = n.Right + ncs = n.Left + } + if cs != nil { + cmp := Op(n.Etype) + // maxRewriteLen was chosen empirically. + // It is the value that minimizes cmd/go file size + // across most architectures. + // See the commit description for CL 26758 for details. + maxRewriteLen := 6 + var and Op + switch cmp { + case OEQ: + and = OANDAND + case ONE: + and = OOROR + default: + // Don't do byte-wise comparisons for <, <=, etc. + // They're fairly complicated. + // Length-only checks are ok, though. + maxRewriteLen = 0 + } + if s := cs.Val().U.(string); len(s) <= maxRewriteLen { + if len(s) > 0 { + ncs = safeexpr(ncs, init) + } + // TODO(marvin): Fix Node.EType type union. + r := Nod(cmp, Nod(OLEN, ncs, nil), Nodintconst(int64(len(s)))) + for i := 0; i < len(s); i++ { + cb := Nodintconst(int64(s[i])) + ncb := Nod(OINDEX, ncs, Nodintconst(int64(i))) + r = Nod(and, r, Nod(cmp, ncb, cb)) + } + r = typecheck(r, Erv) + r = walkexpr(r, init) + r.Type = n.Type + n = r + break + } + } + var r *Node // TODO(marvin): Fix Node.EType type union. if Op(n.Etype) == OEQ || Op(n.Etype) == ONE { // prepare for rewrite below n.Left = cheapexpr(n.Left, init) - n.Right = cheapexpr(n.Right, init) r = mkcall("eqstring", Types[TBOOL], init, conv(n.Left, Types[TSTRING]), conv(n.Right, Types[TSTRING])) @@ -1415,7 +1451,6 @@ opswitch: } else { // len(left) != len(right) || !eqstring(left, right) r = Nod(ONOT, r, nil) - r = Nod(OOROR, Nod(ONE, Nod(OLEN, n.Left, nil), Nod(OLEN, n.Right, nil)), r) } @@ -1424,7 +1459,6 @@ opswitch: } else { // sys_cmpstring(s1, s2) :: 0 r = mkcall("cmpstring", Types[TINT], init, conv(n.Left, Types[TSTRING]), conv(n.Right, Types[TSTRING])) - // TODO(marvin): Fix Node.EType type union. r = Nod(Op(n.Etype), r, nodintconst(0)) } diff --git a/test/prove.go b/test/prove.go index 8bcc9ae614..65eed745cb 100644 --- a/test/prove.go +++ b/test/prove.go @@ -250,7 +250,7 @@ func f9(a, b bool) int { func f10(a string) int { n := len(a) - if a[:n>>1] == "aaa" { + if a[:n>>1] == "aaaaaaaaaaaaaa" { return 0 } return 1