//
// if eq(p[0], q[0]) && eq(p[1], q[1]) && ... {
// } else {
- // return
+ // goto neq
// }
//
// And so on.
//
// Otherwise it generates:
//
- // for i := 0; i < nelem; i++ {
- // if eq(p[i], q[i]) {
+ // iterateTo := nelem/unroll*unroll
+ // for i := 0; i < iterateTo; i += unroll {
+ // if eq(p[i+0], q[i+0]) && eq(p[i+1], q[i+1]) && ... && eq(p[i+unroll-1], q[i+unroll-1]) {
// } else {
// goto neq
// }
// }
+ // if eq(p[iterateTo+0], q[iterateTo+0]) && eq(p[iterateTo+1], q[iterateTo+1]) && ... {
+ // } else {
+ // goto neq
+ // }
//
- // TODO(josharian): consider doing some loop unrolling
- // for larger nelem as well, processing a few elements at a time in a loop.
checkAll := func(unroll int64, last bool, eq func(pi, qi ir.Node) ir.Node) {
// checkIdx generates a node to check for equality at index i.
checkIdx := func(i ir.Node) ir.Node {
return eq(pi, qi)
}
- if nelem <= unroll {
- if last {
- // Do last comparison in a different manner.
- nelem--
- }
- // Generate a series of checks.
- for i := int64(0); i < nelem; i++ {
- // if check {} else { goto neq }
- nif := ir.NewIfStmt(base.Pos, checkIdx(ir.NewInt(i)), nil, nil)
- nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq))
- fn.Body.Append(nif)
- }
- if last {
- fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, checkIdx(ir.NewInt(nelem))))
- }
- } else {
- // Generate a for loop.
- // for i := 0; i < nelem; i++
+ iterations := nelem / unroll
+ iterateTo := iterations * unroll
+ // If a loop is iterated only once, there shouldn't be any loop at all.
+ if iterations == 1 {
+ iterateTo = 0
+ }
+
+ if iterateTo > 0 {
+ // Generate an unrolled for loop.
+ // for i := 0; i < nelem/unroll*unroll; i += unroll
i := typecheck.Temp(types.Types[types.TINT])
init := ir.NewAssignStmt(base.Pos, i, ir.NewInt(0))
- cond := ir.NewBinaryExpr(base.Pos, ir.OLT, i, ir.NewInt(nelem))
- post := ir.NewAssignStmt(base.Pos, i, ir.NewBinaryExpr(base.Pos, ir.OADD, i, ir.NewInt(1)))
- loop := ir.NewForStmt(base.Pos, nil, cond, post, nil)
+ cond := ir.NewBinaryExpr(base.Pos, ir.OLT, i, ir.NewInt(iterateTo))
+ loop := ir.NewForStmt(base.Pos, nil, cond, nil, nil)
loop.PtrInit().Append(init)
- // if eq(pi, qi) {} else { goto neq }
- nif := ir.NewIfStmt(base.Pos, checkIdx(i), nil, nil)
- nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq))
- loop.Body.Append(nif)
+
+ // if eq(p[i+0], q[i+0]) && eq(p[i+1], q[i+1]) && ... && eq(p[i+unroll-1], q[i+unroll-1]) {
+ // } else {
+ // goto neq
+ // }
+ for j := int64(0); j < unroll; j++ {
+ // if check {} else { goto neq }
+ nif := ir.NewIfStmt(base.Pos, checkIdx(i), nil, nil)
+ nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq))
+ loop.Body.Append(nif)
+ post := ir.NewAssignStmt(base.Pos, i, ir.NewBinaryExpr(base.Pos, ir.OADD, i, ir.NewInt(1)))
+ loop.Body.Append(post)
+ }
+
fn.Body.Append(loop)
- if last {
- fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, ir.NewBool(true)))
+
+ if nelem == iterateTo {
+ if last {
+ fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, ir.NewBool(true)))
+ }
+ return
}
}
+
+ // Generate remaining checks, if nelem is not a multiple of unroll.
+ if last {
+ // Do last comparison in a different manner.
+ nelem--
+ }
+ // if eq(p[iterateTo+0], q[iterateTo+0]) && eq(p[iterateTo+1], q[iterateTo+1]) && ... {
+ // } else {
+ // goto neq
+ // }
+ for j := iterateTo; j < nelem; j++ {
+ // if check {} else { goto neq }
+ nif := ir.NewIfStmt(base.Pos, checkIdx(ir.NewInt(j)), nil, nil)
+ nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq))
+ fn.Body.Append(nif)
+ }
+ if last {
+ fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, checkIdx(ir.NewInt(nelem))))
+ }
}
switch t.Elem().Kind() {
case types.TSTRING:
// Do two loops. First, check that all the lengths match (cheap).
// Second, check that all the contents match (expensive).
- // TODO: when the array size is small, unroll the length match checks.
checkAll(3, false, func(pi, qi ir.Node) ir.Node {
// Compare lengths.
eqlen, _ := EqString(pi, qi)
--- /dev/null
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reflectdata_test
+
+import "testing"
+
+func BenchmarkEqArrayOfStrings5(b *testing.B) {
+ var a [5]string
+ var c [5]string
+
+ for i := 0; i < 5; i++ {
+ a[i] = "aaaa"
+ c[i] = "cccc"
+ }
+
+ for j := 0; j < b.N; j++ {
+ _ = a == c
+ }
+}
+
+func BenchmarkEqArrayOfStrings64(b *testing.B) {
+ var a [64]string
+ var c [64]string
+
+ for i := 0; i < 64; i++ {
+ a[i] = "aaaa"
+ c[i] = "cccc"
+ }
+
+ for j := 0; j < b.N; j++ {
+ _ = a == c
+ }
+}
+
+func BenchmarkEqArrayOfStrings1024(b *testing.B) {
+ var a [1024]string
+ var c [1024]string
+
+ for i := 0; i < 1024; i++ {
+ a[i] = "aaaa"
+ c[i] = "cccc"
+ }
+
+ for j := 0; j < b.N; j++ {
+ _ = a == c
+ }
+}
+
+func BenchmarkEqArrayOfFloats5(b *testing.B) {
+ var a [5]float32
+ var c [5]float32
+
+ for i := 0; i < b.N; i++ {
+ _ = a == c
+ }
+}
+
+func BenchmarkEqArrayOfFloats64(b *testing.B) {
+ var a [64]float32
+ var c [64]float32
+
+ for i := 0; i < b.N; i++ {
+ _ = a == c
+ }
+}
+
+func BenchmarkEqArrayOfFloats1024(b *testing.B) {
+ var a [1024]float32
+ var c [1024]float32
+
+ for i := 0; i < b.N; i++ {
+ _ = a == c
+ }
+}