Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: optimize VPTEST for 2-operand cases
author Junyang Shao <shaojunyang@google.com>
Tue, 16 Sep 2025 17:27:36 +0000 (17:27 +0000)
committer Junyang Shao <shaojunyang@google.com>
Thu, 18 Sep 2025 18:07:23 +0000 (11:07 -0700)
Change-Id: Ica2d5ee48082c69e86b12b519ba8df7a2556392f
Reviewed-on: https://go-review.googlesource.com/c/go/+/704355
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/ssa/_gen/AMD64.rules
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/simd.go [new file with mode: 0644]

index 3c73737dc050cef48a74e3b74c15661cc6e686ec..05fc64d486329d84b26067bc0b3d629ef637dd6a 100644 (file)
 (VMOVDQUstore(128|256|512) [off1] {sym} x:(ADDQconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 => (VMOVDQUstore(128|256|512) [off1+off2] {sym} ptr val mem)
 (VMOVDQUload(128|256|512) [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) => (VMOVDQUload(128|256|512) [off1+off2] {mergeSym(sym1, sym2)} base mem)
 (VMOVDQUstore(128|256|512) [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) => (VMOVDQUstore(128|256|512) [off1+off2] {mergeSym(sym1, sym2)} base val mem)
+
+// 2-op VPTEST optimizations: when both VPTEST operands are the same AND/ANDN value (x == y) and x.Uses == 2 (presumably meaning that VPTEST is x's only user), test the AND/ANDN inputs j and k directly; the ANDN forms switch SETEQ/EQ to SETB/ULT and pass the inputs as (k j).
+(SETEQ (VPTEST x:(VPAND(128|256) j k) y)) && x == y && x.Uses == 2 => (SETEQ (VPTEST j k))
+(SETEQ (VPTEST x:(VPAND(D|Q)512 j k) y)) && x == y && x.Uses == 2 => (SETEQ (VPTEST j k))
+(SETEQ (VPTEST x:(VPANDN(128|256) j k) y)) && x == y && x.Uses == 2 => (SETB (VPTEST k j)) // AndNot has swapped its operand order
+(SETEQ (VPTEST x:(VPANDN(D|Q)512 j k) y)) && x == y && x.Uses == 2 => (SETB (VPTEST k j)) // AndNot has swapped its operand order
+(EQ (VPTEST x:(VPAND(128|256) j k) y) yes no) && x == y && x.Uses == 2 => (EQ (VPTEST j k) yes no)
+(EQ (VPTEST x:(VPAND(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (EQ (VPTEST j k) yes no)
+(EQ (VPTEST x:(VPANDN(128|256) j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order
+(EQ (VPTEST x:(VPANDN(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order
index 70b6d549fb98a2ac0d43fd19afe8288cca6f2b0d..26a06fc3fc5c48ad3e2f49f81d668cd2a3010056 100644 (file)
@@ -22607,6 +22607,190 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
                }
                break
        }
+       // match: (SETEQ (VPTEST x:(VPAND128 j k) y))
+       // cond: x == y && x.Uses == 2
+       // result: (SETEQ (VPTEST j k))
+       for {
+               if v_0.Op != OpAMD64VPTEST {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               if x.Op != OpAMD64VPAND128 {
+                       break
+               }
+               k := x.Args[1]
+               j := x.Args[0]
+               if !(x == y && x.Uses == 2) {
+                       break
+               }
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+               v0.AddArg2(j, k)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (VPTEST x:(VPAND256 j k) y))
+       // cond: x == y && x.Uses == 2
+       // result: (SETEQ (VPTEST j k))
+       for {
+               if v_0.Op != OpAMD64VPTEST {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               if x.Op != OpAMD64VPAND256 {
+                       break
+               }
+               k := x.Args[1]
+               j := x.Args[0]
+               if !(x == y && x.Uses == 2) {
+                       break
+               }
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+               v0.AddArg2(j, k)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (VPTEST x:(VPANDD512 j k) y))
+       // cond: x == y && x.Uses == 2
+       // result: (SETEQ (VPTEST j k))
+       for {
+               if v_0.Op != OpAMD64VPTEST {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               if x.Op != OpAMD64VPANDD512 {
+                       break
+               }
+               k := x.Args[1]
+               j := x.Args[0]
+               if !(x == y && x.Uses == 2) {
+                       break
+               }
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+               v0.AddArg2(j, k)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (VPTEST x:(VPANDQ512 j k) y))
+       // cond: x == y && x.Uses == 2
+       // result: (SETEQ (VPTEST j k))
+       for {
+               if v_0.Op != OpAMD64VPTEST {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               if x.Op != OpAMD64VPANDQ512 {
+                       break
+               }
+               k := x.Args[1]
+               j := x.Args[0]
+               if !(x == y && x.Uses == 2) {
+                       break
+               }
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+               v0.AddArg2(j, k)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (VPTEST x:(VPANDN128 j k) y))
+       // cond: x == y && x.Uses == 2
+       // result: (SETB (VPTEST k j))
+       for {
+               if v_0.Op != OpAMD64VPTEST {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               if x.Op != OpAMD64VPANDN128 {
+                       break
+               }
+               k := x.Args[1]
+               j := x.Args[0]
+               if !(x == y && x.Uses == 2) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+               v0.AddArg2(k, j)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (VPTEST x:(VPANDN256 j k) y))
+       // cond: x == y && x.Uses == 2
+       // result: (SETB (VPTEST k j))
+       for {
+               if v_0.Op != OpAMD64VPTEST {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               if x.Op != OpAMD64VPANDN256 {
+                       break
+               }
+               k := x.Args[1]
+               j := x.Args[0]
+               if !(x == y && x.Uses == 2) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+               v0.AddArg2(k, j)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (VPTEST x:(VPANDND512 j k) y))
+       // cond: x == y && x.Uses == 2
+       // result: (SETB (VPTEST k j))
+       for {
+               if v_0.Op != OpAMD64VPTEST {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               if x.Op != OpAMD64VPANDND512 {
+                       break
+               }
+               k := x.Args[1]
+               j := x.Args[0]
+               if !(x == y && x.Uses == 2) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+               v0.AddArg2(k, j)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (VPTEST x:(VPANDNQ512 j k) y))
+       // cond: x == y && x.Uses == 2
+       // result: (SETB (VPTEST k j))
+       for {
+               if v_0.Op != OpAMD64VPTEST {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               if x.Op != OpAMD64VPANDNQ512 {
+                       break
+               }
+               k := x.Args[1]
+               j := x.Args[0]
+               if !(x == y && x.Uses == 2) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+               v0.AddArg2(k, j)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool {
@@ -61066,6 +61250,190 @@ func rewriteBlockAMD64(b *Block) bool {
                        }
                        break
                }
+               // match: (EQ (VPTEST x:(VPAND128 j k) y) yes no)
+               // cond: x == y && x.Uses == 2
+               // result: (EQ (VPTEST j k) yes no)
+               for b.Controls[0].Op == OpAMD64VPTEST {
+                       v_0 := b.Controls[0]
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if x.Op != OpAMD64VPAND128 {
+                               break
+                       }
+                       _ = x.Args[1]
+                       x_0 := x.Args[0]
+                       x_1 := x.Args[1]
+                       for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 {
+                               j := x_0
+                               k := x_1
+                               if !(x == y && x.Uses == 2) {
+                                       continue
+                               }
+                               v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags)
+                               v0.AddArg2(j, k)
+                               b.resetWithControl(BlockAMD64EQ, v0)
+                               return true
+                       }
+                       break
+               }
+               // match: (EQ (VPTEST x:(VPAND256 j k) y) yes no)
+               // cond: x == y && x.Uses == 2
+               // result: (EQ (VPTEST j k) yes no)
+               for b.Controls[0].Op == OpAMD64VPTEST {
+                       v_0 := b.Controls[0]
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if x.Op != OpAMD64VPAND256 {
+                               break
+                       }
+                       _ = x.Args[1]
+                       x_0 := x.Args[0]
+                       x_1 := x.Args[1]
+                       for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 {
+                               j := x_0
+                               k := x_1
+                               if !(x == y && x.Uses == 2) {
+                                       continue
+                               }
+                               v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags)
+                               v0.AddArg2(j, k)
+                               b.resetWithControl(BlockAMD64EQ, v0)
+                               return true
+                       }
+                       break
+               }
+               // match: (EQ (VPTEST x:(VPANDD512 j k) y) yes no)
+               // cond: x == y && x.Uses == 2
+               // result: (EQ (VPTEST j k) yes no)
+               for b.Controls[0].Op == OpAMD64VPTEST {
+                       v_0 := b.Controls[0]
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if x.Op != OpAMD64VPANDD512 {
+                               break
+                       }
+                       _ = x.Args[1]
+                       x_0 := x.Args[0]
+                       x_1 := x.Args[1]
+                       for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 {
+                               j := x_0
+                               k := x_1
+                               if !(x == y && x.Uses == 2) {
+                                       continue
+                               }
+                               v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags)
+                               v0.AddArg2(j, k)
+                               b.resetWithControl(BlockAMD64EQ, v0)
+                               return true
+                       }
+                       break
+               }
+               // match: (EQ (VPTEST x:(VPANDQ512 j k) y) yes no)
+               // cond: x == y && x.Uses == 2
+               // result: (EQ (VPTEST j k) yes no)
+               for b.Controls[0].Op == OpAMD64VPTEST {
+                       v_0 := b.Controls[0]
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if x.Op != OpAMD64VPANDQ512 {
+                               break
+                       }
+                       _ = x.Args[1]
+                       x_0 := x.Args[0]
+                       x_1 := x.Args[1]
+                       for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 {
+                               j := x_0
+                               k := x_1
+                               if !(x == y && x.Uses == 2) {
+                                       continue
+                               }
+                               v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags)
+                               v0.AddArg2(j, k)
+                               b.resetWithControl(BlockAMD64EQ, v0)
+                               return true
+                       }
+                       break
+               }
+               // match: (EQ (VPTEST x:(VPANDN128 j k) y) yes no)
+               // cond: x == y && x.Uses == 2
+               // result: (ULT (VPTEST k j) yes no)
+               for b.Controls[0].Op == OpAMD64VPTEST {
+                       v_0 := b.Controls[0]
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if x.Op != OpAMD64VPANDN128 {
+                               break
+                       }
+                       k := x.Args[1]
+                       j := x.Args[0]
+                       if !(x == y && x.Uses == 2) {
+                               break
+                       }
+                       v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags)
+                       v0.AddArg2(k, j)
+                       b.resetWithControl(BlockAMD64ULT, v0)
+                       return true
+               }
+               // match: (EQ (VPTEST x:(VPANDN256 j k) y) yes no)
+               // cond: x == y && x.Uses == 2
+               // result: (ULT (VPTEST k j) yes no)
+               for b.Controls[0].Op == OpAMD64VPTEST {
+                       v_0 := b.Controls[0]
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if x.Op != OpAMD64VPANDN256 {
+                               break
+                       }
+                       k := x.Args[1]
+                       j := x.Args[0]
+                       if !(x == y && x.Uses == 2) {
+                               break
+                       }
+                       v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags)
+                       v0.AddArg2(k, j)
+                       b.resetWithControl(BlockAMD64ULT, v0)
+                       return true
+               }
+               // match: (EQ (VPTEST x:(VPANDND512 j k) y) yes no)
+               // cond: x == y && x.Uses == 2
+               // result: (ULT (VPTEST k j) yes no)
+               for b.Controls[0].Op == OpAMD64VPTEST {
+                       v_0 := b.Controls[0]
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if x.Op != OpAMD64VPANDND512 {
+                               break
+                       }
+                       k := x.Args[1]
+                       j := x.Args[0]
+                       if !(x == y && x.Uses == 2) {
+                               break
+                       }
+                       v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags)
+                       v0.AddArg2(k, j)
+                       b.resetWithControl(BlockAMD64ULT, v0)
+                       return true
+               }
+               // match: (EQ (VPTEST x:(VPANDNQ512 j k) y) yes no)
+               // cond: x == y && x.Uses == 2
+               // result: (ULT (VPTEST k j) yes no)
+               for b.Controls[0].Op == OpAMD64VPTEST {
+                       v_0 := b.Controls[0]
+                       y := v_0.Args[1]
+                       x := v_0.Args[0]
+                       if x.Op != OpAMD64VPANDNQ512 {
+                               break
+                       }
+                       k := x.Args[1]
+                       j := x.Args[0]
+                       if !(x == y && x.Uses == 2) {
+                               break
+                       }
+                       v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags)
+                       v0.AddArg2(k, j)
+                       b.resetWithControl(BlockAMD64ULT, v0)
+                       return true
+               }
        case BlockAMD64GE:
                // match: (GE c:(CMPQconst [128] z) yes no)
                // cond: c.Uses == 1
diff --git a/test/codegen/simd.go b/test/codegen/simd.go
new file mode 100644 (file)
index 0000000..0d617bf
--- /dev/null
@@ -0,0 +1,29 @@
+// asmcheck
+
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check code generation of simd peephole optimizations.
+
+//go:build goexperiment.simd
+
+package codegen
+
+import "simd"
+
+func vptest1() bool { // asmcheck case for the AndNot-then-IsZero pattern on two Uint64x2 vectors
+       v1 := simd.LoadUint64x2Slice([]uint64{0, 1}) // left operand of AndNot
+       v2 := simd.LoadUint64x2Slice([]uint64{0, 0}) // right operand of AndNot
+       // amd64:`VPTEST\s(.*)(.*)$`
+       // amd64:`SETCS\s(.*)$`
+       return v1.AndNot(v2).IsZero() // the two directives above expect a VPTEST and a SETCS in the amd64 output
+}
+
+func vptest2() bool { // asmcheck case for the And-then-IsZero pattern on two Uint64x2 vectors
+       v1 := simd.LoadUint64x2Slice([]uint64{0, 1}) // left operand of And
+       v2 := simd.LoadUint64x2Slice([]uint64{0, 0}) // right operand of And
+       // amd64:`VPTEST\s(.*)(.*)$`
+       // amd64:`SETEQ\s(.*)$`
+       return v1.And(v2).IsZero() // the two directives above expect a VPTEST and a SETEQ in the amd64 output
+}