From ef5f6cc92109ee18d978f81650f93fd8a254b8d2 Mon Sep 17 00:00:00 2001
From: Junyang Shao
Date: Mon, 14 Jul 2025 22:00:29 +0000
Subject: [PATCH] [dev.simd] cmd/compile: adjust param order for AndNot

This CL adjusts the parameter order of AndNot, making it x &^ y
instead of ^x & y. This CL also adds a test.

This CL is partially generated by CL 687977.

Change-Id: I244e7b887991dc97e695131a5287af1b0e6fc3ce
Reviewed-on: https://go-review.googlesource.com/c/go/+/687996
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
---
 .../compile/internal/ssagen/simdintrinsics.go | 64 +++++++++----------
 src/simd/ops_amd64.go                         | 64 +++++++++----------
 src/simd/simd_test.go                         |  6 ++
 3 files changed, 70 insertions(+), 64 deletions(-)

diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 1ef4369fa2..1472f5ec1a 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -131,38 +131,38 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Uint64x2.AndMasked", opLen3(ssa.OpAndMaskedUint64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint64x4.AndMasked", opLen3(ssa.OpAndMaskedUint64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint64x8.AndMasked", opLen3(ssa.OpAndMaskedUint64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int8x16.AndNot", opLen2(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int8x32.AndNot", opLen2(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int16x8.AndNot", opLen2(ssa.OpAndNotInt16x8, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int16x16.AndNot", opLen2(ssa.OpAndNotInt16x16, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x4.AndNot", opLen2(ssa.OpAndNotInt32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int32x8.AndNot", opLen2(ssa.OpAndNotInt32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x16.AndNot", opLen2(ssa.OpAndNotInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x2.AndNot", opLen2(ssa.OpAndNotInt64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int64x4.AndNot", opLen2(ssa.OpAndNotInt64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x8.AndNot", opLen2(ssa.OpAndNotInt64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint8x16.AndNot", opLen2(ssa.OpAndNotUint8x16, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint8x32.AndNot", opLen2(ssa.OpAndNotUint8x32, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint16x8.AndNot", opLen2(ssa.OpAndNotUint16x8, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint16x16.AndNot", opLen2(ssa.OpAndNotUint16x16, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x4.AndNot", opLen2(ssa.OpAndNotUint32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint32x8.AndNot", opLen2(ssa.OpAndNotUint32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x16.AndNot", opLen2(ssa.OpAndNotUint32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint64x2.AndNot", opLen2(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint64x4.AndNot", opLen2(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint64x8.AndNot", opLen2(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int32x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int32x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x16.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x2.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int64x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint32x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint32x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x16.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint64x2.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint64x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint64x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int8x16.AndNot", opLen2_21(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int8x32.AndNot", opLen2_21(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x8.AndNot", opLen2_21(ssa.OpAndNotInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.AndNot", opLen2_21(ssa.OpAndNotInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x4.AndNot", opLen2_21(ssa.OpAndNotInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.AndNot", opLen2_21(ssa.OpAndNotInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.AndNot", opLen2_21(ssa.OpAndNotInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.AndNot", opLen2_21(ssa.OpAndNotInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.AndNot", opLen2_21(ssa.OpAndNotInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.AndNot", opLen2_21(ssa.OpAndNotInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.AndNot", opLen2_21(ssa.OpAndNotUint8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint8x32.AndNot", opLen2_21(ssa.OpAndNotUint8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x8.AndNot", opLen2_21(ssa.OpAndNotUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.AndNot", opLen2_21(ssa.OpAndNotUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x4.AndNot", opLen2_21(ssa.OpAndNotUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.AndNot", opLen2_21(ssa.OpAndNotUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.AndNot", opLen2_21(ssa.OpAndNotUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.AndNot", opLen2_21(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.AndNot", opLen2_21(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.AndNot", opLen2_21(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Float32x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Float32x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Float32x16.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go
index 7121a6d208..3b87836962 100644
--- a/src/simd/ops_amd64.go
+++ b/src/simd/ops_amd64.go
@@ -620,164 +620,164 @@ func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8
 
 /* AndNot */
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Int8x16) AndNot(y Int8x16) Int8x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Int8x32) AndNot(y Int8x32) Int8x32
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Int16x8) AndNot(y Int16x8) Int16x8
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Int16x16) AndNot(y Int16x16) Int16x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Int32x4) AndNot(y Int32x4) Int32x4
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Int32x8) AndNot(y Int32x8) Int32x8
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x16) AndNot(y Int32x16) Int32x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Int64x2) AndNot(y Int64x2) Int64x2
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Int64x4) AndNot(y Int64x4) Int64x4
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x8) AndNot(y Int64x8) Int64x8
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Uint8x16) AndNot(y Uint8x16) Uint8x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Uint8x32) AndNot(y Uint8x32) Uint8x32
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Uint16x8) AndNot(y Uint16x8) Uint16x8
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Uint16x16) AndNot(y Uint16x16) Uint16x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Uint32x4) AndNot(y Uint32x4) Uint32x4
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Uint32x8) AndNot(y Uint32x8) Uint32x8
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x16) AndNot(y Uint32x16) Uint32x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Uint64x2) AndNot(y Uint64x2) Uint64x2
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
 
 /* AndNotMasked */
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8
diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go
index d7010de10a..d19889cc76 100644
--- a/src/simd/simd_test.go
+++ b/src/simd/simd_test.go
@@ -196,6 +196,12 @@ func TestCompress(t *testing.T) {
 		[]int32{2, 4, 0, 0}, "Compress")
 }
 
+func TestAndNot(t *testing.T) {
+	testInt32x4Binary(t, []int32{0b11, 0b00, 0b11, 0b00},
+		[]int32{0b01, 0b01, 0b01, 0b01},
+		[]int32{0b10, 0b00, 0b10, 0b00}, "AndNot")
+}
+
 // checkInt8Slices ensures that b and a are equal, to the end of b.
 // also serves to use the slices, to prevent accidental optimization.
 func checkInt8Slices(t *testing.T, a, b []int8) {
-- 
2.52.0
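To make the semantic change concrete: after this CL, each AndNot lane computes Go's x &^ y (the bits of x with y's set bits cleared) rather than ^x & y, which followed VPANDN's native operand order (the instruction computes NOT of its first source ANDed with its second). The standalone scalar sketch below mirrors the lanewise behavior that TestAndNot checks; it is illustrative only, not part of the patch, and the helper name andNotLanes is invented here.

package main

import "fmt"

// andNotLanes applies Go's x &^ y to each lane, matching the
// semantics Int32x4.AndNot has after this CL. Illustrative
// helper, not part of the Go tree.
func andNotLanes(x, y []int32) []int32 {
	out := make([]int32, len(x))
	for i := range x {
		// Keep the bits of x that are not set in y.
		out[i] = x[i] &^ y[i]
	}
	return out
}

func main() {
	// The same inputs and expected output as TestAndNot above.
	x := []int32{0b11, 0b00, 0b11, 0b00}
	y := []int32{0b01, 0b01, 0b01, 0b01}
	fmt.Println(andNotLanes(x, y)) // [2 0 2 0], i.e. {0b10, 0b00, 0b10, 0b00}
}

The switch from opLen2 to opLen2_21 (and opLen3 to opLen3_21) in simdintrinsics.go presumably selects a builder that emits the SSA operands in (2,1) order, so that VPANDN negates y rather than x; that reading is inferred from the builder naming, not stated in the patch itself.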