From ef5f6cc92109ee18d978f81650f93fd8a254b8d2 Mon Sep 17 00:00:00 2001
From: Junyang Shao
Date: Mon, 14 Jul 2025 22:00:29 +0000
Subject: [PATCH] [dev.simd] cmd/compile: adjust param order for AndNot

This CL adjusts the parameter order of AndNot, making it x &^ y
instead of ^x & y. This CL also adds a test.

This CL is partially generated by CL 687977.

Change-Id: I244e7b887991dc97e695131a5287af1b0e6fc3ce
Reviewed-on: https://go-review.googlesource.com/c/go/+/687996
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
---
 .../compile/internal/ssagen/simdintrinsics.go | 64 +++++++++----------
 src/simd/ops_amd64.go                         | 64 +++++++++----------
 src/simd/simd_test.go                         |  6 ++
 3 files changed, 70 insertions(+), 64 deletions(-)

diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 1ef4369fa2..1472f5ec1a 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -131,38 +131,38 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
 	addF(simdPackage, "Uint64x2.AndMasked", opLen3(ssa.OpAndMaskedUint64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint64x4.AndMasked", opLen3(ssa.OpAndMaskedUint64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint64x8.AndMasked", opLen3(ssa.OpAndMaskedUint64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int8x16.AndNot", opLen2(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int8x32.AndNot", opLen2(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int16x8.AndNot", opLen2(ssa.OpAndNotInt16x8, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int16x16.AndNot", opLen2(ssa.OpAndNotInt16x16, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x4.AndNot", opLen2(ssa.OpAndNotInt32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int32x8.AndNot", opLen2(ssa.OpAndNotInt32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x16.AndNot", opLen2(ssa.OpAndNotInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x2.AndNot", opLen2(ssa.OpAndNotInt64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int64x4.AndNot", opLen2(ssa.OpAndNotInt64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x8.AndNot", opLen2(ssa.OpAndNotInt64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint8x16.AndNot", opLen2(ssa.OpAndNotUint8x16, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint8x32.AndNot", opLen2(ssa.OpAndNotUint8x32, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint16x8.AndNot", opLen2(ssa.OpAndNotUint16x8, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint16x16.AndNot", opLen2(ssa.OpAndNotUint16x16, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x4.AndNot", opLen2(ssa.OpAndNotUint32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint32x8.AndNot", opLen2(ssa.OpAndNotUint32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x16.AndNot", opLen2(ssa.OpAndNotUint32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint64x2.AndNot", opLen2(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint64x4.AndNot", opLen2(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint64x8.AndNot", opLen2(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int32x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int32x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x16.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x2.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int64x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint32x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint32x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x16.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint64x2.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint64x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint64x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int8x16.AndNot", opLen2_21(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int8x32.AndNot", opLen2_21(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x8.AndNot", opLen2_21(ssa.OpAndNotInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.AndNot", opLen2_21(ssa.OpAndNotInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x4.AndNot", opLen2_21(ssa.OpAndNotInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.AndNot", opLen2_21(ssa.OpAndNotInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.AndNot", opLen2_21(ssa.OpAndNotInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.AndNot", opLen2_21(ssa.OpAndNotInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.AndNot", opLen2_21(ssa.OpAndNotInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.AndNot", opLen2_21(ssa.OpAndNotInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.AndNot", opLen2_21(ssa.OpAndNotUint8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint8x32.AndNot", opLen2_21(ssa.OpAndNotUint8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x8.AndNot", opLen2_21(ssa.OpAndNotUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.AndNot", opLen2_21(ssa.OpAndNotUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x4.AndNot", opLen2_21(ssa.OpAndNotUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.AndNot", opLen2_21(ssa.OpAndNotUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.AndNot", opLen2_21(ssa.OpAndNotUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.AndNot", opLen2_21(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.AndNot", opLen2_21(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.AndNot", opLen2_21(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Float32x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Float32x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Float32x16.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go
index 7121a6d208..3b87836962 100644
--- a/src/simd/ops_amd64.go
+++ b/src/simd/ops_amd64.go
@@ -620,164 +620,164 @@ func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8
 
 /* AndNot */
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Int8x16) AndNot(y Int8x16) Int8x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Int8x32) AndNot(y Int8x32) Int8x32
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Int16x8) AndNot(y Int16x8) Int16x8
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Int16x16) AndNot(y Int16x16) Int16x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Int32x4) AndNot(y Int32x4) Int32x4
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Int32x8) AndNot(y Int32x8) Int32x8
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x16) AndNot(y Int32x16) Int32x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Int64x2) AndNot(y Int64x2) Int64x2
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Int64x4) AndNot(y Int64x4) Int64x4
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x8) AndNot(y Int64x8) Int64x8
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Uint8x16) AndNot(y Uint8x16) Uint8x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Uint8x32) AndNot(y Uint8x32) Uint8x32
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Uint16x8) AndNot(y Uint16x8) Uint16x8
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Uint16x16) AndNot(y Uint16x16) Uint16x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Uint32x4) AndNot(y Uint32x4) Uint32x4
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Uint32x8) AndNot(y Uint32x8) Uint32x8
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x16) AndNot(y Uint32x16) Uint32x16
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX
 func (x Uint64x2) AndNot(y Uint64x2) Uint64x2
 
-// AndNot performs a bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDN, CPU Feature: AVX2
 func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
 
-// AndNot performs a masked bitwise AND NOT operation between two vectors.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
 
 /* AndNotMasked */
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512F
 func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4
 
-// AndNotMasked performs a masked bitwise AND NOT operation between two vectors.
+// AndNotMasked performs a bitwise x &^ y.
 //
 // Asm: VPANDNQ, CPU Feature: AVX512F
 func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8
diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go
index d7010de10a..d19889cc76 100644
--- a/src/simd/simd_test.go
+++ b/src/simd/simd_test.go
@@ -196,6 +196,12 @@ func TestCompress(t *testing.T) {
 		[]int32{2, 4, 0, 0}, "Compress")
 }
 
+func TestAndNot(t *testing.T) {
+	testInt32x4Binary(t, []int32{0b11, 0b00, 0b11, 0b00},
+		[]int32{0b01, 0b01, 0b01, 0b01},
+		[]int32{0b10, 0b00, 0b10, 0b00}, "AndNot")
+}
+
 // checkInt8Slices ensures that b and a are equal, to the end of b.
 // also serves to use the slices, to prevent accidental optimization.
 func checkInt8Slices(t *testing.T, a, b []int8) {
-- 
2.52.0
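To make the semantic change concrete: after this CL, each AndNot lane computes Go's x &^ y (the bits of x with y's set bits cleared) rather than ^x & y, which followed VPANDN's native operand order (the instruction computes NOT of its first source ANDed with its second). The standalone scalar sketch below mirrors the lanewise behavior that TestAndNot checks; it is illustrative only, not part of the patch, and the helper name andNotLanes is invented here.

package main

import "fmt"

// andNotLanes applies Go's x &^ y to each lane, matching the
// semantics Int32x4.AndNot has after this CL. Illustrative
// helper, not part of the Go tree.
func andNotLanes(x, y []int32) []int32 {
	out := make([]int32, len(x))
	for i := range x {
		// Keep the bits of x that are not set in y.
		out[i] = x[i] &^ y[i]
	}
	return out
}

func main() {
	// The same inputs and expected output as TestAndNot above.
	x := []int32{0b11, 0b00, 0b11, 0b00}
	y := []int32{0b01, 0b01, 0b01, 0b01}
	fmt.Println(andNotLanes(x, y)) // [2 0 2 0], i.e. {0b10, 0b00, 0b10, 0b00}
}

The switch from opLen2 to opLen2_21 (and opLen3 to opLen3_21) in simdintrinsics.go presumably selects a builder that emits the SSA operands in (2,1) order, so that VPANDN negates y rather than x; that reading is inferred from the builder naming, not stated in the patch itself.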