From be57d94c4c660a51e1a82d19d9a54be93a9d5f2a Mon Sep 17 00:00:00 2001
From: David Chase
Date: Thu, 9 Oct 2025 14:44:25 -0400
Subject: [PATCH] [dev.simd] simd: add emulated Not method

this is to help match other SIMD architectures
and to simplify processing of logical expressions
for rewriting to ternary-logical simd instructions.

Change-Id: I3c83afbb399d32ba2ade5f8ef288d4a07e1f3948
Reviewed-on: https://go-review.googlesource.com/c/go/+/710696
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Cherry Mui
---
 src/simd/genfiles.go                      |  55 +++++++
 src/simd/internal/simd_test/unary_test.go |   9 ++
 src/simd/other_gen_amd64.go               | 168 ++++++++++++++++++++++
 3 files changed, 232 insertions(+)

diff --git a/src/simd/genfiles.go b/src/simd/genfiles.go
index 4d22eaa233..7e904edb10 100644
--- a/src/simd/genfiles.go
+++ b/src/simd/genfiles.go
@@ -58,6 +58,31 @@ func (sat shapeAndTemplate) shrinkTo(outType string, by int) shapeAndTemplate {
 	return newSat
 }
 
+func (s *shapes) forAllShapes(f func(seq int, t, upperT string, w, c int, out io.Writer), out io.Writer) {
+	vecs := s.vecs
+	ints := s.ints
+	uints := s.uints
+	floats := s.floats
+	seq := 0
+	for _, v := range vecs {
+		for _, w := range ints {
+			c := v / w
+			f(seq, "int", "Int", w, c, out)
+			seq++
+		}
+		for _, w := range uints {
+			c := v / w
+			f(seq, "uint", "Uint", w, c, out)
+			seq++
+		}
+		for _, w := range floats {
+			c := v / w
+			f(seq, "float", "Float", w, c, out)
+			seq++
+		}
+	}
+}
+
 var allShapes = &shapes{
 	vecs:   []int{128, 256, 512},
 	ints:   []int{8, 16, 32, 64},
@@ -65,6 +90,16 @@ var allShapes = &shapes{
 	floats: []int{32, 64},
 }
 
+var intShapes = &shapes{
+	vecs: []int{128, 256, 512},
+	ints: []int{8, 16, 32, 64},
+}
+
+var uintShapes = &shapes{
+	vecs:  []int{128, 256, 512},
+	uints: []int{8, 16, 32, 64},
+}
+
 var avx512Shapes = &shapes{
 	vecs:   []int{512},
 	ints:   []int{8, 16, 32, 64},
@@ -569,6 +604,24 @@ func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} {
 }
 `)
 
+var bitWiseIntTemplate = shapedTemplateOf(intShapes, "bitwise int complement", `
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.VType}}) Not() {{.VType}} {
+	return x.Xor(x.Equal(x).As{{.VType}}())
+}
+`)
+
+var bitWiseUintTemplate = shapedTemplateOf(uintShapes, "bitwise uint complement", `
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.VType}}) Not() {{.VType}} {
+	return x.Xor(x.Equal(x).AsInt{{.WxC}}().As{{.VType}}())
+}
+`)
+
 // CPUfeatureAVX2if8 return AVX2 if the element width is 8,
 // otherwise, it returns CPUfeature.  This is for the cpufeature
 // of unsigned comparison emulation, which uses shifts for all
@@ -781,6 +834,8 @@ func main() {
 		one(*op, prologue,
 			broadcastTemplate,
 			maskCvtTemplate,
+			bitWiseIntTemplate,
+			bitWiseUintTemplate,
 		)
 	}
 	if *ush != "" {
diff --git a/src/simd/internal/simd_test/unary_test.go b/src/simd/internal/simd_test/unary_test.go
index 6a1d0fe369..1f89beb785 100644
--- a/src/simd/internal/simd_test/unary_test.go
+++ b/src/simd/internal/simd_test/unary_test.go
@@ -67,6 +67,15 @@ func TestSqrt(t *testing.T) {
 	}
 }
 
+func TestNot(t *testing.T) {
+	testInt8x16Unary(t, simd.Int8x16.Not, map1[int8](not))
+	testInt8x32Unary(t, simd.Int8x32.Not, map1[int8](not))
+	testInt16x8Unary(t, simd.Int16x8.Not, map1[int16](not))
+	testInt16x16Unary(t, simd.Int16x16.Not, map1[int16](not))
+	testInt32x4Unary(t, simd.Int32x4.Not, map1[int32](not))
+	testInt32x8Unary(t, simd.Int32x8.Not, map1[int32](not))
+}
+
 func TestAbsolute(t *testing.T) {
 	testInt8x16Unary(t, simd.Int8x16.Abs, map1[int8](abs))
 	testInt8x32Unary(t, simd.Int8x32.Abs, map1[int8](abs))
diff --git a/src/simd/other_gen_amd64.go b/src/simd/other_gen_amd64.go
index 4a9049a2b9..76fbe48b20 100644
--- a/src/simd/other_gen_amd64.go
+++ b/src/simd/other_gen_amd64.go
@@ -423,3 +423,171 @@ func (from Float32x16) ToMask() (to Mask32x16) {
 func (from Float64x8) ToMask() (to Mask64x8) {
 	return from.NotEqual(Float64x8{})
 }
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) Not() Int8x16 {
+	return x.Xor(x.Equal(x).AsInt8x16())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) Not() Int16x8 {
+	return x.Xor(x.Equal(x).AsInt16x8())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) Not() Int32x4 {
+	return x.Xor(x.Equal(x).AsInt32x4())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) Not() Int64x2 {
+	return x.Xor(x.Equal(x).AsInt64x2())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) Not() Int8x32 {
+	return x.Xor(x.Equal(x).AsInt8x32())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) Not() Int16x16 {
+	return x.Xor(x.Equal(x).AsInt16x16())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) Not() Int32x8 {
+	return x.Xor(x.Equal(x).AsInt32x8())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) Not() Int64x4 {
+	return x.Xor(x.Equal(x).AsInt64x4())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX512
+func (x Int8x64) Not() Int8x64 {
+	return x.Xor(x.Equal(x).AsInt8x64())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX512
+func (x Int16x32) Not() Int16x32 {
+	return x.Xor(x.Equal(x).AsInt16x32())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX512
+func (x Int32x16) Not() Int32x16 {
+	return x.Xor(x.Equal(x).AsInt32x16())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX512
+func (x Int64x8) Not() Int64x8 {
+	return x.Xor(x.Equal(x).AsInt64x8())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX
+func (x Uint8x16) Not() Uint8x16 {
+	return x.Xor(x.Equal(x).AsInt8x16().AsUint8x16())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) Not() Uint16x8 {
+	return x.Xor(x.Equal(x).AsInt16x8().AsUint16x8())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) Not() Uint32x4 {
+	return x.Xor(x.Equal(x).AsInt32x4().AsUint32x4())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) Not() Uint64x2 {
+	return x.Xor(x.Equal(x).AsInt64x2().AsUint64x2())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) Not() Uint8x32 {
+	return x.Xor(x.Equal(x).AsInt8x32().AsUint8x32())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) Not() Uint16x16 {
+	return x.Xor(x.Equal(x).AsInt16x16().AsUint16x16())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) Not() Uint32x8 {
+	return x.Xor(x.Equal(x).AsInt32x8().AsUint32x8())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) Not() Uint64x4 {
+	return x.Xor(x.Equal(x).AsInt64x4().AsUint64x4())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX512
+func (x Uint8x64) Not() Uint8x64 {
+	return x.Xor(x.Equal(x).AsInt8x64().AsUint8x64())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX512
+func (x Uint16x32) Not() Uint16x32 {
+	return x.Xor(x.Equal(x).AsInt16x32().AsUint16x32())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX512
+func (x Uint32x16) Not() Uint32x16 {
+	return x.Xor(x.Equal(x).AsInt32x16().AsUint32x16())
+}
+
+// Not returns the bitwise complement of x
+//
+// Emulated, CPU Feature AVX512
+func (x Uint64x8) Not() Uint64x8 {
+	return x.Xor(x.Equal(x).AsInt64x8().AsUint64x8())
+}
-- 
2.52.0