From: David Chase Date: Wed, 28 Jan 2026 20:13:38 +0000 (+0200) Subject: simd/archsimd: add missing cpufeature to generated mask/merge methods X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=985b0b3fe26661c10a3201470e80685765656363;p=gostls13.git simd/archsimd: add missing cpufeature to generated mask/merge methods Change-Id: I34678f4ef17fe1b8b7657a2c3d39685b4a5951f2 Reviewed-on: https://go-review.googlesource.com/c/go/+/739981 LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- diff --git a/src/simd/archsimd/_gen/tmplgen/main.go b/src/simd/archsimd/_gen/tmplgen/main.go index 45338b765d..da7eae9029 100644 --- a/src/simd/archsimd/_gen/tmplgen/main.go +++ b/src/simd/archsimd/_gen/tmplgen/main.go @@ -801,6 +801,8 @@ func pa{{.VType}}(s []{{.Etype}}) *[{{.Count}}]{{.Etype}} { var avx2MaskedTemplate = shapedTemplateOf(avx2Shapes, "avx2 .Masked methods", ` // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) Masked(mask Mask{{.WxC}}) {{.VType}} { im := mask.ToInt{{.WxC}}() {{- if eq .Base "Int" }} @@ -811,6 +813,8 @@ func (x {{.VType}}) Masked(mask Mask{{.WxC}}) {{.VType}} { } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) Merge(y {{.VType}}, mask Mask{{.WxC}}) {{.VType}} { {{- if eq .BxC .WxC -}} im := mask.ToInt{{.BxC}}() @@ -830,6 +834,8 @@ func (x {{.VType}}) Merge(y {{.VType}}, mask Mask{{.WxC}}) {{.VType}} { // TODO perhaps write these in ways that work better on AVX512 var avx512MaskedTemplate = shapedTemplateOf(avx512Shapes, "avx512 .Masked methods", ` // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x {{.VType}}) Masked(mask Mask{{.WxC}}) {{.VType}} { im := mask.ToInt{{.WxC}}() {{- if eq .Base "Int" }} @@ -840,6 +846,8 @@ func (x {{.VType}}) Masked(mask Mask{{.WxC}}) {{.VType}} { } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x {{.VType}}) Merge(y {{.VType}}, mask Mask{{.WxC}}) {{.VType}} { {{- if eq .Base "Int" }} return y.blendMasked(x, mask) diff --git a/src/simd/archsimd/maskmerge_gen_amd64.go b/src/simd/archsimd/maskmerge_gen_amd64.go index ad56521714..593ad2d480 100644 --- a/src/simd/archsimd/maskmerge_gen_amd64.go +++ b/src/simd/archsimd/maskmerge_gen_amd64.go @@ -5,24 +5,32 @@ package archsimd // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Int8x16) Masked(mask Mask8x16) Int8x16 { im := mask.ToInt8x16() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Int8x16) Merge(y Int8x16, mask Mask8x16) Int8x16 { im := mask.ToInt8x16() return y.blend(x, im) } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Int16x8) Masked(mask Mask16x8) Int16x8 { im := mask.ToInt16x8() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Int16x8) Merge(y Int16x8, mask Mask16x8) Int16x8 { im := mask.ToInt16x8().AsInt8x16() ix := x.AsInt8x16() @@ -31,12 +39,16 @@ func (x Int16x8) Merge(y Int16x8, mask Mask16x8) Int16x8 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Int32x4) Masked(mask Mask32x4) Int32x4 { im := mask.ToInt32x4() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Int32x4) Merge(y Int32x4, mask Mask32x4) Int32x4 { im := mask.ToInt32x4().AsInt8x16() ix := x.AsInt8x16() @@ -45,12 +57,16 @@ func (x Int32x4) Merge(y Int32x4, mask Mask32x4) Int32x4 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Int64x2) Masked(mask Mask64x2) Int64x2 { im := mask.ToInt64x2() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Int64x2) Merge(y Int64x2, mask Mask64x2) Int64x2 { im := mask.ToInt64x2().AsInt8x16() ix := x.AsInt8x16() @@ -59,12 +75,16 @@ func (x Int64x2) Merge(y Int64x2, mask Mask64x2) Int64x2 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Uint8x16) Masked(mask Mask8x16) Uint8x16 { im := mask.ToInt8x16() return x.AsInt8x16().And(im).AsUint8x16() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Uint8x16) Merge(y Uint8x16, mask Mask8x16) Uint8x16 { im := mask.ToInt8x16() ix := x.AsInt8x16() @@ -73,12 +93,16 @@ func (x Uint8x16) Merge(y Uint8x16, mask Mask8x16) Uint8x16 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Uint16x8) Masked(mask Mask16x8) Uint16x8 { im := mask.ToInt16x8() return x.AsInt16x8().And(im).AsUint16x8() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Uint16x8) Merge(y Uint16x8, mask Mask16x8) Uint16x8 { im := mask.ToInt16x8().AsInt8x16() ix := x.AsInt8x16() @@ -87,12 +111,16 @@ func (x Uint16x8) Merge(y Uint16x8, mask Mask16x8) Uint16x8 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Uint32x4) Masked(mask Mask32x4) Uint32x4 { im := mask.ToInt32x4() return x.AsInt32x4().And(im).AsUint32x4() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Uint32x4) Merge(y Uint32x4, mask Mask32x4) Uint32x4 { im := mask.ToInt32x4().AsInt8x16() ix := x.AsInt8x16() @@ -101,12 +129,16 @@ func (x Uint32x4) Merge(y Uint32x4, mask Mask32x4) Uint32x4 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Uint64x2) Masked(mask Mask64x2) Uint64x2 { im := mask.ToInt64x2() return x.AsInt64x2().And(im).AsUint64x2() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Uint64x2) Merge(y Uint64x2, mask Mask64x2) Uint64x2 { im := mask.ToInt64x2().AsInt8x16() ix := x.AsInt8x16() @@ -115,12 +147,16 @@ func (x Uint64x2) Merge(y Uint64x2, mask Mask64x2) Uint64x2 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Float32x4) Masked(mask Mask32x4) Float32x4 { im := mask.ToInt32x4() return x.AsInt32x4().And(im).AsFloat32x4() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Float32x4) Merge(y Float32x4, mask Mask32x4) Float32x4 { im := mask.ToInt32x4().AsInt8x16() ix := x.AsInt8x16() @@ -129,12 +165,16 @@ func (x Float32x4) Merge(y Float32x4, mask Mask32x4) Float32x4 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX func (x Float64x2) Masked(mask Mask64x2) Float64x2 { im := mask.ToInt64x2() return x.AsInt64x2().And(im).AsFloat64x2() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX func (x Float64x2) Merge(y Float64x2, mask Mask64x2) Float64x2 { im := mask.ToInt64x2().AsInt8x16() ix := x.AsInt8x16() @@ -143,24 +183,32 @@ func (x Float64x2) Merge(y Float64x2, mask Mask64x2) Float64x2 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Int8x32) Masked(mask Mask8x32) Int8x32 { im := mask.ToInt8x32() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Int8x32) Merge(y Int8x32, mask Mask8x32) Int8x32 { im := mask.ToInt8x32() return y.blend(x, im) } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Int16x16) Masked(mask Mask16x16) Int16x16 { im := mask.ToInt16x16() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Int16x16) Merge(y Int16x16, mask Mask16x16) Int16x16 { im := mask.ToInt16x16().AsInt8x32() ix := x.AsInt8x32() @@ -169,12 +217,16 @@ func (x Int16x16) Merge(y Int16x16, mask Mask16x16) Int16x16 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Int32x8) Masked(mask Mask32x8) Int32x8 { im := mask.ToInt32x8() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Int32x8) Merge(y Int32x8, mask Mask32x8) Int32x8 { im := mask.ToInt32x8().AsInt8x32() ix := x.AsInt8x32() @@ -183,12 +235,16 @@ func (x Int32x8) Merge(y Int32x8, mask Mask32x8) Int32x8 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Int64x4) Masked(mask Mask64x4) Int64x4 { im := mask.ToInt64x4() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Int64x4) Merge(y Int64x4, mask Mask64x4) Int64x4 { im := mask.ToInt64x4().AsInt8x32() ix := x.AsInt8x32() @@ -197,12 +253,16 @@ func (x Int64x4) Merge(y Int64x4, mask Mask64x4) Int64x4 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Uint8x32) Masked(mask Mask8x32) Uint8x32 { im := mask.ToInt8x32() return x.AsInt8x32().And(im).AsUint8x32() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Uint8x32) Merge(y Uint8x32, mask Mask8x32) Uint8x32 { im := mask.ToInt8x32() ix := x.AsInt8x32() @@ -211,12 +271,16 @@ func (x Uint8x32) Merge(y Uint8x32, mask Mask8x32) Uint8x32 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Uint16x16) Masked(mask Mask16x16) Uint16x16 { im := mask.ToInt16x16() return x.AsInt16x16().And(im).AsUint16x16() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Uint16x16) Merge(y Uint16x16, mask Mask16x16) Uint16x16 { im := mask.ToInt16x16().AsInt8x32() ix := x.AsInt8x32() @@ -225,12 +289,16 @@ func (x Uint16x16) Merge(y Uint16x16, mask Mask16x16) Uint16x16 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Uint32x8) Masked(mask Mask32x8) Uint32x8 { im := mask.ToInt32x8() return x.AsInt32x8().And(im).AsUint32x8() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Uint32x8) Merge(y Uint32x8, mask Mask32x8) Uint32x8 { im := mask.ToInt32x8().AsInt8x32() ix := x.AsInt8x32() @@ -239,12 +307,16 @@ func (x Uint32x8) Merge(y Uint32x8, mask Mask32x8) Uint32x8 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Uint64x4) Masked(mask Mask64x4) Uint64x4 { im := mask.ToInt64x4() return x.AsInt64x4().And(im).AsUint64x4() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Uint64x4) Merge(y Uint64x4, mask Mask64x4) Uint64x4 { im := mask.ToInt64x4().AsInt8x32() ix := x.AsInt8x32() @@ -253,12 +325,16 @@ func (x Uint64x4) Merge(y Uint64x4, mask Mask64x4) Uint64x4 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Float32x8) Masked(mask Mask32x8) Float32x8 { im := mask.ToInt32x8() return x.AsInt32x8().And(im).AsFloat32x8() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Float32x8) Merge(y Float32x8, mask Mask32x8) Float32x8 { im := mask.ToInt32x8().AsInt8x32() ix := x.AsInt8x32() @@ -267,12 +343,16 @@ func (x Float32x8) Merge(y Float32x8, mask Mask32x8) Float32x8 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Float64x4) Masked(mask Mask64x4) Float64x4 { im := mask.ToInt64x4() return x.AsInt64x4().And(im).AsFloat64x4() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX2 func (x Float64x4) Merge(y Float64x4, mask Mask64x4) Float64x4 { im := mask.ToInt64x4().AsInt8x32() ix := x.AsInt8x32() @@ -281,56 +361,76 @@ func (x Float64x4) Merge(y Float64x4, mask Mask64x4) Float64x4 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Int8x64) Masked(mask Mask8x64) Int8x64 { im := mask.ToInt8x64() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Int8x64) Merge(y Int8x64, mask Mask8x64) Int8x64 { return y.blendMasked(x, mask) } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Int16x32) Masked(mask Mask16x32) Int16x32 { im := mask.ToInt16x32() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Int16x32) Merge(y Int16x32, mask Mask16x32) Int16x32 { return y.blendMasked(x, mask) } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Int32x16) Masked(mask Mask32x16) Int32x16 { im := mask.ToInt32x16() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Int32x16) Merge(y Int32x16, mask Mask32x16) Int32x16 { return y.blendMasked(x, mask) } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Int64x8) Masked(mask Mask64x8) Int64x8 { im := mask.ToInt64x8() return im.And(x) } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Int64x8) Merge(y Int64x8, mask Mask64x8) Int64x8 { return y.blendMasked(x, mask) } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Uint8x64) Masked(mask Mask8x64) Uint8x64 { im := mask.ToInt8x64() return x.AsInt8x64().And(im).AsUint8x64() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 { ix := x.AsInt8x64() iy := y.AsInt8x64() @@ -338,12 +438,16 @@ func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Uint16x32) Masked(mask Mask16x32) Uint16x32 { im := mask.ToInt16x32() return x.AsInt16x32().And(im).AsUint16x32() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 { ix := x.AsInt16x32() iy := y.AsInt16x32() @@ -351,12 +455,16 @@ func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Uint32x16) Masked(mask Mask32x16) Uint32x16 { im := mask.ToInt32x16() return x.AsInt32x16().And(im).AsUint32x16() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 { ix := x.AsInt32x16() iy := y.AsInt32x16() @@ -364,12 +472,16 @@ func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Uint64x8) Masked(mask Mask64x8) Uint64x8 { im := mask.ToInt64x8() return x.AsInt64x8().And(im).AsUint64x8() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 { ix := x.AsInt64x8() iy := y.AsInt64x8() @@ -377,12 +489,16 @@ func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Float32x16) Masked(mask Mask32x16) Float32x16 { im := mask.ToInt32x16() return x.AsInt32x16().And(im).AsFloat32x16() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 { ix := x.AsInt32x16() iy := y.AsInt32x16() @@ -390,12 +506,16 @@ func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 { } // Masked returns x but with elements zeroed where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Float64x8) Masked(mask Mask64x8) Float64x8 { im := mask.ToInt64x8() return x.AsInt64x8().And(im).AsFloat64x8() } // Merge returns x but with elements set to y where mask is false. +// +// Emulated, CPU Feature: AVX512 func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 { ix := x.AsInt64x8() iy := y.AsInt64x8()