From 94d72355f662a1c8229db661cc068ea8e901641c Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 30 Jul 2025 17:42:10 -0400 Subject: [PATCH] [dev.simd] simd: add emulations for bitwise ops and for mask/merge methods This CL adds the emulations under a "wrong name"; subsequent CLs will move the AVX512 versions of these operations out of the way, and then will rename these to their better names. Change-Id: I49e7a73e4fea74fb7bd26cb8062014568d7999ca Reviewed-on: https://go-review.googlesource.com/c/go/+/692217 Reviewed-by: Junyang Shao Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- src/simd/genfiles.go | 82 +++++++- src/simd/simd_test.go | 14 ++ src/simd/slice_amd64.go | 408 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 488 insertions(+), 16 deletions(-) diff --git a/src/simd/genfiles.go b/src/simd/genfiles.go index 269659a653..c7c6aae374 100644 --- a/src/simd/genfiles.go +++ b/src/simd/genfiles.go @@ -50,13 +50,20 @@ var convert32Shapes = &shapes{ floats: []int{32}, } -var avx512MaskedLoadShapes = &shapes{ +var avx512Shapes = &shapes{ vecs: []int{512}, ints: []int{8, 16, 32, 64}, uints: []int{8, 16, 32, 64}, floats: []int{32, 64}, } +var avx2Shapes = &shapes{ + vecs: []int{128, 256}, + ints: []int{8, 16, 32, 64}, + uints: []int{8, 16, 32, 64}, + floats: []int{32, 64}, +} + var avx2MaskedLoadShapes = &shapes{ vecs: []int{128, 256}, ints: []int{32, 64}, @@ -70,12 +77,12 @@ var avx2SmallLoadPunShapes = &shapes{ uints: []int{8, 16}, } -var unaryFlaky = &shapes{ +var unaryFlaky = &shapes{ // for tests that support flaky equality vecs: []int{128, 256, 512}, floats: []int{32, 64}, } -var ternaryFlaky = &shapes{ +var ternaryFlaky = &shapes{ // for tests that support flaky equality vecs: []int{128, 256, 512}, floats: []int{32}, } @@ -88,6 +95,7 @@ func oneTemplate(t *template.Template, baseType string, width, count int, out io BaseType := strings.ToUpper(baseType[:1]) + baseType[1:] eType := fmt.Sprintf("%s%d", baseType, width) wxc := 
fmt.Sprintf("%dx%d", width, count) + bxc := fmt.Sprintf("%dx%d", 8, count*(width/8)) vType := fmt.Sprintf("%s%s", BaseType, wxc) aOrAn := "a" if strings.Contains("aeiou", baseType[:1]) { @@ -100,6 +108,8 @@ func oneTemplate(t *template.Template, baseType string, width, count int, out io Width int // the bit width of the element type, e.g. 32 Count int // the number of elements, e.g. 4 WxC string // the width-by-type string, e.g., "32x4" + BxC string // the shape of the same vector viewed as bytes, e.g., "8x16" when WxC is "32x4" + Base string // the capitalized Base Type of the vector, e.g., "Float" Type string // the element type, e.g. "float32" OxFF string // a mask for the lowest 'count' bits }{ @@ -108,6 +118,8 @@ func oneTemplate(t *template.Template, baseType string, width, count int, out io Width: width, Count: count, WxC: wxc, + BxC: bxc, + Base: BaseType, Type: eType, OxFF: oxFF, }) @@ -373,7 +385,7 @@ func test{{.Vec}}CompareMasked(t *testing.T, } `) -var avx512MaskedLoadSlicePartTemplate = shapedTemplateOf(avx512MaskedLoadShapes, "avx 512 load slice part", ` +var avx512MaskedLoadSlicePartTemplate = shapedTemplateOf(avx512Shapes, "avx 512 load slice part", ` // Load{{.Vec}}SlicePart loads a {{.Vec}} from the slice s. // If s has fewer than {{.Count}} elements, the remaining elements of the vector are filled with zeroes. // If s has {{.Count}} or more elements, the function is equivalent to Load{{.Vec}}Slice. @@ -386,7 +398,6 @@ func Load{{.Vec}}SlicePart(s []{{.Type}}) {{.Vec}} { var x {{.Vec}} return x } - mask := Mask{{.WxC}}FromBits({{.OxFF}} >> ({{.Count}} - l)) return LoadMasked{{.Vec}}(pa{{.Vec}}(s), mask) } @@ -476,6 +487,58 @@ func pa{{.Vec}}(s []{{.Type}}) *[{{.Count}}]{{.Type}} { } `) +var avx2MaskedTemplate = shapedTemplateOf(avx2Shapes, "avx2 .Masked methods", ` +// Masked returns x but with elements zeroed where mask is false.
+func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} { + im := mask.AsInt{{.WxC}}() +{{- if eq .Base "Int" }} + return im.And(x) +{{- else}} + return x.AsInt{{.WxC}}().And(im).As{{.Vec}}() +{{- end -}} +} + +// Merge returns x but with elements set to y where mask is false. +func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} { +{{- if eq .BxC .WxC }} + im := mask.AsInt{{.BxC}}() +{{- else}} + im := mask.AsInt{{.WxC}}().AsInt{{.BxC}}() +{{- end -}} +{{- if and (eq .Base "Int") (eq .BxC .WxC) }} + return y.blend(x, im) +{{- else}} + ix := x.AsInt{{.BxC}}() + iy := y.AsInt{{.BxC}}() + return iy.blend(ix, im).As{{.Vec}}() +{{- end -}} +} +`) + +// TODO perhaps write these in ways that work better on AVX512 +var avx512MaskedTemplate = shapedTemplateOf(avx512Shapes, "avx512 .Masked methods", ` +// Masked returns x but with elements zeroed where mask is false. +func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} { + im := mask.AsInt{{.WxC}}() +{{- if eq .Base "Int" }} + return im.And(x) +{{- else}} + return x.AsInt{{.WxC}}().And(im).As{{.Vec}}() +{{- end -}} +} + +// Merge returns x but with elements set to y where m is false. 
+func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} { +{{- if eq .Base "Int" }} + return y.blendMasked(x, mask) +{{- else}} + ix := x.AsInt{{.WxC}}() + iy := y.AsInt{{.WxC}}() + return iy.blendMasked(ix, mask).As{{.Vec}}() +{{- end -}} +} +`) + func main() { sl := flag.String("sl", "slice_amd64.go", "file name for slice operations") ush := flag.String("ush", "unsafe_helpers.go", "file name for unsafe helpers") @@ -487,7 +550,14 @@ func main() { flag.Parse() if *sl != "" { - one(*sl, prologue, sliceTemplate, avx512MaskedLoadSlicePartTemplate, avx2MaskedLoadSlicePartTemplate, avx2SmallLoadSlicePartTemplate) + one(*sl, prologue, + sliceTemplate, + avx512MaskedLoadSlicePartTemplate, + avx2MaskedLoadSlicePartTemplate, + avx2SmallLoadSlicePartTemplate, + avx2MaskedTemplate, + avx512MaskedTemplate, + ) } if *ush != "" { one(*ush, unsafePrologue, unsafePATemplate) diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go index 4c3817599e..2fef6417d2 100644 --- a/src/simd/simd_test.go +++ b/src/simd/simd_test.go @@ -382,3 +382,17 @@ func TestBitMaskToBits(t *testing.T) { t.Errorf("Want 0b101, got %b", v) } } + +func TestMergeFloat(t *testing.T) { + a := simd.LoadFloat64x4Slice([]float64{1, 2, 3, 4}) + b := simd.LoadFloat64x4Slice([]float64{4, 2, 3, 1}) + g := a.Greater(b) + k := make([]int64, 4, 4) + g.AsInt64x4().StoreSlice(k) + checkSlices[int64](t, k, []int64{0, 0, 0, -1}) + c := a.Merge(b, g) + + s := make([]float64, 4, 4) + c.StoreSlice(s) + checkSlices[float64](t, s, []float64{4, 2, 3, 4}) +} diff --git a/src/simd/slice_amd64.go b/src/simd/slice_amd64.go index bd1d4f1530..a43660cba4 100644 --- a/src/simd/slice_amd64.go +++ b/src/simd/slice_amd64.go @@ -318,7 +318,6 @@ func LoadInt8x64SlicePart(s []int8) Int8x64 { var x Int8x64 return x } - mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) return LoadMaskedInt8x64(paInt8x64(s), mask) } @@ -351,7 +350,6 @@ func LoadInt16x32SlicePart(s []int16) Int16x32 { var x Int16x32 return x } - mask := 
Mask16x32FromBits(0xffffffff >> (32 - l)) return LoadMaskedInt16x32(paInt16x32(s), mask) } @@ -384,7 +382,6 @@ func LoadInt32x16SlicePart(s []int32) Int32x16 { var x Int32x16 return x } - mask := Mask32x16FromBits(0xffff >> (16 - l)) return LoadMaskedInt32x16(paInt32x16(s), mask) } @@ -417,7 +414,6 @@ func LoadInt64x8SlicePart(s []int64) Int64x8 { var x Int64x8 return x } - mask := Mask64x8FromBits(0xff >> (8 - l)) return LoadMaskedInt64x8(paInt64x8(s), mask) } @@ -450,7 +446,6 @@ func LoadUint8x64SlicePart(s []uint8) Uint8x64 { var x Uint8x64 return x } - mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) return LoadMaskedUint8x64(paUint8x64(s), mask) } @@ -483,7 +478,6 @@ func LoadUint16x32SlicePart(s []uint16) Uint16x32 { var x Uint16x32 return x } - mask := Mask16x32FromBits(0xffffffff >> (32 - l)) return LoadMaskedUint16x32(paUint16x32(s), mask) } @@ -516,7 +510,6 @@ func LoadUint32x16SlicePart(s []uint32) Uint32x16 { var x Uint32x16 return x } - mask := Mask32x16FromBits(0xffff >> (16 - l)) return LoadMaskedUint32x16(paUint32x16(s), mask) } @@ -549,7 +542,6 @@ func LoadUint64x8SlicePart(s []uint64) Uint64x8 { var x Uint64x8 return x } - mask := Mask64x8FromBits(0xff >> (8 - l)) return LoadMaskedUint64x8(paUint64x8(s), mask) } @@ -582,7 +574,6 @@ func LoadFloat32x16SlicePart(s []float32) Float32x16 { var x Float32x16 return x } - mask := Mask32x16FromBits(0xffff >> (16 - l)) return LoadMaskedFloat32x16(paFloat32x16(s), mask) } @@ -615,7 +606,6 @@ func LoadFloat64x8SlicePart(s []float64) Float64x8 { var x Float64x8 return x } - mask := Mask64x8FromBits(0xff >> (8 - l)) return LoadMaskedFloat64x8(paFloat64x8(s), mask) } @@ -1111,3 +1101,401 @@ func (x Uint16x16) StoreSlicePart(s []uint16) { t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s)) x.AsInt16x16().StoreSlicePart(t) } + +// Masked returns x but with elements zeroed where mask is false. 
+func (x Int8x16) Masked(mask Mask8x16) Int8x16 { + im := mask.AsInt8x16() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int8x16) Merge(y Int8x16, mask Mask8x16) Int8x16 { + im := mask.AsInt8x16() + return y.blend(x, im) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int16x8) Masked(mask Mask16x8) Int16x8 { + im := mask.AsInt16x8() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int16x8) Merge(y Int16x8, mask Mask16x8) Int16x8 { + im := mask.AsInt16x8().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsInt16x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int32x4) Masked(mask Mask32x4) Int32x4 { + im := mask.AsInt32x4() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int32x4) Merge(y Int32x4, mask Mask32x4) Int32x4 { + im := mask.AsInt32x4().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsInt32x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int64x2) Masked(mask Mask64x2) Int64x2 { + im := mask.AsInt64x2() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int64x2) Merge(y Int64x2, mask Mask64x2) Int64x2 { + im := mask.AsInt64x2().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsInt64x2() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint8x16) Masked(mask Mask8x16) Uint8x16 { + im := mask.AsInt8x16() + return x.AsInt8x16().And(im).AsUint8x16() +} + +// Merge returns x but with elements set to y where mask is false. 
+func (x Uint8x16) Merge(y Uint8x16, mask Mask8x16) Uint8x16 { + im := mask.AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsUint8x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint16x8) Masked(mask Mask16x8) Uint16x8 { + im := mask.AsInt16x8() + return x.AsInt16x8().And(im).AsUint16x8() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint16x8) Merge(y Uint16x8, mask Mask16x8) Uint16x8 { + im := mask.AsInt16x8().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsUint16x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint32x4) Masked(mask Mask32x4) Uint32x4 { + im := mask.AsInt32x4() + return x.AsInt32x4().And(im).AsUint32x4() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint32x4) Merge(y Uint32x4, mask Mask32x4) Uint32x4 { + im := mask.AsInt32x4().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsUint32x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint64x2) Masked(mask Mask64x2) Uint64x2 { + im := mask.AsInt64x2() + return x.AsInt64x2().And(im).AsUint64x2() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint64x2) Merge(y Uint64x2, mask Mask64x2) Uint64x2 { + im := mask.AsInt64x2().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsUint64x2() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float32x4) Masked(mask Mask32x4) Float32x4 { + im := mask.AsInt32x4() + return x.AsInt32x4().And(im).AsFloat32x4() +} + +// Merge returns x but with elements set to y where mask is false. 
+func (x Float32x4) Merge(y Float32x4, mask Mask32x4) Float32x4 { + im := mask.AsInt32x4().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsFloat32x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float64x2) Masked(mask Mask64x2) Float64x2 { + im := mask.AsInt64x2() + return x.AsInt64x2().And(im).AsFloat64x2() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Float64x2) Merge(y Float64x2, mask Mask64x2) Float64x2 { + im := mask.AsInt64x2().AsInt8x16() + ix := x.AsInt8x16() + iy := y.AsInt8x16() + return iy.blend(ix, im).AsFloat64x2() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int8x32) Masked(mask Mask8x32) Int8x32 { + im := mask.AsInt8x32() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int8x32) Merge(y Int8x32, mask Mask8x32) Int8x32 { + im := mask.AsInt8x32() + return y.blend(x, im) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int16x16) Masked(mask Mask16x16) Int16x16 { + im := mask.AsInt16x16() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int16x16) Merge(y Int16x16, mask Mask16x16) Int16x16 { + im := mask.AsInt16x16().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsInt16x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int32x8) Masked(mask Mask32x8) Int32x8 { + im := mask.AsInt32x8() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int32x8) Merge(y Int32x8, mask Mask32x8) Int32x8 { + im := mask.AsInt32x8().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsInt32x8() +} + +// Masked returns x but with elements zeroed where mask is false. 
+func (x Int64x4) Masked(mask Mask64x4) Int64x4 { + im := mask.AsInt64x4() + return im.And(x) +} + +// Merge returns x but with elements set to y where mask is false. +func (x Int64x4) Merge(y Int64x4, mask Mask64x4) Int64x4 { + im := mask.AsInt64x4().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsInt64x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint8x32) Masked(mask Mask8x32) Uint8x32 { + im := mask.AsInt8x32() + return x.AsInt8x32().And(im).AsUint8x32() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint8x32) Merge(y Uint8x32, mask Mask8x32) Uint8x32 { + im := mask.AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsUint8x32() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint16x16) Masked(mask Mask16x16) Uint16x16 { + im := mask.AsInt16x16() + return x.AsInt16x16().And(im).AsUint16x16() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint16x16) Merge(y Uint16x16, mask Mask16x16) Uint16x16 { + im := mask.AsInt16x16().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsUint16x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint32x8) Masked(mask Mask32x8) Uint32x8 { + im := mask.AsInt32x8() + return x.AsInt32x8().And(im).AsUint32x8() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Uint32x8) Merge(y Uint32x8, mask Mask32x8) Uint32x8 { + im := mask.AsInt32x8().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsUint32x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint64x4) Masked(mask Mask64x4) Uint64x4 { + im := mask.AsInt64x4() + return x.AsInt64x4().And(im).AsUint64x4() +} + +// Merge returns x but with elements set to y where mask is false. 
+func (x Uint64x4) Merge(y Uint64x4, mask Mask64x4) Uint64x4 { + im := mask.AsInt64x4().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsUint64x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float32x8) Masked(mask Mask32x8) Float32x8 { + im := mask.AsInt32x8() + return x.AsInt32x8().And(im).AsFloat32x8() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Float32x8) Merge(y Float32x8, mask Mask32x8) Float32x8 { + im := mask.AsInt32x8().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsFloat32x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float64x4) Masked(mask Mask64x4) Float64x4 { + im := mask.AsInt64x4() + return x.AsInt64x4().And(im).AsFloat64x4() +} + +// Merge returns x but with elements set to y where mask is false. +func (x Float64x4) Merge(y Float64x4, mask Mask64x4) Float64x4 { + im := mask.AsInt64x4().AsInt8x32() + ix := x.AsInt8x32() + iy := y.AsInt8x32() + return iy.blend(ix, im).AsFloat64x4() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int8x64) Masked(mask Mask8x64) Int8x64 { + im := mask.AsInt8x64() + return im.And(x) +} + +// Merge returns x but with elements set to y where m is false. +func (x Int8x64) Merge(y Int8x64, mask Mask8x64) Int8x64 { + return y.blendMasked(x, mask) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int16x32) Masked(mask Mask16x32) Int16x32 { + im := mask.AsInt16x32() + return im.And(x) +} + +// Merge returns x but with elements set to y where m is false. +func (x Int16x32) Merge(y Int16x32, mask Mask16x32) Int16x32 { + return y.blendMasked(x, mask) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int32x16) Masked(mask Mask32x16) Int32x16 { + im := mask.AsInt32x16() + return im.And(x) +} + +// Merge returns x but with elements set to y where m is false. 
+func (x Int32x16) Merge(y Int32x16, mask Mask32x16) Int32x16 { + return y.blendMasked(x, mask) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Int64x8) Masked(mask Mask64x8) Int64x8 { + im := mask.AsInt64x8() + return im.And(x) +} + +// Merge returns x but with elements set to y where m is false. +func (x Int64x8) Merge(y Int64x8, mask Mask64x8) Int64x8 { + return y.blendMasked(x, mask) +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint8x64) Masked(mask Mask8x64) Uint8x64 { + im := mask.AsInt8x64() + return x.AsInt8x64().And(im).AsUint8x64() +} + +// Merge returns x but with elements set to y where m is false. +func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 { + ix := x.AsInt8x64() + iy := y.AsInt8x64() + return iy.blendMasked(ix, mask).AsUint8x64() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint16x32) Masked(mask Mask16x32) Uint16x32 { + im := mask.AsInt16x32() + return x.AsInt16x32().And(im).AsUint16x32() +} + +// Merge returns x but with elements set to y where m is false. +func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 { + ix := x.AsInt16x32() + iy := y.AsInt16x32() + return iy.blendMasked(ix, mask).AsUint16x32() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint32x16) Masked(mask Mask32x16) Uint32x16 { + im := mask.AsInt32x16() + return x.AsInt32x16().And(im).AsUint32x16() +} + +// Merge returns x but with elements set to y where m is false. +func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 { + ix := x.AsInt32x16() + iy := y.AsInt32x16() + return iy.blendMasked(ix, mask).AsUint32x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Uint64x8) Masked(mask Mask64x8) Uint64x8 { + im := mask.AsInt64x8() + return x.AsInt64x8().And(im).AsUint64x8() +} + +// Merge returns x but with elements set to y where m is false. 
+func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 { + ix := x.AsInt64x8() + iy := y.AsInt64x8() + return iy.blendMasked(ix, mask).AsUint64x8() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float32x16) Masked(mask Mask32x16) Float32x16 { + im := mask.AsInt32x16() + return x.AsInt32x16().And(im).AsFloat32x16() +} + +// Merge returns x but with elements set to y where m is false. +func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 { + ix := x.AsInt32x16() + iy := y.AsInt32x16() + return iy.blendMasked(ix, mask).AsFloat32x16() +} + +// Masked returns x but with elements zeroed where mask is false. +func (x Float64x8) Masked(mask Mask64x8) Float64x8 { + im := mask.AsInt64x8() + return x.AsInt64x8().And(im).AsFloat64x8() +} + +// Merge returns x but with elements set to y where m is false. +func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 { + ix := x.AsInt64x8() + iy := y.AsInt64x8() + return iy.blendMasked(ix, mask).AsFloat64x8() +} -- 2.52.0