From d64add4d60b69973e5cae62f07983f91435de58b Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Fri, 19 Dec 2025 17:37:37 -0500 Subject: [PATCH] simd/archsimd: adjust documentations slightly - Reword the documentation of Scale to mention parameter names. - Correct the parameter name in Merge. - Use proper a/an articles in some documentation. - Add punctuations. - Format code blocks for long expressions. Change-Id: I8a31721503c1b155862255619a835895f3d5123a Reviewed-on: https://go-review.googlesource.com/c/go/+/731560 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- .../archsimd/_gen/simdgen/gen_simdTypes.go | 20 +- .../simdgen/ops/FPonlyArith/categories.yaml | 3 +- .../_gen/simdgen/ops/Moves/categories.yaml | 12 +- .../archsimd/_gen/simdgen/ops/Moves/go.yaml | 30 +- src/simd/archsimd/_gen/tmplgen/main.go | 54 +-- src/simd/archsimd/compare_gen_amd64.go | 288 ++++++------- src/simd/archsimd/extra_amd64.go | 32 +- src/simd/archsimd/maskmerge_gen_amd64.go | 20 +- src/simd/archsimd/ops_amd64.go | 354 ++++++++++------ src/simd/archsimd/ops_internal_amd64.go | 40 +- src/simd/archsimd/other_gen_amd64.go | 216 +++++----- src/simd/archsimd/shuffles_amd64.go | 43 +- src/simd/archsimd/slice_gen_amd64.go | 120 +++--- src/simd/archsimd/types_amd64.go | 384 +++++++++--------- 14 files changed, 897 insertions(+), 719 deletions(-) diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go index baf035f31d..453784ff36 100644 --- a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go @@ -142,7 +142,7 @@ type v{{.}} struct { {{end}} {{define "typeTmpl"}} -// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}} +// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}}s. 
type {{.Name}} struct { {{.Fields}} } @@ -178,15 +178,15 @@ func (X86Features) {{.Feature}}() bool { ` const simdLoadStoreTemplate = ` -// Len returns the number of elements in a {{.Name}} +// Len returns the number of elements in {{.Article}} {{.Name}}. func (x {{.Name}}) Len() int { return {{.Lanes}} } -// Load{{.Name}} loads a {{.Name}} from an array +// Load{{.Name}} loads {{.Article}} {{.Name}} from an array. // //go:noescape func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} -// Store stores a {{.Name}} to an array +// Store stores {{.Article}} {{.Name}} to an array. // //go:noescape func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) @@ -211,16 +211,16 @@ func (x {{.Name}}) ToBits() uint{{.LanesContainer}} ` const simdMaskedLoadStoreTemplate = ` -// LoadMasked{{.Name}} loads a {{.Name}} from an array, -// at those elements enabled by mask +// LoadMasked{{.Name}} loads {{.Article}} {{.Name}} from an array, +// at those elements enabled by mask. // {{.MaskedLoadDoc}} // //go:noescape func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}} -// StoreMasked stores a {{.Name}} to an array, -// at those elements enabled by mask +// StoreMasked stores {{.Article}} {{.Name}} to an array, +// at those elements enabled by mask. // {{.MaskedStoreDoc}} // @@ -407,10 +407,10 @@ func (x {{.Tsrc.Name}}) As{{.Tdst.Name}}() {{.Tdst.Name}} {{end}} {{define "mask"}} -// To{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}} +// To{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}}. func (from {{.Name}}) To{{.VectorCounterpart}}() (to {{.VectorCounterpart}}) -// asMask converts from {{.VectorCounterpart}} to {{.Name}} +// asMask converts from {{.VectorCounterpart}} to {{.Name}}. 
func (from {{.VectorCounterpart}}) asMask() (to {{.Name}}) func (x {{.Name}}) And(y {{.Name}}) {{.Name}} diff --git a/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml index a270ca54ab..90f5208ff7 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml @@ -18,7 +18,8 @@ - go: Scale commutative: false documentation: !string |- - // NAME multiplies elements by a power of 2. + // NAME multiplies each element of x by 2 raised to the power of the + // floor of the corresponding element in y. - go: RoundToEven commutative: false constImm: 0 diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml index 9e945cdebd..38bc9374cc 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml @@ -31,17 +31,23 @@ commutative: false documentation: !string |- // NAME performs a full permutation of vector x using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // + // result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // - go: Permute commutative: false documentation: !string |- // NAME performs a full permutation of vector x using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // + // result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // - go: ConcatPermute # ConcatPermute is only available on or after AVX512 commutative: false documentation: !string |- // NAME performs a full permutation of vector x, y using indices: - // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} + // + // result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} + // // where xy is the concatenation of x (lower half) and y (upper half). 
// Only the needed bits to represent xy's index are used in indices' elements. - go: Compress diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml index d3aa7d4cdc..e1fd184ed7 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml @@ -227,7 +227,7 @@ - go: Permute asm: "VPERMQ|VPERMPD" addDoc: !string |- - // The low 2 bits (values 0-3) of each element of indices is used + // The low 2 bits (values 0-3) of each element of indices is used. operandOrder: "21Type1" in: - &anyindices @@ -244,7 +244,7 @@ - go: Permute asm: "VPERM[WDQ]|VPERMP[SD]" addDoc: !string |- - // The low 3 bits (values 0-7) of each element of indices is used + // The low 3 bits (values 0-7) of each element of indices is used. operandOrder: "21Type1" in: - *anyindices @@ -257,7 +257,7 @@ - go: Permute asm: "VPERM[BWD]|VPERMPS" addDoc: !string |- - // The low 4 bits (values 0-15) of each element of indices is used + // The low 4 bits (values 0-15) of each element of indices is used. operandOrder: "21Type1" in: - *anyindices @@ -270,7 +270,7 @@ - go: Permute asm: "VPERM[BW]" addDoc: !string |- - // The low 5 bits (values 0-31) of each element of indices is used + // The low 5 bits (values 0-31) of each element of indices is used. operandOrder: "21Type1" in: - *anyindices @@ -283,7 +283,7 @@ - go: Permute asm: "VPERMB" addDoc: !string |- - // The low 6 bits (values 0-63) of each element of indices is used + // The low 6 bits (values 0-63) of each element of indices is used. 
operandOrder: "21Type1" in: - *anyindices @@ -489,7 +489,9 @@ - go: PermuteOrZeroGrouped asm: VPSHUFB addDoc: !string |- - // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} + // + // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} + // // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. // Each group is of size 128-bit. @@ -506,7 +508,9 @@ - go: permuteScalars asm: VPSHUFD addDoc: !string |- - // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} + // + // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} + // // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. in: - *128any @@ -520,7 +524,9 @@ - go: permuteScalarsGrouped asm: VPSHUFD addDoc: !string |- - // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} + // + // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} + // // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. in: @@ -535,7 +541,9 @@ - go: permuteScalarsLo asm: VPSHUFLW addDoc: !string |- - // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} + // + // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} + // // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. 
in: - &128lanes8 @@ -573,7 +581,9 @@ - go: permuteScalarsHi asm: VPSHUFHW addDoc: !string |- - // result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} + // + // result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} + // // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. in: - *128lanes8 diff --git a/src/simd/archsimd/_gen/tmplgen/main.go b/src/simd/archsimd/_gen/tmplgen/main.go index e764bee498..e0d607af12 100644 --- a/src/simd/archsimd/_gen/tmplgen/main.go +++ b/src/simd/archsimd/_gen/tmplgen/main.go @@ -323,12 +323,12 @@ func shapedTemplateOf(s *shapes, name, temp string) shapeAndTemplate { } var sliceTemplate = templateOf("slice", ` -// Load{{.VType}}Slice loads {{.AOrAn}} {{.VType}} from a slice of at least {{.Count}} {{.Etype}}s +// Load{{.VType}}Slice loads {{.AOrAn}} {{.VType}} from a slice of at least {{.Count}} {{.Etype}}s. func Load{{.VType}}Slice(s []{{.Etype}}) {{.VType}} { return Load{{.VType}}((*[{{.Count}}]{{.Etype}})(s)) } -// StoreSlice stores x into a slice of at least {{.Count}} {{.Etype}}s +// StoreSlice stores x into a slice of at least {{.Count}} {{.Etype}}s. func (x {{.VType}}) StoreSlice(s []{{.Etype}}) { x.Store((*[{{.Count}}]{{.Etype}})(s)) } @@ -640,32 +640,32 @@ func (t templateData) CPUfeature() string { } var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", ` -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. 
// -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} { ones := x.Equal(x).ToInt{{.WxC}}() return y.Greater(x).ToInt{{.WxC}}().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} { ones := x.Equal(x).ToInt{{.WxC}}() return x.Greater(y).ToInt{{.WxC}}().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} { ones := x.Equal(x).ToInt{{.WxC}}() return x.Equal(y).ToInt{{.WxC}}().Xor(ones).asMask() @@ -673,18 +673,18 @@ func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} { `) var bitWiseIntTemplate = shapedTemplateOf(intShapes, "bitwise int complement", ` -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) Not() {{.VType}} { return x.Xor(x.Equal(x).ToInt{{.WxC}}()) } `) var bitWiseUintTemplate = shapedTemplateOf(uintShapes, "bitwise uint complement", ` -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. 
// -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) Not() {{.VType}} { return x.Xor(x.Equal(x).ToInt{{.WxC}}().As{{.VType}}()) } @@ -703,9 +703,9 @@ func (t templateData) CPUfeatureAVX2if8() string { } var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", ` -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} +// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}} func (x {{.VType}}) Greater(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() {{- if eq .EWidth 8}} @@ -717,9 +717,9 @@ func (x {{.VType}}) Greater(y {{.VType}}) Mask{{.WxC}} { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} +// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}} func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() {{- if eq .EWidth 8}} @@ -731,9 +731,9 @@ func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} +// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}} func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).ToInt{{.WxC}}() @@ -745,9 +745,9 @@ func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} { return b.Xor(signs).Greater(a.Xor(signs)).ToInt{{.WxC}}().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. 
// -// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} +// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}} func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).ToInt{{.WxC}}() @@ -759,9 +759,9 @@ func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} { return a.Xor(signs).Greater(b.Xor(signs)).ToInt{{.WxC}}().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).ToInt{{.WxC}}() @@ -818,7 +818,7 @@ func (x {{.VType}}) Masked(mask Mask{{.WxC}}) {{.VType}} { {{- end -}} } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x {{.VType}}) Merge(y {{.VType}}, mask Mask{{.WxC}}) {{.VType}} { {{- if eq .Base "Int" }} return y.blendMasked(x, mask) @@ -849,7 +849,7 @@ var broadcastTemplate = templateOf("Broadcast functions", ` // Broadcast{{.VType}} returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature {{.CPUfeatureBC}} +// Emulated, CPU Feature: {{.CPUfeatureBC}} func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} { var z {{.As128BitVec }} return z.SetElem(0, x).Broadcast{{.Vwidth}}() @@ -864,7 +864,7 @@ func (from {{.Base}}{{.WxC}}) ToMask() (to Mask{{.WxC}}) { `) var stringTemplate = shapedTemplateOf(allShapes, "String methods", ` -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. 
func (x {{.VType}}) String() string { var s [{{.Count}}]{{.Etype}} x.Store(&s) diff --git a/src/simd/archsimd/compare_gen_amd64.go b/src/simd/archsimd/compare_gen_amd64.go index 13b1e03dc9..09f8277dc9 100644 --- a/src/simd/archsimd/compare_gen_amd64.go +++ b/src/simd/archsimd/compare_gen_amd64.go @@ -4,275 +4,275 @@ package archsimd -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) Less(y Int8x16) Mask8x16 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 { ones := x.Equal(x).ToInt8x16() return y.Greater(x).ToInt8x16().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) LessEqual(y Int8x16) Mask8x16 { ones := x.Equal(x).ToInt8x16() return x.Greater(y).ToInt8x16().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) NotEqual(y Int8x16) Mask8x16 { ones := x.Equal(x).ToInt8x16() return x.Equal(y).ToInt8x16().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) Less(y Int16x8) Mask16x8 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 { ones := x.Equal(x).ToInt16x8() return y.Greater(x).ToInt16x8().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) LessEqual(y Int16x8) Mask16x8 { ones := x.Equal(x).ToInt16x8() return x.Greater(y).ToInt16x8().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) NotEqual(y Int16x8) Mask16x8 { ones := x.Equal(x).ToInt16x8() return x.Equal(y).ToInt16x8().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) Less(y Int32x4) Mask32x4 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 { ones := x.Equal(x).ToInt32x4() return y.Greater(x).ToInt32x4().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) LessEqual(y Int32x4) Mask32x4 { ones := x.Equal(x).ToInt32x4() return x.Greater(y).ToInt32x4().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) NotEqual(y Int32x4) Mask32x4 { ones := x.Equal(x).ToInt32x4() return x.Equal(y).ToInt32x4().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) Less(y Int64x2) Mask64x2 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 { ones := x.Equal(x).ToInt64x2() return y.Greater(x).ToInt64x2().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) LessEqual(y Int64x2) Mask64x2 { ones := x.Equal(x).ToInt64x2() return x.Greater(y).ToInt64x2().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) NotEqual(y Int64x2) Mask64x2 { ones := x.Equal(x).ToInt64x2() return x.Equal(y).ToInt64x2().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) Less(y Int8x32) Mask8x32 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 { ones := x.Equal(x).ToInt8x32() return y.Greater(x).ToInt8x32().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) LessEqual(y Int8x32) Mask8x32 { ones := x.Equal(x).ToInt8x32() return x.Greater(y).ToInt8x32().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) NotEqual(y Int8x32) Mask8x32 { ones := x.Equal(x).ToInt8x32() return x.Equal(y).ToInt8x32().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) Less(y Int16x16) Mask16x16 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 { ones := x.Equal(x).ToInt16x16() return y.Greater(x).ToInt16x16().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) LessEqual(y Int16x16) Mask16x16 { ones := x.Equal(x).ToInt16x16() return x.Greater(y).ToInt16x16().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) NotEqual(y Int16x16) Mask16x16 { ones := x.Equal(x).ToInt16x16() return x.Equal(y).ToInt16x16().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) Less(y Int32x8) Mask32x8 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 { ones := x.Equal(x).ToInt32x8() return y.Greater(x).ToInt32x8().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) LessEqual(y Int32x8) Mask32x8 { ones := x.Equal(x).ToInt32x8() return x.Greater(y).ToInt32x8().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) NotEqual(y Int32x8) Mask32x8 { ones := x.Equal(x).ToInt32x8() return x.Equal(y).ToInt32x8().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) Less(y Int64x4) Mask64x4 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 { ones := x.Equal(x).ToInt64x4() return y.Greater(x).ToInt64x4().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) LessEqual(y Int64x4) Mask64x4 { ones := x.Equal(x).ToInt64x4() return x.Greater(y).ToInt64x4().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) NotEqual(y Int64x4) Mask64x4 { ones := x.Equal(x).ToInt64x4() return x.Equal(y).ToInt64x4().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x16) Greater(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() signs := BroadcastInt8x16(-1 << (8 - 1)) return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x16) Less(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() signs := BroadcastInt8x16(-1 << (8 - 1)) return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() ones := x.Equal(x).ToInt8x16() @@ -280,9 +280,9 @@ func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt8x16().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() ones := x.Equal(x).ToInt8x16() @@ -290,18 +290,18 @@ func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt8x16().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() ones := x.Equal(x).ToInt8x16() return a.Equal(b).ToInt8x16().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) Greater(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() @@ -309,9 +309,9 @@ func (x Uint16x8) Greater(y Uint16x8) Mask16x8 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) Less(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() @@ -319,9 +319,9 @@ func (x Uint16x8) Less(y Uint16x8) Mask16x8 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() @@ -329,9 +329,9 @@ func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt16x8().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() @@ -339,18 +339,18 @@ func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt16x8().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() return a.Equal(b).ToInt16x8().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) Greater(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() @@ -358,9 +358,9 @@ func (x Uint32x4) Greater(y Uint32x4) Mask32x4 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) Less(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() @@ -368,9 +368,9 @@ func (x Uint32x4) Less(y Uint32x4) Mask32x4 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() @@ -378,9 +378,9 @@ func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt32x4().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() @@ -388,18 +388,18 @@ func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt32x4().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() return a.Equal(b).ToInt32x4().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) Greater(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() @@ -407,9 +407,9 @@ func (x Uint64x2) Greater(y Uint64x2) Mask64x2 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) Less(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() @@ -417,9 +417,9 @@ func (x Uint64x2) Less(y Uint64x2) Mask64x2 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() @@ -427,9 +427,9 @@ func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt64x2().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() @@ -437,36 +437,36 @@ func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt64x2().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() return a.Equal(b).ToInt64x2().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) Greater(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() signs := BroadcastInt8x32(-1 << (8 - 1)) return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) Less(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() signs := BroadcastInt8x32(-1 << (8 - 1)) return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() ones := x.Equal(x).ToInt8x32() @@ -474,9 +474,9 @@ func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt8x32().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() ones := x.Equal(x).ToInt8x32() @@ -484,18 +484,18 @@ func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt8x32().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() ones := x.Equal(x).ToInt8x32() return a.Equal(b).ToInt8x32().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) Greater(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() @@ -503,9 +503,9 @@ func (x Uint16x16) Greater(y Uint16x16) Mask16x16 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) Less(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() @@ -513,9 +513,9 @@ func (x Uint16x16) Less(y Uint16x16) Mask16x16 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() @@ -523,9 +523,9 @@ func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt16x16().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() @@ -533,18 +533,18 @@ func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt16x16().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() return a.Equal(b).ToInt16x16().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) Greater(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() @@ -552,9 +552,9 @@ func (x Uint32x8) Greater(y Uint32x8) Mask32x8 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) Less(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() @@ -562,9 +562,9 @@ func (x Uint32x8) Less(y Uint32x8) Mask32x8 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() @@ -572,9 +572,9 @@ func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt32x8().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() @@ -582,18 +582,18 @@ func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt32x8().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() return a.Equal(b).ToInt32x8().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) Greater(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() @@ -601,9 +601,9 @@ func (x Uint64x4) Greater(y Uint64x4) Mask64x4 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) Less(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() @@ -611,9 +611,9 @@ func (x Uint64x4) Less(y Uint64x4) Mask64x4 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() @@ -621,9 +621,9 @@ func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt64x4().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() @@ -631,9 +631,9 @@ func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt64x4().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() diff --git a/src/simd/archsimd/extra_amd64.go b/src/simd/archsimd/extra_amd64.go index 921e148f63..cd5a3230b9 100644 --- a/src/simd/archsimd/extra_amd64.go +++ b/src/simd/archsimd/extra_amd64.go @@ -19,7 +19,7 @@ func ClearAVXUpperBits() // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int8x16) IsZero() bool @@ -27,7 +27,7 @@ func (x Int8x16) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int8x32) IsZero() bool @@ -35,7 +35,7 @@ func (x Int8x32) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. 
// // Asm: VPTEST, CPU Feature: AVX func (x Int16x8) IsZero() bool @@ -43,7 +43,7 @@ func (x Int16x8) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int16x16) IsZero() bool @@ -51,7 +51,7 @@ func (x Int16x16) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int32x4) IsZero() bool @@ -59,7 +59,7 @@ func (x Int32x4) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int32x8) IsZero() bool @@ -67,7 +67,7 @@ func (x Int32x8) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int64x2) IsZero() bool @@ -75,7 +75,7 @@ func (x Int64x2) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. 
// // Asm: VPTEST, CPU Feature: AVX func (x Int64x4) IsZero() bool @@ -83,7 +83,7 @@ func (x Int64x4) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint8x16) IsZero() bool @@ -91,7 +91,7 @@ func (x Uint8x16) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint8x32) IsZero() bool @@ -99,7 +99,7 @@ func (x Uint8x32) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint16x8) IsZero() bool @@ -107,7 +107,7 @@ func (x Uint16x8) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint16x16) IsZero() bool @@ -115,7 +115,7 @@ func (x Uint16x16) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. 
// // Asm: VPTEST, CPU Feature: AVX func (x Uint32x4) IsZero() bool @@ -123,7 +123,7 @@ func (x Uint32x4) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint32x8) IsZero() bool @@ -131,7 +131,7 @@ func (x Uint32x8) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint64x2) IsZero() bool @@ -139,7 +139,7 @@ func (x Uint64x2) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint64x4) IsZero() bool diff --git a/src/simd/archsimd/maskmerge_gen_amd64.go b/src/simd/archsimd/maskmerge_gen_amd64.go index 2469a477c7..ad56521714 100644 --- a/src/simd/archsimd/maskmerge_gen_amd64.go +++ b/src/simd/archsimd/maskmerge_gen_amd64.go @@ -286,7 +286,7 @@ func (x Int8x64) Masked(mask Mask8x64) Int8x64 { return im.And(x) } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Int8x64) Merge(y Int8x64, mask Mask8x64) Int8x64 { return y.blendMasked(x, mask) } @@ -297,7 +297,7 @@ func (x Int16x32) Masked(mask Mask16x32) Int16x32 { return im.And(x) } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. 
func (x Int16x32) Merge(y Int16x32, mask Mask16x32) Int16x32 { return y.blendMasked(x, mask) } @@ -308,7 +308,7 @@ func (x Int32x16) Masked(mask Mask32x16) Int32x16 { return im.And(x) } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Int32x16) Merge(y Int32x16, mask Mask32x16) Int32x16 { return y.blendMasked(x, mask) } @@ -319,7 +319,7 @@ func (x Int64x8) Masked(mask Mask64x8) Int64x8 { return im.And(x) } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Int64x8) Merge(y Int64x8, mask Mask64x8) Int64x8 { return y.blendMasked(x, mask) } @@ -330,7 +330,7 @@ func (x Uint8x64) Masked(mask Mask8x64) Uint8x64 { return x.AsInt8x64().And(im).AsUint8x64() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 { ix := x.AsInt8x64() iy := y.AsInt8x64() @@ -343,7 +343,7 @@ func (x Uint16x32) Masked(mask Mask16x32) Uint16x32 { return x.AsInt16x32().And(im).AsUint16x32() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 { ix := x.AsInt16x32() iy := y.AsInt16x32() @@ -356,7 +356,7 @@ func (x Uint32x16) Masked(mask Mask32x16) Uint32x16 { return x.AsInt32x16().And(im).AsUint32x16() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 { ix := x.AsInt32x16() iy := y.AsInt32x16() @@ -369,7 +369,7 @@ func (x Uint64x8) Masked(mask Mask64x8) Uint64x8 { return x.AsInt64x8().And(im).AsUint64x8() } -// Merge returns x but with elements set to y where m is false. 
+// Merge returns x but with elements set to y where mask is false. func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 { ix := x.AsInt64x8() iy := y.AsInt64x8() @@ -382,7 +382,7 @@ func (x Float32x16) Masked(mask Mask32x16) Float32x16 { return x.AsInt32x16().And(im).AsFloat32x16() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 { ix := x.AsInt32x16() iy := y.AsInt32x16() @@ -395,7 +395,7 @@ func (x Float64x8) Masked(mask Mask64x8) Float64x8 { return x.AsInt64x8().And(im).AsFloat64x8() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 { ix := x.AsInt64x8() iy := y.AsInt64x8() diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go index 522a98caea..95d417b3df 100644 --- a/src/simd/archsimd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -1286,7 +1286,9 @@ func (x Uint64x8) Compress(mask Mask64x8) Uint64x8 /* ConcatPermute */ // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1294,7 +1296,9 @@ func (x Uint64x8) Compress(mask Mask64x8) Uint64x8 func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). 
// Only the needed bits to represent xy's index are used in indices' elements. // @@ -1302,7 +1306,9 @@ func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16 func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1310,7 +1316,9 @@ func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16 func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1318,7 +1326,9 @@ func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32 func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1326,7 +1336,9 @@ func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32 func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1334,7 +1346,9 @@ func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64 func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1342,7 +1356,9 @@ func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64 func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1350,7 +1366,9 @@ func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8 func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1358,7 +1376,9 @@ func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8 func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1366,7 +1386,9 @@ func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16 func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1374,7 +1396,9 @@ func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16 func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1382,7 +1406,9 @@ func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32 func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1390,7 +1416,9 @@ func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32 func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1398,7 +1426,9 @@ func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4 func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1406,7 +1436,9 @@ func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4 func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1414,7 +1446,9 @@ func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4 func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1422,7 +1456,9 @@ func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8 func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1430,7 +1466,9 @@ func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8 func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1438,7 +1476,9 @@ func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8 func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1446,7 +1486,9 @@ func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16 func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1454,7 +1496,9 @@ func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16 func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1462,7 +1506,9 @@ func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16 func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1470,7 +1516,9 @@ func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2 func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1478,7 +1526,9 @@ func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2 func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1486,7 +1536,9 @@ func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2 func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1494,7 +1546,9 @@ func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4 func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1502,7 +1556,9 @@ func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4 func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1510,7 +1566,9 @@ func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4 func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1518,7 +1576,9 @@ func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8 func (x Int64x8) ConcatPermute(y Int64x8, indices Uint64x8) Int64x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -4523,169 +4583,217 @@ func (x Uint64x8) Or(y Uint64x8) Uint64x8 /* Permute */ // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices are used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x16) Permute(indices Uint8x16) Int8x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices are used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x16) Permute(indices Uint8x16) Uint8x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 5 bits (values 0-31) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 5 bits (values 0-31) of each element of indices are used.
// // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x32) Permute(indices Uint8x32) Int8x32 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 5 bits (values 0-31) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 5 bits (values 0-31) of each element of indices are used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x32) Permute(indices Uint8x32) Uint8x32 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 6 bits (values 0-63) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 6 bits (values 0-63) of each element of indices are used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x64) Permute(indices Uint8x64) Int8x64 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 6 bits (values 0-63) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 6 bits (values 0-63) of each element of indices are used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x64) Permute(indices Uint8x64) Uint8x64 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices are used.
// // Asm: VPERMW, CPU Feature: AVX512 func (x Int16x8) Permute(indices Uint16x8) Int16x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices are used. // // Asm: VPERMW, CPU Feature: AVX512 func (x Uint16x8) Permute(indices Uint16x8) Uint16x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices are used. // // Asm: VPERMW, CPU Feature: AVX512 func (x Int16x16) Permute(indices Uint16x16) Int16x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices are used. // // Asm: VPERMW, CPU Feature: AVX512 func (x Uint16x16) Permute(indices Uint16x16) Uint16x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 5 bits (values 0-31) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 5 bits (values 0-31) of each element of indices are used.
// // Asm: VPERMW, CPU Feature: AVX512 func (x Int16x32) Permute(indices Uint16x32) Int16x32 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 5 bits (values 0-31) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 5 bits (values 0-31) of each element of indices are used. // // Asm: VPERMW, CPU Feature: AVX512 func (x Uint16x32) Permute(indices Uint16x32) Uint16x32 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices are used. // // Asm: VPERMPS, CPU Feature: AVX2 func (x Float32x8) Permute(indices Uint32x8) Float32x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices are used. // // Asm: VPERMD, CPU Feature: AVX2 func (x Int32x8) Permute(indices Uint32x8) Int32x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices are used.
// // Asm: VPERMD, CPU Feature: AVX2 func (x Uint32x8) Permute(indices Uint32x8) Uint32x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices are used. // // Asm: VPERMPS, CPU Feature: AVX512 func (x Float32x16) Permute(indices Uint32x16) Float32x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices are used. // // Asm: VPERMD, CPU Feature: AVX512 func (x Int32x16) Permute(indices Uint32x16) Int32x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices are used. // // Asm: VPERMD, CPU Feature: AVX512 func (x Uint32x16) Permute(indices Uint32x16) Uint32x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 2 bits (values 0-3) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 2 bits (values 0-3) of each element of indices are used.
// // Asm: VPERMPD, CPU Feature: AVX512 func (x Float64x4) Permute(indices Uint64x4) Float64x4 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 2 bits (values 0-3) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 2 bits (values 0-3) of each element of indices are used. // // Asm: VPERMQ, CPU Feature: AVX512 func (x Int64x4) Permute(indices Uint64x4) Int64x4 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 2 bits (values 0-3) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 2 bits (values 0-3) of each element of indices are used. // // Asm: VPERMQ, CPU Feature: AVX512 func (x Uint64x4) Permute(indices Uint64x4) Uint64x4 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices are used. // // Asm: VPERMPD, CPU Feature: AVX512 func (x Float64x8) Permute(indices Uint64x8) Float64x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices are used.
// // Asm: VPERMQ, CPU Feature: AVX512 func (x Int64x8) Permute(indices Uint64x8) Int64x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices are used. // // Asm: VPERMQ, CPU Feature: AVX512 func (x Uint64x8) Permute(indices Uint64x8) Uint64x8 @@ -4693,7 +4801,9 @@ func (x Uint64x8) Permute(indices Uint64x8) Uint64x8 /* PermuteOrZero */ // PermuteOrZero performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// // The lower four bits of each byte-sized index in indices select an element from x, // unless the index's sign bit is set in which case zero is used instead. // @@ -4701,7 +4811,9 @@ func (x Uint64x8) Permute(indices Uint64x8) Uint64x8 func (x Int8x16) PermuteOrZero(indices Int8x16) Int8x16 // PermuteOrZero performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// // The lower four bits of each byte-sized index in indices select an element from x, // unless the index's sign bit is set in which case zero is used instead.
// @@ -4711,7 +4823,9 @@ func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16 /* PermuteOrZeroGrouped */ // PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: -// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. // Each group is of size 128-bit. @@ -4720,7 +4834,9 @@ func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16 func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32 // PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: -// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. // Each group is of size 128-bit. @@ -4729,7 +4845,9 @@ func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32 func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64 // PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: -// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. 
// Each group is of size 128-bit. @@ -4738,7 +4856,9 @@ func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64 func (x Uint8x32) PermuteOrZeroGrouped(indices Int8x32) Uint8x32 // PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: -// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. // Each group is of size 128-bit. @@ -5566,32 +5686,38 @@ func (x Uint64x8) SaturateToUint32() Uint32x8 /* Scale */ -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPS, CPU Feature: AVX512 func (x Float32x4) Scale(y Float32x4) Float32x4 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPS, CPU Feature: AVX512 func (x Float32x8) Scale(y Float32x8) Float32x8 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPS, CPU Feature: AVX512 func (x Float32x16) Scale(y Float32x16) Float32x16 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPD, CPU Feature: AVX512 func (x Float64x2) Scale(y Float64x2) Float64x2 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. 
// // Asm: VSCALEFPD, CPU Feature: AVX512 func (x Float64x4) Scale(y Float64x4) Float64x4 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPD, CPU Feature: AVX512 func (x Float64x8) Scale(y Float64x8) Float64x8 @@ -8530,120 +8656,120 @@ func (x Uint64x8) AsUint16x32() Uint16x32 // AsUint32x16 returns a Uint32x16 with the same bit representation as x. func (x Uint64x8) AsUint32x16() Uint32x16 -// ToInt8x16 converts from Mask8x16 to Int8x16 +// ToInt8x16 converts from Mask8x16 to Int8x16. func (from Mask8x16) ToInt8x16() (to Int8x16) -// asMask converts from Int8x16 to Mask8x16 +// asMask converts from Int8x16 to Mask8x16. func (from Int8x16) asMask() (to Mask8x16) func (x Mask8x16) And(y Mask8x16) Mask8x16 func (x Mask8x16) Or(y Mask8x16) Mask8x16 -// ToInt8x32 converts from Mask8x32 to Int8x32 +// ToInt8x32 converts from Mask8x32 to Int8x32. func (from Mask8x32) ToInt8x32() (to Int8x32) -// asMask converts from Int8x32 to Mask8x32 +// asMask converts from Int8x32 to Mask8x32. func (from Int8x32) asMask() (to Mask8x32) func (x Mask8x32) And(y Mask8x32) Mask8x32 func (x Mask8x32) Or(y Mask8x32) Mask8x32 -// ToInt8x64 converts from Mask8x64 to Int8x64 +// ToInt8x64 converts from Mask8x64 to Int8x64. func (from Mask8x64) ToInt8x64() (to Int8x64) -// asMask converts from Int8x64 to Mask8x64 +// asMask converts from Int8x64 to Mask8x64. func (from Int8x64) asMask() (to Mask8x64) func (x Mask8x64) And(y Mask8x64) Mask8x64 func (x Mask8x64) Or(y Mask8x64) Mask8x64 -// ToInt16x8 converts from Mask16x8 to Int16x8 +// ToInt16x8 converts from Mask16x8 to Int16x8. func (from Mask16x8) ToInt16x8() (to Int16x8) -// asMask converts from Int16x8 to Mask16x8 +// asMask converts from Int16x8 to Mask16x8. 
func (from Int16x8) asMask() (to Mask16x8) func (x Mask16x8) And(y Mask16x8) Mask16x8 func (x Mask16x8) Or(y Mask16x8) Mask16x8 -// ToInt16x16 converts from Mask16x16 to Int16x16 +// ToInt16x16 converts from Mask16x16 to Int16x16. func (from Mask16x16) ToInt16x16() (to Int16x16) -// asMask converts from Int16x16 to Mask16x16 +// asMask converts from Int16x16 to Mask16x16. func (from Int16x16) asMask() (to Mask16x16) func (x Mask16x16) And(y Mask16x16) Mask16x16 func (x Mask16x16) Or(y Mask16x16) Mask16x16 -// ToInt16x32 converts from Mask16x32 to Int16x32 +// ToInt16x32 converts from Mask16x32 to Int16x32. func (from Mask16x32) ToInt16x32() (to Int16x32) -// asMask converts from Int16x32 to Mask16x32 +// asMask converts from Int16x32 to Mask16x32. func (from Int16x32) asMask() (to Mask16x32) func (x Mask16x32) And(y Mask16x32) Mask16x32 func (x Mask16x32) Or(y Mask16x32) Mask16x32 -// ToInt32x4 converts from Mask32x4 to Int32x4 +// ToInt32x4 converts from Mask32x4 to Int32x4. func (from Mask32x4) ToInt32x4() (to Int32x4) -// asMask converts from Int32x4 to Mask32x4 +// asMask converts from Int32x4 to Mask32x4. func (from Int32x4) asMask() (to Mask32x4) func (x Mask32x4) And(y Mask32x4) Mask32x4 func (x Mask32x4) Or(y Mask32x4) Mask32x4 -// ToInt32x8 converts from Mask32x8 to Int32x8 +// ToInt32x8 converts from Mask32x8 to Int32x8. func (from Mask32x8) ToInt32x8() (to Int32x8) -// asMask converts from Int32x8 to Mask32x8 +// asMask converts from Int32x8 to Mask32x8. func (from Int32x8) asMask() (to Mask32x8) func (x Mask32x8) And(y Mask32x8) Mask32x8 func (x Mask32x8) Or(y Mask32x8) Mask32x8 -// ToInt32x16 converts from Mask32x16 to Int32x16 +// ToInt32x16 converts from Mask32x16 to Int32x16. func (from Mask32x16) ToInt32x16() (to Int32x16) -// asMask converts from Int32x16 to Mask32x16 +// asMask converts from Int32x16 to Mask32x16. 
func (from Int32x16) asMask() (to Mask32x16) func (x Mask32x16) And(y Mask32x16) Mask32x16 func (x Mask32x16) Or(y Mask32x16) Mask32x16 -// ToInt64x2 converts from Mask64x2 to Int64x2 +// ToInt64x2 converts from Mask64x2 to Int64x2. func (from Mask64x2) ToInt64x2() (to Int64x2) -// asMask converts from Int64x2 to Mask64x2 +// asMask converts from Int64x2 to Mask64x2. func (from Int64x2) asMask() (to Mask64x2) func (x Mask64x2) And(y Mask64x2) Mask64x2 func (x Mask64x2) Or(y Mask64x2) Mask64x2 -// ToInt64x4 converts from Mask64x4 to Int64x4 +// ToInt64x4 converts from Mask64x4 to Int64x4. func (from Mask64x4) ToInt64x4() (to Int64x4) -// asMask converts from Int64x4 to Mask64x4 +// asMask converts from Int64x4 to Mask64x4. func (from Int64x4) asMask() (to Mask64x4) func (x Mask64x4) And(y Mask64x4) Mask64x4 func (x Mask64x4) Or(y Mask64x4) Mask64x4 -// ToInt64x8 converts from Mask64x8 to Int64x8 +// ToInt64x8 converts from Mask64x8 to Int64x8. func (from Mask64x8) ToInt64x8() (to Int64x8) -// asMask converts from Int64x8 to Mask64x8 +// asMask converts from Int64x8 to Mask64x8. func (from Int64x8) asMask() (to Mask64x8) func (x Mask64x8) And(y Mask64x8) Mask64x8 diff --git a/src/simd/archsimd/ops_internal_amd64.go b/src/simd/archsimd/ops_internal_amd64.go index b8127b302e..8eae69a7ba 100644 --- a/src/simd/archsimd/ops_internal_amd64.go +++ b/src/simd/archsimd/ops_internal_amd64.go @@ -382,7 +382,9 @@ func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x /* permuteScalars */ // permuteScalars performs a permutation of vector x using constant indices: -// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} +// +// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
@@ -391,7 +393,9 @@ func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x func (x Int32x4) permuteScalars(indices uint8) Int32x4 // permuteScalars performs a permutation of vector x using constant indices: -// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} +// +// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. @@ -402,7 +406,9 @@ func (x Uint32x4) permuteScalars(indices uint8) Uint32x4 /* permuteScalarsGrouped */ // permuteScalarsGrouped performs a grouped permutation of vector x using constant indices: -// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// +// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. // @@ -412,7 +418,9 @@ func (x Uint32x4) permuteScalars(indices uint8) Uint32x4 func (x Int32x8) permuteScalarsGrouped(indices uint8) Int32x8 // permuteScalarsGrouped performs a grouped permutation of vector x using constant indices: -// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// +// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. 
// @@ -422,7 +430,9 @@ func (x Int32x8) permuteScalarsGrouped(indices uint8) Int32x8 func (x Int32x16) permuteScalarsGrouped(indices uint8) Int32x16 // permuteScalarsGrouped performs a grouped permutation of vector x using constant indices: -// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// +// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. // @@ -432,7 +442,9 @@ func (x Int32x16) permuteScalarsGrouped(indices uint8) Int32x16 func (x Uint32x8) permuteScalarsGrouped(indices uint8) Uint32x8 // permuteScalarsGrouped performs a grouped permutation of vector x using constant indices: -// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// +// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. // @@ -444,7 +456,9 @@ func (x Uint32x16) permuteScalarsGrouped(indices uint8) Uint32x16 /* permuteScalarsHi */ // permuteScalarsHi performs a permutation of vector x using constant indices: -// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} +// +// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
@@ -453,7 +467,9 @@ func (x Uint32x16) permuteScalarsGrouped(indices uint8) Uint32x16 func (x Int16x8) permuteScalarsHi(indices uint8) Int16x8 // permuteScalarsHi performs a permutation of vector x using constant indices: -// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} +// +// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. @@ -522,7 +538,9 @@ func (x Uint16x32) permuteScalarsHiGrouped(indices uint8) Uint16x32 /* permuteScalarsLo */ // permuteScalarsLo performs a permutation of vector x using constant indices: -// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} +// +// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. @@ -531,7 +549,9 @@ func (x Uint16x32) permuteScalarsHiGrouped(indices uint8) Uint16x32 func (x Int16x8) permuteScalarsLo(indices uint8) Int16x8 // permuteScalarsLo performs a permutation of vector x using constant indices: -// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} +// +// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
diff --git a/src/simd/archsimd/other_gen_amd64.go b/src/simd/archsimd/other_gen_amd64.go index 5c85b1842a..647001acce 100644 --- a/src/simd/archsimd/other_gen_amd64.go +++ b/src/simd/archsimd/other_gen_amd64.go @@ -7,7 +7,7 @@ package archsimd // BroadcastInt8x16 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt8x16(x int8) Int8x16 { var z Int8x16 return z.SetElem(0, x).Broadcast128() @@ -16,7 +16,7 @@ func BroadcastInt8x16(x int8) Int8x16 { // BroadcastInt16x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt16x8(x int16) Int16x8 { var z Int16x8 return z.SetElem(0, x).Broadcast128() @@ -25,7 +25,7 @@ func BroadcastInt16x8(x int16) Int16x8 { // BroadcastInt32x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt32x4(x int32) Int32x4 { var z Int32x4 return z.SetElem(0, x).Broadcast128() @@ -34,7 +34,7 @@ func BroadcastInt32x4(x int32) Int32x4 { // BroadcastInt64x2 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt64x2(x int64) Int64x2 { var z Int64x2 return z.SetElem(0, x).Broadcast128() @@ -43,7 +43,7 @@ func BroadcastInt64x2(x int64) Int64x2 { // BroadcastUint8x16 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint8x16(x uint8) Uint8x16 { var z Uint8x16 return z.SetElem(0, x).Broadcast128() @@ -52,7 +52,7 @@ func BroadcastUint8x16(x uint8) Uint8x16 { // BroadcastUint16x8 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint16x8(x uint16) Uint16x8 { var z Uint16x8 return z.SetElem(0, x).Broadcast128() @@ -61,7 +61,7 @@ func BroadcastUint16x8(x uint16) Uint16x8 { // BroadcastUint32x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint32x4(x uint32) Uint32x4 { var z Uint32x4 return z.SetElem(0, x).Broadcast128() @@ -70,7 +70,7 @@ func BroadcastUint32x4(x uint32) Uint32x4 { // BroadcastUint64x2 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint64x2(x uint64) Uint64x2 { var z Uint64x2 return z.SetElem(0, x).Broadcast128() @@ -79,7 +79,7 @@ func BroadcastUint64x2(x uint64) Uint64x2 { // BroadcastFloat32x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastFloat32x4(x float32) Float32x4 { var z Float32x4 return z.SetElem(0, x).Broadcast128() @@ -88,7 +88,7 @@ func BroadcastFloat32x4(x float32) Float32x4 { // BroadcastFloat64x2 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastFloat64x2(x float64) Float64x2 { var z Float64x2 return z.SetElem(0, x).Broadcast128() @@ -97,7 +97,7 @@ func BroadcastFloat64x2(x float64) Float64x2 { // BroadcastInt8x32 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt8x32(x int8) Int8x32 { var z Int8x16 return z.SetElem(0, x).Broadcast256() @@ -106,7 +106,7 @@ func BroadcastInt8x32(x int8) Int8x32 { // BroadcastInt16x16 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt16x16(x int16) Int16x16 { var z Int16x8 return z.SetElem(0, x).Broadcast256() @@ -115,7 +115,7 @@ func BroadcastInt16x16(x int16) Int16x16 { // BroadcastInt32x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt32x8(x int32) Int32x8 { var z Int32x4 return z.SetElem(0, x).Broadcast256() @@ -124,7 +124,7 @@ func BroadcastInt32x8(x int32) Int32x8 { // BroadcastInt64x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt64x4(x int64) Int64x4 { var z Int64x2 return z.SetElem(0, x).Broadcast256() @@ -133,7 +133,7 @@ func BroadcastInt64x4(x int64) Int64x4 { // BroadcastUint8x32 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint8x32(x uint8) Uint8x32 { var z Uint8x16 return z.SetElem(0, x).Broadcast256() @@ -142,7 +142,7 @@ func BroadcastUint8x32(x uint8) Uint8x32 { // BroadcastUint16x16 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint16x16(x uint16) Uint16x16 { var z Uint16x8 return z.SetElem(0, x).Broadcast256() @@ -151,7 +151,7 @@ func BroadcastUint16x16(x uint16) Uint16x16 { // BroadcastUint32x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint32x8(x uint32) Uint32x8 { var z Uint32x4 return z.SetElem(0, x).Broadcast256() @@ -160,7 +160,7 @@ func BroadcastUint32x8(x uint32) Uint32x8 { // BroadcastUint64x4 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint64x4(x uint64) Uint64x4 { var z Uint64x2 return z.SetElem(0, x).Broadcast256() @@ -169,7 +169,7 @@ func BroadcastUint64x4(x uint64) Uint64x4 { // BroadcastFloat32x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastFloat32x8(x float32) Float32x8 { var z Float32x4 return z.SetElem(0, x).Broadcast256() @@ -178,7 +178,7 @@ func BroadcastFloat32x8(x float32) Float32x8 { // BroadcastFloat64x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastFloat64x4(x float64) Float64x4 { var z Float64x2 return z.SetElem(0, x).Broadcast256() @@ -187,7 +187,7 @@ func BroadcastFloat64x4(x float64) Float64x4 { // BroadcastInt8x64 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512BW +// Emulated, CPU Feature: AVX512BW func BroadcastInt8x64(x int8) Int8x64 { var z Int8x16 return z.SetElem(0, x).Broadcast512() @@ -196,7 +196,7 @@ func BroadcastInt8x64(x int8) Int8x64 { // BroadcastInt16x32 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512BW +// Emulated, CPU Feature: AVX512BW func BroadcastInt16x32(x int16) Int16x32 { var z Int16x8 return z.SetElem(0, x).Broadcast512() @@ -205,7 +205,7 @@ func BroadcastInt16x32(x int16) Int16x32 { // BroadcastInt32x16 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastInt32x16(x int32) Int32x16 { var z Int32x4 return z.SetElem(0, x).Broadcast512() @@ -214,7 +214,7 @@ func BroadcastInt32x16(x int32) Int32x16 { // BroadcastInt64x8 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastInt64x8(x int64) Int64x8 { var z Int64x2 return z.SetElem(0, x).Broadcast512() @@ -223,7 +223,7 @@ func BroadcastInt64x8(x int64) Int64x8 { // BroadcastUint8x64 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512BW +// Emulated, CPU Feature: AVX512BW func BroadcastUint8x64(x uint8) Uint8x64 { var z Uint8x16 return z.SetElem(0, x).Broadcast512() @@ -232,7 +232,7 @@ func BroadcastUint8x64(x uint8) Uint8x64 { // BroadcastUint16x32 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512BW +// Emulated, CPU Feature: AVX512BW func BroadcastUint16x32(x uint16) Uint16x32 { var z Uint16x8 return z.SetElem(0, x).Broadcast512() @@ -241,7 +241,7 @@ func BroadcastUint16x32(x uint16) Uint16x32 { // BroadcastUint32x16 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastUint32x16(x uint32) Uint32x16 { var z Uint32x4 return z.SetElem(0, x).Broadcast512() @@ -250,7 +250,7 @@ func BroadcastUint32x16(x uint32) Uint32x16 { // BroadcastUint64x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastUint64x8(x uint64) Uint64x8 { var z Uint64x2 return z.SetElem(0, x).Broadcast512() @@ -259,7 +259,7 @@ func BroadcastUint64x8(x uint64) Uint64x8 { // BroadcastFloat32x16 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastFloat32x16(x float32) Float32x16 { var z Float32x4 return z.SetElem(0, x).Broadcast512() @@ -268,7 +268,7 @@ func BroadcastFloat32x16(x float32) Float32x16 { // BroadcastFloat64x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastFloat64x8(x float64) Float64x8 { var z Float64x2 return z.SetElem(0, x).Broadcast512() @@ -334,378 +334,378 @@ func (from Int64x8) ToMask() (to Mask64x8) { return from.NotEqual(Int64x8{}) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) Not() Int8x16 { return x.Xor(x.Equal(x).ToInt8x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) Not() Int16x8 { return x.Xor(x.Equal(x).ToInt16x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) Not() Int32x4 { return x.Xor(x.Equal(x).ToInt32x4()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) Not() Int64x2 { return x.Xor(x.Equal(x).ToInt64x2()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) Not() Int8x32 { return x.Xor(x.Equal(x).ToInt8x32()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) Not() Int16x16 { return x.Xor(x.Equal(x).ToInt16x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) Not() Int32x8 { return x.Xor(x.Equal(x).ToInt32x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) Not() Int64x4 { return x.Xor(x.Equal(x).ToInt64x4()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Int8x64) Not() Int8x64 { return x.Xor(x.Equal(x).ToInt8x64()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Int16x32) Not() Int16x32 { return x.Xor(x.Equal(x).ToInt16x32()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Int32x16) Not() Int32x16 { return x.Xor(x.Equal(x).ToInt32x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Int64x8) Not() Int64x8 { return x.Xor(x.Equal(x).ToInt64x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint8x16) Not() Uint8x16 { return x.Xor(x.Equal(x).ToInt8x16().AsUint8x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) Not() Uint16x8 { return x.Xor(x.Equal(x).ToInt16x8().AsUint16x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) Not() Uint32x4 { return x.Xor(x.Equal(x).ToInt32x4().AsUint32x4()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) Not() Uint64x2 { return x.Xor(x.Equal(x).ToInt64x2().AsUint64x2()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) Not() Uint8x32 { return x.Xor(x.Equal(x).ToInt8x32().AsUint8x32()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) Not() Uint16x16 { return x.Xor(x.Equal(x).ToInt16x16().AsUint16x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) Not() Uint32x8 { return x.Xor(x.Equal(x).ToInt32x8().AsUint32x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) Not() Uint64x4 { return x.Xor(x.Equal(x).ToInt64x4().AsUint64x4()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Uint8x64) Not() Uint8x64 { return x.Xor(x.Equal(x).ToInt8x64().AsUint8x64()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. 
// -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Uint16x32) Not() Uint16x32 { return x.Xor(x.Equal(x).ToInt16x32().AsUint16x32()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Uint32x16) Not() Uint32x16 { return x.Xor(x.Equal(x).ToInt32x16().AsUint32x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Uint64x8) Not() Uint64x8 { return x.Xor(x.Equal(x).ToInt64x8().AsUint64x8()) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int8x16) String() string { var s [16]int8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int16x8) String() string { var s [8]int16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int32x4) String() string { var s [4]int32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int64x2) String() string { var s [2]int64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint8x16) String() string { var s [16]uint8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. 
func (x Uint16x8) String() string { var s [8]uint16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint32x4) String() string { var s [4]uint32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint64x2) String() string { var s [2]uint64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float32x4) String() string { var s [4]float32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float64x2) String() string { var s [2]float64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int8x32) String() string { var s [32]int8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int16x16) String() string { var s [16]int16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int32x8) String() string { var s [8]int32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int64x4) String() string { var s [4]int64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. 
func (x Uint8x32) String() string { var s [32]uint8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint16x16) String() string { var s [16]uint16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint32x8) String() string { var s [8]uint32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint64x4) String() string { var s [4]uint64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float32x8) String() string { var s [8]float32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float64x4) String() string { var s [4]float64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int8x64) String() string { var s [64]int8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int16x32) String() string { var s [32]int16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int32x16) String() string { var s [16]int32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. 
func (x Int64x8) String() string { var s [8]int64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint8x64) String() string { var s [64]uint8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint16x32) String() string { var s [32]uint16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint32x16) String() string { var s [16]uint32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint64x8) String() string { var s [8]uint64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float32x16) String() string { var s [16]float32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float64x8) String() string { var s [8]float64 x.Store(&s) diff --git a/src/simd/archsimd/shuffles_amd64.go b/src/simd/archsimd/shuffles_amd64.go index 2bbd89c725..355634fcae 100644 --- a/src/simd/archsimd/shuffles_amd64.go +++ b/src/simd/archsimd/shuffles_amd64.go @@ -54,7 +54,10 @@ const ( // requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) returns {4,8,25,81} +// +// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) +// +// returns {4,8,25,81}. 
// // If the selectors are not constant this will translate to a function // call. @@ -133,7 +136,10 @@ func (x Int32x4) SelectFromPair(a, b, c, d uint8, y Int32x4) Int32x4 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) returns {4,8,25,81} +// +// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) +// +// returns {4,8,25,81}. // // If the selectors are not constant this will translate to a function // call. @@ -205,7 +211,10 @@ func (x Uint32x4) SelectFromPair(a, b, c, d uint8, y Uint32x4) Uint32x4 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) returns {4,8,25,81} +// +// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) +// +// returns {4,8,25,81}. // // If the selectors are not constant this will translate to a function // call. @@ -278,9 +287,10 @@ func (x Float32x4) SelectFromPair(a, b, c, d uint8, y Float32x4) Float32x4 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289}) // -// returns {4,8,25,81,64,128,169,289} +// {1,2,4,8,16,32,64,128}.SelectFromPairGrouped(2,3,5,7,{9,25,49,81,121,169,225,289}) +// +// returns {4,8,25,81,64,128,169,289}. // // If the selectors are not constant this will translate to a function // call. @@ -353,9 +363,10 @@ func (x Int32x8) SelectFromPairGrouped(a, b, c, d uint8, y Int32x8) Int32x8 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output.
For example, -// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289}) // -// returns {4,8,25,81,64,128,169,289} +// {1,2,4,8,16,32,64,128}.SelectFromPairGrouped(2,3,5,7,{9,25,49,81,121,169,225,289}) +// +// returns {4,8,25,81,64,128,169,289}. // // If the selectors are not constant this will translate to a function // call. @@ -428,9 +439,10 @@ func (x Uint32x8) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x8) Uint32x8 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289}) // -// returns {4,8,25,81,64,128,169,289} +// {1,2,4,8,16,32,64,128}.SelectFromPairGrouped(2,3,5,7,{9,25,49,81,121,169,225,289}) +// +// returns {4,8,25,81,64,128,169,289}. // // If the selectors are not constant this will translate to a function // call. @@ -1080,7 +1092,7 @@ func (x Uint32x16) PermuteScalarsGrouped(a, b, c, d uint8) Uint32x16 { // PermuteScalarsHi performs a permutation of vector x using the supplied indices: // -// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]} +// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]} // // Parameters a,b,c,d should have values between 0 and 3. // If a through d are constants, then an instruction will be inlined, otherwise @@ -1093,7 +1105,7 @@ func (x Int16x8) PermuteScalarsHi(a, b, c, d uint8) Int16x8 { // PermuteScalarsHi performs a permutation of vector x using the supplied indices: // -// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]} +// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]} // // Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise @@ -1276,7 +1288,8 @@ func (x Uint16x32) PermuteScalarsLoGrouped(a, b, c, d uint8) Uint16x32 { // // A carryless multiplication uses bitwise XOR instead of // add-with-carry, for example (in base two): -// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 +// +// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 // // This also models multiplication of polynomials with coefficients // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 = @@ -1300,7 +1313,8 @@ func (x Uint64x2) CarrylessMultiply(a, b uint8, y Uint64x2) Uint64x2 { // // A carryless multiplication uses bitwise XOR instead of // add-with-carry, for example (in base two): -// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 +// +// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 // // This also models multiplication of polynomials with coefficients // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 = @@ -1324,7 +1338,8 @@ func (x Uint64x4) CarrylessMultiplyGrouped(a, b uint8, y Uint64x4) Uint64x4 { // // A carryless multiplication uses bitwise XOR instead of // add-with-carry, for example (in base two): -// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 +// +// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 // // This also models multiplication of polynomials with coefficients // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 = diff --git a/src/simd/archsimd/slice_gen_amd64.go b/src/simd/archsimd/slice_gen_amd64.go index 9cf72502db..9e34f9ca56 100644 --- a/src/simd/archsimd/slice_gen_amd64.go +++ b/src/simd/archsimd/slice_gen_amd64.go @@ -6,302 +6,302 @@ package archsimd import "unsafe" -// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s +// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s. 
func LoadInt8x16Slice(s []int8) Int8x16 { return LoadInt8x16((*[16]int8)(s)) } -// StoreSlice stores x into a slice of at least 16 int8s +// StoreSlice stores x into a slice of at least 16 int8s. func (x Int8x16) StoreSlice(s []int8) { x.Store((*[16]int8)(s)) } -// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s +// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s. func LoadInt16x8Slice(s []int16) Int16x8 { return LoadInt16x8((*[8]int16)(s)) } -// StoreSlice stores x into a slice of at least 8 int16s +// StoreSlice stores x into a slice of at least 8 int16s. func (x Int16x8) StoreSlice(s []int16) { x.Store((*[8]int16)(s)) } -// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s +// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s. func LoadInt32x4Slice(s []int32) Int32x4 { return LoadInt32x4((*[4]int32)(s)) } -// StoreSlice stores x into a slice of at least 4 int32s +// StoreSlice stores x into a slice of at least 4 int32s. func (x Int32x4) StoreSlice(s []int32) { x.Store((*[4]int32)(s)) } -// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s +// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s. func LoadInt64x2Slice(s []int64) Int64x2 { return LoadInt64x2((*[2]int64)(s)) } -// StoreSlice stores x into a slice of at least 2 int64s +// StoreSlice stores x into a slice of at least 2 int64s. func (x Int64x2) StoreSlice(s []int64) { x.Store((*[2]int64)(s)) } -// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s +// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s. func LoadUint8x16Slice(s []uint8) Uint8x16 { return LoadUint8x16((*[16]uint8)(s)) } -// StoreSlice stores x into a slice of at least 16 uint8s +// StoreSlice stores x into a slice of at least 16 uint8s. 
func (x Uint8x16) StoreSlice(s []uint8) { x.Store((*[16]uint8)(s)) } -// LoadUint16x8Slice loads an Uint16x8 from a slice of at least 8 uint16s +// LoadUint16x8Slice loads an Uint16x8 from a slice of at least 8 uint16s. func LoadUint16x8Slice(s []uint16) Uint16x8 { return LoadUint16x8((*[8]uint16)(s)) } -// StoreSlice stores x into a slice of at least 8 uint16s +// StoreSlice stores x into a slice of at least 8 uint16s. func (x Uint16x8) StoreSlice(s []uint16) { x.Store((*[8]uint16)(s)) } -// LoadUint32x4Slice loads an Uint32x4 from a slice of at least 4 uint32s +// LoadUint32x4Slice loads an Uint32x4 from a slice of at least 4 uint32s. func LoadUint32x4Slice(s []uint32) Uint32x4 { return LoadUint32x4((*[4]uint32)(s)) } -// StoreSlice stores x into a slice of at least 4 uint32s +// StoreSlice stores x into a slice of at least 4 uint32s. func (x Uint32x4) StoreSlice(s []uint32) { x.Store((*[4]uint32)(s)) } -// LoadUint64x2Slice loads an Uint64x2 from a slice of at least 2 uint64s +// LoadUint64x2Slice loads an Uint64x2 from a slice of at least 2 uint64s. func LoadUint64x2Slice(s []uint64) Uint64x2 { return LoadUint64x2((*[2]uint64)(s)) } -// StoreSlice stores x into a slice of at least 2 uint64s +// StoreSlice stores x into a slice of at least 2 uint64s. func (x Uint64x2) StoreSlice(s []uint64) { x.Store((*[2]uint64)(s)) } -// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s +// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s. func LoadFloat32x4Slice(s []float32) Float32x4 { return LoadFloat32x4((*[4]float32)(s)) } -// StoreSlice stores x into a slice of at least 4 float32s +// StoreSlice stores x into a slice of at least 4 float32s. func (x Float32x4) StoreSlice(s []float32) { x.Store((*[4]float32)(s)) } -// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s +// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s. 
func LoadFloat64x2Slice(s []float64) Float64x2 { return LoadFloat64x2((*[2]float64)(s)) } -// StoreSlice stores x into a slice of at least 2 float64s +// StoreSlice stores x into a slice of at least 2 float64s. func (x Float64x2) StoreSlice(s []float64) { x.Store((*[2]float64)(s)) } -// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s +// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s. func LoadInt8x32Slice(s []int8) Int8x32 { return LoadInt8x32((*[32]int8)(s)) } -// StoreSlice stores x into a slice of at least 32 int8s +// StoreSlice stores x into a slice of at least 32 int8s. func (x Int8x32) StoreSlice(s []int8) { x.Store((*[32]int8)(s)) } -// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s +// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s. func LoadInt16x16Slice(s []int16) Int16x16 { return LoadInt16x16((*[16]int16)(s)) } -// StoreSlice stores x into a slice of at least 16 int16s +// StoreSlice stores x into a slice of at least 16 int16s. func (x Int16x16) StoreSlice(s []int16) { x.Store((*[16]int16)(s)) } -// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s +// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s. func LoadInt32x8Slice(s []int32) Int32x8 { return LoadInt32x8((*[8]int32)(s)) } -// StoreSlice stores x into a slice of at least 8 int32s +// StoreSlice stores x into a slice of at least 8 int32s. func (x Int32x8) StoreSlice(s []int32) { x.Store((*[8]int32)(s)) } -// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s +// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s. func LoadInt64x4Slice(s []int64) Int64x4 { return LoadInt64x4((*[4]int64)(s)) } -// StoreSlice stores x into a slice of at least 4 int64s +// StoreSlice stores x into a slice of at least 4 int64s. 
func (x Int64x4) StoreSlice(s []int64) { x.Store((*[4]int64)(s)) } -// LoadUint8x32Slice loads an Uint8x32 from a slice of at least 32 uint8s +// LoadUint8x32Slice loads an Uint8x32 from a slice of at least 32 uint8s. func LoadUint8x32Slice(s []uint8) Uint8x32 { return LoadUint8x32((*[32]uint8)(s)) } -// StoreSlice stores x into a slice of at least 32 uint8s +// StoreSlice stores x into a slice of at least 32 uint8s. func (x Uint8x32) StoreSlice(s []uint8) { x.Store((*[32]uint8)(s)) } -// LoadUint16x16Slice loads an Uint16x16 from a slice of at least 16 uint16s +// LoadUint16x16Slice loads an Uint16x16 from a slice of at least 16 uint16s. func LoadUint16x16Slice(s []uint16) Uint16x16 { return LoadUint16x16((*[16]uint16)(s)) } -// StoreSlice stores x into a slice of at least 16 uint16s +// StoreSlice stores x into a slice of at least 16 uint16s. func (x Uint16x16) StoreSlice(s []uint16) { x.Store((*[16]uint16)(s)) } -// LoadUint32x8Slice loads an Uint32x8 from a slice of at least 8 uint32s +// LoadUint32x8Slice loads an Uint32x8 from a slice of at least 8 uint32s. func LoadUint32x8Slice(s []uint32) Uint32x8 { return LoadUint32x8((*[8]uint32)(s)) } -// StoreSlice stores x into a slice of at least 8 uint32s +// StoreSlice stores x into a slice of at least 8 uint32s. func (x Uint32x8) StoreSlice(s []uint32) { x.Store((*[8]uint32)(s)) } -// LoadUint64x4Slice loads an Uint64x4 from a slice of at least 4 uint64s +// LoadUint64x4Slice loads an Uint64x4 from a slice of at least 4 uint64s. func LoadUint64x4Slice(s []uint64) Uint64x4 { return LoadUint64x4((*[4]uint64)(s)) } -// StoreSlice stores x into a slice of at least 4 uint64s +// StoreSlice stores x into a slice of at least 4 uint64s. func (x Uint64x4) StoreSlice(s []uint64) { x.Store((*[4]uint64)(s)) } -// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s +// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s. 
func LoadFloat32x8Slice(s []float32) Float32x8 { return LoadFloat32x8((*[8]float32)(s)) } -// StoreSlice stores x into a slice of at least 8 float32s +// StoreSlice stores x into a slice of at least 8 float32s. func (x Float32x8) StoreSlice(s []float32) { x.Store((*[8]float32)(s)) } -// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s +// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s. func LoadFloat64x4Slice(s []float64) Float64x4 { return LoadFloat64x4((*[4]float64)(s)) } -// StoreSlice stores x into a slice of at least 4 float64s +// StoreSlice stores x into a slice of at least 4 float64s. func (x Float64x4) StoreSlice(s []float64) { x.Store((*[4]float64)(s)) } -// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s +// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s. func LoadInt8x64Slice(s []int8) Int8x64 { return LoadInt8x64((*[64]int8)(s)) } -// StoreSlice stores x into a slice of at least 64 int8s +// StoreSlice stores x into a slice of at least 64 int8s. func (x Int8x64) StoreSlice(s []int8) { x.Store((*[64]int8)(s)) } -// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s +// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s. func LoadInt16x32Slice(s []int16) Int16x32 { return LoadInt16x32((*[32]int16)(s)) } -// StoreSlice stores x into a slice of at least 32 int16s +// StoreSlice stores x into a slice of at least 32 int16s. func (x Int16x32) StoreSlice(s []int16) { x.Store((*[32]int16)(s)) } -// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s +// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s. func LoadInt32x16Slice(s []int32) Int32x16 { return LoadInt32x16((*[16]int32)(s)) } -// StoreSlice stores x into a slice of at least 16 int32s +// StoreSlice stores x into a slice of at least 16 int32s. 
func (x Int32x16) StoreSlice(s []int32) { x.Store((*[16]int32)(s)) } -// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s +// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s. func LoadInt64x8Slice(s []int64) Int64x8 { return LoadInt64x8((*[8]int64)(s)) } -// StoreSlice stores x into a slice of at least 8 int64s +// StoreSlice stores x into a slice of at least 8 int64s. func (x Int64x8) StoreSlice(s []int64) { x.Store((*[8]int64)(s)) } -// LoadUint8x64Slice loads an Uint8x64 from a slice of at least 64 uint8s +// LoadUint8x64Slice loads an Uint8x64 from a slice of at least 64 uint8s. func LoadUint8x64Slice(s []uint8) Uint8x64 { return LoadUint8x64((*[64]uint8)(s)) } -// StoreSlice stores x into a slice of at least 64 uint8s +// StoreSlice stores x into a slice of at least 64 uint8s. func (x Uint8x64) StoreSlice(s []uint8) { x.Store((*[64]uint8)(s)) } -// LoadUint16x32Slice loads an Uint16x32 from a slice of at least 32 uint16s +// LoadUint16x32Slice loads an Uint16x32 from a slice of at least 32 uint16s. func LoadUint16x32Slice(s []uint16) Uint16x32 { return LoadUint16x32((*[32]uint16)(s)) } -// StoreSlice stores x into a slice of at least 32 uint16s +// StoreSlice stores x into a slice of at least 32 uint16s. func (x Uint16x32) StoreSlice(s []uint16) { x.Store((*[32]uint16)(s)) } -// LoadUint32x16Slice loads an Uint32x16 from a slice of at least 16 uint32s +// LoadUint32x16Slice loads an Uint32x16 from a slice of at least 16 uint32s. func LoadUint32x16Slice(s []uint32) Uint32x16 { return LoadUint32x16((*[16]uint32)(s)) } -// StoreSlice stores x into a slice of at least 16 uint32s +// StoreSlice stores x into a slice of at least 16 uint32s. func (x Uint32x16) StoreSlice(s []uint32) { x.Store((*[16]uint32)(s)) } -// LoadUint64x8Slice loads an Uint64x8 from a slice of at least 8 uint64s +// LoadUint64x8Slice loads an Uint64x8 from a slice of at least 8 uint64s. 
func LoadUint64x8Slice(s []uint64) Uint64x8 { return LoadUint64x8((*[8]uint64)(s)) } -// StoreSlice stores x into a slice of at least 8 uint64s +// StoreSlice stores x into a slice of at least 8 uint64s. func (x Uint64x8) StoreSlice(s []uint64) { x.Store((*[8]uint64)(s)) } -// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s +// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s. func LoadFloat32x16Slice(s []float32) Float32x16 { return LoadFloat32x16((*[16]float32)(s)) } -// StoreSlice stores x into a slice of at least 16 float32s +// StoreSlice stores x into a slice of at least 16 float32s. func (x Float32x16) StoreSlice(s []float32) { x.Store((*[16]float32)(s)) } -// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s +// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s. func LoadFloat64x8Slice(s []float64) Float64x8 { return LoadFloat64x8((*[8]float64)(s)) } -// StoreSlice stores x into a slice of at least 8 float64s +// StoreSlice stores x into a slice of at least 8 float64s. func (x Float64x8) StoreSlice(s []float64) { x.Store((*[8]float64)(s)) } diff --git a/src/simd/archsimd/types_amd64.go b/src/simd/archsimd/types_amd64.go index e309bb42ef..2efa1363d6 100644 --- a/src/simd/archsimd/types_amd64.go +++ b/src/simd/archsimd/types_amd64.go @@ -9,27 +9,27 @@ type v128 struct { _128 [0]func() // uncomparable } -// Float32x4 is a 128-bit SIMD vector of 4 float32 +// Float32x4 is a 128-bit SIMD vector of 4 float32s. type Float32x4 struct { float32x4 v128 vals [4]float32 } -// Len returns the number of elements in a Float32x4 +// Len returns the number of elements in a Float32x4. func (x Float32x4) Len() int { return 4 } -// LoadFloat32x4 loads a Float32x4 from an array +// LoadFloat32x4 loads a Float32x4 from an array. // //go:noescape func LoadFloat32x4(y *[4]float32) Float32x4 -// Store stores a Float32x4 to an array +// Store stores a Float32x4 to an array. 
// //go:noescape func (x Float32x4) Store(y *[4]float32) // LoadMaskedFloat32x4 loads a Float32x4 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // @@ -37,34 +37,34 @@ func (x Float32x4) Store(y *[4]float32) func LoadMaskedFloat32x4(y *[4]float32, mask Mask32x4) Float32x4 // StoreMasked stores a Float32x4 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Float32x4) StoreMasked(y *[4]float32, mask Mask32x4) -// Float64x2 is a 128-bit SIMD vector of 2 float64 +// Float64x2 is a 128-bit SIMD vector of 2 float64s. type Float64x2 struct { float64x2 v128 vals [2]float64 } -// Len returns the number of elements in a Float64x2 +// Len returns the number of elements in a Float64x2. func (x Float64x2) Len() int { return 2 } -// LoadFloat64x2 loads a Float64x2 from an array +// LoadFloat64x2 loads a Float64x2 from an array. // //go:noescape func LoadFloat64x2(y *[2]float64) Float64x2 -// Store stores a Float64x2 to an array +// Store stores a Float64x2 to an array. // //go:noescape func (x Float64x2) Store(y *[2]float64) // LoadMaskedFloat64x2 loads a Float64x2 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // @@ -72,180 +72,180 @@ func (x Float64x2) Store(y *[2]float64) func LoadMaskedFloat64x2(y *[2]float64, mask Mask64x2) Float64x2 // StoreMasked stores a Float64x2 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Float64x2) StoreMasked(y *[2]float64, mask Mask64x2) -// Int8x16 is a 128-bit SIMD vector of 16 int8 +// Int8x16 is a 128-bit SIMD vector of 16 int8s. 
type Int8x16 struct { int8x16 v128 vals [16]int8 } -// Len returns the number of elements in a Int8x16 +// Len returns the number of elements in an Int8x16. func (x Int8x16) Len() int { return 16 } -// LoadInt8x16 loads a Int8x16 from an array +// LoadInt8x16 loads an Int8x16 from an array. // //go:noescape func LoadInt8x16(y *[16]int8) Int8x16 -// Store stores a Int8x16 to an array +// Store stores an Int8x16 to an array. // //go:noescape func (x Int8x16) Store(y *[16]int8) -// Int16x8 is a 128-bit SIMD vector of 8 int16 +// Int16x8 is a 128-bit SIMD vector of 8 int16s. type Int16x8 struct { int16x8 v128 vals [8]int16 } -// Len returns the number of elements in a Int16x8 +// Len returns the number of elements in an Int16x8. func (x Int16x8) Len() int { return 8 } -// LoadInt16x8 loads a Int16x8 from an array +// LoadInt16x8 loads an Int16x8 from an array. // //go:noescape func LoadInt16x8(y *[8]int16) Int16x8 -// Store stores a Int16x8 to an array +// Store stores an Int16x8 to an array. // //go:noescape func (x Int16x8) Store(y *[8]int16) -// Int32x4 is a 128-bit SIMD vector of 4 int32 +// Int32x4 is a 128-bit SIMD vector of 4 int32s. type Int32x4 struct { int32x4 v128 vals [4]int32 } -// Len returns the number of elements in a Int32x4 +// Len returns the number of elements in an Int32x4. func (x Int32x4) Len() int { return 4 } -// LoadInt32x4 loads a Int32x4 from an array +// LoadInt32x4 loads an Int32x4 from an array. // //go:noescape func LoadInt32x4(y *[4]int32) Int32x4 -// Store stores a Int32x4 to an array +// Store stores an Int32x4 to an array. // //go:noescape func (x Int32x4) Store(y *[4]int32) -// LoadMaskedInt32x4 loads a Int32x4 from an array, -// at those elements enabled by mask +// LoadMaskedInt32x4 loads an Int32x4 from an array, +// at those elements enabled by mask. 
// // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func LoadMaskedInt32x4(y *[4]int32, mask Mask32x4) Int32x4 -// StoreMasked stores a Int32x4 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int32x4 to an array, +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Int32x4) StoreMasked(y *[4]int32, mask Mask32x4) -// Int64x2 is a 128-bit SIMD vector of 2 int64 +// Int64x2 is a 128-bit SIMD vector of 2 int64s. type Int64x2 struct { int64x2 v128 vals [2]int64 } -// Len returns the number of elements in a Int64x2 +// Len returns the number of elements in an Int64x2. func (x Int64x2) Len() int { return 2 } -// LoadInt64x2 loads a Int64x2 from an array +// LoadInt64x2 loads an Int64x2 from an array. // //go:noescape func LoadInt64x2(y *[2]int64) Int64x2 -// Store stores a Int64x2 to an array +// Store stores an Int64x2 to an array. // //go:noescape func (x Int64x2) Store(y *[2]int64) -// LoadMaskedInt64x2 loads a Int64x2 from an array, -// at those elements enabled by mask +// LoadMaskedInt64x2 loads an Int64x2 from an array, +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func LoadMaskedInt64x2(y *[2]int64, mask Mask64x2) Int64x2 -// StoreMasked stores a Int64x2 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int64x2 to an array, +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Int64x2) StoreMasked(y *[2]int64, mask Mask64x2) -// Uint8x16 is a 128-bit SIMD vector of 16 uint8 +// Uint8x16 is a 128-bit SIMD vector of 16 uint8s. type Uint8x16 struct { uint8x16 v128 vals [16]uint8 } -// Len returns the number of elements in a Uint8x16 +// Len returns the number of elements in a Uint8x16. func (x Uint8x16) Len() int { return 16 } -// LoadUint8x16 loads a Uint8x16 from an array +// LoadUint8x16 loads a Uint8x16 from an array. 
// //go:noescape func LoadUint8x16(y *[16]uint8) Uint8x16 -// Store stores a Uint8x16 to an array +// Store stores a Uint8x16 to an array. // //go:noescape func (x Uint8x16) Store(y *[16]uint8) -// Uint16x8 is a 128-bit SIMD vector of 8 uint16 +// Uint16x8 is a 128-bit SIMD vector of 8 uint16s. type Uint16x8 struct { uint16x8 v128 vals [8]uint16 } -// Len returns the number of elements in a Uint16x8 +// Len returns the number of elements in a Uint16x8. func (x Uint16x8) Len() int { return 8 } -// LoadUint16x8 loads a Uint16x8 from an array +// LoadUint16x8 loads a Uint16x8 from an array. // //go:noescape func LoadUint16x8(y *[8]uint16) Uint16x8 -// Store stores a Uint16x8 to an array +// Store stores a Uint16x8 to an array. // //go:noescape func (x Uint16x8) Store(y *[8]uint16) -// Uint32x4 is a 128-bit SIMD vector of 4 uint32 +// Uint32x4 is a 128-bit SIMD vector of 4 uint32s. type Uint32x4 struct { uint32x4 v128 vals [4]uint32 } -// Len returns the number of elements in a Uint32x4 +// Len returns the number of elements in a Uint32x4. func (x Uint32x4) Len() int { return 4 } -// LoadUint32x4 loads a Uint32x4 from an array +// LoadUint32x4 loads a Uint32x4 from an array. // //go:noescape func LoadUint32x4(y *[4]uint32) Uint32x4 -// Store stores a Uint32x4 to an array +// Store stores a Uint32x4 to an array. // //go:noescape func (x Uint32x4) Store(y *[4]uint32) // LoadMaskedUint32x4 loads a Uint32x4 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // @@ -253,34 +253,34 @@ func (x Uint32x4) Store(y *[4]uint32) func LoadMaskedUint32x4(y *[4]uint32, mask Mask32x4) Uint32x4 // StoreMasked stores a Uint32x4 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. 
// // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Uint32x4) StoreMasked(y *[4]uint32, mask Mask32x4) -// Uint64x2 is a 128-bit SIMD vector of 2 uint64 +// Uint64x2 is a 128-bit SIMD vector of 2 uint64s. type Uint64x2 struct { uint64x2 v128 vals [2]uint64 } -// Len returns the number of elements in a Uint64x2 +// Len returns the number of elements in a Uint64x2. func (x Uint64x2) Len() int { return 2 } -// LoadUint64x2 loads a Uint64x2 from an array +// LoadUint64x2 loads a Uint64x2 from an array. // //go:noescape func LoadUint64x2(y *[2]uint64) Uint64x2 -// Store stores a Uint64x2 to an array +// Store stores a Uint64x2 to an array. // //go:noescape func (x Uint64x2) Store(y *[2]uint64) // LoadMaskedUint64x2 loads a Uint64x2 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // @@ -288,14 +288,14 @@ func (x Uint64x2) Store(y *[2]uint64) func LoadMaskedUint64x2(y *[2]uint64, mask Mask64x2) Uint64x2 // StoreMasked stores a Uint64x2 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Uint64x2) StoreMasked(y *[2]uint64, mask Mask64x2) -// Mask8x16 is a 128-bit SIMD vector of 16 int8 +// Mask8x16 is a 128-bit SIMD vector of 16 int8s. type Mask8x16 struct { int8x16 v128 vals [16]int8 @@ -311,7 +311,7 @@ func Mask8x16FromBits(y uint16) Mask8x16 // Asm: KMOVB, CPU Features: AVX512 func (x Mask8x16) ToBits() uint16 -// Mask16x8 is a 128-bit SIMD vector of 8 int16 +// Mask16x8 is a 128-bit SIMD vector of 8 int16s. type Mask16x8 struct { int16x8 v128 vals [8]int16 @@ -327,7 +327,7 @@ func Mask16x8FromBits(y uint8) Mask16x8 // Asm: KMOVW, CPU Features: AVX512 func (x Mask16x8) ToBits() uint8 -// Mask32x4 is a 128-bit SIMD vector of 4 int32 +// Mask32x4 is a 128-bit SIMD vector of 4 int32s. 
type Mask32x4 struct { int32x4 v128 vals [4]int32 @@ -345,7 +345,7 @@ func Mask32x4FromBits(y uint8) Mask32x4 // Asm: KMOVD, CPU Features: AVX512 func (x Mask32x4) ToBits() uint8 -// Mask64x2 is a 128-bit SIMD vector of 2 int64 +// Mask64x2 is a 128-bit SIMD vector of 2 int64s. type Mask64x2 struct { int64x2 v128 vals [2]int64 @@ -368,27 +368,27 @@ type v256 struct { _256 [0]func() // uncomparable } -// Float32x8 is a 256-bit SIMD vector of 8 float32 +// Float32x8 is a 256-bit SIMD vector of 8 float32s. type Float32x8 struct { float32x8 v256 vals [8]float32 } -// Len returns the number of elements in a Float32x8 +// Len returns the number of elements in a Float32x8. func (x Float32x8) Len() int { return 8 } -// LoadFloat32x8 loads a Float32x8 from an array +// LoadFloat32x8 loads a Float32x8 from an array. // //go:noescape func LoadFloat32x8(y *[8]float32) Float32x8 -// Store stores a Float32x8 to an array +// Store stores a Float32x8 to an array. // //go:noescape func (x Float32x8) Store(y *[8]float32) // LoadMaskedFloat32x8 loads a Float32x8 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // @@ -396,34 +396,34 @@ func (x Float32x8) Store(y *[8]float32) func LoadMaskedFloat32x8(y *[8]float32, mask Mask32x8) Float32x8 // StoreMasked stores a Float32x8 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Float32x8) StoreMasked(y *[8]float32, mask Mask32x8) -// Float64x4 is a 256-bit SIMD vector of 4 float64 +// Float64x4 is a 256-bit SIMD vector of 4 float64s. type Float64x4 struct { float64x4 v256 vals [4]float64 } -// Len returns the number of elements in a Float64x4 +// Len returns the number of elements in a Float64x4. func (x Float64x4) Len() int { return 4 } -// LoadFloat64x4 loads a Float64x4 from an array +// LoadFloat64x4 loads a Float64x4 from an array. 
// //go:noescape func LoadFloat64x4(y *[4]float64) Float64x4 -// Store stores a Float64x4 to an array +// Store stores a Float64x4 to an array. // //go:noescape func (x Float64x4) Store(y *[4]float64) // LoadMaskedFloat64x4 loads a Float64x4 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // @@ -431,180 +431,180 @@ func (x Float64x4) Store(y *[4]float64) func LoadMaskedFloat64x4(y *[4]float64, mask Mask64x4) Float64x4 // StoreMasked stores a Float64x4 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Float64x4) StoreMasked(y *[4]float64, mask Mask64x4) -// Int8x32 is a 256-bit SIMD vector of 32 int8 +// Int8x32 is a 256-bit SIMD vector of 32 int8s. type Int8x32 struct { int8x32 v256 vals [32]int8 } -// Len returns the number of elements in a Int8x32 +// Len returns the number of elements in an Int8x32. func (x Int8x32) Len() int { return 32 } -// LoadInt8x32 loads a Int8x32 from an array +// LoadInt8x32 loads an Int8x32 from an array. // //go:noescape func LoadInt8x32(y *[32]int8) Int8x32 -// Store stores a Int8x32 to an array +// Store stores an Int8x32 to an array. // //go:noescape func (x Int8x32) Store(y *[32]int8) -// Int16x16 is a 256-bit SIMD vector of 16 int16 +// Int16x16 is a 256-bit SIMD vector of 16 int16s. type Int16x16 struct { int16x16 v256 vals [16]int16 } -// Len returns the number of elements in a Int16x16 +// Len returns the number of elements in an Int16x16. func (x Int16x16) Len() int { return 16 } -// LoadInt16x16 loads a Int16x16 from an array +// LoadInt16x16 loads an Int16x16 from an array. // //go:noescape func LoadInt16x16(y *[16]int16) Int16x16 -// Store stores a Int16x16 to an array +// Store stores an Int16x16 to an array. 
// //go:noescape func (x Int16x16) Store(y *[16]int16) -// Int32x8 is a 256-bit SIMD vector of 8 int32 +// Int32x8 is a 256-bit SIMD vector of 8 int32s. type Int32x8 struct { int32x8 v256 vals [8]int32 } -// Len returns the number of elements in a Int32x8 +// Len returns the number of elements in an Int32x8. func (x Int32x8) Len() int { return 8 } -// LoadInt32x8 loads a Int32x8 from an array +// LoadInt32x8 loads an Int32x8 from an array. // //go:noescape func LoadInt32x8(y *[8]int32) Int32x8 -// Store stores a Int32x8 to an array +// Store stores an Int32x8 to an array. // //go:noescape func (x Int32x8) Store(y *[8]int32) -// LoadMaskedInt32x8 loads a Int32x8 from an array, -// at those elements enabled by mask +// LoadMaskedInt32x8 loads an Int32x8 from an array, +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func LoadMaskedInt32x8(y *[8]int32, mask Mask32x8) Int32x8 -// StoreMasked stores a Int32x8 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int32x8 to an array, +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Int32x8) StoreMasked(y *[8]int32, mask Mask32x8) -// Int64x4 is a 256-bit SIMD vector of 4 int64 +// Int64x4 is a 256-bit SIMD vector of 4 int64s. type Int64x4 struct { int64x4 v256 vals [4]int64 } -// Len returns the number of elements in a Int64x4 +// Len returns the number of elements in an Int64x4. func (x Int64x4) Len() int { return 4 } -// LoadInt64x4 loads a Int64x4 from an array +// LoadInt64x4 loads an Int64x4 from an array. // //go:noescape func LoadInt64x4(y *[4]int64) Int64x4 -// Store stores a Int64x4 to an array +// Store stores an Int64x4 to an array. // //go:noescape func (x Int64x4) Store(y *[4]int64) -// LoadMaskedInt64x4 loads a Int64x4 from an array, -// at those elements enabled by mask +// LoadMaskedInt64x4 loads an Int64x4 from an array, +// at those elements enabled by mask. 
// // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func LoadMaskedInt64x4(y *[4]int64, mask Mask64x4) Int64x4 -// StoreMasked stores a Int64x4 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int64x4 to an array, +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Int64x4) StoreMasked(y *[4]int64, mask Mask64x4) -// Uint8x32 is a 256-bit SIMD vector of 32 uint8 +// Uint8x32 is a 256-bit SIMD vector of 32 uint8s. type Uint8x32 struct { uint8x32 v256 vals [32]uint8 } -// Len returns the number of elements in a Uint8x32 +// Len returns the number of elements in a Uint8x32. func (x Uint8x32) Len() int { return 32 } -// LoadUint8x32 loads a Uint8x32 from an array +// LoadUint8x32 loads a Uint8x32 from an array. // //go:noescape func LoadUint8x32(y *[32]uint8) Uint8x32 -// Store stores a Uint8x32 to an array +// Store stores a Uint8x32 to an array. // //go:noescape func (x Uint8x32) Store(y *[32]uint8) -// Uint16x16 is a 256-bit SIMD vector of 16 uint16 +// Uint16x16 is a 256-bit SIMD vector of 16 uint16s. type Uint16x16 struct { uint16x16 v256 vals [16]uint16 } -// Len returns the number of elements in a Uint16x16 +// Len returns the number of elements in a Uint16x16. func (x Uint16x16) Len() int { return 16 } -// LoadUint16x16 loads a Uint16x16 from an array +// LoadUint16x16 loads a Uint16x16 from an array. // //go:noescape func LoadUint16x16(y *[16]uint16) Uint16x16 -// Store stores a Uint16x16 to an array +// Store stores a Uint16x16 to an array. // //go:noescape func (x Uint16x16) Store(y *[16]uint16) -// Uint32x8 is a 256-bit SIMD vector of 8 uint32 +// Uint32x8 is a 256-bit SIMD vector of 8 uint32s. type Uint32x8 struct { uint32x8 v256 vals [8]uint32 } -// Len returns the number of elements in a Uint32x8 +// Len returns the number of elements in a Uint32x8. 
func (x Uint32x8) Len() int { return 8 } -// LoadUint32x8 loads a Uint32x8 from an array +// LoadUint32x8 loads a Uint32x8 from an array. // //go:noescape func LoadUint32x8(y *[8]uint32) Uint32x8 -// Store stores a Uint32x8 to an array +// Store stores a Uint32x8 to an array. // //go:noescape func (x Uint32x8) Store(y *[8]uint32) // LoadMaskedUint32x8 loads a Uint32x8 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // @@ -612,34 +612,34 @@ func (x Uint32x8) Store(y *[8]uint32) func LoadMaskedUint32x8(y *[8]uint32, mask Mask32x8) Uint32x8 // StoreMasked stores a Uint32x8 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Uint32x8) StoreMasked(y *[8]uint32, mask Mask32x8) -// Uint64x4 is a 256-bit SIMD vector of 4 uint64 +// Uint64x4 is a 256-bit SIMD vector of 4 uint64s. type Uint64x4 struct { uint64x4 v256 vals [4]uint64 } -// Len returns the number of elements in a Uint64x4 +// Len returns the number of elements in a Uint64x4. func (x Uint64x4) Len() int { return 4 } -// LoadUint64x4 loads a Uint64x4 from an array +// LoadUint64x4 loads a Uint64x4 from an array. // //go:noescape func LoadUint64x4(y *[4]uint64) Uint64x4 -// Store stores a Uint64x4 to an array +// Store stores a Uint64x4 to an array. // //go:noescape func (x Uint64x4) Store(y *[4]uint64) // LoadMaskedUint64x4 loads a Uint64x4 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // @@ -647,14 +647,14 @@ func (x Uint64x4) Store(y *[4]uint64) func LoadMaskedUint64x4(y *[4]uint64, mask Mask64x4) Uint64x4 // StoreMasked stores a Uint64x4 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. 
// // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Uint64x4) StoreMasked(y *[4]uint64, mask Mask64x4) -// Mask8x32 is a 256-bit SIMD vector of 32 int8 +// Mask8x32 is a 256-bit SIMD vector of 32 int8s. type Mask8x32 struct { int8x32 v256 vals [32]int8 @@ -670,7 +670,7 @@ func Mask8x32FromBits(y uint32) Mask8x32 // Asm: KMOVB, CPU Features: AVX512 func (x Mask8x32) ToBits() uint32 -// Mask16x16 is a 256-bit SIMD vector of 16 int16 +// Mask16x16 is a 256-bit SIMD vector of 16 int16s. type Mask16x16 struct { int16x16 v256 vals [16]int16 @@ -686,7 +686,7 @@ func Mask16x16FromBits(y uint16) Mask16x16 // Asm: KMOVW, CPU Features: AVX512 func (x Mask16x16) ToBits() uint16 -// Mask32x8 is a 256-bit SIMD vector of 8 int32 +// Mask32x8 is a 256-bit SIMD vector of 8 int32s. type Mask32x8 struct { int32x8 v256 vals [8]int32 @@ -702,7 +702,7 @@ func Mask32x8FromBits(y uint8) Mask32x8 // Asm: KMOVD, CPU Features: AVX512 func (x Mask32x8) ToBits() uint8 -// Mask64x4 is a 256-bit SIMD vector of 4 int64 +// Mask64x4 is a 256-bit SIMD vector of 4 int64s. type Mask64x4 struct { int64x4 v256 vals [4]int64 @@ -725,27 +725,27 @@ type v512 struct { _512 [0]func() // uncomparable } -// Float32x16 is a 512-bit SIMD vector of 16 float32 +// Float32x16 is a 512-bit SIMD vector of 16 float32s. type Float32x16 struct { float32x16 v512 vals [16]float32 } -// Len returns the number of elements in a Float32x16 +// Len returns the number of elements in a Float32x16. func (x Float32x16) Len() int { return 16 } -// LoadFloat32x16 loads a Float32x16 from an array +// LoadFloat32x16 loads a Float32x16 from an array. // //go:noescape func LoadFloat32x16(y *[16]float32) Float32x16 -// Store stores a Float32x16 to an array +// Store stores a Float32x16 to an array. // //go:noescape func (x Float32x16) Store(y *[16]float32) // LoadMaskedFloat32x16 loads a Float32x16 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. 
// // Asm: VMOVDQU32.Z, CPU Feature: AVX512 // @@ -753,34 +753,34 @@ func (x Float32x16) Store(y *[16]float32) func LoadMaskedFloat32x16(y *[16]float32, mask Mask32x16) Float32x16 // StoreMasked stores a Float32x16 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU32, CPU Feature: AVX512 // //go:noescape func (x Float32x16) StoreMasked(y *[16]float32, mask Mask32x16) -// Float64x8 is a 512-bit SIMD vector of 8 float64 +// Float64x8 is a 512-bit SIMD vector of 8 float64s. type Float64x8 struct { float64x8 v512 vals [8]float64 } -// Len returns the number of elements in a Float64x8 +// Len returns the number of elements in a Float64x8. func (x Float64x8) Len() int { return 8 } -// LoadFloat64x8 loads a Float64x8 from an array +// LoadFloat64x8 loads a Float64x8 from an array. // //go:noescape func LoadFloat64x8(y *[8]float64) Float64x8 -// Store stores a Float64x8 to an array +// Store stores a Float64x8 to an array. // //go:noescape func (x Float64x8) Store(y *[8]float64) // LoadMaskedFloat64x8 loads a Float64x8 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU64.Z, CPU Feature: AVX512 // @@ -788,174 +788,174 @@ func (x Float64x8) Store(y *[8]float64) func LoadMaskedFloat64x8(y *[8]float64, mask Mask64x8) Float64x8 // StoreMasked stores a Float64x8 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU64, CPU Feature: AVX512 // //go:noescape func (x Float64x8) StoreMasked(y *[8]float64, mask Mask64x8) -// Int8x64 is a 512-bit SIMD vector of 64 int8 +// Int8x64 is a 512-bit SIMD vector of 64 int8s. type Int8x64 struct { int8x64 v512 vals [64]int8 } -// Len returns the number of elements in a Int8x64 +// Len returns the number of elements in an Int8x64. func (x Int8x64) Len() int { return 64 } -// LoadInt8x64 loads a Int8x64 from an array +// LoadInt8x64 loads an Int8x64 from an array. 
// //go:noescape func LoadInt8x64(y *[64]int8) Int8x64 -// Store stores a Int8x64 to an array +// Store stores an Int8x64 to an array. // //go:noescape func (x Int8x64) Store(y *[64]int8) -// LoadMaskedInt8x64 loads a Int8x64 from an array, -// at those elements enabled by mask +// LoadMaskedInt8x64 loads an Int8x64 from an array, +// at those elements enabled by mask. // // Asm: VMOVDQU8.Z, CPU Feature: AVX512 // //go:noescape func LoadMaskedInt8x64(y *[64]int8, mask Mask8x64) Int8x64 -// StoreMasked stores a Int8x64 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int8x64 to an array, +// at those elements enabled by mask. // // Asm: VMOVDQU8, CPU Feature: AVX512 // //go:noescape func (x Int8x64) StoreMasked(y *[64]int8, mask Mask8x64) -// Int16x32 is a 512-bit SIMD vector of 32 int16 +// Int16x32 is a 512-bit SIMD vector of 32 int16s. type Int16x32 struct { int16x32 v512 vals [32]int16 } -// Len returns the number of elements in a Int16x32 +// Len returns the number of elements in an Int16x32. func (x Int16x32) Len() int { return 32 } -// LoadInt16x32 loads a Int16x32 from an array +// LoadInt16x32 loads an Int16x32 from an array. // //go:noescape func LoadInt16x32(y *[32]int16) Int16x32 -// Store stores a Int16x32 to an array +// Store stores an Int16x32 to an array. // //go:noescape func (x Int16x32) Store(y *[32]int16) -// LoadMaskedInt16x32 loads a Int16x32 from an array, -// at those elements enabled by mask +// LoadMaskedInt16x32 loads an Int16x32 from an array, +// at those elements enabled by mask. // // Asm: VMOVDQU16.Z, CPU Feature: AVX512 // //go:noescape func LoadMaskedInt16x32(y *[32]int16, mask Mask16x32) Int16x32 -// StoreMasked stores a Int16x32 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int16x32 to an array, +// at those elements enabled by mask. 
// // Asm: VMOVDQU16, CPU Feature: AVX512 // //go:noescape func (x Int16x32) StoreMasked(y *[32]int16, mask Mask16x32) -// Int32x16 is a 512-bit SIMD vector of 16 int32 +// Int32x16 is a 512-bit SIMD vector of 16 int32s. type Int32x16 struct { int32x16 v512 vals [16]int32 } -// Len returns the number of elements in a Int32x16 +// Len returns the number of elements in an Int32x16. func (x Int32x16) Len() int { return 16 } -// LoadInt32x16 loads a Int32x16 from an array +// LoadInt32x16 loads an Int32x16 from an array. // //go:noescape func LoadInt32x16(y *[16]int32) Int32x16 -// Store stores a Int32x16 to an array +// Store stores an Int32x16 to an array. // //go:noescape func (x Int32x16) Store(y *[16]int32) -// LoadMaskedInt32x16 loads a Int32x16 from an array, -// at those elements enabled by mask +// LoadMaskedInt32x16 loads an Int32x16 from an array, +// at those elements enabled by mask. // // Asm: VMOVDQU32.Z, CPU Feature: AVX512 // //go:noescape func LoadMaskedInt32x16(y *[16]int32, mask Mask32x16) Int32x16 -// StoreMasked stores a Int32x16 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int32x16 to an array, +// at those elements enabled by mask. // // Asm: VMOVDQU32, CPU Feature: AVX512 // //go:noescape func (x Int32x16) StoreMasked(y *[16]int32, mask Mask32x16) -// Int64x8 is a 512-bit SIMD vector of 8 int64 +// Int64x8 is a 512-bit SIMD vector of 8 int64s. type Int64x8 struct { int64x8 v512 vals [8]int64 } -// Len returns the number of elements in a Int64x8 +// Len returns the number of elements in an Int64x8. func (x Int64x8) Len() int { return 8 } -// LoadInt64x8 loads a Int64x8 from an array +// LoadInt64x8 loads an Int64x8 from an array. // //go:noescape func LoadInt64x8(y *[8]int64) Int64x8 -// Store stores a Int64x8 to an array +// Store stores an Int64x8 to an array. 
// //go:noescape func (x Int64x8) Store(y *[8]int64) -// LoadMaskedInt64x8 loads a Int64x8 from an array, -// at those elements enabled by mask +// LoadMaskedInt64x8 loads an Int64x8 from an array, +// at those elements enabled by mask. // // Asm: VMOVDQU64.Z, CPU Feature: AVX512 // //go:noescape func LoadMaskedInt64x8(y *[8]int64, mask Mask64x8) Int64x8 -// StoreMasked stores a Int64x8 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int64x8 to an array, +// at those elements enabled by mask. // // Asm: VMOVDQU64, CPU Feature: AVX512 // //go:noescape func (x Int64x8) StoreMasked(y *[8]int64, mask Mask64x8) -// Uint8x64 is a 512-bit SIMD vector of 64 uint8 +// Uint8x64 is a 512-bit SIMD vector of 64 uint8s. type Uint8x64 struct { uint8x64 v512 vals [64]uint8 } -// Len returns the number of elements in a Uint8x64 +// Len returns the number of elements in a Uint8x64. func (x Uint8x64) Len() int { return 64 } -// LoadUint8x64 loads a Uint8x64 from an array +// LoadUint8x64 loads a Uint8x64 from an array. // //go:noescape func LoadUint8x64(y *[64]uint8) Uint8x64 -// Store stores a Uint8x64 to an array +// Store stores a Uint8x64 to an array. // //go:noescape func (x Uint8x64) Store(y *[64]uint8) // LoadMaskedUint8x64 loads a Uint8x64 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU8.Z, CPU Feature: AVX512 // @@ -963,34 +963,34 @@ func (x Uint8x64) Store(y *[64]uint8) func LoadMaskedUint8x64(y *[64]uint8, mask Mask8x64) Uint8x64 // StoreMasked stores a Uint8x64 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU8, CPU Feature: AVX512 // //go:noescape func (x Uint8x64) StoreMasked(y *[64]uint8, mask Mask8x64) -// Uint16x32 is a 512-bit SIMD vector of 32 uint16 +// Uint16x32 is a 512-bit SIMD vector of 32 uint16s. 
type Uint16x32 struct { uint16x32 v512 vals [32]uint16 } -// Len returns the number of elements in a Uint16x32 +// Len returns the number of elements in a Uint16x32. func (x Uint16x32) Len() int { return 32 } -// LoadUint16x32 loads a Uint16x32 from an array +// LoadUint16x32 loads a Uint16x32 from an array. // //go:noescape func LoadUint16x32(y *[32]uint16) Uint16x32 -// Store stores a Uint16x32 to an array +// Store stores a Uint16x32 to an array. // //go:noescape func (x Uint16x32) Store(y *[32]uint16) // LoadMaskedUint16x32 loads a Uint16x32 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU16.Z, CPU Feature: AVX512 // @@ -998,34 +998,34 @@ func (x Uint16x32) Store(y *[32]uint16) func LoadMaskedUint16x32(y *[32]uint16, mask Mask16x32) Uint16x32 // StoreMasked stores a Uint16x32 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU16, CPU Feature: AVX512 // //go:noescape func (x Uint16x32) StoreMasked(y *[32]uint16, mask Mask16x32) -// Uint32x16 is a 512-bit SIMD vector of 16 uint32 +// Uint32x16 is a 512-bit SIMD vector of 16 uint32s. type Uint32x16 struct { uint32x16 v512 vals [16]uint32 } -// Len returns the number of elements in a Uint32x16 +// Len returns the number of elements in a Uint32x16. func (x Uint32x16) Len() int { return 16 } -// LoadUint32x16 loads a Uint32x16 from an array +// LoadUint32x16 loads a Uint32x16 from an array. // //go:noescape func LoadUint32x16(y *[16]uint32) Uint32x16 -// Store stores a Uint32x16 to an array +// Store stores a Uint32x16 to an array. // //go:noescape func (x Uint32x16) Store(y *[16]uint32) // LoadMaskedUint32x16 loads a Uint32x16 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. 
// // Asm: VMOVDQU32.Z, CPU Feature: AVX512 // @@ -1033,34 +1033,34 @@ func (x Uint32x16) Store(y *[16]uint32) func LoadMaskedUint32x16(y *[16]uint32, mask Mask32x16) Uint32x16 // StoreMasked stores a Uint32x16 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU32, CPU Feature: AVX512 // //go:noescape func (x Uint32x16) StoreMasked(y *[16]uint32, mask Mask32x16) -// Uint64x8 is a 512-bit SIMD vector of 8 uint64 +// Uint64x8 is a 512-bit SIMD vector of 8 uint64s. type Uint64x8 struct { uint64x8 v512 vals [8]uint64 } -// Len returns the number of elements in a Uint64x8 +// Len returns the number of elements in a Uint64x8. func (x Uint64x8) Len() int { return 8 } -// LoadUint64x8 loads a Uint64x8 from an array +// LoadUint64x8 loads a Uint64x8 from an array. // //go:noescape func LoadUint64x8(y *[8]uint64) Uint64x8 -// Store stores a Uint64x8 to an array +// Store stores a Uint64x8 to an array. // //go:noescape func (x Uint64x8) Store(y *[8]uint64) // LoadMaskedUint64x8 loads a Uint64x8 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU64.Z, CPU Feature: AVX512 // @@ -1068,14 +1068,14 @@ func (x Uint64x8) Store(y *[8]uint64) func LoadMaskedUint64x8(y *[8]uint64, mask Mask64x8) Uint64x8 // StoreMasked stores a Uint64x8 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU64, CPU Feature: AVX512 // //go:noescape func (x Uint64x8) StoreMasked(y *[8]uint64, mask Mask64x8) -// Mask8x64 is a 512-bit SIMD vector of 64 int8 +// Mask8x64 is a 512-bit SIMD vector of 64 int8s. type Mask8x64 struct { int8x64 v512 vals [64]int8 @@ -1091,7 +1091,7 @@ func Mask8x64FromBits(y uint64) Mask8x64 // Asm: KMOVB, CPU Features: AVX512 func (x Mask8x64) ToBits() uint64 -// Mask16x32 is a 512-bit SIMD vector of 32 int16 +// Mask16x32 is a 512-bit SIMD vector of 32 int16s. 
type Mask16x32 struct { int16x32 v512 vals [32]int16 @@ -1107,7 +1107,7 @@ func Mask16x32FromBits(y uint32) Mask16x32 // Asm: KMOVW, CPU Features: AVX512 func (x Mask16x32) ToBits() uint32 -// Mask32x16 is a 512-bit SIMD vector of 16 int32 +// Mask32x16 is a 512-bit SIMD vector of 16 int32s. type Mask32x16 struct { int32x16 v512 vals [16]int32 @@ -1123,7 +1123,7 @@ func Mask32x16FromBits(y uint16) Mask32x16 // Asm: KMOVD, CPU Features: AVX512 func (x Mask32x16) ToBits() uint16 -// Mask64x8 is a 512-bit SIMD vector of 8 int64 +// Mask64x8 is a 512-bit SIMD vector of 8 int64s. type Mask64x8 struct { int64x8 v512 vals [8]int64 -- 2.52.0