func isDebugFn(fn *ir.Func) bool {
// if n := fn.Nname; n != nil {
- // if n.Sym().Name == "Int32x8.Transpose8" && n.Sym().Pkg.Path == "simd" {
+ // if n.Sym().Name == "Int32x8.Transpose8" && n.Sym().Pkg.Path == "simd/archsimd" {
// fmt.Printf("isDebugFn '%s' DOT '%s'\n", n.Sym().Pkg.Path, n.Sym().Name)
// return true
// }
// Only enable intrinsics if the SIMD experiment is enabled.
simdIntrinsics(addF)
- addF("simd", "ClearAVXUpperBits",
+ addF(simdPackage, "ClearAVXUpperBits",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem())
return nil
addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ // sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
- addF("simd", method,
+ addF(simdPackage, method,
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
x, a, b, c, d, y := args[0], args[1], args[2], args[3], args[4], args[5]
if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 && c.Op == ssa.OpConst8 && d.Op == ssa.OpConst8 {
- return select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
- } else {
- return s.callResult(n, callNormal)
+ z := select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
+ if z != nil {
+ return z
+ }
}
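+ // Fall back to the Go implementation: either the indices aren't constants, or select4FromPair rejected them.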
+ return s.callResult(n, callNormal)
},
sys.AMD64)
}
sfp4("Uint32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512)
sfp4("Float32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x16, types.TypeVec512)
+ // sfp2 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
sfp2 := func(method string, hwop ssa.Op, vectype *types.Type, cscimm func(i, j uint8) int64) {
- addF("simd", method,
+ addF(simdPackage, method,
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
x, a, b, y := args[0], args[1], args[2], args[3]
if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 {
- return select2FromPair(x, a, b, y, s, hwop, vectype, cscimm)
- } else {
- return s.callResult(n, callNormal)
+ z := select2FromPair(x, a, b, y, s, hwop, vectype, cscimm)
+ if z != nil {
+ return z
+ }
}
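+ // Fall back to the Go implementation: either the indices aren't constants, or select2FromPair rejected them.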
+ return s.callResult(n, callNormal)
},
sys.AMD64)
}
func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type, csc func(a, b uint8) int64) *ssa.Value {
a, b := uint8(_a.AuxInt8()), uint8(_b.AuxInt8())
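+ // Indices above 3 are out of range for a pair of 2-element groups; return nil so the caller falls back to the Go implementation.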
+ if a > 3 || b > 3 {
+ return nil
+ }
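+ // pattern packs bit 1 of each index; the indices are then reduced to their low (within-group) bits.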
pattern := (a&2)>>1 + (b&2)
a, b = a&1, b&1
func select4FromPair(x, _a, _b, _c, _d, y *ssa.Value, s *state, op ssa.Op, t *types.Type) *ssa.Value {
a, b, c, d := uint8(_a.AuxInt8()), uint8(_b.AuxInt8()), uint8(_c.AuxInt8()), uint8(_d.AuxInt8())
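+ // Indices above 7 are out of range for a pair of 4-element groups; return nil so the caller falls back to the Go implementation.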
+ if a > 7 || b > 7 || c > 7 || d > 7 {
+ return nil
+ }
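+ // pattern packs bit 2 of each index; the indices are then reduced to their low (within-group) bits.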
pattern := (a&4)>>2 + (b&4)>>1 + (c&4) + (d&4)<<1
a, b, c, d = a&3, b&3, c&3, d&3
fn := sym.Name
if ssa.IntrinsicsDisable {
if pkg == "internal/runtime/sys" && (fn == "GetCallerPC" || fn == "GrtCallerSP" || fn == "GetClosurePtr") ||
- pkg == "internal/simd" || pkg == "simd" { // TODO after simd has been moved to package simd, remove internal/simd
+ pkg == simdPackage {
// These functions don't have definitions and must be intrinsics.
} else {
return nil
gotIntrinsics[testIntrinsicKey{ik.arch.Name, ik.pkg, ik.fn}] = struct{}{}
}
for ik := range gotIntrinsics {
- if _, found := wantIntrinsics[ik]; !found && (ik.pkg != "simd" || *simd) {
+ if _, found := wantIntrinsics[ik]; !found && (ik.pkg != "simd/archsimd" || *simd) {
t.Errorf("Got unwanted intrinsic %v %v.%v", ik.archName, ik.pkg, ik.fn)
}
}
for ik := range wantIntrinsics {
- if _, found := gotIntrinsics[ik]; !found && (ik.pkg != "simd" || *simd) {
+ if _, found := gotIntrinsics[ik]; !found && (ik.pkg != "simd/archsimd" || *simd) {
t.Errorf("Want missing intrinsic %v %v.%v", ik.archName, ik.pkg, ik.fn)
}
}
"cmd/internal/sys"
)
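+// simdPackage is the import path of the user-visible SIMD package; the intrinsics below are registered under it.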
-const simdPackage = "simd"
+const simdPackage = "simd/archsimd"
func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
addF(simdPackage, "Uint8x16.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
} else {
st.floatRegs = 1
}
- // if st.Sym() != nil {
- // base.Warn("Simdify %s, %v, %d", st.Sym().Name, isTag, st.width)
- // } else {
- // base.Warn("Simdify %v, %v, %d", st, isTag, st.width)
- // }
}
// CalcStructSize calculates the size of t,
case sym.Name == "align64" && isAtomicStdPkg(sym.Pkg):
maxAlign = 8
- case buildcfg.Experiment.SIMD && (sym.Pkg.Path == "internal/simd" || sym.Pkg.Path == "simd") && len(t.Fields()) >= 1:
+ case buildcfg.Experiment.SIMD && (sym.Pkg.Path == "simd/archsimd") && len(t.Fields()) >= 1:
// This gates the experiment -- without it, no user-visible types can be "simd".
// The SSA-visible SIMD types remain.
- // TODO after simd has been moved to package simd, remove internal/simd.
switch sym.Name {
case "v128":
simdify(t, true)
"builtin": true,
"cmd/compile/internal/ssa/_gen": true,
"runtime/_mkmalloc": true,
- "simd/_gen/simdgen": true,
- "simd/_gen/unify": true,
+ "simd/archsimd/_gen/simdgen": true,
+ "simd/archsimd/_gen/unify": true,
}
// printPackageMu synchronizes the printing of type-checked package files in
internal/byteorder, internal/cpu, internal/goarch < internal/chacha8rand;
internal/goarch, math/bits < internal/strconv;
- internal/cpu, internal/strconv < simd;
+ internal/cpu, internal/strconv < simd/archsimd;
# RUNTIME is the core runtime group of packages, all of them very light-weight.
internal/abi,
< testing;
testing, math
- < simd/internal/test_helpers;
+ < simd/archsimd/internal/test_helpers;
log/slog, testing
< testing/slogtest;
"builtin": true,
"cmd/compile/internal/ssa/_gen": true,
"runtime/_mkmalloc": true,
- "simd/_gen/simdgen": true,
- "simd/_gen/unify": true,
+ "simd/archsimd/_gen/simdgen": true,
+ "simd/archsimd/_gen/unify": true,
}
// printPackageMu synchronizes the printing of type-checked package files in
+++ /dev/null
-testdata/*
-.gemini/*
-.gemini*
-module simd/_gen
+module simd/archsimd/_gen
go 1.24
exit 1
fi
+# Ensure that goroot is the appropriate ancestor of this directory
which go >/dev/null || exit 1
goroot="$(go env GOROOT)"
-if [[ ! ../../../.. -ef "$goroot" ]]; then
+ancestor="../../../../.."
+if [[ ! $ancestor -ef "$goroot" ]]; then
# We might be able to make this work but it's SO CONFUSING.
- echo >&2 "go command in path has GOROOT $goroot"
- exit 1
-fi
-
-if [[ $(go env GOEXPERIMENT) != simd ]]; then
- echo >&2 "GOEXPERIMENT=$(go env GOEXPERIMENT), expected simd"
+ echo >&2 "go command in path has GOROOT $goroot instead of" `(cd $ancestor; pwd)`
exit 1
fi
go install cmd/compile
# Tests
-GOARCH=amd64 go run -C simd/testdata .
-GOARCH=amd64 go test -v simd
-go test go/doc go/build
-go test cmd/api -v -check -run ^TestCheck$
-go test cmd/compile/internal/ssagen -simd=0
+# Set the GOEXPERIMENT explicitly.
+GOEXPERIMENT=simd GOARCH=amd64 go run -C simd/archsimd/testdata .
+GOEXPERIMENT=simd GOARCH=amd64 go test -v simd/archsimd
+GOEXPERIMENT=simd GOARCH=amd64 go test go/doc go/build
+GOEXPERIMENT=simd GOARCH=amd64 go test cmd/api -v -check -run ^TestCheck$
+GOEXPERIMENT=simd GOARCH=amd64 go test cmd/compile/internal/ssagen -simd=0
# Check tests without the GOEXPERIMENT
GOEXPERIMENT= go test go/doc go/build
const simdPackageHeader = generatedHeader + `
//go:build goexperiment.simd
-package simd
+package archsimd
`
const simdTypesTemplates = `
"strings"
"unicode"
- "simd/_gen/unify"
+ "simd/archsimd/_gen/unify"
)
type Operation struct {
"slices"
"strings"
- "simd/_gen/unify"
+ "simd/archsimd/_gen/unify"
"gopkg.in/yaml.v3"
)
flagMemProfile = flag.String("memprofile", "", "write memory profile to `file`")
)
-const simdPackage = "simd"
+const simdPackage = "simd/archsimd"
func main() {
flag.Parse()
"strconv"
"strings"
- "simd/_gen/unify"
+ "simd/archsimd/_gen/unify"
"golang.org/x/arch/x86/xeddata"
"gopkg.in/yaml.v3"
//go:build goexperiment.simd
-package simd
+package archsimd
`, s)
}
//go:build goexperiment.simd
-package simd
+package archsimd
import "unsafe"
package simd_test
import (
- "simd"
+ "simd/archsimd"
"testing"
)
var unaryTemplate = templateOf("unary_helpers", `
// test{{.VType}}Unary tests the simd unary method f against the expected behavior generated by want
-func test{{.VType}}Unary(t *testing.T, f func(_ simd.{{.VType}}) simd.{{.VType}}, want func(_ []{{.Etype}}) []{{.Etype}}) {
+func test{{.VType}}Unary(t *testing.T, f func(_ archsimd.{{.VType}}) archsimd.{{.VType}}, want func(_ []{{.Etype}}) []{{.Etype}}) {
n := {{.Count}}
t.Helper()
forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper()
- a := simd.Load{{.VType}}Slice(x)
+ a := archsimd.Load{{.VType}}Slice(x)
g := make([]{{.Etype}}, n)
f(a).StoreSlice(g)
w := want(x)
var unaryFlakyTemplate = shapedTemplateOf(unaryFlaky, "unary_flaky_helpers", `
// test{{.VType}}UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func test{{.VType}}UnaryFlaky(t *testing.T, f func(x simd.{{.VType}}) simd.{{.VType}}, want func(x []{{.Etype}}) []{{.Etype}}, flakiness float64) {
+func test{{.VType}}UnaryFlaky(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.VType}}, want func(x []{{.Etype}}) []{{.Etype}}, flakiness float64) {
n := {{.Count}}
t.Helper()
forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper()
- a := simd.Load{{.VType}}Slice(x)
+ a := archsimd.Load{{.VType}}Slice(x)
g := make([]{{.Etype}}, n)
f(a).StoreSlice(g)
w := want(x)
var convertTemplate = templateOf("convert_helpers", `
// test{{.VType}}ConvertTo{{.OEType}} tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x simd.{{.VType}}) simd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) {
+func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) {
n := {{.Count}}
t.Helper()
forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper()
- a := simd.Load{{.VType}}Slice(x)
+ a := archsimd.Load{{.VType}}Slice(x)
g := make([]{{.OEtype}}, n)
f(a).StoreSlice(g)
w := want(x)
var binaryTemplate = templateOf("binary_helpers", `
// test{{.VType}}Binary tests the simd binary method f against the expected behavior generated by want
-func test{{.VType}}Binary(t *testing.T, f func(_, _ simd.{{.VType}}) simd.{{.VType}}, want func(_, _ []{{.Etype}}) []{{.Etype}}) {
+func test{{.VType}}Binary(t *testing.T, f func(_, _ archsimd.{{.VType}}) archsimd.{{.VType}}, want func(_, _ []{{.Etype}}) []{{.Etype}}) {
n := {{.Count}}
t.Helper()
forSlicePair(t, {{.Etype}}s, n, func(x, y []{{.Etype}}) bool {
t.Helper()
- a := simd.Load{{.VType}}Slice(x)
- b := simd.Load{{.VType}}Slice(y)
+ a := archsimd.Load{{.VType}}Slice(x)
+ b := archsimd.Load{{.VType}}Slice(y)
g := make([]{{.Etype}}, n)
f(a, b).StoreSlice(g)
w := want(x, y)
var ternaryTemplate = templateOf("ternary_helpers", `
// test{{.VType}}Ternary tests the simd ternary method f against the expected behavior generated by want
-func test{{.VType}}Ternary(t *testing.T, f func(_, _, _ simd.{{.VType}}) simd.{{.VType}}, want func(_, _, _ []{{.Etype}}) []{{.Etype}}) {
+func test{{.VType}}Ternary(t *testing.T, f func(_, _, _ archsimd.{{.VType}}) archsimd.{{.VType}}, want func(_, _, _ []{{.Etype}}) []{{.Etype}}) {
n := {{.Count}}
t.Helper()
forSliceTriple(t, {{.Etype}}s, n, func(x, y, z []{{.Etype}}) bool {
t.Helper()
- a := simd.Load{{.VType}}Slice(x)
- b := simd.Load{{.VType}}Slice(y)
- c := simd.Load{{.VType}}Slice(z)
+ a := archsimd.Load{{.VType}}Slice(x)
+ b := archsimd.Load{{.VType}}Slice(y)
+ c := archsimd.Load{{.VType}}Slice(z)
g := make([]{{.Etype}}, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
var ternaryFlakyTemplate = shapedTemplateOf(ternaryFlaky, "ternary_helpers", `
// test{{.VType}}TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func test{{.VType}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.VType}}) simd.{{.VType}}, want func(x, y, z []{{.Etype}}) []{{.Etype}}, flakiness float64) {
+func test{{.VType}}TernaryFlaky(t *testing.T, f func(x, y, z archsimd.{{.VType}}) archsimd.{{.VType}}, want func(x, y, z []{{.Etype}}) []{{.Etype}}, flakiness float64) {
n := {{.Count}}
t.Helper()
forSliceTriple(t, {{.Etype}}s, n, func(x, y, z []{{.Etype}}) bool {
t.Helper()
- a := simd.Load{{.VType}}Slice(x)
- b := simd.Load{{.VType}}Slice(y)
- c := simd.Load{{.VType}}Slice(z)
+ a := archsimd.Load{{.VType}}Slice(x)
+ b := archsimd.Load{{.VType}}Slice(y)
+ c := archsimd.Load{{.VType}}Slice(z)
g := make([]{{.Etype}}, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
var compareTemplate = templateOf("compare_helpers", `
// test{{.VType}}Compare tests the simd comparison method f against the expected behavior generated by want
-func test{{.VType}}Compare(t *testing.T, f func(_, _ simd.{{.VType}}) simd.Mask{{.WxC}}, want func(_, _ []{{.Etype}}) []int64) {
+func test{{.VType}}Compare(t *testing.T, f func(_, _ archsimd.{{.VType}}) archsimd.Mask{{.WxC}}, want func(_, _ []{{.Etype}}) []int64) {
n := {{.Count}}
t.Helper()
forSlicePair(t, {{.Etype}}s, n, func(x, y []{{.Etype}}) bool {
t.Helper()
- a := simd.Load{{.VType}}Slice(x)
- b := simd.Load{{.VType}}Slice(y)
+ a := archsimd.Load{{.VType}}Slice(x)
+ b := archsimd.Load{{.VType}}Slice(y)
g := make([]int{{.EWidth}}, n)
f(a, b).AsInt{{.WxC}}().StoreSlice(g)
w := want(x, y)
// test{{.VType}}CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func test{{.VType}}CompareMasked(t *testing.T,
- f func(_, _ simd.{{.VType}}, m simd.Mask{{.WxC}}) simd.Mask{{.WxC}},
+ f func(_, _ archsimd.{{.VType}}, m archsimd.Mask{{.WxC}}) archsimd.Mask{{.WxC}},
want func(_, _ []{{.Etype}}) []int64) {
n := {{.Count}}
t.Helper()
forSlicePairMasked(t, {{.Etype}}s, n, func(x, y []{{.Etype}}, m []bool) bool {
t.Helper()
- a := simd.Load{{.VType}}Slice(x)
- b := simd.Load{{.VType}}Slice(y)
- k := simd.LoadInt{{.WxC}}Slice(toVect[int{{.EWidth}}](m)).ToMask()
+ a := archsimd.Load{{.VType}}Slice(x)
+ b := archsimd.Load{{.VType}}Slice(y)
+ k := archsimd.LoadInt{{.WxC}}Slice(toVect[int{{.EWidth}}](m)).ToMask()
g := make([]int{{.EWidth}}, n)
f(a, b, k).AsInt{{.WxC}}().StoreSlice(g)
w := want(x, y)
const SIMD = "../../"
const TD = "../../internal/simd_test/"
-const SSA = "../../../cmd/compile/internal/ssa/"
+const SSA = "../../../../cmd/compile/internal/ssa/"
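+// Relative to the generator under simd/archsimd/_gen, src/ is four directories up.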
func main() {
sl := flag.String("sl", SIMD+"slice_gen_amd64.go", "file name for slice operations")
//go:build goexperiment.simd
-package simd
+package archsimd
// Less returns a mask whose elements indicate whether x < y
//
//go:build goexperiment.simd
-package simd
+package archsimd
import "internal/cpu"
// This exposes some internal interfaces to simd_test.
-package simd
+package archsimd
func (x Int64x2) ExportTestConcatSelectedConstant(indices uint8, y Int64x2) Int64x2 {
return x.concatSelectedConstant(indices, y)
//go:build goexperiment.simd && amd64
-package simd
+package archsimd
// ClearAVXUpperBits clears the high bits of Y0-Y15 and Z0-Z15 registers.
// It is intended for transitioning from AVX to SSE, eliminating the
//go:build goexperiment.simd
-package simd
+package archsimd
// Invoke code generators.
package simd_test
import (
- "simd"
+ "simd/archsimd"
"testing"
)
// testInt8x16Binary tests the simd binary method f against the expected behavior generated by want
-func testInt8x16Binary(t *testing.T, f func(_, _ simd.Int8x16) simd.Int8x16, want func(_, _ []int8) []int8) {
+func testInt8x16Binary(t *testing.T, f func(_, _ archsimd.Int8x16) archsimd.Int8x16, want func(_, _ []int8) []int8) {
n := 16
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
- a := simd.LoadInt8x16Slice(x)
- b := simd.LoadInt8x16Slice(y)
+ a := archsimd.LoadInt8x16Slice(x)
+ b := archsimd.LoadInt8x16Slice(y)
g := make([]int8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt16x8Binary tests the simd binary method f against the expected behavior generated by want
-func testInt16x8Binary(t *testing.T, f func(_, _ simd.Int16x8) simd.Int16x8, want func(_, _ []int16) []int16) {
+func testInt16x8Binary(t *testing.T, f func(_, _ archsimd.Int16x8) archsimd.Int16x8, want func(_, _ []int16) []int16) {
n := 8
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
- a := simd.LoadInt16x8Slice(x)
- b := simd.LoadInt16x8Slice(y)
+ a := archsimd.LoadInt16x8Slice(x)
+ b := archsimd.LoadInt16x8Slice(y)
g := make([]int16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt32x4Binary tests the simd binary method f against the expected behavior generated by want
-func testInt32x4Binary(t *testing.T, f func(_, _ simd.Int32x4) simd.Int32x4, want func(_, _ []int32) []int32) {
+func testInt32x4Binary(t *testing.T, f func(_, _ archsimd.Int32x4) archsimd.Int32x4, want func(_, _ []int32) []int32) {
n := 4
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
- a := simd.LoadInt32x4Slice(x)
- b := simd.LoadInt32x4Slice(y)
+ a := archsimd.LoadInt32x4Slice(x)
+ b := archsimd.LoadInt32x4Slice(y)
g := make([]int32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt64x2Binary tests the simd binary method f against the expected behavior generated by want
-func testInt64x2Binary(t *testing.T, f func(_, _ simd.Int64x2) simd.Int64x2, want func(_, _ []int64) []int64) {
+func testInt64x2Binary(t *testing.T, f func(_, _ archsimd.Int64x2) archsimd.Int64x2, want func(_, _ []int64) []int64) {
n := 2
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
- a := simd.LoadInt64x2Slice(x)
- b := simd.LoadInt64x2Slice(y)
+ a := archsimd.LoadInt64x2Slice(x)
+ b := archsimd.LoadInt64x2Slice(y)
g := make([]int64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint8x16Binary tests the simd binary method f against the expected behavior generated by want
-func testUint8x16Binary(t *testing.T, f func(_, _ simd.Uint8x16) simd.Uint8x16, want func(_, _ []uint8) []uint8) {
+func testUint8x16Binary(t *testing.T, f func(_, _ archsimd.Uint8x16) archsimd.Uint8x16, want func(_, _ []uint8) []uint8) {
n := 16
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
- a := simd.LoadUint8x16Slice(x)
- b := simd.LoadUint8x16Slice(y)
+ a := archsimd.LoadUint8x16Slice(x)
+ b := archsimd.LoadUint8x16Slice(y)
g := make([]uint8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint16x8Binary tests the simd binary method f against the expected behavior generated by want
-func testUint16x8Binary(t *testing.T, f func(_, _ simd.Uint16x8) simd.Uint16x8, want func(_, _ []uint16) []uint16) {
+func testUint16x8Binary(t *testing.T, f func(_, _ archsimd.Uint16x8) archsimd.Uint16x8, want func(_, _ []uint16) []uint16) {
n := 8
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
- a := simd.LoadUint16x8Slice(x)
- b := simd.LoadUint16x8Slice(y)
+ a := archsimd.LoadUint16x8Slice(x)
+ b := archsimd.LoadUint16x8Slice(y)
g := make([]uint16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint32x4Binary tests the simd binary method f against the expected behavior generated by want
-func testUint32x4Binary(t *testing.T, f func(_, _ simd.Uint32x4) simd.Uint32x4, want func(_, _ []uint32) []uint32) {
+func testUint32x4Binary(t *testing.T, f func(_, _ archsimd.Uint32x4) archsimd.Uint32x4, want func(_, _ []uint32) []uint32) {
n := 4
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
- a := simd.LoadUint32x4Slice(x)
- b := simd.LoadUint32x4Slice(y)
+ a := archsimd.LoadUint32x4Slice(x)
+ b := archsimd.LoadUint32x4Slice(y)
g := make([]uint32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint64x2Binary tests the simd binary method f against the expected behavior generated by want
-func testUint64x2Binary(t *testing.T, f func(_, _ simd.Uint64x2) simd.Uint64x2, want func(_, _ []uint64) []uint64) {
+func testUint64x2Binary(t *testing.T, f func(_, _ archsimd.Uint64x2) archsimd.Uint64x2, want func(_, _ []uint64) []uint64) {
n := 2
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
- a := simd.LoadUint64x2Slice(x)
- b := simd.LoadUint64x2Slice(y)
+ a := archsimd.LoadUint64x2Slice(x)
+ b := archsimd.LoadUint64x2Slice(y)
g := make([]uint64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testFloat32x4Binary tests the simd binary method f against the expected behavior generated by want
-func testFloat32x4Binary(t *testing.T, f func(_, _ simd.Float32x4) simd.Float32x4, want func(_, _ []float32) []float32) {
+func testFloat32x4Binary(t *testing.T, f func(_, _ archsimd.Float32x4) archsimd.Float32x4, want func(_, _ []float32) []float32) {
n := 4
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
- a := simd.LoadFloat32x4Slice(x)
- b := simd.LoadFloat32x4Slice(y)
+ a := archsimd.LoadFloat32x4Slice(x)
+ b := archsimd.LoadFloat32x4Slice(y)
g := make([]float32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testFloat64x2Binary tests the simd binary method f against the expected behavior generated by want
-func testFloat64x2Binary(t *testing.T, f func(_, _ simd.Float64x2) simd.Float64x2, want func(_, _ []float64) []float64) {
+func testFloat64x2Binary(t *testing.T, f func(_, _ archsimd.Float64x2) archsimd.Float64x2, want func(_, _ []float64) []float64) {
n := 2
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
- a := simd.LoadFloat64x2Slice(x)
- b := simd.LoadFloat64x2Slice(y)
+ a := archsimd.LoadFloat64x2Slice(x)
+ b := archsimd.LoadFloat64x2Slice(y)
g := make([]float64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt8x32Binary tests the simd binary method f against the expected behavior generated by want
-func testInt8x32Binary(t *testing.T, f func(_, _ simd.Int8x32) simd.Int8x32, want func(_, _ []int8) []int8) {
+func testInt8x32Binary(t *testing.T, f func(_, _ archsimd.Int8x32) archsimd.Int8x32, want func(_, _ []int8) []int8) {
n := 32
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
- a := simd.LoadInt8x32Slice(x)
- b := simd.LoadInt8x32Slice(y)
+ a := archsimd.LoadInt8x32Slice(x)
+ b := archsimd.LoadInt8x32Slice(y)
g := make([]int8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt16x16Binary tests the simd binary method f against the expected behavior generated by want
-func testInt16x16Binary(t *testing.T, f func(_, _ simd.Int16x16) simd.Int16x16, want func(_, _ []int16) []int16) {
+func testInt16x16Binary(t *testing.T, f func(_, _ archsimd.Int16x16) archsimd.Int16x16, want func(_, _ []int16) []int16) {
n := 16
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
- a := simd.LoadInt16x16Slice(x)
- b := simd.LoadInt16x16Slice(y)
+ a := archsimd.LoadInt16x16Slice(x)
+ b := archsimd.LoadInt16x16Slice(y)
g := make([]int16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt32x8Binary tests the simd binary method f against the expected behavior generated by want
-func testInt32x8Binary(t *testing.T, f func(_, _ simd.Int32x8) simd.Int32x8, want func(_, _ []int32) []int32) {
+func testInt32x8Binary(t *testing.T, f func(_, _ archsimd.Int32x8) archsimd.Int32x8, want func(_, _ []int32) []int32) {
n := 8
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
- a := simd.LoadInt32x8Slice(x)
- b := simd.LoadInt32x8Slice(y)
+ a := archsimd.LoadInt32x8Slice(x)
+ b := archsimd.LoadInt32x8Slice(y)
g := make([]int32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt64x4Binary tests the simd binary method f against the expected behavior generated by want
-func testInt64x4Binary(t *testing.T, f func(_, _ simd.Int64x4) simd.Int64x4, want func(_, _ []int64) []int64) {
+func testInt64x4Binary(t *testing.T, f func(_, _ archsimd.Int64x4) archsimd.Int64x4, want func(_, _ []int64) []int64) {
n := 4
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
- a := simd.LoadInt64x4Slice(x)
- b := simd.LoadInt64x4Slice(y)
+ a := archsimd.LoadInt64x4Slice(x)
+ b := archsimd.LoadInt64x4Slice(y)
g := make([]int64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint8x32Binary tests the simd binary method f against the expected behavior generated by want
-func testUint8x32Binary(t *testing.T, f func(_, _ simd.Uint8x32) simd.Uint8x32, want func(_, _ []uint8) []uint8) {
+func testUint8x32Binary(t *testing.T, f func(_, _ archsimd.Uint8x32) archsimd.Uint8x32, want func(_, _ []uint8) []uint8) {
n := 32
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
- a := simd.LoadUint8x32Slice(x)
- b := simd.LoadUint8x32Slice(y)
+ a := archsimd.LoadUint8x32Slice(x)
+ b := archsimd.LoadUint8x32Slice(y)
g := make([]uint8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint16x16Binary tests the simd binary method f against the expected behavior generated by want
-func testUint16x16Binary(t *testing.T, f func(_, _ simd.Uint16x16) simd.Uint16x16, want func(_, _ []uint16) []uint16) {
+func testUint16x16Binary(t *testing.T, f func(_, _ archsimd.Uint16x16) archsimd.Uint16x16, want func(_, _ []uint16) []uint16) {
n := 16
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
- a := simd.LoadUint16x16Slice(x)
- b := simd.LoadUint16x16Slice(y)
+ a := archsimd.LoadUint16x16Slice(x)
+ b := archsimd.LoadUint16x16Slice(y)
g := make([]uint16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint32x8Binary tests the simd binary method f against the expected behavior generated by want
-func testUint32x8Binary(t *testing.T, f func(_, _ simd.Uint32x8) simd.Uint32x8, want func(_, _ []uint32) []uint32) {
+func testUint32x8Binary(t *testing.T, f func(_, _ archsimd.Uint32x8) archsimd.Uint32x8, want func(_, _ []uint32) []uint32) {
n := 8
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
- a := simd.LoadUint32x8Slice(x)
- b := simd.LoadUint32x8Slice(y)
+ a := archsimd.LoadUint32x8Slice(x)
+ b := archsimd.LoadUint32x8Slice(y)
g := make([]uint32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint64x4Binary tests the simd binary method f against the expected behavior generated by want
-func testUint64x4Binary(t *testing.T, f func(_, _ simd.Uint64x4) simd.Uint64x4, want func(_, _ []uint64) []uint64) {
+func testUint64x4Binary(t *testing.T, f func(_, _ archsimd.Uint64x4) archsimd.Uint64x4, want func(_, _ []uint64) []uint64) {
n := 4
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
- a := simd.LoadUint64x4Slice(x)
- b := simd.LoadUint64x4Slice(y)
+ a := archsimd.LoadUint64x4Slice(x)
+ b := archsimd.LoadUint64x4Slice(y)
g := make([]uint64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testFloat32x8Binary tests the simd binary method f against the expected behavior generated by want
-func testFloat32x8Binary(t *testing.T, f func(_, _ simd.Float32x8) simd.Float32x8, want func(_, _ []float32) []float32) {
+func testFloat32x8Binary(t *testing.T, f func(_, _ archsimd.Float32x8) archsimd.Float32x8, want func(_, _ []float32) []float32) {
n := 8
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
- b := simd.LoadFloat32x8Slice(y)
+ a := archsimd.LoadFloat32x8Slice(x)
+ b := archsimd.LoadFloat32x8Slice(y)
g := make([]float32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testFloat64x4Binary tests the simd binary method f against the expected behavior generated by want
-func testFloat64x4Binary(t *testing.T, f func(_, _ simd.Float64x4) simd.Float64x4, want func(_, _ []float64) []float64) {
+func testFloat64x4Binary(t *testing.T, f func(_, _ archsimd.Float64x4) archsimd.Float64x4, want func(_, _ []float64) []float64) {
n := 4
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
- a := simd.LoadFloat64x4Slice(x)
- b := simd.LoadFloat64x4Slice(y)
+ a := archsimd.LoadFloat64x4Slice(x)
+ b := archsimd.LoadFloat64x4Slice(y)
g := make([]float64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt8x64Binary tests the simd binary method f against the expected behavior generated by want
-func testInt8x64Binary(t *testing.T, f func(_, _ simd.Int8x64) simd.Int8x64, want func(_, _ []int8) []int8) {
+func testInt8x64Binary(t *testing.T, f func(_, _ archsimd.Int8x64) archsimd.Int8x64, want func(_, _ []int8) []int8) {
n := 64
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
- a := simd.LoadInt8x64Slice(x)
- b := simd.LoadInt8x64Slice(y)
+ a := archsimd.LoadInt8x64Slice(x)
+ b := archsimd.LoadInt8x64Slice(y)
g := make([]int8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt16x32Binary tests the simd binary method f against the expected behavior generated by want
-func testInt16x32Binary(t *testing.T, f func(_, _ simd.Int16x32) simd.Int16x32, want func(_, _ []int16) []int16) {
+func testInt16x32Binary(t *testing.T, f func(_, _ archsimd.Int16x32) archsimd.Int16x32, want func(_, _ []int16) []int16) {
n := 32
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
- a := simd.LoadInt16x32Slice(x)
- b := simd.LoadInt16x32Slice(y)
+ a := archsimd.LoadInt16x32Slice(x)
+ b := archsimd.LoadInt16x32Slice(y)
g := make([]int16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt32x16Binary tests the simd binary method f against the expected behavior generated by want
-func testInt32x16Binary(t *testing.T, f func(_, _ simd.Int32x16) simd.Int32x16, want func(_, _ []int32) []int32) {
+func testInt32x16Binary(t *testing.T, f func(_, _ archsimd.Int32x16) archsimd.Int32x16, want func(_, _ []int32) []int32) {
n := 16
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
- a := simd.LoadInt32x16Slice(x)
- b := simd.LoadInt32x16Slice(y)
+ a := archsimd.LoadInt32x16Slice(x)
+ b := archsimd.LoadInt32x16Slice(y)
g := make([]int32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testInt64x8Binary tests the simd binary method f against the expected behavior generated by want
-func testInt64x8Binary(t *testing.T, f func(_, _ simd.Int64x8) simd.Int64x8, want func(_, _ []int64) []int64) {
+func testInt64x8Binary(t *testing.T, f func(_, _ archsimd.Int64x8) archsimd.Int64x8, want func(_, _ []int64) []int64) {
n := 8
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
- a := simd.LoadInt64x8Slice(x)
- b := simd.LoadInt64x8Slice(y)
+ a := archsimd.LoadInt64x8Slice(x)
+ b := archsimd.LoadInt64x8Slice(y)
g := make([]int64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint8x64Binary tests the simd binary method f against the expected behavior generated by want
-func testUint8x64Binary(t *testing.T, f func(_, _ simd.Uint8x64) simd.Uint8x64, want func(_, _ []uint8) []uint8) {
+func testUint8x64Binary(t *testing.T, f func(_, _ archsimd.Uint8x64) archsimd.Uint8x64, want func(_, _ []uint8) []uint8) {
n := 64
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
- a := simd.LoadUint8x64Slice(x)
- b := simd.LoadUint8x64Slice(y)
+ a := archsimd.LoadUint8x64Slice(x)
+ b := archsimd.LoadUint8x64Slice(y)
g := make([]uint8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint16x32Binary tests the simd binary method f against the expected behavior generated by want
-func testUint16x32Binary(t *testing.T, f func(_, _ simd.Uint16x32) simd.Uint16x32, want func(_, _ []uint16) []uint16) {
+func testUint16x32Binary(t *testing.T, f func(_, _ archsimd.Uint16x32) archsimd.Uint16x32, want func(_, _ []uint16) []uint16) {
n := 32
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
- a := simd.LoadUint16x32Slice(x)
- b := simd.LoadUint16x32Slice(y)
+ a := archsimd.LoadUint16x32Slice(x)
+ b := archsimd.LoadUint16x32Slice(y)
g := make([]uint16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint32x16Binary tests the simd binary method f against the expected behavior generated by want
-func testUint32x16Binary(t *testing.T, f func(_, _ simd.Uint32x16) simd.Uint32x16, want func(_, _ []uint32) []uint32) {
+func testUint32x16Binary(t *testing.T, f func(_, _ archsimd.Uint32x16) archsimd.Uint32x16, want func(_, _ []uint32) []uint32) {
n := 16
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
- a := simd.LoadUint32x16Slice(x)
- b := simd.LoadUint32x16Slice(y)
+ a := archsimd.LoadUint32x16Slice(x)
+ b := archsimd.LoadUint32x16Slice(y)
g := make([]uint32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testUint64x8Binary tests the simd binary method f against the expected behavior generated by want
-func testUint64x8Binary(t *testing.T, f func(_, _ simd.Uint64x8) simd.Uint64x8, want func(_, _ []uint64) []uint64) {
+func testUint64x8Binary(t *testing.T, f func(_, _ archsimd.Uint64x8) archsimd.Uint64x8, want func(_, _ []uint64) []uint64) {
n := 8
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
- a := simd.LoadUint64x8Slice(x)
- b := simd.LoadUint64x8Slice(y)
+ a := archsimd.LoadUint64x8Slice(x)
+ b := archsimd.LoadUint64x8Slice(y)
g := make([]uint64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testFloat32x16Binary tests the simd binary method f against the expected behavior generated by want
-func testFloat32x16Binary(t *testing.T, f func(_, _ simd.Float32x16) simd.Float32x16, want func(_, _ []float32) []float32) {
+func testFloat32x16Binary(t *testing.T, f func(_, _ archsimd.Float32x16) archsimd.Float32x16, want func(_, _ []float32) []float32) {
n := 16
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
- b := simd.LoadFloat32x16Slice(y)
+ a := archsimd.LoadFloat32x16Slice(x)
+ b := archsimd.LoadFloat32x16Slice(y)
g := make([]float32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
}
// testFloat64x8Binary tests the simd binary method f against the expected behavior generated by want
-func testFloat64x8Binary(t *testing.T, f func(_, _ simd.Float64x8) simd.Float64x8, want func(_, _ []float64) []float64) {
+func testFloat64x8Binary(t *testing.T, f func(_, _ archsimd.Float64x8) archsimd.Float64x8, want func(_, _ []float64) []float64) {
n := 8
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
- a := simd.LoadFloat64x8Slice(x)
- b := simd.LoadFloat64x8Slice(y)
+ a := archsimd.LoadFloat64x8Slice(x)
+ b := archsimd.LoadFloat64x8Slice(y)
g := make([]float64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.simd && amd64
+
+package simd_test
+
+import (
+ "simd/archsimd"
+ "testing"
+)
+
+func TestAdd(t *testing.T) {
+ testFloat32x4Binary(t, archsimd.Float32x4.Add, addSlice[float32])
+ testFloat32x8Binary(t, archsimd.Float32x8.Add, addSlice[float32])
+ testFloat64x2Binary(t, archsimd.Float64x2.Add, addSlice[float64])
+ testFloat64x4Binary(t, archsimd.Float64x4.Add, addSlice[float64])
+
+ testInt16x16Binary(t, archsimd.Int16x16.Add, addSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.Add, addSlice[int16])
+ testInt32x4Binary(t, archsimd.Int32x4.Add, addSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.Add, addSlice[int32])
+ testInt64x2Binary(t, archsimd.Int64x2.Add, addSlice[int64])
+ testInt64x4Binary(t, archsimd.Int64x4.Add, addSlice[int64])
+ testInt8x16Binary(t, archsimd.Int8x16.Add, addSlice[int8])
+ testInt8x32Binary(t, archsimd.Int8x32.Add, addSlice[int8])
+
+ testUint16x16Binary(t, archsimd.Uint16x16.Add, addSlice[uint16])
+ testUint16x8Binary(t, archsimd.Uint16x8.Add, addSlice[uint16])
+ testUint32x4Binary(t, archsimd.Uint32x4.Add, addSlice[uint32])
+ testUint32x8Binary(t, archsimd.Uint32x8.Add, addSlice[uint32])
+ testUint64x2Binary(t, archsimd.Uint64x2.Add, addSlice[uint64])
+ testUint64x4Binary(t, archsimd.Uint64x4.Add, addSlice[uint64])
+ testUint8x16Binary(t, archsimd.Uint8x16.Add, addSlice[uint8])
+ testUint8x32Binary(t, archsimd.Uint8x32.Add, addSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Binary(t, archsimd.Float32x16.Add, addSlice[float32])
+ testFloat64x8Binary(t, archsimd.Float64x8.Add, addSlice[float64])
+ testInt8x64Binary(t, archsimd.Int8x64.Add, addSlice[int8])
+ testInt16x32Binary(t, archsimd.Int16x32.Add, addSlice[int16])
+ testInt32x16Binary(t, archsimd.Int32x16.Add, addSlice[int32])
+ testInt64x8Binary(t, archsimd.Int64x8.Add, addSlice[int64])
+ testUint8x64Binary(t, archsimd.Uint8x64.Add, addSlice[uint8])
+ testUint16x32Binary(t, archsimd.Uint16x32.Add, addSlice[uint16])
+ testUint32x16Binary(t, archsimd.Uint32x16.Add, addSlice[uint32])
+ testUint64x8Binary(t, archsimd.Uint64x8.Add, addSlice[uint64])
+ }
+}
+
+func TestSub(t *testing.T) {
+ testFloat32x4Binary(t, archsimd.Float32x4.Sub, subSlice[float32])
+ testFloat32x8Binary(t, archsimd.Float32x8.Sub, subSlice[float32])
+ testFloat64x2Binary(t, archsimd.Float64x2.Sub, subSlice[float64])
+ testFloat64x4Binary(t, archsimd.Float64x4.Sub, subSlice[float64])
+
+ testInt16x16Binary(t, archsimd.Int16x16.Sub, subSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.Sub, subSlice[int16])
+ testInt32x4Binary(t, archsimd.Int32x4.Sub, subSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.Sub, subSlice[int32])
+ testInt64x2Binary(t, archsimd.Int64x2.Sub, subSlice[int64])
+ testInt64x4Binary(t, archsimd.Int64x4.Sub, subSlice[int64])
+ testInt8x16Binary(t, archsimd.Int8x16.Sub, subSlice[int8])
+ testInt8x32Binary(t, archsimd.Int8x32.Sub, subSlice[int8])
+
+ testUint16x16Binary(t, archsimd.Uint16x16.Sub, subSlice[uint16])
+ testUint16x8Binary(t, archsimd.Uint16x8.Sub, subSlice[uint16])
+ testUint32x4Binary(t, archsimd.Uint32x4.Sub, subSlice[uint32])
+ testUint32x8Binary(t, archsimd.Uint32x8.Sub, subSlice[uint32])
+ testUint64x2Binary(t, archsimd.Uint64x2.Sub, subSlice[uint64])
+ testUint64x4Binary(t, archsimd.Uint64x4.Sub, subSlice[uint64])
+ testUint8x16Binary(t, archsimd.Uint8x16.Sub, subSlice[uint8])
+ testUint8x32Binary(t, archsimd.Uint8x32.Sub, subSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Binary(t, archsimd.Float32x16.Sub, subSlice[float32])
+ testFloat64x8Binary(t, archsimd.Float64x8.Sub, subSlice[float64])
+ testInt8x64Binary(t, archsimd.Int8x64.Sub, subSlice[int8])
+ testInt16x32Binary(t, archsimd.Int16x32.Sub, subSlice[int16])
+ testInt32x16Binary(t, archsimd.Int32x16.Sub, subSlice[int32])
+ testInt64x8Binary(t, archsimd.Int64x8.Sub, subSlice[int64])
+ testUint8x64Binary(t, archsimd.Uint8x64.Sub, subSlice[uint8])
+ testUint16x32Binary(t, archsimd.Uint16x32.Sub, subSlice[uint16])
+ testUint32x16Binary(t, archsimd.Uint32x16.Sub, subSlice[uint32])
+ testUint64x8Binary(t, archsimd.Uint64x8.Sub, subSlice[uint64])
+ }
+}
+
+func TestMax(t *testing.T) {
+ // testFloat32x4Binary(t, archsimd.Float32x4.Max, maxSlice[float32]) // nan is wrong
+ // testFloat32x8Binary(t, archsimd.Float32x8.Max, maxSlice[float32]) // nan is wrong
+ // testFloat64x2Binary(t, archsimd.Float64x2.Max, maxSlice[float64]) // nan is wrong
+ // testFloat64x4Binary(t, archsimd.Float64x4.Max, maxSlice[float64]) // nan is wrong
+
+ testInt16x16Binary(t, archsimd.Int16x16.Max, maxSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.Max, maxSlice[int16])
+ testInt32x4Binary(t, archsimd.Int32x4.Max, maxSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.Max, maxSlice[int32])
+
+ if archsimd.X86.AVX512() {
+ testInt64x2Binary(t, archsimd.Int64x2.Max, maxSlice[int64])
+ testInt64x4Binary(t, archsimd.Int64x4.Max, maxSlice[int64])
+ }
+
+ testInt8x16Binary(t, archsimd.Int8x16.Max, maxSlice[int8])
+ testInt8x32Binary(t, archsimd.Int8x32.Max, maxSlice[int8])
+
+ testUint16x16Binary(t, archsimd.Uint16x16.Max, maxSlice[uint16])
+ testUint16x8Binary(t, archsimd.Uint16x8.Max, maxSlice[uint16])
+ testUint32x4Binary(t, archsimd.Uint32x4.Max, maxSlice[uint32])
+ testUint32x8Binary(t, archsimd.Uint32x8.Max, maxSlice[uint32])
+
+ if archsimd.X86.AVX512() {
+ testUint64x2Binary(t, archsimd.Uint64x2.Max, maxSlice[uint64])
+ testUint64x4Binary(t, archsimd.Uint64x4.Max, maxSlice[uint64])
+ }
+
+ testUint8x16Binary(t, archsimd.Uint8x16.Max, maxSlice[uint8])
+ testUint8x32Binary(t, archsimd.Uint8x32.Max, maxSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ // testFloat32x16Binary(t, archsimd.Float32x16.Max, maxSlice[float32]) // nan is wrong
+ // testFloat64x8Binary(t, archsimd.Float64x8.Max, maxSlice[float64]) // nan is wrong
+ testInt8x64Binary(t, archsimd.Int8x64.Max, maxSlice[int8])
+ testInt16x32Binary(t, archsimd.Int16x32.Max, maxSlice[int16])
+ testInt32x16Binary(t, archsimd.Int32x16.Max, maxSlice[int32])
+ testInt64x8Binary(t, archsimd.Int64x8.Max, maxSlice[int64])
+ testUint8x64Binary(t, archsimd.Uint8x64.Max, maxSlice[uint8])
+ testUint16x32Binary(t, archsimd.Uint16x32.Max, maxSlice[uint16])
+ testUint32x16Binary(t, archsimd.Uint32x16.Max, maxSlice[uint32])
+ testUint64x8Binary(t, archsimd.Uint64x8.Max, maxSlice[uint64])
+ }
+}
+
+func TestMin(t *testing.T) {
+ // testFloat32x4Binary(t, archsimd.Float32x4.Min, minSlice[float32]) // nan is wrong
+ // testFloat32x8Binary(t, archsimd.Float32x8.Min, minSlice[float32]) // nan is wrong
+ // testFloat64x2Binary(t, archsimd.Float64x2.Min, minSlice[float64]) // nan is wrong
+ // testFloat64x4Binary(t, archsimd.Float64x4.Min, minSlice[float64]) // nan is wrong
+
+ testInt16x16Binary(t, archsimd.Int16x16.Min, minSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.Min, minSlice[int16])
+ testInt32x4Binary(t, archsimd.Int32x4.Min, minSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.Min, minSlice[int32])
+
+ if archsimd.X86.AVX512() {
+ testInt64x2Binary(t, archsimd.Int64x2.Min, minSlice[int64])
+ testInt64x4Binary(t, archsimd.Int64x4.Min, minSlice[int64])
+ }
+
+ testInt8x16Binary(t, archsimd.Int8x16.Min, minSlice[int8])
+ testInt8x32Binary(t, archsimd.Int8x32.Min, minSlice[int8])
+
+ testUint16x16Binary(t, archsimd.Uint16x16.Min, minSlice[uint16])
+ testUint16x8Binary(t, archsimd.Uint16x8.Min, minSlice[uint16])
+ testUint32x4Binary(t, archsimd.Uint32x4.Min, minSlice[uint32])
+ testUint32x8Binary(t, archsimd.Uint32x8.Min, minSlice[uint32])
+
+ if archsimd.X86.AVX512() {
+ testUint64x2Binary(t, archsimd.Uint64x2.Min, minSlice[uint64])
+ testUint64x4Binary(t, archsimd.Uint64x4.Min, minSlice[uint64])
+ }
+
+ testUint8x16Binary(t, archsimd.Uint8x16.Min, minSlice[uint8])
+ testUint8x32Binary(t, archsimd.Uint8x32.Min, minSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ // testFloat32x16Binary(t, archsimd.Float32x16.Min, minSlice[float32]) // nan is wrong
+ // testFloat64x8Binary(t, archsimd.Float64x8.Min, minSlice[float64]) // nan is wrong
+ testInt8x64Binary(t, archsimd.Int8x64.Min, minSlice[int8])
+ testInt16x32Binary(t, archsimd.Int16x32.Min, minSlice[int16])
+ testInt32x16Binary(t, archsimd.Int32x16.Min, minSlice[int32])
+ testInt64x8Binary(t, archsimd.Int64x8.Min, minSlice[int64])
+ testUint8x64Binary(t, archsimd.Uint8x64.Min, minSlice[uint8])
+ testUint16x32Binary(t, archsimd.Uint16x32.Min, minSlice[uint16])
+ testUint32x16Binary(t, archsimd.Uint32x16.Min, minSlice[uint32])
+ testUint64x8Binary(t, archsimd.Uint64x8.Min, minSlice[uint64])
+ }
+}
+
+func TestAnd(t *testing.T) {
+ testInt16x16Binary(t, archsimd.Int16x16.And, andSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.And, andSlice[int16])
+ testInt32x4Binary(t, archsimd.Int32x4.And, andSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.And, andSlice[int32])
+ testInt64x2Binary(t, archsimd.Int64x2.And, andSlice[int64])
+ testInt64x4Binary(t, archsimd.Int64x4.And, andSlice[int64])
+ testInt8x16Binary(t, archsimd.Int8x16.And, andSlice[int8])
+ testInt8x32Binary(t, archsimd.Int8x32.And, andSlice[int8])
+
+ testUint16x16Binary(t, archsimd.Uint16x16.And, andSlice[uint16])
+ testUint16x8Binary(t, archsimd.Uint16x8.And, andSlice[uint16])
+ testUint32x4Binary(t, archsimd.Uint32x4.And, andSlice[uint32])
+ testUint32x8Binary(t, archsimd.Uint32x8.And, andSlice[uint32])
+ testUint64x2Binary(t, archsimd.Uint64x2.And, andSlice[uint64])
+ testUint64x4Binary(t, archsimd.Uint64x4.And, andSlice[uint64])
+ testUint8x16Binary(t, archsimd.Uint8x16.And, andSlice[uint8])
+ testUint8x32Binary(t, archsimd.Uint8x32.And, andSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ // testInt8x64Binary(t, archsimd.Int8x64.And, andSlice[int8]) // missing
+ // testInt16x32Binary(t, archsimd.Int16x32.And, andSlice[int16]) // missing
+ testInt32x16Binary(t, archsimd.Int32x16.And, andSlice[int32])
+ testInt64x8Binary(t, archsimd.Int64x8.And, andSlice[int64])
+ // testUint8x64Binary(t, archsimd.Uint8x64.And, andSlice[uint8]) // missing
+ // testUint16x32Binary(t, archsimd.Uint16x32.And, andSlice[uint16]) // missing
+ testUint32x16Binary(t, archsimd.Uint32x16.And, andSlice[uint32])
+ testUint64x8Binary(t, archsimd.Uint64x8.And, andSlice[uint64])
+ }
+}
+
+func TestAndNot(t *testing.T) {
+ testInt16x16Binary(t, archsimd.Int16x16.AndNot, andNotSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.AndNot, andNotSlice[int16])
+ testInt32x4Binary(t, archsimd.Int32x4.AndNot, andNotSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.AndNot, andNotSlice[int32])
+ testInt64x2Binary(t, archsimd.Int64x2.AndNot, andNotSlice[int64])
+ testInt64x4Binary(t, archsimd.Int64x4.AndNot, andNotSlice[int64])
+ testInt8x16Binary(t, archsimd.Int8x16.AndNot, andNotSlice[int8])
+ testInt8x32Binary(t, archsimd.Int8x32.AndNot, andNotSlice[int8])
+
+ testUint16x16Binary(t, archsimd.Uint16x16.AndNot, andNotSlice[uint16])
+ testUint16x8Binary(t, archsimd.Uint16x8.AndNot, andNotSlice[uint16])
+ testUint32x4Binary(t, archsimd.Uint32x4.AndNot, andNotSlice[uint32])
+ testUint32x8Binary(t, archsimd.Uint32x8.AndNot, andNotSlice[uint32])
+ testUint64x2Binary(t, archsimd.Uint64x2.AndNot, andNotSlice[uint64])
+ testUint64x4Binary(t, archsimd.Uint64x4.AndNot, andNotSlice[uint64])
+ testUint8x16Binary(t, archsimd.Uint8x16.AndNot, andNotSlice[uint8])
+ testUint8x32Binary(t, archsimd.Uint8x32.AndNot, andNotSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testInt8x64Binary(t, archsimd.Int8x64.AndNot, andNotSlice[int8])
+ testInt16x32Binary(t, archsimd.Int16x32.AndNot, andNotSlice[int16])
+ testInt32x16Binary(t, archsimd.Int32x16.AndNot, andNotSlice[int32])
+ testInt64x8Binary(t, archsimd.Int64x8.AndNot, andNotSlice[int64])
+ testUint8x64Binary(t, archsimd.Uint8x64.AndNot, andNotSlice[uint8])
+ testUint16x32Binary(t, archsimd.Uint16x32.AndNot, andNotSlice[uint16])
+ testUint32x16Binary(t, archsimd.Uint32x16.AndNot, andNotSlice[uint32])
+ testUint64x8Binary(t, archsimd.Uint64x8.AndNot, andNotSlice[uint64])
+ }
+}
+
+func TestXor(t *testing.T) {
+ testInt16x16Binary(t, archsimd.Int16x16.Xor, xorSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.Xor, xorSlice[int16])
+ testInt32x4Binary(t, archsimd.Int32x4.Xor, xorSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.Xor, xorSlice[int32])
+ testInt64x2Binary(t, archsimd.Int64x2.Xor, xorSlice[int64])
+ testInt64x4Binary(t, archsimd.Int64x4.Xor, xorSlice[int64])
+ testInt8x16Binary(t, archsimd.Int8x16.Xor, xorSlice[int8])
+ testInt8x32Binary(t, archsimd.Int8x32.Xor, xorSlice[int8])
+
+ testUint16x16Binary(t, archsimd.Uint16x16.Xor, xorSlice[uint16])
+ testUint16x8Binary(t, archsimd.Uint16x8.Xor, xorSlice[uint16])
+ testUint32x4Binary(t, archsimd.Uint32x4.Xor, xorSlice[uint32])
+ testUint32x8Binary(t, archsimd.Uint32x8.Xor, xorSlice[uint32])
+ testUint64x2Binary(t, archsimd.Uint64x2.Xor, xorSlice[uint64])
+ testUint64x4Binary(t, archsimd.Uint64x4.Xor, xorSlice[uint64])
+ testUint8x16Binary(t, archsimd.Uint8x16.Xor, xorSlice[uint8])
+ testUint8x32Binary(t, archsimd.Uint8x32.Xor, xorSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ // testInt8x64Binary(t, archsimd.Int8x64.Xor, xorSlice[int8]) // missing
+ // testInt16x32Binary(t, archsimd.Int16x32.Xor, xorSlice[int16]) // missing
+ testInt32x16Binary(t, archsimd.Int32x16.Xor, xorSlice[int32])
+ testInt64x8Binary(t, archsimd.Int64x8.Xor, xorSlice[int64])
+ // testUint8x64Binary(t, archsimd.Uint8x64.Xor, xorSlice[uint8]) // missing
+ // testUint16x32Binary(t, archsimd.Uint16x32.Xor, xorSlice[uint16]) // missing
+ testUint32x16Binary(t, archsimd.Uint32x16.Xor, xorSlice[uint32])
+ testUint64x8Binary(t, archsimd.Uint64x8.Xor, xorSlice[uint64])
+ }
+}
+
+func TestOr(t *testing.T) {
+ testInt16x16Binary(t, archsimd.Int16x16.Or, orSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.Or, orSlice[int16])
+ testInt32x4Binary(t, archsimd.Int32x4.Or, orSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.Or, orSlice[int32])
+ testInt64x2Binary(t, archsimd.Int64x2.Or, orSlice[int64])
+ testInt64x4Binary(t, archsimd.Int64x4.Or, orSlice[int64])
+ testInt8x16Binary(t, archsimd.Int8x16.Or, orSlice[int8])
+ testInt8x32Binary(t, archsimd.Int8x32.Or, orSlice[int8])
+
+ testUint16x16Binary(t, archsimd.Uint16x16.Or, orSlice[uint16])
+ testUint16x8Binary(t, archsimd.Uint16x8.Or, orSlice[uint16])
+ testUint32x4Binary(t, archsimd.Uint32x4.Or, orSlice[uint32])
+ testUint32x8Binary(t, archsimd.Uint32x8.Or, orSlice[uint32])
+ testUint64x2Binary(t, archsimd.Uint64x2.Or, orSlice[uint64])
+ testUint64x4Binary(t, archsimd.Uint64x4.Or, orSlice[uint64])
+ testUint8x16Binary(t, archsimd.Uint8x16.Or, orSlice[uint8])
+ testUint8x32Binary(t, archsimd.Uint8x32.Or, orSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ // testInt8x64Binary(t, archsimd.Int8x64.Or, orSlice[int8]) // missing
+ // testInt16x32Binary(t, archsimd.Int16x32.Or, orSlice[int16]) // missing
+ testInt32x16Binary(t, archsimd.Int32x16.Or, orSlice[int32])
+ testInt64x8Binary(t, archsimd.Int64x8.Or, orSlice[int64])
+ // testUint8x64Binary(t, archsimd.Uint8x64.Or, orSlice[uint8]) // missing
+ // testUint16x32Binary(t, archsimd.Uint16x32.Or, orSlice[uint16]) // missing
+ testUint32x16Binary(t, archsimd.Uint32x16.Or, orSlice[uint32])
+ testUint64x8Binary(t, archsimd.Uint64x8.Or, orSlice[uint64])
+ }
+}
+
+func TestMul(t *testing.T) {
+ testFloat32x4Binary(t, archsimd.Float32x4.Mul, mulSlice[float32])
+ testFloat32x8Binary(t, archsimd.Float32x8.Mul, mulSlice[float32])
+ testFloat64x2Binary(t, archsimd.Float64x2.Mul, mulSlice[float64])
+ testFloat64x4Binary(t, archsimd.Float64x4.Mul, mulSlice[float64])
+
+ testInt16x16Binary(t, archsimd.Int16x16.Mul, mulSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.Mul, mulSlice[int16])
+ testInt32x4Binary(t, archsimd.Int32x4.Mul, mulSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.Mul, mulSlice[int32])
+
+ // testInt8x16Binary(t, archsimd.Int8x16.Mul, mulSlice[int8]) // nope
+ // testInt8x32Binary(t, archsimd.Int8x32.Mul, mulSlice[int8])
+
+ // TODO: we should be able to do these; there's no difference between signed and unsigned Mul.
+ // testUint16x16Binary(t, archsimd.Uint16x16.Mul, mulSlice[uint16])
+ // testUint16x8Binary(t, archsimd.Uint16x8.Mul, mulSlice[uint16])
+ // testUint32x4Binary(t, archsimd.Uint32x4.Mul, mulSlice[uint32])
+ // testUint32x8Binary(t, archsimd.Uint32x8.Mul, mulSlice[uint32])
+ // testUint64x2Binary(t, archsimd.Uint64x2.Mul, mulSlice[uint64])
+ // testUint64x4Binary(t, archsimd.Uint64x4.Mul, mulSlice[uint64])
+
+ // testUint8x16Binary(t, archsimd.Uint8x16.Mul, mulSlice[uint8]) // nope
+ // testUint8x32Binary(t, archsimd.Uint8x32.Mul, mulSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testInt64x2Binary(t, archsimd.Int64x2.Mul, mulSlice[int64]) // avx512 only
+ testInt64x4Binary(t, archsimd.Int64x4.Mul, mulSlice[int64])
+
+ testFloat32x16Binary(t, archsimd.Float32x16.Mul, mulSlice[float32])
+ testFloat64x8Binary(t, archsimd.Float64x8.Mul, mulSlice[float64])
+
+ // testInt8x64Binary(t, archsimd.Int8x64.Mul, mulSlice[int8]) // nope
+ testInt16x32Binary(t, archsimd.Int16x32.Mul, mulSlice[int16])
+ testInt32x16Binary(t, archsimd.Int32x16.Mul, mulSlice[int32])
+ testInt64x8Binary(t, archsimd.Int64x8.Mul, mulSlice[int64])
+ // testUint8x64Binary(t, archsimd.Uint8x64.Mul, mulSlice[uint8]) // nope
+
+ // TODO: signed Mul should do the job here too.
+ // testUint16x32Binary(t, archsimd.Uint16x32.Mul, mulSlice[uint16])
+ // testUint32x16Binary(t, archsimd.Uint32x16.Mul, mulSlice[uint32])
+ // testUint64x8Binary(t, archsimd.Uint64x8.Mul, mulSlice[uint64])
+ }
+}
+
+func TestDiv(t *testing.T) {
+ testFloat32x4Binary(t, archsimd.Float32x4.Div, divSlice[float32])
+ testFloat32x8Binary(t, archsimd.Float32x8.Div, divSlice[float32])
+ testFloat64x2Binary(t, archsimd.Float64x2.Div, divSlice[float64])
+ testFloat64x4Binary(t, archsimd.Float64x4.Div, divSlice[float64])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Binary(t, archsimd.Float32x16.Div, divSlice[float32])
+ testFloat64x8Binary(t, archsimd.Float64x8.Div, divSlice[float64])
+ }
+}
package simd_test
import (
- "simd"
+ "simd/archsimd"
"testing"
)
// testInt8x16Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt8x16Compare(t *testing.T, f func(_, _ simd.Int8x16) simd.Mask8x16, want func(_, _ []int8) []int64) {
+func testInt8x16Compare(t *testing.T, f func(_, _ archsimd.Int8x16) archsimd.Mask8x16, want func(_, _ []int8) []int64) {
n := 16
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
- a := simd.LoadInt8x16Slice(x)
- b := simd.LoadInt8x16Slice(y)
+ a := archsimd.LoadInt8x16Slice(x)
+ b := archsimd.LoadInt8x16Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x16().StoreSlice(g)
w := want(x, y)
}
// testInt16x8Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt16x8Compare(t *testing.T, f func(_, _ simd.Int16x8) simd.Mask16x8, want func(_, _ []int16) []int64) {
+func testInt16x8Compare(t *testing.T, f func(_, _ archsimd.Int16x8) archsimd.Mask16x8, want func(_, _ []int16) []int64) {
n := 8
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
- a := simd.LoadInt16x8Slice(x)
- b := simd.LoadInt16x8Slice(y)
+ a := archsimd.LoadInt16x8Slice(x)
+ b := archsimd.LoadInt16x8Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x8().StoreSlice(g)
w := want(x, y)
}
// testInt32x4Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt32x4Compare(t *testing.T, f func(_, _ simd.Int32x4) simd.Mask32x4, want func(_, _ []int32) []int64) {
+func testInt32x4Compare(t *testing.T, f func(_, _ archsimd.Int32x4) archsimd.Mask32x4, want func(_, _ []int32) []int64) {
n := 4
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
- a := simd.LoadInt32x4Slice(x)
- b := simd.LoadInt32x4Slice(y)
+ a := archsimd.LoadInt32x4Slice(x)
+ b := archsimd.LoadInt32x4Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x4().StoreSlice(g)
w := want(x, y)
}
// testInt64x2Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt64x2Compare(t *testing.T, f func(_, _ simd.Int64x2) simd.Mask64x2, want func(_, _ []int64) []int64) {
+func testInt64x2Compare(t *testing.T, f func(_, _ archsimd.Int64x2) archsimd.Mask64x2, want func(_, _ []int64) []int64) {
n := 2
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
- a := simd.LoadInt64x2Slice(x)
- b := simd.LoadInt64x2Slice(y)
+ a := archsimd.LoadInt64x2Slice(x)
+ b := archsimd.LoadInt64x2Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x2().StoreSlice(g)
w := want(x, y)
}
// testUint8x16Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint8x16Compare(t *testing.T, f func(_, _ simd.Uint8x16) simd.Mask8x16, want func(_, _ []uint8) []int64) {
+func testUint8x16Compare(t *testing.T, f func(_, _ archsimd.Uint8x16) archsimd.Mask8x16, want func(_, _ []uint8) []int64) {
n := 16
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
- a := simd.LoadUint8x16Slice(x)
- b := simd.LoadUint8x16Slice(y)
+ a := archsimd.LoadUint8x16Slice(x)
+ b := archsimd.LoadUint8x16Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x16().StoreSlice(g)
w := want(x, y)
}
// testUint16x8Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint16x8Compare(t *testing.T, f func(_, _ simd.Uint16x8) simd.Mask16x8, want func(_, _ []uint16) []int64) {
+func testUint16x8Compare(t *testing.T, f func(_, _ archsimd.Uint16x8) archsimd.Mask16x8, want func(_, _ []uint16) []int64) {
n := 8
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
- a := simd.LoadUint16x8Slice(x)
- b := simd.LoadUint16x8Slice(y)
+ a := archsimd.LoadUint16x8Slice(x)
+ b := archsimd.LoadUint16x8Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x8().StoreSlice(g)
w := want(x, y)
}
// testUint32x4Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint32x4Compare(t *testing.T, f func(_, _ simd.Uint32x4) simd.Mask32x4, want func(_, _ []uint32) []int64) {
+func testUint32x4Compare(t *testing.T, f func(_, _ archsimd.Uint32x4) archsimd.Mask32x4, want func(_, _ []uint32) []int64) {
n := 4
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
- a := simd.LoadUint32x4Slice(x)
- b := simd.LoadUint32x4Slice(y)
+ a := archsimd.LoadUint32x4Slice(x)
+ b := archsimd.LoadUint32x4Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x4().StoreSlice(g)
w := want(x, y)
}
// testUint64x2Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint64x2Compare(t *testing.T, f func(_, _ simd.Uint64x2) simd.Mask64x2, want func(_, _ []uint64) []int64) {
+func testUint64x2Compare(t *testing.T, f func(_, _ archsimd.Uint64x2) archsimd.Mask64x2, want func(_, _ []uint64) []int64) {
n := 2
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
- a := simd.LoadUint64x2Slice(x)
- b := simd.LoadUint64x2Slice(y)
+ a := archsimd.LoadUint64x2Slice(x)
+ b := archsimd.LoadUint64x2Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x2().StoreSlice(g)
w := want(x, y)
}
// testFloat32x4Compare tests the simd comparison method f against the expected behavior generated by want
-func testFloat32x4Compare(t *testing.T, f func(_, _ simd.Float32x4) simd.Mask32x4, want func(_, _ []float32) []int64) {
+func testFloat32x4Compare(t *testing.T, f func(_, _ archsimd.Float32x4) archsimd.Mask32x4, want func(_, _ []float32) []int64) {
n := 4
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
- a := simd.LoadFloat32x4Slice(x)
- b := simd.LoadFloat32x4Slice(y)
+ a := archsimd.LoadFloat32x4Slice(x)
+ b := archsimd.LoadFloat32x4Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x4().StoreSlice(g)
w := want(x, y)
}
// testFloat64x2Compare tests the simd comparison method f against the expected behavior generated by want
-func testFloat64x2Compare(t *testing.T, f func(_, _ simd.Float64x2) simd.Mask64x2, want func(_, _ []float64) []int64) {
+func testFloat64x2Compare(t *testing.T, f func(_, _ archsimd.Float64x2) archsimd.Mask64x2, want func(_, _ []float64) []int64) {
n := 2
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
- a := simd.LoadFloat64x2Slice(x)
- b := simd.LoadFloat64x2Slice(y)
+ a := archsimd.LoadFloat64x2Slice(x)
+ b := archsimd.LoadFloat64x2Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x2().StoreSlice(g)
w := want(x, y)
}
// testInt8x32Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt8x32Compare(t *testing.T, f func(_, _ simd.Int8x32) simd.Mask8x32, want func(_, _ []int8) []int64) {
+func testInt8x32Compare(t *testing.T, f func(_, _ archsimd.Int8x32) archsimd.Mask8x32, want func(_, _ []int8) []int64) {
n := 32
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
- a := simd.LoadInt8x32Slice(x)
- b := simd.LoadInt8x32Slice(y)
+ a := archsimd.LoadInt8x32Slice(x)
+ b := archsimd.LoadInt8x32Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x32().StoreSlice(g)
w := want(x, y)
}
// testInt16x16Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt16x16Compare(t *testing.T, f func(_, _ simd.Int16x16) simd.Mask16x16, want func(_, _ []int16) []int64) {
+func testInt16x16Compare(t *testing.T, f func(_, _ archsimd.Int16x16) archsimd.Mask16x16, want func(_, _ []int16) []int64) {
n := 16
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
- a := simd.LoadInt16x16Slice(x)
- b := simd.LoadInt16x16Slice(y)
+ a := archsimd.LoadInt16x16Slice(x)
+ b := archsimd.LoadInt16x16Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x16().StoreSlice(g)
w := want(x, y)
}
// testInt32x8Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt32x8Compare(t *testing.T, f func(_, _ simd.Int32x8) simd.Mask32x8, want func(_, _ []int32) []int64) {
+func testInt32x8Compare(t *testing.T, f func(_, _ archsimd.Int32x8) archsimd.Mask32x8, want func(_, _ []int32) []int64) {
n := 8
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
- a := simd.LoadInt32x8Slice(x)
- b := simd.LoadInt32x8Slice(y)
+ a := archsimd.LoadInt32x8Slice(x)
+ b := archsimd.LoadInt32x8Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x8().StoreSlice(g)
w := want(x, y)
}
// testInt64x4Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt64x4Compare(t *testing.T, f func(_, _ simd.Int64x4) simd.Mask64x4, want func(_, _ []int64) []int64) {
+func testInt64x4Compare(t *testing.T, f func(_, _ archsimd.Int64x4) archsimd.Mask64x4, want func(_, _ []int64) []int64) {
n := 4
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
- a := simd.LoadInt64x4Slice(x)
- b := simd.LoadInt64x4Slice(y)
+ a := archsimd.LoadInt64x4Slice(x)
+ b := archsimd.LoadInt64x4Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x4().StoreSlice(g)
w := want(x, y)
}
// testUint8x32Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint8x32Compare(t *testing.T, f func(_, _ simd.Uint8x32) simd.Mask8x32, want func(_, _ []uint8) []int64) {
+func testUint8x32Compare(t *testing.T, f func(_, _ archsimd.Uint8x32) archsimd.Mask8x32, want func(_, _ []uint8) []int64) {
n := 32
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
- a := simd.LoadUint8x32Slice(x)
- b := simd.LoadUint8x32Slice(y)
+ a := archsimd.LoadUint8x32Slice(x)
+ b := archsimd.LoadUint8x32Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x32().StoreSlice(g)
w := want(x, y)
}
// testUint16x16Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint16x16Compare(t *testing.T, f func(_, _ simd.Uint16x16) simd.Mask16x16, want func(_, _ []uint16) []int64) {
+func testUint16x16Compare(t *testing.T, f func(_, _ archsimd.Uint16x16) archsimd.Mask16x16, want func(_, _ []uint16) []int64) {
n := 16
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
- a := simd.LoadUint16x16Slice(x)
- b := simd.LoadUint16x16Slice(y)
+ a := archsimd.LoadUint16x16Slice(x)
+ b := archsimd.LoadUint16x16Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x16().StoreSlice(g)
w := want(x, y)
}
// testUint32x8Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint32x8Compare(t *testing.T, f func(_, _ simd.Uint32x8) simd.Mask32x8, want func(_, _ []uint32) []int64) {
+func testUint32x8Compare(t *testing.T, f func(_, _ archsimd.Uint32x8) archsimd.Mask32x8, want func(_, _ []uint32) []int64) {
n := 8
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
- a := simd.LoadUint32x8Slice(x)
- b := simd.LoadUint32x8Slice(y)
+ a := archsimd.LoadUint32x8Slice(x)
+ b := archsimd.LoadUint32x8Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x8().StoreSlice(g)
w := want(x, y)
}
// testUint64x4Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint64x4Compare(t *testing.T, f func(_, _ simd.Uint64x4) simd.Mask64x4, want func(_, _ []uint64) []int64) {
+func testUint64x4Compare(t *testing.T, f func(_, _ archsimd.Uint64x4) archsimd.Mask64x4, want func(_, _ []uint64) []int64) {
n := 4
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
- a := simd.LoadUint64x4Slice(x)
- b := simd.LoadUint64x4Slice(y)
+ a := archsimd.LoadUint64x4Slice(x)
+ b := archsimd.LoadUint64x4Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x4().StoreSlice(g)
w := want(x, y)
}
// testFloat32x8Compare tests the simd comparison method f against the expected behavior generated by want
-func testFloat32x8Compare(t *testing.T, f func(_, _ simd.Float32x8) simd.Mask32x8, want func(_, _ []float32) []int64) {
+func testFloat32x8Compare(t *testing.T, f func(_, _ archsimd.Float32x8) archsimd.Mask32x8, want func(_, _ []float32) []int64) {
n := 8
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
- b := simd.LoadFloat32x8Slice(y)
+ a := archsimd.LoadFloat32x8Slice(x)
+ b := archsimd.LoadFloat32x8Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x8().StoreSlice(g)
w := want(x, y)
}
// testFloat64x4Compare tests the simd comparison method f against the expected behavior generated by want
-func testFloat64x4Compare(t *testing.T, f func(_, _ simd.Float64x4) simd.Mask64x4, want func(_, _ []float64) []int64) {
+func testFloat64x4Compare(t *testing.T, f func(_, _ archsimd.Float64x4) archsimd.Mask64x4, want func(_, _ []float64) []int64) {
n := 4
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
- a := simd.LoadFloat64x4Slice(x)
- b := simd.LoadFloat64x4Slice(y)
+ a := archsimd.LoadFloat64x4Slice(x)
+ b := archsimd.LoadFloat64x4Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x4().StoreSlice(g)
w := want(x, y)
}
// testInt8x64Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt8x64Compare(t *testing.T, f func(_, _ simd.Int8x64) simd.Mask8x64, want func(_, _ []int8) []int64) {
+func testInt8x64Compare(t *testing.T, f func(_, _ archsimd.Int8x64) archsimd.Mask8x64, want func(_, _ []int8) []int64) {
n := 64
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
- a := simd.LoadInt8x64Slice(x)
- b := simd.LoadInt8x64Slice(y)
+ a := archsimd.LoadInt8x64Slice(x)
+ b := archsimd.LoadInt8x64Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x64().StoreSlice(g)
w := want(x, y)
}
// testInt16x32Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt16x32Compare(t *testing.T, f func(_, _ simd.Int16x32) simd.Mask16x32, want func(_, _ []int16) []int64) {
+func testInt16x32Compare(t *testing.T, f func(_, _ archsimd.Int16x32) archsimd.Mask16x32, want func(_, _ []int16) []int64) {
n := 32
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
- a := simd.LoadInt16x32Slice(x)
- b := simd.LoadInt16x32Slice(y)
+ a := archsimd.LoadInt16x32Slice(x)
+ b := archsimd.LoadInt16x32Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x32().StoreSlice(g)
w := want(x, y)
}
// testInt32x16Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt32x16Compare(t *testing.T, f func(_, _ simd.Int32x16) simd.Mask32x16, want func(_, _ []int32) []int64) {
+func testInt32x16Compare(t *testing.T, f func(_, _ archsimd.Int32x16) archsimd.Mask32x16, want func(_, _ []int32) []int64) {
n := 16
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
- a := simd.LoadInt32x16Slice(x)
- b := simd.LoadInt32x16Slice(y)
+ a := archsimd.LoadInt32x16Slice(x)
+ b := archsimd.LoadInt32x16Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x16().StoreSlice(g)
w := want(x, y)
}
// testInt64x8Compare tests the simd comparison method f against the expected behavior generated by want
-func testInt64x8Compare(t *testing.T, f func(_, _ simd.Int64x8) simd.Mask64x8, want func(_, _ []int64) []int64) {
+func testInt64x8Compare(t *testing.T, f func(_, _ archsimd.Int64x8) archsimd.Mask64x8, want func(_, _ []int64) []int64) {
n := 8
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
- a := simd.LoadInt64x8Slice(x)
- b := simd.LoadInt64x8Slice(y)
+ a := archsimd.LoadInt64x8Slice(x)
+ b := archsimd.LoadInt64x8Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x8().StoreSlice(g)
w := want(x, y)
}
// testUint8x64Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint8x64Compare(t *testing.T, f func(_, _ simd.Uint8x64) simd.Mask8x64, want func(_, _ []uint8) []int64) {
+func testUint8x64Compare(t *testing.T, f func(_, _ archsimd.Uint8x64) archsimd.Mask8x64, want func(_, _ []uint8) []int64) {
n := 64
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
- a := simd.LoadUint8x64Slice(x)
- b := simd.LoadUint8x64Slice(y)
+ a := archsimd.LoadUint8x64Slice(x)
+ b := archsimd.LoadUint8x64Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x64().StoreSlice(g)
w := want(x, y)
}
// testUint16x32Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint16x32Compare(t *testing.T, f func(_, _ simd.Uint16x32) simd.Mask16x32, want func(_, _ []uint16) []int64) {
+func testUint16x32Compare(t *testing.T, f func(_, _ archsimd.Uint16x32) archsimd.Mask16x32, want func(_, _ []uint16) []int64) {
n := 32
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
- a := simd.LoadUint16x32Slice(x)
- b := simd.LoadUint16x32Slice(y)
+ a := archsimd.LoadUint16x32Slice(x)
+ b := archsimd.LoadUint16x32Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x32().StoreSlice(g)
w := want(x, y)
}
// testUint32x16Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint32x16Compare(t *testing.T, f func(_, _ simd.Uint32x16) simd.Mask32x16, want func(_, _ []uint32) []int64) {
+func testUint32x16Compare(t *testing.T, f func(_, _ archsimd.Uint32x16) archsimd.Mask32x16, want func(_, _ []uint32) []int64) {
n := 16
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
- a := simd.LoadUint32x16Slice(x)
- b := simd.LoadUint32x16Slice(y)
+ a := archsimd.LoadUint32x16Slice(x)
+ b := archsimd.LoadUint32x16Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x16().StoreSlice(g)
w := want(x, y)
}
// testUint64x8Compare tests the simd comparison method f against the expected behavior generated by want
-func testUint64x8Compare(t *testing.T, f func(_, _ simd.Uint64x8) simd.Mask64x8, want func(_, _ []uint64) []int64) {
+func testUint64x8Compare(t *testing.T, f func(_, _ archsimd.Uint64x8) archsimd.Mask64x8, want func(_, _ []uint64) []int64) {
n := 8
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
- a := simd.LoadUint64x8Slice(x)
- b := simd.LoadUint64x8Slice(y)
+ a := archsimd.LoadUint64x8Slice(x)
+ b := archsimd.LoadUint64x8Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x8().StoreSlice(g)
w := want(x, y)
}
// testFloat32x16Compare tests the simd comparison method f against the expected behavior generated by want
-func testFloat32x16Compare(t *testing.T, f func(_, _ simd.Float32x16) simd.Mask32x16, want func(_, _ []float32) []int64) {
+func testFloat32x16Compare(t *testing.T, f func(_, _ archsimd.Float32x16) archsimd.Mask32x16, want func(_, _ []float32) []int64) {
n := 16
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
- b := simd.LoadFloat32x16Slice(y)
+ a := archsimd.LoadFloat32x16Slice(x)
+ b := archsimd.LoadFloat32x16Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x16().StoreSlice(g)
w := want(x, y)
}
// testFloat64x8Compare tests the simd comparison method f against the expected behavior generated by want
-func testFloat64x8Compare(t *testing.T, f func(_, _ simd.Float64x8) simd.Mask64x8, want func(_, _ []float64) []int64) {
+func testFloat64x8Compare(t *testing.T, f func(_, _ archsimd.Float64x8) archsimd.Mask64x8, want func(_, _ []float64) []int64) {
n := 8
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
- a := simd.LoadFloat64x8Slice(x)
- b := simd.LoadFloat64x8Slice(y)
+ a := archsimd.LoadFloat64x8Slice(x)
+ b := archsimd.LoadFloat64x8Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x8().StoreSlice(g)
w := want(x, y)
--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.simd && amd64
+
+package simd_test
+
+import (
+ "simd/archsimd"
+ "testing"
+)
+
+// AVX2 lacks most comparisons, but they can be synthesized
+// from > and ==.
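+// For example, with only signed > and ==, a != b is NOT(a == b),
+// a >= b is (a > b) OR (a == b), and a < b is b > a; unsigned comparisons
+// follow by first flipping the sign bit of both operands. (This sketches
+// the idea; it is not necessarily the exact lowering used.)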
+var comparisonFixed bool = archsimd.X86.AVX512()
+
+func TestLess(t *testing.T) {
+ testFloat32x4Compare(t, archsimd.Float32x4.Less, lessSlice[float32])
+ testFloat32x8Compare(t, archsimd.Float32x8.Less, lessSlice[float32])
+ testFloat64x2Compare(t, archsimd.Float64x2.Less, lessSlice[float64])
+ testFloat64x4Compare(t, archsimd.Float64x4.Less, lessSlice[float64])
+
+ testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16])
+ testInt16x8Compare(t, archsimd.Int16x8.Less, lessSlice[int16])
+ testInt32x4Compare(t, archsimd.Int32x4.Less, lessSlice[int32])
+ testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32])
+ testInt64x2Compare(t, archsimd.Int64x2.Less, lessSlice[int64])
+ testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64])
+ testInt8x16Compare(t, archsimd.Int8x16.Less, lessSlice[int8])
+ testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8])
+
+ testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16])
+ testUint16x8Compare(t, archsimd.Uint16x8.Less, lessSlice[uint16])
+ testUint32x4Compare(t, archsimd.Uint32x4.Less, lessSlice[uint32])
+ testUint32x8Compare(t, archsimd.Uint32x8.Less, lessSlice[uint32])
+ testUint64x2Compare(t, archsimd.Uint64x2.Less, lessSlice[uint64])
+ testUint64x4Compare(t, archsimd.Uint64x4.Less, lessSlice[uint64])
+ testUint8x16Compare(t, archsimd.Uint8x16.Less, lessSlice[uint8])
+ testUint8x32Compare(t, archsimd.Uint8x32.Less, lessSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Compare(t, archsimd.Float32x16.Less, lessSlice[float32])
+ testFloat64x8Compare(t, archsimd.Float64x8.Less, lessSlice[float64])
+ testInt8x64Compare(t, archsimd.Int8x64.Less, lessSlice[int8])
+ testInt16x32Compare(t, archsimd.Int16x32.Less, lessSlice[int16])
+ testInt32x16Compare(t, archsimd.Int32x16.Less, lessSlice[int32])
+ testInt64x8Compare(t, archsimd.Int64x8.Less, lessSlice[int64])
+ testUint8x64Compare(t, archsimd.Uint8x64.Less, lessSlice[uint8])
+ testUint16x32Compare(t, archsimd.Uint16x32.Less, lessSlice[uint16])
+ testUint32x16Compare(t, archsimd.Uint32x16.Less, lessSlice[uint32])
+ testUint64x8Compare(t, archsimd.Uint64x8.Less, lessSlice[uint64])
+ }
+}
+
+func TestLessEqual(t *testing.T) {
+ testFloat32x4Compare(t, archsimd.Float32x4.LessEqual, lessEqualSlice[float32])
+ testFloat32x8Compare(t, archsimd.Float32x8.LessEqual, lessEqualSlice[float32])
+ testFloat64x2Compare(t, archsimd.Float64x2.LessEqual, lessEqualSlice[float64])
+ testFloat64x4Compare(t, archsimd.Float64x4.LessEqual, lessEqualSlice[float64])
+
+ testInt16x16Compare(t, archsimd.Int16x16.LessEqual, lessEqualSlice[int16])
+ testInt16x8Compare(t, archsimd.Int16x8.LessEqual, lessEqualSlice[int16])
+ testInt32x4Compare(t, archsimd.Int32x4.LessEqual, lessEqualSlice[int32])
+ testInt32x8Compare(t, archsimd.Int32x8.LessEqual, lessEqualSlice[int32])
+ testInt64x2Compare(t, archsimd.Int64x2.LessEqual, lessEqualSlice[int64])
+ testInt64x4Compare(t, archsimd.Int64x4.LessEqual, lessEqualSlice[int64])
+ testInt8x16Compare(t, archsimd.Int8x16.LessEqual, lessEqualSlice[int8])
+ testInt8x32Compare(t, archsimd.Int8x32.LessEqual, lessEqualSlice[int8])
+
+ testUint16x16Compare(t, archsimd.Uint16x16.LessEqual, lessEqualSlice[uint16])
+ testUint16x8Compare(t, archsimd.Uint16x8.LessEqual, lessEqualSlice[uint16])
+ testUint32x4Compare(t, archsimd.Uint32x4.LessEqual, lessEqualSlice[uint32])
+ testUint32x8Compare(t, archsimd.Uint32x8.LessEqual, lessEqualSlice[uint32])
+ testUint64x2Compare(t, archsimd.Uint64x2.LessEqual, lessEqualSlice[uint64])
+ testUint64x4Compare(t, archsimd.Uint64x4.LessEqual, lessEqualSlice[uint64])
+ testUint8x16Compare(t, archsimd.Uint8x16.LessEqual, lessEqualSlice[uint8])
+ testUint8x32Compare(t, archsimd.Uint8x32.LessEqual, lessEqualSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Compare(t, archsimd.Float32x16.LessEqual, lessEqualSlice[float32])
+ testFloat64x8Compare(t, archsimd.Float64x8.LessEqual, lessEqualSlice[float64])
+ testInt8x64Compare(t, archsimd.Int8x64.LessEqual, lessEqualSlice[int8])
+ testInt16x32Compare(t, archsimd.Int16x32.LessEqual, lessEqualSlice[int16])
+ testInt32x16Compare(t, archsimd.Int32x16.LessEqual, lessEqualSlice[int32])
+ testInt64x8Compare(t, archsimd.Int64x8.LessEqual, lessEqualSlice[int64])
+ testUint8x64Compare(t, archsimd.Uint8x64.LessEqual, lessEqualSlice[uint8])
+ testUint16x32Compare(t, archsimd.Uint16x32.LessEqual, lessEqualSlice[uint16])
+ testUint32x16Compare(t, archsimd.Uint32x16.LessEqual, lessEqualSlice[uint32])
+ testUint64x8Compare(t, archsimd.Uint64x8.LessEqual, lessEqualSlice[uint64])
+ }
+}
+
+func TestGreater(t *testing.T) {
+ testFloat32x4Compare(t, archsimd.Float32x4.Greater, greaterSlice[float32])
+ testFloat32x8Compare(t, archsimd.Float32x8.Greater, greaterSlice[float32])
+ testFloat64x2Compare(t, archsimd.Float64x2.Greater, greaterSlice[float64])
+ testFloat64x4Compare(t, archsimd.Float64x4.Greater, greaterSlice[float64])
+
+ testInt16x16Compare(t, archsimd.Int16x16.Greater, greaterSlice[int16])
+ testInt16x8Compare(t, archsimd.Int16x8.Greater, greaterSlice[int16])
+ testInt32x4Compare(t, archsimd.Int32x4.Greater, greaterSlice[int32])
+ testInt32x8Compare(t, archsimd.Int32x8.Greater, greaterSlice[int32])
+
+ testInt64x2Compare(t, archsimd.Int64x2.Greater, greaterSlice[int64])
+ testInt64x4Compare(t, archsimd.Int64x4.Greater, greaterSlice[int64])
+ testInt8x16Compare(t, archsimd.Int8x16.Greater, greaterSlice[int8])
+ testInt8x32Compare(t, archsimd.Int8x32.Greater, greaterSlice[int8])
+
+ testUint16x16Compare(t, archsimd.Uint16x16.Greater, greaterSlice[uint16])
+ testUint16x8Compare(t, archsimd.Uint16x8.Greater, greaterSlice[uint16])
+ testUint32x4Compare(t, archsimd.Uint32x4.Greater, greaterSlice[uint32])
+ testUint32x8Compare(t, archsimd.Uint32x8.Greater, greaterSlice[uint32])
+
+ testUint64x2Compare(t, archsimd.Uint64x2.Greater, greaterSlice[uint64])
+ testUint64x4Compare(t, archsimd.Uint64x4.Greater, greaterSlice[uint64])
+ testUint8x16Compare(t, archsimd.Uint8x16.Greater, greaterSlice[uint8])
+ testUint8x32Compare(t, archsimd.Uint8x32.Greater, greaterSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Compare(t, archsimd.Float32x16.Greater, greaterSlice[float32])
+ testFloat64x8Compare(t, archsimd.Float64x8.Greater, greaterSlice[float64])
+ testInt8x64Compare(t, archsimd.Int8x64.Greater, greaterSlice[int8])
+ testInt16x32Compare(t, archsimd.Int16x32.Greater, greaterSlice[int16])
+ testInt32x16Compare(t, archsimd.Int32x16.Greater, greaterSlice[int32])
+ testInt64x8Compare(t, archsimd.Int64x8.Greater, greaterSlice[int64])
+ testUint8x64Compare(t, archsimd.Uint8x64.Greater, greaterSlice[uint8])
+ testUint16x32Compare(t, archsimd.Uint16x32.Greater, greaterSlice[uint16])
+ testUint32x16Compare(t, archsimd.Uint32x16.Greater, greaterSlice[uint32])
+ testUint64x8Compare(t, archsimd.Uint64x8.Greater, greaterSlice[uint64])
+ }
+}
+
+func TestGreaterEqual(t *testing.T) {
+ testFloat32x4Compare(t, archsimd.Float32x4.GreaterEqual, greaterEqualSlice[float32])
+ testFloat32x8Compare(t, archsimd.Float32x8.GreaterEqual, greaterEqualSlice[float32])
+ testFloat64x2Compare(t, archsimd.Float64x2.GreaterEqual, greaterEqualSlice[float64])
+ testFloat64x4Compare(t, archsimd.Float64x4.GreaterEqual, greaterEqualSlice[float64])
+
+ testInt16x16Compare(t, archsimd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
+ testInt16x8Compare(t, archsimd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
+ testInt32x4Compare(t, archsimd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
+ testInt32x8Compare(t, archsimd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
+ testInt64x2Compare(t, archsimd.Int64x2.GreaterEqual, greaterEqualSlice[int64])
+ testInt64x4Compare(t, archsimd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
+ testInt8x16Compare(t, archsimd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
+ testInt8x32Compare(t, archsimd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
+
+ testUint16x16Compare(t, archsimd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
+ testUint16x8Compare(t, archsimd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
+ testUint32x4Compare(t, archsimd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
+ testUint32x8Compare(t, archsimd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
+ testUint64x2Compare(t, archsimd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
+ testUint64x4Compare(t, archsimd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
+ testUint8x16Compare(t, archsimd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
+ testUint8x32Compare(t, archsimd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Compare(t, archsimd.Float32x16.GreaterEqual, greaterEqualSlice[float32])
+ testFloat64x8Compare(t, archsimd.Float64x8.GreaterEqual, greaterEqualSlice[float64])
+ testInt8x64Compare(t, archsimd.Int8x64.GreaterEqual, greaterEqualSlice[int8])
+ testInt16x32Compare(t, archsimd.Int16x32.GreaterEqual, greaterEqualSlice[int16])
+ testInt32x16Compare(t, archsimd.Int32x16.GreaterEqual, greaterEqualSlice[int32])
+ testInt64x8Compare(t, archsimd.Int64x8.GreaterEqual, greaterEqualSlice[int64])
+ testUint8x64Compare(t, archsimd.Uint8x64.GreaterEqual, greaterEqualSlice[uint8])
+ testUint16x32Compare(t, archsimd.Uint16x32.GreaterEqual, greaterEqualSlice[uint16])
+ testUint32x16Compare(t, archsimd.Uint32x16.GreaterEqual, greaterEqualSlice[uint32])
+ testUint64x8Compare(t, archsimd.Uint64x8.GreaterEqual, greaterEqualSlice[uint64])
+ }
+}
+
+func TestEqual(t *testing.T) {
+ testFloat32x4Compare(t, archsimd.Float32x4.Equal, equalSlice[float32])
+ testFloat32x8Compare(t, archsimd.Float32x8.Equal, equalSlice[float32])
+ testFloat64x2Compare(t, archsimd.Float64x2.Equal, equalSlice[float64])
+ testFloat64x4Compare(t, archsimd.Float64x4.Equal, equalSlice[float64])
+
+ testInt16x16Compare(t, archsimd.Int16x16.Equal, equalSlice[int16])
+ testInt16x8Compare(t, archsimd.Int16x8.Equal, equalSlice[int16])
+ testInt32x4Compare(t, archsimd.Int32x4.Equal, equalSlice[int32])
+ testInt32x8Compare(t, archsimd.Int32x8.Equal, equalSlice[int32])
+ testInt64x2Compare(t, archsimd.Int64x2.Equal, equalSlice[int64])
+ testInt64x4Compare(t, archsimd.Int64x4.Equal, equalSlice[int64])
+ testInt8x16Compare(t, archsimd.Int8x16.Equal, equalSlice[int8])
+ testInt8x32Compare(t, archsimd.Int8x32.Equal, equalSlice[int8])
+
+ testUint16x16Compare(t, archsimd.Uint16x16.Equal, equalSlice[uint16])
+ testUint16x8Compare(t, archsimd.Uint16x8.Equal, equalSlice[uint16])
+ testUint32x4Compare(t, archsimd.Uint32x4.Equal, equalSlice[uint32])
+ testUint32x8Compare(t, archsimd.Uint32x8.Equal, equalSlice[uint32])
+ testUint64x2Compare(t, archsimd.Uint64x2.Equal, equalSlice[uint64])
+ testUint64x4Compare(t, archsimd.Uint64x4.Equal, equalSlice[uint64])
+ testUint8x16Compare(t, archsimd.Uint8x16.Equal, equalSlice[uint8])
+ testUint8x32Compare(t, archsimd.Uint8x32.Equal, equalSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Compare(t, archsimd.Float32x16.Equal, equalSlice[float32])
+ testFloat64x8Compare(t, archsimd.Float64x8.Equal, equalSlice[float64])
+ testInt8x64Compare(t, archsimd.Int8x64.Equal, equalSlice[int8])
+ testInt16x32Compare(t, archsimd.Int16x32.Equal, equalSlice[int16])
+ testInt32x16Compare(t, archsimd.Int32x16.Equal, equalSlice[int32])
+ testInt64x8Compare(t, archsimd.Int64x8.Equal, equalSlice[int64])
+ testUint8x64Compare(t, archsimd.Uint8x64.Equal, equalSlice[uint8])
+ testUint16x32Compare(t, archsimd.Uint16x32.Equal, equalSlice[uint16])
+ testUint32x16Compare(t, archsimd.Uint32x16.Equal, equalSlice[uint32])
+ testUint64x8Compare(t, archsimd.Uint64x8.Equal, equalSlice[uint64])
+ }
+}
+
+func TestNotEqual(t *testing.T) {
+ testFloat32x4Compare(t, archsimd.Float32x4.NotEqual, notEqualSlice[float32])
+ testFloat32x8Compare(t, archsimd.Float32x8.NotEqual, notEqualSlice[float32])
+ testFloat64x2Compare(t, archsimd.Float64x2.NotEqual, notEqualSlice[float64])
+ testFloat64x4Compare(t, archsimd.Float64x4.NotEqual, notEqualSlice[float64])
+
+ testInt16x16Compare(t, archsimd.Int16x16.NotEqual, notEqualSlice[int16])
+ testInt16x8Compare(t, archsimd.Int16x8.NotEqual, notEqualSlice[int16])
+ testInt32x4Compare(t, archsimd.Int32x4.NotEqual, notEqualSlice[int32])
+ testInt32x8Compare(t, archsimd.Int32x8.NotEqual, notEqualSlice[int32])
+ testInt64x2Compare(t, archsimd.Int64x2.NotEqual, notEqualSlice[int64])
+ testInt64x4Compare(t, archsimd.Int64x4.NotEqual, notEqualSlice[int64])
+ testInt8x16Compare(t, archsimd.Int8x16.NotEqual, notEqualSlice[int8])
+ testInt8x32Compare(t, archsimd.Int8x32.NotEqual, notEqualSlice[int8])
+
+ testUint16x16Compare(t, archsimd.Uint16x16.NotEqual, notEqualSlice[uint16])
+ testUint16x8Compare(t, archsimd.Uint16x8.NotEqual, notEqualSlice[uint16])
+ testUint32x4Compare(t, archsimd.Uint32x4.NotEqual, notEqualSlice[uint32])
+ testUint32x8Compare(t, archsimd.Uint32x8.NotEqual, notEqualSlice[uint32])
+ testUint64x2Compare(t, archsimd.Uint64x2.NotEqual, notEqualSlice[uint64])
+ testUint64x4Compare(t, archsimd.Uint64x4.NotEqual, notEqualSlice[uint64])
+ testUint8x16Compare(t, archsimd.Uint8x16.NotEqual, notEqualSlice[uint8])
+ testUint8x32Compare(t, archsimd.Uint8x32.NotEqual, notEqualSlice[uint8])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Compare(t, archsimd.Float32x16.NotEqual, notEqualSlice[float32])
+ testFloat64x8Compare(t, archsimd.Float64x8.NotEqual, notEqualSlice[float64])
+ testInt8x64Compare(t, archsimd.Int8x64.NotEqual, notEqualSlice[int8])
+ testInt16x32Compare(t, archsimd.Int16x32.NotEqual, notEqualSlice[int16])
+ testInt32x16Compare(t, archsimd.Int32x16.NotEqual, notEqualSlice[int32])
+ testInt64x8Compare(t, archsimd.Int64x8.NotEqual, notEqualSlice[int64])
+ testUint8x64Compare(t, archsimd.Uint8x64.NotEqual, notEqualSlice[uint8])
+ testUint16x32Compare(t, archsimd.Uint16x32.NotEqual, notEqualSlice[uint16])
+ testUint32x16Compare(t, archsimd.Uint32x16.NotEqual, notEqualSlice[uint32])
+ testUint64x8Compare(t, archsimd.Uint64x8.NotEqual, notEqualSlice[uint64])
+ }
+}
package simd_test
import (
- "simd"
+ "simd/archsimd"
"testing"
)
// testInt8x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
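// For example, if want(x, y) yields {-1, 0, -1, -1} and the mask is
// {true, true, false, false}, the masked result is {-1, 0, 0, 0}.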
func testInt8x16CompareMasked(t *testing.T,
- f func(_, _ simd.Int8x16, m simd.Mask8x16) simd.Mask8x16,
+ f func(_, _ archsimd.Int8x16, m archsimd.Mask8x16) archsimd.Mask8x16,
want func(_, _ []int8) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, int8s, n, func(x, y []int8, m []bool) bool {
t.Helper()
- a := simd.LoadInt8x16Slice(x)
- b := simd.LoadInt8x16Slice(y)
- k := simd.LoadInt8x16Slice(toVect[int8](m)).ToMask()
+ a := archsimd.LoadInt8x16Slice(x)
+ b := archsimd.LoadInt8x16Slice(y)
+ k := archsimd.LoadInt8x16Slice(toVect[int8](m)).ToMask()
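+ // toVect (defined elsewhere in this package) is assumed to expand each bool
+ // into a full lane value, so ToMask sets exactly the lanes that are true in m.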
g := make([]int8, n)
f(a, b, k).AsInt8x16().StoreSlice(g)
w := want(x, y)
// testInt16x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt16x8CompareMasked(t *testing.T,
- f func(_, _ simd.Int16x8, m simd.Mask16x8) simd.Mask16x8,
+ f func(_, _ archsimd.Int16x8, m archsimd.Mask16x8) archsimd.Mask16x8,
want func(_, _ []int16) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, int16s, n, func(x, y []int16, m []bool) bool {
t.Helper()
- a := simd.LoadInt16x8Slice(x)
- b := simd.LoadInt16x8Slice(y)
- k := simd.LoadInt16x8Slice(toVect[int16](m)).ToMask()
+ a := archsimd.LoadInt16x8Slice(x)
+ b := archsimd.LoadInt16x8Slice(y)
+ k := archsimd.LoadInt16x8Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x8().StoreSlice(g)
w := want(x, y)
// testInt32x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt32x4CompareMasked(t *testing.T,
- f func(_, _ simd.Int32x4, m simd.Mask32x4) simd.Mask32x4,
+ f func(_, _ archsimd.Int32x4, m archsimd.Mask32x4) archsimd.Mask32x4,
want func(_, _ []int32) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, int32s, n, func(x, y []int32, m []bool) bool {
t.Helper()
- a := simd.LoadInt32x4Slice(x)
- b := simd.LoadInt32x4Slice(y)
- k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
+ a := archsimd.LoadInt32x4Slice(x)
+ b := archsimd.LoadInt32x4Slice(y)
+ k := archsimd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x4().StoreSlice(g)
w := want(x, y)
// testInt64x2CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt64x2CompareMasked(t *testing.T,
- f func(_, _ simd.Int64x2, m simd.Mask64x2) simd.Mask64x2,
+ f func(_, _ archsimd.Int64x2, m archsimd.Mask64x2) archsimd.Mask64x2,
want func(_, _ []int64) []int64) {
n := 2
t.Helper()
forSlicePairMasked(t, int64s, n, func(x, y []int64, m []bool) bool {
t.Helper()
- a := simd.LoadInt64x2Slice(x)
- b := simd.LoadInt64x2Slice(y)
- k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
+ a := archsimd.LoadInt64x2Slice(x)
+ b := archsimd.LoadInt64x2Slice(y)
+ k := archsimd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x2().StoreSlice(g)
w := want(x, y)
// testUint8x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint8x16CompareMasked(t *testing.T,
- f func(_, _ simd.Uint8x16, m simd.Mask8x16) simd.Mask8x16,
+ f func(_, _ archsimd.Uint8x16, m archsimd.Mask8x16) archsimd.Mask8x16,
want func(_, _ []uint8) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, uint8s, n, func(x, y []uint8, m []bool) bool {
t.Helper()
- a := simd.LoadUint8x16Slice(x)
- b := simd.LoadUint8x16Slice(y)
- k := simd.LoadInt8x16Slice(toVect[int8](m)).ToMask()
+ a := archsimd.LoadUint8x16Slice(x)
+ b := archsimd.LoadUint8x16Slice(y)
+ k := archsimd.LoadInt8x16Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x16().StoreSlice(g)
w := want(x, y)
// testUint16x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint16x8CompareMasked(t *testing.T,
- f func(_, _ simd.Uint16x8, m simd.Mask16x8) simd.Mask16x8,
+ f func(_, _ archsimd.Uint16x8, m archsimd.Mask16x8) archsimd.Mask16x8,
want func(_, _ []uint16) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, uint16s, n, func(x, y []uint16, m []bool) bool {
t.Helper()
- a := simd.LoadUint16x8Slice(x)
- b := simd.LoadUint16x8Slice(y)
- k := simd.LoadInt16x8Slice(toVect[int16](m)).ToMask()
+ a := archsimd.LoadUint16x8Slice(x)
+ b := archsimd.LoadUint16x8Slice(y)
+ k := archsimd.LoadInt16x8Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x8().StoreSlice(g)
w := want(x, y)
// testUint32x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint32x4CompareMasked(t *testing.T,
- f func(_, _ simd.Uint32x4, m simd.Mask32x4) simd.Mask32x4,
+ f func(_, _ archsimd.Uint32x4, m archsimd.Mask32x4) archsimd.Mask32x4,
want func(_, _ []uint32) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, uint32s, n, func(x, y []uint32, m []bool) bool {
t.Helper()
- a := simd.LoadUint32x4Slice(x)
- b := simd.LoadUint32x4Slice(y)
- k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
+ a := archsimd.LoadUint32x4Slice(x)
+ b := archsimd.LoadUint32x4Slice(y)
+ k := archsimd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x4().StoreSlice(g)
w := want(x, y)
// testUint64x2CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint64x2CompareMasked(t *testing.T,
- f func(_, _ simd.Uint64x2, m simd.Mask64x2) simd.Mask64x2,
+ f func(_, _ archsimd.Uint64x2, m archsimd.Mask64x2) archsimd.Mask64x2,
want func(_, _ []uint64) []int64) {
n := 2
t.Helper()
forSlicePairMasked(t, uint64s, n, func(x, y []uint64, m []bool) bool {
t.Helper()
- a := simd.LoadUint64x2Slice(x)
- b := simd.LoadUint64x2Slice(y)
- k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
+ a := archsimd.LoadUint64x2Slice(x)
+ b := archsimd.LoadUint64x2Slice(y)
+ k := archsimd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x2().StoreSlice(g)
w := want(x, y)
// testFloat32x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testFloat32x4CompareMasked(t *testing.T,
- f func(_, _ simd.Float32x4, m simd.Mask32x4) simd.Mask32x4,
+ f func(_, _ archsimd.Float32x4, m archsimd.Mask32x4) archsimd.Mask32x4,
want func(_, _ []float32) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, float32s, n, func(x, y []float32, m []bool) bool {
t.Helper()
- a := simd.LoadFloat32x4Slice(x)
- b := simd.LoadFloat32x4Slice(y)
- k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
+ a := archsimd.LoadFloat32x4Slice(x)
+ b := archsimd.LoadFloat32x4Slice(y)
+ k := archsimd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x4().StoreSlice(g)
w := want(x, y)
// testFloat64x2CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testFloat64x2CompareMasked(t *testing.T,
- f func(_, _ simd.Float64x2, m simd.Mask64x2) simd.Mask64x2,
+ f func(_, _ archsimd.Float64x2, m archsimd.Mask64x2) archsimd.Mask64x2,
want func(_, _ []float64) []int64) {
n := 2
t.Helper()
forSlicePairMasked(t, float64s, n, func(x, y []float64, m []bool) bool {
t.Helper()
- a := simd.LoadFloat64x2Slice(x)
- b := simd.LoadFloat64x2Slice(y)
- k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
+ a := archsimd.LoadFloat64x2Slice(x)
+ b := archsimd.LoadFloat64x2Slice(y)
+ k := archsimd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x2().StoreSlice(g)
w := want(x, y)
// testInt8x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt8x32CompareMasked(t *testing.T,
- f func(_, _ simd.Int8x32, m simd.Mask8x32) simd.Mask8x32,
+ f func(_, _ archsimd.Int8x32, m archsimd.Mask8x32) archsimd.Mask8x32,
want func(_, _ []int8) []int64) {
n := 32
t.Helper()
forSlicePairMasked(t, int8s, n, func(x, y []int8, m []bool) bool {
t.Helper()
- a := simd.LoadInt8x32Slice(x)
- b := simd.LoadInt8x32Slice(y)
- k := simd.LoadInt8x32Slice(toVect[int8](m)).ToMask()
+ a := archsimd.LoadInt8x32Slice(x)
+ b := archsimd.LoadInt8x32Slice(y)
+ k := archsimd.LoadInt8x32Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x32().StoreSlice(g)
w := want(x, y)
// testInt16x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt16x16CompareMasked(t *testing.T,
- f func(_, _ simd.Int16x16, m simd.Mask16x16) simd.Mask16x16,
+ f func(_, _ archsimd.Int16x16, m archsimd.Mask16x16) archsimd.Mask16x16,
want func(_, _ []int16) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, int16s, n, func(x, y []int16, m []bool) bool {
t.Helper()
- a := simd.LoadInt16x16Slice(x)
- b := simd.LoadInt16x16Slice(y)
- k := simd.LoadInt16x16Slice(toVect[int16](m)).ToMask()
+ a := archsimd.LoadInt16x16Slice(x)
+ b := archsimd.LoadInt16x16Slice(y)
+ k := archsimd.LoadInt16x16Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x16().StoreSlice(g)
w := want(x, y)
// testInt32x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt32x8CompareMasked(t *testing.T,
- f func(_, _ simd.Int32x8, m simd.Mask32x8) simd.Mask32x8,
+ f func(_, _ archsimd.Int32x8, m archsimd.Mask32x8) archsimd.Mask32x8,
want func(_, _ []int32) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, int32s, n, func(x, y []int32, m []bool) bool {
t.Helper()
- a := simd.LoadInt32x8Slice(x)
- b := simd.LoadInt32x8Slice(y)
- k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
+ a := archsimd.LoadInt32x8Slice(x)
+ b := archsimd.LoadInt32x8Slice(y)
+ k := archsimd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x8().StoreSlice(g)
w := want(x, y)
// testInt64x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt64x4CompareMasked(t *testing.T,
- f func(_, _ simd.Int64x4, m simd.Mask64x4) simd.Mask64x4,
+ f func(_, _ archsimd.Int64x4, m archsimd.Mask64x4) archsimd.Mask64x4,
want func(_, _ []int64) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, int64s, n, func(x, y []int64, m []bool) bool {
t.Helper()
- a := simd.LoadInt64x4Slice(x)
- b := simd.LoadInt64x4Slice(y)
- k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
+ a := archsimd.LoadInt64x4Slice(x)
+ b := archsimd.LoadInt64x4Slice(y)
+ k := archsimd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x4().StoreSlice(g)
w := want(x, y)
// testUint8x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint8x32CompareMasked(t *testing.T,
- f func(_, _ simd.Uint8x32, m simd.Mask8x32) simd.Mask8x32,
+ f func(_, _ archsimd.Uint8x32, m archsimd.Mask8x32) archsimd.Mask8x32,
want func(_, _ []uint8) []int64) {
n := 32
t.Helper()
forSlicePairMasked(t, uint8s, n, func(x, y []uint8, m []bool) bool {
t.Helper()
- a := simd.LoadUint8x32Slice(x)
- b := simd.LoadUint8x32Slice(y)
- k := simd.LoadInt8x32Slice(toVect[int8](m)).ToMask()
+ a := archsimd.LoadUint8x32Slice(x)
+ b := archsimd.LoadUint8x32Slice(y)
+ k := archsimd.LoadInt8x32Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x32().StoreSlice(g)
w := want(x, y)
// testUint16x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint16x16CompareMasked(t *testing.T,
- f func(_, _ simd.Uint16x16, m simd.Mask16x16) simd.Mask16x16,
+ f func(_, _ archsimd.Uint16x16, m archsimd.Mask16x16) archsimd.Mask16x16,
want func(_, _ []uint16) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, uint16s, n, func(x, y []uint16, m []bool) bool {
t.Helper()
- a := simd.LoadUint16x16Slice(x)
- b := simd.LoadUint16x16Slice(y)
- k := simd.LoadInt16x16Slice(toVect[int16](m)).ToMask()
+ a := archsimd.LoadUint16x16Slice(x)
+ b := archsimd.LoadUint16x16Slice(y)
+ k := archsimd.LoadInt16x16Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x16().StoreSlice(g)
w := want(x, y)
// testUint32x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint32x8CompareMasked(t *testing.T,
- f func(_, _ simd.Uint32x8, m simd.Mask32x8) simd.Mask32x8,
+ f func(_, _ archsimd.Uint32x8, m archsimd.Mask32x8) archsimd.Mask32x8,
want func(_, _ []uint32) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, uint32s, n, func(x, y []uint32, m []bool) bool {
t.Helper()
- a := simd.LoadUint32x8Slice(x)
- b := simd.LoadUint32x8Slice(y)
- k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
+ a := archsimd.LoadUint32x8Slice(x)
+ b := archsimd.LoadUint32x8Slice(y)
+ k := archsimd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x8().StoreSlice(g)
w := want(x, y)
// testUint64x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint64x4CompareMasked(t *testing.T,
- f func(_, _ simd.Uint64x4, m simd.Mask64x4) simd.Mask64x4,
+ f func(_, _ archsimd.Uint64x4, m archsimd.Mask64x4) archsimd.Mask64x4,
want func(_, _ []uint64) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, uint64s, n, func(x, y []uint64, m []bool) bool {
t.Helper()
- a := simd.LoadUint64x4Slice(x)
- b := simd.LoadUint64x4Slice(y)
- k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
+ a := archsimd.LoadUint64x4Slice(x)
+ b := archsimd.LoadUint64x4Slice(y)
+ k := archsimd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x4().StoreSlice(g)
w := want(x, y)
// testFloat32x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testFloat32x8CompareMasked(t *testing.T,
- f func(_, _ simd.Float32x8, m simd.Mask32x8) simd.Mask32x8,
+ f func(_, _ archsimd.Float32x8, m archsimd.Mask32x8) archsimd.Mask32x8,
want func(_, _ []float32) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, float32s, n, func(x, y []float32, m []bool) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
- b := simd.LoadFloat32x8Slice(y)
- k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
+ a := archsimd.LoadFloat32x8Slice(x)
+ b := archsimd.LoadFloat32x8Slice(y)
+ k := archsimd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x8().StoreSlice(g)
w := want(x, y)
// testFloat64x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testFloat64x4CompareMasked(t *testing.T,
- f func(_, _ simd.Float64x4, m simd.Mask64x4) simd.Mask64x4,
+ f func(_, _ archsimd.Float64x4, m archsimd.Mask64x4) archsimd.Mask64x4,
want func(_, _ []float64) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, float64s, n, func(x, y []float64, m []bool) bool {
t.Helper()
- a := simd.LoadFloat64x4Slice(x)
- b := simd.LoadFloat64x4Slice(y)
- k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
+ a := archsimd.LoadFloat64x4Slice(x)
+ b := archsimd.LoadFloat64x4Slice(y)
+ k := archsimd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x4().StoreSlice(g)
w := want(x, y)
// testInt8x64CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt8x64CompareMasked(t *testing.T,
- f func(_, _ simd.Int8x64, m simd.Mask8x64) simd.Mask8x64,
+ f func(_, _ archsimd.Int8x64, m archsimd.Mask8x64) archsimd.Mask8x64,
want func(_, _ []int8) []int64) {
n := 64
t.Helper()
forSlicePairMasked(t, int8s, n, func(x, y []int8, m []bool) bool {
t.Helper()
- a := simd.LoadInt8x64Slice(x)
- b := simd.LoadInt8x64Slice(y)
- k := simd.LoadInt8x64Slice(toVect[int8](m)).ToMask()
+ a := archsimd.LoadInt8x64Slice(x)
+ b := archsimd.LoadInt8x64Slice(y)
+ k := archsimd.LoadInt8x64Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x64().StoreSlice(g)
w := want(x, y)
// testInt16x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt16x32CompareMasked(t *testing.T,
- f func(_, _ simd.Int16x32, m simd.Mask16x32) simd.Mask16x32,
+ f func(_, _ archsimd.Int16x32, m archsimd.Mask16x32) archsimd.Mask16x32,
want func(_, _ []int16) []int64) {
n := 32
t.Helper()
forSlicePairMasked(t, int16s, n, func(x, y []int16, m []bool) bool {
t.Helper()
- a := simd.LoadInt16x32Slice(x)
- b := simd.LoadInt16x32Slice(y)
- k := simd.LoadInt16x32Slice(toVect[int16](m)).ToMask()
+ a := archsimd.LoadInt16x32Slice(x)
+ b := archsimd.LoadInt16x32Slice(y)
+ k := archsimd.LoadInt16x32Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x32().StoreSlice(g)
w := want(x, y)
// testInt32x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt32x16CompareMasked(t *testing.T,
- f func(_, _ simd.Int32x16, m simd.Mask32x16) simd.Mask32x16,
+ f func(_, _ archsimd.Int32x16, m archsimd.Mask32x16) archsimd.Mask32x16,
want func(_, _ []int32) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, int32s, n, func(x, y []int32, m []bool) bool {
t.Helper()
- a := simd.LoadInt32x16Slice(x)
- b := simd.LoadInt32x16Slice(y)
- k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
+ a := archsimd.LoadInt32x16Slice(x)
+ b := archsimd.LoadInt32x16Slice(y)
+ k := archsimd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x16().StoreSlice(g)
w := want(x, y)
// testInt64x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testInt64x8CompareMasked(t *testing.T,
- f func(_, _ simd.Int64x8, m simd.Mask64x8) simd.Mask64x8,
+ f func(_, _ archsimd.Int64x8, m archsimd.Mask64x8) archsimd.Mask64x8,
want func(_, _ []int64) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, int64s, n, func(x, y []int64, m []bool) bool {
t.Helper()
- a := simd.LoadInt64x8Slice(x)
- b := simd.LoadInt64x8Slice(y)
- k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
+ a := archsimd.LoadInt64x8Slice(x)
+ b := archsimd.LoadInt64x8Slice(y)
+ k := archsimd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x8().StoreSlice(g)
w := want(x, y)
// testUint8x64CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint8x64CompareMasked(t *testing.T,
- f func(_, _ simd.Uint8x64, m simd.Mask8x64) simd.Mask8x64,
+ f func(_, _ archsimd.Uint8x64, m archsimd.Mask8x64) archsimd.Mask8x64,
want func(_, _ []uint8) []int64) {
n := 64
t.Helper()
forSlicePairMasked(t, uint8s, n, func(x, y []uint8, m []bool) bool {
t.Helper()
- a := simd.LoadUint8x64Slice(x)
- b := simd.LoadUint8x64Slice(y)
- k := simd.LoadInt8x64Slice(toVect[int8](m)).ToMask()
+ a := archsimd.LoadUint8x64Slice(x)
+ b := archsimd.LoadUint8x64Slice(y)
+ k := archsimd.LoadInt8x64Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x64().StoreSlice(g)
w := want(x, y)
// testUint16x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint16x32CompareMasked(t *testing.T,
- f func(_, _ simd.Uint16x32, m simd.Mask16x32) simd.Mask16x32,
+ f func(_, _ archsimd.Uint16x32, m archsimd.Mask16x32) archsimd.Mask16x32,
want func(_, _ []uint16) []int64) {
n := 32
t.Helper()
forSlicePairMasked(t, uint16s, n, func(x, y []uint16, m []bool) bool {
t.Helper()
- a := simd.LoadUint16x32Slice(x)
- b := simd.LoadUint16x32Slice(y)
- k := simd.LoadInt16x32Slice(toVect[int16](m)).ToMask()
+ a := archsimd.LoadUint16x32Slice(x)
+ b := archsimd.LoadUint16x32Slice(y)
+ k := archsimd.LoadInt16x32Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x32().StoreSlice(g)
w := want(x, y)
// testUint32x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint32x16CompareMasked(t *testing.T,
- f func(_, _ simd.Uint32x16, m simd.Mask32x16) simd.Mask32x16,
+ f func(_, _ archsimd.Uint32x16, m archsimd.Mask32x16) archsimd.Mask32x16,
want func(_, _ []uint32) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, uint32s, n, func(x, y []uint32, m []bool) bool {
t.Helper()
- a := simd.LoadUint32x16Slice(x)
- b := simd.LoadUint32x16Slice(y)
- k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
+ a := archsimd.LoadUint32x16Slice(x)
+ b := archsimd.LoadUint32x16Slice(y)
+ k := archsimd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x16().StoreSlice(g)
w := want(x, y)
// testUint64x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testUint64x8CompareMasked(t *testing.T,
- f func(_, _ simd.Uint64x8, m simd.Mask64x8) simd.Mask64x8,
+ f func(_, _ archsimd.Uint64x8, m archsimd.Mask64x8) archsimd.Mask64x8,
want func(_, _ []uint64) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, uint64s, n, func(x, y []uint64, m []bool) bool {
t.Helper()
- a := simd.LoadUint64x8Slice(x)
- b := simd.LoadUint64x8Slice(y)
- k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
+ a := archsimd.LoadUint64x8Slice(x)
+ b := archsimd.LoadUint64x8Slice(y)
+ k := archsimd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x8().StoreSlice(g)
w := want(x, y)
// testFloat32x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testFloat32x16CompareMasked(t *testing.T,
- f func(_, _ simd.Float32x16, m simd.Mask32x16) simd.Mask32x16,
+ f func(_, _ archsimd.Float32x16, m archsimd.Mask32x16) archsimd.Mask32x16,
want func(_, _ []float32) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, float32s, n, func(x, y []float32, m []bool) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
- b := simd.LoadFloat32x16Slice(y)
- k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
+ a := archsimd.LoadFloat32x16Slice(x)
+ b := archsimd.LoadFloat32x16Slice(y)
+ k := archsimd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x16().StoreSlice(g)
w := want(x, y)
// testFloat64x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask is zeroed.
func testFloat64x8CompareMasked(t *testing.T,
- f func(_, _ simd.Float64x8, m simd.Mask64x8) simd.Mask64x8,
+ f func(_, _ archsimd.Float64x8, m archsimd.Mask64x8) archsimd.Mask64x8,
want func(_, _ []float64) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, float64s, n, func(x, y []float64, m []bool) bool {
t.Helper()
- a := simd.LoadFloat64x8Slice(x)
- b := simd.LoadFloat64x8Slice(y)
- k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
+ a := archsimd.LoadFloat64x8Slice(x)
+ b := archsimd.LoadFloat64x8Slice(y)
+ k := archsimd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x8().StoreSlice(g)
w := want(x, y)
import (
"math"
- "simd/internal/test_helpers"
+ "simd/archsimd/internal/test_helpers"
"testing"
)
import (
"reflect"
- "simd"
+ "simd/archsimd"
"slices"
"testing"
)
// - Type alias is ok
// - Type conversion is ok
// - Conversion to interface is ok
- type alias = simd.Int32x4
- type maskT simd.Mask32x4
+ type alias = archsimd.Int32x4
+ type maskT archsimd.Mask32x4
type myStruct struct {
x alias
- y *simd.Int32x4
+ y *archsimd.Int32x4
z maskT
}
vals := [4]int32{1, 2, 3, 4}
- v := myStruct{x: simd.LoadInt32x4(&vals)}
+ v := myStruct{x: archsimd.LoadInt32x4(&vals)}
// The mask (bits 0b0011, set below) selects only the first two elements.
want := []int32{2, 4, 0, 0}
- y := simd.LoadInt32x4(&vals)
+ y := archsimd.LoadInt32x4(&vals)
v.y = &y
sink = y
- if !simd.X86.AVX512GFNI() {
+ if !archsimd.X86.AVX512GFNI() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
- v.z = maskT(simd.Mask32x4FromBits(0b0011))
- *v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z))
+ v.z = maskT(archsimd.Mask32x4FromBits(0b0011))
+ *v.y = v.y.Add(v.x).Masked(archsimd.Mask32x4(v.z))
got := [4]int32{}
v.y.Store(&got)
func TestUncomparable(t *testing.T) {
// Test that simd vectors are not comparable
- var x, y any = simd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), simd.LoadUint32x4(&[4]uint32{5, 6, 7, 8})
+ var x, y any = archsimd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), archsimd.LoadUint32x4(&[4]uint32{5, 6, 7, 8})
shouldPanic := func(fn func()) {
defer func() {
if recover() == nil {
xv := [4]int32{1, 2, 3, 4}
yv := [4]int32{5, 6, 7, 8}
want := []int32{6, 8, 10, 12}
- x := simd.LoadInt32x4(&xv)
- y := simd.LoadInt32x4(&yv)
- fn := simd.Int32x4.Add
+ x := archsimd.LoadInt32x4(&xv)
+ y := archsimd.LoadInt32x4(&yv)
+ fn := archsimd.Int32x4.Add
sink = fn
x = fn(x, y)
got := [4]int32{}
xv := [4]int32{1, 2, 3, 4}
yv := [4]int32{5, 6, 7, 8}
want := []int32{6, 8, 10, 12}
- x := simd.LoadInt32x4(&xv)
- y := simd.LoadInt32x4(&yv)
+ x := archsimd.LoadInt32x4(&xv)
+ y := archsimd.LoadInt32x4(&yv)
m, ok := reflect.TypeOf(x).MethodByName("Add")
if !ok {
t.Fatal("Add method not found")
}
- fn := m.Func.Interface().(func(x, y simd.Int32x4) simd.Int32x4)
+ fn := m.Func.Interface().(func(x, y archsimd.Int32x4) archsimd.Int32x4)
x = fn(x, y)
got := [4]int32{}
x.Store(&got)
}
func TestVectorConversion(t *testing.T) {
- if !simd.X86.AVX512GFNI() {
+ if !archsimd.X86.AVX512GFNI() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
xv := [4]int32{1, 2, 3, 4}
- x := simd.LoadInt32x4(&xv)
+ x := archsimd.LoadInt32x4(&xv)
xPromoted := x.AsInt64x2()
xPromotedDemoted := xPromoted.AsInt32x4()
got := [4]int32{}
}
func TestMaskConversion(t *testing.T) {
- if !simd.X86.AVX512GFNI() {
+ if !archsimd.X86.AVX512GFNI() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
- x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
- mask := simd.Int32x4{}.Sub(x).ToMask()
- y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
+ x := archsimd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
+ mask := archsimd.Int32x4{}.Sub(x).ToMask()
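+ // 0 - x is nonzero exactly where x is nonzero, so the mask selects lanes 0
+ // and 2 ({5, 0, 7, 0} -> true, false, true, false).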
+ y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
want := [4]int32{6, 0, 10, 0}
got := make([]int32, 4)
y.StoreSlice(got)
}
func TestPermute(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0}
want := []int64{8, 7, 6, 5, 4, 3, 2, 1}
got := make([]int64, 8)
- simd.LoadInt64x8Slice(x).Permute(simd.LoadUint64x8Slice(indices)).StoreSlice(got)
+ archsimd.LoadInt64x8Slice(x).Permute(archsimd.LoadUint64x8Slice(indices)).StoreSlice(got)
checkSlices(t, got, want)
}
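// PermuteOrZero differs from Permute in that a negative index yields a zero
// lane (Permute takes unsigned indices), as the -1 entries below show.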
indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11}
want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12}
got := make([]uint8, len(x))
- simd.LoadUint8x16Slice(x).PermuteOrZero(simd.LoadInt8x16Slice(indices)).StoreSlice(got)
+ archsimd.LoadUint8x16Slice(x).PermuteOrZero(archsimd.LoadInt8x16Slice(indices)).StoreSlice(got)
checkSlices(t, got, want)
}
func TestConcatPermute(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
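// ConcatPermute indexes the 16-element concatenation of x and y: indices 0..7
// select from x, 8..15 from y (x is presumably {1..8} and y its negation,
// given the expected values below).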
indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
want := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
got := make([]int64, 8)
- simd.LoadInt64x8Slice(x).ConcatPermute(simd.LoadInt64x8Slice(y), simd.LoadUint64x8Slice(indices)).StoreSlice(got)
+ archsimd.LoadInt64x8Slice(x).ConcatPermute(archsimd.LoadInt64x8Slice(y), archsimd.LoadUint64x8Slice(indices)).StoreSlice(got)
checkSlices(t, got, want)
}
func TestCompress(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
- v1234 := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
- v2400 := v1234.Compress(simd.Mask32x4FromBits(0b1010))
+ v1234 := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
+ v2400 := v1234.Compress(archsimd.Mask32x4FromBits(0b1010))
got := make([]int32, 4)
v2400.StoreSlice(got)
want := []int32{2, 4, 0, 0}
}
func TestExpand(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
- v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
- v2400 := v3400.Expand(simd.Mask32x4FromBits(0b1010))
+ v3400 := archsimd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
+ v2400 := v3400.Expand(archsimd.Mask32x4FromBits(0b1010))
got := make([]int32, 4)
v2400.StoreSlice(got)
want := []int32{0, 3, 0, 4}
func TestShiftAll(t *testing.T) {
got := make([]int32, 4)
- simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got)
+ archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got)
for _, v := range got {
if v != 0b1100 {
t.Errorf("expect 0b1100, got %b", v)
}
}
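// testShiftAllVal is a package-level variable (presumably 3, since 0b11
// shifted left by 3 is the expected 0b11000); using a variable rather than a
// constant forces the non-immediate shift code path.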
- simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got)
+ archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got)
for _, v := range got {
if v != 0b11000 {
t.Errorf("expect 0b11000, got %b", v)
func TestSlicesInt8(t *testing.T) {
a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
- v := simd.LoadInt8x32Slice(a)
+ v := archsimd.LoadInt8x32Slice(a)
b := make([]int8, 32, 32)
v.StoreSlice(b)
checkSlices(t, a, b)
func TestSlicesInt8SetElem(t *testing.T) {
a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
- v := simd.LoadInt8x16Slice(a)
+ v := archsimd.LoadInt8x16Slice(a)
v = v.SetElem(3, 13)
a[3] = 13
func TestSlicesInt8GetElem(t *testing.T) {
a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
- v := simd.LoadInt8x16Slice(a)
+ v := archsimd.LoadInt8x16Slice(a)
e := v.GetElem(2)
if e != a[2] {
t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2])
}()
a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic
- v := simd.LoadInt8x32Slice(a)
+ v := archsimd.LoadInt8x32Slice(a)
b := make([]int8, 32, 32)
v.StoreSlice(b)
checkSlices(t, a, b)
}()
a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
- v := simd.LoadInt8x32Slice(a)
+ v := archsimd.LoadInt8x32Slice(a)
b := make([]int8, 31) // TOO SHORT, should panic
v.StoreSlice(b)
checkSlices(t, a, b)
func TestSlicesFloat64(t *testing.T) {
a := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine
- v := simd.LoadFloat64x4Slice(a)
+ v := archsimd.LoadFloat64x4Slice(a)
b := make([]float64, 4, 4)
v.StoreSlice(b)
for i := range b {
// TODO: try to reduce this test to be smaller.
func TestMergeLocals(t *testing.T) {
- testMergeLocalswrapper(t, simd.Int64x4.Add)
+ testMergeLocalswrapper(t, archsimd.Int64x4.Add)
}
//go:noinline
func forceSpill() {}
-func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) simd.Int64x4) {
+func testMergeLocalswrapper(t *testing.T, op func(archsimd.Int64x4, archsimd.Int64x4) archsimd.Int64x4) {
t.Helper()
s0 := []int64{0, 1, 2, 3}
s1 := []int64{-1, 0, -1, 0}
want := []int64{-1, 1, 1, 3}
- v := simd.LoadInt64x4Slice(s0)
- m := simd.LoadInt64x4Slice(s1)
+ v := archsimd.LoadInt64x4Slice(s0)
+ m := archsimd.LoadInt64x4Slice(s1)
forceSpill()
got := make([]int64, 4)
gotv := op(v, m)
}
func TestBitMaskFromBits(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
results := [2]int64{}
want := [2]int64{0, 6}
- m := simd.Mask64x2FromBits(0b10)
- simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
+ m := archsimd.Mask64x2FromBits(0b10)
+ archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
for i := range 2 {
if results[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
var maskForTestBitMaskFromBitsLoad = uint8(0b10)
func TestBitMaskFromBitsLoad(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
results := [2]int64{}
want := [2]int64{0, 6}
- m := simd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad)
- simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
+ m := archsimd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad)
+ archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
for i := range 2 {
if results[i] != want[i] {
t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
}
func TestBitMaskToBits(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
- if v := simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 {
+ if v := archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 {
t.Errorf("Want 0b101, got %b", v)
}
}
var maskForTestBitMaskFromBitsStore uint8
func TestBitMaskToBitsStore(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
- maskForTestBitMaskFromBitsStore = simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits()
+ maskForTestBitMaskFromBitsStore = archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits()
if maskForTestBitMaskFromBitsStore != 0b101 {
t.Errorf("Want 0b101, got %b", maskForTestBitMaskFromBitsStore)
}
k := make([]int64, 4, 4)
s := make([]float64, 4, 4)
- a := simd.LoadFloat64x4Slice([]float64{1, 2, 3, 4})
- b := simd.LoadFloat64x4Slice([]float64{4, 2, 3, 1})
+ a := archsimd.LoadFloat64x4Slice([]float64{1, 2, 3, 4})
+ b := archsimd.LoadFloat64x4Slice([]float64{4, 2, 3, 1})
g := a.Greater(b)
g.AsInt64x4().StoreSlice(k)
c := a.Merge(b, g)
}
func TestMergeFloat512(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
k := make([]int64, 8, 8)
s := make([]float64, 8, 8)
- a := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
- b := simd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1})
+ a := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
+ b := archsimd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1})
g := a.Greater(b)
g.AsInt64x8().StoreSlice(k)
c := a.Merge(b, g)
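// ro is deliberately a package-level variable rather than a constant, so the
// test below exercises the variable-rotate (non-immediate) code path.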
var ro uint8 = 2
func TestRotateAllVariable(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
got := make([]int32, 4)
- simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got)
+ archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got)
for _, v := range got {
if v != 0b1100 {
t.Errorf("Want 0b1100, got %b", v)
func TestBroadcastUint32x4(t *testing.T) {
s := make([]uint32, 4, 4)
- simd.BroadcastUint32x4(123456789).StoreSlice(s)
+ archsimd.BroadcastUint32x4(123456789).StoreSlice(s)
checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789})
}
func TestBroadcastFloat32x8(t *testing.T) {
s := make([]float32, 8, 8)
- simd.BroadcastFloat32x8(123456789).StoreSlice(s)
+ archsimd.BroadcastFloat32x8(123456789).StoreSlice(s)
checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789})
}
func TestBroadcastFloat64x2(t *testing.T) {
s := make([]float64, 2, 2)
- simd.BroadcastFloat64x2(123456789).StoreSlice(s)
+ archsimd.BroadcastFloat64x2(123456789).StoreSlice(s)
checkSlices(t, s, []float64{123456789, 123456789})
}
func TestBroadcastUint64x2(t *testing.T) {
s := make([]uint64, 2, 2)
- simd.BroadcastUint64x2(123456789).StoreSlice(s)
+ archsimd.BroadcastUint64x2(123456789).StoreSlice(s)
checkSlices(t, s, []uint64{123456789, 123456789})
}
func TestBroadcastUint16x8(t *testing.T) {
s := make([]uint16, 8, 8)
- simd.BroadcastUint16x8(12345).StoreSlice(s)
+ archsimd.BroadcastUint16x8(12345).StoreSlice(s)
checkSlices(t, s, []uint16{12345, 12345, 12345, 12345, 12345, 12345, 12345, 12345})
}
func TestBroadcastInt8x32(t *testing.T) {
s := make([]int8, 32, 32)
- simd.BroadcastInt8x32(-123).StoreSlice(s)
+ archsimd.BroadcastInt8x32(-123).StoreSlice(s)
checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123,
-123, -123, -123, -123, -123, -123, -123, -123,
-123, -123, -123, -123, -123, -123, -123, -123,
}
func TestMaskOpt512(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
k := make([]int64, 8, 8)
s := make([]float64, 8, 8)
- a := simd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0})
- b := simd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1})
- c := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
- d := simd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16})
+ a := archsimd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0})
+ b := archsimd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1})
+ c := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
+ d := archsimd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16})
g := a.Greater(b)
e := c.Add(d).Masked(g)
e.StoreSlice(s)
// matrices, but then flattens the rows in order, i.e.
// x: ABCD ==> a: A1B2
// y: 1234 b: C3D4
-func flattenedTranspose(x, y simd.Int32x4) (a, b simd.Int32x4) {
+func flattenedTranspose(x, y archsimd.Int32x4) (a, b archsimd.Int32x4) {
return x.InterleaveLo(y), x.InterleaveHi(y)
}
r := make([]int32, 4, 4)
s := make([]int32, 4, 4)
- x := simd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
- y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
+ x := archsimd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
+ y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
a, b := flattenedTranspose(x, y)
a.StoreSlice(r)
func TestClearAVXUpperBits(t *testing.T) {
// Test that ClearAVXUpperBits is safe even if there are SIMD values
// alive (although usually one should not do this).
- if !simd.X86.AVX2() {
+ if !archsimd.X86.AVX2() {
t.Skip("Test requires X86.AVX2, not available on this hardware")
return
}
r := make([]int64, 4)
s := make([]int64, 4)
- x := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
- y := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
+ x := archsimd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
+ y := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
x.Add(y).StoreSlice(r)
- simd.ClearAVXUpperBits()
+ archsimd.ClearAVXUpperBits()
x.Sub(y).StoreSlice(s)
checkSlices[int64](t, r, []int64{11, 22, 33, 44})
}
func TestLeadingZeros(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
src := []uint64{0b1111, 0}
want := []uint64{60, 64}
got := make([]uint64, 2)
- simd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got)
+ archsimd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got)
for i := range 2 {
if want[i] != got[i] {
t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i])
}
func TestIsZero(t *testing.T) {
- v1 := simd.LoadUint64x2Slice([]uint64{0, 1})
- v2 := simd.LoadUint64x2Slice([]uint64{0, 0})
+ v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1})
+ v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0})
if v1.IsZero() {
t.Errorf("Result incorrect, want false, got true")
}
}
func TestSelect4FromPairConst(t *testing.T) {
- x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
- y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
+ x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
+ y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
llll := x.SelectFromPair(0, 1, 2, 3, y)
hhhh := x.SelectFromPair(4, 5, 6, 7, y)
r := make([]int32, 4, 4)
- foo := func(v simd.Int32x4, a, b, c, d int32) {
+ foo := func(v archsimd.Int32x4, a, b, c, d int32) {
v.StoreSlice(r)
checkSlices[int32](t, r, []int32{a, b, c, d})
}
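// SelectFromPair indexes the concatenation of x and y: indices 0..3 pick
// lanes from x and 4..7 pick lanes from y, so with these inputs each index
// equals the element it selects (e.g. x.SelectFromPair(4, 0, 6, 2, y) would
// yield {4, 0, 6, 2}).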
}
//go:noinline
-func selectFromPairInt32x4(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) simd.Int32x4 {
+func selectFromPairInt32x4(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 {
return x.SelectFromPair(a, b, c, d, y)
}
func TestSelect4FromPairVar(t *testing.T) {
- x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
- y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
+ x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
+ y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y)
hhhh := selectFromPairInt32x4(x, 4, 5, 6, 7, y)
r := make([]int32, 4, 4)
- foo := func(v simd.Int32x4, a, b, c, d int32) {
+ foo := func(v archsimd.Int32x4, a, b, c, d int32) {
v.StoreSlice(r)
checkSlices[int32](t, r, []int32{a, b, c, d})
}
}
func TestSelect4FromPairConstGrouped(t *testing.T) {
- x := simd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13})
- y := simd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17})
+ x := archsimd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13})
+ y := archsimd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17})
llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
r := make([]float32, 8, 8)
- foo := func(v simd.Float32x8, a, b, c, d float32) {
+ foo := func(v archsimd.Float32x8, a, b, c, d float32) {
v.StoreSlice(r)
checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d})
}
}
func TestSelectFromPairConstGroupedUint32x16(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
- x := simd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33})
- y := simd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37})
+ x := archsimd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33})
+ y := archsimd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37})
llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
r := make([]uint32, 16, 16)
- foo := func(v simd.Uint32x16, a, b, c, d uint32) {
+ foo := func(v archsimd.Uint32x16, a, b, c, d uint32) {
v.StoreSlice(r)
checkSlices[uint32](t, r, []uint32{a, b, c, d,
10 + a, 10 + b, 10 + c, 10 + d,
}
func TestSelect128FromPair(t *testing.T) {
- x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
- y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
+ x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
+ y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
aa := x.Select128FromPair(0, 0, y)
ab := x.Select128FromPair(0, 1, y)
r := make([]uint64, 4, 4)
- foo := func(v simd.Uint64x4, a, b uint64) {
+ foo := func(v archsimd.Uint64x4, a, b uint64) {
a, b = 2*a, 2*b
v.StoreSlice(r)
checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
}
func TestSelect128FromPairError(t *testing.T) {
- x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
- y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
+ x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
+ y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
defer func() {
if r := recover(); r != nil {
}
//go:noinline
-func select128FromPair(x simd.Uint64x4, lo, hi uint8, y simd.Uint64x4) simd.Uint64x4 {
+func select128FromPair(x archsimd.Uint64x4, lo, hi uint8, y archsimd.Uint64x4) archsimd.Uint64x4 {
return x.Select128FromPair(lo, hi, y)
}
func TestSelect128FromPairVar(t *testing.T) {
- x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
- y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
+ x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
+ y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
aa := select128FromPair(x, 0, 0, y)
ab := select128FromPair(x, 0, 1, y)
r := make([]uint64, 4, 4)
- foo := func(v simd.Uint64x4, a, b uint64) {
+ foo := func(v archsimd.Uint64x4, a, b uint64) {
a, b = 2*a, 2*b
v.StoreSlice(r)
checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
}
func TestSelect2FromPairConst(t *testing.T) {
- x := simd.LoadUint64x2Slice([]uint64{0, 1})
- y := simd.LoadUint64x2Slice([]uint64{2, 3})
+ x := archsimd.LoadUint64x2Slice([]uint64{0, 1})
+ y := archsimd.LoadUint64x2Slice([]uint64{2, 3})
ll := x.SelectFromPair(0, 1, y)
hh := x.SelectFromPair(3, 2, y)
r := make([]uint64, 2, 2)
- foo := func(v simd.Uint64x2, a, b uint64) {
+ foo := func(v archsimd.Uint64x2, a, b uint64) {
v.StoreSlice(r)
checkSlices[uint64](t, r, []uint64{a, b})
}
}
func TestSelect2FromPairConstGroupedUint(t *testing.T) {
- x := simd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
- y := simd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
+ x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
+ y := archsimd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
ll := x.SelectFromPairGrouped(0, 1, y)
hh := x.SelectFromPairGrouped(3, 2, y)
r := make([]uint64, 4, 4)
- foo := func(v simd.Uint64x4, a, b uint64) {
+ foo := func(v archsimd.Uint64x4, a, b uint64) {
v.StoreSlice(r)
checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10})
}
}
func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
- x := simd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
- y := simd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
+ x := archsimd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
+ y := archsimd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
ll := x.SelectFromPairGrouped(0, 1, y)
hh := x.SelectFromPairGrouped(3, 2, y)
r := make([]float64, 4, 4)
- foo := func(v simd.Float64x4, a, b float64) {
+ foo := func(v archsimd.Float64x4, a, b float64) {
v.StoreSlice(r)
checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10})
}
}
func TestSelect2FromPairConstGroupedInt(t *testing.T) {
- x := simd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
- y := simd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
+ x := archsimd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
+ y := archsimd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
ll := x.SelectFromPairGrouped(0, 1, y)
hh := x.SelectFromPairGrouped(3, 2, y)
r := make([]int64, 4, 4)
- foo := func(v simd.Int64x4, a, b int64) {
+ foo := func(v archsimd.Int64x4, a, b int64) {
v.StoreSlice(r)
checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10})
}
}
func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
- x := simd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
- y := simd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
+ x := archsimd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
+ y := archsimd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
ll := x.SelectFromPairGrouped(0, 1, y)
hh := x.SelectFromPairGrouped(3, 2, y)
r := make([]int64, 8, 8)
- foo := func(v simd.Int64x8, a, b int64) {
+ foo := func(v archsimd.Int64x8, a, b int64) {
v.StoreSlice(r)
checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30})
}
}
func TestString(t *testing.T) {
- x := simd.LoadUint32x4Slice([]uint32{0, 1, 2, 3})
- y := simd.LoadInt64x4Slice([]int64{-4, -5, -6, -7})
- z := simd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9})
- w := simd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9})
+ x := archsimd.LoadUint32x4Slice([]uint32{0, 1, 2, 3})
+ y := archsimd.LoadInt64x4Slice([]int64{-4, -5, -6, -7})
+ z := archsimd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9})
+ w := archsimd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9})
sx := "{0,1,2,3}"
sy := "{-4,-5,-6,-7}"
// applyTo3 returns a 16-element slice of the results of
// applying f to the respective elements of vectors x, y, and z.
-func applyTo3(x, y, z simd.Int32x16, f func(x, y, z int32) int32) []int32 {
+func applyTo3(x, y, z archsimd.Int32x16, f func(x, y, z int32) int32) []int32 {
ax, ay, az := a(), a(), a()
x.StoreSlice(ax)
y.StoreSlice(ay)
// applyTo3 returns a 16-element slice of the results of
// applying f to the respective elements of vectors x, y, z, and w.
-func applyTo4(x, y, z, w simd.Int32x16, f func(x, y, z, w int32) int32) []int32 {
+func applyTo4(x, y, z, w archsimd.Int32x16, f func(x, y, z, w int32) int32) []int32 {
ax, ay, az, aw := a(), a(), a(), a()
x.StoreSlice(ax)
y.StoreSlice(ay)
}
func TestSelectTernOptInt32x16(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
- x := simd.LoadInt32x16Slice(ax)
- y := simd.LoadInt32x16Slice(ay)
- z := simd.LoadInt32x16Slice(az)
- w := simd.LoadInt32x16Slice(aw)
- m := simd.LoadInt32x16Slice(am)
+ x := archsimd.LoadInt32x16Slice(ax)
+ y := archsimd.LoadInt32x16Slice(ay)
+ z := archsimd.LoadInt32x16Slice(az)
+ w := archsimd.LoadInt32x16Slice(aw)
+ m := archsimd.LoadInt32x16Slice(am)
- foo := func(v simd.Int32x16, s []int32) {
+ foo := func(v archsimd.Int32x16, s []int32) {
r := make([]int32, 16, 16)
v.StoreSlice(r)
checkSlices[int32](t, r, s)
}
func TestMaskedMerge(t *testing.T) {
- x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
- y := simd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
- z := simd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
+ x := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
+ y := archsimd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
+ z := archsimd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
res := make([]int64, 4)
expected := []int64{6, 8, -3, -4}
mask := x.Less(y)
- if simd.X86.AVX512() {
+ if archsimd.X86.AVX512() {
x.Add(y).Merge(z, mask).StoreSlice(res)
} else {
x.Add(y).Merge(z, mask).StoreSlice(res)
}
func TestDotProductQuadruple(t *testing.T) {
- if !simd.X86.AVXVNNI() {
+ if !archsimd.X86.AVXVNNI() {
t.Skip("Test requires X86.AVXVNNI, not available on this hardware")
return
}
wanted1[i] = 30
wanted2[i] = 30
}
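// DotProductQuadruple multiplies four adjacent int8/uint8 pairs and sums them
// into one int32 lane; with inputs presumably repeating 1,2,3,4, every lane
// is 1*1 + 2*2 + 3*3 + 4*4 = 30, matching wanted1 and wanted2.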
- x := simd.LoadInt8x16Slice(xd)
- y := simd.LoadUint8x16Slice(yd)
- z := simd.LoadInt32x4Slice(zd)
+ x := archsimd.LoadInt8x16Slice(xd)
+ y := archsimd.LoadUint8x16Slice(yd)
+ z := archsimd.LoadInt32x4Slice(zd)
x.DotProductQuadruple(y).StoreSlice(res1)
x.DotProductQuadruple(y).Add(z).StoreSlice(res2)
for i := range 4 {
x := []int32{11, 12, 13, 14}
want := []int32{12, 13, 14, 11}
got := make([]int32, 4)
- simd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
+ archsimd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
checkSlices(t, got, want)
}
x := []int32{11, 12, 13, 14, 21, 22, 23, 24}
want := []int32{12, 13, 14, 11, 22, 23, 24, 21}
got := make([]int32, 8)
- simd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
+ archsimd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
checkSlices(t, got, want)
}
x := []int16{-1, -2, -3, -4, 11, 12, 13, 14}
want := []int16{-1, -2, -3, -4, 12, 13, 14, 11}
got := make([]int16, len(x))
- simd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
+ archsimd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
checkSlices(t, got, want)
}
x := []int16{11, 12, 13, 14, 4, 5, 6, 7}
want := []int16{12, 13, 14, 11, 4, 5, 6, 7}
got := make([]int16, len(x))
- simd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
+ archsimd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
checkSlices(t, got, want)
}
x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114}
want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111}
got := make([]int16, len(x))
- simd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
+ archsimd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
checkSlices(t, got, want)
}
x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17}
want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17}
got := make([]int16, len(x))
- simd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
+ archsimd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
checkSlices(t, got, want)
}
func TestClMul(t *testing.T) {
- var x = simd.LoadUint64x2Slice([]uint64{1, 5})
- var y = simd.LoadUint64x2Slice([]uint64{3, 9})
+ var x = archsimd.LoadUint64x2Slice([]uint64{1, 5})
+ var y = archsimd.LoadUint64x2Slice([]uint64{3, 9})
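+ // ClMul is carry-less (XOR-based) multiplication: for example
+ // 5 (x) 9 = 0b101 (x) 0b1001 = 0b1001 ^ (0b1001 << 2) = 0b101101 = 45.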
- foo := func(v simd.Uint64x2, s []uint64) {
+ foo := func(v archsimd.Uint64x2, s []uint64) {
r := make([]uint64, 2, 2)
v.StoreSlice(r)
checkSlices[uint64](t, r, s)
package simd_test
import (
- "simd"
+ "simd/archsimd"
"testing"
)
func TestSlicePartInt8x16(t *testing.T) {
Do(t, 16, func(a, c []int8) {
- u := simd.LoadInt8x16SlicePart(a)
+ u := archsimd.LoadInt8x16SlicePart(a)
u.StoreSlice(c)
})
}
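// The SlicePart loaders accept a slice shorter than the vector and
// (presumably) zero-fill the missing lanes; the loops below shrink the input
// one element at a time and compare against a correspondingly adjusted
// expected slice.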
b := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
for i := 32; i >= 0; i-- {
- u := simd.LoadInt8x32SlicePart(a[:i])
+ u := archsimd.LoadInt8x32SlicePart(a[:i])
c := make([]int8, 32, 32)
u.StoreSlice(c)
checkSlices(t, c, b)
a := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
b := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
for i := 16; i >= 0; i-- {
- u := simd.LoadUint8x16SlicePart(a[:i])
+ u := archsimd.LoadUint8x16SlicePart(a[:i])
c := make([]uint8, 32, 32)
u.StoreSlice(c)
checkSlices(t, c, b)
b := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
for i := 32; i >= 0; i-- {
- u := simd.LoadUint8x32SlicePart(a[:i])
+ u := archsimd.LoadUint8x32SlicePart(a[:i])
c := make([]uint8, 32, 32)
u.StoreSlice(c)
checkSlices(t, c, b)
a := []int16{1, 2, 3, 4, 5, 6, 7, 8}
b := []int16{1, 2, 3, 4, 5, 6, 7, 8}
for i := 8; i >= 0; i-- {
- u := simd.LoadInt16x8SlicePart(a[:i])
+ u := archsimd.LoadInt16x8SlicePart(a[:i])
c := make([]int16, 16, 16)
u.StoreSlice(c)
checkSlices(t, c, b)
a := []int16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
b := []int16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
for i := 16; i >= 0; i-- {
- u := simd.LoadInt16x16SlicePart(a[:i])
+ u := archsimd.LoadInt16x16SlicePart(a[:i])
c := make([]int16, 16, 16)
u.StoreSlice(c)
checkSlices(t, c, b)
a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
b := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
for i := 16; i >= 0; i-- {
- v := simd.LoadInt8x16Slice(a)
+ v := archsimd.LoadInt8x16Slice(a)
c := make([]int8, 32, 32)
v.StoreSlicePart(c[:i])
checkSlices(t, c, b)
a := []int16{1, 2, 3, 4, 5, 6, 7, 8}
b := []int16{1, 2, 3, 4, 5, 6, 7, 8}
for i := 8; i >= 0; i-- {
- v := simd.LoadInt16x8Slice(a)
+ v := archsimd.LoadInt16x8Slice(a)
c := make([]int16, 32, 32)
v.StoreSlicePart(c[:i])
checkSlices(t, c, b)
a := []int16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
b := []int16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
for i := 16; i >= 0; i-- {
- v := simd.LoadInt16x16Slice(a)
+ v := archsimd.LoadInt16x16Slice(a)
c := make([]int16, 32, 32)
v.StoreSlicePart(c[:i])
checkSlices(t, c, b)
a := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
b := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
for i := 16; i >= 0; i-- {
- v := simd.LoadUint8x16Slice(a)
+ v := archsimd.LoadUint8x16Slice(a)
c := make([]uint8, 32, 32)
v.StoreSlicePart(c[:i])
checkSlices(t, c, b)
a := []uint16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
b := []uint16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
for i := 16; i >= 0; i-- {
- v := simd.LoadUint16x16Slice(a)
+ v := archsimd.LoadUint16x16Slice(a)
c := make([]uint16, 32, 32)
v.StoreSlicePart(c[:i])
checkSlices(t, c, b)
b := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
for i := 32; i >= 0; i-- {
- v := simd.LoadUint8x32Slice(a)
+ v := archsimd.LoadUint8x32Slice(a)
c := make([]uint8, 32, 32)
v.StoreSlicePart(c[:i])
checkSlices(t, c, b)
// Test the load first
// e is a partial slice.
e := a[i:]
- v := simd.LoadInt32x4SlicePart(e)
+ v := archsimd.LoadInt32x4SlicePart(e)
// d contains what a ought to contain
d := make([]int32, L)
for j := 0; j < len(e) && j < len(d); j++ {
// Test the load first
// e is a partial slice.
e := a[i:]
- v := simd.LoadUint64x4SlicePart(e)
+ v := archsimd.LoadUint64x4SlicePart(e)
// d contains what a ought to contain
d := make([]uint64, L)
for j := 0; j < len(e) && j < len(d); j++ {
// Test the load first
// e is a partial slice.
e := a[i:]
- v := simd.LoadFloat64x2SlicePart(e)
+ v := archsimd.LoadFloat64x2SlicePart(e)
// d contains what a ought to contain
d := make([]float64, L)
for j := 0; j < len(e) && j < len(d); j++ {
// Test the load first
// e is a partial slice.
e := a[i:]
- v := simd.LoadFloat32x8SlicePart(e)
+ v := archsimd.LoadFloat32x8SlicePart(e)
// d contains what a ought to contain
d := make([]float32, L)
for j := 0; j < len(e) && j < len(d); j++ {
// 512-bit load
func TestSlicePartInt64(t *testing.T) {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
t.Skip("Test requires X86.AVX512, not available on this hardware")
return
}
// Test the load first
// e is a partial slice.
e := a[i:]
- v := simd.LoadInt64x8SlicePart(e)
+ v := archsimd.LoadInt64x8SlicePart(e)
// d contains what a ought to contain
d := make([]int64, L)
for j := 0; j < len(e) && j < len(d); j++ {
package simd_test
import (
- "simd"
+ "simd/archsimd"
"testing"
)
// testInt8x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt8x16Ternary(t *testing.T, f func(_, _, _ simd.Int8x16) simd.Int8x16, want func(_, _, _ []int8) []int8) {
+func testInt8x16Ternary(t *testing.T, f func(_, _, _ archsimd.Int8x16) archsimd.Int8x16, want func(_, _, _ []int8) []int8) {
n := 16
t.Helper()
forSliceTriple(t, int8s, n, func(x, y, z []int8) bool {
t.Helper()
- a := simd.LoadInt8x16Slice(x)
- b := simd.LoadInt8x16Slice(y)
- c := simd.LoadInt8x16Slice(z)
+ a := archsimd.LoadInt8x16Slice(x)
+ b := archsimd.LoadInt8x16Slice(y)
+ c := archsimd.LoadInt8x16Slice(z)
g := make([]int8, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt16x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt16x8Ternary(t *testing.T, f func(_, _, _ simd.Int16x8) simd.Int16x8, want func(_, _, _ []int16) []int16) {
+func testInt16x8Ternary(t *testing.T, f func(_, _, _ archsimd.Int16x8) archsimd.Int16x8, want func(_, _, _ []int16) []int16) {
n := 8
t.Helper()
forSliceTriple(t, int16s, n, func(x, y, z []int16) bool {
t.Helper()
- a := simd.LoadInt16x8Slice(x)
- b := simd.LoadInt16x8Slice(y)
- c := simd.LoadInt16x8Slice(z)
+ a := archsimd.LoadInt16x8Slice(x)
+ b := archsimd.LoadInt16x8Slice(y)
+ c := archsimd.LoadInt16x8Slice(z)
g := make([]int16, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt32x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt32x4Ternary(t *testing.T, f func(_, _, _ simd.Int32x4) simd.Int32x4, want func(_, _, _ []int32) []int32) {
+func testInt32x4Ternary(t *testing.T, f func(_, _, _ archsimd.Int32x4) archsimd.Int32x4, want func(_, _, _ []int32) []int32) {
n := 4
t.Helper()
forSliceTriple(t, int32s, n, func(x, y, z []int32) bool {
t.Helper()
- a := simd.LoadInt32x4Slice(x)
- b := simd.LoadInt32x4Slice(y)
- c := simd.LoadInt32x4Slice(z)
+ a := archsimd.LoadInt32x4Slice(x)
+ b := archsimd.LoadInt32x4Slice(y)
+ c := archsimd.LoadInt32x4Slice(z)
g := make([]int32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt64x2Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt64x2Ternary(t *testing.T, f func(_, _, _ simd.Int64x2) simd.Int64x2, want func(_, _, _ []int64) []int64) {
+func testInt64x2Ternary(t *testing.T, f func(_, _, _ archsimd.Int64x2) archsimd.Int64x2, want func(_, _, _ []int64) []int64) {
n := 2
t.Helper()
forSliceTriple(t, int64s, n, func(x, y, z []int64) bool {
t.Helper()
- a := simd.LoadInt64x2Slice(x)
- b := simd.LoadInt64x2Slice(y)
- c := simd.LoadInt64x2Slice(z)
+ a := archsimd.LoadInt64x2Slice(x)
+ b := archsimd.LoadInt64x2Slice(y)
+ c := archsimd.LoadInt64x2Slice(z)
g := make([]int64, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint8x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint8x16Ternary(t *testing.T, f func(_, _, _ simd.Uint8x16) simd.Uint8x16, want func(_, _, _ []uint8) []uint8) {
+func testUint8x16Ternary(t *testing.T, f func(_, _, _ archsimd.Uint8x16) archsimd.Uint8x16, want func(_, _, _ []uint8) []uint8) {
n := 16
t.Helper()
forSliceTriple(t, uint8s, n, func(x, y, z []uint8) bool {
t.Helper()
- a := simd.LoadUint8x16Slice(x)
- b := simd.LoadUint8x16Slice(y)
- c := simd.LoadUint8x16Slice(z)
+ a := archsimd.LoadUint8x16Slice(x)
+ b := archsimd.LoadUint8x16Slice(y)
+ c := archsimd.LoadUint8x16Slice(z)
g := make([]uint8, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint16x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint16x8Ternary(t *testing.T, f func(_, _, _ simd.Uint16x8) simd.Uint16x8, want func(_, _, _ []uint16) []uint16) {
+func testUint16x8Ternary(t *testing.T, f func(_, _, _ archsimd.Uint16x8) archsimd.Uint16x8, want func(_, _, _ []uint16) []uint16) {
n := 8
t.Helper()
forSliceTriple(t, uint16s, n, func(x, y, z []uint16) bool {
t.Helper()
- a := simd.LoadUint16x8Slice(x)
- b := simd.LoadUint16x8Slice(y)
- c := simd.LoadUint16x8Slice(z)
+ a := archsimd.LoadUint16x8Slice(x)
+ b := archsimd.LoadUint16x8Slice(y)
+ c := archsimd.LoadUint16x8Slice(z)
g := make([]uint16, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint32x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint32x4Ternary(t *testing.T, f func(_, _, _ simd.Uint32x4) simd.Uint32x4, want func(_, _, _ []uint32) []uint32) {
+func testUint32x4Ternary(t *testing.T, f func(_, _, _ archsimd.Uint32x4) archsimd.Uint32x4, want func(_, _, _ []uint32) []uint32) {
n := 4
t.Helper()
forSliceTriple(t, uint32s, n, func(x, y, z []uint32) bool {
t.Helper()
- a := simd.LoadUint32x4Slice(x)
- b := simd.LoadUint32x4Slice(y)
- c := simd.LoadUint32x4Slice(z)
+ a := archsimd.LoadUint32x4Slice(x)
+ b := archsimd.LoadUint32x4Slice(y)
+ c := archsimd.LoadUint32x4Slice(z)
g := make([]uint32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint64x2Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint64x2Ternary(t *testing.T, f func(_, _, _ simd.Uint64x2) simd.Uint64x2, want func(_, _, _ []uint64) []uint64) {
+func testUint64x2Ternary(t *testing.T, f func(_, _, _ archsimd.Uint64x2) archsimd.Uint64x2, want func(_, _, _ []uint64) []uint64) {
n := 2
t.Helper()
forSliceTriple(t, uint64s, n, func(x, y, z []uint64) bool {
t.Helper()
- a := simd.LoadUint64x2Slice(x)
- b := simd.LoadUint64x2Slice(y)
- c := simd.LoadUint64x2Slice(z)
+ a := archsimd.LoadUint64x2Slice(x)
+ b := archsimd.LoadUint64x2Slice(y)
+ c := archsimd.LoadUint64x2Slice(z)
g := make([]uint64, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testFloat32x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat32x4Ternary(t *testing.T, f func(_, _, _ simd.Float32x4) simd.Float32x4, want func(_, _, _ []float32) []float32) {
+func testFloat32x4Ternary(t *testing.T, f func(_, _, _ archsimd.Float32x4) archsimd.Float32x4, want func(_, _, _ []float32) []float32) {
n := 4
t.Helper()
forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
t.Helper()
- a := simd.LoadFloat32x4Slice(x)
- b := simd.LoadFloat32x4Slice(y)
- c := simd.LoadFloat32x4Slice(z)
+ a := archsimd.LoadFloat32x4Slice(x)
+ b := archsimd.LoadFloat32x4Slice(y)
+ c := archsimd.LoadFloat32x4Slice(z)
g := make([]float32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testFloat64x2Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat64x2Ternary(t *testing.T, f func(_, _, _ simd.Float64x2) simd.Float64x2, want func(_, _, _ []float64) []float64) {
+func testFloat64x2Ternary(t *testing.T, f func(_, _, _ archsimd.Float64x2) archsimd.Float64x2, want func(_, _, _ []float64) []float64) {
n := 2
t.Helper()
forSliceTriple(t, float64s, n, func(x, y, z []float64) bool {
t.Helper()
- a := simd.LoadFloat64x2Slice(x)
- b := simd.LoadFloat64x2Slice(y)
- c := simd.LoadFloat64x2Slice(z)
+ a := archsimd.LoadFloat64x2Slice(x)
+ b := archsimd.LoadFloat64x2Slice(y)
+ c := archsimd.LoadFloat64x2Slice(z)
g := make([]float64, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt8x32Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt8x32Ternary(t *testing.T, f func(_, _, _ simd.Int8x32) simd.Int8x32, want func(_, _, _ []int8) []int8) {
+func testInt8x32Ternary(t *testing.T, f func(_, _, _ archsimd.Int8x32) archsimd.Int8x32, want func(_, _, _ []int8) []int8) {
n := 32
t.Helper()
forSliceTriple(t, int8s, n, func(x, y, z []int8) bool {
t.Helper()
- a := simd.LoadInt8x32Slice(x)
- b := simd.LoadInt8x32Slice(y)
- c := simd.LoadInt8x32Slice(z)
+ a := archsimd.LoadInt8x32Slice(x)
+ b := archsimd.LoadInt8x32Slice(y)
+ c := archsimd.LoadInt8x32Slice(z)
g := make([]int8, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt16x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt16x16Ternary(t *testing.T, f func(_, _, _ simd.Int16x16) simd.Int16x16, want func(_, _, _ []int16) []int16) {
+func testInt16x16Ternary(t *testing.T, f func(_, _, _ archsimd.Int16x16) archsimd.Int16x16, want func(_, _, _ []int16) []int16) {
n := 16
t.Helper()
forSliceTriple(t, int16s, n, func(x, y, z []int16) bool {
t.Helper()
- a := simd.LoadInt16x16Slice(x)
- b := simd.LoadInt16x16Slice(y)
- c := simd.LoadInt16x16Slice(z)
+ a := archsimd.LoadInt16x16Slice(x)
+ b := archsimd.LoadInt16x16Slice(y)
+ c := archsimd.LoadInt16x16Slice(z)
g := make([]int16, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt32x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt32x8Ternary(t *testing.T, f func(_, _, _ simd.Int32x8) simd.Int32x8, want func(_, _, _ []int32) []int32) {
+func testInt32x8Ternary(t *testing.T, f func(_, _, _ archsimd.Int32x8) archsimd.Int32x8, want func(_, _, _ []int32) []int32) {
n := 8
t.Helper()
forSliceTriple(t, int32s, n, func(x, y, z []int32) bool {
t.Helper()
- a := simd.LoadInt32x8Slice(x)
- b := simd.LoadInt32x8Slice(y)
- c := simd.LoadInt32x8Slice(z)
+ a := archsimd.LoadInt32x8Slice(x)
+ b := archsimd.LoadInt32x8Slice(y)
+ c := archsimd.LoadInt32x8Slice(z)
g := make([]int32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt64x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt64x4Ternary(t *testing.T, f func(_, _, _ simd.Int64x4) simd.Int64x4, want func(_, _, _ []int64) []int64) {
+func testInt64x4Ternary(t *testing.T, f func(_, _, _ archsimd.Int64x4) archsimd.Int64x4, want func(_, _, _ []int64) []int64) {
n := 4
t.Helper()
forSliceTriple(t, int64s, n, func(x, y, z []int64) bool {
t.Helper()
- a := simd.LoadInt64x4Slice(x)
- b := simd.LoadInt64x4Slice(y)
- c := simd.LoadInt64x4Slice(z)
+ a := archsimd.LoadInt64x4Slice(x)
+ b := archsimd.LoadInt64x4Slice(y)
+ c := archsimd.LoadInt64x4Slice(z)
g := make([]int64, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint8x32Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint8x32Ternary(t *testing.T, f func(_, _, _ simd.Uint8x32) simd.Uint8x32, want func(_, _, _ []uint8) []uint8) {
+func testUint8x32Ternary(t *testing.T, f func(_, _, _ archsimd.Uint8x32) archsimd.Uint8x32, want func(_, _, _ []uint8) []uint8) {
n := 32
t.Helper()
forSliceTriple(t, uint8s, n, func(x, y, z []uint8) bool {
t.Helper()
- a := simd.LoadUint8x32Slice(x)
- b := simd.LoadUint8x32Slice(y)
- c := simd.LoadUint8x32Slice(z)
+ a := archsimd.LoadUint8x32Slice(x)
+ b := archsimd.LoadUint8x32Slice(y)
+ c := archsimd.LoadUint8x32Slice(z)
g := make([]uint8, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint16x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint16x16Ternary(t *testing.T, f func(_, _, _ simd.Uint16x16) simd.Uint16x16, want func(_, _, _ []uint16) []uint16) {
+func testUint16x16Ternary(t *testing.T, f func(_, _, _ archsimd.Uint16x16) archsimd.Uint16x16, want func(_, _, _ []uint16) []uint16) {
n := 16
t.Helper()
forSliceTriple(t, uint16s, n, func(x, y, z []uint16) bool {
t.Helper()
- a := simd.LoadUint16x16Slice(x)
- b := simd.LoadUint16x16Slice(y)
- c := simd.LoadUint16x16Slice(z)
+ a := archsimd.LoadUint16x16Slice(x)
+ b := archsimd.LoadUint16x16Slice(y)
+ c := archsimd.LoadUint16x16Slice(z)
g := make([]uint16, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint32x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint32x8Ternary(t *testing.T, f func(_, _, _ simd.Uint32x8) simd.Uint32x8, want func(_, _, _ []uint32) []uint32) {
+func testUint32x8Ternary(t *testing.T, f func(_, _, _ archsimd.Uint32x8) archsimd.Uint32x8, want func(_, _, _ []uint32) []uint32) {
n := 8
t.Helper()
forSliceTriple(t, uint32s, n, func(x, y, z []uint32) bool {
t.Helper()
- a := simd.LoadUint32x8Slice(x)
- b := simd.LoadUint32x8Slice(y)
- c := simd.LoadUint32x8Slice(z)
+ a := archsimd.LoadUint32x8Slice(x)
+ b := archsimd.LoadUint32x8Slice(y)
+ c := archsimd.LoadUint32x8Slice(z)
g := make([]uint32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint64x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint64x4Ternary(t *testing.T, f func(_, _, _ simd.Uint64x4) simd.Uint64x4, want func(_, _, _ []uint64) []uint64) {
+func testUint64x4Ternary(t *testing.T, f func(_, _, _ archsimd.Uint64x4) archsimd.Uint64x4, want func(_, _, _ []uint64) []uint64) {
n := 4
t.Helper()
forSliceTriple(t, uint64s, n, func(x, y, z []uint64) bool {
t.Helper()
- a := simd.LoadUint64x4Slice(x)
- b := simd.LoadUint64x4Slice(y)
- c := simd.LoadUint64x4Slice(z)
+ a := archsimd.LoadUint64x4Slice(x)
+ b := archsimd.LoadUint64x4Slice(y)
+ c := archsimd.LoadUint64x4Slice(z)
g := make([]uint64, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testFloat32x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat32x8Ternary(t *testing.T, f func(_, _, _ simd.Float32x8) simd.Float32x8, want func(_, _, _ []float32) []float32) {
+func testFloat32x8Ternary(t *testing.T, f func(_, _, _ archsimd.Float32x8) archsimd.Float32x8, want func(_, _, _ []float32) []float32) {
n := 8
t.Helper()
forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
- b := simd.LoadFloat32x8Slice(y)
- c := simd.LoadFloat32x8Slice(z)
+ a := archsimd.LoadFloat32x8Slice(x)
+ b := archsimd.LoadFloat32x8Slice(y)
+ c := archsimd.LoadFloat32x8Slice(z)
g := make([]float32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testFloat64x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat64x4Ternary(t *testing.T, f func(_, _, _ simd.Float64x4) simd.Float64x4, want func(_, _, _ []float64) []float64) {
+func testFloat64x4Ternary(t *testing.T, f func(_, _, _ archsimd.Float64x4) archsimd.Float64x4, want func(_, _, _ []float64) []float64) {
n := 4
t.Helper()
forSliceTriple(t, float64s, n, func(x, y, z []float64) bool {
t.Helper()
- a := simd.LoadFloat64x4Slice(x)
- b := simd.LoadFloat64x4Slice(y)
- c := simd.LoadFloat64x4Slice(z)
+ a := archsimd.LoadFloat64x4Slice(x)
+ b := archsimd.LoadFloat64x4Slice(y)
+ c := archsimd.LoadFloat64x4Slice(z)
g := make([]float64, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt8x64Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt8x64Ternary(t *testing.T, f func(_, _, _ simd.Int8x64) simd.Int8x64, want func(_, _, _ []int8) []int8) {
+func testInt8x64Ternary(t *testing.T, f func(_, _, _ archsimd.Int8x64) archsimd.Int8x64, want func(_, _, _ []int8) []int8) {
n := 64
t.Helper()
forSliceTriple(t, int8s, n, func(x, y, z []int8) bool {
t.Helper()
- a := simd.LoadInt8x64Slice(x)
- b := simd.LoadInt8x64Slice(y)
- c := simd.LoadInt8x64Slice(z)
+ a := archsimd.LoadInt8x64Slice(x)
+ b := archsimd.LoadInt8x64Slice(y)
+ c := archsimd.LoadInt8x64Slice(z)
g := make([]int8, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt16x32Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt16x32Ternary(t *testing.T, f func(_, _, _ simd.Int16x32) simd.Int16x32, want func(_, _, _ []int16) []int16) {
+func testInt16x32Ternary(t *testing.T, f func(_, _, _ archsimd.Int16x32) archsimd.Int16x32, want func(_, _, _ []int16) []int16) {
n := 32
t.Helper()
forSliceTriple(t, int16s, n, func(x, y, z []int16) bool {
t.Helper()
- a := simd.LoadInt16x32Slice(x)
- b := simd.LoadInt16x32Slice(y)
- c := simd.LoadInt16x32Slice(z)
+ a := archsimd.LoadInt16x32Slice(x)
+ b := archsimd.LoadInt16x32Slice(y)
+ c := archsimd.LoadInt16x32Slice(z)
g := make([]int16, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt32x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt32x16Ternary(t *testing.T, f func(_, _, _ simd.Int32x16) simd.Int32x16, want func(_, _, _ []int32) []int32) {
+func testInt32x16Ternary(t *testing.T, f func(_, _, _ archsimd.Int32x16) archsimd.Int32x16, want func(_, _, _ []int32) []int32) {
n := 16
t.Helper()
forSliceTriple(t, int32s, n, func(x, y, z []int32) bool {
t.Helper()
- a := simd.LoadInt32x16Slice(x)
- b := simd.LoadInt32x16Slice(y)
- c := simd.LoadInt32x16Slice(z)
+ a := archsimd.LoadInt32x16Slice(x)
+ b := archsimd.LoadInt32x16Slice(y)
+ c := archsimd.LoadInt32x16Slice(z)
g := make([]int32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testInt64x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt64x8Ternary(t *testing.T, f func(_, _, _ simd.Int64x8) simd.Int64x8, want func(_, _, _ []int64) []int64) {
+func testInt64x8Ternary(t *testing.T, f func(_, _, _ archsimd.Int64x8) archsimd.Int64x8, want func(_, _, _ []int64) []int64) {
n := 8
t.Helper()
forSliceTriple(t, int64s, n, func(x, y, z []int64) bool {
t.Helper()
- a := simd.LoadInt64x8Slice(x)
- b := simd.LoadInt64x8Slice(y)
- c := simd.LoadInt64x8Slice(z)
+ a := archsimd.LoadInt64x8Slice(x)
+ b := archsimd.LoadInt64x8Slice(y)
+ c := archsimd.LoadInt64x8Slice(z)
g := make([]int64, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint8x64Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint8x64Ternary(t *testing.T, f func(_, _, _ simd.Uint8x64) simd.Uint8x64, want func(_, _, _ []uint8) []uint8) {
+func testUint8x64Ternary(t *testing.T, f func(_, _, _ archsimd.Uint8x64) archsimd.Uint8x64, want func(_, _, _ []uint8) []uint8) {
n := 64
t.Helper()
forSliceTriple(t, uint8s, n, func(x, y, z []uint8) bool {
t.Helper()
- a := simd.LoadUint8x64Slice(x)
- b := simd.LoadUint8x64Slice(y)
- c := simd.LoadUint8x64Slice(z)
+ a := archsimd.LoadUint8x64Slice(x)
+ b := archsimd.LoadUint8x64Slice(y)
+ c := archsimd.LoadUint8x64Slice(z)
g := make([]uint8, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint16x32Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint16x32Ternary(t *testing.T, f func(_, _, _ simd.Uint16x32) simd.Uint16x32, want func(_, _, _ []uint16) []uint16) {
+func testUint16x32Ternary(t *testing.T, f func(_, _, _ archsimd.Uint16x32) archsimd.Uint16x32, want func(_, _, _ []uint16) []uint16) {
n := 32
t.Helper()
forSliceTriple(t, uint16s, n, func(x, y, z []uint16) bool {
t.Helper()
- a := simd.LoadUint16x32Slice(x)
- b := simd.LoadUint16x32Slice(y)
- c := simd.LoadUint16x32Slice(z)
+ a := archsimd.LoadUint16x32Slice(x)
+ b := archsimd.LoadUint16x32Slice(y)
+ c := archsimd.LoadUint16x32Slice(z)
g := make([]uint16, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint32x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint32x16Ternary(t *testing.T, f func(_, _, _ simd.Uint32x16) simd.Uint32x16, want func(_, _, _ []uint32) []uint32) {
+func testUint32x16Ternary(t *testing.T, f func(_, _, _ archsimd.Uint32x16) archsimd.Uint32x16, want func(_, _, _ []uint32) []uint32) {
n := 16
t.Helper()
forSliceTriple(t, uint32s, n, func(x, y, z []uint32) bool {
t.Helper()
- a := simd.LoadUint32x16Slice(x)
- b := simd.LoadUint32x16Slice(y)
- c := simd.LoadUint32x16Slice(z)
+ a := archsimd.LoadUint32x16Slice(x)
+ b := archsimd.LoadUint32x16Slice(y)
+ c := archsimd.LoadUint32x16Slice(z)
g := make([]uint32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testUint64x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint64x8Ternary(t *testing.T, f func(_, _, _ simd.Uint64x8) simd.Uint64x8, want func(_, _, _ []uint64) []uint64) {
+func testUint64x8Ternary(t *testing.T, f func(_, _, _ archsimd.Uint64x8) archsimd.Uint64x8, want func(_, _, _ []uint64) []uint64) {
n := 8
t.Helper()
forSliceTriple(t, uint64s, n, func(x, y, z []uint64) bool {
t.Helper()
- a := simd.LoadUint64x8Slice(x)
- b := simd.LoadUint64x8Slice(y)
- c := simd.LoadUint64x8Slice(z)
+ a := archsimd.LoadUint64x8Slice(x)
+ b := archsimd.LoadUint64x8Slice(y)
+ c := archsimd.LoadUint64x8Slice(z)
g := make([]uint64, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testFloat32x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat32x16Ternary(t *testing.T, f func(_, _, _ simd.Float32x16) simd.Float32x16, want func(_, _, _ []float32) []float32) {
+func testFloat32x16Ternary(t *testing.T, f func(_, _, _ archsimd.Float32x16) archsimd.Float32x16, want func(_, _, _ []float32) []float32) {
n := 16
t.Helper()
forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
- b := simd.LoadFloat32x16Slice(y)
- c := simd.LoadFloat32x16Slice(z)
+ a := archsimd.LoadFloat32x16Slice(x)
+ b := archsimd.LoadFloat32x16Slice(y)
+ c := archsimd.LoadFloat32x16Slice(z)
g := make([]float32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
}
// testFloat64x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat64x8Ternary(t *testing.T, f func(_, _, _ simd.Float64x8) simd.Float64x8, want func(_, _, _ []float64) []float64) {
+func testFloat64x8Ternary(t *testing.T, f func(_, _, _ archsimd.Float64x8) archsimd.Float64x8, want func(_, _, _ []float64) []float64) {
n := 8
t.Helper()
forSliceTriple(t, float64s, n, func(x, y, z []float64) bool {
t.Helper()
- a := simd.LoadFloat64x8Slice(x)
- b := simd.LoadFloat64x8Slice(y)
- c := simd.LoadFloat64x8Slice(z)
+ a := archsimd.LoadFloat64x8Slice(x)
+ b := archsimd.LoadFloat64x8Slice(y)
+ c := archsimd.LoadFloat64x8Slice(z)
g := make([]float64, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
// testFloat32x4TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat32x4TernaryFlaky(t *testing.T, f func(x, y, z simd.Float32x4) simd.Float32x4, want func(x, y, z []float32) []float32, flakiness float64) {
+func testFloat32x4TernaryFlaky(t *testing.T, f func(x, y, z archsimd.Float32x4) archsimd.Float32x4, want func(x, y, z []float32) []float32, flakiness float64) {
n := 4
t.Helper()
forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
t.Helper()
- a := simd.LoadFloat32x4Slice(x)
- b := simd.LoadFloat32x4Slice(y)
- c := simd.LoadFloat32x4Slice(z)
+ a := archsimd.LoadFloat32x4Slice(x)
+ b := archsimd.LoadFloat32x4Slice(y)
+ c := archsimd.LoadFloat32x4Slice(z)
g := make([]float32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
// testFloat32x8TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
// but with a flakiness tolerance, because the exact rounding of simd floating point has not been pinned down yet
-func testFloat32x8TernaryFlaky(t *testing.T, f func(x, y, z simd.Float32x8) simd.Float32x8, want func(x, y, z []float32) []float32, flakiness float64) {
+func testFloat32x8TernaryFlaky(t *testing.T, f func(x, y, z archsimd.Float32x8) archsimd.Float32x8, want func(x, y, z []float32) []float32, flakiness float64) {
n := 8
t.Helper()
forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
- b := simd.LoadFloat32x8Slice(y)
- c := simd.LoadFloat32x8Slice(z)
+ a := archsimd.LoadFloat32x8Slice(x)
+ b := archsimd.LoadFloat32x8Slice(y)
+ c := archsimd.LoadFloat32x8Slice(z)
g := make([]float32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
// testFloat32x16TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
// but with a flakiness tolerance, because the exact rounding of simd floating point has not been pinned down yet
-func testFloat32x16TernaryFlaky(t *testing.T, f func(x, y, z simd.Float32x16) simd.Float32x16, want func(x, y, z []float32) []float32, flakiness float64) {
+func testFloat32x16TernaryFlaky(t *testing.T, f func(x, y, z archsimd.Float32x16) archsimd.Float32x16, want func(x, y, z []float32) []float32, flakiness float64) {
n := 16
t.Helper()
forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
- b := simd.LoadFloat32x16Slice(y)
- c := simd.LoadFloat32x16Slice(z)
+ a := archsimd.LoadFloat32x16Slice(x)
+ b := archsimd.LoadFloat32x16Slice(y)
+ c := archsimd.LoadFloat32x16Slice(z)
g := make([]float32, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
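
The comparison step of these flaky helpers is elided from the hunks above. A minimal sketch of the kind of check the flakiness parameter suggests, assuming a relative-error comparison; the name closeEnough and the formula are illustrative, not from this CL (in practice, values near zero would also need an absolute-error floor):

import "math"

// closeEnough reports whether got is within a relative tolerance
// (flakiness) of want; exact matches, including ±0, pass immediately.
func closeEnough(got, want, flakiness float64) bool {
	if got == want {
		return true
	}
	diff := math.Abs(got - want)
	scale := math.Max(math.Abs(got), math.Abs(want))
	return diff <= flakiness*scale
}
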
--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.simd && amd64
+
+package simd_test
+
+import (
+ "simd/archsimd"
+ "testing"
+)
+
+func TestFMA(t *testing.T) {
+ if archsimd.X86.AVX512() {
+ testFloat32x4TernaryFlaky(t, archsimd.Float32x4.MulAdd, fmaSlice[float32], 0.001)
+ testFloat32x8TernaryFlaky(t, archsimd.Float32x8.MulAdd, fmaSlice[float32], 0.001)
+ testFloat32x16TernaryFlaky(t, archsimd.Float32x16.MulAdd, fmaSlice[float32], 0.001)
+ testFloat64x2Ternary(t, archsimd.Float64x2.MulAdd, fmaSlice[float64])
+ testFloat64x4Ternary(t, archsimd.Float64x4.MulAdd, fmaSlice[float64])
+ testFloat64x8Ternary(t, archsimd.Float64x8.MulAdd, fmaSlice[float64])
+ }
+}
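
fmaSlice is referenced by TestFMA but defined outside these hunks. A plausible reference, assuming it computes the elementwise fused multiply-add x*y + z via math.FMA; note that routing float32 through a float64 FMA double-rounds, which is one plausible reason the float32 cases above go through the flaky helpers while the float64 cases use the exact ones:

import "math"

// fmaSlice is a scalar oracle for MulAdd: r[i] = x[i]*y[i] + z[i], fused.
func fmaSlice[T float32 | float64](x, y, z []T) []T {
	r := make([]T, len(x))
	for i := range x {
		r[i] = T(math.FMA(float64(x[i]), float64(y[i]), float64(z[i])))
	}
	return r
}
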
import (
"fmt"
- "simd"
+ "simd/archsimd"
"testing"
)
-func Transpose4(a0, a1, a2, a3 simd.Int32x4) (b0, b1, b2, b3 simd.Int32x4) {
+func Transpose4(a0, a1, a2, a3 archsimd.Int32x4) (b0, b1, b2, b3 archsimd.Int32x4) {
t0, t1 := a0.InterleaveLo(a1), a0.InterleaveHi(a1)
t2, t3 := a2.InterleaveLo(a3), a2.InterleaveHi(a3)
return
}
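
Transpose4's tail (pairing the t values back into b0..b3) is elided from this hunk. As a concrete statement of what the interleave kernel must compute, a scalar transpose oracle (illustrative only, not part of the CL) is:

// transpose4Ref is a scalar reference for a 4x4 transpose: r[j][i] = m[i][j].
func transpose4Ref(m [4][4]int32) [4][4]int32 {
	var r [4][4]int32
	for i := 0; i < 4; i++ {
		for j := 0; j < 4; j++ {
			r[j][i] = m[i][j]
		}
	}
	return r
}
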
-func Transpose8(a0, a1, a2, a3, a4, a5, a6, a7 simd.Int32x8) (b0, b1, b2, b3, b4, b5, b6, b7 simd.Int32x8) {
+func Transpose8(a0, a1, a2, a3, a4, a5, a6, a7 archsimd.Int32x8) (b0, b1, b2, b3, b4, b5, b6, b7 archsimd.Int32x8) {
t0, t1 := a0.InterleaveLoGrouped(a1), a0.InterleaveHiGrouped(a1)
t2, t3 := a2.InterleaveLoGrouped(a3), a2.InterleaveHiGrouped(a3)
t4, t5 := a4.InterleaveLoGrouped(a5), a4.InterleaveHiGrouped(a5)
func TestTranspose4(t *testing.T) {
r := make([]int32, 16, 16)
- w := simd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
- x := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
- y := simd.LoadInt32x4Slice([]int32{0xE, 0xF, 0x10, 0x11})
- z := simd.LoadInt32x4Slice([]int32{5, 6, 7, 8})
+ w := archsimd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
+ x := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
+ y := archsimd.LoadInt32x4Slice([]int32{0xE, 0xF, 0x10, 0x11})
+ z := archsimd.LoadInt32x4Slice([]int32{5, 6, 7, 8})
a, b, c, d := Transpose4(w, x, y, z)
a.StoreSlice(r[0:])
a = append(a, i)
}
- p := simd.LoadInt32x8Slice(a[0:])
- q := simd.LoadInt32x8Slice(a[8:])
- r := simd.LoadInt32x8Slice(a[16:])
- s := simd.LoadInt32x8Slice(a[24:])
+ p := archsimd.LoadInt32x8Slice(a[0:])
+ q := archsimd.LoadInt32x8Slice(a[8:])
+ r := archsimd.LoadInt32x8Slice(a[16:])
+ s := archsimd.LoadInt32x8Slice(a[24:])
- w := simd.LoadInt32x8Slice(a[32:])
- x := simd.LoadInt32x8Slice(a[40:])
- y := simd.LoadInt32x8Slice(a[48:])
- z := simd.LoadInt32x8Slice(a[56:])
+ w := archsimd.LoadInt32x8Slice(a[32:])
+ x := archsimd.LoadInt32x8Slice(a[40:])
+ y := archsimd.LoadInt32x8Slice(a[48:])
+ z := archsimd.LoadInt32x8Slice(a[56:])
p, q, r, s, w, x, y, z = Transpose8(p, q, r, s, w, x, y, z)
- foo := func(a simd.Int32x8, z int32) {
+ foo := func(a archsimd.Int32x8, z int32) {
a.StoreSlice(m)
var o []int32
for i := int32(0); i < 8; i++ {
}
// transpose diagonal
d0, d1, d2, d3 :=
- simd.LoadInt32x4Slice(r0[i:]),
- simd.LoadInt32x4Slice(r1[i:]),
- simd.LoadInt32x4Slice(r2[i:]),
- simd.LoadInt32x4Slice(r3[i:])
+ archsimd.LoadInt32x4Slice(r0[i:]),
+ archsimd.LoadInt32x4Slice(r1[i:]),
+ archsimd.LoadInt32x4Slice(r2[i:]),
+ archsimd.LoadInt32x4Slice(r3[i:])
d0, d1, d2, d3 = Transpose4(d0, d1, d2, d3)
for ; j < i; j += B {
a0, a1, a2, a3 := m[j], m[j+1], m[j+2], m[j+3]
u0, u1, u2, u3 :=
- simd.LoadInt32x4Slice(a0[i:]),
- simd.LoadInt32x4Slice(a1[i:]),
- simd.LoadInt32x4Slice(a2[i:]),
- simd.LoadInt32x4Slice(a3[i:])
+ archsimd.LoadInt32x4Slice(a0[i:]),
+ archsimd.LoadInt32x4Slice(a1[i:]),
+ archsimd.LoadInt32x4Slice(a2[i:]),
+ archsimd.LoadInt32x4Slice(a3[i:])
u0, u1, u2, u3 = Transpose4(u0, u1, u2, u3)
- l0 := simd.LoadInt32x4Slice(r0[j:])
+ l0 := archsimd.LoadInt32x4Slice(r0[j:])
u0.StoreSlice(r0[j:])
- l1 := simd.LoadInt32x4Slice(r1[j:])
+ l1 := archsimd.LoadInt32x4Slice(r1[j:])
u1.StoreSlice(r1[j:])
- l2 := simd.LoadInt32x4Slice(r2[j:])
+ l2 := archsimd.LoadInt32x4Slice(r2[j:])
u2.StoreSlice(r2[j:])
- l3 := simd.LoadInt32x4Slice(r3[j:])
+ l3 := archsimd.LoadInt32x4Slice(r3[j:])
u3.StoreSlice(r3[j:])
u0, u1, u2, u3 = Transpose4(l0, l1, l2, l3)
}
// transpose diagonal
d0, d1, d2, d3, d4, d5, d6, d7 :=
- simd.LoadInt32x8Slice(r0[i:]),
- simd.LoadInt32x8Slice(r1[i:]),
- simd.LoadInt32x8Slice(r2[i:]),
- simd.LoadInt32x8Slice(r3[i:]),
- simd.LoadInt32x8Slice(r4[i:]),
- simd.LoadInt32x8Slice(r5[i:]),
- simd.LoadInt32x8Slice(r6[i:]),
- simd.LoadInt32x8Slice(r7[i:])
+ archsimd.LoadInt32x8Slice(r0[i:]),
+ archsimd.LoadInt32x8Slice(r1[i:]),
+ archsimd.LoadInt32x8Slice(r2[i:]),
+ archsimd.LoadInt32x8Slice(r3[i:]),
+ archsimd.LoadInt32x8Slice(r4[i:]),
+ archsimd.LoadInt32x8Slice(r5[i:]),
+ archsimd.LoadInt32x8Slice(r6[i:]),
+ archsimd.LoadInt32x8Slice(r7[i:])
d0, d1, d2, d3, d4, d5, d6, d7 = Transpose8(d0, d1, d2, d3, d4, d5, d6, d7)
for ; j < i; j += B {
a7, a0, a1, a2, a3, a4, a5, a6 := m[j+7], m[j], m[j+1], m[j+2], m[j+3], m[j+4], m[j+5], m[j+6]
u0, u1, u2, u3, u4, u5, u6, u7 :=
- simd.LoadInt32x8Slice(a0[i:]),
- simd.LoadInt32x8Slice(a1[i:]),
- simd.LoadInt32x8Slice(a2[i:]),
- simd.LoadInt32x8Slice(a3[i:]),
- simd.LoadInt32x8Slice(a4[i:]),
- simd.LoadInt32x8Slice(a5[i:]),
- simd.LoadInt32x8Slice(a6[i:]),
- simd.LoadInt32x8Slice(a7[i:])
+ archsimd.LoadInt32x8Slice(a0[i:]),
+ archsimd.LoadInt32x8Slice(a1[i:]),
+ archsimd.LoadInt32x8Slice(a2[i:]),
+ archsimd.LoadInt32x8Slice(a3[i:]),
+ archsimd.LoadInt32x8Slice(a4[i:]),
+ archsimd.LoadInt32x8Slice(a5[i:]),
+ archsimd.LoadInt32x8Slice(a6[i:]),
+ archsimd.LoadInt32x8Slice(a7[i:])
u0, u1, u2, u3, u4, u5, u6, u7 = Transpose8(u0, u1, u2, u3, u4, u5, u6, u7)
- l0 := simd.LoadInt32x8Slice(r0[j:])
+ l0 := archsimd.LoadInt32x8Slice(r0[j:])
u0.StoreSlice(r0[j:])
- l1 := simd.LoadInt32x8Slice(r1[j:])
+ l1 := archsimd.LoadInt32x8Slice(r1[j:])
u1.StoreSlice(r1[j:])
- l2 := simd.LoadInt32x8Slice(r2[j:])
+ l2 := archsimd.LoadInt32x8Slice(r2[j:])
u2.StoreSlice(r2[j:])
- l3 := simd.LoadInt32x8Slice(r3[j:])
+ l3 := archsimd.LoadInt32x8Slice(r3[j:])
u3.StoreSlice(r3[j:])
- l4 := simd.LoadInt32x8Slice(r4[j:])
+ l4 := archsimd.LoadInt32x8Slice(r4[j:])
u4.StoreSlice(r4[j:])
- l5 := simd.LoadInt32x8Slice(r5[j:])
+ l5 := archsimd.LoadInt32x8Slice(r5[j:])
u5.StoreSlice(r5[j:])
- l6 := simd.LoadInt32x8Slice(r6[j:])
+ l6 := archsimd.LoadInt32x8Slice(r6[j:])
u6.StoreSlice(r6[j:])
- l7 := simd.LoadInt32x8Slice(r7[j:])
+ l7 := archsimd.LoadInt32x8Slice(r7[j:])
u7.StoreSlice(r7[j:])
u0, u1, u2, u3, u4, u5, u6, u7 = Transpose8(l0, l1, l2, l3, l4, l5, l6, l7)
package simd_test
import (
- "simd"
+ "simd/archsimd"
"testing"
)
// testInt8x16Unary tests the simd unary method f against the expected behavior generated by want
-func testInt8x16Unary(t *testing.T, f func(_ simd.Int8x16) simd.Int8x16, want func(_ []int8) []int8) {
+func testInt8x16Unary(t *testing.T, f func(_ archsimd.Int8x16) archsimd.Int8x16, want func(_ []int8) []int8) {
n := 16
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
- a := simd.LoadInt8x16Slice(x)
+ a := archsimd.LoadInt8x16Slice(x)
g := make([]int8, n)
f(a).StoreSlice(g)
w := want(x)
}
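
Each of these helpers takes the SIMD method under test plus a scalar "want" oracle over the lane values. A hypothetical oracle of the expected shape, e.g. for an absolute-value method (the Abs pairing is assumed here, not shown in this CL):

// absSlice is a lane-wise scalar oracle: r[i] = |x[i]|.
// Negating math.MinInt8 yields itself, matching two's-complement wraparound.
func absSlice(x []int8) []int8 {
	r := make([]int8, len(x))
	for i, v := range x {
		if v < 0 {
			v = -v
		}
		r[i] = v
	}
	return r
}

which would be driven as testInt8x16Unary(t, archsimd.Int8x16.Abs, absSlice), assuming such a method exists.
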
// testInt16x8Unary tests the simd unary method f against the expected behavior generated by want
-func testInt16x8Unary(t *testing.T, f func(_ simd.Int16x8) simd.Int16x8, want func(_ []int16) []int16) {
+func testInt16x8Unary(t *testing.T, f func(_ archsimd.Int16x8) archsimd.Int16x8, want func(_ []int16) []int16) {
n := 8
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x8Slice(x)
+ a := archsimd.LoadInt16x8Slice(x)
g := make([]int16, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt32x4Unary tests the simd unary method f against the expected behavior generated by want
-func testInt32x4Unary(t *testing.T, f func(_ simd.Int32x4) simd.Int32x4, want func(_ []int32) []int32) {
+func testInt32x4Unary(t *testing.T, f func(_ archsimd.Int32x4) archsimd.Int32x4, want func(_ []int32) []int32) {
n := 4
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x4Slice(x)
+ a := archsimd.LoadInt32x4Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt64x2Unary tests the simd unary method f against the expected behavior generated by want
-func testInt64x2Unary(t *testing.T, f func(_ simd.Int64x2) simd.Int64x2, want func(_ []int64) []int64) {
+func testInt64x2Unary(t *testing.T, f func(_ archsimd.Int64x2) archsimd.Int64x2, want func(_ []int64) []int64) {
n := 2
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
- a := simd.LoadInt64x2Slice(x)
+ a := archsimd.LoadInt64x2Slice(x)
g := make([]int64, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint8x16Unary tests the simd unary method f against the expected behavior generated by want
-func testUint8x16Unary(t *testing.T, f func(_ simd.Uint8x16) simd.Uint8x16, want func(_ []uint8) []uint8) {
+func testUint8x16Unary(t *testing.T, f func(_ archsimd.Uint8x16) archsimd.Uint8x16, want func(_ []uint8) []uint8) {
n := 16
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
- a := simd.LoadUint8x16Slice(x)
+ a := archsimd.LoadUint8x16Slice(x)
g := make([]uint8, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint16x8Unary tests the simd unary method f against the expected behavior generated by want
-func testUint16x8Unary(t *testing.T, f func(_ simd.Uint16x8) simd.Uint16x8, want func(_ []uint16) []uint16) {
+func testUint16x8Unary(t *testing.T, f func(_ archsimd.Uint16x8) archsimd.Uint16x8, want func(_ []uint16) []uint16) {
n := 8
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x8Slice(x)
+ a := archsimd.LoadUint16x8Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint32x4Unary tests the simd unary method f against the expected behavior generated by want
-func testUint32x4Unary(t *testing.T, f func(_ simd.Uint32x4) simd.Uint32x4, want func(_ []uint32) []uint32) {
+func testUint32x4Unary(t *testing.T, f func(_ archsimd.Uint32x4) archsimd.Uint32x4, want func(_ []uint32) []uint32) {
n := 4
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x4Slice(x)
+ a := archsimd.LoadUint32x4Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint64x2Unary tests the simd unary method f against the expected behavior generated by want
-func testUint64x2Unary(t *testing.T, f func(_ simd.Uint64x2) simd.Uint64x2, want func(_ []uint64) []uint64) {
+func testUint64x2Unary(t *testing.T, f func(_ archsimd.Uint64x2) archsimd.Uint64x2, want func(_ []uint64) []uint64) {
n := 2
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
- a := simd.LoadUint64x2Slice(x)
+ a := archsimd.LoadUint64x2Slice(x)
g := make([]uint64, n)
f(a).StoreSlice(g)
w := want(x)
}
// testFloat32x4Unary tests the simd unary method f against the expected behavior generated by want
-func testFloat32x4Unary(t *testing.T, f func(_ simd.Float32x4) simd.Float32x4, want func(_ []float32) []float32) {
+func testFloat32x4Unary(t *testing.T, f func(_ archsimd.Float32x4) archsimd.Float32x4, want func(_ []float32) []float32) {
n := 4
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x4Slice(x)
+ a := archsimd.LoadFloat32x4Slice(x)
g := make([]float32, n)
f(a).StoreSlice(g)
w := want(x)
}
// testFloat64x2Unary tests the simd unary method f against the expected behavior generated by want
-func testFloat64x2Unary(t *testing.T, f func(_ simd.Float64x2) simd.Float64x2, want func(_ []float64) []float64) {
+func testFloat64x2Unary(t *testing.T, f func(_ archsimd.Float64x2) archsimd.Float64x2, want func(_ []float64) []float64) {
n := 2
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x2Slice(x)
+ a := archsimd.LoadFloat64x2Slice(x)
g := make([]float64, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt8x32Unary tests the simd unary method f against the expected behavior generated by want
-func testInt8x32Unary(t *testing.T, f func(_ simd.Int8x32) simd.Int8x32, want func(_ []int8) []int8) {
+func testInt8x32Unary(t *testing.T, f func(_ archsimd.Int8x32) archsimd.Int8x32, want func(_ []int8) []int8) {
n := 32
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
- a := simd.LoadInt8x32Slice(x)
+ a := archsimd.LoadInt8x32Slice(x)
g := make([]int8, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt16x16Unary tests the simd unary method f against the expected behavior generated by want
-func testInt16x16Unary(t *testing.T, f func(_ simd.Int16x16) simd.Int16x16, want func(_ []int16) []int16) {
+func testInt16x16Unary(t *testing.T, f func(_ archsimd.Int16x16) archsimd.Int16x16, want func(_ []int16) []int16) {
n := 16
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x16Slice(x)
+ a := archsimd.LoadInt16x16Slice(x)
g := make([]int16, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt32x8Unary tests the simd unary method f against the expected behavior generated by want
-func testInt32x8Unary(t *testing.T, f func(_ simd.Int32x8) simd.Int32x8, want func(_ []int32) []int32) {
+func testInt32x8Unary(t *testing.T, f func(_ archsimd.Int32x8) archsimd.Int32x8, want func(_ []int32) []int32) {
n := 8
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x8Slice(x)
+ a := archsimd.LoadInt32x8Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt64x4Unary tests the simd unary method f against the expected behavior generated by want
-func testInt64x4Unary(t *testing.T, f func(_ simd.Int64x4) simd.Int64x4, want func(_ []int64) []int64) {
+func testInt64x4Unary(t *testing.T, f func(_ archsimd.Int64x4) archsimd.Int64x4, want func(_ []int64) []int64) {
n := 4
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
- a := simd.LoadInt64x4Slice(x)
+ a := archsimd.LoadInt64x4Slice(x)
g := make([]int64, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint8x32Unary tests the simd unary method f against the expected behavior generated by want
-func testUint8x32Unary(t *testing.T, f func(_ simd.Uint8x32) simd.Uint8x32, want func(_ []uint8) []uint8) {
+func testUint8x32Unary(t *testing.T, f func(_ archsimd.Uint8x32) archsimd.Uint8x32, want func(_ []uint8) []uint8) {
n := 32
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
- a := simd.LoadUint8x32Slice(x)
+ a := archsimd.LoadUint8x32Slice(x)
g := make([]uint8, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint16x16Unary tests the simd unary method f against the expected behavior generated by want
-func testUint16x16Unary(t *testing.T, f func(_ simd.Uint16x16) simd.Uint16x16, want func(_ []uint16) []uint16) {
+func testUint16x16Unary(t *testing.T, f func(_ archsimd.Uint16x16) archsimd.Uint16x16, want func(_ []uint16) []uint16) {
n := 16
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x16Slice(x)
+ a := archsimd.LoadUint16x16Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint32x8Unary tests the simd unary method f against the expected behavior generated by want
-func testUint32x8Unary(t *testing.T, f func(_ simd.Uint32x8) simd.Uint32x8, want func(_ []uint32) []uint32) {
+func testUint32x8Unary(t *testing.T, f func(_ archsimd.Uint32x8) archsimd.Uint32x8, want func(_ []uint32) []uint32) {
n := 8
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x8Slice(x)
+ a := archsimd.LoadUint32x8Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint64x4Unary tests the simd unary method f against the expected behavior generated by want
-func testUint64x4Unary(t *testing.T, f func(_ simd.Uint64x4) simd.Uint64x4, want func(_ []uint64) []uint64) {
+func testUint64x4Unary(t *testing.T, f func(_ archsimd.Uint64x4) archsimd.Uint64x4, want func(_ []uint64) []uint64) {
n := 4
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
- a := simd.LoadUint64x4Slice(x)
+ a := archsimd.LoadUint64x4Slice(x)
g := make([]uint64, n)
f(a).StoreSlice(g)
w := want(x)
}
// testFloat32x8Unary tests the simd unary method f against the expected behavior generated by want
-func testFloat32x8Unary(t *testing.T, f func(_ simd.Float32x8) simd.Float32x8, want func(_ []float32) []float32) {
+func testFloat32x8Unary(t *testing.T, f func(_ archsimd.Float32x8) archsimd.Float32x8, want func(_ []float32) []float32) {
n := 8
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
+ a := archsimd.LoadFloat32x8Slice(x)
g := make([]float32, n)
f(a).StoreSlice(g)
w := want(x)
}
// testFloat64x4Unary tests the simd unary method f against the expected behavior generated by want
-func testFloat64x4Unary(t *testing.T, f func(_ simd.Float64x4) simd.Float64x4, want func(_ []float64) []float64) {
+func testFloat64x4Unary(t *testing.T, f func(_ archsimd.Float64x4) archsimd.Float64x4, want func(_ []float64) []float64) {
n := 4
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x4Slice(x)
+ a := archsimd.LoadFloat64x4Slice(x)
g := make([]float64, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt8x64Unary tests the simd unary method f against the expected behavior generated by want
-func testInt8x64Unary(t *testing.T, f func(_ simd.Int8x64) simd.Int8x64, want func(_ []int8) []int8) {
+func testInt8x64Unary(t *testing.T, f func(_ archsimd.Int8x64) archsimd.Int8x64, want func(_ []int8) []int8) {
n := 64
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
- a := simd.LoadInt8x64Slice(x)
+ a := archsimd.LoadInt8x64Slice(x)
g := make([]int8, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt16x32Unary tests the simd unary method f against the expected behavior generated by want
-func testInt16x32Unary(t *testing.T, f func(_ simd.Int16x32) simd.Int16x32, want func(_ []int16) []int16) {
+func testInt16x32Unary(t *testing.T, f func(_ archsimd.Int16x32) archsimd.Int16x32, want func(_ []int16) []int16) {
n := 32
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x32Slice(x)
+ a := archsimd.LoadInt16x32Slice(x)
g := make([]int16, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt32x16Unary tests the simd unary method f against the expected behavior generated by want
-func testInt32x16Unary(t *testing.T, f func(_ simd.Int32x16) simd.Int32x16, want func(_ []int32) []int32) {
+func testInt32x16Unary(t *testing.T, f func(_ archsimd.Int32x16) archsimd.Int32x16, want func(_ []int32) []int32) {
n := 16
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x16Slice(x)
+ a := archsimd.LoadInt32x16Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
}
// testInt64x8Unary tests the simd unary method f against the expected behavior generated by want
-func testInt64x8Unary(t *testing.T, f func(_ simd.Int64x8) simd.Int64x8, want func(_ []int64) []int64) {
+func testInt64x8Unary(t *testing.T, f func(_ archsimd.Int64x8) archsimd.Int64x8, want func(_ []int64) []int64) {
n := 8
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
- a := simd.LoadInt64x8Slice(x)
+ a := archsimd.LoadInt64x8Slice(x)
g := make([]int64, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint8x64Unary tests the simd unary method f against the expected behavior generated by want
-func testUint8x64Unary(t *testing.T, f func(_ simd.Uint8x64) simd.Uint8x64, want func(_ []uint8) []uint8) {
+func testUint8x64Unary(t *testing.T, f func(_ archsimd.Uint8x64) archsimd.Uint8x64, want func(_ []uint8) []uint8) {
n := 64
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
- a := simd.LoadUint8x64Slice(x)
+ a := archsimd.LoadUint8x64Slice(x)
g := make([]uint8, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint16x32Unary tests the simd unary method f against the expected behavior generated by want
-func testUint16x32Unary(t *testing.T, f func(_ simd.Uint16x32) simd.Uint16x32, want func(_ []uint16) []uint16) {
+func testUint16x32Unary(t *testing.T, f func(_ archsimd.Uint16x32) archsimd.Uint16x32, want func(_ []uint16) []uint16) {
n := 32
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x32Slice(x)
+ a := archsimd.LoadUint16x32Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint32x16Unary tests the simd unary method f against the expected behavior generated by want
-func testUint32x16Unary(t *testing.T, f func(_ simd.Uint32x16) simd.Uint32x16, want func(_ []uint32) []uint32) {
+func testUint32x16Unary(t *testing.T, f func(_ archsimd.Uint32x16) archsimd.Uint32x16, want func(_ []uint32) []uint32) {
n := 16
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x16Slice(x)
+ a := archsimd.LoadUint32x16Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
}
// testUint64x8Unary tests the simd unary method f against the expected behavior generated by want
-func testUint64x8Unary(t *testing.T, f func(_ simd.Uint64x8) simd.Uint64x8, want func(_ []uint64) []uint64) {
+func testUint64x8Unary(t *testing.T, f func(_ archsimd.Uint64x8) archsimd.Uint64x8, want func(_ []uint64) []uint64) {
n := 8
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
- a := simd.LoadUint64x8Slice(x)
+ a := archsimd.LoadUint64x8Slice(x)
g := make([]uint64, n)
f(a).StoreSlice(g)
w := want(x)
}
// testFloat32x16Unary tests the simd unary method f against the expected behavior generated by want
-func testFloat32x16Unary(t *testing.T, f func(_ simd.Float32x16) simd.Float32x16, want func(_ []float32) []float32) {
+func testFloat32x16Unary(t *testing.T, f func(_ archsimd.Float32x16) archsimd.Float32x16, want func(_ []float32) []float32) {
n := 16
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
+ a := archsimd.LoadFloat32x16Slice(x)
g := make([]float32, n)
f(a).StoreSlice(g)
w := want(x)
}
// testFloat64x8Unary tests the simd unary method f against the expected behavior generated by want
-func testFloat64x8Unary(t *testing.T, f func(_ simd.Float64x8) simd.Float64x8, want func(_ []float64) []float64) {
+func testFloat64x8Unary(t *testing.T, f func(_ archsimd.Float64x8) archsimd.Float64x8, want func(_ []float64) []float64) {
n := 8
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x8Slice(x)
+ a := archsimd.LoadFloat64x8Slice(x)
g := make([]float64, n)
f(a).StoreSlice(g)
w := want(x)
// testInt8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt8x16ConvertToInt32(t *testing.T, f func(x simd.Int8x16) simd.Int32x16, want func(x []int8) []int32) {
+func testInt8x16ConvertToInt32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int32x16, want func(x []int8) []int32) {
n := 16
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
- a := simd.LoadInt8x16Slice(x)
+ a := archsimd.LoadInt8x16Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
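
"Count-preserving" here means the lane count stays fixed and the vector width scales with the element size: 16 int8 lanes (a 128-bit vector) convert to 16 int32 lanes (a 512-bit vector). A sketch of a matching scalar oracle (illustrative only):

// int8ToInt32Slice widens each lane with sign extension, preserving lane count.
func int8ToInt32Slice(x []int8) []int32 {
	r := make([]int32, len(x))
	for i, v := range x {
		r[i] = int32(v)
	}
	return r
}
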
// testInt16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x8ConvertToInt32(t *testing.T, f func(x simd.Int16x8) simd.Int32x8, want func(x []int16) []int32) {
+func testInt16x8ConvertToInt32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int32x8, want func(x []int16) []int32) {
n := 8
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x8Slice(x)
+ a := archsimd.LoadInt16x8Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x4ConvertToInt32(t *testing.T, f func(x simd.Int32x4) simd.Int32x4, want func(x []int32) []int32) {
+func testInt32x4ConvertToInt32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int32x4, want func(x []int32) []int32) {
n := 4
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x4Slice(x)
+ a := archsimd.LoadInt32x4Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint8x16ConvertToInt32(t *testing.T, f func(x simd.Uint8x16) simd.Int32x16, want func(x []uint8) []int32) {
+func testUint8x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int32x16, want func(x []uint8) []int32) {
n := 16
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
- a := simd.LoadUint8x16Slice(x)
+ a := archsimd.LoadUint8x16Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x8ConvertToInt32(t *testing.T, f func(x simd.Uint16x8) simd.Int32x8, want func(x []uint16) []int32) {
+func testUint16x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int32x8, want func(x []uint16) []int32) {
n := 8
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x8Slice(x)
+ a := archsimd.LoadUint16x8Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x4ConvertToInt32(t *testing.T, f func(x simd.Uint32x4) simd.Int32x4, want func(x []uint32) []int32) {
+func testUint32x4ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int32x4, want func(x []uint32) []int32) {
n := 4
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x4Slice(x)
+ a := archsimd.LoadUint32x4Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x4ConvertToInt32(t *testing.T, f func(x simd.Float32x4) simd.Int32x4, want func(x []float32) []int32) {
+func testFloat32x4ConvertToInt32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int32x4, want func(x []float32) []int32) {
n := 4
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x4Slice(x)
+ a := archsimd.LoadFloat32x4Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x16ConvertToInt32(t *testing.T, f func(x simd.Int16x16) simd.Int32x16, want func(x []int16) []int32) {
+func testInt16x16ConvertToInt32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int32x16, want func(x []int16) []int32) {
n := 16
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x16Slice(x)
+ a := archsimd.LoadInt16x16Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x8ConvertToInt32(t *testing.T, f func(x simd.Int32x8) simd.Int32x8, want func(x []int32) []int32) {
+func testInt32x8ConvertToInt32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int32x8, want func(x []int32) []int32) {
n := 8
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x8Slice(x)
+ a := archsimd.LoadInt32x8Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt64x4ConvertToInt32(t *testing.T, f func(x simd.Int64x4) simd.Int32x4, want func(x []int64) []int32) {
+func testInt64x4ConvertToInt32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int32x4, want func(x []int64) []int32) {
n := 4
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
- a := simd.LoadInt64x4Slice(x)
+ a := archsimd.LoadInt64x4Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x16ConvertToInt32(t *testing.T, f func(x simd.Uint16x16) simd.Int32x16, want func(x []uint16) []int32) {
+func testUint16x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int32x16, want func(x []uint16) []int32) {
n := 16
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x16Slice(x)
+ a := archsimd.LoadUint16x16Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x8ConvertToInt32(t *testing.T, f func(x simd.Uint32x8) simd.Int32x8, want func(x []uint32) []int32) {
+func testUint32x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int32x8, want func(x []uint32) []int32) {
n := 8
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x8Slice(x)
+ a := archsimd.LoadUint32x8Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint64x4ConvertToInt32(t *testing.T, f func(x simd.Uint64x4) simd.Int32x4, want func(x []uint64) []int32) {
+func testUint64x4ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int32x4, want func(x []uint64) []int32) {
n := 4
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
- a := simd.LoadUint64x4Slice(x)
+ a := archsimd.LoadUint64x4Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x8ConvertToInt32(t *testing.T, f func(x simd.Float32x8) simd.Int32x8, want func(x []float32) []int32) {
+func testFloat32x8ConvertToInt32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int32x8, want func(x []float32) []int32) {
n := 8
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
+ a := archsimd.LoadFloat32x8Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat64x4ConvertToInt32(t *testing.T, f func(x simd.Float64x4) simd.Int32x4, want func(x []float64) []int32) {
+func testFloat64x4ConvertToInt32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int32x4, want func(x []float64) []int32) {
n := 4
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x4Slice(x)
+ a := archsimd.LoadFloat64x4Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x16ConvertToInt32(t *testing.T, f func(x simd.Int32x16) simd.Int32x16, want func(x []int32) []int32) {
+func testInt32x16ConvertToInt32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int32x16, want func(x []int32) []int32) {
n := 16
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x16Slice(x)
+ a := archsimd.LoadInt32x16Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt64x8ConvertToInt32(t *testing.T, f func(x simd.Int64x8) simd.Int32x8, want func(x []int64) []int32) {
+func testInt64x8ConvertToInt32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int32x8, want func(x []int64) []int32) {
n := 8
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
- a := simd.LoadInt64x8Slice(x)
+ a := archsimd.LoadInt64x8Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x16ConvertToInt32(t *testing.T, f func(x simd.Uint32x16) simd.Int32x16, want func(x []uint32) []int32) {
+func testUint32x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int32x16, want func(x []uint32) []int32) {
n := 16
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x16Slice(x)
+ a := archsimd.LoadUint32x16Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint64x8ConvertToInt32(t *testing.T, f func(x simd.Uint64x8) simd.Int32x8, want func(x []uint64) []int32) {
+func testUint64x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int32x8, want func(x []uint64) []int32) {
n := 8
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
- a := simd.LoadUint64x8Slice(x)
+ a := archsimd.LoadUint64x8Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x16ConvertToInt32(t *testing.T, f func(x simd.Float32x16) simd.Int32x16, want func(x []float32) []int32) {
+func testFloat32x16ConvertToInt32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int32x16, want func(x []float32) []int32) {
n := 16
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
+ a := archsimd.LoadFloat32x16Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat64x8ConvertToInt32(t *testing.T, f func(x simd.Float64x8) simd.Int32x8, want func(x []float64) []int32) {
+func testFloat64x8ConvertToInt32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int32x8, want func(x []float64) []int32) {
n := 8
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x8Slice(x)
+ a := archsimd.LoadFloat64x8Slice(x)
g := make([]int32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt8x16ConvertToUint32(t *testing.T, f func(x simd.Int8x16) simd.Uint32x16, want func(x []int8) []uint32) {
+func testInt8x16ConvertToUint32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint32x16, want func(x []int8) []uint32) {
n := 16
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
- a := simd.LoadInt8x16Slice(x)
+ a := archsimd.LoadInt8x16Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x8ConvertToUint32(t *testing.T, f func(x simd.Int16x8) simd.Uint32x8, want func(x []int16) []uint32) {
+func testInt16x8ConvertToUint32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint32x8, want func(x []int16) []uint32) {
n := 8
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x8Slice(x)
+ a := archsimd.LoadInt16x8Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x4ConvertToUint32(t *testing.T, f func(x simd.Int32x4) simd.Uint32x4, want func(x []int32) []uint32) {
+func testInt32x4ConvertToUint32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint32x4, want func(x []int32) []uint32) {
n := 4
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x4Slice(x)
+ a := archsimd.LoadInt32x4Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint8x16ConvertToUint32(t *testing.T, f func(x simd.Uint8x16) simd.Uint32x16, want func(x []uint8) []uint32) {
+func testUint8x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint32x16, want func(x []uint8) []uint32) {
n := 16
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
- a := simd.LoadUint8x16Slice(x)
+ a := archsimd.LoadUint8x16Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x8ConvertToUint32(t *testing.T, f func(x simd.Uint16x8) simd.Uint32x8, want func(x []uint16) []uint32) {
+func testUint16x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint32x8, want func(x []uint16) []uint32) {
n := 8
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x8Slice(x)
+ a := archsimd.LoadUint16x8Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x4ConvertToUint32(t *testing.T, f func(x simd.Uint32x4) simd.Uint32x4, want func(x []uint32) []uint32) {
+func testUint32x4ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint32x4, want func(x []uint32) []uint32) {
n := 4
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x4Slice(x)
+ a := archsimd.LoadUint32x4Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x4ConvertToUint32(t *testing.T, f func(x simd.Float32x4) simd.Uint32x4, want func(x []float32) []uint32) {
+func testFloat32x4ConvertToUint32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint32x4, want func(x []float32) []uint32) {
n := 4
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x4Slice(x)
+ a := archsimd.LoadFloat32x4Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x16ConvertToUint32(t *testing.T, f func(x simd.Int16x16) simd.Uint32x16, want func(x []int16) []uint32) {
+func testInt16x16ConvertToUint32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint32x16, want func(x []int16) []uint32) {
n := 16
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x16Slice(x)
+ a := archsimd.LoadInt16x16Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x8ConvertToUint32(t *testing.T, f func(x simd.Int32x8) simd.Uint32x8, want func(x []int32) []uint32) {
+func testInt32x8ConvertToUint32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint32x8, want func(x []int32) []uint32) {
n := 8
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x8Slice(x)
+ a := archsimd.LoadInt32x8Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt64x4ConvertToUint32(t *testing.T, f func(x simd.Int64x4) simd.Uint32x4, want func(x []int64) []uint32) {
+func testInt64x4ConvertToUint32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint32x4, want func(x []int64) []uint32) {
n := 4
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
- a := simd.LoadInt64x4Slice(x)
+ a := archsimd.LoadInt64x4Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x16ConvertToUint32(t *testing.T, f func(x simd.Uint16x16) simd.Uint32x16, want func(x []uint16) []uint32) {
+func testUint16x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint32x16, want func(x []uint16) []uint32) {
n := 16
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x16Slice(x)
+ a := archsimd.LoadUint16x16Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x8ConvertToUint32(t *testing.T, f func(x simd.Uint32x8) simd.Uint32x8, want func(x []uint32) []uint32) {
+func testUint32x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint32x8, want func(x []uint32) []uint32) {
n := 8
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x8Slice(x)
+ a := archsimd.LoadUint32x8Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint64x4ConvertToUint32(t *testing.T, f func(x simd.Uint64x4) simd.Uint32x4, want func(x []uint64) []uint32) {
+func testUint64x4ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint32x4, want func(x []uint64) []uint32) {
n := 4
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
- a := simd.LoadUint64x4Slice(x)
+ a := archsimd.LoadUint64x4Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x8ConvertToUint32(t *testing.T, f func(x simd.Float32x8) simd.Uint32x8, want func(x []float32) []uint32) {
+func testFloat32x8ConvertToUint32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint32x8, want func(x []float32) []uint32) {
n := 8
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
+ a := archsimd.LoadFloat32x8Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat64x4ConvertToUint32(t *testing.T, f func(x simd.Float64x4) simd.Uint32x4, want func(x []float64) []uint32) {
+func testFloat64x4ConvertToUint32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint32x4, want func(x []float64) []uint32) {
n := 4
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x4Slice(x)
+ a := archsimd.LoadFloat64x4Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x16ConvertToUint32(t *testing.T, f func(x simd.Int32x16) simd.Uint32x16, want func(x []int32) []uint32) {
+func testInt32x16ConvertToUint32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint32x16, want func(x []int32) []uint32) {
n := 16
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x16Slice(x)
+ a := archsimd.LoadInt32x16Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt64x8ConvertToUint32(t *testing.T, f func(x simd.Int64x8) simd.Uint32x8, want func(x []int64) []uint32) {
+func testInt64x8ConvertToUint32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint32x8, want func(x []int64) []uint32) {
n := 8
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
- a := simd.LoadInt64x8Slice(x)
+ a := archsimd.LoadInt64x8Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x16ConvertToUint32(t *testing.T, f func(x simd.Uint32x16) simd.Uint32x16, want func(x []uint32) []uint32) {
+func testUint32x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint32x16, want func(x []uint32) []uint32) {
n := 16
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x16Slice(x)
+ a := archsimd.LoadUint32x16Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testUint64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint64x8ConvertToUint32(t *testing.T, f func(x simd.Uint64x8) simd.Uint32x8, want func(x []uint64) []uint32) {
+func testUint64x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint32x8, want func(x []uint64) []uint32) {
n := 8
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
- a := simd.LoadUint64x8Slice(x)
+ a := archsimd.LoadUint64x8Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x16ConvertToUint32(t *testing.T, f func(x simd.Float32x16) simd.Uint32x16, want func(x []float32) []uint32) {
+func testFloat32x16ConvertToUint32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint32x16, want func(x []float32) []uint32) {
n := 16
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
+ a := archsimd.LoadFloat32x16Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat64x8ConvertToUint32(t *testing.T, f func(x simd.Float64x8) simd.Uint32x8, want func(x []float64) []uint32) {
+func testFloat64x8ConvertToUint32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint32x8, want func(x []float64) []uint32) {
n := 8
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x8Slice(x)
+ a := archsimd.LoadFloat64x8Slice(x)
g := make([]uint32, n)
f(a).StoreSlice(g)
w := want(x)
// testInt8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt8x16ConvertToUint16(t *testing.T, f func(x simd.Int8x16) simd.Uint16x16, want func(x []int8) []uint16) {
+func testInt8x16ConvertToUint16(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint16x16, want func(x []int8) []uint16) {
n := 16
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
- a := simd.LoadInt8x16Slice(x)
+ a := archsimd.LoadInt8x16Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testInt16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x8ConvertToUint16(t *testing.T, f func(x simd.Int16x8) simd.Uint16x8, want func(x []int16) []uint16) {
+func testInt16x8ConvertToUint16(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint16x8, want func(x []int16) []uint16) {
n := 8
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x8Slice(x)
+ a := archsimd.LoadInt16x8Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testUint8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint8x16ConvertToUint16(t *testing.T, f func(x simd.Uint8x16) simd.Uint16x16, want func(x []uint8) []uint16) {
+func testUint8x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint16x16, want func(x []uint8) []uint16) {
n := 16
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
- a := simd.LoadUint8x16Slice(x)
+ a := archsimd.LoadUint8x16Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testUint16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x8ConvertToUint16(t *testing.T, f func(x simd.Uint16x8) simd.Uint16x8, want func(x []uint16) []uint16) {
+func testUint16x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint16x8, want func(x []uint16) []uint16) {
n := 8
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x8Slice(x)
+ a := archsimd.LoadUint16x8Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testInt8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt8x32ConvertToUint16(t *testing.T, f func(x simd.Int8x32) simd.Uint16x32, want func(x []int8) []uint16) {
+func testInt8x32ConvertToUint16(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint16x32, want func(x []int8) []uint16) {
n := 32
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
- a := simd.LoadInt8x32Slice(x)
+ a := archsimd.LoadInt8x32Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testInt16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x16ConvertToUint16(t *testing.T, f func(x simd.Int16x16) simd.Uint16x16, want func(x []int16) []uint16) {
+func testInt16x16ConvertToUint16(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint16x16, want func(x []int16) []uint16) {
n := 16
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x16Slice(x)
+ a := archsimd.LoadInt16x16Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testInt32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x8ConvertToUint16(t *testing.T, f func(x simd.Int32x8) simd.Uint16x8, want func(x []int32) []uint16) {
+func testInt32x8ConvertToUint16(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint16x8, want func(x []int32) []uint16) {
n := 8
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x8Slice(x)
+ a := archsimd.LoadInt32x8Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testUint8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint8x32ConvertToUint16(t *testing.T, f func(x simd.Uint8x32) simd.Uint16x32, want func(x []uint8) []uint16) {
+func testUint8x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint16x32, want func(x []uint8) []uint16) {
n := 32
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
- a := simd.LoadUint8x32Slice(x)
+ a := archsimd.LoadUint8x32Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testUint16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x16ConvertToUint16(t *testing.T, f func(x simd.Uint16x16) simd.Uint16x16, want func(x []uint16) []uint16) {
+func testUint16x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint16x16, want func(x []uint16) []uint16) {
n := 16
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x16Slice(x)
+ a := archsimd.LoadUint16x16Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testUint32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x8ConvertToUint16(t *testing.T, f func(x simd.Uint32x8) simd.Uint16x8, want func(x []uint32) []uint16) {
+func testUint32x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint16x8, want func(x []uint32) []uint16) {
n := 8
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x8Slice(x)
+ a := archsimd.LoadUint32x8Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x8ConvertToUint16(t *testing.T, f func(x simd.Float32x8) simd.Uint16x8, want func(x []float32) []uint16) {
+func testFloat32x8ConvertToUint16(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint16x8, want func(x []float32) []uint16) {
n := 8
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
+ a := archsimd.LoadFloat32x8Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testInt16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x32ConvertToUint16(t *testing.T, f func(x simd.Int16x32) simd.Uint16x32, want func(x []int16) []uint16) {
+func testInt16x32ConvertToUint16(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint16x32, want func(x []int16) []uint16) {
n := 32
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
- a := simd.LoadInt16x32Slice(x)
+ a := archsimd.LoadInt16x32Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testInt32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x16ConvertToUint16(t *testing.T, f func(x simd.Int32x16) simd.Uint16x16, want func(x []int32) []uint16) {
+func testInt32x16ConvertToUint16(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint16x16, want func(x []int32) []uint16) {
n := 16
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
- a := simd.LoadInt32x16Slice(x)
+ a := archsimd.LoadInt32x16Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testInt64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt64x8ConvertToUint16(t *testing.T, f func(x simd.Int64x8) simd.Uint16x8, want func(x []int64) []uint16) {
+func testInt64x8ConvertToUint16(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint16x8, want func(x []int64) []uint16) {
n := 8
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
- a := simd.LoadInt64x8Slice(x)
+ a := archsimd.LoadInt64x8Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testUint16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x32ConvertToUint16(t *testing.T, f func(x simd.Uint16x32) simd.Uint16x32, want func(x []uint16) []uint16) {
+func testUint16x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint16x32, want func(x []uint16) []uint16) {
n := 32
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
- a := simd.LoadUint16x32Slice(x)
+ a := archsimd.LoadUint16x32Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testUint32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x16ConvertToUint16(t *testing.T, f func(x simd.Uint32x16) simd.Uint16x16, want func(x []uint32) []uint16) {
+func testUint32x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint16x16, want func(x []uint32) []uint16) {
n := 16
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
- a := simd.LoadUint32x16Slice(x)
+ a := archsimd.LoadUint32x16Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testUint64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint64x8ConvertToUint16(t *testing.T, f func(x simd.Uint64x8) simd.Uint16x8, want func(x []uint64) []uint16) {
+func testUint64x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint16x8, want func(x []uint64) []uint16) {
n := 8
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
- a := simd.LoadUint64x8Slice(x)
+ a := archsimd.LoadUint64x8Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x16ConvertToUint16(t *testing.T, f func(x simd.Float32x16) simd.Uint16x16, want func(x []float32) []uint16) {
+func testFloat32x16ConvertToUint16(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint16x16, want func(x []float32) []uint16) {
n := 16
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
+ a := archsimd.LoadFloat32x16Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat64x8ConvertToUint16(t *testing.T, f func(x simd.Float64x8) simd.Uint16x8, want func(x []float64) []uint16) {
+func testFloat64x8ConvertToUint16(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint16x8, want func(x []float64) []uint16) {
n := 8
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x8Slice(x)
+ a := archsimd.LoadFloat64x8Slice(x)
g := make([]uint16, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x4UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat32x4UnaryFlaky(t *testing.T, f func(x simd.Float32x4) simd.Float32x4, want func(x []float32) []float32, flakiness float64) {
+func testFloat32x4UnaryFlaky(t *testing.T, f func(x archsimd.Float32x4) archsimd.Float32x4, want func(x []float32) []float32, flakiness float64) {
n := 4
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x4Slice(x)
+ a := archsimd.LoadFloat32x4Slice(x)
g := make([]float32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat64x2UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat64x2UnaryFlaky(t *testing.T, f func(x simd.Float64x2) simd.Float64x2, want func(x []float64) []float64, flakiness float64) {
+func testFloat64x2UnaryFlaky(t *testing.T, f func(x archsimd.Float64x2) archsimd.Float64x2, want func(x []float64) []float64, flakiness float64) {
n := 2
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x2Slice(x)
+ a := archsimd.LoadFloat64x2Slice(x)
g := make([]float64, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x8UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat32x8UnaryFlaky(t *testing.T, f func(x simd.Float32x8) simd.Float32x8, want func(x []float32) []float32, flakiness float64) {
+func testFloat32x8UnaryFlaky(t *testing.T, f func(x archsimd.Float32x8) archsimd.Float32x8, want func(x []float32) []float32, flakiness float64) {
n := 8
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x8Slice(x)
+ a := archsimd.LoadFloat32x8Slice(x)
g := make([]float32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat64x4UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat64x4UnaryFlaky(t *testing.T, f func(x simd.Float64x4) simd.Float64x4, want func(x []float64) []float64, flakiness float64) {
+func testFloat64x4UnaryFlaky(t *testing.T, f func(x archsimd.Float64x4) archsimd.Float64x4, want func(x []float64) []float64, flakiness float64) {
n := 4
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x4Slice(x)
+ a := archsimd.LoadFloat64x4Slice(x)
g := make([]float64, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat32x16UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat32x16UnaryFlaky(t *testing.T, f func(x simd.Float32x16) simd.Float32x16, want func(x []float32) []float32, flakiness float64) {
+func testFloat32x16UnaryFlaky(t *testing.T, f func(x archsimd.Float32x16) archsimd.Float32x16, want func(x []float32) []float32, flakiness float64) {
n := 16
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
- a := simd.LoadFloat32x16Slice(x)
+ a := archsimd.LoadFloat32x16Slice(x)
g := make([]float32, n)
f(a).StoreSlice(g)
w := want(x)
// testFloat64x8UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat64x8UnaryFlaky(t *testing.T, f func(x simd.Float64x8) simd.Float64x8, want func(x []float64) []float64, flakiness float64) {
+func testFloat64x8UnaryFlaky(t *testing.T, f func(x archsimd.Float64x8) archsimd.Float64x8, want func(x []float64) []float64, flakiness float64) {
n := 8
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
- a := simd.LoadFloat64x8Slice(x)
+ a := archsimd.LoadFloat64x8Slice(x)
g := make([]float64, n)
f(a).StoreSlice(g)
w := want(x)
--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.simd && amd64
+
+package archsimd_test
+
+import (
+ "math"
+ "simd/archsimd"
+ "testing"
+)
+
+func TestCeil(t *testing.T) {
+ testFloat32x4Unary(t, archsimd.Float32x4.Ceil, ceilSlice[float32])
+ testFloat32x8Unary(t, archsimd.Float32x8.Ceil, ceilSlice[float32])
+ testFloat64x2Unary(t, archsimd.Float64x2.Ceil, ceilSlice[float64])
+ testFloat64x4Unary(t, archsimd.Float64x4.Ceil, ceilSlice[float64])
+ if archsimd.X86.AVX512() {
+ // testFloat32x16Unary(t, archsimd.Float32x16.Ceil, ceilSlice[float32]) // missing
+ // testFloat64x8Unary(t, archsimd.Float64x8.Ceil, ceilSlice[float64]) // missing
+ }
+}
+
+func TestFloor(t *testing.T) {
+ testFloat32x4Unary(t, archsimd.Float32x4.Floor, floorSlice[float32])
+ testFloat32x8Unary(t, archsimd.Float32x8.Floor, floorSlice[float32])
+ testFloat64x2Unary(t, archsimd.Float64x2.Floor, floorSlice[float64])
+ testFloat64x4Unary(t, archsimd.Float64x4.Floor, floorSlice[float64])
+ if archsimd.X86.AVX512() {
+ // testFloat32x16Unary(t, archsimd.Float32x16.Floor, floorSlice[float32]) // missing
+ // testFloat64x8Unary(t, archsimd.Float64x8.Floor, floorSlice[float64]) // missing
+ }
+}
+
+func TestTrunc(t *testing.T) {
+ testFloat32x4Unary(t, archsimd.Float32x4.Trunc, truncSlice[float32])
+ testFloat32x8Unary(t, archsimd.Float32x8.Trunc, truncSlice[float32])
+ testFloat64x2Unary(t, archsimd.Float64x2.Trunc, truncSlice[float64])
+ testFloat64x4Unary(t, archsimd.Float64x4.Trunc, truncSlice[float64])
+ if archsimd.X86.AVX512() {
+ // testFloat32x16Unary(t, archsimd.Float32x16.Trunc, truncSlice[float32]) // missing
+ // testFloat64x8Unary(t, archsimd.Float64x8.Trunc, truncSlice[float64]) // missing
+ }
+}
+
+func TestRound(t *testing.T) {
+ testFloat32x4Unary(t, archsimd.Float32x4.RoundToEven, roundSlice[float32])
+ testFloat32x8Unary(t, archsimd.Float32x8.RoundToEven, roundSlice[float32])
+ testFloat64x2Unary(t, archsimd.Float64x2.RoundToEven, roundSlice[float64])
+ testFloat64x4Unary(t, archsimd.Float64x4.RoundToEven, roundSlice[float64])
+ if archsimd.X86.AVX512() {
+ // testFloat32x16Unary(t, archsimd.Float32x16.RoundToEven, roundSlice[float32]) // missing
+ // testFloat64x8Unary(t, archsimd.Float64x8.RoundToEven, roundSlice[float64]) // missing
+ }
+}
+
+func TestSqrt(t *testing.T) {
+ testFloat32x4Unary(t, archsimd.Float32x4.Sqrt, sqrtSlice[float32])
+ testFloat32x8Unary(t, archsimd.Float32x8.Sqrt, sqrtSlice[float32])
+ testFloat64x2Unary(t, archsimd.Float64x2.Sqrt, sqrtSlice[float64])
+ testFloat64x4Unary(t, archsimd.Float64x4.Sqrt, sqrtSlice[float64])
+ if archsimd.X86.AVX512() {
+ testFloat32x16Unary(t, archsimd.Float32x16.Sqrt, sqrtSlice[float32])
+ testFloat64x8Unary(t, archsimd.Float64x8.Sqrt, sqrtSlice[float64])
+ }
+}
+
+func TestNot(t *testing.T) {
+ testInt8x16Unary(t, archsimd.Int8x16.Not, map1[int8](not))
+ testInt8x32Unary(t, archsimd.Int8x32.Not, map1[int8](not))
+ testInt16x8Unary(t, archsimd.Int16x8.Not, map1[int16](not))
+ testInt16x16Unary(t, archsimd.Int16x16.Not, map1[int16](not))
+ testInt32x4Unary(t, archsimd.Int32x4.Not, map1[int32](not))
+ testInt32x8Unary(t, archsimd.Int32x8.Not, map1[int32](not))
+}
+
+func TestAbsolute(t *testing.T) {
+ testInt8x16Unary(t, archsimd.Int8x16.Abs, map1[int8](abs))
+ testInt8x32Unary(t, archsimd.Int8x32.Abs, map1[int8](abs))
+ testInt16x8Unary(t, archsimd.Int16x8.Abs, map1[int16](abs))
+ testInt16x16Unary(t, archsimd.Int16x16.Abs, map1[int16](abs))
+ testInt32x4Unary(t, archsimd.Int32x4.Abs, map1[int32](abs))
+ testInt32x8Unary(t, archsimd.Int32x8.Abs, map1[int32](abs))
+ if archsimd.X86.AVX512() {
+ testInt8x64Unary(t, archsimd.Int8x64.Abs, map1[int8](abs))
+ testInt16x32Unary(t, archsimd.Int16x32.Abs, map1[int16](abs))
+ testInt32x16Unary(t, archsimd.Int32x16.Abs, map1[int32](abs))
+ testInt64x2Unary(t, archsimd.Int64x2.Abs, map1[int64](abs))
+ testInt64x4Unary(t, archsimd.Int64x4.Abs, map1[int64](abs))
+ testInt64x8Unary(t, archsimd.Int64x8.Abs, map1[int64](abs))
+ }
+}
+
+func TestCeilScaledResidue(t *testing.T) {
+ if !archsimd.X86.AVX512() {
+ t.Skip("Needs AVX512")
+ }
+ testFloat64x8UnaryFlaky(t,
+ func(x archsimd.Float64x8) archsimd.Float64x8 { return x.CeilScaledResidue(0) },
+ map1(ceilResidueForPrecision[float64](0)),
+ 0.001)
+ testFloat64x8UnaryFlaky(t,
+ func(x archsimd.Float64x8) archsimd.Float64x8 { return x.CeilScaledResidue(1) },
+ map1(ceilResidueForPrecision[float64](1)),
+ 0.001)
+ testFloat64x8Unary(t,
+ func(x archsimd.Float64x8) archsimd.Float64x8 { return x.Sub(x.CeilScaled(0)) },
+ map1[float64](func(x float64) float64 { return x - math.Ceil(x) }))
+}
+
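+// A note on the checks above: the last one pins down an exact identity --
+// CeilScaled(0) behaves like math.Ceil, so x.Sub(x.CeilScaled(0)) equals
+// x - math.Ceil(x) elementwise -- while the CeilScaledResidue checks go
+// through the Flaky helper with a 0.001 allowance.
+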
+func TestToUint32(t *testing.T) {
+ if !archsimd.X86.AVX512() {
+ t.Skip("Needs AVX512")
+ }
+ testFloat32x4ConvertToUint32(t, archsimd.Float32x4.ConvertToUint32, map1[float32](toUint32))
+ testFloat32x8ConvertToUint32(t, archsimd.Float32x8.ConvertToUint32, map1[float32](toUint32))
+ testFloat32x16ConvertToUint32(t, archsimd.Float32x16.ConvertToUint32, map1[float32](toUint32))
+}
+
+func TestToInt32(t *testing.T) {
+ testFloat32x4ConvertToInt32(t, archsimd.Float32x4.ConvertToInt32, map1[float32](toInt32))
+ testFloat32x8ConvertToInt32(t, archsimd.Float32x8.ConvertToInt32, map1[float32](toInt32))
+}
+
+func TestConverts(t *testing.T) {
+ testUint8x16ConvertToUint16(t, archsimd.Uint8x16.ExtendToUint16, map1[uint8](toUint16))
+ testUint16x8ConvertToUint32(t, archsimd.Uint16x8.ExtendToUint32, map1[uint16](toUint32))
+}
+
+func TestConvertsAVX512(t *testing.T) {
+ if !archsimd.X86.AVX512() {
+ t.Skip("Needs AVX512")
+ }
+ testUint8x32ConvertToUint16(t, archsimd.Uint8x32.ExtendToUint16, map1[uint8](toUint16))
+}
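+
+// These exercise the count-preserving helpers above: ExtendToUint16 widens
+// each lane, so Uint8x16 (128 bits) extends to Uint16x16 (256 bits) and
+// Uint8x32 extends to the 512-bit Uint16x32, which is why the x32 case is
+// gated on AVX512.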
//go:build goexperiment.simd
-package simd
+package archsimd
// Masked returns x but with elements zeroed where mask is false.
func (x Int8x16) Masked(mask Mask8x16) Int8x16 {
//go:build goexperiment.simd
-package simd
+package archsimd
/* AESDecryptLastRound */
//go:build goexperiment.simd
-package simd
+package archsimd
/* blend */
//go:build goexperiment.simd
-package simd
+package archsimd
// BroadcastInt8x16 returns a vector with the input
// x assigned to all elements of the output.
--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.simd && amd64
+
+package archsimd_test
+
+import (
+ "simd/archsimd"
+ "simd/archsimd/internal/test_helpers"
+ "testing"
+)
+
+func TestConcatSelectedConstant64(t *testing.T) {
+ a := make([]int64, 2)
+ x := archsimd.LoadInt64x2Slice([]int64{4, 5})
+ y := archsimd.LoadInt64x2Slice([]int64{6, 7})
+ z := x.ExportTestConcatSelectedConstant(0b10, y)
+ z.StoreSlice(a)
+ test_helpers.CheckSlices[int64](t, a, []int64{4, 7})
+}
+
+func TestConcatSelectedConstantGrouped64(t *testing.T) {
+ a := make([]float64, 4)
+ x := archsimd.LoadFloat64x4Slice([]float64{4, 5, 8, 9})
+ y := archsimd.LoadFloat64x4Slice([]float64{6, 7, 10, 11})
+ z := x.ExportTestConcatSelectedConstantGrouped(0b_11_10, y)
+ z.StoreSlice(a)
+ test_helpers.CheckSlices[float64](t, a, []float64{4, 7, 9, 11})
+}
+
+func TestConcatSelectedConstant32(t *testing.T) {
+ a := make([]float32, 4)
+ x := archsimd.LoadFloat32x4Slice([]float32{4, 5, 8, 9})
+ y := archsimd.LoadFloat32x4Slice([]float32{6, 7, 10, 11})
+ z := x.ExportTestConcatSelectedConstant(0b_11_01_10_00, y)
+ z.StoreSlice(a)
+ test_helpers.CheckSlices[float32](t, a, []float32{4, 8, 7, 11})
+}
+
+func TestConcatSelectedConstantGrouped32(t *testing.T) {
+ a := make([]uint32, 8)
+ x := archsimd.LoadUint32x8Slice([]uint32{0, 1, 2, 3, 8, 9, 10, 11})
+ y := archsimd.LoadUint32x8Slice([]uint32{4, 5, 6, 7, 12, 13, 14, 15})
+ z := x.ExportTestConcatSelectedConstantGrouped(0b_11_01_00_10, y)
+ z.StoreSlice(a)
+ test_helpers.CheckSlices[uint32](t, a, []uint32{2, 0, 5, 7, 10, 8, 13, 15})
+}
+
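+// A reading of the immediates above, inferred from the expected slices rather
+// than from documented guarantees: each output element has a selector field
+// in the constant (2 bits for the x32 shapes, 1 bit for the x64 shapes),
+// least-significant field first, with the low half of the fields indexing the
+// receiver and the high half indexing the argument. In
+// TestConcatSelectedConstant32, 0b_11_01_10_00 therefore yields
+// {x[0], x[2], y[1], y[3]} = {4, 8, 7, 11}; the Grouped variants repeat the
+// same selection in every 128-bit group.
+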
+func TestTern(t *testing.T) {
+ if !archsimd.X86.AVX512() {
+ t.Skip("This test needs AVX512")
+ }
+ x := archsimd.LoadInt32x8Slice([]int32{0, 0, 0, 0, 1, 1, 1, 1})
+ y := archsimd.LoadInt32x8Slice([]int32{0, 0, 1, 1, 0, 0, 1, 1})
+ z := archsimd.LoadInt32x8Slice([]int32{0, 1, 0, 1, 0, 1, 0, 1})
+
+ foo := func(w archsimd.Int32x8, k uint8) {
+ a := make([]int32, 8)
+ w.StoreSlice(a)
+ t.Logf("For k=%0b, w=%v", k, a)
+ for i, b := range a {
+ if (int32(k)>>i)&1 != b {
+ t.Errorf("Element %d of stored slice (=%d) did not match corresponding bit in 0b%b",
+ i, b, k)
+ }
+ }
+ }
+
+ foo(x.ExportTestTern(0b1111_0000, y, z), 0b1111_0000)
+ foo(x.ExportTestTern(0b1100_1100, y, z), 0b1100_1100)
+ foo(x.ExportTestTern(0b1010_1010, y, z), 0b1010_1010)
+}
+
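+// Why TestTern can check result lanes against the bits of k: the slices for
+// x, y, and z are chosen so that lane i holds the input combination
+// (x, y, z) = (i>>2&1, i>>1&1, i&1), and a ternary-logic immediate is a truth
+// table indexed by x*4 + y*2 + z. Lane i of the result is thus bit i of k;
+// for instance k = 0b1111_0000 simply reproduces x.
+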
+func TestSelect2x4x32(t *testing.T) {
+ for a := range uint8(8) {
+ for b := range uint8(8) {
+ for c := range uint8(8) {
+ for d := range uint8(8) {
+ x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
+ y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
+ z := select2x4x32(x, a, b, c, d, y)
+ w := make([]int32, 4)
+ z.StoreSlice(w)
+ if w[0] != int32(a) || w[1] != int32(b) ||
+ w[2] != int32(c) || w[3] != int32(d) {
+ t.Errorf("Expected [%d %d %d %d] got %v", a, b, c, d, w)
+ }
+ }
+ }
+ }
+ }
+}
+
+func TestSelect2x8x32Grouped(t *testing.T) {
+ for a := range uint8(8) {
+ for b := range uint8(8) {
+ for c := range uint8(8) {
+ for d := range uint8(8) {
+ x := archsimd.LoadInt32x8Slice([]int32{0, 1, 2, 3, 10, 11, 12, 13})
+ y := archsimd.LoadInt32x8Slice([]int32{4, 5, 6, 7, 14, 15, 16, 17})
+ z := select2x8x32Grouped(x, a, b, c, d, y)
+ w := make([]int32, 8)
+ z.StoreSlice(w)
+ if w[0] != int32(a) || w[1] != int32(b) ||
+ w[2] != int32(c) || w[3] != int32(d) ||
+ w[4] != int32(10+a) || w[5] != int32(10+b) ||
+ w[6] != int32(10+c) || w[7] != int32(10+d) {
+ t.Errorf("Expected [%d %d %d %d %d %d %d %d] got %v", a, b, c, d, 10+a, 10+b, 10+c, 10+d, w)
+ }
+ }
+ }
+ }
+ }
+}
+
+// select2x4x32 returns a selection of 4 elements in x and y, numbered
+// 0-7, where 0-3 are the four elements of x and 4-7 are the four elements
+// of y.
+func select2x4x32(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 {
+ pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
+
+ a, b, c, d = a&3, b&3, c&3, d&3
+
+ switch pattern {
+ case archsimd.LLLL:
+ return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), x)
+ case archsimd.HHHH:
+ return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), y)
+ case archsimd.LLHH:
+ return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), y)
+ case archsimd.HHLL:
+ return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), x)
+
+ case archsimd.HLLL:
+ z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), x)
+ return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), x)
+ case archsimd.LHLL:
+ z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), y)
+ return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), x)
+
+ case archsimd.HLHH:
+ z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), x)
+ return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), y)
+ case archsimd.LHHH:
+ z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), y)
+ return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), y)
+
+ case archsimd.LLLH:
+ z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), y)
+ return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+ case archsimd.LLHL:
+ z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), x)
+ return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+ case archsimd.HHLH:
+ z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), y)
+ return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+ case archsimd.HHHL:
+ z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), x)
+ return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+
+ case archsimd.LHLH:
+ z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, c, b, d), y)
+ return z.ExportTestConcatSelectedConstant(0b11_01_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 1, 3) */, z)
+ case archsimd.HLHL:
+ z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(b, d, a, c), y)
+ return z.ExportTestConcatSelectedConstant(0b01_11_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 3, 1) */, z)
+ case archsimd.HLLH:
+ z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(b, c, a, d), y)
+ return z.ExportTestConcatSelectedConstant(0b11_01_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 1, 3) */, z)
+ case archsimd.LHHL:
+ z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, d, b, c), y)
+ return z.ExportTestConcatSelectedConstant(0b01_11_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 3, 1) */, z)
+ }
+ panic("missing case, switch should be exhaustive")
+}
+
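+// For example (a sketch using the exported test hooks, mirroring
+// TestSelect2x4x32): selecting elements 4, 1, 6, 3 takes y[0], x[1], y[2],
+// x[3], the HLHL case above:
+//
+//	x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
+//	y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
+//	z := select2x4x32(x, 4, 1, 6, 3, y) // z holds {4, 1, 6, 3}
+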
+// select2x8x32Grouped returns a pair of selections of 4 elements in x and y,
+// numbered 0-7, where 0-3 are the four elements of x's two groups (lower and
+// upper 128 bits) and 4-7 are the four elements of y's two groups.
+func select2x8x32Grouped(x archsimd.Int32x8, a, b, c, d uint8, y archsimd.Int32x8) archsimd.Int32x8 {
+ // This classifies the sixteen possible (a, b, c, d) selections by whether
+ // they are expressible in the ExportTestConcatSelectedConstant pattern,
+ // or not. Classification is by H and L, where H is a selection from 4-7
+ // and L is a selection from 0-3.
+ // archsimd.LLHH -> CSC(x,y, a, b, c&3, d&3)
+ // archsimd.HHLL -> CSC(y,x, a&3, b&3, c, d)
+ // archsimd.LLLL -> CSC(x,x, a, b, c, d)
+ // archsimd.HHHH -> CSC(y,y, a&3, b&3, c&3, d&3)
+
+ // archsimd.LLLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(x, z, a, b, 0, 2)
+ // archsimd.LLHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(x, z, a, b, 0, 2)
+ // archsimd.HHLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(y, z, a&3, b&3, 0, 2)
+ // archsimd.HHHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(y, z, a&3, b&3, 0, 2)
+
+ // archsimd.LHLL -> z = CSC(x, y, a, a, b&3, b&3); CSC(z, x, 0, 2, c, d)
+ // etc
+
+ // archsimd.LHLH -> z = CSC(x, y, a, c, b&3, d&3); CSC(z, z, 0, 2, 1, 3)
+ // archsimd.HLHL -> z = CSC(x, y, b, d, a&3, c&3); CSC(z, z, 2, 0, 3, 1)
+
+ pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
+
+ a, b, c, d = a&3, b&3, c&3, d&3
+
+ switch pattern {
+ case archsimd.LLLL:
+ return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), x)
+ case archsimd.HHHH:
+ return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), y)
+ case archsimd.LLHH:
+ return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), y)
+ case archsimd.HHLL:
+ return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), x)
+
+ case archsimd.HLLL:
+ z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), x)
+ return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), x)
+ case archsimd.LHLL:
+ z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), y)
+ return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), x)
+
+ case archsimd.HLHH:
+ z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), x)
+ return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), y)
+ case archsimd.LHHH:
+ z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), y)
+ return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), y)
+
+ case archsimd.LLLH:
+ z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), y)
+ return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+ case archsimd.LLHL:
+ z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), x)
+ return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+ case archsimd.HHLH:
+ z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), y)
+ return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+ case archsimd.HHHL:
+ z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), x)
+ return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+
+ case archsimd.LHLH:
+ z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, c, b, d), y)
+ return z.ExportTestConcatSelectedConstantGrouped(0b11_01_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 1, 3) */, z)
+ case archsimd.HLHL:
+ z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(b, d, a, c), y)
+ return z.ExportTestConcatSelectedConstantGrouped(0b01_11_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 3, 1) */, z)
+ case archsimd.HLLH:
+ z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(b, c, a, d), y)
+ return z.ExportTestConcatSelectedConstantGrouped(0b11_01_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 1, 3) */, z)
+ case archsimd.LHHL:
+ z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, d, b, c), y)
+ return z.ExportTestConcatSelectedConstantGrouped(0b01_11_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 3, 1) */, z)
+ }
+ panic("missing case, switch should be exhaustive")
+}
//go:build goexperiment.simd && amd64
-package simd
+package archsimd
// These constants represent the source pattern for the four parameters
// (a, b, c, d) passed to SelectFromPair and SelectFromPairGrouped.
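// For example, HLLH names the selection where a and d come from the high
// vector (elements 4-7) and b and c from the low vector (elements 0-3); the
// test helper select2x4x32 computes the matching nibble as
// a>>2 + (b&4)>>1 + (c&4) + (d&4)<<1, placing parameter i's high bit in
// bit i of the pattern.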
//go:build goexperiment.simd
-package simd
+package archsimd
import "unsafe"
//go:build goexperiment.simd
-package simd
+package archsimd
import "unsafe"
//go:build goexperiment.simd && amd64
-package simd
+package archsimd
import (
"internal/strconv"
import (
"fmt"
"os"
- "simd"
+ "simd/archsimd"
"unsafe"
)
-func load(s []float64) simd.Float64x4 {
- return simd.LoadFloat64x4((*[4]float64)(s[:4]))
+func load(s []float64) archsimd.Float64x4 {
+ return archsimd.LoadFloat64x4((*[4]float64)(s[:4]))
}
-type S1 = simd.Float64x4
+type S1 = archsimd.Float64x4
-type S2 simd.Float64x4
+type S2 archsimd.Float64x4
func (s S2) Len() int {
- return simd.Float64x4(s).Len()
+ return archsimd.Float64x4(s).Len()
}
func (s S2) Load(a []float64) S2 {
}
func (s S2) Store(a *[4]float64) {
- simd.Float64x4(s).Store(a)
+ archsimd.Float64x4(s).Store(a)
}
func (s S2) Add(a S2) S2 {
- return S2(simd.Float64x4(s).Add(simd.Float64x4(a)))
+ return S2(archsimd.Float64x4(s).Add(archsimd.Float64x4(a)))
}
func (s S2) Mul(a S2) S2 {
- return S2(simd.Float64x4(s).Mul(simd.Float64x4(a)))
+ return S2(archsimd.Float64x4(s).Mul(archsimd.Float64x4(a)))
}
type S3 struct {
- simd.Float64x4
+ archsimd.Float64x4
}
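+
+// S1 (an alias), S2 (a defined type with forwarding methods), and S3 (an
+// embedded field) exercise three ways user code can wrap the vector types.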
func ip64_0(a, b []float64) float64 {
}
//go:noinline
-func FMA(a, b, c simd.Float64x4) simd.Float64x4 {
+func FMA(a, b, c archsimd.Float64x4) archsimd.Float64x4 {
return a.Add(b.Mul(c))
}
//go:build goexperiment.simd
-package simd
+package archsimd
// v128 is a tag type that tells the compiler that this is really 128-bit SIMD
type v128 struct {
//go:build goexperiment.simd
-package simd
+package archsimd
import "unsafe"
+++ /dev/null
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build goexperiment.simd && amd64
-
-package simd_test
-
-import (
- "simd"
- "testing"
-)
-
-func TestAdd(t *testing.T) {
- testFloat32x4Binary(t, simd.Float32x4.Add, addSlice[float32])
- testFloat32x8Binary(t, simd.Float32x8.Add, addSlice[float32])
- testFloat64x2Binary(t, simd.Float64x2.Add, addSlice[float64])
- testFloat64x4Binary(t, simd.Float64x4.Add, addSlice[float64])
-
- testInt16x16Binary(t, simd.Int16x16.Add, addSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.Add, addSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.Add, addSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.Add, addSlice[int32])
- testInt64x2Binary(t, simd.Int64x2.Add, addSlice[int64])
- testInt64x4Binary(t, simd.Int64x4.Add, addSlice[int64])
- testInt8x16Binary(t, simd.Int8x16.Add, addSlice[int8])
- testInt8x32Binary(t, simd.Int8x32.Add, addSlice[int8])
-
- testUint16x16Binary(t, simd.Uint16x16.Add, addSlice[uint16])
- testUint16x8Binary(t, simd.Uint16x8.Add, addSlice[uint16])
- testUint32x4Binary(t, simd.Uint32x4.Add, addSlice[uint32])
- testUint32x8Binary(t, simd.Uint32x8.Add, addSlice[uint32])
- testUint64x2Binary(t, simd.Uint64x2.Add, addSlice[uint64])
- testUint64x4Binary(t, simd.Uint64x4.Add, addSlice[uint64])
- testUint8x16Binary(t, simd.Uint8x16.Add, addSlice[uint8])
- testUint8x32Binary(t, simd.Uint8x32.Add, addSlice[uint8])
-
- if simd.X86.AVX512() {
- testFloat32x16Binary(t, simd.Float32x16.Add, addSlice[float32])
- testFloat64x8Binary(t, simd.Float64x8.Add, addSlice[float64])
- testInt8x64Binary(t, simd.Int8x64.Add, addSlice[int8])
- testInt16x32Binary(t, simd.Int16x32.Add, addSlice[int16])
- testInt32x16Binary(t, simd.Int32x16.Add, addSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.Add, addSlice[int64])
- testUint8x64Binary(t, simd.Uint8x64.Add, addSlice[uint8])
- testUint16x32Binary(t, simd.Uint16x32.Add, addSlice[uint16])
- testUint32x16Binary(t, simd.Uint32x16.Add, addSlice[uint32])
- testUint64x8Binary(t, simd.Uint64x8.Add, addSlice[uint64])
- }
-}
-
-func TestSub(t *testing.T) {
- testFloat32x4Binary(t, simd.Float32x4.Sub, subSlice[float32])
- testFloat32x8Binary(t, simd.Float32x8.Sub, subSlice[float32])
- testFloat64x2Binary(t, simd.Float64x2.Sub, subSlice[float64])
- testFloat64x4Binary(t, simd.Float64x4.Sub, subSlice[float64])
-
- testInt16x16Binary(t, simd.Int16x16.Sub, subSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.Sub, subSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.Sub, subSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.Sub, subSlice[int32])
- testInt64x2Binary(t, simd.Int64x2.Sub, subSlice[int64])
- testInt64x4Binary(t, simd.Int64x4.Sub, subSlice[int64])
- testInt8x16Binary(t, simd.Int8x16.Sub, subSlice[int8])
- testInt8x32Binary(t, simd.Int8x32.Sub, subSlice[int8])
-
- testUint16x16Binary(t, simd.Uint16x16.Sub, subSlice[uint16])
- testUint16x8Binary(t, simd.Uint16x8.Sub, subSlice[uint16])
- testUint32x4Binary(t, simd.Uint32x4.Sub, subSlice[uint32])
- testUint32x8Binary(t, simd.Uint32x8.Sub, subSlice[uint32])
- testUint64x2Binary(t, simd.Uint64x2.Sub, subSlice[uint64])
- testUint64x4Binary(t, simd.Uint64x4.Sub, subSlice[uint64])
- testUint8x16Binary(t, simd.Uint8x16.Sub, subSlice[uint8])
- testUint8x32Binary(t, simd.Uint8x32.Sub, subSlice[uint8])
-
- if simd.X86.AVX512() {
- testFloat32x16Binary(t, simd.Float32x16.Sub, subSlice[float32])
- testFloat64x8Binary(t, simd.Float64x8.Sub, subSlice[float64])
- testInt8x64Binary(t, simd.Int8x64.Sub, subSlice[int8])
- testInt16x32Binary(t, simd.Int16x32.Sub, subSlice[int16])
- testInt32x16Binary(t, simd.Int32x16.Sub, subSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.Sub, subSlice[int64])
- testUint8x64Binary(t, simd.Uint8x64.Sub, subSlice[uint8])
- testUint16x32Binary(t, simd.Uint16x32.Sub, subSlice[uint16])
- testUint32x16Binary(t, simd.Uint32x16.Sub, subSlice[uint32])
- testUint64x8Binary(t, simd.Uint64x8.Sub, subSlice[uint64])
- }
-}
-
-func TestMax(t *testing.T) {
- // testFloat32x4Binary(t, simd.Float32x4.Max, maxSlice[float32]) // nan is wrong
- // testFloat32x8Binary(t, simd.Float32x8.Max, maxSlice[float32]) // nan is wrong
- // testFloat64x2Binary(t, simd.Float64x2.Max, maxSlice[float64]) // nan is wrong
- // testFloat64x4Binary(t, simd.Float64x4.Max, maxSlice[float64]) // nan is wrong
-
- testInt16x16Binary(t, simd.Int16x16.Max, maxSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.Max, maxSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.Max, maxSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.Max, maxSlice[int32])
-
- if simd.X86.AVX512() {
- testInt64x2Binary(t, simd.Int64x2.Max, maxSlice[int64])
- testInt64x4Binary(t, simd.Int64x4.Max, maxSlice[int64])
- }
-
- testInt8x16Binary(t, simd.Int8x16.Max, maxSlice[int8])
- testInt8x32Binary(t, simd.Int8x32.Max, maxSlice[int8])
-
- testUint16x16Binary(t, simd.Uint16x16.Max, maxSlice[uint16])
- testUint16x8Binary(t, simd.Uint16x8.Max, maxSlice[uint16])
- testUint32x4Binary(t, simd.Uint32x4.Max, maxSlice[uint32])
- testUint32x8Binary(t, simd.Uint32x8.Max, maxSlice[uint32])
-
- if simd.X86.AVX512() {
- testUint64x2Binary(t, simd.Uint64x2.Max, maxSlice[uint64])
- testUint64x4Binary(t, simd.Uint64x4.Max, maxSlice[uint64])
- }
-
- testUint8x16Binary(t, simd.Uint8x16.Max, maxSlice[uint8])
- testUint8x32Binary(t, simd.Uint8x32.Max, maxSlice[uint8])
-
- if simd.X86.AVX512() {
- // testFloat32x16Binary(t, simd.Float32x16.Max, maxSlice[float32]) // nan is wrong
- // testFloat64x8Binary(t, simd.Float64x8.Max, maxSlice[float64]) // nan is wrong
- testInt8x64Binary(t, simd.Int8x64.Max, maxSlice[int8])
- testInt16x32Binary(t, simd.Int16x32.Max, maxSlice[int16])
- testInt32x16Binary(t, simd.Int32x16.Max, maxSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.Max, maxSlice[int64])
- testUint8x64Binary(t, simd.Uint8x64.Max, maxSlice[uint8])
- testUint16x32Binary(t, simd.Uint16x32.Max, maxSlice[uint16])
- testUint32x16Binary(t, simd.Uint32x16.Max, maxSlice[uint32])
- testUint64x8Binary(t, simd.Uint64x8.Max, maxSlice[uint64])
- }
-}
-
-func TestMin(t *testing.T) {
- // testFloat32x4Binary(t, simd.Float32x4.Min, minSlice[float32]) // nan is wrong
- // testFloat32x8Binary(t, simd.Float32x8.Min, minSlice[float32]) // nan is wrong
- // testFloat64x2Binary(t, simd.Float64x2.Min, minSlice[float64]) // nan is wrong
- // testFloat64x4Binary(t, simd.Float64x4.Min, minSlice[float64]) // nan is wrong
-
- testInt16x16Binary(t, simd.Int16x16.Min, minSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.Min, minSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.Min, minSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.Min, minSlice[int32])
-
- if simd.X86.AVX512() {
- testInt64x2Binary(t, simd.Int64x2.Min, minSlice[int64])
- testInt64x4Binary(t, simd.Int64x4.Min, minSlice[int64])
- }
-
- testInt8x16Binary(t, simd.Int8x16.Min, minSlice[int8])
- testInt8x32Binary(t, simd.Int8x32.Min, minSlice[int8])
-
- testUint16x16Binary(t, simd.Uint16x16.Min, minSlice[uint16])
- testUint16x8Binary(t, simd.Uint16x8.Min, minSlice[uint16])
- testUint32x4Binary(t, simd.Uint32x4.Min, minSlice[uint32])
- testUint32x8Binary(t, simd.Uint32x8.Min, minSlice[uint32])
-
- if simd.X86.AVX512() {
- testUint64x2Binary(t, simd.Uint64x2.Min, minSlice[uint64])
- testUint64x4Binary(t, simd.Uint64x4.Min, minSlice[uint64])
- }
-
- testUint8x16Binary(t, simd.Uint8x16.Min, minSlice[uint8])
- testUint8x32Binary(t, simd.Uint8x32.Min, minSlice[uint8])
-
- if simd.X86.AVX512() {
- // testFloat32x16Binary(t, simd.Float32x16.Min, minSlice[float32]) // nan is wrong
- // testFloat64x8Binary(t, simd.Float64x8.Min, minSlice[float64]) // nan is wrong
- testInt8x64Binary(t, simd.Int8x64.Min, minSlice[int8])
- testInt16x32Binary(t, simd.Int16x32.Min, minSlice[int16])
- testInt32x16Binary(t, simd.Int32x16.Min, minSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.Min, minSlice[int64])
- testUint8x64Binary(t, simd.Uint8x64.Min, minSlice[uint8])
- testUint16x32Binary(t, simd.Uint16x32.Min, minSlice[uint16])
- testUint32x16Binary(t, simd.Uint32x16.Min, minSlice[uint32])
- testUint64x8Binary(t, simd.Uint64x8.Min, minSlice[uint64])
- }
-}
-
-func TestAnd(t *testing.T) {
- testInt16x16Binary(t, simd.Int16x16.And, andSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.And, andSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.And, andSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.And, andSlice[int32])
- testInt64x2Binary(t, simd.Int64x2.And, andSlice[int64])
- testInt64x4Binary(t, simd.Int64x4.And, andSlice[int64])
- testInt8x16Binary(t, simd.Int8x16.And, andSlice[int8])
- testInt8x32Binary(t, simd.Int8x32.And, andSlice[int8])
-
- testUint16x16Binary(t, simd.Uint16x16.And, andSlice[uint16])
- testUint16x8Binary(t, simd.Uint16x8.And, andSlice[uint16])
- testUint32x4Binary(t, simd.Uint32x4.And, andSlice[uint32])
- testUint32x8Binary(t, simd.Uint32x8.And, andSlice[uint32])
- testUint64x2Binary(t, simd.Uint64x2.And, andSlice[uint64])
- testUint64x4Binary(t, simd.Uint64x4.And, andSlice[uint64])
- testUint8x16Binary(t, simd.Uint8x16.And, andSlice[uint8])
- testUint8x32Binary(t, simd.Uint8x32.And, andSlice[uint8])
-
- if simd.X86.AVX512() {
- // testInt8x64Binary(t, simd.Int8x64.And, andISlice[int8]) // missing
- // testInt16x32Binary(t, simd.Int16x32.And, andISlice[int16]) // missing
- testInt32x16Binary(t, simd.Int32x16.And, andSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.And, andSlice[int64])
- // testUint8x64Binary(t, simd.Uint8x64.And, andISlice[uint8]) // missing
- // testUint16x32Binary(t, simd.Uint16x32.And, andISlice[uint16]) // missing
- testUint32x16Binary(t, simd.Uint32x16.And, andSlice[uint32])
- testUint64x8Binary(t, simd.Uint64x8.And, andSlice[uint64])
- }
-}
-
-func TestAndNot(t *testing.T) {
- testInt16x16Binary(t, simd.Int16x16.AndNot, andNotSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.AndNot, andNotSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.AndNot, andNotSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.AndNot, andNotSlice[int32])
- testInt64x2Binary(t, simd.Int64x2.AndNot, andNotSlice[int64])
- testInt64x4Binary(t, simd.Int64x4.AndNot, andNotSlice[int64])
- testInt8x16Binary(t, simd.Int8x16.AndNot, andNotSlice[int8])
- testInt8x32Binary(t, simd.Int8x32.AndNot, andNotSlice[int8])
-
- testUint16x16Binary(t, simd.Uint16x16.AndNot, andNotSlice[uint16])
- testUint16x8Binary(t, simd.Uint16x8.AndNot, andNotSlice[uint16])
- testUint32x4Binary(t, simd.Uint32x4.AndNot, andNotSlice[uint32])
- testUint32x8Binary(t, simd.Uint32x8.AndNot, andNotSlice[uint32])
- testUint64x2Binary(t, simd.Uint64x2.AndNot, andNotSlice[uint64])
- testUint64x4Binary(t, simd.Uint64x4.AndNot, andNotSlice[uint64])
- testUint8x16Binary(t, simd.Uint8x16.AndNot, andNotSlice[uint8])
- testUint8x32Binary(t, simd.Uint8x32.AndNot, andNotSlice[uint8])
-
- if simd.X86.AVX512() {
- testInt8x64Binary(t, simd.Int8x64.AndNot, andNotSlice[int8])
- testInt16x32Binary(t, simd.Int16x32.AndNot, andNotSlice[int16])
- testInt32x16Binary(t, simd.Int32x16.AndNot, andNotSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.AndNot, andNotSlice[int64])
- testUint8x64Binary(t, simd.Uint8x64.AndNot, andNotSlice[uint8])
- testUint16x32Binary(t, simd.Uint16x32.AndNot, andNotSlice[uint16])
- testUint32x16Binary(t, simd.Uint32x16.AndNot, andNotSlice[uint32])
- testUint64x8Binary(t, simd.Uint64x8.AndNot, andNotSlice[uint64])
- }
-}
-
-func TestXor(t *testing.T) {
- testInt16x16Binary(t, simd.Int16x16.Xor, xorSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.Xor, xorSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.Xor, xorSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.Xor, xorSlice[int32])
- testInt64x2Binary(t, simd.Int64x2.Xor, xorSlice[int64])
- testInt64x4Binary(t, simd.Int64x4.Xor, xorSlice[int64])
- testInt8x16Binary(t, simd.Int8x16.Xor, xorSlice[int8])
- testInt8x32Binary(t, simd.Int8x32.Xor, xorSlice[int8])
-
- testUint16x16Binary(t, simd.Uint16x16.Xor, xorSlice[uint16])
- testUint16x8Binary(t, simd.Uint16x8.Xor, xorSlice[uint16])
- testUint32x4Binary(t, simd.Uint32x4.Xor, xorSlice[uint32])
- testUint32x8Binary(t, simd.Uint32x8.Xor, xorSlice[uint32])
- testUint64x2Binary(t, simd.Uint64x2.Xor, xorSlice[uint64])
- testUint64x4Binary(t, simd.Uint64x4.Xor, xorSlice[uint64])
- testUint8x16Binary(t, simd.Uint8x16.Xor, xorSlice[uint8])
- testUint8x32Binary(t, simd.Uint8x32.Xor, xorSlice[uint8])
-
- if simd.X86.AVX512() {
- // testInt8x64Binary(t, simd.Int8x64.Xor, andISlice[int8]) // missing
- // testInt16x32Binary(t, simd.Int16x32.Xor, andISlice[int16]) // missing
- testInt32x16Binary(t, simd.Int32x16.Xor, xorSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.Xor, xorSlice[int64])
- // testUint8x64Binary(t, simd.Uint8x64.Xor, andISlice[uint8]) // missing
- // testUint16x32Binary(t, simd.Uint16x32.Xor, andISlice[uint16]) // missing
- testUint32x16Binary(t, simd.Uint32x16.Xor, xorSlice[uint32])
- testUint64x8Binary(t, simd.Uint64x8.Xor, xorSlice[uint64])
- }
-}
-
-func TestOr(t *testing.T) {
- testInt16x16Binary(t, simd.Int16x16.Or, orSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.Or, orSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.Or, orSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.Or, orSlice[int32])
- testInt64x2Binary(t, simd.Int64x2.Or, orSlice[int64])
- testInt64x4Binary(t, simd.Int64x4.Or, orSlice[int64])
- testInt8x16Binary(t, simd.Int8x16.Or, orSlice[int8])
- testInt8x32Binary(t, simd.Int8x32.Or, orSlice[int8])
-
- testUint16x16Binary(t, simd.Uint16x16.Or, orSlice[uint16])
- testUint16x8Binary(t, simd.Uint16x8.Or, orSlice[uint16])
- testUint32x4Binary(t, simd.Uint32x4.Or, orSlice[uint32])
- testUint32x8Binary(t, simd.Uint32x8.Or, orSlice[uint32])
- testUint64x2Binary(t, simd.Uint64x2.Or, orSlice[uint64])
- testUint64x4Binary(t, simd.Uint64x4.Or, orSlice[uint64])
- testUint8x16Binary(t, simd.Uint8x16.Or, orSlice[uint8])
- testUint8x32Binary(t, simd.Uint8x32.Or, orSlice[uint8])
-
- if simd.X86.AVX512() {
- // testInt8x64Binary(t, simd.Int8x64.Or, andISlice[int8]) // missing
- // testInt16x32Binary(t, simd.Int16x32.Or, andISlice[int16]) // missing
- testInt32x16Binary(t, simd.Int32x16.Or, orSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.Or, orSlice[int64])
- // testUint8x64Binary(t, simd.Uint8x64.Or, andISlice[uint8]) // missing
- // testUint16x32Binary(t, simd.Uint16x32.Or, andISlice[uint16]) // missing
- testUint32x16Binary(t, simd.Uint32x16.Or, orSlice[uint32])
- testUint64x8Binary(t, simd.Uint64x8.Or, orSlice[uint64])
- }
-}
-
-func TestMul(t *testing.T) {
- testFloat32x4Binary(t, simd.Float32x4.Mul, mulSlice[float32])
- testFloat32x8Binary(t, simd.Float32x8.Mul, mulSlice[float32])
- testFloat64x2Binary(t, simd.Float64x2.Mul, mulSlice[float64])
- testFloat64x4Binary(t, simd.Float64x4.Mul, mulSlice[float64])
-
- testInt16x16Binary(t, simd.Int16x16.Mul, mulSlice[int16])
- testInt16x8Binary(t, simd.Int16x8.Mul, mulSlice[int16])
- testInt32x4Binary(t, simd.Int32x4.Mul, mulSlice[int32])
- testInt32x8Binary(t, simd.Int32x8.Mul, mulSlice[int32])
-
- // testInt8x16Binary(t, simd.Int8x16.Mul, mulSlice[int8]) // nope
- // testInt8x32Binary(t, simd.Int8x32.Mul, mulSlice[int8])
-
- // TODO we should be able to do these, there's no difference between signed/unsigned Mul
- // testUint16x16Binary(t, simd.Uint16x16.Mul, mulSlice[uint16])
- // testUint16x8Binary(t, simd.Uint16x8.Mul, mulSlice[uint16])
- // testUint32x4Binary(t, simd.Uint32x4.Mul, mulSlice[uint32])
- // testUint32x8Binary(t, simd.Uint32x8.Mul, mulSlice[uint32])
- // testUint64x2Binary(t, simd.Uint64x2.Mul, mulSlice[uint64])
- // testUint64x4Binary(t, simd.Uint64x4.Mul, mulSlice[uint64])
-
- // testUint8x16Binary(t, simd.Uint8x16.Mul, mulSlice[uint8]) // nope
- // testUint8x32Binary(t, simd.Uint8x32.Mul, mulSlice[uint8])
-
- if simd.X86.AVX512() {
- testInt64x2Binary(t, simd.Int64x2.Mul, mulSlice[int64]) // avx512 only
- testInt64x4Binary(t, simd.Int64x4.Mul, mulSlice[int64])
-
- testFloat32x16Binary(t, simd.Float32x16.Mul, mulSlice[float32])
- testFloat64x8Binary(t, simd.Float64x8.Mul, mulSlice[float64])
-
- // testInt8x64Binary(t, simd.Int8x64.Mul, mulSlice[int8]) // nope
- testInt16x32Binary(t, simd.Int16x32.Mul, mulSlice[int16])
- testInt32x16Binary(t, simd.Int32x16.Mul, mulSlice[int32])
- testInt64x8Binary(t, simd.Int64x8.Mul, mulSlice[int64])
- // testUint8x64Binary(t, simd.Uint8x64.Mul, mulSlice[uint8]) // nope
-
- // TODO signed should do the job
- // testUint16x32Binary(t, simd.Uint16x32.Mul, mulSlice[uint16])
- // testUint32x16Binary(t, simd.Uint32x16.Mul, mulSlice[uint32])
- // testUint64x8Binary(t, simd.Uint64x8.Mul, mulSlice[uint64])
- }
-}
-
-func TestDiv(t *testing.T) {
- testFloat32x4Binary(t, simd.Float32x4.Div, divSlice[float32])
- testFloat32x8Binary(t, simd.Float32x8.Div, divSlice[float32])
- testFloat64x2Binary(t, simd.Float64x2.Div, divSlice[float64])
- testFloat64x4Binary(t, simd.Float64x4.Div, divSlice[float64])
-
- if simd.X86.AVX512() {
- testFloat32x16Binary(t, simd.Float32x16.Div, divSlice[float32])
- testFloat64x8Binary(t, simd.Float64x8.Div, divSlice[float64])
- }
-}
+++ /dev/null
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build goexperiment.simd && amd64
-
-package simd_test
-
-import (
- "simd"
- "testing"
-)
-
-// AVX 2 lacks most comparisons, but they can be synthesized
-// from > and =
-var comparisonFixed bool = simd.X86.AVX512()
-
-func TestLess(t *testing.T) {
- testFloat32x4Compare(t, simd.Float32x4.Less, lessSlice[float32])
- testFloat32x8Compare(t, simd.Float32x8.Less, lessSlice[float32])
- testFloat64x2Compare(t, simd.Float64x2.Less, lessSlice[float64])
- testFloat64x4Compare(t, simd.Float64x4.Less, lessSlice[float64])
-
- testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16])
- testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16])
- testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32])
- testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32])
- testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64])
- testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64])
- testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8])
- testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8])
-
- testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16])
- testUint16x8Compare(t, simd.Uint16x8.Less, lessSlice[uint16])
- testUint32x4Compare(t, simd.Uint32x4.Less, lessSlice[uint32])
- testUint32x8Compare(t, simd.Uint32x8.Less, lessSlice[uint32])
- testUint64x2Compare(t, simd.Uint64x2.Less, lessSlice[uint64])
- testUint64x4Compare(t, simd.Uint64x4.Less, lessSlice[uint64])
- testUint8x16Compare(t, simd.Uint8x16.Less, lessSlice[uint8])
- testUint8x32Compare(t, simd.Uint8x32.Less, lessSlice[uint8])
-
- if simd.X86.AVX512() {
- testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16])
- testUint16x8Compare(t, simd.Uint16x8.Less, lessSlice[uint16])
- testUint32x4Compare(t, simd.Uint32x4.Less, lessSlice[uint32])
- testUint32x8Compare(t, simd.Uint32x8.Less, lessSlice[uint32])
- testUint64x2Compare(t, simd.Uint64x2.Less, lessSlice[uint64])
- testUint64x4Compare(t, simd.Uint64x4.Less, lessSlice[uint64])
- testUint8x16Compare(t, simd.Uint8x16.Less, lessSlice[uint8])
- testUint8x32Compare(t, simd.Uint8x32.Less, lessSlice[uint8])
-
- testFloat32x16Compare(t, simd.Float32x16.Less, lessSlice[float32])
- testFloat64x8Compare(t, simd.Float64x8.Less, lessSlice[float64])
- testInt8x64Compare(t, simd.Int8x64.Less, lessSlice[int8])
- testInt16x32Compare(t, simd.Int16x32.Less, lessSlice[int16])
- testInt32x16Compare(t, simd.Int32x16.Less, lessSlice[int32])
- testInt64x8Compare(t, simd.Int64x8.Less, lessSlice[int64])
- testUint8x64Compare(t, simd.Uint8x64.Less, lessSlice[uint8])
- testUint16x32Compare(t, simd.Uint16x32.Less, lessSlice[uint16])
- testUint32x16Compare(t, simd.Uint32x16.Less, lessSlice[uint32])
- testUint64x8Compare(t, simd.Uint64x8.Less, lessSlice[uint64])
- }
-}
-
-func TestLessEqual(t *testing.T) {
- testFloat32x4Compare(t, simd.Float32x4.LessEqual, lessEqualSlice[float32])
- testFloat32x8Compare(t, simd.Float32x8.LessEqual, lessEqualSlice[float32])
- testFloat64x2Compare(t, simd.Float64x2.LessEqual, lessEqualSlice[float64])
- testFloat64x4Compare(t, simd.Float64x4.LessEqual, lessEqualSlice[float64])
-
- testInt16x16Compare(t, simd.Int16x16.LessEqual, lessEqualSlice[int16])
- testInt16x8Compare(t, simd.Int16x8.LessEqual, lessEqualSlice[int16])
- testInt32x4Compare(t, simd.Int32x4.LessEqual, lessEqualSlice[int32])
- testInt32x8Compare(t, simd.Int32x8.LessEqual, lessEqualSlice[int32])
- testInt64x2Compare(t, simd.Int64x2.LessEqual, lessEqualSlice[int64])
- testInt64x4Compare(t, simd.Int64x4.LessEqual, lessEqualSlice[int64])
- testInt8x16Compare(t, simd.Int8x16.LessEqual, lessEqualSlice[int8])
- testInt8x32Compare(t, simd.Int8x32.LessEqual, lessEqualSlice[int8])
-
- testUint16x16Compare(t, simd.Uint16x16.LessEqual, lessEqualSlice[uint16])
- testUint16x8Compare(t, simd.Uint16x8.LessEqual, lessEqualSlice[uint16])
- testUint32x4Compare(t, simd.Uint32x4.LessEqual, lessEqualSlice[uint32])
- testUint32x8Compare(t, simd.Uint32x8.LessEqual, lessEqualSlice[uint32])
- testUint64x2Compare(t, simd.Uint64x2.LessEqual, lessEqualSlice[uint64])
- testUint64x4Compare(t, simd.Uint64x4.LessEqual, lessEqualSlice[uint64])
- testUint8x16Compare(t, simd.Uint8x16.LessEqual, lessEqualSlice[uint8])
- testUint8x32Compare(t, simd.Uint8x32.LessEqual, lessEqualSlice[uint8])
-
- if simd.X86.AVX512() {
- testFloat32x16Compare(t, simd.Float32x16.LessEqual, lessEqualSlice[float32])
- testFloat64x8Compare(t, simd.Float64x8.LessEqual, lessEqualSlice[float64])
- testInt8x64Compare(t, simd.Int8x64.LessEqual, lessEqualSlice[int8])
- testInt16x32Compare(t, simd.Int16x32.LessEqual, lessEqualSlice[int16])
- testInt32x16Compare(t, simd.Int32x16.LessEqual, lessEqualSlice[int32])
- testInt64x8Compare(t, simd.Int64x8.LessEqual, lessEqualSlice[int64])
- testUint8x64Compare(t, simd.Uint8x64.LessEqual, lessEqualSlice[uint8])
- testUint16x32Compare(t, simd.Uint16x32.LessEqual, lessEqualSlice[uint16])
- testUint32x16Compare(t, simd.Uint32x16.LessEqual, lessEqualSlice[uint32])
- testUint64x8Compare(t, simd.Uint64x8.LessEqual, lessEqualSlice[uint64])
- }
-}
-
-func TestGreater(t *testing.T) {
- testFloat32x4Compare(t, simd.Float32x4.Greater, greaterSlice[float32])
- testFloat32x8Compare(t, simd.Float32x8.Greater, greaterSlice[float32])
- testFloat64x2Compare(t, simd.Float64x2.Greater, greaterSlice[float64])
- testFloat64x4Compare(t, simd.Float64x4.Greater, greaterSlice[float64])
-
- testInt16x16Compare(t, simd.Int16x16.Greater, greaterSlice[int16])
- testInt16x8Compare(t, simd.Int16x8.Greater, greaterSlice[int16])
- testInt32x4Compare(t, simd.Int32x4.Greater, greaterSlice[int32])
- testInt32x8Compare(t, simd.Int32x8.Greater, greaterSlice[int32])
-
- testInt64x2Compare(t, simd.Int64x2.Greater, greaterSlice[int64])
- testInt64x4Compare(t, simd.Int64x4.Greater, greaterSlice[int64])
- testInt8x16Compare(t, simd.Int8x16.Greater, greaterSlice[int8])
- testInt8x32Compare(t, simd.Int8x32.Greater, greaterSlice[int8])
-
- testUint16x16Compare(t, simd.Uint16x16.Greater, greaterSlice[uint16])
- testUint16x8Compare(t, simd.Uint16x8.Greater, greaterSlice[uint16])
- testUint32x4Compare(t, simd.Uint32x4.Greater, greaterSlice[uint32])
- testUint32x8Compare(t, simd.Uint32x8.Greater, greaterSlice[uint32])
-
- testUint64x2Compare(t, simd.Uint64x2.Greater, greaterSlice[uint64])
- testUint64x4Compare(t, simd.Uint64x4.Greater, greaterSlice[uint64])
- testUint8x16Compare(t, simd.Uint8x16.Greater, greaterSlice[uint8])
- testUint8x32Compare(t, simd.Uint8x32.Greater, greaterSlice[uint8])
-
-	if simd.X86.AVX512() {
-		testFloat32x16Compare(t, simd.Float32x16.Greater, greaterSlice[float32])
- testFloat64x8Compare(t, simd.Float64x8.Greater, greaterSlice[float64])
- testInt8x64Compare(t, simd.Int8x64.Greater, greaterSlice[int8])
- testInt16x32Compare(t, simd.Int16x32.Greater, greaterSlice[int16])
- testInt32x16Compare(t, simd.Int32x16.Greater, greaterSlice[int32])
- testInt64x8Compare(t, simd.Int64x8.Greater, greaterSlice[int64])
- testUint8x64Compare(t, simd.Uint8x64.Greater, greaterSlice[uint8])
- testUint16x32Compare(t, simd.Uint16x32.Greater, greaterSlice[uint16])
- testUint32x16Compare(t, simd.Uint32x16.Greater, greaterSlice[uint32])
- testUint64x8Compare(t, simd.Uint64x8.Greater, greaterSlice[uint64])
- }
-}
-
-func TestGreaterEqual(t *testing.T) {
- testFloat32x4Compare(t, simd.Float32x4.GreaterEqual, greaterEqualSlice[float32])
- testFloat32x8Compare(t, simd.Float32x8.GreaterEqual, greaterEqualSlice[float32])
- testFloat64x2Compare(t, simd.Float64x2.GreaterEqual, greaterEqualSlice[float64])
- testFloat64x4Compare(t, simd.Float64x4.GreaterEqual, greaterEqualSlice[float64])
-
- testInt16x16Compare(t, simd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
- testInt16x8Compare(t, simd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
- testInt32x4Compare(t, simd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
- testInt32x8Compare(t, simd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
- testInt64x2Compare(t, simd.Int64x2.GreaterEqual, greaterEqualSlice[int64])
- testInt64x4Compare(t, simd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
- testInt8x16Compare(t, simd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
- testInt8x32Compare(t, simd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
-
- testUint16x16Compare(t, simd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
- testUint16x8Compare(t, simd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
- testUint32x4Compare(t, simd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
- testUint32x8Compare(t, simd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
- testUint64x2Compare(t, simd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
- testUint64x4Compare(t, simd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
- testUint8x16Compare(t, simd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
- testUint8x32Compare(t, simd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
-
- if simd.X86.AVX512() {
- testFloat32x16Compare(t, simd.Float32x16.GreaterEqual, greaterEqualSlice[float32])
- testFloat64x8Compare(t, simd.Float64x8.GreaterEqual, greaterEqualSlice[float64])
- testInt8x64Compare(t, simd.Int8x64.GreaterEqual, greaterEqualSlice[int8])
- testInt16x32Compare(t, simd.Int16x32.GreaterEqual, greaterEqualSlice[int16])
- testInt32x16Compare(t, simd.Int32x16.GreaterEqual, greaterEqualSlice[int32])
- testInt64x8Compare(t, simd.Int64x8.GreaterEqual, greaterEqualSlice[int64])
- testUint8x64Compare(t, simd.Uint8x64.GreaterEqual, greaterEqualSlice[uint8])
- testUint16x32Compare(t, simd.Uint16x32.GreaterEqual, greaterEqualSlice[uint16])
- testUint32x16Compare(t, simd.Uint32x16.GreaterEqual, greaterEqualSlice[uint32])
- testUint64x8Compare(t, simd.Uint64x8.GreaterEqual, greaterEqualSlice[uint64])
- }
-}
-
-func TestEqual(t *testing.T) {
- testFloat32x4Compare(t, simd.Float32x4.Equal, equalSlice[float32])
- testFloat32x8Compare(t, simd.Float32x8.Equal, equalSlice[float32])
- testFloat64x2Compare(t, simd.Float64x2.Equal, equalSlice[float64])
- testFloat64x4Compare(t, simd.Float64x4.Equal, equalSlice[float64])
-
- testInt16x16Compare(t, simd.Int16x16.Equal, equalSlice[int16])
- testInt16x8Compare(t, simd.Int16x8.Equal, equalSlice[int16])
- testInt32x4Compare(t, simd.Int32x4.Equal, equalSlice[int32])
- testInt32x8Compare(t, simd.Int32x8.Equal, equalSlice[int32])
- testInt64x2Compare(t, simd.Int64x2.Equal, equalSlice[int64])
- testInt64x4Compare(t, simd.Int64x4.Equal, equalSlice[int64])
- testInt8x16Compare(t, simd.Int8x16.Equal, equalSlice[int8])
- testInt8x32Compare(t, simd.Int8x32.Equal, equalSlice[int8])
-
- testUint16x16Compare(t, simd.Uint16x16.Equal, equalSlice[uint16])
- testUint16x8Compare(t, simd.Uint16x8.Equal, equalSlice[uint16])
- testUint32x4Compare(t, simd.Uint32x4.Equal, equalSlice[uint32])
- testUint32x8Compare(t, simd.Uint32x8.Equal, equalSlice[uint32])
- testUint64x2Compare(t, simd.Uint64x2.Equal, equalSlice[uint64])
- testUint64x4Compare(t, simd.Uint64x4.Equal, equalSlice[uint64])
- testUint8x16Compare(t, simd.Uint8x16.Equal, equalSlice[uint8])
- testUint8x32Compare(t, simd.Uint8x32.Equal, equalSlice[uint8])
-
- if simd.X86.AVX512() {
- testFloat32x16Compare(t, simd.Float32x16.Equal, equalSlice[float32])
- testFloat64x8Compare(t, simd.Float64x8.Equal, equalSlice[float64])
- testInt8x64Compare(t, simd.Int8x64.Equal, equalSlice[int8])
- testInt16x32Compare(t, simd.Int16x32.Equal, equalSlice[int16])
- testInt32x16Compare(t, simd.Int32x16.Equal, equalSlice[int32])
- testInt64x8Compare(t, simd.Int64x8.Equal, equalSlice[int64])
- testUint8x64Compare(t, simd.Uint8x64.Equal, equalSlice[uint8])
- testUint16x32Compare(t, simd.Uint16x32.Equal, equalSlice[uint16])
- testUint32x16Compare(t, simd.Uint32x16.Equal, equalSlice[uint32])
- testUint64x8Compare(t, simd.Uint64x8.Equal, equalSlice[uint64])
- }
-}
-
-func TestNotEqual(t *testing.T) {
- testFloat32x4Compare(t, simd.Float32x4.NotEqual, notEqualSlice[float32])
- testFloat32x8Compare(t, simd.Float32x8.NotEqual, notEqualSlice[float32])
- testFloat64x2Compare(t, simd.Float64x2.NotEqual, notEqualSlice[float64])
- testFloat64x4Compare(t, simd.Float64x4.NotEqual, notEqualSlice[float64])
-
- testInt16x16Compare(t, simd.Int16x16.NotEqual, notEqualSlice[int16])
- testInt16x8Compare(t, simd.Int16x8.NotEqual, notEqualSlice[int16])
- testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32])
- testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32])
- testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64])
- testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64])
- testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8])
- testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8])
-
- testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16])
- testUint16x8Compare(t, simd.Uint16x8.NotEqual, notEqualSlice[uint16])
- testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32])
- testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32])
- testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64])
- testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64])
- testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8])
- testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8])
-
- if simd.X86.AVX512() {
- testFloat32x16Compare(t, simd.Float32x16.NotEqual, notEqualSlice[float32])
- testFloat64x8Compare(t, simd.Float64x8.NotEqual, notEqualSlice[float64])
- testInt8x64Compare(t, simd.Int8x64.NotEqual, notEqualSlice[int8])
- testInt16x32Compare(t, simd.Int16x32.NotEqual, notEqualSlice[int16])
- testInt32x16Compare(t, simd.Int32x16.NotEqual, notEqualSlice[int32])
- testInt64x8Compare(t, simd.Int64x8.NotEqual, notEqualSlice[int64])
- testUint8x64Compare(t, simd.Uint8x64.NotEqual, notEqualSlice[uint8])
- testUint16x32Compare(t, simd.Uint16x32.NotEqual, notEqualSlice[uint16])
- testUint32x16Compare(t, simd.Uint32x16.NotEqual, notEqualSlice[uint32])
- testUint64x8Compare(t, simd.Uint64x8.NotEqual, notEqualSlice[uint64])
- }
-}
+++ /dev/null
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build goexperiment.simd && amd64
-
-package simd_test
-
-import (
- "simd"
- "testing"
-)
-
-func TestFMA(t *testing.T) {
- if simd.X86.AVX512() {
- testFloat32x4TernaryFlaky(t, simd.Float32x4.MulAdd, fmaSlice[float32], 0.001)
- testFloat32x8TernaryFlaky(t, simd.Float32x8.MulAdd, fmaSlice[float32], 0.001)
- testFloat32x16TernaryFlaky(t, simd.Float32x16.MulAdd, fmaSlice[float32], 0.001)
- testFloat64x2Ternary(t, simd.Float64x2.MulAdd, fmaSlice[float64])
- testFloat64x4Ternary(t, simd.Float64x4.MulAdd, fmaSlice[float64])
- testFloat64x8Ternary(t, simd.Float64x8.MulAdd, fmaSlice[float64])
- }
-}
+++ /dev/null
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build goexperiment.simd && amd64
-
-package simd_test
-
-import (
- "math"
- "simd"
- "testing"
-)
-
-func TestCeil(t *testing.T) {
- testFloat32x4Unary(t, simd.Float32x4.Ceil, ceilSlice[float32])
- testFloat32x8Unary(t, simd.Float32x8.Ceil, ceilSlice[float32])
- testFloat64x2Unary(t, simd.Float64x2.Ceil, ceilSlice[float64])
- testFloat64x4Unary(t, simd.Float64x4.Ceil, ceilSlice[float64])
- if simd.X86.AVX512() {
- // testFloat32x16Unary(t, simd.Float32x16.Ceil, ceilSlice[float32]) // missing
- // testFloat64x8Unary(t, simd.Float64x8.Ceil, ceilSlice[float64]) // missing
- }
-}
-
-func TestFloor(t *testing.T) {
- testFloat32x4Unary(t, simd.Float32x4.Floor, floorSlice[float32])
- testFloat32x8Unary(t, simd.Float32x8.Floor, floorSlice[float32])
- testFloat64x2Unary(t, simd.Float64x2.Floor, floorSlice[float64])
- testFloat64x4Unary(t, simd.Float64x4.Floor, floorSlice[float64])
- if simd.X86.AVX512() {
- // testFloat32x16Unary(t, simd.Float32x16.Floor, floorSlice[float32]) // missing
- // testFloat64x8Unary(t, simd.Float64x8.Floor, floorSlice[float64]) // missing
- }
-}
-
-func TestTrunc(t *testing.T) {
- testFloat32x4Unary(t, simd.Float32x4.Trunc, truncSlice[float32])
- testFloat32x8Unary(t, simd.Float32x8.Trunc, truncSlice[float32])
- testFloat64x2Unary(t, simd.Float64x2.Trunc, truncSlice[float64])
- testFloat64x4Unary(t, simd.Float64x4.Trunc, truncSlice[float64])
- if simd.X86.AVX512() {
- // testFloat32x16Unary(t, simd.Float32x16.Trunc, truncSlice[float32]) // missing
- // testFloat64x8Unary(t, simd.Float64x8.Trunc, truncSlice[float64]) // missing
- }
-}
-
-func TestRound(t *testing.T) {
- testFloat32x4Unary(t, simd.Float32x4.RoundToEven, roundSlice[float32])
- testFloat32x8Unary(t, simd.Float32x8.RoundToEven, roundSlice[float32])
- testFloat64x2Unary(t, simd.Float64x2.RoundToEven, roundSlice[float64])
- testFloat64x4Unary(t, simd.Float64x4.RoundToEven, roundSlice[float64])
- if simd.X86.AVX512() {
-		// testFloat32x16Unary(t, simd.Float32x16.RoundToEven, roundSlice[float32]) // missing
-		// testFloat64x8Unary(t, simd.Float64x8.RoundToEven, roundSlice[float64]) // missing
- }
-}
-
-func TestSqrt(t *testing.T) {
- testFloat32x4Unary(t, simd.Float32x4.Sqrt, sqrtSlice[float32])
- testFloat32x8Unary(t, simd.Float32x8.Sqrt, sqrtSlice[float32])
- testFloat64x2Unary(t, simd.Float64x2.Sqrt, sqrtSlice[float64])
- testFloat64x4Unary(t, simd.Float64x4.Sqrt, sqrtSlice[float64])
- if simd.X86.AVX512() {
- testFloat32x16Unary(t, simd.Float32x16.Sqrt, sqrtSlice[float32])
- testFloat64x8Unary(t, simd.Float64x8.Sqrt, sqrtSlice[float64])
- }
-}
-
-func TestNot(t *testing.T) {
- testInt8x16Unary(t, simd.Int8x16.Not, map1[int8](not))
- testInt8x32Unary(t, simd.Int8x32.Not, map1[int8](not))
- testInt16x8Unary(t, simd.Int16x8.Not, map1[int16](not))
- testInt16x16Unary(t, simd.Int16x16.Not, map1[int16](not))
- testInt32x4Unary(t, simd.Int32x4.Not, map1[int32](not))
- testInt32x8Unary(t, simd.Int32x8.Not, map1[int32](not))
-}
-
-func TestAbsolute(t *testing.T) {
- testInt8x16Unary(t, simd.Int8x16.Abs, map1[int8](abs))
- testInt8x32Unary(t, simd.Int8x32.Abs, map1[int8](abs))
- testInt16x8Unary(t, simd.Int16x8.Abs, map1[int16](abs))
- testInt16x16Unary(t, simd.Int16x16.Abs, map1[int16](abs))
- testInt32x4Unary(t, simd.Int32x4.Abs, map1[int32](abs))
- testInt32x8Unary(t, simd.Int32x8.Abs, map1[int32](abs))
- if simd.X86.AVX512() {
- testInt8x64Unary(t, simd.Int8x64.Abs, map1[int8](abs))
- testInt16x32Unary(t, simd.Int16x32.Abs, map1[int16](abs))
- testInt32x16Unary(t, simd.Int32x16.Abs, map1[int32](abs))
- testInt64x2Unary(t, simd.Int64x2.Abs, map1[int64](abs))
- testInt64x4Unary(t, simd.Int64x4.Abs, map1[int64](abs))
- testInt64x8Unary(t, simd.Int64x8.Abs, map1[int64](abs))
- }
-}
-
-func TestCeilScaledResidue(t *testing.T) {
- if !simd.X86.AVX512() {
- t.Skip("Needs AVX512")
- }
- testFloat64x8UnaryFlaky(t,
- func(x simd.Float64x8) simd.Float64x8 { return x.CeilScaledResidue(0) },
- map1(ceilResidueForPrecision[float64](0)),
- 0.001)
- testFloat64x8UnaryFlaky(t,
- func(x simd.Float64x8) simd.Float64x8 { return x.CeilScaledResidue(1) },
- map1(ceilResidueForPrecision[float64](1)),
- 0.001)
- testFloat64x8Unary(t,
- func(x simd.Float64x8) simd.Float64x8 { return x.Sub(x.CeilScaled(0)) },
- map1[float64](func(x float64) float64 { return x - math.Ceil(x) }))
-}
-
-func TestToUint32(t *testing.T) {
- if !simd.X86.AVX512() {
- t.Skip("Needs AVX512")
- }
- testFloat32x4ConvertToUint32(t, simd.Float32x4.ConvertToUint32, map1[float32](toUint32))
- testFloat32x8ConvertToUint32(t, simd.Float32x8.ConvertToUint32, map1[float32](toUint32))
- testFloat32x16ConvertToUint32(t, simd.Float32x16.ConvertToUint32, map1[float32](toUint32))
-}
-
-func TestToInt32(t *testing.T) {
- testFloat32x4ConvertToInt32(t, simd.Float32x4.ConvertToInt32, map1[float32](toInt32))
- testFloat32x8ConvertToInt32(t, simd.Float32x8.ConvertToInt32, map1[float32](toInt32))
-}
-
-func TestConverts(t *testing.T) {
- testUint8x16ConvertToUint16(t, simd.Uint8x16.ExtendToUint16, map1[uint8](toUint16))
- testUint16x8ConvertToUint32(t, simd.Uint16x8.ExtendToUint32, map1[uint16](toUint32))
-}
-
-func TestConvertsAVX512(t *testing.T) {
- if !simd.X86.AVX512() {
- t.Skip("Needs AVX512")
- }
- testUint8x32ConvertToUint16(t, simd.Uint8x32.ExtendToUint16, map1[uint8](toUint16))
-}
+++ /dev/null
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build goexperiment.simd && amd64
-
-package simd_test
-
-import (
- "simd"
- "simd/internal/test_helpers"
- "testing"
-)
-
-func TestConcatSelectedConstant64(t *testing.T) {
- a := make([]int64, 2)
- x := simd.LoadInt64x2Slice([]int64{4, 5})
- y := simd.LoadInt64x2Slice([]int64{6, 7})
- z := x.ExportTestConcatSelectedConstant(0b10, y)
- z.StoreSlice(a)
- test_helpers.CheckSlices[int64](t, a, []int64{4, 7})
-}
-
-func TestConcatSelectedConstantGrouped64(t *testing.T) {
- a := make([]float64, 4)
- x := simd.LoadFloat64x4Slice([]float64{4, 5, 8, 9})
- y := simd.LoadFloat64x4Slice([]float64{6, 7, 10, 11})
- z := x.ExportTestConcatSelectedConstantGrouped(0b_11_10, y)
- z.StoreSlice(a)
- test_helpers.CheckSlices[float64](t, a, []float64{4, 7, 9, 11})
-}
-
-func TestConcatSelectedConstant32(t *testing.T) {
- a := make([]float32, 4)
- x := simd.LoadFloat32x4Slice([]float32{4, 5, 8, 9})
- y := simd.LoadFloat32x4Slice([]float32{6, 7, 10, 11})
- z := x.ExportTestConcatSelectedConstant(0b_11_01_10_00, y)
- z.StoreSlice(a)
- test_helpers.CheckSlices[float32](t, a, []float32{4, 8, 7, 11})
-}
-
-func TestConcatSelectedConstantGrouped32(t *testing.T) {
- a := make([]uint32, 8)
- x := simd.LoadUint32x8Slice([]uint32{0, 1, 2, 3, 8, 9, 10, 11})
- y := simd.LoadUint32x8Slice([]uint32{4, 5, 6, 7, 12, 13, 14, 15})
- z := x.ExportTestConcatSelectedConstantGrouped(0b_11_01_00_10, y)
- z.StoreSlice(a)
- test_helpers.CheckSlices[uint32](t, a, []uint32{2, 0, 5, 7, 10, 8, 13, 15})
-}
-
-func TestTern(t *testing.T) {
- if !simd.X86.AVX512() {
- t.Skip("This test needs AVX512")
- }
- x := simd.LoadInt32x8Slice([]int32{0, 0, 0, 0, 1, 1, 1, 1})
- y := simd.LoadInt32x8Slice([]int32{0, 0, 1, 1, 0, 0, 1, 1})
- z := simd.LoadInt32x8Slice([]int32{0, 1, 0, 1, 0, 1, 0, 1})
-
- foo := func(w simd.Int32x8, k uint8) {
- a := make([]int32, 8)
- w.StoreSlice(a)
- t.Logf("For k=%0b, w=%v", k, a)
- for i, b := range a {
- if (int32(k)>>i)&1 != b {
- t.Errorf("Element %d of stored slice (=%d) did not match corresponding bit in 0b%b",
- i, b, k)
- }
- }
- }
-
- foo(x.ExportTestTern(0b1111_0000, y, z), 0b1111_0000)
- foo(x.ExportTestTern(0b1100_1100, y, z), 0b1100_1100)
- foo(x.ExportTestTern(0b1010_1010, y, z), 0b1010_1010)
-}
-
-func TestSelect2x4x32(t *testing.T) {
- for a := range uint8(8) {
- for b := range uint8(8) {
- for c := range uint8(8) {
- for d := range uint8(8) {
- x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
- y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
- z := select2x4x32(x, a, b, c, d, y)
-					w := make([]int32, 4)
- z.StoreSlice(w)
- if w[0] != int32(a) || w[1] != int32(b) ||
- w[2] != int32(c) || w[3] != int32(d) {
- t.Errorf("Expected [%d %d %d %d] got %v", a, b, c, d, w)
- }
- }
- }
- }
- }
-}
-
-func TestSelect2x8x32Grouped(t *testing.T) {
- for a := range uint8(8) {
- for b := range uint8(8) {
- for c := range uint8(8) {
- for d := range uint8(8) {
- x := simd.LoadInt32x8Slice([]int32{0, 1, 2, 3, 10, 11, 12, 13})
- y := simd.LoadInt32x8Slice([]int32{4, 5, 6, 7, 14, 15, 16, 17})
- z := select2x8x32Grouped(x, a, b, c, d, y)
-					w := make([]int32, 8)
- z.StoreSlice(w)
- if w[0] != int32(a) || w[1] != int32(b) ||
- w[2] != int32(c) || w[3] != int32(d) ||
- w[4] != int32(10+a) || w[5] != int32(10+b) ||
- w[6] != int32(10+c) || w[7] != int32(10+d) {
- t.Errorf("Expected [%d %d %d %d %d %d %d %d] got %v", a, b, c, d, 10+a, 10+b, 10+c, 10+d, w)
- }
- }
- }
- }
- }
-}
-
-// select2x4x32 selects 4 elements from the pair x, y; indices 0-3 pick the
-// four elements of x and indices 4-7 pick the four elements of y.
-func select2x4x32(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) simd.Int32x4 {
- pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
-
- a, b, c, d = a&3, b&3, c&3, d&3
-
- switch pattern {
- case simd.LLLL:
- return x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, c, d), x)
- case simd.HHHH:
- return y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, c, d), y)
- case simd.LLHH:
- return x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, c, d), y)
- case simd.HHLL:
- return y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, c, d), x)
-
- case simd.HLLL:
- z := y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, a, b, b), x)
- return z.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(0, 2, c, d), x)
- case simd.LHLL:
- z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, a, b, b), y)
- return z.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(0, 2, c, d), x)
-
- case simd.HLHH:
- z := y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, a, b, b), x)
- return z.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(0, 2, c, d), y)
- case simd.LHHH:
- z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, a, b, b), y)
- return z.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(0, 2, c, d), y)
-
- case simd.LLLH:
- z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(c, c, d, d), y)
- return x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, 0, 2), z)
- case simd.LLHL:
- z := y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(c, c, d, d), x)
- return x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, 0, 2), z)
- case simd.HHLH:
- z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(c, c, d, d), y)
- return y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, 0, 2), z)
- case simd.HHHL:
- z := y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(c, c, d, d), x)
- return y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, 0, 2), z)
-
- case simd.LHLH:
- z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, c, b, d), y)
- return z.ExportTestConcatSelectedConstant(0b11_01_10_00 /* =simd.ExportTestCscImm4(0, 2, 1, 3) */, z)
- case simd.HLHL:
- z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(b, d, a, c), y)
- return z.ExportTestConcatSelectedConstant(0b01_11_00_10 /* =simd.ExportTestCscImm4(2, 0, 3, 1) */, z)
- case simd.HLLH:
- z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(b, c, a, d), y)
- return z.ExportTestConcatSelectedConstant(0b11_01_00_10 /* =simd.ExportTestCscImm4(2, 0, 1, 3) */, z)
- case simd.LHHL:
- z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, d, b, c), y)
- return z.ExportTestConcatSelectedConstant(0b01_11_10_00 /* =simd.ExportTestCscImm4(0, 2, 3, 1) */, z)
- }
- panic("missing case, switch should be exhaustive")
-}
-
-// select2x8x32Grouped performs the same 4-element selection as select2x4x32
-// independently in each 128-bit group: indices 0-3 pick from a group of x
-// (lower or upper 128 bits) and indices 4-7 pick from the matching group of y.
-func select2x8x32Grouped(x simd.Int32x8, a, b, c, d uint8, y simd.Int32x8) simd.Int32x8 {
-	// The switch below classifies selections as expressible (or not) in a
-	// single ExportTestConcatSelectedConstant pattern. Classification is by
-	// H and L, where H is a selection from 4-7 and L is a selection from 0-3.
- // simd.LLHH -> CSC(x,y, a, b, c&3, d&3)
- // simd.HHLL -> CSC(y,x, a&3, b&3, c, d)
- // simd.LLLL -> CSC(x,x, a, b, c, d)
- // simd.HHHH -> CSC(y,y, a&3, b&3, c&3, d&3)
-
- // simd.LLLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(x, z, a, b, 0, 2)
- // simd.LLHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(x, z, a, b, 0, 2)
- // simd.HHLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(y, z, a&3, b&3, 0, 2)
- // simd.HHHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(y, z, a&3, b&3, 0, 2)
-
- // simd.LHLL -> z = CSC(x, y, a, a, b&3, b&3); CSC(z, x, 0, 2, c, d)
- // etc
-
- // simd.LHLH -> z = CSC(x, y, a, c, b&3, d&3); CSC(z, z, 0, 2, 1, 3)
- // simd.HLHL -> z = CSC(x, y, b, d, a&3, c&3); CSC(z, z, 2, 0, 3, 1)
-
- pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
-
- a, b, c, d = a&3, b&3, c&3, d&3
-
- switch pattern {
- case simd.LLLL:
- return x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, c, d), x)
- case simd.HHHH:
- return y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, c, d), y)
- case simd.LLHH:
- return x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, c, d), y)
- case simd.HHLL:
- return y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, c, d), x)
-
- case simd.HLLL:
- z := y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, a, b, b), x)
- return z.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(0, 2, c, d), x)
- case simd.LHLL:
- z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, a, b, b), y)
- return z.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(0, 2, c, d), x)
-
- case simd.HLHH:
- z := y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, a, b, b), x)
- return z.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(0, 2, c, d), y)
- case simd.LHHH:
- z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, a, b, b), y)
- return z.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(0, 2, c, d), y)
-
- case simd.LLLH:
- z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(c, c, d, d), y)
- return x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, 0, 2), z)
- case simd.LLHL:
- z := y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(c, c, d, d), x)
- return x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, 0, 2), z)
- case simd.HHLH:
- z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(c, c, d, d), y)
- return y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, 0, 2), z)
- case simd.HHHL:
- z := y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(c, c, d, d), x)
- return y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, 0, 2), z)
-
- case simd.LHLH:
- z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, c, b, d), y)
- return z.ExportTestConcatSelectedConstantGrouped(0b11_01_10_00 /* =simd.ExportTestCscImm4(0, 2, 1, 3) */, z)
- case simd.HLHL:
- z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(b, d, a, c), y)
- return z.ExportTestConcatSelectedConstantGrouped(0b01_11_00_10 /* =simd.ExportTestCscImm4(2, 0, 3, 1) */, z)
- case simd.HLLH:
- z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(b, c, a, d), y)
- return z.ExportTestConcatSelectedConstantGrouped(0b11_01_00_10 /* =simd.ExportTestCscImm4(2, 0, 1, 3) */, z)
- case simd.LHHL:
- z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, d, b, c), y)
- return z.ExportTestConcatSelectedConstantGrouped(0b01_11_10_00 /* =simd.ExportTestCscImm4(0, 2, 3, 1) */, z)
- }
- panic("missing case, switch should be exhaustive")
-}
package codegen
-import "simd"
+import "simd/archsimd"
func vptest1() bool {
- v1 := simd.LoadUint64x2Slice([]uint64{0, 1})
- v2 := simd.LoadUint64x2Slice([]uint64{0, 0})
+ v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1})
+ v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0})
	// amd64:`VPTEST\s(.*)$`
// amd64:`SETCS\s(.*)$`
return v1.AndNot(v2).IsZero()
}
func vptest2() bool {
- v1 := simd.LoadUint64x2Slice([]uint64{0, 1})
- v2 := simd.LoadUint64x2Slice([]uint64{0, 0})
+ v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1})
+ v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0})
	// amd64:`VPTEST\s(.*)$`
// amd64:`SETEQ\s(.*)$`
return v1.And(v2).IsZero()
}
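+
+// vptestUsage is an illustrative (hypothetical) helper: VPTEST sets ZF from
+// the AND of its two operands and CF from the AND-NOT, so both idioms fold
+// into a single VPTEST plus a flag read (SETEQ for And, SETCS for AndNot),
+// with no separate vector AND.
+func vptestUsage(a, b archsimd.Uint64x2) (andZero, andNotZero bool) {
+	return a.And(b).IsZero(), a.AndNot(b).IsZero()
+}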
type Args2 struct {
- V0 simd.Uint8x32
- V1 simd.Uint8x32
+ V0 archsimd.Uint8x32
+ V1 archsimd.Uint8x32
x string
}
//go:noinline
-func simdStructNoSpill(a Args2) simd.Uint8x32 {
+func simdStructNoSpill(a Args2) archsimd.Uint8x32 {
// amd64:-`VMOVDQU\s.*$`
return a.V0.Xor(a.V1)
}
-func simdStructWrapperNoSpill(a Args2) simd.Uint8x32 {
+func simdStructWrapperNoSpill(a Args2) archsimd.Uint8x32 {
// amd64:-`VMOVDQU\s.*$`
a.x = "test"
return simdStructNoSpill(a)
}
//go:noinline
-func simdArrayNoSpill(a [1]Args2) simd.Uint8x32 {
+func simdArrayNoSpill(a [1]Args2) archsimd.Uint8x32 {
// amd64:-`VMOVDQU\s.*$`
return a[0].V0.Xor(a[0].V1)
}
-func simdArrayWrapperNoSpill(a [1]Args2) simd.Uint8x32 {
+func simdArrayWrapperNoSpill(a [1]Args2) archsimd.Uint8x32 {
// amd64:-`VMOVDQU\s.*$`
a[0].x = "test"
return simdArrayNoSpill(a)
}
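+
+// The negative VMOVDQU checks above assert that vectors passed inside
+// structs and arrays stay in registers: no unaligned vector moves should be
+// emitted to spill and reload them around the calls.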
-func simdFeatureGuardedMaskOpt() simd.Int16x16 {
- var x, y simd.Int16x16
- if simd.X86.AVX512() {
- mask := simd.Mask16x16FromBits(5)
+func simdFeatureGuardedMaskOpt() archsimd.Int16x16 {
+ var x, y archsimd.Int16x16
+ if archsimd.X86.AVX512() {
+ mask := archsimd.Mask16x16FromBits(5)
return x.Add(y).Masked(mask) // amd64:`VPADDW.Z\s.*$`
}
- mask := simd.Mask16x16FromBits(5)
+ mask := archsimd.Mask16x16FromBits(5)
return x.Add(y).Masked(mask) // amd64:`VPAND\s.*$`
}
-func simdMaskedMerge() simd.Int16x16 {
- var x, y simd.Int16x16
- if simd.X86.AVX512() {
- mask := simd.Mask16x16FromBits(5)
+func simdMaskedMerge() archsimd.Int16x16 {
+ var x, y archsimd.Int16x16
+ if archsimd.X86.AVX512() {
+ mask := archsimd.Mask16x16FromBits(5)
return x.Add(y).Merge(x, mask) // amd64:-`VPBLENDVB\s.*$`
}
- mask := simd.Mask16x16FromBits(5)
+ mask := archsimd.Mask16x16FromBits(5)
return x.Add(y).Merge(x, mask) // amd64:`VPBLENDVB\s.*$`
}
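+
+// Under AVX-512 the mask folds into the arithmetic instruction: Masked uses
+// the zeroing form (VPADDW.Z) and Merge uses merge-masking, so no VPAND or
+// VPBLENDVB is emitted. The non-AVX-512 fallbacks must materialize the mask
+// and blend explicitly.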
package foo
-import "simd"
+import "simd/archsimd"
-func f1(x simd.Int8x16) {
+func f1(x archsimd.Int8x16) {
	return // ERROR "has features avx$"
}
-func g1() simd.Int8x16 {
- var x simd.Int8x16
+func g1() archsimd.Int8x16 {
+ var x archsimd.Int8x16
return x // ERROR "has features avx$"
}
-type T1 simd.Int8x16
+type T1 archsimd.Int8x16
func (x T1) h() {
return // ERROR "has features avx$"
}
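+
+// Referencing a 128-bit vector type is enough to tag a function with the
+// avx feature; the 512-bit types below additionally imply avx2 and avx512.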
-func f2(x simd.Int8x64) {
+func f2(x archsimd.Int8x64) {
return // ERROR "has features avx[+]avx2[+]avx512$"
}
-func g2() simd.Int8x64 {
- var x simd.Int8x64
+func g2() archsimd.Int8x64 {
+ var x archsimd.Int8x64
return x // ERROR "has features avx[+]avx2[+]avx512$"
}
-type T2 simd.Int8x64
+type T2 archsimd.Int8x64
func (x T2) h() {
return // ERROR "has features avx[+]avx2[+]avx512$"
func f() {
if a == 0 {
- if !simd.X86.AVX512() {
+ if !archsimd.X86.AVX512() {
return
}
println("has avx512") // ERROR "has features avx[+]avx2[+]avx512$"
} else {
- if !simd.X86.AVX2() {
+ if !archsimd.X86.AVX2() {
return
}
println("has avx2") // ERROR "has features avx[+]avx2$"
} // ERROR "has features avx[+]avx2$"
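+
+// g and hasIrreducibleLoop below check that feature facts established by a
+// dominating archsimd.X86 test propagate through loop bodies and gotos, not
+// just straight-line code.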
func g() {
- if simd.X86.AVX2() { // ERROR "has features avx[+]avx2$"
+ if archsimd.X86.AVX2() { // ERROR "has features avx[+]avx2$"
for range 5 { // ERROR "has features avx[+]avx2$"
if a < 0 { // ERROR "has features avx[+]avx2$"
a++ // ERROR "has features avx[+]avx2$"
}
func hasIrreducibleLoop() {
- if simd.X86.AVX2() {
+ if archsimd.X86.AVX2() {
goto a // ERROR "has features avx[+]avx2$"
} else {
goto b
println("c")
}
-func ternRewrite(m, w, x, y, z simd.Int32x16) (t0, t1, t2 simd.Int32x16) {
- if !simd.X86.AVX512() { // ERROR "has features avx[+]avx2[+]avx512$"
+func ternRewrite(m, w, x, y, z archsimd.Int32x16) (t0, t1, t2 archsimd.Int32x16) {
+ if !archsimd.X86.AVX512() { // ERROR "has features avx[+]avx2[+]avx512$"
return // ERROR "has features avx[+]avx2[+]avx512$" // all blocks have it because of the vector size
}
t0 = w.Xor(y).Xor(z) // ERROR "Rewriting.*ternInt"
return // ERROR "has features avx[+]avx2[+]avx512$"
}
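+
+// The "Rewriting.*ternInt" checks confirm that a three-input logical
+// expression such as w.Xor(y).Xor(z) is fused into a single ternary-logic
+// (VPTERNLOG-style) operation once AVX-512 is known for the block.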
-func ternTricky1(x, y, z simd.Int32x8) simd.Int32x8 {
+func ternTricky1(x, y, z archsimd.Int32x8) archsimd.Int32x8 {
// Int32x8 is a 256-bit vector and does not guarantee AVX-512
// a is a 3-variable logical expression occurring outside AVX-512 feature check
a := x.Xor(y).Xor(z)
- var w simd.Int32x8
- if !simd.X86.AVX512() { // ERROR "has features avx$"
+ var w archsimd.Int32x8
+ if !archsimd.X86.AVX512() { // ERROR "has features avx$"
// do nothing
} else {
w = y.AndNot(a) // ERROR "has features avx[+]avx2[+]avx512" "Rewriting.*ternInt"
return a.Or(w) // ERROR "has features avx$"
}
-func ternTricky2(x, y, z simd.Int32x8) simd.Int32x8 {
+func ternTricky2(x, y, z archsimd.Int32x8) archsimd.Int32x8 {
// Int32x8 is a 256-bit vector and does not guarantee AVX-512
- var a, w simd.Int32x8
- if !simd.X86.AVX512() { // ERROR "has features avx$"
+ var a, w archsimd.Int32x8
+ if !archsimd.X86.AVX512() { // ERROR "has features avx$"
// do nothing
} else {
a = x.Xor(y).Xor(z)
return a.Or(w) // ERROR "has features avx$"
}
-func ternTricky3(x, y, z simd.Int32x8) simd.Int32x8 {
+func ternTricky3(x, y, z archsimd.Int32x8) archsimd.Int32x8 {
// Int32x8 is a 256-bit vector and does not guarantee AVX-512
a := x.Xor(y).Xor(z)
w := y.AndNot(a)
- if !simd.X86.AVX512() { // ERROR "has features avx$"
+ if !archsimd.X86.AVX512() { // ERROR "has features avx$"
return a // ERROR "has features avx$"
}
// a is a common subexpression
package p
import (
- "simd"
+ "simd/archsimd"
"unsafe"
)
tos *[2][4][4]float32,
blend int,
) {
- tiny := simd.BroadcastFloat32x8(0)
+ tiny := archsimd.BroadcastFloat32x8(0)
for {
- dstCol12 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[0][0:]))))
- dstCol34 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[0][2:]))))
- dstCol56 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[1][0:]))))
- dstCol78 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[1][2:]))))
+ dstCol12 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[0][0:]))))
+ dstCol34 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[0][2:]))))
+ dstCol56 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[1][0:]))))
+ dstCol78 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[1][2:]))))
- tosCol12 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[0][0:]))))
- tosCol34 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[0][2:]))))
- tosCol56 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[1][0:]))))
- tosCol78 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[1][2:]))))
+ tosCol12 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[0][0:]))))
+ tosCol34 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[0][2:]))))
+ tosCol56 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[1][0:]))))
+ tosCol78 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[1][2:]))))
- var Cr0, Cr1, Cr2 simd.Float32x8
+ var Cr0, Cr1, Cr2 archsimd.Float32x8
if blend != 0 {
invas := tosCol78.Max(tiny)
invad := dstCol78.Max(tiny)
Cs0 := tosCol12.Mul(invas)
Cs1 := tosCol34.Mul(invas)
Cs2 := tosCol56.Mul(invas)
- var Cm0, Cm1, Cm2 simd.Float32x8
+ var Cm0, Cm1, Cm2 archsimd.Float32x8
switch blend {
case 4:
case 10:
Cr1 = dstCol78.Mul(Cs1).Mul(Cm1)
Cr2 = dstCol78.Mul(Cs2).Mul(Cm2)
}
- var resR, resG, resB, resA simd.Float32x8
+ var resR, resG, resB, resA archsimd.Float32x8
if blend == 0 {
resR = tosCol12
resG = tosCol34
package p
import (
- "simd"
+ "simd/archsimd"
)
func PackComplex(b bool) {
for {
if b {
var indices [4]uint32
- simd.Uint32x4{}.ShiftAllRight(20).Store(&indices)
+ archsimd.Uint32x4{}.ShiftAllRight(20).Store(&indices)
_ = indices[indices[0]]
}
}
px := &src[y]
if b {
var indices [4]uint32
- fu := simd.LoadFloat32x4(px).AsUint32x4()
+ fu := archsimd.LoadFloat32x4(px).AsUint32x4()
fu.ShiftAllRight(0).Store(nil)
- entry := simd.LoadUint32x4(&[4]uint32{
+ entry := archsimd.LoadUint32x4(&[4]uint32{
toSrgbTable[indices[0]],
})
var res [4]uint32
package foo
-import "simd"
+import "simd/archsimd"
-func hasClosure(a, b, c, d simd.Int64x4) (w, x, y, z simd.Int64x4) {
+func hasClosure(a, b, c, d archsimd.Int64x4) (w, x, y, z archsimd.Int64x4) {
shuf := func() { // ERROR "can inline hasClosure.func1"
w = z.RotateAllLeft(1).Xor(a)
x = w.RotateAllLeft(3).Xor(b)