From: David Chase
Date: Mon, 8 Dec 2025 18:24:12 +0000 (-0500)
Subject: [dev.simd] simd, cmd/compile: move "simd" to "simd/archsimd"
X-Git-Tag: go1.26rc1~1^2~45^2~3
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=144cf17d2c444a530d7c08c5870dc8e70bec2c72;p=gostls13.git

[dev.simd] simd, cmd/compile: move "simd" to "simd/archsimd"

Also removes a few leftover TODOs and scraps of commented-out code
from simd development.

Updates etetest.sh so that it behaves correctly whether or not amd64
implies the simd experiment.

Fixes #76473.
Change-Id: I6d9792214d7f514cb90c21b101dbf7d07c1d0e55
Reviewed-on: https://go-review.googlesource.com/c/go/+/728220
TryBot-Bypass: David Chase
Reviewed-by: Cherry Mui
---

diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go
index 2ce5c8accc..33f9c325c3 100644
--- a/src/cmd/compile/internal/inline/inl.go
+++ b/src/cmd/compile/internal/inline/inl.go
@@ -445,7 +445,7 @@ type hairyVisitor struct {
 func isDebugFn(fn *ir.Func) bool {
 	// if n := fn.Nname; n != nil {
-	// 	if n.Sym().Name == "Int32x8.Transpose8" && n.Sym().Pkg.Path == "simd" {
+	// 	if n.Sym().Name == "Int32x8.Transpose8" && n.Sym().Pkg.Path == "simd/archsimd" {
 	// 		fmt.Printf("isDebugFn '%s' DOT '%s'\n", n.Sym().Pkg.Path, n.Sym().Name)
 	// 		return true
 	// 	}
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index 17beb7b848..4425c5617b 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -1644,7 +1644,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
 	// Only enable intrinsics, if SIMD experiment.
 	simdIntrinsics(addF)
 
-	addF("simd", "ClearAVXUpperBits",
+	addF(simdPackage, "ClearAVXUpperBits",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem())
 			return nil
@@ -1668,15 +1668,18 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
 	addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
 	addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
 
+	// sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
 	sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
-		addF("simd", method,
+		addF(simdPackage, method,
 			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 				x, a, b, c, d, y := args[0], args[1], args[2], args[3], args[4], args[5]
 				if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 && c.Op == ssa.OpConst8 && d.Op == ssa.OpConst8 {
-					return select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
-				} else {
-					return s.callResult(n, callNormal)
+					z := select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
+					if z != nil {
+						return z
+					}
 				}
+				return s.callResult(n, callNormal)
 			},
 			sys.AMD64)
 	}
@@ -1693,15 +1696,18 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
 	sfp4("Uint32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512)
 	sfp4("Float32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x16, types.TypeVec512)
 
+	// sfp2 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
sfp2 := func(method string, hwop ssa.Op, vectype *types.Type, cscimm func(i, j uint8) int64) { - addF("simd", method, + addF(simdPackage, method, func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { x, a, b, y := args[0], args[1], args[2], args[3] if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 { - return select2FromPair(x, a, b, y, s, hwop, vectype, cscimm) - } else { - return s.callResult(n, callNormal) + z := select2FromPair(x, a, b, y, s, hwop, vectype, cscimm) + if z != nil { + return z + } } + return s.callResult(n, callNormal) }, sys.AMD64) } @@ -1767,6 +1773,9 @@ const ( func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type, csc func(a, b uint8) int64) *ssa.Value { a, b := uint8(_a.AuxInt8()), uint8(_b.AuxInt8()) + if a > 3 || b > 3 { + return nil + } pattern := (a&2)>>1 + (b & 2) a, b = a&1, b&1 @@ -1785,6 +1794,9 @@ func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type func select4FromPair(x, _a, _b, _c, _d, y *ssa.Value, s *state, op ssa.Op, t *types.Type) *ssa.Value { a, b, c, d := uint8(_a.AuxInt8()), uint8(_b.AuxInt8()), uint8(_c.AuxInt8()), uint8(_d.AuxInt8()) + if a > 7 || b > 7 || c > 7 || d > 7 { + return nil + } pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1 a, b, c, d = a&3, b&3, c&3, d&3 @@ -2154,7 +2166,7 @@ func findIntrinsic(sym *types.Sym) intrinsicBuilder { fn := sym.Name if ssa.IntrinsicsDisable { if pkg == "internal/runtime/sys" && (fn == "GetCallerPC" || fn == "GrtCallerSP" || fn == "GetClosurePtr") || - pkg == "internal/simd" || pkg == "simd" { // TODO after simd has been moved to package simd, remove internal/simd + pkg == simdPackage { // These runtime functions don't have definitions, must be intrinsics. } else { return nil diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go index 91b975c913..3d866a6bf4 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics_test.go +++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go @@ -1407,13 +1407,13 @@ func TestIntrinsics(t *testing.T) { gotIntrinsics[testIntrinsicKey{ik.arch.Name, ik.pkg, ik.fn}] = struct{}{} } for ik, _ := range gotIntrinsics { - if _, found := wantIntrinsics[ik]; !found && (ik.pkg != "simd" || *simd) { + if _, found := wantIntrinsics[ik]; !found && (ik.pkg != "simd/archsimd" || *simd) { t.Errorf("Got unwanted intrinsic %v %v.%v", ik.archName, ik.pkg, ik.fn) } } for ik, _ := range wantIntrinsics { - if _, found := gotIntrinsics[ik]; !found && (ik.pkg != "simd" || *simd) { + if _, found := gotIntrinsics[ik]; !found && (ik.pkg != "simd/archsimd" || *simd) { t.Errorf("Want missing intrinsic %v %v.%v", ik.archName, ik.pkg, ik.fn) } } diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 8aa7fa4552..42d33bbe03 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -9,7 +9,7 @@ import ( "cmd/internal/sys" ) -const simdPackage = "simd" +const simdPackage = "simd/archsimd" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { addF(simdPackage, "Uint8x16.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x16, types.TypeVec128), sys.AMD64) diff --git a/src/cmd/compile/internal/types/size.go b/src/cmd/compile/internal/types/size.go index 0162164679..1acb041e35 100644 --- a/src/cmd/compile/internal/types/size.go +++ b/src/cmd/compile/internal/types/size.go @@ -471,11 +471,6 @@ func simdify(st 
*Type, isTag bool) { } else { st.floatRegs = 1 } - // if st.Sym() != nil { - // base.Warn("Simdify %s, %v, %d", st.Sym().Name, isTag, st.width) - // } else { - // base.Warn("Simdify %v, %v, %d", st, isTag, st.width) - // } } // CalcStructSize calculates the size of t, @@ -491,10 +486,9 @@ func CalcStructSize(t *Type) { case sym.Name == "align64" && isAtomicStdPkg(sym.Pkg): maxAlign = 8 - case buildcfg.Experiment.SIMD && (sym.Pkg.Path == "internal/simd" || sym.Pkg.Path == "simd") && len(t.Fields()) >= 1: + case buildcfg.Experiment.SIMD && (sym.Pkg.Path == "simd/archsimd") && len(t.Fields()) >= 1: // This gates the experiment -- without it, no user-visible types can be "simd". // The SSA-visible SIMD types remain. - // TODO after simd has been moved to package simd, remove internal/simd. switch sym.Name { case "v128": simdify(t, true) diff --git a/src/cmd/compile/internal/types2/stdlib_test.go b/src/cmd/compile/internal/types2/stdlib_test.go index ad1974ad85..ee49bbddfa 100644 --- a/src/cmd/compile/internal/types2/stdlib_test.go +++ b/src/cmd/compile/internal/types2/stdlib_test.go @@ -361,8 +361,8 @@ var excluded = map[string]bool{ "builtin": true, "cmd/compile/internal/ssa/_gen": true, "runtime/_mkmalloc": true, - "simd/_gen/simdgen": true, - "simd/_gen/unify": true, + "simd/archsimd/_gen/simdgen": true, + "simd/archsimd/_gen/unify": true, } // printPackageMu synchronizes the printing of type-checked package files in diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index d58bd294cd..f4b7e9dae5 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -73,7 +73,7 @@ var depsRules = ` internal/byteorder, internal/cpu, internal/goarch < internal/chacha8rand; internal/goarch, math/bits < internal/strconv; - internal/cpu, internal/strconv < simd; + internal/cpu, internal/strconv < simd/archsimd; # RUNTIME is the core runtime group of packages, all of them very light-weight. 
internal/abi, @@ -709,7 +709,7 @@ var depsRules = ` < testing; testing, math - < simd/internal/test_helpers; + < simd/archsimd/internal/test_helpers; log/slog, testing < testing/slogtest; diff --git a/src/go/types/stdlib_test.go b/src/go/types/stdlib_test.go index da2b38403f..6b72bbf44d 100644 --- a/src/go/types/stdlib_test.go +++ b/src/go/types/stdlib_test.go @@ -361,8 +361,8 @@ var excluded = map[string]bool{ "builtin": true, "cmd/compile/internal/ssa/_gen": true, "runtime/_mkmalloc": true, - "simd/_gen/simdgen": true, - "simd/_gen/unify": true, + "simd/archsimd/_gen/simdgen": true, + "simd/archsimd/_gen/unify": true, } // printPackageMu synchronizes the printing of type-checked package files in diff --git a/src/simd/_gen/simdgen/.gitignore b/src/simd/_gen/simdgen/.gitignore deleted file mode 100644 index de579f6b9b..0000000000 --- a/src/simd/_gen/simdgen/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -testdata/* -.gemini/* -.gemini* diff --git a/src/simd/_gen/go.mod b/src/simd/archsimd/_gen/go.mod similarity index 74% rename from src/simd/_gen/go.mod rename to src/simd/archsimd/_gen/go.mod index fa360f560a..1ea88518aa 100644 --- a/src/simd/_gen/go.mod +++ b/src/simd/archsimd/_gen/go.mod @@ -1,4 +1,4 @@ -module simd/_gen +module simd/archsimd/_gen go 1.24 diff --git a/src/simd/_gen/go.sum b/src/simd/archsimd/_gen/go.sum similarity index 100% rename from src/simd/_gen/go.sum rename to src/simd/archsimd/_gen/go.sum diff --git a/src/simd/_gen/main.go b/src/simd/archsimd/_gen/main.go similarity index 100% rename from src/simd/_gen/main.go rename to src/simd/archsimd/_gen/main.go diff --git a/src/simd/_gen/simdgen/categories.yaml b/src/simd/archsimd/_gen/simdgen/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/categories.yaml rename to src/simd/archsimd/_gen/simdgen/categories.yaml diff --git a/src/simd/_gen/simdgen/etetest.sh b/src/simd/archsimd/_gen/simdgen/etetest.sh similarity index 62% rename from src/simd/_gen/simdgen/etetest.sh rename to src/simd/archsimd/_gen/simdgen/etetest.sh index f6559fcfff..0bd2354fbb 100755 --- a/src/simd/_gen/simdgen/etetest.sh +++ b/src/simd/archsimd/_gen/simdgen/etetest.sh @@ -9,16 +9,13 @@ if [[ ! -d "$XEDDATA" ]]; then exit 1 fi +# Ensure that goroot is the appropriate ancestor of this directory which go >/dev/null || exit 1 goroot="$(go env GOROOT)" -if [[ ! ../../../.. -ef "$goroot" ]]; then +ancestor="../../../../.." +if [[ ! $ancestor -ef "$goroot" ]]; then # We might be able to make this work but it's SO CONFUSING. - echo >&2 "go command in path has GOROOT $goroot" - exit 1 -fi - -if [[ $(go env GOEXPERIMENT) != simd ]]; then - echo >&2 "GOEXPERIMENT=$(go env GOEXPERIMENT), expected simd" + echo >&2 "go command in path has GOROOT $goroot instead of" `(cd $ancestor; pwd)` exit 1 fi @@ -34,11 +31,12 @@ cd "$goroot"/src go install cmd/compile # Tests -GOARCH=amd64 go run -C simd/testdata . -GOARCH=amd64 go test -v simd -go test go/doc go/build -go test cmd/api -v -check -run ^TestCheck$ -go test cmd/compile/internal/ssagen -simd=0 +# Set the GOEXPERIMENT explicitly. +GOEXPERIMENT=simd GOARCH=amd64 go run -C simd/archsimd/testdata . 
+GOEXPERIMENT=simd GOARCH=amd64 go test -v simd/archsimd +GOEXPERIMENT=simd GOARCH=amd64 go test go/doc go/build +GOEXPERIMENT=simd GOARCH=amd64 go test cmd/api -v -check -run ^TestCheck$ +GOEXPERIMENT=simd GOARCH=amd64 go test cmd/compile/internal/ssagen -simd=0 # Check tests without the GOEXPERIMENT GOEXPERIMENT= go test go/doc go/build diff --git a/src/simd/_gen/simdgen/gen_simdGenericOps.go b/src/simd/archsimd/_gen/simdgen/gen_simdGenericOps.go similarity index 100% rename from src/simd/_gen/simdgen/gen_simdGenericOps.go rename to src/simd/archsimd/_gen/simdgen/gen_simdGenericOps.go diff --git a/src/simd/_gen/simdgen/gen_simdIntrinsics.go b/src/simd/archsimd/_gen/simdgen/gen_simdIntrinsics.go similarity index 100% rename from src/simd/_gen/simdgen/gen_simdIntrinsics.go rename to src/simd/archsimd/_gen/simdgen/gen_simdIntrinsics.go diff --git a/src/simd/_gen/simdgen/gen_simdMachineOps.go b/src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go similarity index 100% rename from src/simd/_gen/simdgen/gen_simdMachineOps.go rename to src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go diff --git a/src/simd/_gen/simdgen/gen_simdTypes.go b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go similarity index 99% rename from src/simd/_gen/simdgen/gen_simdTypes.go rename to src/simd/archsimd/_gen/simdgen/gen_simdTypes.go index f98795e1b0..2041404e91 100644 --- a/src/simd/_gen/simdgen/gen_simdTypes.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go @@ -123,7 +123,7 @@ func compareSimdTypePairs(x, y simdTypePair) int { const simdPackageHeader = generatedHeader + ` //go:build goexperiment.simd -package simd +package archsimd ` const simdTypesTemplates = ` diff --git a/src/simd/_gen/simdgen/gen_simdrules.go b/src/simd/archsimd/_gen/simdgen/gen_simdrules.go similarity index 100% rename from src/simd/_gen/simdgen/gen_simdrules.go rename to src/simd/archsimd/_gen/simdgen/gen_simdrules.go diff --git a/src/simd/_gen/simdgen/gen_simdssa.go b/src/simd/archsimd/_gen/simdgen/gen_simdssa.go similarity index 100% rename from src/simd/_gen/simdgen/gen_simdssa.go rename to src/simd/archsimd/_gen/simdgen/gen_simdssa.go diff --git a/src/simd/_gen/simdgen/gen_utility.go b/src/simd/archsimd/_gen/simdgen/gen_utility.go similarity index 100% rename from src/simd/_gen/simdgen/gen_utility.go rename to src/simd/archsimd/_gen/simdgen/gen_utility.go diff --git a/src/simd/_gen/simdgen/go.yaml b/src/simd/archsimd/_gen/simdgen/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/go.yaml rename to src/simd/archsimd/_gen/simdgen/go.yaml diff --git a/src/simd/_gen/simdgen/godefs.go b/src/simd/archsimd/_gen/simdgen/godefs.go similarity index 99% rename from src/simd/_gen/simdgen/godefs.go rename to src/simd/archsimd/_gen/simdgen/godefs.go index fb43116576..2c10377420 100644 --- a/src/simd/_gen/simdgen/godefs.go +++ b/src/simd/archsimd/_gen/simdgen/godefs.go @@ -13,7 +13,7 @@ import ( "strings" "unicode" - "simd/_gen/unify" + "simd/archsimd/_gen/unify" ) type Operation struct { diff --git a/src/simd/_gen/simdgen/main.go b/src/simd/archsimd/_gen/simdgen/main.go similarity index 99% rename from src/simd/_gen/simdgen/main.go rename to src/simd/archsimd/_gen/simdgen/main.go index ca75cff55d..3df95c81cb 100644 --- a/src/simd/_gen/simdgen/main.go +++ b/src/simd/archsimd/_gen/simdgen/main.go @@ -92,7 +92,7 @@ import ( "slices" "strings" - "simd/_gen/unify" + "simd/archsimd/_gen/unify" "gopkg.in/yaml.v3" ) @@ -117,7 +117,7 @@ var ( flagMemProfile = flag.String("memprofile", "", "write memory profile to `file`") ) -const 
simdPackage = "simd" +const simdPackage = "simd/archsimd" func main() { flag.Parse() diff --git a/src/simd/_gen/simdgen/ops/AddSub/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/AddSub/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/AddSub/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/AddSub/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml diff --git a/src/simd/_gen/simdgen/ops/BitwiseLogic/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/BitwiseLogic/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/BitwiseLogic/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/BitwiseLogic/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/BitwiseLogic/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/BitwiseLogic/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/BitwiseLogic/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/BitwiseLogic/go.yaml diff --git a/src/simd/_gen/simdgen/ops/Compares/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Compares/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/Compares/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Compares/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml diff --git a/src/simd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Converts/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Converts/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml diff --git a/src/simd/_gen/simdgen/ops/FPonlyArith/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/FPonlyArith/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/FPonlyArith/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/FPonlyArith/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/go.yaml diff --git a/src/simd/_gen/simdgen/ops/GaloisField/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/GaloisField/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/GaloisField/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/GaloisField/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/GaloisField/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/GaloisField/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/GaloisField/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/GaloisField/go.yaml diff --git a/src/simd/_gen/simdgen/ops/IntOnlyArith/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/IntOnlyArith/categories.yaml 
rename to src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/IntOnlyArith/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/IntOnlyArith/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/go.yaml diff --git a/src/simd/_gen/simdgen/ops/MLOps/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/MLOps/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/MLOps/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/MLOps/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml diff --git a/src/simd/_gen/simdgen/ops/MinMax/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/MinMax/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/MinMax/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/MinMax/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/MinMax/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/MinMax/go.yaml diff --git a/src/simd/_gen/simdgen/ops/Moves/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Moves/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/Moves/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Moves/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml diff --git a/src/simd/_gen/simdgen/ops/Mul/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Mul/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/Mul/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Mul/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Mul/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Mul/go.yaml diff --git a/src/simd/_gen/simdgen/ops/Others/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Others/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Others/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Others/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/Others/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Others/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/Others/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/Others/go.yaml diff --git a/src/simd/_gen/simdgen/ops/ShiftRotate/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/ShiftRotate/categories.yaml rename to src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml diff --git a/src/simd/_gen/simdgen/ops/ShiftRotate/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/go.yaml similarity index 100% rename from src/simd/_gen/simdgen/ops/ShiftRotate/go.yaml rename to src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/go.yaml diff --git a/src/simd/_gen/simdgen/pprint.go b/src/simd/archsimd/_gen/simdgen/pprint.go similarity index 100% rename from 
src/simd/_gen/simdgen/pprint.go rename to src/simd/archsimd/_gen/simdgen/pprint.go diff --git a/src/simd/_gen/simdgen/sort_test.go b/src/simd/archsimd/_gen/simdgen/sort_test.go similarity index 100% rename from src/simd/_gen/simdgen/sort_test.go rename to src/simd/archsimd/_gen/simdgen/sort_test.go diff --git a/src/simd/_gen/simdgen/types.yaml b/src/simd/archsimd/_gen/simdgen/types.yaml similarity index 100% rename from src/simd/_gen/simdgen/types.yaml rename to src/simd/archsimd/_gen/simdgen/types.yaml diff --git a/src/simd/_gen/simdgen/xed.go b/src/simd/archsimd/_gen/simdgen/xed.go similarity index 99% rename from src/simd/_gen/simdgen/xed.go rename to src/simd/archsimd/_gen/simdgen/xed.go index 31a147a839..4ba6738e7e 100644 --- a/src/simd/_gen/simdgen/xed.go +++ b/src/simd/archsimd/_gen/simdgen/xed.go @@ -15,7 +15,7 @@ import ( "strconv" "strings" - "simd/_gen/unify" + "simd/archsimd/_gen/unify" "golang.org/x/arch/x86/xeddata" "gopkg.in/yaml.v3" diff --git a/src/simd/_gen/tmplgen/main.go b/src/simd/archsimd/_gen/tmplgen/main.go similarity index 94% rename from src/simd/_gen/tmplgen/main.go rename to src/simd/archsimd/_gen/tmplgen/main.go index 6ec8d45b9b..71b5a7a8c7 100644 --- a/src/simd/_gen/tmplgen/main.go +++ b/src/simd/archsimd/_gen/tmplgen/main.go @@ -247,7 +247,7 @@ func prologue(s string, out io.Writer) { //go:build goexperiment.simd -package simd +package archsimd `, s) } @@ -267,7 +267,7 @@ func unsafePrologue(s string, out io.Writer) { //go:build goexperiment.simd -package simd +package archsimd import "unsafe" @@ -287,7 +287,7 @@ func testPrologue(t, s string, out io.Writer) { package simd_test import ( - "simd" + "simd/archsimd" "testing" ) @@ -324,12 +324,12 @@ func (x {{.VType}}) StoreSlice(s []{{.Etype}}) { var unaryTemplate = templateOf("unary_helpers", ` // test{{.VType}}Unary tests the simd unary method f against the expected behavior generated by want -func test{{.VType}}Unary(t *testing.T, f func(_ simd.{{.VType}}) simd.{{.VType}}, want func(_ []{{.Etype}}) []{{.Etype}}) { +func test{{.VType}}Unary(t *testing.T, f func(_ archsimd.{{.VType}}) archsimd.{{.VType}}, want func(_ []{{.Etype}}) []{{.Etype}}) { n := {{.Count}} t.Helper() forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool { t.Helper() - a := simd.Load{{.VType}}Slice(x) + a := archsimd.Load{{.VType}}Slice(x) g := make([]{{.Etype}}, n) f(a).StoreSlice(g) w := want(x) @@ -341,12 +341,12 @@ func test{{.VType}}Unary(t *testing.T, f func(_ simd.{{.VType}}) simd.{{.VType}} var unaryFlakyTemplate = shapedTemplateOf(unaryFlaky, "unary_flaky_helpers", ` // test{{.VType}}UnaryFlaky tests the simd unary method f against the expected behavior generated by want, // but using a flakiness parameter because we haven't exactly figured out how simd floating point works -func test{{.VType}}UnaryFlaky(t *testing.T, f func(x simd.{{.VType}}) simd.{{.VType}}, want func(x []{{.Etype}}) []{{.Etype}}, flakiness float64) { +func test{{.VType}}UnaryFlaky(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.VType}}, want func(x []{{.Etype}}) []{{.Etype}}, flakiness float64) { n := {{.Count}} t.Helper() forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool { t.Helper() - a := simd.Load{{.VType}}Slice(x) + a := archsimd.Load{{.VType}}Slice(x) g := make([]{{.Etype}}, n) f(a).StoreSlice(g) w := want(x) @@ -358,12 +358,12 @@ func test{{.VType}}UnaryFlaky(t *testing.T, f func(x simd.{{.VType}}) simd.{{.VT var convertTemplate = templateOf("convert_helpers", ` // test{{.VType}}ConvertTo{{.OEType}} tests the simd conversion method f against 
the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x simd.{{.VType}}) simd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) { +func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) { n := {{.Count}} t.Helper() forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool { t.Helper() - a := simd.Load{{.VType}}Slice(x) + a := archsimd.Load{{.VType}}Slice(x) g := make([]{{.OEtype}}, n) f(a).StoreSlice(g) w := want(x) @@ -378,13 +378,13 @@ var unaryToUint16 = convertTemplate.target("uint", 16) var binaryTemplate = templateOf("binary_helpers", ` // test{{.VType}}Binary tests the simd binary method f against the expected behavior generated by want -func test{{.VType}}Binary(t *testing.T, f func(_, _ simd.{{.VType}}) simd.{{.VType}}, want func(_, _ []{{.Etype}}) []{{.Etype}}) { +func test{{.VType}}Binary(t *testing.T, f func(_, _ archsimd.{{.VType}}) archsimd.{{.VType}}, want func(_, _ []{{.Etype}}) []{{.Etype}}) { n := {{.Count}} t.Helper() forSlicePair(t, {{.Etype}}s, n, func(x, y []{{.Etype}}) bool { t.Helper() - a := simd.Load{{.VType}}Slice(x) - b := simd.Load{{.VType}}Slice(y) + a := archsimd.Load{{.VType}}Slice(x) + b := archsimd.Load{{.VType}}Slice(y) g := make([]{{.Etype}}, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -395,14 +395,14 @@ func test{{.VType}}Binary(t *testing.T, f func(_, _ simd.{{.VType}}) simd.{{.VTy var ternaryTemplate = templateOf("ternary_helpers", ` // test{{.VType}}Ternary tests the simd ternary method f against the expected behavior generated by want -func test{{.VType}}Ternary(t *testing.T, f func(_, _, _ simd.{{.VType}}) simd.{{.VType}}, want func(_, _, _ []{{.Etype}}) []{{.Etype}}) { +func test{{.VType}}Ternary(t *testing.T, f func(_, _, _ archsimd.{{.VType}}) archsimd.{{.VType}}, want func(_, _, _ []{{.Etype}}) []{{.Etype}}) { n := {{.Count}} t.Helper() forSliceTriple(t, {{.Etype}}s, n, func(x, y, z []{{.Etype}}) bool { t.Helper() - a := simd.Load{{.VType}}Slice(x) - b := simd.Load{{.VType}}Slice(y) - c := simd.Load{{.VType}}Slice(z) + a := archsimd.Load{{.VType}}Slice(x) + b := archsimd.Load{{.VType}}Slice(y) + c := archsimd.Load{{.VType}}Slice(z) g := make([]{{.Etype}}, n) f(a, b, c).StoreSlice(g) w := want(x, y, z) @@ -414,14 +414,14 @@ func test{{.VType}}Ternary(t *testing.T, f func(_, _, _ simd.{{.VType}}) simd.{{ var ternaryFlakyTemplate = shapedTemplateOf(ternaryFlaky, "ternary_helpers", ` // test{{.VType}}TernaryFlaky tests the simd ternary method f against the expected behavior generated by want, // but using a flakiness parameter because we haven't exactly figured out how simd floating point works -func test{{.VType}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.VType}}) simd.{{.VType}}, want func(x, y, z []{{.Etype}}) []{{.Etype}}, flakiness float64) { +func test{{.VType}}TernaryFlaky(t *testing.T, f func(x, y, z archsimd.{{.VType}}) archsimd.{{.VType}}, want func(x, y, z []{{.Etype}}) []{{.Etype}}, flakiness float64) { n := {{.Count}} t.Helper() forSliceTriple(t, {{.Etype}}s, n, func(x, y, z []{{.Etype}}) bool { t.Helper() - a := simd.Load{{.VType}}Slice(x) - b := simd.Load{{.VType}}Slice(y) - c := simd.Load{{.VType}}Slice(z) + a := archsimd.Load{{.VType}}Slice(x) + b := archsimd.Load{{.VType}}Slice(y) + c := archsimd.Load{{.VType}}Slice(z) g := make([]{{.Etype}}, n) f(a, b, c).StoreSlice(g) 
w := want(x, y, z) @@ -432,13 +432,13 @@ func test{{.VType}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.VType}}) si var compareTemplate = templateOf("compare_helpers", ` // test{{.VType}}Compare tests the simd comparison method f against the expected behavior generated by want -func test{{.VType}}Compare(t *testing.T, f func(_, _ simd.{{.VType}}) simd.Mask{{.WxC}}, want func(_, _ []{{.Etype}}) []int64) { +func test{{.VType}}Compare(t *testing.T, f func(_, _ archsimd.{{.VType}}) archsimd.Mask{{.WxC}}, want func(_, _ []{{.Etype}}) []int64) { n := {{.Count}} t.Helper() forSlicePair(t, {{.Etype}}s, n, func(x, y []{{.Etype}}) bool { t.Helper() - a := simd.Load{{.VType}}Slice(x) - b := simd.Load{{.VType}}Slice(y) + a := archsimd.Load{{.VType}}Slice(x) + b := archsimd.Load{{.VType}}Slice(y) g := make([]int{{.EWidth}}, n) f(a, b).AsInt{{.WxC}}().StoreSlice(g) w := want(x, y) @@ -452,15 +452,15 @@ var compareMaskedTemplate = templateOf("comparemasked_helpers", ` // test{{.VType}}CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func test{{.VType}}CompareMasked(t *testing.T, - f func(_, _ simd.{{.VType}}, m simd.Mask{{.WxC}}) simd.Mask{{.WxC}}, + f func(_, _ archsimd.{{.VType}}, m archsimd.Mask{{.WxC}}) archsimd.Mask{{.WxC}}, want func(_, _ []{{.Etype}}) []int64) { n := {{.Count}} t.Helper() forSlicePairMasked(t, {{.Etype}}s, n, func(x, y []{{.Etype}}, m []bool) bool { t.Helper() - a := simd.Load{{.VType}}Slice(x) - b := simd.Load{{.VType}}Slice(y) - k := simd.LoadInt{{.WxC}}Slice(toVect[int{{.EWidth}}](m)).ToMask() + a := archsimd.Load{{.VType}}Slice(x) + b := archsimd.Load{{.VType}}Slice(y) + k := archsimd.LoadInt{{.WxC}}Slice(toVect[int{{.EWidth}}](m)).ToMask() g := make([]int{{.EWidth}}, n) f(a, b, k).AsInt{{.WxC}}().StoreSlice(g) w := want(x, y) @@ -814,7 +814,7 @@ func (x {{.VType}}) String() string { const SIMD = "../../" const TD = "../../internal/simd_test/" -const SSA = "../../../cmd/compile/internal/ssa/" +const SSA = "../../../../cmd/compile/internal/ssa/" func main() { sl := flag.String("sl", SIMD+"slice_gen_amd64.go", "file name for slice operations") diff --git a/src/simd/_gen/unify/closure.go b/src/simd/archsimd/_gen/unify/closure.go similarity index 100% rename from src/simd/_gen/unify/closure.go rename to src/simd/archsimd/_gen/unify/closure.go diff --git a/src/simd/_gen/unify/domain.go b/src/simd/archsimd/_gen/unify/domain.go similarity index 100% rename from src/simd/_gen/unify/domain.go rename to src/simd/archsimd/_gen/unify/domain.go diff --git a/src/simd/_gen/unify/dot.go b/src/simd/archsimd/_gen/unify/dot.go similarity index 100% rename from src/simd/_gen/unify/dot.go rename to src/simd/archsimd/_gen/unify/dot.go diff --git a/src/simd/_gen/unify/env.go b/src/simd/archsimd/_gen/unify/env.go similarity index 100% rename from src/simd/_gen/unify/env.go rename to src/simd/archsimd/_gen/unify/env.go diff --git a/src/simd/_gen/unify/html.go b/src/simd/archsimd/_gen/unify/html.go similarity index 100% rename from src/simd/_gen/unify/html.go rename to src/simd/archsimd/_gen/unify/html.go diff --git a/src/simd/_gen/unify/pos.go b/src/simd/archsimd/_gen/unify/pos.go similarity index 100% rename from src/simd/_gen/unify/pos.go rename to src/simd/archsimd/_gen/unify/pos.go diff --git a/src/simd/_gen/unify/testdata/stress.yaml b/src/simd/archsimd/_gen/unify/testdata/stress.yaml similarity index 100% rename from 
src/simd/_gen/unify/testdata/stress.yaml rename to src/simd/archsimd/_gen/unify/testdata/stress.yaml diff --git a/src/simd/_gen/unify/testdata/unify.yaml b/src/simd/archsimd/_gen/unify/testdata/unify.yaml similarity index 100% rename from src/simd/_gen/unify/testdata/unify.yaml rename to src/simd/archsimd/_gen/unify/testdata/unify.yaml diff --git a/src/simd/_gen/unify/testdata/vars.yaml b/src/simd/archsimd/_gen/unify/testdata/vars.yaml similarity index 100% rename from src/simd/_gen/unify/testdata/vars.yaml rename to src/simd/archsimd/_gen/unify/testdata/vars.yaml diff --git a/src/simd/_gen/unify/trace.go b/src/simd/archsimd/_gen/unify/trace.go similarity index 100% rename from src/simd/_gen/unify/trace.go rename to src/simd/archsimd/_gen/unify/trace.go diff --git a/src/simd/_gen/unify/unify.go b/src/simd/archsimd/_gen/unify/unify.go similarity index 100% rename from src/simd/_gen/unify/unify.go rename to src/simd/archsimd/_gen/unify/unify.go diff --git a/src/simd/_gen/unify/unify_test.go b/src/simd/archsimd/_gen/unify/unify_test.go similarity index 100% rename from src/simd/_gen/unify/unify_test.go rename to src/simd/archsimd/_gen/unify/unify_test.go diff --git a/src/simd/_gen/unify/value.go b/src/simd/archsimd/_gen/unify/value.go similarity index 100% rename from src/simd/_gen/unify/value.go rename to src/simd/archsimd/_gen/unify/value.go diff --git a/src/simd/_gen/unify/value_test.go b/src/simd/archsimd/_gen/unify/value_test.go similarity index 100% rename from src/simd/_gen/unify/value_test.go rename to src/simd/archsimd/_gen/unify/value_test.go diff --git a/src/simd/_gen/unify/yaml.go b/src/simd/archsimd/_gen/unify/yaml.go similarity index 100% rename from src/simd/_gen/unify/yaml.go rename to src/simd/archsimd/_gen/unify/yaml.go diff --git a/src/simd/_gen/unify/yaml_test.go b/src/simd/archsimd/_gen/unify/yaml_test.go similarity index 100% rename from src/simd/_gen/unify/yaml_test.go rename to src/simd/archsimd/_gen/unify/yaml_test.go diff --git a/src/simd/compare_gen_amd64.go b/src/simd/archsimd/compare_gen_amd64.go similarity index 99% rename from src/simd/compare_gen_amd64.go rename to src/simd/archsimd/compare_gen_amd64.go index 01e4f84211..92660f2323 100644 --- a/src/simd/compare_gen_amd64.go +++ b/src/simd/archsimd/compare_gen_amd64.go @@ -2,7 +2,7 @@ //go:build goexperiment.simd -package simd +package archsimd // Less returns a mask whose elements indicate whether x < y // diff --git a/src/simd/cpu.go b/src/simd/archsimd/cpu.go similarity index 99% rename from src/simd/cpu.go rename to src/simd/archsimd/cpu.go index b115910fbe..bb0ebbc16a 100644 --- a/src/simd/cpu.go +++ b/src/simd/archsimd/cpu.go @@ -2,7 +2,7 @@ //go:build goexperiment.simd -package simd +package archsimd import "internal/cpu" diff --git a/src/simd/dummy.s b/src/simd/archsimd/dummy.s similarity index 100% rename from src/simd/dummy.s rename to src/simd/archsimd/dummy.s diff --git a/src/simd/export_test.go b/src/simd/archsimd/export_test.go similarity index 98% rename from src/simd/export_test.go rename to src/simd/archsimd/export_test.go index c6e9c4a855..f448af10d4 100644 --- a/src/simd/export_test.go +++ b/src/simd/archsimd/export_test.go @@ -6,7 +6,7 @@ // This exposes some internal interfaces to simd_test. 
-package simd +package archsimd func (x Int64x2) ExportTestConcatSelectedConstant(indices uint8, y Int64x2) Int64x2 { return x.concatSelectedConstant(indices, y) diff --git a/src/simd/extra_amd64.go b/src/simd/archsimd/extra_amd64.go similarity index 99% rename from src/simd/extra_amd64.go rename to src/simd/archsimd/extra_amd64.go index a7832e6a57..921e148f63 100644 --- a/src/simd/extra_amd64.go +++ b/src/simd/archsimd/extra_amd64.go @@ -4,7 +4,7 @@ //go:build goexperiment.simd && amd64 -package simd +package archsimd // ClearAVXUpperBits clears the high bits of Y0-Y15 and Z0-Z15 registers. // It is intended for transitioning from AVX to SSE, eliminating the diff --git a/src/simd/generate.go b/src/simd/archsimd/generate.go similarity index 94% rename from src/simd/generate.go rename to src/simd/archsimd/generate.go index 5cd94e165e..587eb32dfc 100644 --- a/src/simd/generate.go +++ b/src/simd/archsimd/generate.go @@ -4,7 +4,7 @@ //go:build goexperiment.simd -package simd +package archsimd // Invoke code generators. diff --git a/src/simd/internal/simd_test/binary_helpers_test.go b/src/simd/archsimd/internal/simd_test/binary_helpers_test.go similarity index 65% rename from src/simd/internal/simd_test/binary_helpers_test.go rename to src/simd/archsimd/internal/simd_test/binary_helpers_test.go index 82cf784bca..9c361dbeb9 100644 --- a/src/simd/internal/simd_test/binary_helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/binary_helpers_test.go @@ -9,18 +9,18 @@ package simd_test import ( - "simd" + "simd/archsimd" "testing" ) // testInt8x16Binary tests the simd binary method f against the expected behavior generated by want -func testInt8x16Binary(t *testing.T, f func(_, _ simd.Int8x16) simd.Int8x16, want func(_, _ []int8) []int8) { +func testInt8x16Binary(t *testing.T, f func(_, _ archsimd.Int8x16) archsimd.Int8x16, want func(_, _ []int8) []int8) { n := 16 t.Helper() forSlicePair(t, int8s, n, func(x, y []int8) bool { t.Helper() - a := simd.LoadInt8x16Slice(x) - b := simd.LoadInt8x16Slice(y) + a := archsimd.LoadInt8x16Slice(x) + b := archsimd.LoadInt8x16Slice(y) g := make([]int8, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -29,13 +29,13 @@ func testInt8x16Binary(t *testing.T, f func(_, _ simd.Int8x16) simd.Int8x16, wan } // testInt16x8Binary tests the simd binary method f against the expected behavior generated by want -func testInt16x8Binary(t *testing.T, f func(_, _ simd.Int16x8) simd.Int16x8, want func(_, _ []int16) []int16) { +func testInt16x8Binary(t *testing.T, f func(_, _ archsimd.Int16x8) archsimd.Int16x8, want func(_, _ []int16) []int16) { n := 8 t.Helper() forSlicePair(t, int16s, n, func(x, y []int16) bool { t.Helper() - a := simd.LoadInt16x8Slice(x) - b := simd.LoadInt16x8Slice(y) + a := archsimd.LoadInt16x8Slice(x) + b := archsimd.LoadInt16x8Slice(y) g := make([]int16, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -44,13 +44,13 @@ func testInt16x8Binary(t *testing.T, f func(_, _ simd.Int16x8) simd.Int16x8, wan } // testInt32x4Binary tests the simd binary method f against the expected behavior generated by want -func testInt32x4Binary(t *testing.T, f func(_, _ simd.Int32x4) simd.Int32x4, want func(_, _ []int32) []int32) { +func testInt32x4Binary(t *testing.T, f func(_, _ archsimd.Int32x4) archsimd.Int32x4, want func(_, _ []int32) []int32) { n := 4 t.Helper() forSlicePair(t, int32s, n, func(x, y []int32) bool { t.Helper() - a := simd.LoadInt32x4Slice(x) - b := simd.LoadInt32x4Slice(y) + a := archsimd.LoadInt32x4Slice(x) + b := archsimd.LoadInt32x4Slice(y) g := make([]int32, n) 
f(a, b).StoreSlice(g) w := want(x, y) @@ -59,13 +59,13 @@ func testInt32x4Binary(t *testing.T, f func(_, _ simd.Int32x4) simd.Int32x4, wan } // testInt64x2Binary tests the simd binary method f against the expected behavior generated by want -func testInt64x2Binary(t *testing.T, f func(_, _ simd.Int64x2) simd.Int64x2, want func(_, _ []int64) []int64) { +func testInt64x2Binary(t *testing.T, f func(_, _ archsimd.Int64x2) archsimd.Int64x2, want func(_, _ []int64) []int64) { n := 2 t.Helper() forSlicePair(t, int64s, n, func(x, y []int64) bool { t.Helper() - a := simd.LoadInt64x2Slice(x) - b := simd.LoadInt64x2Slice(y) + a := archsimd.LoadInt64x2Slice(x) + b := archsimd.LoadInt64x2Slice(y) g := make([]int64, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -74,13 +74,13 @@ func testInt64x2Binary(t *testing.T, f func(_, _ simd.Int64x2) simd.Int64x2, wan } // testUint8x16Binary tests the simd binary method f against the expected behavior generated by want -func testUint8x16Binary(t *testing.T, f func(_, _ simd.Uint8x16) simd.Uint8x16, want func(_, _ []uint8) []uint8) { +func testUint8x16Binary(t *testing.T, f func(_, _ archsimd.Uint8x16) archsimd.Uint8x16, want func(_, _ []uint8) []uint8) { n := 16 t.Helper() forSlicePair(t, uint8s, n, func(x, y []uint8) bool { t.Helper() - a := simd.LoadUint8x16Slice(x) - b := simd.LoadUint8x16Slice(y) + a := archsimd.LoadUint8x16Slice(x) + b := archsimd.LoadUint8x16Slice(y) g := make([]uint8, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -89,13 +89,13 @@ func testUint8x16Binary(t *testing.T, f func(_, _ simd.Uint8x16) simd.Uint8x16, } // testUint16x8Binary tests the simd binary method f against the expected behavior generated by want -func testUint16x8Binary(t *testing.T, f func(_, _ simd.Uint16x8) simd.Uint16x8, want func(_, _ []uint16) []uint16) { +func testUint16x8Binary(t *testing.T, f func(_, _ archsimd.Uint16x8) archsimd.Uint16x8, want func(_, _ []uint16) []uint16) { n := 8 t.Helper() forSlicePair(t, uint16s, n, func(x, y []uint16) bool { t.Helper() - a := simd.LoadUint16x8Slice(x) - b := simd.LoadUint16x8Slice(y) + a := archsimd.LoadUint16x8Slice(x) + b := archsimd.LoadUint16x8Slice(y) g := make([]uint16, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -104,13 +104,13 @@ func testUint16x8Binary(t *testing.T, f func(_, _ simd.Uint16x8) simd.Uint16x8, } // testUint32x4Binary tests the simd binary method f against the expected behavior generated by want -func testUint32x4Binary(t *testing.T, f func(_, _ simd.Uint32x4) simd.Uint32x4, want func(_, _ []uint32) []uint32) { +func testUint32x4Binary(t *testing.T, f func(_, _ archsimd.Uint32x4) archsimd.Uint32x4, want func(_, _ []uint32) []uint32) { n := 4 t.Helper() forSlicePair(t, uint32s, n, func(x, y []uint32) bool { t.Helper() - a := simd.LoadUint32x4Slice(x) - b := simd.LoadUint32x4Slice(y) + a := archsimd.LoadUint32x4Slice(x) + b := archsimd.LoadUint32x4Slice(y) g := make([]uint32, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -119,13 +119,13 @@ func testUint32x4Binary(t *testing.T, f func(_, _ simd.Uint32x4) simd.Uint32x4, } // testUint64x2Binary tests the simd binary method f against the expected behavior generated by want -func testUint64x2Binary(t *testing.T, f func(_, _ simd.Uint64x2) simd.Uint64x2, want func(_, _ []uint64) []uint64) { +func testUint64x2Binary(t *testing.T, f func(_, _ archsimd.Uint64x2) archsimd.Uint64x2, want func(_, _ []uint64) []uint64) { n := 2 t.Helper() forSlicePair(t, uint64s, n, func(x, y []uint64) bool { t.Helper() - a := simd.LoadUint64x2Slice(x) - b := simd.LoadUint64x2Slice(y) + a := 
archsimd.LoadUint64x2Slice(x) + b := archsimd.LoadUint64x2Slice(y) g := make([]uint64, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -134,13 +134,13 @@ func testUint64x2Binary(t *testing.T, f func(_, _ simd.Uint64x2) simd.Uint64x2, } // testFloat32x4Binary tests the simd binary method f against the expected behavior generated by want -func testFloat32x4Binary(t *testing.T, f func(_, _ simd.Float32x4) simd.Float32x4, want func(_, _ []float32) []float32) { +func testFloat32x4Binary(t *testing.T, f func(_, _ archsimd.Float32x4) archsimd.Float32x4, want func(_, _ []float32) []float32) { n := 4 t.Helper() forSlicePair(t, float32s, n, func(x, y []float32) bool { t.Helper() - a := simd.LoadFloat32x4Slice(x) - b := simd.LoadFloat32x4Slice(y) + a := archsimd.LoadFloat32x4Slice(x) + b := archsimd.LoadFloat32x4Slice(y) g := make([]float32, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -149,13 +149,13 @@ func testFloat32x4Binary(t *testing.T, f func(_, _ simd.Float32x4) simd.Float32x } // testFloat64x2Binary tests the simd binary method f against the expected behavior generated by want -func testFloat64x2Binary(t *testing.T, f func(_, _ simd.Float64x2) simd.Float64x2, want func(_, _ []float64) []float64) { +func testFloat64x2Binary(t *testing.T, f func(_, _ archsimd.Float64x2) archsimd.Float64x2, want func(_, _ []float64) []float64) { n := 2 t.Helper() forSlicePair(t, float64s, n, func(x, y []float64) bool { t.Helper() - a := simd.LoadFloat64x2Slice(x) - b := simd.LoadFloat64x2Slice(y) + a := archsimd.LoadFloat64x2Slice(x) + b := archsimd.LoadFloat64x2Slice(y) g := make([]float64, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -164,13 +164,13 @@ func testFloat64x2Binary(t *testing.T, f func(_, _ simd.Float64x2) simd.Float64x } // testInt8x32Binary tests the simd binary method f against the expected behavior generated by want -func testInt8x32Binary(t *testing.T, f func(_, _ simd.Int8x32) simd.Int8x32, want func(_, _ []int8) []int8) { +func testInt8x32Binary(t *testing.T, f func(_, _ archsimd.Int8x32) archsimd.Int8x32, want func(_, _ []int8) []int8) { n := 32 t.Helper() forSlicePair(t, int8s, n, func(x, y []int8) bool { t.Helper() - a := simd.LoadInt8x32Slice(x) - b := simd.LoadInt8x32Slice(y) + a := archsimd.LoadInt8x32Slice(x) + b := archsimd.LoadInt8x32Slice(y) g := make([]int8, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -179,13 +179,13 @@ func testInt8x32Binary(t *testing.T, f func(_, _ simd.Int8x32) simd.Int8x32, wan } // testInt16x16Binary tests the simd binary method f against the expected behavior generated by want -func testInt16x16Binary(t *testing.T, f func(_, _ simd.Int16x16) simd.Int16x16, want func(_, _ []int16) []int16) { +func testInt16x16Binary(t *testing.T, f func(_, _ archsimd.Int16x16) archsimd.Int16x16, want func(_, _ []int16) []int16) { n := 16 t.Helper() forSlicePair(t, int16s, n, func(x, y []int16) bool { t.Helper() - a := simd.LoadInt16x16Slice(x) - b := simd.LoadInt16x16Slice(y) + a := archsimd.LoadInt16x16Slice(x) + b := archsimd.LoadInt16x16Slice(y) g := make([]int16, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -194,13 +194,13 @@ func testInt16x16Binary(t *testing.T, f func(_, _ simd.Int16x16) simd.Int16x16, } // testInt32x8Binary tests the simd binary method f against the expected behavior generated by want -func testInt32x8Binary(t *testing.T, f func(_, _ simd.Int32x8) simd.Int32x8, want func(_, _ []int32) []int32) { +func testInt32x8Binary(t *testing.T, f func(_, _ archsimd.Int32x8) archsimd.Int32x8, want func(_, _ []int32) []int32) { n := 8 t.Helper() forSlicePair(t, int32s, 
n, func(x, y []int32) bool { t.Helper() - a := simd.LoadInt32x8Slice(x) - b := simd.LoadInt32x8Slice(y) + a := archsimd.LoadInt32x8Slice(x) + b := archsimd.LoadInt32x8Slice(y) g := make([]int32, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -209,13 +209,13 @@ func testInt32x8Binary(t *testing.T, f func(_, _ simd.Int32x8) simd.Int32x8, wan } // testInt64x4Binary tests the simd binary method f against the expected behavior generated by want -func testInt64x4Binary(t *testing.T, f func(_, _ simd.Int64x4) simd.Int64x4, want func(_, _ []int64) []int64) { +func testInt64x4Binary(t *testing.T, f func(_, _ archsimd.Int64x4) archsimd.Int64x4, want func(_, _ []int64) []int64) { n := 4 t.Helper() forSlicePair(t, int64s, n, func(x, y []int64) bool { t.Helper() - a := simd.LoadInt64x4Slice(x) - b := simd.LoadInt64x4Slice(y) + a := archsimd.LoadInt64x4Slice(x) + b := archsimd.LoadInt64x4Slice(y) g := make([]int64, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -224,13 +224,13 @@ func testInt64x4Binary(t *testing.T, f func(_, _ simd.Int64x4) simd.Int64x4, wan } // testUint8x32Binary tests the simd binary method f against the expected behavior generated by want -func testUint8x32Binary(t *testing.T, f func(_, _ simd.Uint8x32) simd.Uint8x32, want func(_, _ []uint8) []uint8) { +func testUint8x32Binary(t *testing.T, f func(_, _ archsimd.Uint8x32) archsimd.Uint8x32, want func(_, _ []uint8) []uint8) { n := 32 t.Helper() forSlicePair(t, uint8s, n, func(x, y []uint8) bool { t.Helper() - a := simd.LoadUint8x32Slice(x) - b := simd.LoadUint8x32Slice(y) + a := archsimd.LoadUint8x32Slice(x) + b := archsimd.LoadUint8x32Slice(y) g := make([]uint8, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -239,13 +239,13 @@ func testUint8x32Binary(t *testing.T, f func(_, _ simd.Uint8x32) simd.Uint8x32, } // testUint16x16Binary tests the simd binary method f against the expected behavior generated by want -func testUint16x16Binary(t *testing.T, f func(_, _ simd.Uint16x16) simd.Uint16x16, want func(_, _ []uint16) []uint16) { +func testUint16x16Binary(t *testing.T, f func(_, _ archsimd.Uint16x16) archsimd.Uint16x16, want func(_, _ []uint16) []uint16) { n := 16 t.Helper() forSlicePair(t, uint16s, n, func(x, y []uint16) bool { t.Helper() - a := simd.LoadUint16x16Slice(x) - b := simd.LoadUint16x16Slice(y) + a := archsimd.LoadUint16x16Slice(x) + b := archsimd.LoadUint16x16Slice(y) g := make([]uint16, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -254,13 +254,13 @@ func testUint16x16Binary(t *testing.T, f func(_, _ simd.Uint16x16) simd.Uint16x1 } // testUint32x8Binary tests the simd binary method f against the expected behavior generated by want -func testUint32x8Binary(t *testing.T, f func(_, _ simd.Uint32x8) simd.Uint32x8, want func(_, _ []uint32) []uint32) { +func testUint32x8Binary(t *testing.T, f func(_, _ archsimd.Uint32x8) archsimd.Uint32x8, want func(_, _ []uint32) []uint32) { n := 8 t.Helper() forSlicePair(t, uint32s, n, func(x, y []uint32) bool { t.Helper() - a := simd.LoadUint32x8Slice(x) - b := simd.LoadUint32x8Slice(y) + a := archsimd.LoadUint32x8Slice(x) + b := archsimd.LoadUint32x8Slice(y) g := make([]uint32, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -269,13 +269,13 @@ func testUint32x8Binary(t *testing.T, f func(_, _ simd.Uint32x8) simd.Uint32x8, } // testUint64x4Binary tests the simd binary method f against the expected behavior generated by want -func testUint64x4Binary(t *testing.T, f func(_, _ simd.Uint64x4) simd.Uint64x4, want func(_, _ []uint64) []uint64) { +func testUint64x4Binary(t *testing.T, f func(_, _ 
archsimd.Uint64x4) archsimd.Uint64x4, want func(_, _ []uint64) []uint64) { n := 4 t.Helper() forSlicePair(t, uint64s, n, func(x, y []uint64) bool { t.Helper() - a := simd.LoadUint64x4Slice(x) - b := simd.LoadUint64x4Slice(y) + a := archsimd.LoadUint64x4Slice(x) + b := archsimd.LoadUint64x4Slice(y) g := make([]uint64, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -284,13 +284,13 @@ func testUint64x4Binary(t *testing.T, f func(_, _ simd.Uint64x4) simd.Uint64x4, } // testFloat32x8Binary tests the simd binary method f against the expected behavior generated by want -func testFloat32x8Binary(t *testing.T, f func(_, _ simd.Float32x8) simd.Float32x8, want func(_, _ []float32) []float32) { +func testFloat32x8Binary(t *testing.T, f func(_, _ archsimd.Float32x8) archsimd.Float32x8, want func(_, _ []float32) []float32) { n := 8 t.Helper() forSlicePair(t, float32s, n, func(x, y []float32) bool { t.Helper() - a := simd.LoadFloat32x8Slice(x) - b := simd.LoadFloat32x8Slice(y) + a := archsimd.LoadFloat32x8Slice(x) + b := archsimd.LoadFloat32x8Slice(y) g := make([]float32, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -299,13 +299,13 @@ func testFloat32x8Binary(t *testing.T, f func(_, _ simd.Float32x8) simd.Float32x } // testFloat64x4Binary tests the simd binary method f against the expected behavior generated by want -func testFloat64x4Binary(t *testing.T, f func(_, _ simd.Float64x4) simd.Float64x4, want func(_, _ []float64) []float64) { +func testFloat64x4Binary(t *testing.T, f func(_, _ archsimd.Float64x4) archsimd.Float64x4, want func(_, _ []float64) []float64) { n := 4 t.Helper() forSlicePair(t, float64s, n, func(x, y []float64) bool { t.Helper() - a := simd.LoadFloat64x4Slice(x) - b := simd.LoadFloat64x4Slice(y) + a := archsimd.LoadFloat64x4Slice(x) + b := archsimd.LoadFloat64x4Slice(y) g := make([]float64, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -314,13 +314,13 @@ func testFloat64x4Binary(t *testing.T, f func(_, _ simd.Float64x4) simd.Float64x } // testInt8x64Binary tests the simd binary method f against the expected behavior generated by want -func testInt8x64Binary(t *testing.T, f func(_, _ simd.Int8x64) simd.Int8x64, want func(_, _ []int8) []int8) { +func testInt8x64Binary(t *testing.T, f func(_, _ archsimd.Int8x64) archsimd.Int8x64, want func(_, _ []int8) []int8) { n := 64 t.Helper() forSlicePair(t, int8s, n, func(x, y []int8) bool { t.Helper() - a := simd.LoadInt8x64Slice(x) - b := simd.LoadInt8x64Slice(y) + a := archsimd.LoadInt8x64Slice(x) + b := archsimd.LoadInt8x64Slice(y) g := make([]int8, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -329,13 +329,13 @@ func testInt8x64Binary(t *testing.T, f func(_, _ simd.Int8x64) simd.Int8x64, wan } // testInt16x32Binary tests the simd binary method f against the expected behavior generated by want -func testInt16x32Binary(t *testing.T, f func(_, _ simd.Int16x32) simd.Int16x32, want func(_, _ []int16) []int16) { +func testInt16x32Binary(t *testing.T, f func(_, _ archsimd.Int16x32) archsimd.Int16x32, want func(_, _ []int16) []int16) { n := 32 t.Helper() forSlicePair(t, int16s, n, func(x, y []int16) bool { t.Helper() - a := simd.LoadInt16x32Slice(x) - b := simd.LoadInt16x32Slice(y) + a := archsimd.LoadInt16x32Slice(x) + b := archsimd.LoadInt16x32Slice(y) g := make([]int16, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -344,13 +344,13 @@ func testInt16x32Binary(t *testing.T, f func(_, _ simd.Int16x32) simd.Int16x32, } // testInt32x16Binary tests the simd binary method f against the expected behavior generated by want -func testInt32x16Binary(t *testing.T, f 
func(_, _ simd.Int32x16) simd.Int32x16, want func(_, _ []int32) []int32) { +func testInt32x16Binary(t *testing.T, f func(_, _ archsimd.Int32x16) archsimd.Int32x16, want func(_, _ []int32) []int32) { n := 16 t.Helper() forSlicePair(t, int32s, n, func(x, y []int32) bool { t.Helper() - a := simd.LoadInt32x16Slice(x) - b := simd.LoadInt32x16Slice(y) + a := archsimd.LoadInt32x16Slice(x) + b := archsimd.LoadInt32x16Slice(y) g := make([]int32, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -359,13 +359,13 @@ func testInt32x16Binary(t *testing.T, f func(_, _ simd.Int32x16) simd.Int32x16, } // testInt64x8Binary tests the simd binary method f against the expected behavior generated by want -func testInt64x8Binary(t *testing.T, f func(_, _ simd.Int64x8) simd.Int64x8, want func(_, _ []int64) []int64) { +func testInt64x8Binary(t *testing.T, f func(_, _ archsimd.Int64x8) archsimd.Int64x8, want func(_, _ []int64) []int64) { n := 8 t.Helper() forSlicePair(t, int64s, n, func(x, y []int64) bool { t.Helper() - a := simd.LoadInt64x8Slice(x) - b := simd.LoadInt64x8Slice(y) + a := archsimd.LoadInt64x8Slice(x) + b := archsimd.LoadInt64x8Slice(y) g := make([]int64, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -374,13 +374,13 @@ func testInt64x8Binary(t *testing.T, f func(_, _ simd.Int64x8) simd.Int64x8, wan } // testUint8x64Binary tests the simd binary method f against the expected behavior generated by want -func testUint8x64Binary(t *testing.T, f func(_, _ simd.Uint8x64) simd.Uint8x64, want func(_, _ []uint8) []uint8) { +func testUint8x64Binary(t *testing.T, f func(_, _ archsimd.Uint8x64) archsimd.Uint8x64, want func(_, _ []uint8) []uint8) { n := 64 t.Helper() forSlicePair(t, uint8s, n, func(x, y []uint8) bool { t.Helper() - a := simd.LoadUint8x64Slice(x) - b := simd.LoadUint8x64Slice(y) + a := archsimd.LoadUint8x64Slice(x) + b := archsimd.LoadUint8x64Slice(y) g := make([]uint8, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -389,13 +389,13 @@ func testUint8x64Binary(t *testing.T, f func(_, _ simd.Uint8x64) simd.Uint8x64, } // testUint16x32Binary tests the simd binary method f against the expected behavior generated by want -func testUint16x32Binary(t *testing.T, f func(_, _ simd.Uint16x32) simd.Uint16x32, want func(_, _ []uint16) []uint16) { +func testUint16x32Binary(t *testing.T, f func(_, _ archsimd.Uint16x32) archsimd.Uint16x32, want func(_, _ []uint16) []uint16) { n := 32 t.Helper() forSlicePair(t, uint16s, n, func(x, y []uint16) bool { t.Helper() - a := simd.LoadUint16x32Slice(x) - b := simd.LoadUint16x32Slice(y) + a := archsimd.LoadUint16x32Slice(x) + b := archsimd.LoadUint16x32Slice(y) g := make([]uint16, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -404,13 +404,13 @@ func testUint16x32Binary(t *testing.T, f func(_, _ simd.Uint16x32) simd.Uint16x3 } // testUint32x16Binary tests the simd binary method f against the expected behavior generated by want -func testUint32x16Binary(t *testing.T, f func(_, _ simd.Uint32x16) simd.Uint32x16, want func(_, _ []uint32) []uint32) { +func testUint32x16Binary(t *testing.T, f func(_, _ archsimd.Uint32x16) archsimd.Uint32x16, want func(_, _ []uint32) []uint32) { n := 16 t.Helper() forSlicePair(t, uint32s, n, func(x, y []uint32) bool { t.Helper() - a := simd.LoadUint32x16Slice(x) - b := simd.LoadUint32x16Slice(y) + a := archsimd.LoadUint32x16Slice(x) + b := archsimd.LoadUint32x16Slice(y) g := make([]uint32, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -419,13 +419,13 @@ func testUint32x16Binary(t *testing.T, f func(_, _ simd.Uint32x16) simd.Uint32x1 } // testUint64x8Binary tests 
the simd binary method f against the expected behavior generated by want -func testUint64x8Binary(t *testing.T, f func(_, _ simd.Uint64x8) simd.Uint64x8, want func(_, _ []uint64) []uint64) { +func testUint64x8Binary(t *testing.T, f func(_, _ archsimd.Uint64x8) archsimd.Uint64x8, want func(_, _ []uint64) []uint64) { n := 8 t.Helper() forSlicePair(t, uint64s, n, func(x, y []uint64) bool { t.Helper() - a := simd.LoadUint64x8Slice(x) - b := simd.LoadUint64x8Slice(y) + a := archsimd.LoadUint64x8Slice(x) + b := archsimd.LoadUint64x8Slice(y) g := make([]uint64, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -434,13 +434,13 @@ func testUint64x8Binary(t *testing.T, f func(_, _ simd.Uint64x8) simd.Uint64x8, } // testFloat32x16Binary tests the simd binary method f against the expected behavior generated by want -func testFloat32x16Binary(t *testing.T, f func(_, _ simd.Float32x16) simd.Float32x16, want func(_, _ []float32) []float32) { +func testFloat32x16Binary(t *testing.T, f func(_, _ archsimd.Float32x16) archsimd.Float32x16, want func(_, _ []float32) []float32) { n := 16 t.Helper() forSlicePair(t, float32s, n, func(x, y []float32) bool { t.Helper() - a := simd.LoadFloat32x16Slice(x) - b := simd.LoadFloat32x16Slice(y) + a := archsimd.LoadFloat32x16Slice(x) + b := archsimd.LoadFloat32x16Slice(y) g := make([]float32, n) f(a, b).StoreSlice(g) w := want(x, y) @@ -449,13 +449,13 @@ func testFloat32x16Binary(t *testing.T, f func(_, _ simd.Float32x16) simd.Float3 } // testFloat64x8Binary tests the simd binary method f against the expected behavior generated by want -func testFloat64x8Binary(t *testing.T, f func(_, _ simd.Float64x8) simd.Float64x8, want func(_, _ []float64) []float64) { +func testFloat64x8Binary(t *testing.T, f func(_, _ archsimd.Float64x8) archsimd.Float64x8, want func(_, _ []float64) []float64) { n := 8 t.Helper() forSlicePair(t, float64s, n, func(x, y []float64) bool { t.Helper() - a := simd.LoadFloat64x8Slice(x) - b := simd.LoadFloat64x8Slice(y) + a := archsimd.LoadFloat64x8Slice(x) + b := archsimd.LoadFloat64x8Slice(y) g := make([]float64, n) f(a, b).StoreSlice(g) w := want(x, y) diff --git a/src/simd/archsimd/internal/simd_test/binary_test.go b/src/simd/archsimd/internal/simd_test/binary_test.go new file mode 100644 index 0000000000..fa2b9511ca --- /dev/null +++ b/src/simd/archsimd/internal/simd_test/binary_test.go @@ -0,0 +1,361 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
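The binary helpers above all follow one shape: load two slices into vectors, run the method under test, store the result, and compare it lane by lane against the scalar reference passed as want. The scalar references named in the new binary_test.go below (addSlice, subSlice, and friends) are defined in the package's shared helpers rather than in this diff; a minimal sketch of what such a reference presumably looks like, with the number constraint assumed here purely for illustration:

type number interface {
	~int8 | ~int16 | ~int32 | ~int64 |
		~uint8 | ~uint16 | ~uint32 | ~uint64 |
		~float32 | ~float64
}

// addSlice is the scalar model for vector Add: elementwise x + y.
func addSlice[T number](x, y []T) []T {
	z := make([]T, len(x))
	for i := range x {
		z[i] = x[i] + y[i]
	}
	return z
}

A call such as testFloat32x8Binary(t, archsimd.Float32x8.Add, addSlice[float32]) then checks the hardware Add against this loop for every generated input pair.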
+ +//go:build goexperiment.simd && amd64 + +package simd_test + +import ( + "simd/archsimd" + "testing" +) + +func TestAdd(t *testing.T) { + testFloat32x4Binary(t, archsimd.Float32x4.Add, addSlice[float32]) + testFloat32x8Binary(t, archsimd.Float32x8.Add, addSlice[float32]) + testFloat64x2Binary(t, archsimd.Float64x2.Add, addSlice[float64]) + testFloat64x4Binary(t, archsimd.Float64x4.Add, addSlice[float64]) + + testInt16x16Binary(t, archsimd.Int16x16.Add, addSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.Add, addSlice[int16]) + testInt32x4Binary(t, archsimd.Int32x4.Add, addSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.Add, addSlice[int32]) + testInt64x2Binary(t, archsimd.Int64x2.Add, addSlice[int64]) + testInt64x4Binary(t, archsimd.Int64x4.Add, addSlice[int64]) + testInt8x16Binary(t, archsimd.Int8x16.Add, addSlice[int8]) + testInt8x32Binary(t, archsimd.Int8x32.Add, addSlice[int8]) + + testUint16x16Binary(t, archsimd.Uint16x16.Add, addSlice[uint16]) + testUint16x8Binary(t, archsimd.Uint16x8.Add, addSlice[uint16]) + testUint32x4Binary(t, archsimd.Uint32x4.Add, addSlice[uint32]) + testUint32x8Binary(t, archsimd.Uint32x8.Add, addSlice[uint32]) + testUint64x2Binary(t, archsimd.Uint64x2.Add, addSlice[uint64]) + testUint64x4Binary(t, archsimd.Uint64x4.Add, addSlice[uint64]) + testUint8x16Binary(t, archsimd.Uint8x16.Add, addSlice[uint8]) + testUint8x32Binary(t, archsimd.Uint8x32.Add, addSlice[uint8]) + + if archsimd.X86.AVX512() { + testFloat32x16Binary(t, archsimd.Float32x16.Add, addSlice[float32]) + testFloat64x8Binary(t, archsimd.Float64x8.Add, addSlice[float64]) + testInt8x64Binary(t, archsimd.Int8x64.Add, addSlice[int8]) + testInt16x32Binary(t, archsimd.Int16x32.Add, addSlice[int16]) + testInt32x16Binary(t, archsimd.Int32x16.Add, addSlice[int32]) + testInt64x8Binary(t, archsimd.Int64x8.Add, addSlice[int64]) + testUint8x64Binary(t, archsimd.Uint8x64.Add, addSlice[uint8]) + testUint16x32Binary(t, archsimd.Uint16x32.Add, addSlice[uint16]) + testUint32x16Binary(t, archsimd.Uint32x16.Add, addSlice[uint32]) + testUint64x8Binary(t, archsimd.Uint64x8.Add, addSlice[uint64]) + } +} + +func TestSub(t *testing.T) { + testFloat32x4Binary(t, archsimd.Float32x4.Sub, subSlice[float32]) + testFloat32x8Binary(t, archsimd.Float32x8.Sub, subSlice[float32]) + testFloat64x2Binary(t, archsimd.Float64x2.Sub, subSlice[float64]) + testFloat64x4Binary(t, archsimd.Float64x4.Sub, subSlice[float64]) + + testInt16x16Binary(t, archsimd.Int16x16.Sub, subSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.Sub, subSlice[int16]) + testInt32x4Binary(t, archsimd.Int32x4.Sub, subSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.Sub, subSlice[int32]) + testInt64x2Binary(t, archsimd.Int64x2.Sub, subSlice[int64]) + testInt64x4Binary(t, archsimd.Int64x4.Sub, subSlice[int64]) + testInt8x16Binary(t, archsimd.Int8x16.Sub, subSlice[int8]) + testInt8x32Binary(t, archsimd.Int8x32.Sub, subSlice[int8]) + + testUint16x16Binary(t, archsimd.Uint16x16.Sub, subSlice[uint16]) + testUint16x8Binary(t, archsimd.Uint16x8.Sub, subSlice[uint16]) + testUint32x4Binary(t, archsimd.Uint32x4.Sub, subSlice[uint32]) + testUint32x8Binary(t, archsimd.Uint32x8.Sub, subSlice[uint32]) + testUint64x2Binary(t, archsimd.Uint64x2.Sub, subSlice[uint64]) + testUint64x4Binary(t, archsimd.Uint64x4.Sub, subSlice[uint64]) + testUint8x16Binary(t, archsimd.Uint8x16.Sub, subSlice[uint8]) + testUint8x32Binary(t, archsimd.Uint8x32.Sub, subSlice[uint8]) + + if archsimd.X86.AVX512() { + testFloat32x16Binary(t, archsimd.Float32x16.Sub, subSlice[float32]) + 
testFloat64x8Binary(t, archsimd.Float64x8.Sub, subSlice[float64]) + testInt8x64Binary(t, archsimd.Int8x64.Sub, subSlice[int8]) + testInt16x32Binary(t, archsimd.Int16x32.Sub, subSlice[int16]) + testInt32x16Binary(t, archsimd.Int32x16.Sub, subSlice[int32]) + testInt64x8Binary(t, archsimd.Int64x8.Sub, subSlice[int64]) + testUint8x64Binary(t, archsimd.Uint8x64.Sub, subSlice[uint8]) + testUint16x32Binary(t, archsimd.Uint16x32.Sub, subSlice[uint16]) + testUint32x16Binary(t, archsimd.Uint32x16.Sub, subSlice[uint32]) + testUint64x8Binary(t, archsimd.Uint64x8.Sub, subSlice[uint64]) + } +} + +func TestMax(t *testing.T) { + // testFloat32x4Binary(t, archsimd.Float32x4.Max, maxSlice[float32]) // nan is wrong + // testFloat32x8Binary(t, archsimd.Float32x8.Max, maxSlice[float32]) // nan is wrong + // testFloat64x2Binary(t, archsimd.Float64x2.Max, maxSlice[float64]) // nan is wrong + // testFloat64x4Binary(t, archsimd.Float64x4.Max, maxSlice[float64]) // nan is wrong + + testInt16x16Binary(t, archsimd.Int16x16.Max, maxSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.Max, maxSlice[int16]) + testInt32x4Binary(t, archsimd.Int32x4.Max, maxSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.Max, maxSlice[int32]) + + if archsimd.X86.AVX512() { + testInt64x2Binary(t, archsimd.Int64x2.Max, maxSlice[int64]) + testInt64x4Binary(t, archsimd.Int64x4.Max, maxSlice[int64]) + } + + testInt8x16Binary(t, archsimd.Int8x16.Max, maxSlice[int8]) + testInt8x32Binary(t, archsimd.Int8x32.Max, maxSlice[int8]) + + testUint16x16Binary(t, archsimd.Uint16x16.Max, maxSlice[uint16]) + testUint16x8Binary(t, archsimd.Uint16x8.Max, maxSlice[uint16]) + testUint32x4Binary(t, archsimd.Uint32x4.Max, maxSlice[uint32]) + testUint32x8Binary(t, archsimd.Uint32x8.Max, maxSlice[uint32]) + + if archsimd.X86.AVX512() { + testUint64x2Binary(t, archsimd.Uint64x2.Max, maxSlice[uint64]) + testUint64x4Binary(t, archsimd.Uint64x4.Max, maxSlice[uint64]) + } + + testUint8x16Binary(t, archsimd.Uint8x16.Max, maxSlice[uint8]) + testUint8x32Binary(t, archsimd.Uint8x32.Max, maxSlice[uint8]) + + if archsimd.X86.AVX512() { + // testFloat32x16Binary(t, archsimd.Float32x16.Max, maxSlice[float32]) // nan is wrong + // testFloat64x8Binary(t, archsimd.Float64x8.Max, maxSlice[float64]) // nan is wrong + testInt8x64Binary(t, archsimd.Int8x64.Max, maxSlice[int8]) + testInt16x32Binary(t, archsimd.Int16x32.Max, maxSlice[int16]) + testInt32x16Binary(t, archsimd.Int32x16.Max, maxSlice[int32]) + testInt64x8Binary(t, archsimd.Int64x8.Max, maxSlice[int64]) + testUint8x64Binary(t, archsimd.Uint8x64.Max, maxSlice[uint8]) + testUint16x32Binary(t, archsimd.Uint16x32.Max, maxSlice[uint16]) + testUint32x16Binary(t, archsimd.Uint32x16.Max, maxSlice[uint32]) + testUint64x8Binary(t, archsimd.Uint64x8.Max, maxSlice[uint64]) + } +} + +func TestMin(t *testing.T) { + // testFloat32x4Binary(t, archsimd.Float32x4.Min, minSlice[float32]) // nan is wrong + // testFloat32x8Binary(t, archsimd.Float32x8.Min, minSlice[float32]) // nan is wrong + // testFloat64x2Binary(t, archsimd.Float64x2.Min, minSlice[float64]) // nan is wrong + // testFloat64x4Binary(t, archsimd.Float64x4.Min, minSlice[float64]) // nan is wrong + + testInt16x16Binary(t, archsimd.Int16x16.Min, minSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.Min, minSlice[int16]) + testInt32x4Binary(t, archsimd.Int32x4.Min, minSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.Min, minSlice[int32]) + + if archsimd.X86.AVX512() { + testInt64x2Binary(t, archsimd.Int64x2.Min, minSlice[int64]) + testInt64x4Binary(t, 
archsimd.Int64x4.Min, minSlice[int64]) + } + + testInt8x16Binary(t, archsimd.Int8x16.Min, minSlice[int8]) + testInt8x32Binary(t, archsimd.Int8x32.Min, minSlice[int8]) + + testUint16x16Binary(t, archsimd.Uint16x16.Min, minSlice[uint16]) + testUint16x8Binary(t, archsimd.Uint16x8.Min, minSlice[uint16]) + testUint32x4Binary(t, archsimd.Uint32x4.Min, minSlice[uint32]) + testUint32x8Binary(t, archsimd.Uint32x8.Min, minSlice[uint32]) + + if archsimd.X86.AVX512() { + testUint64x2Binary(t, archsimd.Uint64x2.Min, minSlice[uint64]) + testUint64x4Binary(t, archsimd.Uint64x4.Min, minSlice[uint64]) + } + + testUint8x16Binary(t, archsimd.Uint8x16.Min, minSlice[uint8]) + testUint8x32Binary(t, archsimd.Uint8x32.Min, minSlice[uint8]) + + if archsimd.X86.AVX512() { + // testFloat32x16Binary(t, archsimd.Float32x16.Min, minSlice[float32]) // nan is wrong + // testFloat64x8Binary(t, archsimd.Float64x8.Min, minSlice[float64]) // nan is wrong + testInt8x64Binary(t, archsimd.Int8x64.Min, minSlice[int8]) + testInt16x32Binary(t, archsimd.Int16x32.Min, minSlice[int16]) + testInt32x16Binary(t, archsimd.Int32x16.Min, minSlice[int32]) + testInt64x8Binary(t, archsimd.Int64x8.Min, minSlice[int64]) + testUint8x64Binary(t, archsimd.Uint8x64.Min, minSlice[uint8]) + testUint16x32Binary(t, archsimd.Uint16x32.Min, minSlice[uint16]) + testUint32x16Binary(t, archsimd.Uint32x16.Min, minSlice[uint32]) + testUint64x8Binary(t, archsimd.Uint64x8.Min, minSlice[uint64]) + } +} + +func TestAnd(t *testing.T) { + testInt16x16Binary(t, archsimd.Int16x16.And, andSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.And, andSlice[int16]) + testInt32x4Binary(t, archsimd.Int32x4.And, andSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.And, andSlice[int32]) + testInt64x2Binary(t, archsimd.Int64x2.And, andSlice[int64]) + testInt64x4Binary(t, archsimd.Int64x4.And, andSlice[int64]) + testInt8x16Binary(t, archsimd.Int8x16.And, andSlice[int8]) + testInt8x32Binary(t, archsimd.Int8x32.And, andSlice[int8]) + + testUint16x16Binary(t, archsimd.Uint16x16.And, andSlice[uint16]) + testUint16x8Binary(t, archsimd.Uint16x8.And, andSlice[uint16]) + testUint32x4Binary(t, archsimd.Uint32x4.And, andSlice[uint32]) + testUint32x8Binary(t, archsimd.Uint32x8.And, andSlice[uint32]) + testUint64x2Binary(t, archsimd.Uint64x2.And, andSlice[uint64]) + testUint64x4Binary(t, archsimd.Uint64x4.And, andSlice[uint64]) + testUint8x16Binary(t, archsimd.Uint8x16.And, andSlice[uint8]) + testUint8x32Binary(t, archsimd.Uint8x32.And, andSlice[uint8]) + + if archsimd.X86.AVX512() { + // testInt8x64Binary(t, archsimd.Int8x64.And, andISlice[int8]) // missing + // testInt16x32Binary(t, archsimd.Int16x32.And, andISlice[int16]) // missing + testInt32x16Binary(t, archsimd.Int32x16.And, andSlice[int32]) + testInt64x8Binary(t, archsimd.Int64x8.And, andSlice[int64]) + // testUint8x64Binary(t, archsimd.Uint8x64.And, andISlice[uint8]) // missing + // testUint16x32Binary(t, archsimd.Uint16x32.And, andISlice[uint16]) // missing + testUint32x16Binary(t, archsimd.Uint32x16.And, andSlice[uint32]) + testUint64x8Binary(t, archsimd.Uint64x8.And, andSlice[uint64]) + } +} + +func TestAndNot(t *testing.T) { + testInt16x16Binary(t, archsimd.Int16x16.AndNot, andNotSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.AndNot, andNotSlice[int16]) + testInt32x4Binary(t, archsimd.Int32x4.AndNot, andNotSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.AndNot, andNotSlice[int32]) + testInt64x2Binary(t, archsimd.Int64x2.AndNot, andNotSlice[int64]) + testInt64x4Binary(t, archsimd.Int64x4.AndNot, 
andNotSlice[int64]) + testInt8x16Binary(t, archsimd.Int8x16.AndNot, andNotSlice[int8]) + testInt8x32Binary(t, archsimd.Int8x32.AndNot, andNotSlice[int8]) + + testUint16x16Binary(t, archsimd.Uint16x16.AndNot, andNotSlice[uint16]) + testUint16x8Binary(t, archsimd.Uint16x8.AndNot, andNotSlice[uint16]) + testUint32x4Binary(t, archsimd.Uint32x4.AndNot, andNotSlice[uint32]) + testUint32x8Binary(t, archsimd.Uint32x8.AndNot, andNotSlice[uint32]) + testUint64x2Binary(t, archsimd.Uint64x2.AndNot, andNotSlice[uint64]) + testUint64x4Binary(t, archsimd.Uint64x4.AndNot, andNotSlice[uint64]) + testUint8x16Binary(t, archsimd.Uint8x16.AndNot, andNotSlice[uint8]) + testUint8x32Binary(t, archsimd.Uint8x32.AndNot, andNotSlice[uint8]) + + if archsimd.X86.AVX512() { + testInt8x64Binary(t, archsimd.Int8x64.AndNot, andNotSlice[int8]) + testInt16x32Binary(t, archsimd.Int16x32.AndNot, andNotSlice[int16]) + testInt32x16Binary(t, archsimd.Int32x16.AndNot, andNotSlice[int32]) + testInt64x8Binary(t, archsimd.Int64x8.AndNot, andNotSlice[int64]) + testUint8x64Binary(t, archsimd.Uint8x64.AndNot, andNotSlice[uint8]) + testUint16x32Binary(t, archsimd.Uint16x32.AndNot, andNotSlice[uint16]) + testUint32x16Binary(t, archsimd.Uint32x16.AndNot, andNotSlice[uint32]) + testUint64x8Binary(t, archsimd.Uint64x8.AndNot, andNotSlice[uint64]) + } +} + +func TestXor(t *testing.T) { + testInt16x16Binary(t, archsimd.Int16x16.Xor, xorSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.Xor, xorSlice[int16]) + testInt32x4Binary(t, archsimd.Int32x4.Xor, xorSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.Xor, xorSlice[int32]) + testInt64x2Binary(t, archsimd.Int64x2.Xor, xorSlice[int64]) + testInt64x4Binary(t, archsimd.Int64x4.Xor, xorSlice[int64]) + testInt8x16Binary(t, archsimd.Int8x16.Xor, xorSlice[int8]) + testInt8x32Binary(t, archsimd.Int8x32.Xor, xorSlice[int8]) + + testUint16x16Binary(t, archsimd.Uint16x16.Xor, xorSlice[uint16]) + testUint16x8Binary(t, archsimd.Uint16x8.Xor, xorSlice[uint16]) + testUint32x4Binary(t, archsimd.Uint32x4.Xor, xorSlice[uint32]) + testUint32x8Binary(t, archsimd.Uint32x8.Xor, xorSlice[uint32]) + testUint64x2Binary(t, archsimd.Uint64x2.Xor, xorSlice[uint64]) + testUint64x4Binary(t, archsimd.Uint64x4.Xor, xorSlice[uint64]) + testUint8x16Binary(t, archsimd.Uint8x16.Xor, xorSlice[uint8]) + testUint8x32Binary(t, archsimd.Uint8x32.Xor, xorSlice[uint8]) + + if archsimd.X86.AVX512() { + // testInt8x64Binary(t, archsimd.Int8x64.Xor, andISlice[int8]) // missing + // testInt16x32Binary(t, archsimd.Int16x32.Xor, andISlice[int16]) // missing + testInt32x16Binary(t, archsimd.Int32x16.Xor, xorSlice[int32]) + testInt64x8Binary(t, archsimd.Int64x8.Xor, xorSlice[int64]) + // testUint8x64Binary(t, archsimd.Uint8x64.Xor, andISlice[uint8]) // missing + // testUint16x32Binary(t, archsimd.Uint16x32.Xor, andISlice[uint16]) // missing + testUint32x16Binary(t, archsimd.Uint32x16.Xor, xorSlice[uint32]) + testUint64x8Binary(t, archsimd.Uint64x8.Xor, xorSlice[uint64]) + } +} + +func TestOr(t *testing.T) { + testInt16x16Binary(t, archsimd.Int16x16.Or, orSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.Or, orSlice[int16]) + testInt32x4Binary(t, archsimd.Int32x4.Or, orSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.Or, orSlice[int32]) + testInt64x2Binary(t, archsimd.Int64x2.Or, orSlice[int64]) + testInt64x4Binary(t, archsimd.Int64x4.Or, orSlice[int64]) + testInt8x16Binary(t, archsimd.Int8x16.Or, orSlice[int8]) + testInt8x32Binary(t, archsimd.Int8x32.Or, orSlice[int8]) + + testUint16x16Binary(t, archsimd.Uint16x16.Or, 
orSlice[uint16]) + testUint16x8Binary(t, archsimd.Uint16x8.Or, orSlice[uint16]) + testUint32x4Binary(t, archsimd.Uint32x4.Or, orSlice[uint32]) + testUint32x8Binary(t, archsimd.Uint32x8.Or, orSlice[uint32]) + testUint64x2Binary(t, archsimd.Uint64x2.Or, orSlice[uint64]) + testUint64x4Binary(t, archsimd.Uint64x4.Or, orSlice[uint64]) + testUint8x16Binary(t, archsimd.Uint8x16.Or, orSlice[uint8]) + testUint8x32Binary(t, archsimd.Uint8x32.Or, orSlice[uint8]) + + if archsimd.X86.AVX512() { + // testInt8x64Binary(t, archsimd.Int8x64.Or, andISlice[int8]) // missing + // testInt16x32Binary(t, archsimd.Int16x32.Or, andISlice[int16]) // missing + testInt32x16Binary(t, archsimd.Int32x16.Or, orSlice[int32]) + testInt64x8Binary(t, archsimd.Int64x8.Or, orSlice[int64]) + // testUint8x64Binary(t, archsimd.Uint8x64.Or, andISlice[uint8]) // missing + // testUint16x32Binary(t, archsimd.Uint16x32.Or, andISlice[uint16]) // missing + testUint32x16Binary(t, archsimd.Uint32x16.Or, orSlice[uint32]) + testUint64x8Binary(t, archsimd.Uint64x8.Or, orSlice[uint64]) + } +} + +func TestMul(t *testing.T) { + testFloat32x4Binary(t, archsimd.Float32x4.Mul, mulSlice[float32]) + testFloat32x8Binary(t, archsimd.Float32x8.Mul, mulSlice[float32]) + testFloat64x2Binary(t, archsimd.Float64x2.Mul, mulSlice[float64]) + testFloat64x4Binary(t, archsimd.Float64x4.Mul, mulSlice[float64]) + + testInt16x16Binary(t, archsimd.Int16x16.Mul, mulSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.Mul, mulSlice[int16]) + testInt32x4Binary(t, archsimd.Int32x4.Mul, mulSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.Mul, mulSlice[int32]) + + // testInt8x16Binary(t, archsimd.Int8x16.Mul, mulSlice[int8]) // nope + // testInt8x32Binary(t, archsimd.Int8x32.Mul, mulSlice[int8]) + + // TODO we should be able to do these, there's no difference between signed/unsigned Mul + // testUint16x16Binary(t, archsimd.Uint16x16.Mul, mulSlice[uint16]) + // testUint16x8Binary(t, archsimd.Uint16x8.Mul, mulSlice[uint16]) + // testUint32x4Binary(t, archsimd.Uint32x4.Mul, mulSlice[uint32]) + // testUint32x8Binary(t, archsimd.Uint32x8.Mul, mulSlice[uint32]) + // testUint64x2Binary(t, archsimd.Uint64x2.Mul, mulSlice[uint64]) + // testUint64x4Binary(t, archsimd.Uint64x4.Mul, mulSlice[uint64]) + + // testUint8x16Binary(t, archsimd.Uint8x16.Mul, mulSlice[uint8]) // nope + // testUint8x32Binary(t, archsimd.Uint8x32.Mul, mulSlice[uint8]) + + if archsimd.X86.AVX512() { + testInt64x2Binary(t, archsimd.Int64x2.Mul, mulSlice[int64]) // avx512 only + testInt64x4Binary(t, archsimd.Int64x4.Mul, mulSlice[int64]) + + testFloat32x16Binary(t, archsimd.Float32x16.Mul, mulSlice[float32]) + testFloat64x8Binary(t, archsimd.Float64x8.Mul, mulSlice[float64]) + + // testInt8x64Binary(t, archsimd.Int8x64.Mul, mulSlice[int8]) // nope + testInt16x32Binary(t, archsimd.Int16x32.Mul, mulSlice[int16]) + testInt32x16Binary(t, archsimd.Int32x16.Mul, mulSlice[int32]) + testInt64x8Binary(t, archsimd.Int64x8.Mul, mulSlice[int64]) + // testUint8x64Binary(t, archsimd.Uint8x64.Mul, mulSlice[uint8]) // nope + + // TODO signed should do the job + // testUint16x32Binary(t, archsimd.Uint16x32.Mul, mulSlice[uint16]) + // testUint32x16Binary(t, archsimd.Uint32x16.Mul, mulSlice[uint32]) + // testUint64x8Binary(t, archsimd.Uint64x8.Mul, mulSlice[uint64]) + } +} + +func TestDiv(t *testing.T) { + testFloat32x4Binary(t, archsimd.Float32x4.Div, divSlice[float32]) + testFloat32x8Binary(t, archsimd.Float32x8.Div, divSlice[float32]) + testFloat64x2Binary(t, archsimd.Float64x2.Div, divSlice[float64]) + 
testFloat64x4Binary(t, archsimd.Float64x4.Div, divSlice[float64]) + + if archsimd.X86.AVX512() { + testFloat32x16Binary(t, archsimd.Float32x16.Div, divSlice[float32]) + testFloat64x8Binary(t, archsimd.Float64x8.Div, divSlice[float64]) + } +} diff --git a/src/simd/internal/simd_test/compare_helpers_test.go b/src/simd/archsimd/internal/simd_test/compare_helpers_test.go similarity index 66% rename from src/simd/internal/simd_test/compare_helpers_test.go rename to src/simd/archsimd/internal/simd_test/compare_helpers_test.go index aef703c66a..15c9f9bc31 100644 --- a/src/simd/internal/simd_test/compare_helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/compare_helpers_test.go @@ -9,18 +9,18 @@ package simd_test import ( - "simd" + "simd/archsimd" "testing" ) // testInt8x16Compare tests the simd comparison method f against the expected behavior generated by want -func testInt8x16Compare(t *testing.T, f func(_, _ simd.Int8x16) simd.Mask8x16, want func(_, _ []int8) []int64) { +func testInt8x16Compare(t *testing.T, f func(_, _ archsimd.Int8x16) archsimd.Mask8x16, want func(_, _ []int8) []int64) { n := 16 t.Helper() forSlicePair(t, int8s, n, func(x, y []int8) bool { t.Helper() - a := simd.LoadInt8x16Slice(x) - b := simd.LoadInt8x16Slice(y) + a := archsimd.LoadInt8x16Slice(x) + b := archsimd.LoadInt8x16Slice(y) g := make([]int8, n) f(a, b).AsInt8x16().StoreSlice(g) w := want(x, y) @@ -29,13 +29,13 @@ func testInt8x16Compare(t *testing.T, f func(_, _ simd.Int8x16) simd.Mask8x16, w } // testInt16x8Compare tests the simd comparison method f against the expected behavior generated by want -func testInt16x8Compare(t *testing.T, f func(_, _ simd.Int16x8) simd.Mask16x8, want func(_, _ []int16) []int64) { +func testInt16x8Compare(t *testing.T, f func(_, _ archsimd.Int16x8) archsimd.Mask16x8, want func(_, _ []int16) []int64) { n := 8 t.Helper() forSlicePair(t, int16s, n, func(x, y []int16) bool { t.Helper() - a := simd.LoadInt16x8Slice(x) - b := simd.LoadInt16x8Slice(y) + a := archsimd.LoadInt16x8Slice(x) + b := archsimd.LoadInt16x8Slice(y) g := make([]int16, n) f(a, b).AsInt16x8().StoreSlice(g) w := want(x, y) @@ -44,13 +44,13 @@ func testInt16x8Compare(t *testing.T, f func(_, _ simd.Int16x8) simd.Mask16x8, w } // testInt32x4Compare tests the simd comparison method f against the expected behavior generated by want -func testInt32x4Compare(t *testing.T, f func(_, _ simd.Int32x4) simd.Mask32x4, want func(_, _ []int32) []int64) { +func testInt32x4Compare(t *testing.T, f func(_, _ archsimd.Int32x4) archsimd.Mask32x4, want func(_, _ []int32) []int64) { n := 4 t.Helper() forSlicePair(t, int32s, n, func(x, y []int32) bool { t.Helper() - a := simd.LoadInt32x4Slice(x) - b := simd.LoadInt32x4Slice(y) + a := archsimd.LoadInt32x4Slice(x) + b := archsimd.LoadInt32x4Slice(y) g := make([]int32, n) f(a, b).AsInt32x4().StoreSlice(g) w := want(x, y) @@ -59,13 +59,13 @@ func testInt32x4Compare(t *testing.T, f func(_, _ simd.Int32x4) simd.Mask32x4, w } // testInt64x2Compare tests the simd comparison method f against the expected behavior generated by want -func testInt64x2Compare(t *testing.T, f func(_, _ simd.Int64x2) simd.Mask64x2, want func(_, _ []int64) []int64) { +func testInt64x2Compare(t *testing.T, f func(_, _ archsimd.Int64x2) archsimd.Mask64x2, want func(_, _ []int64) []int64) { n := 2 t.Helper() forSlicePair(t, int64s, n, func(x, y []int64) bool { t.Helper() - a := simd.LoadInt64x2Slice(x) - b := simd.LoadInt64x2Slice(y) + a := archsimd.LoadInt64x2Slice(x) + b := archsimd.LoadInt64x2Slice(y) g := make([]int64, 
n) f(a, b).AsInt64x2().StoreSlice(g) w := want(x, y) @@ -74,13 +74,13 @@ func testInt64x2Compare(t *testing.T, f func(_, _ simd.Int64x2) simd.Mask64x2, w } // testUint8x16Compare tests the simd comparison method f against the expected behavior generated by want -func testUint8x16Compare(t *testing.T, f func(_, _ simd.Uint8x16) simd.Mask8x16, want func(_, _ []uint8) []int64) { +func testUint8x16Compare(t *testing.T, f func(_, _ archsimd.Uint8x16) archsimd.Mask8x16, want func(_, _ []uint8) []int64) { n := 16 t.Helper() forSlicePair(t, uint8s, n, func(x, y []uint8) bool { t.Helper() - a := simd.LoadUint8x16Slice(x) - b := simd.LoadUint8x16Slice(y) + a := archsimd.LoadUint8x16Slice(x) + b := archsimd.LoadUint8x16Slice(y) g := make([]int8, n) f(a, b).AsInt8x16().StoreSlice(g) w := want(x, y) @@ -89,13 +89,13 @@ func testUint8x16Compare(t *testing.T, f func(_, _ simd.Uint8x16) simd.Mask8x16, } // testUint16x8Compare tests the simd comparison method f against the expected behavior generated by want -func testUint16x8Compare(t *testing.T, f func(_, _ simd.Uint16x8) simd.Mask16x8, want func(_, _ []uint16) []int64) { +func testUint16x8Compare(t *testing.T, f func(_, _ archsimd.Uint16x8) archsimd.Mask16x8, want func(_, _ []uint16) []int64) { n := 8 t.Helper() forSlicePair(t, uint16s, n, func(x, y []uint16) bool { t.Helper() - a := simd.LoadUint16x8Slice(x) - b := simd.LoadUint16x8Slice(y) + a := archsimd.LoadUint16x8Slice(x) + b := archsimd.LoadUint16x8Slice(y) g := make([]int16, n) f(a, b).AsInt16x8().StoreSlice(g) w := want(x, y) @@ -104,13 +104,13 @@ func testUint16x8Compare(t *testing.T, f func(_, _ simd.Uint16x8) simd.Mask16x8, } // testUint32x4Compare tests the simd comparison method f against the expected behavior generated by want -func testUint32x4Compare(t *testing.T, f func(_, _ simd.Uint32x4) simd.Mask32x4, want func(_, _ []uint32) []int64) { +func testUint32x4Compare(t *testing.T, f func(_, _ archsimd.Uint32x4) archsimd.Mask32x4, want func(_, _ []uint32) []int64) { n := 4 t.Helper() forSlicePair(t, uint32s, n, func(x, y []uint32) bool { t.Helper() - a := simd.LoadUint32x4Slice(x) - b := simd.LoadUint32x4Slice(y) + a := archsimd.LoadUint32x4Slice(x) + b := archsimd.LoadUint32x4Slice(y) g := make([]int32, n) f(a, b).AsInt32x4().StoreSlice(g) w := want(x, y) @@ -119,13 +119,13 @@ func testUint32x4Compare(t *testing.T, f func(_, _ simd.Uint32x4) simd.Mask32x4, } // testUint64x2Compare tests the simd comparison method f against the expected behavior generated by want -func testUint64x2Compare(t *testing.T, f func(_, _ simd.Uint64x2) simd.Mask64x2, want func(_, _ []uint64) []int64) { +func testUint64x2Compare(t *testing.T, f func(_, _ archsimd.Uint64x2) archsimd.Mask64x2, want func(_, _ []uint64) []int64) { n := 2 t.Helper() forSlicePair(t, uint64s, n, func(x, y []uint64) bool { t.Helper() - a := simd.LoadUint64x2Slice(x) - b := simd.LoadUint64x2Slice(y) + a := archsimd.LoadUint64x2Slice(x) + b := archsimd.LoadUint64x2Slice(y) g := make([]int64, n) f(a, b).AsInt64x2().StoreSlice(g) w := want(x, y) @@ -134,13 +134,13 @@ func testUint64x2Compare(t *testing.T, f func(_, _ simd.Uint64x2) simd.Mask64x2, } // testFloat32x4Compare tests the simd comparison method f against the expected behavior generated by want -func testFloat32x4Compare(t *testing.T, f func(_, _ simd.Float32x4) simd.Mask32x4, want func(_, _ []float32) []int64) { +func testFloat32x4Compare(t *testing.T, f func(_, _ archsimd.Float32x4) archsimd.Mask32x4, want func(_, _ []float32) []int64) { n := 4 t.Helper() forSlicePair(t, float32s, 
n, func(x, y []float32) bool { t.Helper() - a := simd.LoadFloat32x4Slice(x) - b := simd.LoadFloat32x4Slice(y) + a := archsimd.LoadFloat32x4Slice(x) + b := archsimd.LoadFloat32x4Slice(y) g := make([]int32, n) f(a, b).AsInt32x4().StoreSlice(g) w := want(x, y) @@ -149,13 +149,13 @@ func testFloat32x4Compare(t *testing.T, f func(_, _ simd.Float32x4) simd.Mask32x } // testFloat64x2Compare tests the simd comparison method f against the expected behavior generated by want -func testFloat64x2Compare(t *testing.T, f func(_, _ simd.Float64x2) simd.Mask64x2, want func(_, _ []float64) []int64) { +func testFloat64x2Compare(t *testing.T, f func(_, _ archsimd.Float64x2) archsimd.Mask64x2, want func(_, _ []float64) []int64) { n := 2 t.Helper() forSlicePair(t, float64s, n, func(x, y []float64) bool { t.Helper() - a := simd.LoadFloat64x2Slice(x) - b := simd.LoadFloat64x2Slice(y) + a := archsimd.LoadFloat64x2Slice(x) + b := archsimd.LoadFloat64x2Slice(y) g := make([]int64, n) f(a, b).AsInt64x2().StoreSlice(g) w := want(x, y) @@ -164,13 +164,13 @@ func testFloat64x2Compare(t *testing.T, f func(_, _ simd.Float64x2) simd.Mask64x } // testInt8x32Compare tests the simd comparison method f against the expected behavior generated by want -func testInt8x32Compare(t *testing.T, f func(_, _ simd.Int8x32) simd.Mask8x32, want func(_, _ []int8) []int64) { +func testInt8x32Compare(t *testing.T, f func(_, _ archsimd.Int8x32) archsimd.Mask8x32, want func(_, _ []int8) []int64) { n := 32 t.Helper() forSlicePair(t, int8s, n, func(x, y []int8) bool { t.Helper() - a := simd.LoadInt8x32Slice(x) - b := simd.LoadInt8x32Slice(y) + a := archsimd.LoadInt8x32Slice(x) + b := archsimd.LoadInt8x32Slice(y) g := make([]int8, n) f(a, b).AsInt8x32().StoreSlice(g) w := want(x, y) @@ -179,13 +179,13 @@ func testInt8x32Compare(t *testing.T, f func(_, _ simd.Int8x32) simd.Mask8x32, w } // testInt16x16Compare tests the simd comparison method f against the expected behavior generated by want -func testInt16x16Compare(t *testing.T, f func(_, _ simd.Int16x16) simd.Mask16x16, want func(_, _ []int16) []int64) { +func testInt16x16Compare(t *testing.T, f func(_, _ archsimd.Int16x16) archsimd.Mask16x16, want func(_, _ []int16) []int64) { n := 16 t.Helper() forSlicePair(t, int16s, n, func(x, y []int16) bool { t.Helper() - a := simd.LoadInt16x16Slice(x) - b := simd.LoadInt16x16Slice(y) + a := archsimd.LoadInt16x16Slice(x) + b := archsimd.LoadInt16x16Slice(y) g := make([]int16, n) f(a, b).AsInt16x16().StoreSlice(g) w := want(x, y) @@ -194,13 +194,13 @@ func testInt16x16Compare(t *testing.T, f func(_, _ simd.Int16x16) simd.Mask16x16 } // testInt32x8Compare tests the simd comparison method f against the expected behavior generated by want -func testInt32x8Compare(t *testing.T, f func(_, _ simd.Int32x8) simd.Mask32x8, want func(_, _ []int32) []int64) { +func testInt32x8Compare(t *testing.T, f func(_, _ archsimd.Int32x8) archsimd.Mask32x8, want func(_, _ []int32) []int64) { n := 8 t.Helper() forSlicePair(t, int32s, n, func(x, y []int32) bool { t.Helper() - a := simd.LoadInt32x8Slice(x) - b := simd.LoadInt32x8Slice(y) + a := archsimd.LoadInt32x8Slice(x) + b := archsimd.LoadInt32x8Slice(y) g := make([]int32, n) f(a, b).AsInt32x8().StoreSlice(g) w := want(x, y) @@ -209,13 +209,13 @@ func testInt32x8Compare(t *testing.T, f func(_, _ simd.Int32x8) simd.Mask32x8, w } // testInt64x4Compare tests the simd comparison method f against the expected behavior generated by want -func testInt64x4Compare(t *testing.T, f func(_, _ simd.Int64x4) simd.Mask64x4, want func(_, _ 
[]int64) []int64) { +func testInt64x4Compare(t *testing.T, f func(_, _ archsimd.Int64x4) archsimd.Mask64x4, want func(_, _ []int64) []int64) { n := 4 t.Helper() forSlicePair(t, int64s, n, func(x, y []int64) bool { t.Helper() - a := simd.LoadInt64x4Slice(x) - b := simd.LoadInt64x4Slice(y) + a := archsimd.LoadInt64x4Slice(x) + b := archsimd.LoadInt64x4Slice(y) g := make([]int64, n) f(a, b).AsInt64x4().StoreSlice(g) w := want(x, y) @@ -224,13 +224,13 @@ func testInt64x4Compare(t *testing.T, f func(_, _ simd.Int64x4) simd.Mask64x4, w } // testUint8x32Compare tests the simd comparison method f against the expected behavior generated by want -func testUint8x32Compare(t *testing.T, f func(_, _ simd.Uint8x32) simd.Mask8x32, want func(_, _ []uint8) []int64) { +func testUint8x32Compare(t *testing.T, f func(_, _ archsimd.Uint8x32) archsimd.Mask8x32, want func(_, _ []uint8) []int64) { n := 32 t.Helper() forSlicePair(t, uint8s, n, func(x, y []uint8) bool { t.Helper() - a := simd.LoadUint8x32Slice(x) - b := simd.LoadUint8x32Slice(y) + a := archsimd.LoadUint8x32Slice(x) + b := archsimd.LoadUint8x32Slice(y) g := make([]int8, n) f(a, b).AsInt8x32().StoreSlice(g) w := want(x, y) @@ -239,13 +239,13 @@ func testUint8x32Compare(t *testing.T, f func(_, _ simd.Uint8x32) simd.Mask8x32, } // testUint16x16Compare tests the simd comparison method f against the expected behavior generated by want -func testUint16x16Compare(t *testing.T, f func(_, _ simd.Uint16x16) simd.Mask16x16, want func(_, _ []uint16) []int64) { +func testUint16x16Compare(t *testing.T, f func(_, _ archsimd.Uint16x16) archsimd.Mask16x16, want func(_, _ []uint16) []int64) { n := 16 t.Helper() forSlicePair(t, uint16s, n, func(x, y []uint16) bool { t.Helper() - a := simd.LoadUint16x16Slice(x) - b := simd.LoadUint16x16Slice(y) + a := archsimd.LoadUint16x16Slice(x) + b := archsimd.LoadUint16x16Slice(y) g := make([]int16, n) f(a, b).AsInt16x16().StoreSlice(g) w := want(x, y) @@ -254,13 +254,13 @@ func testUint16x16Compare(t *testing.T, f func(_, _ simd.Uint16x16) simd.Mask16x } // testUint32x8Compare tests the simd comparison method f against the expected behavior generated by want -func testUint32x8Compare(t *testing.T, f func(_, _ simd.Uint32x8) simd.Mask32x8, want func(_, _ []uint32) []int64) { +func testUint32x8Compare(t *testing.T, f func(_, _ archsimd.Uint32x8) archsimd.Mask32x8, want func(_, _ []uint32) []int64) { n := 8 t.Helper() forSlicePair(t, uint32s, n, func(x, y []uint32) bool { t.Helper() - a := simd.LoadUint32x8Slice(x) - b := simd.LoadUint32x8Slice(y) + a := archsimd.LoadUint32x8Slice(x) + b := archsimd.LoadUint32x8Slice(y) g := make([]int32, n) f(a, b).AsInt32x8().StoreSlice(g) w := want(x, y) @@ -269,13 +269,13 @@ func testUint32x8Compare(t *testing.T, f func(_, _ simd.Uint32x8) simd.Mask32x8, } // testUint64x4Compare tests the simd comparison method f against the expected behavior generated by want -func testUint64x4Compare(t *testing.T, f func(_, _ simd.Uint64x4) simd.Mask64x4, want func(_, _ []uint64) []int64) { +func testUint64x4Compare(t *testing.T, f func(_, _ archsimd.Uint64x4) archsimd.Mask64x4, want func(_, _ []uint64) []int64) { n := 4 t.Helper() forSlicePair(t, uint64s, n, func(x, y []uint64) bool { t.Helper() - a := simd.LoadUint64x4Slice(x) - b := simd.LoadUint64x4Slice(y) + a := archsimd.LoadUint64x4Slice(x) + b := archsimd.LoadUint64x4Slice(y) g := make([]int64, n) f(a, b).AsInt64x4().StoreSlice(g) w := want(x, y) @@ -284,13 +284,13 @@ func testUint64x4Compare(t *testing.T, f func(_, _ simd.Uint64x4) simd.Mask64x4, } // 
testFloat32x8Compare tests the simd comparison method f against the expected behavior generated by want -func testFloat32x8Compare(t *testing.T, f func(_, _ simd.Float32x8) simd.Mask32x8, want func(_, _ []float32) []int64) { +func testFloat32x8Compare(t *testing.T, f func(_, _ archsimd.Float32x8) archsimd.Mask32x8, want func(_, _ []float32) []int64) { n := 8 t.Helper() forSlicePair(t, float32s, n, func(x, y []float32) bool { t.Helper() - a := simd.LoadFloat32x8Slice(x) - b := simd.LoadFloat32x8Slice(y) + a := archsimd.LoadFloat32x8Slice(x) + b := archsimd.LoadFloat32x8Slice(y) g := make([]int32, n) f(a, b).AsInt32x8().StoreSlice(g) w := want(x, y) @@ -299,13 +299,13 @@ func testFloat32x8Compare(t *testing.T, f func(_, _ simd.Float32x8) simd.Mask32x } // testFloat64x4Compare tests the simd comparison method f against the expected behavior generated by want -func testFloat64x4Compare(t *testing.T, f func(_, _ simd.Float64x4) simd.Mask64x4, want func(_, _ []float64) []int64) { +func testFloat64x4Compare(t *testing.T, f func(_, _ archsimd.Float64x4) archsimd.Mask64x4, want func(_, _ []float64) []int64) { n := 4 t.Helper() forSlicePair(t, float64s, n, func(x, y []float64) bool { t.Helper() - a := simd.LoadFloat64x4Slice(x) - b := simd.LoadFloat64x4Slice(y) + a := archsimd.LoadFloat64x4Slice(x) + b := archsimd.LoadFloat64x4Slice(y) g := make([]int64, n) f(a, b).AsInt64x4().StoreSlice(g) w := want(x, y) @@ -314,13 +314,13 @@ func testFloat64x4Compare(t *testing.T, f func(_, _ simd.Float64x4) simd.Mask64x } // testInt8x64Compare tests the simd comparison method f against the expected behavior generated by want -func testInt8x64Compare(t *testing.T, f func(_, _ simd.Int8x64) simd.Mask8x64, want func(_, _ []int8) []int64) { +func testInt8x64Compare(t *testing.T, f func(_, _ archsimd.Int8x64) archsimd.Mask8x64, want func(_, _ []int8) []int64) { n := 64 t.Helper() forSlicePair(t, int8s, n, func(x, y []int8) bool { t.Helper() - a := simd.LoadInt8x64Slice(x) - b := simd.LoadInt8x64Slice(y) + a := archsimd.LoadInt8x64Slice(x) + b := archsimd.LoadInt8x64Slice(y) g := make([]int8, n) f(a, b).AsInt8x64().StoreSlice(g) w := want(x, y) @@ -329,13 +329,13 @@ func testInt8x64Compare(t *testing.T, f func(_, _ simd.Int8x64) simd.Mask8x64, w } // testInt16x32Compare tests the simd comparison method f against the expected behavior generated by want -func testInt16x32Compare(t *testing.T, f func(_, _ simd.Int16x32) simd.Mask16x32, want func(_, _ []int16) []int64) { +func testInt16x32Compare(t *testing.T, f func(_, _ archsimd.Int16x32) archsimd.Mask16x32, want func(_, _ []int16) []int64) { n := 32 t.Helper() forSlicePair(t, int16s, n, func(x, y []int16) bool { t.Helper() - a := simd.LoadInt16x32Slice(x) - b := simd.LoadInt16x32Slice(y) + a := archsimd.LoadInt16x32Slice(x) + b := archsimd.LoadInt16x32Slice(y) g := make([]int16, n) f(a, b).AsInt16x32().StoreSlice(g) w := want(x, y) @@ -344,13 +344,13 @@ func testInt16x32Compare(t *testing.T, f func(_, _ simd.Int16x32) simd.Mask16x32 } // testInt32x16Compare tests the simd comparison method f against the expected behavior generated by want -func testInt32x16Compare(t *testing.T, f func(_, _ simd.Int32x16) simd.Mask32x16, want func(_, _ []int32) []int64) { +func testInt32x16Compare(t *testing.T, f func(_, _ archsimd.Int32x16) archsimd.Mask32x16, want func(_, _ []int32) []int64) { n := 16 t.Helper() forSlicePair(t, int32s, n, func(x, y []int32) bool { t.Helper() - a := simd.LoadInt32x16Slice(x) - b := simd.LoadInt32x16Slice(y) + a := archsimd.LoadInt32x16Slice(x) + b := 
archsimd.LoadInt32x16Slice(y) g := make([]int32, n) f(a, b).AsInt32x16().StoreSlice(g) w := want(x, y) @@ -359,13 +359,13 @@ func testInt32x16Compare(t *testing.T, f func(_, _ simd.Int32x16) simd.Mask32x16 } // testInt64x8Compare tests the simd comparison method f against the expected behavior generated by want -func testInt64x8Compare(t *testing.T, f func(_, _ simd.Int64x8) simd.Mask64x8, want func(_, _ []int64) []int64) { +func testInt64x8Compare(t *testing.T, f func(_, _ archsimd.Int64x8) archsimd.Mask64x8, want func(_, _ []int64) []int64) { n := 8 t.Helper() forSlicePair(t, int64s, n, func(x, y []int64) bool { t.Helper() - a := simd.LoadInt64x8Slice(x) - b := simd.LoadInt64x8Slice(y) + a := archsimd.LoadInt64x8Slice(x) + b := archsimd.LoadInt64x8Slice(y) g := make([]int64, n) f(a, b).AsInt64x8().StoreSlice(g) w := want(x, y) @@ -374,13 +374,13 @@ func testInt64x8Compare(t *testing.T, f func(_, _ simd.Int64x8) simd.Mask64x8, w } // testUint8x64Compare tests the simd comparison method f against the expected behavior generated by want -func testUint8x64Compare(t *testing.T, f func(_, _ simd.Uint8x64) simd.Mask8x64, want func(_, _ []uint8) []int64) { +func testUint8x64Compare(t *testing.T, f func(_, _ archsimd.Uint8x64) archsimd.Mask8x64, want func(_, _ []uint8) []int64) { n := 64 t.Helper() forSlicePair(t, uint8s, n, func(x, y []uint8) bool { t.Helper() - a := simd.LoadUint8x64Slice(x) - b := simd.LoadUint8x64Slice(y) + a := archsimd.LoadUint8x64Slice(x) + b := archsimd.LoadUint8x64Slice(y) g := make([]int8, n) f(a, b).AsInt8x64().StoreSlice(g) w := want(x, y) @@ -389,13 +389,13 @@ func testUint8x64Compare(t *testing.T, f func(_, _ simd.Uint8x64) simd.Mask8x64, } // testUint16x32Compare tests the simd comparison method f against the expected behavior generated by want -func testUint16x32Compare(t *testing.T, f func(_, _ simd.Uint16x32) simd.Mask16x32, want func(_, _ []uint16) []int64) { +func testUint16x32Compare(t *testing.T, f func(_, _ archsimd.Uint16x32) archsimd.Mask16x32, want func(_, _ []uint16) []int64) { n := 32 t.Helper() forSlicePair(t, uint16s, n, func(x, y []uint16) bool { t.Helper() - a := simd.LoadUint16x32Slice(x) - b := simd.LoadUint16x32Slice(y) + a := archsimd.LoadUint16x32Slice(x) + b := archsimd.LoadUint16x32Slice(y) g := make([]int16, n) f(a, b).AsInt16x32().StoreSlice(g) w := want(x, y) @@ -404,13 +404,13 @@ func testUint16x32Compare(t *testing.T, f func(_, _ simd.Uint16x32) simd.Mask16x } // testUint32x16Compare tests the simd comparison method f against the expected behavior generated by want -func testUint32x16Compare(t *testing.T, f func(_, _ simd.Uint32x16) simd.Mask32x16, want func(_, _ []uint32) []int64) { +func testUint32x16Compare(t *testing.T, f func(_, _ archsimd.Uint32x16) archsimd.Mask32x16, want func(_, _ []uint32) []int64) { n := 16 t.Helper() forSlicePair(t, uint32s, n, func(x, y []uint32) bool { t.Helper() - a := simd.LoadUint32x16Slice(x) - b := simd.LoadUint32x16Slice(y) + a := archsimd.LoadUint32x16Slice(x) + b := archsimd.LoadUint32x16Slice(y) g := make([]int32, n) f(a, b).AsInt32x16().StoreSlice(g) w := want(x, y) @@ -419,13 +419,13 @@ func testUint32x16Compare(t *testing.T, f func(_, _ simd.Uint32x16) simd.Mask32x } // testUint64x8Compare tests the simd comparison method f against the expected behavior generated by want -func testUint64x8Compare(t *testing.T, f func(_, _ simd.Uint64x8) simd.Mask64x8, want func(_, _ []uint64) []int64) { +func testUint64x8Compare(t *testing.T, f func(_, _ archsimd.Uint64x8) archsimd.Mask64x8, want func(_, _ 
[]uint64) []int64) { n := 8 t.Helper() forSlicePair(t, uint64s, n, func(x, y []uint64) bool { t.Helper() - a := simd.LoadUint64x8Slice(x) - b := simd.LoadUint64x8Slice(y) + a := archsimd.LoadUint64x8Slice(x) + b := archsimd.LoadUint64x8Slice(y) g := make([]int64, n) f(a, b).AsInt64x8().StoreSlice(g) w := want(x, y) @@ -434,13 +434,13 @@ func testUint64x8Compare(t *testing.T, f func(_, _ simd.Uint64x8) simd.Mask64x8, } // testFloat32x16Compare tests the simd comparison method f against the expected behavior generated by want -func testFloat32x16Compare(t *testing.T, f func(_, _ simd.Float32x16) simd.Mask32x16, want func(_, _ []float32) []int64) { +func testFloat32x16Compare(t *testing.T, f func(_, _ archsimd.Float32x16) archsimd.Mask32x16, want func(_, _ []float32) []int64) { n := 16 t.Helper() forSlicePair(t, float32s, n, func(x, y []float32) bool { t.Helper() - a := simd.LoadFloat32x16Slice(x) - b := simd.LoadFloat32x16Slice(y) + a := archsimd.LoadFloat32x16Slice(x) + b := archsimd.LoadFloat32x16Slice(y) g := make([]int32, n) f(a, b).AsInt32x16().StoreSlice(g) w := want(x, y) @@ -449,13 +449,13 @@ func testFloat32x16Compare(t *testing.T, f func(_, _ simd.Float32x16) simd.Mask3 } // testFloat64x8Compare tests the simd comparison method f against the expected behavior generated by want -func testFloat64x8Compare(t *testing.T, f func(_, _ simd.Float64x8) simd.Mask64x8, want func(_, _ []float64) []int64) { +func testFloat64x8Compare(t *testing.T, f func(_, _ archsimd.Float64x8) archsimd.Mask64x8, want func(_, _ []float64) []int64) { n := 8 t.Helper() forSlicePair(t, float64s, n, func(x, y []float64) bool { t.Helper() - a := simd.LoadFloat64x8Slice(x) - b := simd.LoadFloat64x8Slice(y) + a := archsimd.LoadFloat64x8Slice(x) + b := archsimd.LoadFloat64x8Slice(y) g := make([]int64, n) f(a, b).AsInt64x8().StoreSlice(g) w := want(x, y) diff --git a/src/simd/archsimd/internal/simd_test/compare_test.go b/src/simd/archsimd/internal/simd_test/compare_test.go new file mode 100644 index 0000000000..4485e9bdaa --- /dev/null +++ b/src/simd/archsimd/internal/simd_test/compare_test.go @@ -0,0 +1,265 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
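The comparison helpers above convert each result mask to a signed-integer vector (AsInt8x16 and so on) before storing it, so a true lane comes back as -1 (all bits set) and a false lane as 0. The scalar references such as lessSlice are again defined elsewhere in simd_test; a plausible sketch under that all-ones convention, using the standard library's cmp.Ordered constraint:

import "cmp"

// lessSlice is the scalar model for vector Less: -1 where x[i] < y[i], else 0.
func lessSlice[T cmp.Ordered](x, y []T) []int64 {
	z := make([]int64, len(x))
	for i := range x {
		if x[i] < y[i] {
			z[i] = -1 // all-ones lane, matching the stored hardware mask
		}
	}
	return z
}

The comment at the top of the new compare_test.go below notes that AVX2 lacks most comparison forms and synthesizes them from greater-than and equality; for unsigned operands the usual trick is to flip the sign bit so that a signed compare yields the unsigned order. A scalar illustration of that identity (the algebra only, not the compiler's actual lowering):

// unsignedGreater reports x > y for uint8 using only a signed compare.
func unsignedGreater(x, y uint8) bool {
	return int8(x^0x80) > int8(y^0x80) // bias both operands by 128
}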
+ +//go:build goexperiment.simd && amd64 + +package simd_test + +import ( + "simd/archsimd" + "testing" +) + +// AVX2 lacks most comparisons, but they can be synthesized +// from > and = +var comparisonFixed bool = archsimd.X86.AVX512() + +func TestLess(t *testing.T) { + testFloat32x4Compare(t, archsimd.Float32x4.Less, lessSlice[float32]) + testFloat32x8Compare(t, archsimd.Float32x8.Less, lessSlice[float32]) + testFloat64x2Compare(t, archsimd.Float64x2.Less, lessSlice[float64]) + testFloat64x4Compare(t, archsimd.Float64x4.Less, lessSlice[float64]) + + testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16]) + testInt16x8Compare(t, archsimd.Int16x8.Less, lessSlice[int16]) + testInt32x4Compare(t, archsimd.Int32x4.Less, lessSlice[int32]) + testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32]) + testInt64x2Compare(t, archsimd.Int64x2.Less, lessSlice[int64]) + testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64]) + testInt8x16Compare(t, archsimd.Int8x16.Less, lessSlice[int8]) + testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8]) + + testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16]) + testUint16x8Compare(t, archsimd.Uint16x8.Less, lessSlice[uint16]) + testUint32x4Compare(t, archsimd.Uint32x4.Less, lessSlice[uint32]) + testUint32x8Compare(t, archsimd.Uint32x8.Less, lessSlice[uint32]) + testUint64x2Compare(t, archsimd.Uint64x2.Less, lessSlice[uint64]) + testUint64x4Compare(t, archsimd.Uint64x4.Less, lessSlice[uint64]) + testUint8x16Compare(t, archsimd.Uint8x16.Less, lessSlice[uint8]) + testUint8x32Compare(t, archsimd.Uint8x32.Less, lessSlice[uint8]) + + if archsimd.X86.AVX512() { + testFloat32x16Compare(t, archsimd.Float32x16.Less, lessSlice[float32]) + testFloat64x8Compare(t, archsimd.Float64x8.Less, lessSlice[float64]) + testInt8x64Compare(t, archsimd.Int8x64.Less, lessSlice[int8]) + testInt16x32Compare(t, archsimd.Int16x32.Less, lessSlice[int16]) + testInt32x16Compare(t, archsimd.Int32x16.Less, lessSlice[int32]) + testInt64x8Compare(t, archsimd.Int64x8.Less, lessSlice[int64]) + testUint8x64Compare(t, archsimd.Uint8x64.Less, lessSlice[uint8]) + testUint16x32Compare(t, archsimd.Uint16x32.Less, lessSlice[uint16]) + testUint32x16Compare(t, archsimd.Uint32x16.Less, lessSlice[uint32]) + testUint64x8Compare(t, archsimd.Uint64x8.Less, lessSlice[uint64]) + } +} + +func TestLessEqual(t *testing.T) { + testFloat32x4Compare(t, archsimd.Float32x4.LessEqual, lessEqualSlice[float32]) +
testFloat32x8Compare(t, archsimd.Float32x8.LessEqual, lessEqualSlice[float32]) + testFloat64x2Compare(t, archsimd.Float64x2.LessEqual, lessEqualSlice[float64]) + testFloat64x4Compare(t, archsimd.Float64x4.LessEqual, lessEqualSlice[float64]) + + testInt16x16Compare(t, archsimd.Int16x16.LessEqual, lessEqualSlice[int16]) + testInt16x8Compare(t, archsimd.Int16x8.LessEqual, lessEqualSlice[int16]) + testInt32x4Compare(t, archsimd.Int32x4.LessEqual, lessEqualSlice[int32]) + testInt32x8Compare(t, archsimd.Int32x8.LessEqual, lessEqualSlice[int32]) + testInt64x2Compare(t, archsimd.Int64x2.LessEqual, lessEqualSlice[int64]) + testInt64x4Compare(t, archsimd.Int64x4.LessEqual, lessEqualSlice[int64]) + testInt8x16Compare(t, archsimd.Int8x16.LessEqual, lessEqualSlice[int8]) + testInt8x32Compare(t, archsimd.Int8x32.LessEqual, lessEqualSlice[int8]) + + testUint16x16Compare(t, archsimd.Uint16x16.LessEqual, lessEqualSlice[uint16]) + testUint16x8Compare(t, archsimd.Uint16x8.LessEqual, lessEqualSlice[uint16]) + testUint32x4Compare(t, archsimd.Uint32x4.LessEqual, lessEqualSlice[uint32]) + testUint32x8Compare(t, archsimd.Uint32x8.LessEqual, lessEqualSlice[uint32]) + testUint64x2Compare(t, archsimd.Uint64x2.LessEqual, lessEqualSlice[uint64]) + testUint64x4Compare(t, archsimd.Uint64x4.LessEqual, lessEqualSlice[uint64]) + testUint8x16Compare(t, archsimd.Uint8x16.LessEqual, lessEqualSlice[uint8]) + testUint8x32Compare(t, archsimd.Uint8x32.LessEqual, lessEqualSlice[uint8]) + + if archsimd.X86.AVX512() { + testFloat32x16Compare(t, archsimd.Float32x16.LessEqual, lessEqualSlice[float32]) + testFloat64x8Compare(t, archsimd.Float64x8.LessEqual, lessEqualSlice[float64]) + testInt8x64Compare(t, archsimd.Int8x64.LessEqual, lessEqualSlice[int8]) + testInt16x32Compare(t, archsimd.Int16x32.LessEqual, lessEqualSlice[int16]) + testInt32x16Compare(t, archsimd.Int32x16.LessEqual, lessEqualSlice[int32]) + testInt64x8Compare(t, archsimd.Int64x8.LessEqual, lessEqualSlice[int64]) + testUint8x64Compare(t, archsimd.Uint8x64.LessEqual, lessEqualSlice[uint8]) + testUint16x32Compare(t, archsimd.Uint16x32.LessEqual, lessEqualSlice[uint16]) + testUint32x16Compare(t, archsimd.Uint32x16.LessEqual, lessEqualSlice[uint32]) + testUint64x8Compare(t, archsimd.Uint64x8.LessEqual, lessEqualSlice[uint64]) + } +} + +func TestGreater(t *testing.T) { + testFloat32x4Compare(t, archsimd.Float32x4.Greater, greaterSlice[float32]) + testFloat32x8Compare(t, archsimd.Float32x8.Greater, greaterSlice[float32]) + testFloat64x2Compare(t, archsimd.Float64x2.Greater, greaterSlice[float64]) + testFloat64x4Compare(t, archsimd.Float64x4.Greater, greaterSlice[float64]) + + testInt16x16Compare(t, archsimd.Int16x16.Greater, greaterSlice[int16]) + testInt16x8Compare(t, archsimd.Int16x8.Greater, greaterSlice[int16]) + testInt32x4Compare(t, archsimd.Int32x4.Greater, greaterSlice[int32]) + testInt32x8Compare(t, archsimd.Int32x8.Greater, greaterSlice[int32]) + + testInt64x2Compare(t, archsimd.Int64x2.Greater, greaterSlice[int64]) + testInt64x4Compare(t, archsimd.Int64x4.Greater, greaterSlice[int64]) + testInt8x16Compare(t, archsimd.Int8x16.Greater, greaterSlice[int8]) + testInt8x32Compare(t, archsimd.Int8x32.Greater, greaterSlice[int8]) + + testUint16x16Compare(t, archsimd.Uint16x16.Greater, greaterSlice[uint16]) + testUint16x8Compare(t, archsimd.Uint16x8.Greater, greaterSlice[uint16]) + testUint32x4Compare(t, archsimd.Uint32x4.Greater, greaterSlice[uint32]) + testUint32x8Compare(t, archsimd.Uint32x8.Greater, greaterSlice[uint32]) + + testUint64x2Compare(t, 
archsimd.Uint64x2.Greater, greaterSlice[uint64]) + testUint64x4Compare(t, archsimd.Uint64x4.Greater, greaterSlice[uint64]) + testUint8x16Compare(t, archsimd.Uint8x16.Greater, greaterSlice[uint8]) + testUint8x32Compare(t, archsimd.Uint8x32.Greater, greaterSlice[uint8]) + + if archsimd.X86.AVX512() { + + testFloat32x16Compare(t, archsimd.Float32x16.Greater, greaterSlice[float32]) + testFloat64x8Compare(t, archsimd.Float64x8.Greater, greaterSlice[float64]) + testInt8x64Compare(t, archsimd.Int8x64.Greater, greaterSlice[int8]) + testInt16x32Compare(t, archsimd.Int16x32.Greater, greaterSlice[int16]) + testInt32x16Compare(t, archsimd.Int32x16.Greater, greaterSlice[int32]) + testInt64x8Compare(t, archsimd.Int64x8.Greater, greaterSlice[int64]) + testUint8x64Compare(t, archsimd.Uint8x64.Greater, greaterSlice[uint8]) + testUint16x32Compare(t, archsimd.Uint16x32.Greater, greaterSlice[uint16]) + testUint32x16Compare(t, archsimd.Uint32x16.Greater, greaterSlice[uint32]) + testUint64x8Compare(t, archsimd.Uint64x8.Greater, greaterSlice[uint64]) + } +} + +func TestGreaterEqual(t *testing.T) { + testFloat32x4Compare(t, archsimd.Float32x4.GreaterEqual, greaterEqualSlice[float32]) + testFloat32x8Compare(t, archsimd.Float32x8.GreaterEqual, greaterEqualSlice[float32]) + testFloat64x2Compare(t, archsimd.Float64x2.GreaterEqual, greaterEqualSlice[float64]) + testFloat64x4Compare(t, archsimd.Float64x4.GreaterEqual, greaterEqualSlice[float64]) + + testInt16x16Compare(t, archsimd.Int16x16.GreaterEqual, greaterEqualSlice[int16]) + testInt16x8Compare(t, archsimd.Int16x8.GreaterEqual, greaterEqualSlice[int16]) + testInt32x4Compare(t, archsimd.Int32x4.GreaterEqual, greaterEqualSlice[int32]) + testInt32x8Compare(t, archsimd.Int32x8.GreaterEqual, greaterEqualSlice[int32]) + testInt64x2Compare(t, archsimd.Int64x2.GreaterEqual, greaterEqualSlice[int64]) + testInt64x4Compare(t, archsimd.Int64x4.GreaterEqual, greaterEqualSlice[int64]) + testInt8x16Compare(t, archsimd.Int8x16.GreaterEqual, greaterEqualSlice[int8]) + testInt8x32Compare(t, archsimd.Int8x32.GreaterEqual, greaterEqualSlice[int8]) + + testUint16x16Compare(t, archsimd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16]) + testUint16x8Compare(t, archsimd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16]) + testUint32x4Compare(t, archsimd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32]) + testUint32x8Compare(t, archsimd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32]) + testUint64x2Compare(t, archsimd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64]) + testUint64x4Compare(t, archsimd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64]) + testUint8x16Compare(t, archsimd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8]) + testUint8x32Compare(t, archsimd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8]) + + if archsimd.X86.AVX512() { + testFloat32x16Compare(t, archsimd.Float32x16.GreaterEqual, greaterEqualSlice[float32]) + testFloat64x8Compare(t, archsimd.Float64x8.GreaterEqual, greaterEqualSlice[float64]) + testInt8x64Compare(t, archsimd.Int8x64.GreaterEqual, greaterEqualSlice[int8]) + testInt16x32Compare(t, archsimd.Int16x32.GreaterEqual, greaterEqualSlice[int16]) + testInt32x16Compare(t, archsimd.Int32x16.GreaterEqual, greaterEqualSlice[int32]) + testInt64x8Compare(t, archsimd.Int64x8.GreaterEqual, greaterEqualSlice[int64]) + testUint8x64Compare(t, archsimd.Uint8x64.GreaterEqual, greaterEqualSlice[uint8]) + testUint16x32Compare(t, archsimd.Uint16x32.GreaterEqual, greaterEqualSlice[uint16]) + testUint32x16Compare(t, archsimd.Uint32x16.GreaterEqual, greaterEqualSlice[uint32]) + 
testUint64x8Compare(t, archsimd.Uint64x8.GreaterEqual, greaterEqualSlice[uint64]) + } +} + +func TestEqual(t *testing.T) { + testFloat32x4Compare(t, archsimd.Float32x4.Equal, equalSlice[float32]) + testFloat32x8Compare(t, archsimd.Float32x8.Equal, equalSlice[float32]) + testFloat64x2Compare(t, archsimd.Float64x2.Equal, equalSlice[float64]) + testFloat64x4Compare(t, archsimd.Float64x4.Equal, equalSlice[float64]) + + testInt16x16Compare(t, archsimd.Int16x16.Equal, equalSlice[int16]) + testInt16x8Compare(t, archsimd.Int16x8.Equal, equalSlice[int16]) + testInt32x4Compare(t, archsimd.Int32x4.Equal, equalSlice[int32]) + testInt32x8Compare(t, archsimd.Int32x8.Equal, equalSlice[int32]) + testInt64x2Compare(t, archsimd.Int64x2.Equal, equalSlice[int64]) + testInt64x4Compare(t, archsimd.Int64x4.Equal, equalSlice[int64]) + testInt8x16Compare(t, archsimd.Int8x16.Equal, equalSlice[int8]) + testInt8x32Compare(t, archsimd.Int8x32.Equal, equalSlice[int8]) + + testUint16x16Compare(t, archsimd.Uint16x16.Equal, equalSlice[uint16]) + testUint16x8Compare(t, archsimd.Uint16x8.Equal, equalSlice[uint16]) + testUint32x4Compare(t, archsimd.Uint32x4.Equal, equalSlice[uint32]) + testUint32x8Compare(t, archsimd.Uint32x8.Equal, equalSlice[uint32]) + testUint64x2Compare(t, archsimd.Uint64x2.Equal, equalSlice[uint64]) + testUint64x4Compare(t, archsimd.Uint64x4.Equal, equalSlice[uint64]) + testUint8x16Compare(t, archsimd.Uint8x16.Equal, equalSlice[uint8]) + testUint8x32Compare(t, archsimd.Uint8x32.Equal, equalSlice[uint8]) + + if archsimd.X86.AVX512() { + testFloat32x16Compare(t, archsimd.Float32x16.Equal, equalSlice[float32]) + testFloat64x8Compare(t, archsimd.Float64x8.Equal, equalSlice[float64]) + testInt8x64Compare(t, archsimd.Int8x64.Equal, equalSlice[int8]) + testInt16x32Compare(t, archsimd.Int16x32.Equal, equalSlice[int16]) + testInt32x16Compare(t, archsimd.Int32x16.Equal, equalSlice[int32]) + testInt64x8Compare(t, archsimd.Int64x8.Equal, equalSlice[int64]) + testUint8x64Compare(t, archsimd.Uint8x64.Equal, equalSlice[uint8]) + testUint16x32Compare(t, archsimd.Uint16x32.Equal, equalSlice[uint16]) + testUint32x16Compare(t, archsimd.Uint32x16.Equal, equalSlice[uint32]) + testUint64x8Compare(t, archsimd.Uint64x8.Equal, equalSlice[uint64]) + } +} + +func TestNotEqual(t *testing.T) { + testFloat32x4Compare(t, archsimd.Float32x4.NotEqual, notEqualSlice[float32]) + testFloat32x8Compare(t, archsimd.Float32x8.NotEqual, notEqualSlice[float32]) + testFloat64x2Compare(t, archsimd.Float64x2.NotEqual, notEqualSlice[float64]) + testFloat64x4Compare(t, archsimd.Float64x4.NotEqual, notEqualSlice[float64]) + + testInt16x16Compare(t, archsimd.Int16x16.NotEqual, notEqualSlice[int16]) + testInt16x8Compare(t, archsimd.Int16x8.NotEqual, notEqualSlice[int16]) + testInt32x4Compare(t, archsimd.Int32x4.NotEqual, notEqualSlice[int32]) + testInt32x8Compare(t, archsimd.Int32x8.NotEqual, notEqualSlice[int32]) + testInt64x2Compare(t, archsimd.Int64x2.NotEqual, notEqualSlice[int64]) + testInt64x4Compare(t, archsimd.Int64x4.NotEqual, notEqualSlice[int64]) + testInt8x16Compare(t, archsimd.Int8x16.NotEqual, notEqualSlice[int8]) + testInt8x32Compare(t, archsimd.Int8x32.NotEqual, notEqualSlice[int8]) + + testUint16x16Compare(t, archsimd.Uint16x16.NotEqual, notEqualSlice[uint16]) + testUint16x8Compare(t, archsimd.Uint16x8.NotEqual, notEqualSlice[uint16]) + testUint32x4Compare(t, archsimd.Uint32x4.NotEqual, notEqualSlice[uint32]) + testUint32x8Compare(t, archsimd.Uint32x8.NotEqual, notEqualSlice[uint32]) + testUint64x2Compare(t, archsimd.Uint64x2.NotEqual, 
notEqualSlice[uint64]) + testUint64x4Compare(t, archsimd.Uint64x4.NotEqual, notEqualSlice[uint64]) + testUint8x16Compare(t, archsimd.Uint8x16.NotEqual, notEqualSlice[uint8]) + testUint8x32Compare(t, archsimd.Uint8x32.NotEqual, notEqualSlice[uint8]) + + if archsimd.X86.AVX512() { + testFloat32x16Compare(t, archsimd.Float32x16.NotEqual, notEqualSlice[float32]) + testFloat64x8Compare(t, archsimd.Float64x8.NotEqual, notEqualSlice[float64]) + testInt8x64Compare(t, archsimd.Int8x64.NotEqual, notEqualSlice[int8]) + testInt16x32Compare(t, archsimd.Int16x32.NotEqual, notEqualSlice[int16]) + testInt32x16Compare(t, archsimd.Int32x16.NotEqual, notEqualSlice[int32]) + testInt64x8Compare(t, archsimd.Int64x8.NotEqual, notEqualSlice[int64]) + testUint8x64Compare(t, archsimd.Uint8x64.NotEqual, notEqualSlice[uint8]) + testUint16x32Compare(t, archsimd.Uint16x32.NotEqual, notEqualSlice[uint16]) + testUint32x16Compare(t, archsimd.Uint32x16.NotEqual, notEqualSlice[uint32]) + testUint64x8Compare(t, archsimd.Uint64x8.NotEqual, notEqualSlice[uint64]) + } +} diff --git a/src/simd/internal/simd_test/comparemasked_helpers_test.go b/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go similarity index 76% rename from src/simd/internal/simd_test/comparemasked_helpers_test.go rename to src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go index 4c05d10bb3..895ea2606a 100644 --- a/src/simd/internal/simd_test/comparemasked_helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go @@ -9,22 +9,22 @@ package simd_test import ( - "simd" + "simd/archsimd" "testing" ) // testInt8x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testInt8x16CompareMasked(t *testing.T, - f func(_, _ simd.Int8x16, m simd.Mask8x16) simd.Mask8x16, + f func(_, _ archsimd.Int8x16, m archsimd.Mask8x16) archsimd.Mask8x16, want func(_, _ []int8) []int64) { n := 16 t.Helper() forSlicePairMasked(t, int8s, n, func(x, y []int8, m []bool) bool { t.Helper() - a := simd.LoadInt8x16Slice(x) - b := simd.LoadInt8x16Slice(y) - k := simd.LoadInt8x16Slice(toVect[int8](m)).ToMask() + a := archsimd.LoadInt8x16Slice(x) + b := archsimd.LoadInt8x16Slice(y) + k := archsimd.LoadInt8x16Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x16().StoreSlice(g) w := want(x, y) @@ -40,15 +40,15 @@ func testInt8x16CompareMasked(t *testing.T, // testInt16x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
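
Every testXxxCompareMasked helper in this file follows the same pattern: load x, y, and a bool mask widened to vector lanes, apply the SIMD method f, then check lane by lane against the scalar reference want, with lanes outside the mask forced to zero. A minimal sketch of what such a reference looks like, assuming the all-ones-for-true lane convention these tests rely on (the real equalSlice/notEqualSlice are defined elsewhere in the package and may differ):

    // equalSliceSketch is an illustrative stand-in for the package's real
    // scalar references: a true lane is all ones (-1), a false lane is 0,
    // matching how the vector comparisons report per-lane results.
    func equalSliceSketch[T comparable](x, y []T) []int64 {
    	r := make([]int64, len(x))
    	for i := range x {
    		if x[i] == y[i] {
    			r[i] = -1
    		}
    	}
    	return r
    }
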
func testInt16x8CompareMasked(t *testing.T, - f func(_, _ simd.Int16x8, m simd.Mask16x8) simd.Mask16x8, + f func(_, _ archsimd.Int16x8, m archsimd.Mask16x8) archsimd.Mask16x8, want func(_, _ []int16) []int64) { n := 8 t.Helper() forSlicePairMasked(t, int16s, n, func(x, y []int16, m []bool) bool { t.Helper() - a := simd.LoadInt16x8Slice(x) - b := simd.LoadInt16x8Slice(y) - k := simd.LoadInt16x8Slice(toVect[int16](m)).ToMask() + a := archsimd.LoadInt16x8Slice(x) + b := archsimd.LoadInt16x8Slice(y) + k := archsimd.LoadInt16x8Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x8().StoreSlice(g) w := want(x, y) @@ -64,15 +64,15 @@ func testInt16x8CompareMasked(t *testing.T, // testInt32x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testInt32x4CompareMasked(t *testing.T, - f func(_, _ simd.Int32x4, m simd.Mask32x4) simd.Mask32x4, + f func(_, _ archsimd.Int32x4, m archsimd.Mask32x4) archsimd.Mask32x4, want func(_, _ []int32) []int64) { n := 4 t.Helper() forSlicePairMasked(t, int32s, n, func(x, y []int32, m []bool) bool { t.Helper() - a := simd.LoadInt32x4Slice(x) - b := simd.LoadInt32x4Slice(y) - k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask() + a := archsimd.LoadInt32x4Slice(x) + b := archsimd.LoadInt32x4Slice(y) + k := archsimd.LoadInt32x4Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x4().StoreSlice(g) w := want(x, y) @@ -88,15 +88,15 @@ func testInt32x4CompareMasked(t *testing.T, // testInt64x2CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testInt64x2CompareMasked(t *testing.T, - f func(_, _ simd.Int64x2, m simd.Mask64x2) simd.Mask64x2, + f func(_, _ archsimd.Int64x2, m archsimd.Mask64x2) archsimd.Mask64x2, want func(_, _ []int64) []int64) { n := 2 t.Helper() forSlicePairMasked(t, int64s, n, func(x, y []int64, m []bool) bool { t.Helper() - a := simd.LoadInt64x2Slice(x) - b := simd.LoadInt64x2Slice(y) - k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask() + a := archsimd.LoadInt64x2Slice(x) + b := archsimd.LoadInt64x2Slice(y) + k := archsimd.LoadInt64x2Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x2().StoreSlice(g) w := want(x, y) @@ -112,15 +112,15 @@ func testInt64x2CompareMasked(t *testing.T, // testUint8x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
func testUint8x16CompareMasked(t *testing.T, - f func(_, _ simd.Uint8x16, m simd.Mask8x16) simd.Mask8x16, + f func(_, _ archsimd.Uint8x16, m archsimd.Mask8x16) archsimd.Mask8x16, want func(_, _ []uint8) []int64) { n := 16 t.Helper() forSlicePairMasked(t, uint8s, n, func(x, y []uint8, m []bool) bool { t.Helper() - a := simd.LoadUint8x16Slice(x) - b := simd.LoadUint8x16Slice(y) - k := simd.LoadInt8x16Slice(toVect[int8](m)).ToMask() + a := archsimd.LoadUint8x16Slice(x) + b := archsimd.LoadUint8x16Slice(y) + k := archsimd.LoadInt8x16Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x16().StoreSlice(g) w := want(x, y) @@ -136,15 +136,15 @@ func testUint8x16CompareMasked(t *testing.T, // testUint16x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testUint16x8CompareMasked(t *testing.T, - f func(_, _ simd.Uint16x8, m simd.Mask16x8) simd.Mask16x8, + f func(_, _ archsimd.Uint16x8, m archsimd.Mask16x8) archsimd.Mask16x8, want func(_, _ []uint16) []int64) { n := 8 t.Helper() forSlicePairMasked(t, uint16s, n, func(x, y []uint16, m []bool) bool { t.Helper() - a := simd.LoadUint16x8Slice(x) - b := simd.LoadUint16x8Slice(y) - k := simd.LoadInt16x8Slice(toVect[int16](m)).ToMask() + a := archsimd.LoadUint16x8Slice(x) + b := archsimd.LoadUint16x8Slice(y) + k := archsimd.LoadInt16x8Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x8().StoreSlice(g) w := want(x, y) @@ -160,15 +160,15 @@ func testUint16x8CompareMasked(t *testing.T, // testUint32x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testUint32x4CompareMasked(t *testing.T, - f func(_, _ simd.Uint32x4, m simd.Mask32x4) simd.Mask32x4, + f func(_, _ archsimd.Uint32x4, m archsimd.Mask32x4) archsimd.Mask32x4, want func(_, _ []uint32) []int64) { n := 4 t.Helper() forSlicePairMasked(t, uint32s, n, func(x, y []uint32, m []bool) bool { t.Helper() - a := simd.LoadUint32x4Slice(x) - b := simd.LoadUint32x4Slice(y) - k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask() + a := archsimd.LoadUint32x4Slice(x) + b := archsimd.LoadUint32x4Slice(y) + k := archsimd.LoadInt32x4Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x4().StoreSlice(g) w := want(x, y) @@ -184,15 +184,15 @@ func testUint32x4CompareMasked(t *testing.T, // testUint64x2CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
func testUint64x2CompareMasked(t *testing.T, - f func(_, _ simd.Uint64x2, m simd.Mask64x2) simd.Mask64x2, + f func(_, _ archsimd.Uint64x2, m archsimd.Mask64x2) archsimd.Mask64x2, want func(_, _ []uint64) []int64) { n := 2 t.Helper() forSlicePairMasked(t, uint64s, n, func(x, y []uint64, m []bool) bool { t.Helper() - a := simd.LoadUint64x2Slice(x) - b := simd.LoadUint64x2Slice(y) - k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask() + a := archsimd.LoadUint64x2Slice(x) + b := archsimd.LoadUint64x2Slice(y) + k := archsimd.LoadInt64x2Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x2().StoreSlice(g) w := want(x, y) @@ -208,15 +208,15 @@ func testUint64x2CompareMasked(t *testing.T, // testFloat32x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testFloat32x4CompareMasked(t *testing.T, - f func(_, _ simd.Float32x4, m simd.Mask32x4) simd.Mask32x4, + f func(_, _ archsimd.Float32x4, m archsimd.Mask32x4) archsimd.Mask32x4, want func(_, _ []float32) []int64) { n := 4 t.Helper() forSlicePairMasked(t, float32s, n, func(x, y []float32, m []bool) bool { t.Helper() - a := simd.LoadFloat32x4Slice(x) - b := simd.LoadFloat32x4Slice(y) - k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask() + a := archsimd.LoadFloat32x4Slice(x) + b := archsimd.LoadFloat32x4Slice(y) + k := archsimd.LoadInt32x4Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x4().StoreSlice(g) w := want(x, y) @@ -232,15 +232,15 @@ func testFloat32x4CompareMasked(t *testing.T, // testFloat64x2CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testFloat64x2CompareMasked(t *testing.T, - f func(_, _ simd.Float64x2, m simd.Mask64x2) simd.Mask64x2, + f func(_, _ archsimd.Float64x2, m archsimd.Mask64x2) archsimd.Mask64x2, want func(_, _ []float64) []int64) { n := 2 t.Helper() forSlicePairMasked(t, float64s, n, func(x, y []float64, m []bool) bool { t.Helper() - a := simd.LoadFloat64x2Slice(x) - b := simd.LoadFloat64x2Slice(y) - k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask() + a := archsimd.LoadFloat64x2Slice(x) + b := archsimd.LoadFloat64x2Slice(y) + k := archsimd.LoadInt64x2Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x2().StoreSlice(g) w := want(x, y) @@ -256,15 +256,15 @@ func testFloat64x2CompareMasked(t *testing.T, // testInt8x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
func testInt8x32CompareMasked(t *testing.T, - f func(_, _ simd.Int8x32, m simd.Mask8x32) simd.Mask8x32, + f func(_, _ archsimd.Int8x32, m archsimd.Mask8x32) archsimd.Mask8x32, want func(_, _ []int8) []int64) { n := 32 t.Helper() forSlicePairMasked(t, int8s, n, func(x, y []int8, m []bool) bool { t.Helper() - a := simd.LoadInt8x32Slice(x) - b := simd.LoadInt8x32Slice(y) - k := simd.LoadInt8x32Slice(toVect[int8](m)).ToMask() + a := archsimd.LoadInt8x32Slice(x) + b := archsimd.LoadInt8x32Slice(y) + k := archsimd.LoadInt8x32Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x32().StoreSlice(g) w := want(x, y) @@ -280,15 +280,15 @@ func testInt8x32CompareMasked(t *testing.T, // testInt16x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testInt16x16CompareMasked(t *testing.T, - f func(_, _ simd.Int16x16, m simd.Mask16x16) simd.Mask16x16, + f func(_, _ archsimd.Int16x16, m archsimd.Mask16x16) archsimd.Mask16x16, want func(_, _ []int16) []int64) { n := 16 t.Helper() forSlicePairMasked(t, int16s, n, func(x, y []int16, m []bool) bool { t.Helper() - a := simd.LoadInt16x16Slice(x) - b := simd.LoadInt16x16Slice(y) - k := simd.LoadInt16x16Slice(toVect[int16](m)).ToMask() + a := archsimd.LoadInt16x16Slice(x) + b := archsimd.LoadInt16x16Slice(y) + k := archsimd.LoadInt16x16Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x16().StoreSlice(g) w := want(x, y) @@ -304,15 +304,15 @@ func testInt16x16CompareMasked(t *testing.T, // testInt32x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testInt32x8CompareMasked(t *testing.T, - f func(_, _ simd.Int32x8, m simd.Mask32x8) simd.Mask32x8, + f func(_, _ archsimd.Int32x8, m archsimd.Mask32x8) archsimd.Mask32x8, want func(_, _ []int32) []int64) { n := 8 t.Helper() forSlicePairMasked(t, int32s, n, func(x, y []int32, m []bool) bool { t.Helper() - a := simd.LoadInt32x8Slice(x) - b := simd.LoadInt32x8Slice(y) - k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask() + a := archsimd.LoadInt32x8Slice(x) + b := archsimd.LoadInt32x8Slice(y) + k := archsimd.LoadInt32x8Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x8().StoreSlice(g) w := want(x, y) @@ -328,15 +328,15 @@ func testInt32x8CompareMasked(t *testing.T, // testInt64x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
func testInt64x4CompareMasked(t *testing.T, - f func(_, _ simd.Int64x4, m simd.Mask64x4) simd.Mask64x4, + f func(_, _ archsimd.Int64x4, m archsimd.Mask64x4) archsimd.Mask64x4, want func(_, _ []int64) []int64) { n := 4 t.Helper() forSlicePairMasked(t, int64s, n, func(x, y []int64, m []bool) bool { t.Helper() - a := simd.LoadInt64x4Slice(x) - b := simd.LoadInt64x4Slice(y) - k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask() + a := archsimd.LoadInt64x4Slice(x) + b := archsimd.LoadInt64x4Slice(y) + k := archsimd.LoadInt64x4Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x4().StoreSlice(g) w := want(x, y) @@ -352,15 +352,15 @@ func testInt64x4CompareMasked(t *testing.T, // testUint8x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testUint8x32CompareMasked(t *testing.T, - f func(_, _ simd.Uint8x32, m simd.Mask8x32) simd.Mask8x32, + f func(_, _ archsimd.Uint8x32, m archsimd.Mask8x32) archsimd.Mask8x32, want func(_, _ []uint8) []int64) { n := 32 t.Helper() forSlicePairMasked(t, uint8s, n, func(x, y []uint8, m []bool) bool { t.Helper() - a := simd.LoadUint8x32Slice(x) - b := simd.LoadUint8x32Slice(y) - k := simd.LoadInt8x32Slice(toVect[int8](m)).ToMask() + a := archsimd.LoadUint8x32Slice(x) + b := archsimd.LoadUint8x32Slice(y) + k := archsimd.LoadInt8x32Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x32().StoreSlice(g) w := want(x, y) @@ -376,15 +376,15 @@ func testUint8x32CompareMasked(t *testing.T, // testUint16x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testUint16x16CompareMasked(t *testing.T, - f func(_, _ simd.Uint16x16, m simd.Mask16x16) simd.Mask16x16, + f func(_, _ archsimd.Uint16x16, m archsimd.Mask16x16) archsimd.Mask16x16, want func(_, _ []uint16) []int64) { n := 16 t.Helper() forSlicePairMasked(t, uint16s, n, func(x, y []uint16, m []bool) bool { t.Helper() - a := simd.LoadUint16x16Slice(x) - b := simd.LoadUint16x16Slice(y) - k := simd.LoadInt16x16Slice(toVect[int16](m)).ToMask() + a := archsimd.LoadUint16x16Slice(x) + b := archsimd.LoadUint16x16Slice(y) + k := archsimd.LoadInt16x16Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x16().StoreSlice(g) w := want(x, y) @@ -400,15 +400,15 @@ func testUint16x16CompareMasked(t *testing.T, // testUint32x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
func testUint32x8CompareMasked(t *testing.T, - f func(_, _ simd.Uint32x8, m simd.Mask32x8) simd.Mask32x8, + f func(_, _ archsimd.Uint32x8, m archsimd.Mask32x8) archsimd.Mask32x8, want func(_, _ []uint32) []int64) { n := 8 t.Helper() forSlicePairMasked(t, uint32s, n, func(x, y []uint32, m []bool) bool { t.Helper() - a := simd.LoadUint32x8Slice(x) - b := simd.LoadUint32x8Slice(y) - k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask() + a := archsimd.LoadUint32x8Slice(x) + b := archsimd.LoadUint32x8Slice(y) + k := archsimd.LoadInt32x8Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x8().StoreSlice(g) w := want(x, y) @@ -424,15 +424,15 @@ func testUint32x8CompareMasked(t *testing.T, // testUint64x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testUint64x4CompareMasked(t *testing.T, - f func(_, _ simd.Uint64x4, m simd.Mask64x4) simd.Mask64x4, + f func(_, _ archsimd.Uint64x4, m archsimd.Mask64x4) archsimd.Mask64x4, want func(_, _ []uint64) []int64) { n := 4 t.Helper() forSlicePairMasked(t, uint64s, n, func(x, y []uint64, m []bool) bool { t.Helper() - a := simd.LoadUint64x4Slice(x) - b := simd.LoadUint64x4Slice(y) - k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask() + a := archsimd.LoadUint64x4Slice(x) + b := archsimd.LoadUint64x4Slice(y) + k := archsimd.LoadInt64x4Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x4().StoreSlice(g) w := want(x, y) @@ -448,15 +448,15 @@ func testUint64x4CompareMasked(t *testing.T, // testFloat32x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testFloat32x8CompareMasked(t *testing.T, - f func(_, _ simd.Float32x8, m simd.Mask32x8) simd.Mask32x8, + f func(_, _ archsimd.Float32x8, m archsimd.Mask32x8) archsimd.Mask32x8, want func(_, _ []float32) []int64) { n := 8 t.Helper() forSlicePairMasked(t, float32s, n, func(x, y []float32, m []bool) bool { t.Helper() - a := simd.LoadFloat32x8Slice(x) - b := simd.LoadFloat32x8Slice(y) - k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask() + a := archsimd.LoadFloat32x8Slice(x) + b := archsimd.LoadFloat32x8Slice(y) + k := archsimd.LoadInt32x8Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x8().StoreSlice(g) w := want(x, y) @@ -472,15 +472,15 @@ func testFloat32x8CompareMasked(t *testing.T, // testFloat64x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
func testFloat64x4CompareMasked(t *testing.T, - f func(_, _ simd.Float64x4, m simd.Mask64x4) simd.Mask64x4, + f func(_, _ archsimd.Float64x4, m archsimd.Mask64x4) archsimd.Mask64x4, want func(_, _ []float64) []int64) { n := 4 t.Helper() forSlicePairMasked(t, float64s, n, func(x, y []float64, m []bool) bool { t.Helper() - a := simd.LoadFloat64x4Slice(x) - b := simd.LoadFloat64x4Slice(y) - k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask() + a := archsimd.LoadFloat64x4Slice(x) + b := archsimd.LoadFloat64x4Slice(y) + k := archsimd.LoadInt64x4Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x4().StoreSlice(g) w := want(x, y) @@ -496,15 +496,15 @@ func testFloat64x4CompareMasked(t *testing.T, // testInt8x64CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testInt8x64CompareMasked(t *testing.T, - f func(_, _ simd.Int8x64, m simd.Mask8x64) simd.Mask8x64, + f func(_, _ archsimd.Int8x64, m archsimd.Mask8x64) archsimd.Mask8x64, want func(_, _ []int8) []int64) { n := 64 t.Helper() forSlicePairMasked(t, int8s, n, func(x, y []int8, m []bool) bool { t.Helper() - a := simd.LoadInt8x64Slice(x) - b := simd.LoadInt8x64Slice(y) - k := simd.LoadInt8x64Slice(toVect[int8](m)).ToMask() + a := archsimd.LoadInt8x64Slice(x) + b := archsimd.LoadInt8x64Slice(y) + k := archsimd.LoadInt8x64Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x64().StoreSlice(g) w := want(x, y) @@ -520,15 +520,15 @@ func testInt8x64CompareMasked(t *testing.T, // testInt16x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testInt16x32CompareMasked(t *testing.T, - f func(_, _ simd.Int16x32, m simd.Mask16x32) simd.Mask16x32, + f func(_, _ archsimd.Int16x32, m archsimd.Mask16x32) archsimd.Mask16x32, want func(_, _ []int16) []int64) { n := 32 t.Helper() forSlicePairMasked(t, int16s, n, func(x, y []int16, m []bool) bool { t.Helper() - a := simd.LoadInt16x32Slice(x) - b := simd.LoadInt16x32Slice(y) - k := simd.LoadInt16x32Slice(toVect[int16](m)).ToMask() + a := archsimd.LoadInt16x32Slice(x) + b := archsimd.LoadInt16x32Slice(y) + k := archsimd.LoadInt16x32Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x32().StoreSlice(g) w := want(x, y) @@ -544,15 +544,15 @@ func testInt16x32CompareMasked(t *testing.T, // testInt32x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
func testInt32x16CompareMasked(t *testing.T, - f func(_, _ simd.Int32x16, m simd.Mask32x16) simd.Mask32x16, + f func(_, _ archsimd.Int32x16, m archsimd.Mask32x16) archsimd.Mask32x16, want func(_, _ []int32) []int64) { n := 16 t.Helper() forSlicePairMasked(t, int32s, n, func(x, y []int32, m []bool) bool { t.Helper() - a := simd.LoadInt32x16Slice(x) - b := simd.LoadInt32x16Slice(y) - k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask() + a := archsimd.LoadInt32x16Slice(x) + b := archsimd.LoadInt32x16Slice(y) + k := archsimd.LoadInt32x16Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x16().StoreSlice(g) w := want(x, y) @@ -568,15 +568,15 @@ func testInt32x16CompareMasked(t *testing.T, // testInt64x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testInt64x8CompareMasked(t *testing.T, - f func(_, _ simd.Int64x8, m simd.Mask64x8) simd.Mask64x8, + f func(_, _ archsimd.Int64x8, m archsimd.Mask64x8) archsimd.Mask64x8, want func(_, _ []int64) []int64) { n := 8 t.Helper() forSlicePairMasked(t, int64s, n, func(x, y []int64, m []bool) bool { t.Helper() - a := simd.LoadInt64x8Slice(x) - b := simd.LoadInt64x8Slice(y) - k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask() + a := archsimd.LoadInt64x8Slice(x) + b := archsimd.LoadInt64x8Slice(y) + k := archsimd.LoadInt64x8Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x8().StoreSlice(g) w := want(x, y) @@ -592,15 +592,15 @@ func testInt64x8CompareMasked(t *testing.T, // testUint8x64CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testUint8x64CompareMasked(t *testing.T, - f func(_, _ simd.Uint8x64, m simd.Mask8x64) simd.Mask8x64, + f func(_, _ archsimd.Uint8x64, m archsimd.Mask8x64) archsimd.Mask8x64, want func(_, _ []uint8) []int64) { n := 64 t.Helper() forSlicePairMasked(t, uint8s, n, func(x, y []uint8, m []bool) bool { t.Helper() - a := simd.LoadUint8x64Slice(x) - b := simd.LoadUint8x64Slice(y) - k := simd.LoadInt8x64Slice(toVect[int8](m)).ToMask() + a := archsimd.LoadUint8x64Slice(x) + b := archsimd.LoadUint8x64Slice(y) + k := archsimd.LoadInt8x64Slice(toVect[int8](m)).ToMask() g := make([]int8, n) f(a, b, k).AsInt8x64().StoreSlice(g) w := want(x, y) @@ -616,15 +616,15 @@ func testUint8x64CompareMasked(t *testing.T, // testUint16x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
func testUint16x32CompareMasked(t *testing.T, - f func(_, _ simd.Uint16x32, m simd.Mask16x32) simd.Mask16x32, + f func(_, _ archsimd.Uint16x32, m archsimd.Mask16x32) archsimd.Mask16x32, want func(_, _ []uint16) []int64) { n := 32 t.Helper() forSlicePairMasked(t, uint16s, n, func(x, y []uint16, m []bool) bool { t.Helper() - a := simd.LoadUint16x32Slice(x) - b := simd.LoadUint16x32Slice(y) - k := simd.LoadInt16x32Slice(toVect[int16](m)).ToMask() + a := archsimd.LoadUint16x32Slice(x) + b := archsimd.LoadUint16x32Slice(y) + k := archsimd.LoadInt16x32Slice(toVect[int16](m)).ToMask() g := make([]int16, n) f(a, b, k).AsInt16x32().StoreSlice(g) w := want(x, y) @@ -640,15 +640,15 @@ func testUint16x32CompareMasked(t *testing.T, // testUint32x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testUint32x16CompareMasked(t *testing.T, - f func(_, _ simd.Uint32x16, m simd.Mask32x16) simd.Mask32x16, + f func(_, _ archsimd.Uint32x16, m archsimd.Mask32x16) archsimd.Mask32x16, want func(_, _ []uint32) []int64) { n := 16 t.Helper() forSlicePairMasked(t, uint32s, n, func(x, y []uint32, m []bool) bool { t.Helper() - a := simd.LoadUint32x16Slice(x) - b := simd.LoadUint32x16Slice(y) - k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask() + a := archsimd.LoadUint32x16Slice(x) + b := archsimd.LoadUint32x16Slice(y) + k := archsimd.LoadInt32x16Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x16().StoreSlice(g) w := want(x, y) @@ -664,15 +664,15 @@ func testUint32x16CompareMasked(t *testing.T, // testUint64x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testUint64x8CompareMasked(t *testing.T, - f func(_, _ simd.Uint64x8, m simd.Mask64x8) simd.Mask64x8, + f func(_, _ archsimd.Uint64x8, m archsimd.Mask64x8) archsimd.Mask64x8, want func(_, _ []uint64) []int64) { n := 8 t.Helper() forSlicePairMasked(t, uint64s, n, func(x, y []uint64, m []bool) bool { t.Helper() - a := simd.LoadUint64x8Slice(x) - b := simd.LoadUint64x8Slice(y) - k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask() + a := archsimd.LoadUint64x8Slice(x) + b := archsimd.LoadUint64x8Slice(y) + k := archsimd.LoadInt64x8Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x8().StoreSlice(g) w := want(x, y) @@ -688,15 +688,15 @@ func testUint64x8CompareMasked(t *testing.T, // testFloat32x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. 
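
The mask plumbing in these helpers goes through toVect, defined alongside the other test helpers. A hypothetical sketch of its shape, assuming true lanes widen to -1 and false lanes to 0 so that LoadIntNxMSlice(...).ToMask() reconstructs the intended vector mask:

    // toVectSketch is an assumed model of toVect, not the real helper:
    // widen a []bool into signed lanes, -1 for true, 0 for false.
    func toVectSketch[T int8 | int16 | int32 | int64](m []bool) []T {
    	v := make([]T, len(m))
    	for i, b := range m {
    		if b {
    			v[i] = -1
    		}
    	}
    	return v
    }
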
func testFloat32x16CompareMasked(t *testing.T, - f func(_, _ simd.Float32x16, m simd.Mask32x16) simd.Mask32x16, + f func(_, _ archsimd.Float32x16, m archsimd.Mask32x16) archsimd.Mask32x16, want func(_, _ []float32) []int64) { n := 16 t.Helper() forSlicePairMasked(t, float32s, n, func(x, y []float32, m []bool) bool { t.Helper() - a := simd.LoadFloat32x16Slice(x) - b := simd.LoadFloat32x16Slice(y) - k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask() + a := archsimd.LoadFloat32x16Slice(x) + b := archsimd.LoadFloat32x16Slice(y) + k := archsimd.LoadInt32x16Slice(toVect[int32](m)).ToMask() g := make([]int32, n) f(a, b, k).AsInt32x16().StoreSlice(g) w := want(x, y) @@ -712,15 +712,15 @@ func testFloat32x16CompareMasked(t *testing.T, // testFloat64x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want // The mask is applied to the output of want; anything not in the mask, is zeroed. func testFloat64x8CompareMasked(t *testing.T, - f func(_, _ simd.Float64x8, m simd.Mask64x8) simd.Mask64x8, + f func(_, _ archsimd.Float64x8, m archsimd.Mask64x8) archsimd.Mask64x8, want func(_, _ []float64) []int64) { n := 8 t.Helper() forSlicePairMasked(t, float64s, n, func(x, y []float64, m []bool) bool { t.Helper() - a := simd.LoadFloat64x8Slice(x) - b := simd.LoadFloat64x8Slice(y) - k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask() + a := archsimd.LoadFloat64x8Slice(x) + b := archsimd.LoadFloat64x8Slice(y) + k := archsimd.LoadInt64x8Slice(toVect[int64](m)).ToMask() g := make([]int64, n) f(a, b, k).AsInt64x8().StoreSlice(g) w := want(x, y) diff --git a/src/simd/internal/simd_test/generate.go b/src/simd/archsimd/internal/simd_test/generate.go similarity index 100% rename from src/simd/internal/simd_test/generate.go rename to src/simd/archsimd/internal/simd_test/generate.go diff --git a/src/simd/internal/simd_test/helpers_test.go b/src/simd/archsimd/internal/simd_test/helpers_test.go similarity index 99% rename from src/simd/internal/simd_test/helpers_test.go rename to src/simd/archsimd/internal/simd_test/helpers_test.go index 0a246e0d7d..b9d5098dba 100644 --- a/src/simd/internal/simd_test/helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/helpers_test.go @@ -8,7 +8,7 @@ package simd_test import ( "math" - "simd/internal/test_helpers" + "simd/archsimd/internal/test_helpers" "testing" ) diff --git a/src/simd/internal/simd_test/simd_test.go b/src/simd/archsimd/internal/simd_test/simd_test.go similarity index 75% rename from src/simd/internal/simd_test/simd_test.go rename to src/simd/archsimd/internal/simd_test/simd_test.go index f7538b8003..0e8300f3c9 100644 --- a/src/simd/internal/simd_test/simd_test.go +++ b/src/simd/archsimd/internal/simd_test/simd_test.go @@ -8,7 +8,7 @@ package simd_test import ( "reflect" - "simd" + "simd/archsimd" "slices" "testing" ) @@ -23,27 +23,27 @@ func TestType(t *testing.T) { // - Type alias is ok // - Type conversion is ok // - Conversion to interface is ok - type alias = simd.Int32x4 - type maskT simd.Mask32x4 + type alias = archsimd.Int32x4 + type maskT archsimd.Mask32x4 type myStruct struct { x alias - y *simd.Int32x4 + y *archsimd.Int32x4 z maskT } vals := [4]int32{1, 2, 3, 4} - v := myStruct{x: simd.LoadInt32x4(&vals)} + v := myStruct{x: archsimd.LoadInt32x4(&vals)} // masking elements 1 and 2. 
want := []int32{2, 4, 0, 0} - y := simd.LoadInt32x4(&vals) + y := archsimd.LoadInt32x4(&vals) v.y = &y sink = y - if !simd.X86.AVX512GFNI() { + if !archsimd.X86.AVX512GFNI() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } - v.z = maskT(simd.Mask32x4FromBits(0b0011)) - *v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z)) + v.z = maskT(archsimd.Mask32x4FromBits(0b0011)) + *v.y = v.y.Add(v.x).Masked(archsimd.Mask32x4(v.z)) got := [4]int32{} v.y.Store(&got) @@ -52,7 +52,7 @@ func TestType(t *testing.T) { func TestUncomparable(t *testing.T) { // Test that simd vectors are not comparable - var x, y any = simd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), simd.LoadUint32x4(&[4]uint32{5, 6, 7, 8}) + var x, y any = archsimd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), archsimd.LoadUint32x4(&[4]uint32{5, 6, 7, 8}) shouldPanic := func(fn func()) { defer func() { if recover() == nil { @@ -69,9 +69,9 @@ func TestFuncValue(t *testing.T) { xv := [4]int32{1, 2, 3, 4} yv := [4]int32{5, 6, 7, 8} want := []int32{6, 8, 10, 12} - x := simd.LoadInt32x4(&xv) - y := simd.LoadInt32x4(&yv) - fn := simd.Int32x4.Add + x := archsimd.LoadInt32x4(&xv) + y := archsimd.LoadInt32x4(&yv) + fn := archsimd.Int32x4.Add sink = fn x = fn(x, y) got := [4]int32{} @@ -85,13 +85,13 @@ func TestReflectMethod(t *testing.T) { xv := [4]int32{1, 2, 3, 4} yv := [4]int32{5, 6, 7, 8} want := []int32{6, 8, 10, 12} - x := simd.LoadInt32x4(&xv) - y := simd.LoadInt32x4(&yv) + x := archsimd.LoadInt32x4(&xv) + y := archsimd.LoadInt32x4(&yv) m, ok := reflect.TypeOf(x).MethodByName("Add") if !ok { t.Fatal("Add method not found") } - fn := m.Func.Interface().(func(x, y simd.Int32x4) simd.Int32x4) + fn := m.Func.Interface().(func(x, y archsimd.Int32x4) archsimd.Int32x4) x = fn(x, y) got := [4]int32{} x.Store(&got) @@ -99,12 +99,12 @@ func TestReflectMethod(t *testing.T) { } func TestVectorConversion(t *testing.T) { - if !simd.X86.AVX512GFNI() { + if !archsimd.X86.AVX512GFNI() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } xv := [4]int32{1, 2, 3, 4} - x := simd.LoadInt32x4(&xv) + x := archsimd.LoadInt32x4(&xv) xPromoted := x.AsInt64x2() xPromotedDemoted := xPromoted.AsInt32x4() got := [4]int32{} @@ -117,13 +117,13 @@ func TestVectorConversion(t *testing.T) { } func TestMaskConversion(t *testing.T) { - if !simd.X86.AVX512GFNI() { + if !archsimd.X86.AVX512GFNI() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } - x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0}) - mask := simd.Int32x4{}.Sub(x).ToMask() - y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask) + x := archsimd.LoadInt32x4Slice([]int32{5, 0, 7, 0}) + mask := archsimd.Int32x4{}.Sub(x).ToMask() + y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask) want := [4]int32{6, 0, 10, 0} got := make([]int32, 4) y.StoreSlice(got) @@ -131,7 +131,7 @@ func TestMaskConversion(t *testing.T) { } func TestPermute(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } @@ -139,7 +139,7 @@ func TestPermute(t *testing.T) { indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0} want := []int64{8, 7, 6, 5, 4, 3, 2, 1} got := make([]int64, 8) - simd.LoadInt64x8Slice(x).Permute(simd.LoadUint64x8Slice(indices)).StoreSlice(got) + archsimd.LoadInt64x8Slice(x).Permute(archsimd.LoadUint64x8Slice(indices)).StoreSlice(got) checkSlices(t, got, want) } @@ -148,12 +148,12 @@ func TestPermuteOrZero(t *testing.T) { indices := []int8{7, 6, 5, 4, 3, 2, 
1, 0, -1, 8, -1, 9, -1, 10, -1, 11} want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12} got := make([]uint8, len(x)) - simd.LoadUint8x16Slice(x).PermuteOrZero(simd.LoadInt8x16Slice(indices)).StoreSlice(got) + archsimd.LoadUint8x16Slice(x).PermuteOrZero(archsimd.LoadInt8x16Slice(indices)).StoreSlice(got) checkSlices(t, got, want) } func TestConcatPermute(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } @@ -162,17 +162,17 @@ func TestConcatPermute(t *testing.T) { indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0} want := []int64{-8, 7, -6, 5, -4, 3, -2, 1} got := make([]int64, 8) - simd.LoadInt64x8Slice(x).ConcatPermute(simd.LoadInt64x8Slice(y), simd.LoadUint64x8Slice(indices)).StoreSlice(got) + archsimd.LoadInt64x8Slice(x).ConcatPermute(archsimd.LoadInt64x8Slice(y), archsimd.LoadUint64x8Slice(indices)).StoreSlice(got) checkSlices(t, got, want) } func TestCompress(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } - v1234 := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) - v2400 := v1234.Compress(simd.Mask32x4FromBits(0b1010)) + v1234 := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) + v2400 := v1234.Compress(archsimd.Mask32x4FromBits(0b1010)) got := make([]int32, 4) v2400.StoreSlice(got) want := []int32{2, 4, 0, 0} @@ -182,12 +182,12 @@ func TestCompress(t *testing.T) { } func TestExpand(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } - v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0}) - v2400 := v3400.Expand(simd.Mask32x4FromBits(0b1010)) + v3400 := archsimd.LoadInt32x4Slice([]int32{3, 4, 0, 0}) + v2400 := v3400.Expand(archsimd.Mask32x4FromBits(0b1010)) got := make([]int32, 4) v2400.StoreSlice(got) want := []int32{0, 3, 0, 4} @@ -200,13 +200,13 @@ var testShiftAllVal uint64 = 3 func TestShiftAll(t *testing.T) { got := make([]int32, 4) - simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got) + archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got) for _, v := range got { if v != 0b1100 { t.Errorf("expect 0b1100, got %b", v) } } - simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got) + archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got) for _, v := range got { if v != 0b11000 { t.Errorf("expect 0b11000, got %b", v) @@ -217,7 +217,7 @@ func TestShiftAll(t *testing.T) { func TestSlicesInt8(t *testing.T) { a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} - v := simd.LoadInt8x32Slice(a) + v := archsimd.LoadInt8x32Slice(a) b := make([]int8, 32, 32) v.StoreSlice(b) checkSlices(t, a, b) @@ -226,7 +226,7 @@ func TestSlicesInt8(t *testing.T) { func TestSlicesInt8SetElem(t *testing.T) { a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} - v := simd.LoadInt8x16Slice(a) + v := archsimd.LoadInt8x16Slice(a) v = v.SetElem(3, 13) a[3] = 13 @@ -239,7 +239,7 @@ func TestSlicesInt8SetElem(t *testing.T) { func TestSlicesInt8GetElem(t *testing.T) { a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} 
- v := simd.LoadInt8x16Slice(a) + v := archsimd.LoadInt8x16Slice(a) e := v.GetElem(2) if e != a[2] { t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2]) @@ -257,7 +257,7 @@ func TestSlicesInt8TooShortLoad(t *testing.T) { }() a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic - v := simd.LoadInt8x32Slice(a) + v := archsimd.LoadInt8x32Slice(a) b := make([]int8, 32, 32) v.StoreSlice(b) checkSlices(t, a, b) @@ -273,7 +273,7 @@ func TestSlicesInt8TooShortStore(t *testing.T) { }() a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} - v := simd.LoadInt8x32Slice(a) + v := archsimd.LoadInt8x32Slice(a) b := make([]int8, 31) // TOO SHORT, should panic v.StoreSlice(b) checkSlices(t, a, b) @@ -281,7 +281,7 @@ func TestSlicesInt8TooShortStore(t *testing.T) { func TestSlicesFloat64(t *testing.T) { a := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine - v := simd.LoadFloat64x4Slice(a) + v := archsimd.LoadFloat64x4Slice(a) b := make([]float64, 4, 4) v.StoreSlice(b) for i := range b { @@ -293,19 +293,19 @@ func TestSlicesFloat64(t *testing.T) { // TODO: try to reduce this test to be smaller. func TestMergeLocals(t *testing.T) { - testMergeLocalswrapper(t, simd.Int64x4.Add) + testMergeLocalswrapper(t, archsimd.Int64x4.Add) } //go:noinline func forceSpill() {} -func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) simd.Int64x4) { +func testMergeLocalswrapper(t *testing.T, op func(archsimd.Int64x4, archsimd.Int64x4) archsimd.Int64x4) { t.Helper() s0 := []int64{0, 1, 2, 3} s1 := []int64{-1, 0, -1, 0} want := []int64{-1, 1, 1, 3} - v := simd.LoadInt64x4Slice(s0) - m := simd.LoadInt64x4Slice(s1) + v := archsimd.LoadInt64x4Slice(s0) + m := archsimd.LoadInt64x4Slice(s1) forceSpill() got := make([]int64, 4) gotv := op(v, m) @@ -318,14 +318,14 @@ func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) si } func TestBitMaskFromBits(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } results := [2]int64{} want := [2]int64{0, 6} - m := simd.Mask64x2FromBits(0b10) - simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) + m := archsimd.Mask64x2FromBits(0b10) + archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) for i := range 2 { if results[i] != want[i] { t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) @@ -336,14 +336,14 @@ func TestBitMaskFromBits(t *testing.T) { var maskForTestBitMaskFromBitsLoad = uint8(0b10) func TestBitMaskFromBitsLoad(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } results := [2]int64{} want := [2]int64{0, 6} - m := simd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad) - simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) + m := archsimd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad) + archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) for i := range 2 { if results[i] != want[i] { t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) @@ -352,11 +352,11 @@ func TestBitMaskFromBitsLoad(t *testing.T) 
{ } func TestBitMaskToBits(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } - if v := simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 { + if v := archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 { t.Errorf("Want 0b101, got %b", v) } } @@ -364,11 +364,11 @@ func TestBitMaskToBits(t *testing.T) { var maskForTestBitMaskFromBitsStore uint8 func TestBitMaskToBitsStore(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } - maskForTestBitMaskFromBitsStore = simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits() + maskForTestBitMaskFromBitsStore = archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits() if maskForTestBitMaskFromBitsStore != 0b101 { t.Errorf("Want 0b101, got %b", maskForTestBitMaskFromBitsStore) } @@ -378,8 +378,8 @@ func TestMergeFloat(t *testing.T) { k := make([]int64, 4, 4) s := make([]float64, 4, 4) - a := simd.LoadFloat64x4Slice([]float64{1, 2, 3, 4}) - b := simd.LoadFloat64x4Slice([]float64{4, 2, 3, 1}) + a := archsimd.LoadFloat64x4Slice([]float64{1, 2, 3, 4}) + b := archsimd.LoadFloat64x4Slice([]float64{4, 2, 3, 1}) g := a.Greater(b) g.AsInt64x4().StoreSlice(k) c := a.Merge(b, g) @@ -391,7 +391,7 @@ func TestMergeFloat(t *testing.T) { } func TestMergeFloat512(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } @@ -399,8 +399,8 @@ func TestMergeFloat512(t *testing.T) { k := make([]int64, 8, 8) s := make([]float64, 8, 8) - a := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) - b := simd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1}) + a := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) + b := archsimd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1}) g := a.Greater(b) g.AsInt64x8().StoreSlice(k) c := a.Merge(b, g) @@ -418,12 +418,12 @@ func TestMergeFloat512(t *testing.T) { var ro uint8 = 2 func TestRotateAllVariable(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } got := make([]int32, 4) - simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got) + archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got) for _, v := range got { if v != 0b1100 { t.Errorf("Want 0b1100, got %b", v) @@ -433,37 +433,37 @@ func TestRotateAllVariable(t *testing.T) { func TestBroadcastUint32x4(t *testing.T) { s := make([]uint32, 4, 4) - simd.BroadcastUint32x4(123456789).StoreSlice(s) + archsimd.BroadcastUint32x4(123456789).StoreSlice(s) checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789}) } func TestBroadcastFloat32x8(t *testing.T) { s := make([]float32, 8, 8) - simd.BroadcastFloat32x8(123456789).StoreSlice(s) + archsimd.BroadcastFloat32x8(123456789).StoreSlice(s) checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789}) } func TestBroadcastFloat64x2(t *testing.T) { s := make([]float64, 2, 2) - simd.BroadcastFloat64x2(123456789).StoreSlice(s) + archsimd.BroadcastFloat64x2(123456789).StoreSlice(s) checkSlices(t, s, []float64{123456789, 123456789}) } func TestBroadcastUint64x2(t *testing.T) { s := make([]uint64, 2, 2) - 
simd.BroadcastUint64x2(123456789).StoreSlice(s) + archsimd.BroadcastUint64x2(123456789).StoreSlice(s) checkSlices(t, s, []uint64{123456789, 123456789}) } func TestBroadcastUint16x8(t *testing.T) { s := make([]uint16, 8, 8) - simd.BroadcastUint16x8(12345).StoreSlice(s) + archsimd.BroadcastUint16x8(12345).StoreSlice(s) checkSlices(t, s, []uint16{12345, 12345, 12345, 12345}) } func TestBroadcastInt8x32(t *testing.T) { s := make([]int8, 32, 32) - simd.BroadcastInt8x32(-123).StoreSlice(s) + archsimd.BroadcastInt8x32(-123).StoreSlice(s) checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, @@ -472,7 +472,7 @@ func TestBroadcastInt8x32(t *testing.T) { } func TestMaskOpt512(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } @@ -480,10 +480,10 @@ func TestMaskOpt512(t *testing.T) { k := make([]int64, 8, 8) s := make([]float64, 8, 8) - a := simd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0}) - b := simd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1}) - c := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) - d := simd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16}) + a := archsimd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0}) + b := archsimd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1}) + c := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) + d := archsimd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16}) g := a.Greater(b) e := c.Add(d).Masked(g) e.StoreSlice(s) @@ -496,7 +496,7 @@ func TestMaskOpt512(t *testing.T) { // matrices, but then flattens the rows in order, i.e // x: ABCD ==> a: A1B2 // y: 1234 b: C3D4 -func flattenedTranspose(x, y simd.Int32x4) (a, b simd.Int32x4) { +func flattenedTranspose(x, y archsimd.Int32x4) (a, b archsimd.Int32x4) { return x.InterleaveLo(y), x.InterleaveHi(y) } @@ -504,8 +504,8 @@ func TestFlattenedTranspose(t *testing.T) { r := make([]int32, 4, 4) s := make([]int32, 4, 4) - x := simd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD}) - y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) + x := archsimd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD}) + y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) a, b := flattenedTranspose(x, y) a.StoreSlice(r) @@ -519,7 +519,7 @@ func TestFlattenedTranspose(t *testing.T) { func TestClearAVXUpperBits(t *testing.T) { // Test that ClearAVXUpperBits is safe even if there are SIMD values // alive (although usually one should not do this). 
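
By contrast with the deliberately unusual case this test exercises, the conventional use of ClearAVXUpperBits is to clear the upper YMM/ZMM bits once a stretch of wide-vector work is finished, before control returns to code that may execute legacy SSE instructions. A sketch using only APIs already exercised in this file (addInt64x4 is a hypothetical helper, not part of this patch):

    // addInt64x4 adds two 4-lane int64 slices, then clears the upper
    // vector bits on the way out to avoid AVX/SSE transition penalties.
    func addInt64x4(dst, a, b []int64) {
    	x := archsimd.LoadInt64x4Slice(a)
    	y := archsimd.LoadInt64x4Slice(b)
    	x.Add(y).StoreSlice(dst)
    	archsimd.ClearAVXUpperBits()
    }
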
- if !simd.X86.AVX2() { + if !archsimd.X86.AVX2() { t.Skip("Test requires X86.AVX2, not available on this hardware") return } @@ -527,11 +527,11 @@ func TestClearAVXUpperBits(t *testing.T) { r := make([]int64, 4) s := make([]int64, 4) - x := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40}) - y := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) + x := archsimd.LoadInt64x4Slice([]int64{10, 20, 30, 40}) + y := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) x.Add(y).StoreSlice(r) - simd.ClearAVXUpperBits() + archsimd.ClearAVXUpperBits() x.Sub(y).StoreSlice(s) checkSlices[int64](t, r, []int64{11, 22, 33, 44}) @@ -539,7 +539,7 @@ func TestClearAVXUpperBits(t *testing.T) { } func TestLeadingZeros(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } @@ -547,7 +547,7 @@ func TestLeadingZeros(t *testing.T) { src := []uint64{0b1111, 0} want := []uint64{60, 64} got := make([]uint64, 2) - simd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got) + archsimd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got) for i := range 2 { if want[i] != got[i] { t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i]) @@ -556,8 +556,8 @@ func TestLeadingZeros(t *testing.T) { } func TestIsZero(t *testing.T) { - v1 := simd.LoadUint64x2Slice([]uint64{0, 1}) - v2 := simd.LoadUint64x2Slice([]uint64{0, 0}) + v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1}) + v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0}) if v1.IsZero() { t.Errorf("Result incorrect, want false, got true") } @@ -579,8 +579,8 @@ func TestIsZero(t *testing.T) { } func TestSelect4FromPairConst(t *testing.T) { - x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) - y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) + x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) + y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) llll := x.SelectFromPair(0, 1, 2, 3, y) hhhh := x.SelectFromPair(4, 5, 6, 7, y) @@ -604,7 +604,7 @@ func TestSelect4FromPairConst(t *testing.T) { r := make([]int32, 4, 4) - foo := func(v simd.Int32x4, a, b, c, d int32) { + foo := func(v archsimd.Int32x4, a, b, c, d int32) { v.StoreSlice(r) checkSlices[int32](t, r, []int32{a, b, c, d}) } @@ -631,13 +631,13 @@ func TestSelect4FromPairConst(t *testing.T) { } //go:noinline -func selectFromPairInt32x4(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) simd.Int32x4 { +func selectFromPairInt32x4(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 { return x.SelectFromPair(a, b, c, d, y) } func TestSelect4FromPairVar(t *testing.T) { - x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) - y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) + x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) + y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y) hhhh := selectFromPairInt32x4(x, 4, 5, 6, 7, y) @@ -661,7 +661,7 @@ func TestSelect4FromPairVar(t *testing.T) { r := make([]int32, 4, 4) - foo := func(v simd.Int32x4, a, b, c, d int32) { + foo := func(v archsimd.Int32x4, a, b, c, d int32) { v.StoreSlice(r) checkSlices[int32](t, r, []int32{a, b, c, d}) } @@ -688,8 +688,8 @@ func TestSelect4FromPairVar(t *testing.T) { } func TestSelect4FromPairConstGrouped(t *testing.T) { - x := simd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13}) - y := simd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17}) + x := archsimd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13}) + y := archsimd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17}) 
llll := x.SelectFromPairGrouped(0, 1, 2, 3, y) hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y) @@ -713,7 +713,7 @@ func TestSelect4FromPairConstGrouped(t *testing.T) { r := make([]float32, 8, 8) - foo := func(v simd.Float32x8, a, b, c, d float32) { + foo := func(v archsimd.Float32x8, a, b, c, d float32) { v.StoreSlice(r) checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d}) } @@ -740,12 +740,12 @@ func TestSelect4FromPairConstGrouped(t *testing.T) { } func TestSelectFromPairConstGroupedUint32x16(t *testing.T) { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } - x := simd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33}) - y := simd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37}) + x := archsimd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33}) + y := archsimd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37}) llll := x.SelectFromPairGrouped(0, 1, 2, 3, y) hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y) @@ -769,7 +769,7 @@ func TestSelectFromPairConstGroupedUint32x16(t *testing.T) { r := make([]uint32, 16, 16) - foo := func(v simd.Uint32x16, a, b, c, d uint32) { + foo := func(v archsimd.Uint32x16, a, b, c, d uint32) { v.StoreSlice(r) checkSlices[uint32](t, r, []uint32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d, @@ -800,8 +800,8 @@ func TestSelectFromPairConstGroupedUint32x16(t *testing.T) { } func TestSelect128FromPair(t *testing.T) { - x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) - y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) + x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) + y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) aa := x.Select128FromPair(0, 0, y) ab := x.Select128FromPair(0, 1, y) @@ -812,7 +812,7 @@ func TestSelect128FromPair(t *testing.T) { r := make([]uint64, 4, 4) - foo := func(v simd.Uint64x4, a, b uint64) { + foo := func(v archsimd.Uint64x4, a, b uint64) { a, b = 2*a, 2*b v.StoreSlice(r) checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1}) @@ -827,8 +827,8 @@ func TestSelect128FromPair(t *testing.T) { } func TestSelect128FromPairError(t *testing.T) { - x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) - y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) + x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) + y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) defer func() { if r := recover(); r != nil { @@ -841,13 +841,13 @@ func TestSelect128FromPairError(t *testing.T) { } //go:noinline -func select128FromPair(x simd.Uint64x4, lo, hi uint8, y simd.Uint64x4) simd.Uint64x4 { +func select128FromPair(x archsimd.Uint64x4, lo, hi uint8, y archsimd.Uint64x4) archsimd.Uint64x4 { return x.Select128FromPair(lo, hi, y) } func TestSelect128FromPairVar(t *testing.T) { - x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) - y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) + x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) + y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) aa := select128FromPair(x, 0, 0, y) ab := select128FromPair(x, 0, 1, y) @@ -858,7 +858,7 @@ func TestSelect128FromPairVar(t *testing.T) { r := make([]uint64, 4, 4) - foo := func(v simd.Uint64x4, a, b uint64) { + foo := func(v archsimd.Uint64x4, a, b uint64) { a, b = 2*a, 2*b v.StoreSlice(r) checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1}) @@ -873,8 +873,8 @@ func TestSelect128FromPairVar(t *testing.T) { } func 
TestSelect2FromPairConst(t *testing.T) {
-	x := simd.LoadUint64x2Slice([]uint64{0, 1})
-	y := simd.LoadUint64x2Slice([]uint64{2, 3})
+	x := archsimd.LoadUint64x2Slice([]uint64{0, 1})
+	y := archsimd.LoadUint64x2Slice([]uint64{2, 3})
 
 	ll := x.SelectFromPair(0, 1, y)
 	hh := x.SelectFromPair(3, 2, y)
@@ -883,7 +883,7 @@ func TestSelect2FromPairConst(t *testing.T) {
 	r := make([]uint64, 2, 2)
 
-	foo := func(v simd.Uint64x2, a, b uint64) {
+	foo := func(v archsimd.Uint64x2, a, b uint64) {
 		v.StoreSlice(r)
 		checkSlices[uint64](t, r, []uint64{a, b})
 	}
@@ -895,8 +895,8 @@ func TestSelect2FromPairConst(t *testing.T) {
 }
 
 func TestSelect2FromPairConstGroupedUint(t *testing.T) {
-	x := simd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
-	y := simd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
+	x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
+	y := archsimd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
 
 	ll := x.SelectFromPairGrouped(0, 1, y)
 	hh := x.SelectFromPairGrouped(3, 2, y)
@@ -905,7 +905,7 @@ func TestSelect2FromPairConstGroupedUint(t *testing.T) {
 	r := make([]uint64, 4, 4)
 
-	foo := func(v simd.Uint64x4, a, b uint64) {
+	foo := func(v archsimd.Uint64x4, a, b uint64) {
 		v.StoreSlice(r)
 		checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10})
 	}
@@ -917,8 +917,8 @@ func TestSelect2FromPairConstGroupedUint(t *testing.T) {
 }
 
 func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
-	x := simd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
-	y := simd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
+	x := archsimd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
+	y := archsimd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
 
 	ll := x.SelectFromPairGrouped(0, 1, y)
 	hh := x.SelectFromPairGrouped(3, 2, y)
@@ -927,7 +927,7 @@ func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
 	r := make([]float64, 4, 4)
 
-	foo := func(v simd.Float64x4, a, b float64) {
+	foo := func(v archsimd.Float64x4, a, b float64) {
 		v.StoreSlice(r)
 		checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10})
 	}
@@ -939,8 +939,8 @@ func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
 }
 
 func TestSelect2FromPairConstGroupedInt(t *testing.T) {
-	x := simd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
-	y := simd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
+	x := archsimd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
+	y := archsimd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
 
 	ll := x.SelectFromPairGrouped(0, 1, y)
 	hh := x.SelectFromPairGrouped(3, 2, y)
@@ -949,7 +949,7 @@ func TestSelect2FromPairConstGroupedInt(t *testing.T) {
 	r := make([]int64, 4, 4)
 
-	foo := func(v simd.Int64x4, a, b int64) {
+	foo := func(v archsimd.Int64x4, a, b int64) {
 		v.StoreSlice(r)
 		checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10})
 	}
@@ -961,13 +961,13 @@ func TestSelect2FromPairConstGroupedInt(t *testing.T) {
 }
 
 func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
-	if !simd.X86.AVX512() {
+	if !archsimd.X86.AVX512() {
 		t.Skip("Test requires X86.AVX512, not available on this hardware")
 		return
 	}
-	x := simd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
-	y := simd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
+	x := archsimd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
+	y := archsimd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
 
 	ll := x.SelectFromPairGrouped(0, 1, y)
 	hh := x.SelectFromPairGrouped(3, 2, y)
@@ -976,7 +976,7 @@ func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
 	r := make([]int64, 8, 8)
 
-	foo := func(v simd.Int64x8, a, b int64) {
+	foo := func(v archsimd.Int64x8, a, b int64) {
 		v.StoreSlice(r)
 		checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30})
 	}
@@ -988,10 +988,10 @@ func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
 }
 
 func TestString(t *testing.T) {
-	x := simd.LoadUint32x4Slice([]uint32{0, 1, 2, 3})
-	y := simd.LoadInt64x4Slice([]int64{-4, -5, -6, -7})
-	z := simd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9})
-	w := simd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9})
+	x := archsimd.LoadUint32x4Slice([]uint32{0, 1, 2, 3})
+	y := archsimd.LoadInt64x4Slice([]int64{-4, -5, -6, -7})
+	z := archsimd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9})
+	w := archsimd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9})
 
 	sx := "{0,1,2,3}"
 	sy := "{-4,-5,-6,-7}"
@@ -1023,7 +1023,7 @@ func a() []int32 {
 
 // applyTo3 returns a 16-element slice of the results of
 // applying f to the respective elements of vectors x, y, and z.
-func applyTo3(x, y, z simd.Int32x16, f func(x, y, z int32) int32) []int32 {
+func applyTo3(x, y, z archsimd.Int32x16, f func(x, y, z int32) int32) []int32 {
 	ax, ay, az := a(), a(), a()
 	x.StoreSlice(ax)
 	y.StoreSlice(ay)
@@ -1038,7 +1038,7 @@
 
 // applyTo3 returns a 16-element slice of the results of
 // applying f to the respective elements of vectors x, y, z, and w.
-func applyTo4(x, y, z, w simd.Int32x16, f func(x, y, z, w int32) int32) []int32 {
+func applyTo4(x, y, z, w archsimd.Int32x16, f func(x, y, z, w int32) int32) []int32 {
 	ax, ay, az, aw := a(), a(), a(), a()
 	x.StoreSlice(ax)
 	y.StoreSlice(ay)
@@ -1053,7 +1053,7 @@ func applyTo4(x, y, z, w simd.Int32x16, f func(x, y, z, w int32) int32) []int32
 }
 
 func TestSelectTernOptInt32x16(t *testing.T) {
-	if !simd.X86.AVX512() {
+	if !archsimd.X86.AVX512() {
 		t.Skip("Test requires X86.AVX512, not available on this hardware")
 		return
 	}
@@ -1063,13 +1063,13 @@ func TestSelectTernOptInt32x16(t *testing.T) {
 	aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
 	am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
 
-	x := simd.LoadInt32x16Slice(ax)
-	y := simd.LoadInt32x16Slice(ay)
-	z := simd.LoadInt32x16Slice(az)
-	w := simd.LoadInt32x16Slice(aw)
-	m := simd.LoadInt32x16Slice(am)
+	x := archsimd.LoadInt32x16Slice(ax)
+	y := archsimd.LoadInt32x16Slice(ay)
+	z := archsimd.LoadInt32x16Slice(az)
+	w := archsimd.LoadInt32x16Slice(aw)
+	m := archsimd.LoadInt32x16Slice(am)
 
-	foo := func(v simd.Int32x16, s []int32) {
+	foo := func(v archsimd.Int32x16, s []int32) {
 		r := make([]int32, 16, 16)
 		v.StoreSlice(r)
 		checkSlices[int32](t, r, s)
@@ -1095,13 +1095,13 @@ func TestSelectTernOptInt32x16(t *testing.T) {
 }
 
 func TestMaskedMerge(t *testing.T) {
-	x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
-	y := simd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
-	z := simd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
+	x := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
+	y := archsimd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
+	z := archsimd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
 	res := make([]int64, 4)
 	expected := []int64{6, 8, -3, -4}
 	mask := x.Less(y)
-	if simd.X86.AVX512() {
+	if archsimd.X86.AVX512() {
 		x.Add(y).Merge(z, mask).StoreSlice(res)
 	} else {
 		x.Add(y).Merge(z, mask).StoreSlice(res)
@@ -1114,7 +1114,7 @@ func TestMaskedMerge(t *testing.T) {
 }
 
 func TestDotProductQuadruple(t *testing.T) {
-	if !simd.X86.AVXVNNI() {
+	if !archsimd.X86.AVXVNNI() {
 		t.Skip("Test requires X86.AVXVNNI, not available on this hardware")
 		return
 	}
@@ -1132,9 +1132,9 @@ func TestDotProductQuadruple(t *testing.T) {
 		wanted1[i] = 30
 		wanted2[i] = 30
 	}
-	x := simd.LoadInt8x16Slice(xd)
-	y := simd.LoadUint8x16Slice(yd)
-	z := simd.LoadInt32x4Slice(zd)
+	x := archsimd.LoadInt8x16Slice(xd)
+	y := archsimd.LoadUint8x16Slice(yd)
+	z := archsimd.LoadInt32x4Slice(zd)
 	x.DotProductQuadruple(y).StoreSlice(res1)
 	x.DotProductQuadruple(y).Add(z).StoreSlice(res1)
 	for i := range 4 {
@@ -1151,7 +1151,7 @@ func TestPermuteScalars(t *testing.T) {
 	x := []int32{11, 12, 13, 14}
 	want := []int32{12, 13, 14, 11}
 	got := make([]int32, 4)
-	simd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
+	archsimd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
 	checkSlices(t, got, want)
 }
 
@@ -1159,7 +1159,7 @@ func TestPermuteScalarsGrouped(t *testing.T) {
 	x := []int32{11, 12, 13, 14, 21, 22, 23, 24}
 	want := []int32{12, 13, 14, 11, 22, 23, 24, 21}
 	got := make([]int32, 8)
-	simd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
+	archsimd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
 	checkSlices(t, got, want)
 }
 
@@ -1167,7 +1167,7 @@ func TestPermuteScalarsHi(t *testing.T) {
 	x := []int16{-1, -2, -3, -4, 11, 12, 13, 14}
 	want := []int16{-1, -2, -3, -4, 12, 13, 14, 11}
 	got := make([]int16, len(x))
-	simd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
+	archsimd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
 	checkSlices(t, got, want)
 }
 
@@ -1175,7 +1175,7 @@ func TestPermuteScalarsLo(t *testing.T) {
 	x := []int16{11, 12, 13, 14, 4, 5, 6, 7}
 	want := []int16{12, 13, 14, 11, 4, 5, 6, 7}
 	got := make([]int16, len(x))
-	simd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
+	archsimd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
 	checkSlices(t, got, want)
 }
 
@@ -1183,7 +1183,7 @@ func TestPermuteScalarsHiGrouped(t *testing.T) {
 	x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114}
 	want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111}
 	got := make([]int16, len(x))
-	simd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
+	archsimd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
 	checkSlices(t, got, want)
 }
 
@@ -1191,15 +1191,15 @@ func TestPermuteScalarsLoGrouped(t *testing.T) {
 	x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17}
 	want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17}
 	got := make([]int16, len(x))
-	simd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
+	archsimd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
 	checkSlices(t, got, want)
 }
 
 func TestClMul(t *testing.T) {
-	var x = simd.LoadUint64x2Slice([]uint64{1, 5})
-	var y = simd.LoadUint64x2Slice([]uint64{3, 9})
+	var x = archsimd.LoadUint64x2Slice([]uint64{1, 5})
+	var y = archsimd.LoadUint64x2Slice([]uint64{3, 9})
 
-	foo := func(v simd.Uint64x2, s []uint64) {
+	foo := func(v archsimd.Uint64x2, s []uint64) {
 		r := make([]uint64, 2, 2)
 		v.StoreSlice(r)
 		checkSlices[uint64](t, r, s)
diff --git a/src/simd/internal/simd_test/simulation_helpers_test.go b/src/simd/archsimd/internal/simd_test/simulation_helpers_test.go
similarity index 100%
rename from src/simd/internal/simd_test/simulation_helpers_test.go
rename to src/simd/archsimd/internal/simd_test/simulation_helpers_test.go
diff --git a/src/simd/internal/simd_test/slicepart_test.go b/src/simd/archsimd/internal/simd_test/slicepart_test.go
similarity index 91%
rename from src/simd/internal/simd_test/slicepart_test.go
rename to src/simd/archsimd/internal/simd_test/slicepart_test.go
index b7a4a4f71b..9c7805c415 100644
--- a/src/simd/internal/simd_test/slicepart_test.go
+++ b/src/simd/archsimd/internal/simd_test/slicepart_test.go
@@ -7,13 +7,13 @@
 package simd_test
 
 import (
-	"simd"
+	"simd/archsimd"
 	"testing"
 )
 
 func TestSlicePartInt8x16(t *testing.T) {
 	Do(t, 16, func(a, c []int8) {
-		u := simd.LoadInt8x16SlicePart(a)
+		u := archsimd.LoadInt8x16SlicePart(a)
 		u.StoreSlice(c)
 	})
 }
@@ -24,7 +24,7 @@ func TestSlicePartInt8x32(t *testing.T) {
 	b := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
 		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
 	for i := 32; i >= 0; i-- {
-		u := simd.LoadInt8x32SlicePart(a[:i])
+		u := archsimd.LoadInt8x32SlicePart(a[:i])
 		c := make([]int8, 32, 32)
 		u.StoreSlice(c)
 		checkSlices(t, c, b)
@@ -38,7 +38,7 @@ func TestSlicePartUint8x16(t *testing.T) {
 	a := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	b := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	for i := 16; i >= 0; i-- {
-		u := simd.LoadUint8x16SlicePart(a[:i])
+		u := archsimd.LoadUint8x16SlicePart(a[:i])
 		c := make([]uint8, 32, 32)
 		u.StoreSlice(c)
 		checkSlices(t, c, b)
@@ -54,7 +54,7 @@ func TestSlicePartUint8x32(t *testing.T) {
 	b := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
 		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
 	for i := 32; i >= 0; i-- {
-		u := simd.LoadUint8x32SlicePart(a[:i])
+		u := archsimd.LoadUint8x32SlicePart(a[:i])
 		c := make([]uint8, 32, 32)
 		u.StoreSlice(c)
 		checkSlices(t, c, b)
@@ -68,7 +68,7 @@ func TestSlicePartInt16x8(t *testing.T) {
 	a := []int16{1, 2, 3, 4, 5, 6, 7, 8}
 	b := []int16{1, 2, 3, 4, 5, 6, 7, 8}
 	for i := 8; i >= 0; i-- {
-		u := simd.LoadInt16x8SlicePart(a[:i])
+		u := archsimd.LoadInt16x8SlicePart(a[:i])
 		c := make([]int16, 16, 16)
 		u.StoreSlice(c)
 		checkSlices(t, c, b)
@@ -82,7 +82,7 @@ func TestSlicePartInt16x16(t *testing.T) {
 	a := []int16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	b := []int16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	for i := 16; i >= 0; i-- {
-		u := simd.LoadInt16x16SlicePart(a[:i])
+		u := archsimd.LoadInt16x16SlicePart(a[:i])
 		c := make([]int16, 16, 16)
 		u.StoreSlice(c)
 		checkSlices(t, c, b)
@@ -96,7 +96,7 @@ func TestSlicesPartStoreInt8x16(t *testing.T) {
 	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	b := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	for i := 16; i >= 0; i-- {
-		v := simd.LoadInt8x16Slice(a)
+		v := archsimd.LoadInt8x16Slice(a)
 		c := make([]int8, 32, 32)
 		v.StoreSlicePart(c[:i])
 		checkSlices(t, c, b)
@@ -110,7 +110,7 @@ func TestSlicesPartStoreInt16x8(t *testing.T) {
 	a := []int16{1, 2, 3, 4, 5, 6, 7, 8}
 	b := []int16{1, 2, 3, 4, 5, 6, 7, 8}
 	for i := 8; i >= 0; i-- {
-		v := simd.LoadInt16x8Slice(a)
+		v := archsimd.LoadInt16x8Slice(a)
 		c := make([]int16, 32, 32)
 		v.StoreSlicePart(c[:i])
 		checkSlices(t, c, b)
@@ -124,7 +124,7 @@ func TestSlicesPartStoreInt16x16(t *testing.T) {
 	a := []int16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	b := []int16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	for i := 16; i >= 0; i-- {
-		v := simd.LoadInt16x16Slice(a)
+		v := archsimd.LoadInt16x16Slice(a)
 		c := make([]int16, 32, 32)
 		v.StoreSlicePart(c[:i])
 		checkSlices(t, c, b)
@@ -138,7 +138,7 @@ func TestSlicesPartStoreUint8x16(t *testing.T) {
 	a := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	b := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	for i := 16; i >= 0; i-- {
-		v := simd.LoadUint8x16Slice(a)
+		v := archsimd.LoadUint8x16Slice(a)
 		c := make([]uint8, 32, 32)
 		v.StoreSlicePart(c[:i])
 		checkSlices(t, c, b)
@@ -152,7 +152,7 @@ func TestSlicesPartStoreUint16x16(t *testing.T) {
 	a := []uint16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	b := []uint16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	for i := 16; i >= 0; i-- {
-		v := simd.LoadUint16x16Slice(a)
+		v := archsimd.LoadUint16x16Slice(a)
 		c := make([]uint16, 32, 32)
 		v.StoreSlicePart(c[:i])
 		checkSlices(t, c, b)
@@ -168,7 +168,7 @@ func TestSlicesPartStoreUint8x32(t *testing.T) {
 	b := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
 		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
 	for i := 32; i >= 0; i-- {
-		v := simd.LoadUint8x32Slice(a)
+		v := archsimd.LoadUint8x32Slice(a)
 		c := make([]uint8, 32, 32)
 		v.StoreSlicePart(c[:i])
 		checkSlices(t, c, b)
@@ -187,7 +187,7 @@ func TestSlicePartInt32(t *testing.T) {
 		// Test the load first
 		// e is a partial slice.
 		e := a[i:]
-		v := simd.LoadInt32x4SlicePart(e)
+		v := archsimd.LoadInt32x4SlicePart(e)
 		// d contains what a ought to contain
 		d := make([]int32, L)
 		for j := 0; j < len(e) && j < len(d); j++ {
@@ -228,7 +228,7 @@ func TestSlicePartUint64(t *testing.T) {
 		// Test the load first
 		// e is a partial slice.
 		e := a[i:]
-		v := simd.LoadUint64x4SlicePart(e)
+		v := archsimd.LoadUint64x4SlicePart(e)
 		// d contains what a ought to contain
 		d := make([]uint64, L)
 		for j := 0; j < len(e) && j < len(d); j++ {
@@ -269,7 +269,7 @@ func TestSlicePartFloat64(t *testing.T) {
 		// Test the load first
 		// e is a partial slice.
 		e := a[i:]
-		v := simd.LoadFloat64x2SlicePart(e)
+		v := archsimd.LoadFloat64x2SlicePart(e)
 		// d contains what a ought to contain
 		d := make([]float64, L)
 		for j := 0; j < len(e) && j < len(d); j++ {
@@ -310,7 +310,7 @@ func TestSlicePartFloat32(t *testing.T) {
 		// Test the load first
 		// e is a partial slice.
 		e := a[i:]
-		v := simd.LoadFloat32x8SlicePart(e)
+		v := archsimd.LoadFloat32x8SlicePart(e)
 		// d contains what a ought to contain
 		d := make([]float32, L)
 		for j := 0; j < len(e) && j < len(d); j++ {
@@ -345,7 +345,7 @@
 
 // 512-bit load
 func TestSlicePartInt64(t *testing.T) {
-	if !simd.X86.AVX512() {
+	if !archsimd.X86.AVX512() {
 		t.Skip("Test requires X86.AVX512, not available on this hardware")
 		return
 	}
@@ -357,7 +357,7 @@ func TestSlicePartInt64(t *testing.T) {
 		// Test the load first
 		// e is a partial slice.
 		e := a[i:]
-		v := simd.LoadInt64x8SlicePart(e)
+		v := archsimd.LoadInt64x8SlicePart(e)
 		// d contains what a ought to contain
 		d := make([]int64, L)
 		for j := 0; j < len(e) && j < len(d); j++ {
diff --git a/src/simd/internal/simd_test/ternary_helpers_test.go b/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go
similarity index 63%
rename from src/simd/internal/simd_test/ternary_helpers_test.go
rename to src/simd/archsimd/internal/simd_test/ternary_helpers_test.go
index 401270c7bd..c37f9ef0ca 100644
--- a/src/simd/internal/simd_test/ternary_helpers_test.go
+++ b/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go
@@ -9,19 +9,19 @@
 package simd_test
 
 import (
-	"simd"
+	"simd/archsimd"
 	"testing"
 )
 
 // testInt8x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt8x16Ternary(t *testing.T, f func(_, _, _ simd.Int8x16) simd.Int8x16, want func(_, _, _ []int8) []int8) {
+func testInt8x16Ternary(t *testing.T, f func(_, _, _ archsimd.Int8x16) archsimd.Int8x16, want func(_, _, _ []int8) []int8) {
 	n := 16
 	t.Helper()
 	forSliceTriple(t, int8s, n, func(x, y, z []int8) bool {
 		t.Helper()
-		a := simd.LoadInt8x16Slice(x)
-		b := simd.LoadInt8x16Slice(y)
-		c := simd.LoadInt8x16Slice(z)
+		a := archsimd.LoadInt8x16Slice(x)
+		b := archsimd.LoadInt8x16Slice(y)
+		c := archsimd.LoadInt8x16Slice(z)
 		g := make([]int8, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -30,14 +30,14 @@ func testInt8x16Ternary(t *testing.T, f func(_, _, _ simd.Int8x16) simd.Int8x16,
 }
 
 // testInt16x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt16x8Ternary(t *testing.T, f func(_, _, _ simd.Int16x8) simd.Int16x8, want func(_, _, _ []int16) []int16) {
+func testInt16x8Ternary(t *testing.T, f func(_, _, _ archsimd.Int16x8) archsimd.Int16x8, want func(_, _, _ []int16) []int16) {
 	n := 8
 	t.Helper()
 	forSliceTriple(t, int16s, n, func(x, y, z []int16) bool {
 		t.Helper()
-		a := simd.LoadInt16x8Slice(x)
-		b := simd.LoadInt16x8Slice(y)
-		c := simd.LoadInt16x8Slice(z)
+		a := archsimd.LoadInt16x8Slice(x)
+		b := archsimd.LoadInt16x8Slice(y)
+		c := archsimd.LoadInt16x8Slice(z)
 		g := make([]int16, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -46,14 +46,14 @@ func testInt16x8Ternary(t *testing.T, f func(_, _, _ simd.Int16x8) simd.Int16x8,
 }
 
 // testInt32x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt32x4Ternary(t *testing.T, f func(_, _, _ simd.Int32x4) simd.Int32x4, want func(_, _, _ []int32) []int32) {
+func testInt32x4Ternary(t *testing.T, f func(_, _, _ archsimd.Int32x4) archsimd.Int32x4, want func(_, _, _ []int32) []int32) {
 	n := 4
 	t.Helper()
 	forSliceTriple(t, int32s, n, func(x, y, z []int32) bool {
 		t.Helper()
-		a := simd.LoadInt32x4Slice(x)
-		b := simd.LoadInt32x4Slice(y)
-		c := simd.LoadInt32x4Slice(z)
+		a := archsimd.LoadInt32x4Slice(x)
+		b := archsimd.LoadInt32x4Slice(y)
+		c := archsimd.LoadInt32x4Slice(z)
 		g := make([]int32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -62,14 +62,14 @@ func testInt32x4Ternary(t *testing.T, f func(_, _, _ simd.Int32x4) simd.Int32x4,
 }
 
 // testInt64x2Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt64x2Ternary(t *testing.T, f func(_, _, _ simd.Int64x2) simd.Int64x2, want func(_, _, _ []int64) []int64) {
+func testInt64x2Ternary(t *testing.T, f func(_, _, _ archsimd.Int64x2) archsimd.Int64x2, want func(_, _, _ []int64) []int64) {
 	n := 2
 	t.Helper()
 	forSliceTriple(t, int64s, n, func(x, y, z []int64) bool {
 		t.Helper()
-		a := simd.LoadInt64x2Slice(x)
-		b := simd.LoadInt64x2Slice(y)
-		c := simd.LoadInt64x2Slice(z)
+		a := archsimd.LoadInt64x2Slice(x)
+		b := archsimd.LoadInt64x2Slice(y)
+		c := archsimd.LoadInt64x2Slice(z)
 		g := make([]int64, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -78,14 +78,14 @@ func testInt64x2Ternary(t *testing.T, f func(_, _, _ simd.Int64x2) simd.Int64x2,
 }
 
 // testUint8x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint8x16Ternary(t *testing.T, f func(_, _, _ simd.Uint8x16) simd.Uint8x16, want func(_, _, _ []uint8) []uint8) {
+func testUint8x16Ternary(t *testing.T, f func(_, _, _ archsimd.Uint8x16) archsimd.Uint8x16, want func(_, _, _ []uint8) []uint8) {
 	n := 16
 	t.Helper()
 	forSliceTriple(t, uint8s, n, func(x, y, z []uint8) bool {
 		t.Helper()
-		a := simd.LoadUint8x16Slice(x)
-		b := simd.LoadUint8x16Slice(y)
-		c := simd.LoadUint8x16Slice(z)
+		a := archsimd.LoadUint8x16Slice(x)
+		b := archsimd.LoadUint8x16Slice(y)
+		c := archsimd.LoadUint8x16Slice(z)
 		g := make([]uint8, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -94,14 +94,14 @@ func testUint8x16Ternary(t *testing.T, f func(_, _, _ simd.Uint8x16) simd.Uint8x
 }
 
 // testUint16x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint16x8Ternary(t *testing.T, f func(_, _, _ simd.Uint16x8) simd.Uint16x8, want func(_, _, _ []uint16) []uint16) {
+func testUint16x8Ternary(t *testing.T, f func(_, _, _ archsimd.Uint16x8) archsimd.Uint16x8, want func(_, _, _ []uint16) []uint16) {
 	n := 8
 	t.Helper()
 	forSliceTriple(t, uint16s, n, func(x, y, z []uint16) bool {
 		t.Helper()
-		a := simd.LoadUint16x8Slice(x)
-		b := simd.LoadUint16x8Slice(y)
-		c := simd.LoadUint16x8Slice(z)
+		a := archsimd.LoadUint16x8Slice(x)
+		b := archsimd.LoadUint16x8Slice(y)
+		c := archsimd.LoadUint16x8Slice(z)
 		g := make([]uint16, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -110,14 +110,14 @@ func testUint16x8Ternary(t *testing.T, f func(_, _, _ simd.Uint16x8) simd.Uint16
 }
 
 // testUint32x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint32x4Ternary(t *testing.T, f func(_, _, _ simd.Uint32x4) simd.Uint32x4, want func(_, _, _ []uint32) []uint32) {
+func testUint32x4Ternary(t *testing.T, f func(_, _, _ archsimd.Uint32x4) archsimd.Uint32x4, want func(_, _, _ []uint32) []uint32) {
 	n := 4
 	t.Helper()
 	forSliceTriple(t, uint32s, n, func(x, y, z []uint32) bool {
 		t.Helper()
-		a := simd.LoadUint32x4Slice(x)
-		b := simd.LoadUint32x4Slice(y)
-		c := simd.LoadUint32x4Slice(z)
+		a := archsimd.LoadUint32x4Slice(x)
+		b := archsimd.LoadUint32x4Slice(y)
+		c := archsimd.LoadUint32x4Slice(z)
 		g := make([]uint32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -126,14 +126,14 @@ func testUint32x4Ternary(t *testing.T, f func(_, _, _ simd.Uint32x4) simd.Uint32
 }
 
 // testUint64x2Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint64x2Ternary(t *testing.T, f func(_, _, _ simd.Uint64x2) simd.Uint64x2, want func(_, _, _ []uint64) []uint64) {
+func testUint64x2Ternary(t *testing.T, f func(_, _, _ archsimd.Uint64x2) archsimd.Uint64x2, want func(_, _, _ []uint64) []uint64) {
 	n := 2
 	t.Helper()
 	forSliceTriple(t, uint64s, n, func(x, y, z []uint64) bool {
 		t.Helper()
-		a := simd.LoadUint64x2Slice(x)
-		b := simd.LoadUint64x2Slice(y)
-		c := simd.LoadUint64x2Slice(z)
+		a := archsimd.LoadUint64x2Slice(x)
+		b := archsimd.LoadUint64x2Slice(y)
+		c := archsimd.LoadUint64x2Slice(z)
 		g := make([]uint64, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -142,14 +142,14 @@ func testUint64x2Ternary(t *testing.T, f func(_, _, _ simd.Uint64x2) simd.Uint64
 }
 
 // testFloat32x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat32x4Ternary(t *testing.T, f func(_, _, _ simd.Float32x4) simd.Float32x4, want func(_, _, _ []float32) []float32) {
+func testFloat32x4Ternary(t *testing.T, f func(_, _, _ archsimd.Float32x4) archsimd.Float32x4, want func(_, _, _ []float32) []float32) {
 	n := 4
 	t.Helper()
 	forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
 		t.Helper()
-		a := simd.LoadFloat32x4Slice(x)
-		b := simd.LoadFloat32x4Slice(y)
-		c := simd.LoadFloat32x4Slice(z)
+		a := archsimd.LoadFloat32x4Slice(x)
+		b := archsimd.LoadFloat32x4Slice(y)
+		c := archsimd.LoadFloat32x4Slice(z)
 		g := make([]float32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -158,14 +158,14 @@ func testFloat32x4Ternary(t *testing.T, f func(_, _, _ simd.Float32x4) simd.Floa
 }
 
 // testFloat64x2Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat64x2Ternary(t *testing.T, f func(_, _, _ simd.Float64x2) simd.Float64x2, want func(_, _, _ []float64) []float64) {
+func testFloat64x2Ternary(t *testing.T, f func(_, _, _ archsimd.Float64x2) archsimd.Float64x2, want func(_, _, _ []float64) []float64) {
 	n := 2
 	t.Helper()
 	forSliceTriple(t, float64s, n, func(x, y, z []float64) bool {
 		t.Helper()
-		a := simd.LoadFloat64x2Slice(x)
-		b := simd.LoadFloat64x2Slice(y)
-		c := simd.LoadFloat64x2Slice(z)
+		a := archsimd.LoadFloat64x2Slice(x)
+		b := archsimd.LoadFloat64x2Slice(y)
+		c := archsimd.LoadFloat64x2Slice(z)
 		g := make([]float64, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -174,14 +174,14 @@ func testFloat64x2Ternary(t *testing.T, f func(_, _, _ simd.Float64x2) simd.Floa
 }
 
 // testInt8x32Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt8x32Ternary(t *testing.T, f func(_, _, _ simd.Int8x32) simd.Int8x32, want func(_, _, _ []int8) []int8) {
+func testInt8x32Ternary(t *testing.T, f func(_, _, _ archsimd.Int8x32) archsimd.Int8x32, want func(_, _, _ []int8) []int8) {
 	n := 32
 	t.Helper()
 	forSliceTriple(t, int8s, n, func(x, y, z []int8) bool {
 		t.Helper()
-		a := simd.LoadInt8x32Slice(x)
-		b := simd.LoadInt8x32Slice(y)
-		c := simd.LoadInt8x32Slice(z)
+		a := archsimd.LoadInt8x32Slice(x)
+		b := archsimd.LoadInt8x32Slice(y)
+		c := archsimd.LoadInt8x32Slice(z)
 		g := make([]int8, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -190,14 +190,14 @@ func testInt8x32Ternary(t *testing.T, f func(_, _, _ simd.Int8x32) simd.Int8x32,
 }
 
 // testInt16x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt16x16Ternary(t *testing.T, f func(_, _, _ simd.Int16x16) simd.Int16x16, want func(_, _, _ []int16) []int16) {
+func testInt16x16Ternary(t *testing.T, f func(_, _, _ archsimd.Int16x16) archsimd.Int16x16, want func(_, _, _ []int16) []int16) {
 	n := 16
 	t.Helper()
 	forSliceTriple(t, int16s, n, func(x, y, z []int16) bool {
 		t.Helper()
-		a := simd.LoadInt16x16Slice(x)
-		b := simd.LoadInt16x16Slice(y)
-		c := simd.LoadInt16x16Slice(z)
+		a := archsimd.LoadInt16x16Slice(x)
+		b := archsimd.LoadInt16x16Slice(y)
+		c := archsimd.LoadInt16x16Slice(z)
 		g := make([]int16, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -206,14 +206,14 @@ func testInt16x16Ternary(t *testing.T, f func(_, _, _ simd.Int16x16) simd.Int16x
 }
 
 // testInt32x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt32x8Ternary(t *testing.T, f func(_, _, _ simd.Int32x8) simd.Int32x8, want func(_, _, _ []int32) []int32) {
+func testInt32x8Ternary(t *testing.T, f func(_, _, _ archsimd.Int32x8) archsimd.Int32x8, want func(_, _, _ []int32) []int32) {
 	n := 8
 	t.Helper()
 	forSliceTriple(t, int32s, n, func(x, y, z []int32) bool {
 		t.Helper()
-		a := simd.LoadInt32x8Slice(x)
-		b := simd.LoadInt32x8Slice(y)
-		c := simd.LoadInt32x8Slice(z)
+		a := archsimd.LoadInt32x8Slice(x)
+		b := archsimd.LoadInt32x8Slice(y)
+		c := archsimd.LoadInt32x8Slice(z)
 		g := make([]int32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -222,14 +222,14 @@ func testInt32x8Ternary(t *testing.T, f func(_, _, _ simd.Int32x8) simd.Int32x8,
 }
 
 // testInt64x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt64x4Ternary(t *testing.T, f func(_, _, _ simd.Int64x4) simd.Int64x4, want func(_, _, _ []int64) []int64) {
+func testInt64x4Ternary(t *testing.T, f func(_, _, _ archsimd.Int64x4) archsimd.Int64x4, want func(_, _, _ []int64) []int64) {
 	n := 4
 	t.Helper()
 	forSliceTriple(t, int64s, n, func(x, y, z []int64) bool {
 		t.Helper()
-		a := simd.LoadInt64x4Slice(x)
-		b := simd.LoadInt64x4Slice(y)
-		c := simd.LoadInt64x4Slice(z)
+		a := archsimd.LoadInt64x4Slice(x)
+		b := archsimd.LoadInt64x4Slice(y)
+		c := archsimd.LoadInt64x4Slice(z)
 		g := make([]int64, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -238,14 +238,14 @@ func testInt64x4Ternary(t *testing.T, f func(_, _, _ simd.Int64x4) simd.Int64x4,
 }
 
 // testUint8x32Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint8x32Ternary(t *testing.T, f func(_, _, _ simd.Uint8x32) simd.Uint8x32, want func(_, _, _ []uint8) []uint8) {
+func testUint8x32Ternary(t *testing.T, f func(_, _, _ archsimd.Uint8x32) archsimd.Uint8x32, want func(_, _, _ []uint8) []uint8) {
 	n := 32
 	t.Helper()
 	forSliceTriple(t, uint8s, n, func(x, y, z []uint8) bool {
 		t.Helper()
-		a := simd.LoadUint8x32Slice(x)
-		b := simd.LoadUint8x32Slice(y)
-		c := simd.LoadUint8x32Slice(z)
+		a := archsimd.LoadUint8x32Slice(x)
+		b := archsimd.LoadUint8x32Slice(y)
+		c := archsimd.LoadUint8x32Slice(z)
 		g := make([]uint8, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -254,14 +254,14 @@ func testUint8x32Ternary(t *testing.T, f func(_, _, _ simd.Uint8x32) simd.Uint8x
 }
 
 // testUint16x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint16x16Ternary(t *testing.T, f func(_, _, _ simd.Uint16x16) simd.Uint16x16, want func(_, _, _ []uint16) []uint16) {
+func testUint16x16Ternary(t *testing.T, f func(_, _, _ archsimd.Uint16x16) archsimd.Uint16x16, want func(_, _, _ []uint16) []uint16) {
 	n := 16
 	t.Helper()
 	forSliceTriple(t, uint16s, n, func(x, y, z []uint16) bool {
 		t.Helper()
-		a := simd.LoadUint16x16Slice(x)
-		b := simd.LoadUint16x16Slice(y)
-		c := simd.LoadUint16x16Slice(z)
+		a := archsimd.LoadUint16x16Slice(x)
+		b := archsimd.LoadUint16x16Slice(y)
+		c := archsimd.LoadUint16x16Slice(z)
 		g := make([]uint16, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -270,14 +270,14 @@ func testUint16x16Ternary(t *testing.T, f func(_, _, _ simd.Uint16x16) simd.Uint
 }
 
 // testUint32x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint32x8Ternary(t *testing.T, f func(_, _, _ simd.Uint32x8) simd.Uint32x8, want func(_, _, _ []uint32) []uint32) {
+func testUint32x8Ternary(t *testing.T, f func(_, _, _ archsimd.Uint32x8) archsimd.Uint32x8, want func(_, _, _ []uint32) []uint32) {
 	n := 8
 	t.Helper()
 	forSliceTriple(t, uint32s, n, func(x, y, z []uint32) bool {
 		t.Helper()
-		a := simd.LoadUint32x8Slice(x)
-		b := simd.LoadUint32x8Slice(y)
-		c := simd.LoadUint32x8Slice(z)
+		a := archsimd.LoadUint32x8Slice(x)
+		b := archsimd.LoadUint32x8Slice(y)
+		c := archsimd.LoadUint32x8Slice(z)
 		g := make([]uint32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -286,14 +286,14 @@ func testUint32x8Ternary(t *testing.T, f func(_, _, _ simd.Uint32x8) simd.Uint32
 }
 
 // testUint64x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint64x4Ternary(t *testing.T, f func(_, _, _ simd.Uint64x4) simd.Uint64x4, want func(_, _, _ []uint64) []uint64) {
+func testUint64x4Ternary(t *testing.T, f func(_, _, _ archsimd.Uint64x4) archsimd.Uint64x4, want func(_, _, _ []uint64) []uint64) {
 	n := 4
 	t.Helper()
 	forSliceTriple(t, uint64s, n, func(x, y, z []uint64) bool {
 		t.Helper()
-		a := simd.LoadUint64x4Slice(x)
-		b := simd.LoadUint64x4Slice(y)
-		c := simd.LoadUint64x4Slice(z)
+		a := archsimd.LoadUint64x4Slice(x)
+		b := archsimd.LoadUint64x4Slice(y)
+		c := archsimd.LoadUint64x4Slice(z)
 		g := make([]uint64, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -302,14 +302,14 @@ func testUint64x4Ternary(t *testing.T, f func(_, _, _ simd.Uint64x4) simd.Uint64
 }
 
 // testFloat32x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat32x8Ternary(t *testing.T, f func(_, _, _ simd.Float32x8) simd.Float32x8, want func(_, _, _ []float32) []float32) {
+func testFloat32x8Ternary(t *testing.T, f func(_, _, _ archsimd.Float32x8) archsimd.Float32x8, want func(_, _, _ []float32) []float32) {
 	n := 8
 	t.Helper()
 	forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
 		t.Helper()
-		a := simd.LoadFloat32x8Slice(x)
-		b := simd.LoadFloat32x8Slice(y)
-		c := simd.LoadFloat32x8Slice(z)
+		a := archsimd.LoadFloat32x8Slice(x)
+		b := archsimd.LoadFloat32x8Slice(y)
+		c := archsimd.LoadFloat32x8Slice(z)
 		g := make([]float32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -318,14 +318,14 @@ func testFloat32x8Ternary(t *testing.T, f func(_, _, _ simd.Float32x8) simd.Floa
 }
 
 // testFloat64x4Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat64x4Ternary(t *testing.T, f func(_, _, _ simd.Float64x4) simd.Float64x4, want func(_, _, _ []float64) []float64) {
+func testFloat64x4Ternary(t *testing.T, f func(_, _, _ archsimd.Float64x4) archsimd.Float64x4, want func(_, _, _ []float64) []float64) {
 	n := 4
 	t.Helper()
 	forSliceTriple(t, float64s, n, func(x, y, z []float64) bool {
 		t.Helper()
-		a := simd.LoadFloat64x4Slice(x)
-		b := simd.LoadFloat64x4Slice(y)
-		c := simd.LoadFloat64x4Slice(z)
+		a := archsimd.LoadFloat64x4Slice(x)
+		b := archsimd.LoadFloat64x4Slice(y)
+		c := archsimd.LoadFloat64x4Slice(z)
 		g := make([]float64, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -334,14 +334,14 @@ func testFloat64x4Ternary(t *testing.T, f func(_, _, _ simd.Float64x4) simd.Floa
 }
 
 // testInt8x64Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt8x64Ternary(t *testing.T, f func(_, _, _ simd.Int8x64) simd.Int8x64, want func(_, _, _ []int8) []int8) {
+func testInt8x64Ternary(t *testing.T, f func(_, _, _ archsimd.Int8x64) archsimd.Int8x64, want func(_, _, _ []int8) []int8) {
 	n := 64
 	t.Helper()
 	forSliceTriple(t, int8s, n, func(x, y, z []int8) bool {
 		t.Helper()
-		a := simd.LoadInt8x64Slice(x)
-		b := simd.LoadInt8x64Slice(y)
-		c := simd.LoadInt8x64Slice(z)
+		a := archsimd.LoadInt8x64Slice(x)
+		b := archsimd.LoadInt8x64Slice(y)
+		c := archsimd.LoadInt8x64Slice(z)
 		g := make([]int8, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -350,14 +350,14 @@ func testInt8x64Ternary(t *testing.T, f func(_, _, _ simd.Int8x64) simd.Int8x64,
 }
 
 // testInt16x32Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt16x32Ternary(t *testing.T, f func(_, _, _ simd.Int16x32) simd.Int16x32, want func(_, _, _ []int16) []int16) {
+func testInt16x32Ternary(t *testing.T, f func(_, _, _ archsimd.Int16x32) archsimd.Int16x32, want func(_, _, _ []int16) []int16) {
 	n := 32
 	t.Helper()
 	forSliceTriple(t, int16s, n, func(x, y, z []int16) bool {
 		t.Helper()
-		a := simd.LoadInt16x32Slice(x)
-		b := simd.LoadInt16x32Slice(y)
-		c := simd.LoadInt16x32Slice(z)
+		a := archsimd.LoadInt16x32Slice(x)
+		b := archsimd.LoadInt16x32Slice(y)
+		c := archsimd.LoadInt16x32Slice(z)
 		g := make([]int16, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -366,14 +366,14 @@ func testInt16x32Ternary(t *testing.T, f func(_, _, _ simd.Int16x32) simd.Int16x
 }
 
 // testInt32x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt32x16Ternary(t *testing.T, f func(_, _, _ simd.Int32x16) simd.Int32x16, want func(_, _, _ []int32) []int32) {
+func testInt32x16Ternary(t *testing.T, f func(_, _, _ archsimd.Int32x16) archsimd.Int32x16, want func(_, _, _ []int32) []int32) {
 	n := 16
 	t.Helper()
 	forSliceTriple(t, int32s, n, func(x, y, z []int32) bool {
 		t.Helper()
-		a := simd.LoadInt32x16Slice(x)
-		b := simd.LoadInt32x16Slice(y)
-		c := simd.LoadInt32x16Slice(z)
+		a := archsimd.LoadInt32x16Slice(x)
+		b := archsimd.LoadInt32x16Slice(y)
+		c := archsimd.LoadInt32x16Slice(z)
 		g := make([]int32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -382,14 +382,14 @@ func testInt32x16Ternary(t *testing.T, f func(_, _, _ simd.Int32x16) simd.Int32x
 }
 
 // testInt64x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testInt64x8Ternary(t *testing.T, f func(_, _, _ simd.Int64x8) simd.Int64x8, want func(_, _, _ []int64) []int64) {
+func testInt64x8Ternary(t *testing.T, f func(_, _, _ archsimd.Int64x8) archsimd.Int64x8, want func(_, _, _ []int64) []int64) {
 	n := 8
 	t.Helper()
 	forSliceTriple(t, int64s, n, func(x, y, z []int64) bool {
 		t.Helper()
-		a := simd.LoadInt64x8Slice(x)
-		b := simd.LoadInt64x8Slice(y)
-		c := simd.LoadInt64x8Slice(z)
+		a := archsimd.LoadInt64x8Slice(x)
+		b := archsimd.LoadInt64x8Slice(y)
+		c := archsimd.LoadInt64x8Slice(z)
 		g := make([]int64, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -398,14 +398,14 @@ func testInt64x8Ternary(t *testing.T, f func(_, _, _ simd.Int64x8) simd.Int64x8,
 }
 
 // testUint8x64Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint8x64Ternary(t *testing.T, f func(_, _, _ simd.Uint8x64) simd.Uint8x64, want func(_, _, _ []uint8) []uint8) {
+func testUint8x64Ternary(t *testing.T, f func(_, _, _ archsimd.Uint8x64) archsimd.Uint8x64, want func(_, _, _ []uint8) []uint8) {
 	n := 64
 	t.Helper()
 	forSliceTriple(t, uint8s, n, func(x, y, z []uint8) bool {
 		t.Helper()
-		a := simd.LoadUint8x64Slice(x)
-		b := simd.LoadUint8x64Slice(y)
-		c := simd.LoadUint8x64Slice(z)
+		a := archsimd.LoadUint8x64Slice(x)
+		b := archsimd.LoadUint8x64Slice(y)
+		c := archsimd.LoadUint8x64Slice(z)
 		g := make([]uint8, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -414,14 +414,14 @@ func testUint8x64Ternary(t *testing.T, f func(_, _, _ simd.Uint8x64) simd.Uint8x
 }
 
 // testUint16x32Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint16x32Ternary(t *testing.T, f func(_, _, _ simd.Uint16x32) simd.Uint16x32, want func(_, _, _ []uint16) []uint16) {
+func testUint16x32Ternary(t *testing.T, f func(_, _, _ archsimd.Uint16x32) archsimd.Uint16x32, want func(_, _, _ []uint16) []uint16) {
 	n := 32
 	t.Helper()
 	forSliceTriple(t, uint16s, n, func(x, y, z []uint16) bool {
 		t.Helper()
-		a := simd.LoadUint16x32Slice(x)
-		b := simd.LoadUint16x32Slice(y)
-		c := simd.LoadUint16x32Slice(z)
+		a := archsimd.LoadUint16x32Slice(x)
+		b := archsimd.LoadUint16x32Slice(y)
+		c := archsimd.LoadUint16x32Slice(z)
 		g := make([]uint16, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -430,14 +430,14 @@ func testUint16x32Ternary(t *testing.T, f func(_, _, _ simd.Uint16x32) simd.Uint
 }
 
 // testUint32x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint32x16Ternary(t *testing.T, f func(_, _, _ simd.Uint32x16) simd.Uint32x16, want func(_, _, _ []uint32) []uint32) {
+func testUint32x16Ternary(t *testing.T, f func(_, _, _ archsimd.Uint32x16) archsimd.Uint32x16, want func(_, _, _ []uint32) []uint32) {
 	n := 16
 	t.Helper()
 	forSliceTriple(t, uint32s, n, func(x, y, z []uint32) bool {
 		t.Helper()
-		a := simd.LoadUint32x16Slice(x)
-		b := simd.LoadUint32x16Slice(y)
-		c := simd.LoadUint32x16Slice(z)
+		a := archsimd.LoadUint32x16Slice(x)
+		b := archsimd.LoadUint32x16Slice(y)
+		c := archsimd.LoadUint32x16Slice(z)
 		g := make([]uint32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -446,14 +446,14 @@ func testUint32x16Ternary(t *testing.T, f func(_, _, _ simd.Uint32x16) simd.Uint
 }
 
 // testUint64x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testUint64x8Ternary(t *testing.T, f func(_, _, _ simd.Uint64x8) simd.Uint64x8, want func(_, _, _ []uint64) []uint64) {
+func testUint64x8Ternary(t *testing.T, f func(_, _, _ archsimd.Uint64x8) archsimd.Uint64x8, want func(_, _, _ []uint64) []uint64) {
 	n := 8
 	t.Helper()
 	forSliceTriple(t, uint64s, n, func(x, y, z []uint64) bool {
 		t.Helper()
-		a := simd.LoadUint64x8Slice(x)
-		b := simd.LoadUint64x8Slice(y)
-		c := simd.LoadUint64x8Slice(z)
+		a := archsimd.LoadUint64x8Slice(x)
+		b := archsimd.LoadUint64x8Slice(y)
+		c := archsimd.LoadUint64x8Slice(z)
 		g := make([]uint64, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -462,14 +462,14 @@ func testUint64x8Ternary(t *testing.T, f func(_, _, _ simd.Uint64x8) simd.Uint64
 }
 
 // testFloat32x16Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat32x16Ternary(t *testing.T, f func(_, _, _ simd.Float32x16) simd.Float32x16, want func(_, _, _ []float32) []float32) {
+func testFloat32x16Ternary(t *testing.T, f func(_, _, _ archsimd.Float32x16) archsimd.Float32x16, want func(_, _, _ []float32) []float32) {
 	n := 16
 	t.Helper()
 	forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
 		t.Helper()
-		a := simd.LoadFloat32x16Slice(x)
-		b := simd.LoadFloat32x16Slice(y)
-		c := simd.LoadFloat32x16Slice(z)
+		a := archsimd.LoadFloat32x16Slice(x)
+		b := archsimd.LoadFloat32x16Slice(y)
+		c := archsimd.LoadFloat32x16Slice(z)
 		g := make([]float32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -478,14 +478,14 @@ func testFloat32x16Ternary(t *testing.T, f func(_, _, _ simd.Float32x16) simd.Fl
 }
 
 // testFloat64x8Ternary tests the simd ternary method f against the expected behavior generated by want
-func testFloat64x8Ternary(t *testing.T, f func(_, _, _ simd.Float64x8) simd.Float64x8, want func(_, _, _ []float64) []float64) {
+func testFloat64x8Ternary(t *testing.T, f func(_, _, _ archsimd.Float64x8) archsimd.Float64x8, want func(_, _, _ []float64) []float64) {
 	n := 8
 	t.Helper()
 	forSliceTriple(t, float64s, n, func(x, y, z []float64) bool {
 		t.Helper()
-		a := simd.LoadFloat64x8Slice(x)
-		b := simd.LoadFloat64x8Slice(y)
-		c := simd.LoadFloat64x8Slice(z)
+		a := archsimd.LoadFloat64x8Slice(x)
+		b := archsimd.LoadFloat64x8Slice(y)
+		c := archsimd.LoadFloat64x8Slice(z)
 		g := make([]float64, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -495,14 +495,14 @@ func testFloat64x8Ternary(t *testing.T, f func(_, _, _ simd.Float64x8) simd.Floa
 
 // testFloat32x4TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
 // but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat32x4TernaryFlaky(t *testing.T, f func(x, y, z simd.Float32x4) simd.Float32x4, want func(x, y, z []float32) []float32, flakiness float64) {
+func testFloat32x4TernaryFlaky(t *testing.T, f func(x, y, z archsimd.Float32x4) archsimd.Float32x4, want func(x, y, z []float32) []float32, flakiness float64) {
 	n := 4
 	t.Helper()
 	forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
 		t.Helper()
-		a := simd.LoadFloat32x4Slice(x)
-		b := simd.LoadFloat32x4Slice(y)
-		c := simd.LoadFloat32x4Slice(z)
+		a := archsimd.LoadFloat32x4Slice(x)
+		b := archsimd.LoadFloat32x4Slice(y)
+		c := archsimd.LoadFloat32x4Slice(z)
 		g := make([]float32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -512,14 +512,14 @@ func testFloat32x4TernaryFlaky(t *testing.T, f func(x, y, z simd.Float32x4) simd
 
 // testFloat32x8TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
 // but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat32x8TernaryFlaky(t *testing.T, f func(x, y, z simd.Float32x8) simd.Float32x8, want func(x, y, z []float32) []float32, flakiness float64) {
+func testFloat32x8TernaryFlaky(t *testing.T, f func(x, y, z archsimd.Float32x8) archsimd.Float32x8, want func(x, y, z []float32) []float32, flakiness float64) {
 	n := 8
 	t.Helper()
 	forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
 		t.Helper()
-		a := simd.LoadFloat32x8Slice(x)
-		b := simd.LoadFloat32x8Slice(y)
-		c := simd.LoadFloat32x8Slice(z)
+		a := archsimd.LoadFloat32x8Slice(x)
+		b := archsimd.LoadFloat32x8Slice(y)
+		c := archsimd.LoadFloat32x8Slice(z)
 		g := make([]float32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
@@ -529,14 +529,14 @@ func testFloat32x8TernaryFlaky(t *testing.T, f func(x, y, z simd.Float32x8) simd
 
 // testFloat32x16TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
 // but using a flakiness parameter because we haven't exactly figured out how simd floating point works
-func testFloat32x16TernaryFlaky(t *testing.T, f func(x, y, z simd.Float32x16) simd.Float32x16, want func(x, y, z []float32) []float32, flakiness float64) {
+func testFloat32x16TernaryFlaky(t *testing.T, f func(x, y, z archsimd.Float32x16) archsimd.Float32x16, want func(x, y, z []float32) []float32, flakiness float64) {
 	n := 16
 	t.Helper()
 	forSliceTriple(t, float32s, n, func(x, y, z []float32) bool {
 		t.Helper()
-		a := simd.LoadFloat32x16Slice(x)
-		b := simd.LoadFloat32x16Slice(y)
-		c := simd.LoadFloat32x16Slice(z)
+		a := archsimd.LoadFloat32x16Slice(x)
+		b := archsimd.LoadFloat32x16Slice(y)
+		c := archsimd.LoadFloat32x16Slice(z)
 		g := make([]float32, n)
 		f(a, b, c).StoreSlice(g)
 		w := want(x, y, z)
diff --git a/src/simd/archsimd/internal/simd_test/ternary_test.go b/src/simd/archsimd/internal/simd_test/ternary_test.go
new file mode 100644
index 0000000000..0cc091be09
--- /dev/null
+++ b/src/simd/archsimd/internal/simd_test/ternary_test.go
@@ -0,0 +1,23 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.simd && amd64
+
+package simd_test
+
+import (
+	"simd/archsimd"
+	"testing"
+)
+
+func TestFMA(t *testing.T) {
+	if archsimd.X86.AVX512() {
+		testFloat32x4TernaryFlaky(t, archsimd.Float32x4.MulAdd, fmaSlice[float32], 0.001)
+		testFloat32x8TernaryFlaky(t, archsimd.Float32x8.MulAdd, fmaSlice[float32], 0.001)
+		testFloat32x16TernaryFlaky(t, archsimd.Float32x16.MulAdd, fmaSlice[float32], 0.001)
+		testFloat64x2Ternary(t, archsimd.Float64x2.MulAdd, fmaSlice[float64])
+		testFloat64x4Ternary(t, archsimd.Float64x4.MulAdd, fmaSlice[float64])
+		testFloat64x8Ternary(t, archsimd.Float64x8.MulAdd, fmaSlice[float64])
+	}
+}
diff --git a/src/simd/internal/simd_test/transpose_test.go b/src/simd/archsimd/internal/simd_test/transpose_test.go
similarity index 88%
rename from src/simd/internal/simd_test/transpose_test.go
rename to src/simd/archsimd/internal/simd_test/transpose_test.go
index cdf818e997..5e1571ef73 100644
--- a/src/simd/internal/simd_test/transpose_test.go
+++ b/src/simd/archsimd/internal/simd_test/transpose_test.go
@@ -8,11 +8,11 @@
 package simd_test
 
 import (
 	"fmt"
-	"simd"
+	"simd/archsimd"
 	"testing"
 )
 
-func Transpose4(a0, a1, a2, a3 simd.Int32x4) (b0, b1, b2, b3 simd.Int32x4) {
+func Transpose4(a0, a1, a2, a3 archsimd.Int32x4) (b0, b1, b2, b3 archsimd.Int32x4) {
 	t0, t1 := a0.InterleaveLo(a1), a0.InterleaveHi(a1)
 	t2, t3 := a2.InterleaveLo(a3), a2.InterleaveHi(a3)
@@ -34,7 +34,7 @@ func Transpose4(a0, a1, a2, a3 simd.Int32x4) (b0, b1, b2, b3 simd.Int32x4) {
 	return
 }
 
-func Transpose8(a0, a1, a2, a3, a4, a5, a6, a7 simd.Int32x8) (b0, b1, b2, b3, b4, b5, b6, b7 simd.Int32x8) {
+func Transpose8(a0, a1, a2, a3, a4, a5, a6, a7 archsimd.Int32x8) (b0, b1, b2, b3, b4, b5, b6, b7 archsimd.Int32x8) {
 	t0, t1 := a0.InterleaveLoGrouped(a1), a0.InterleaveHiGrouped(a1)
 	t2, t3 := a2.InterleaveLoGrouped(a3), a2.InterleaveHiGrouped(a3)
 	t4, t5 := a4.InterleaveLoGrouped(a5), a4.InterleaveHiGrouped(a5)
@@ -81,10 +81,10 @@ func Transpose8(a0, a1, a2, a3, a4, a5, a6, a7 simd.Int32x8) (b0, b1, b2, b3, b4
 func TestTranspose4(t *testing.T) {
 	r := make([]int32, 16, 16)
 
-	w := simd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
-	x := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
-	y := simd.LoadInt32x4Slice([]int32{0xE, 0xF, 0x10, 0x11})
-	z := simd.LoadInt32x4Slice([]int32{5, 6, 7, 8})
+	w := archsimd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
+	x := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
+	y := archsimd.LoadInt32x4Slice([]int32{0xE, 0xF, 0x10, 0x11})
+	z := archsimd.LoadInt32x4Slice([]int32{5, 6, 7, 8})
 
 	a, b, c, d := Transpose4(w, x, y, z)
 	a.StoreSlice(r[0:])
@@ -109,19 +109,19 @@ func TestTranspose8(t *testing.T) {
 		a = append(a, i)
 	}
 
-	p := simd.LoadInt32x8Slice(a[0:])
-	q := simd.LoadInt32x8Slice(a[8:])
-	r := simd.LoadInt32x8Slice(a[16:])
-	s := simd.LoadInt32x8Slice(a[24:])
+	p := archsimd.LoadInt32x8Slice(a[0:])
+	q := archsimd.LoadInt32x8Slice(a[8:])
+	r := archsimd.LoadInt32x8Slice(a[16:])
+	s := archsimd.LoadInt32x8Slice(a[24:])
 
-	w := simd.LoadInt32x8Slice(a[32:])
-	x := simd.LoadInt32x8Slice(a[40:])
-	y := simd.LoadInt32x8Slice(a[48:])
-	z := simd.LoadInt32x8Slice(a[56:])
+	w := archsimd.LoadInt32x8Slice(a[32:])
+	x := archsimd.LoadInt32x8Slice(a[40:])
+	y := archsimd.LoadInt32x8Slice(a[48:])
+	z := archsimd.LoadInt32x8Slice(a[56:])
 
 	p, q, r, s, w, x, y, z = Transpose8(p, q, r, s, w, x, y, z)
 
-	foo := func(a simd.Int32x8, z int32) {
+	foo := func(a archsimd.Int32x8, z int32) {
 		a.StoreSlice(m)
 		var o []int32
 		for i := int32(0); i < 8; i++ {
@@ -726,10 +726,10 @@ func transposeTiled4(m [][]int32) {
 		}
 		// transpose diagonal
 		d0, d1, d2, d3 :=
-			simd.LoadInt32x4Slice(r0[i:]),
-			simd.LoadInt32x4Slice(r1[i:]),
-			simd.LoadInt32x4Slice(r2[i:]),
-			simd.LoadInt32x4Slice(r3[i:])
+			archsimd.LoadInt32x4Slice(r0[i:]),
+			archsimd.LoadInt32x4Slice(r1[i:]),
+			archsimd.LoadInt32x4Slice(r2[i:]),
+			archsimd.LoadInt32x4Slice(r3[i:])
 
 		d0, d1, d2, d3 = Transpose4(d0, d1, d2, d3)
 
@@ -743,20 +743,20 @@ func transposeTiled4(m [][]int32) {
 		for ; j < i; j += B {
 			a0, a1, a2, a3 := m[j], m[j+1], m[j+2], m[j+3]
 			u0, u1, u2, u3 :=
-				simd.LoadInt32x4Slice(a0[i:]),
-				simd.LoadInt32x4Slice(a1[i:]),
-				simd.LoadInt32x4Slice(a2[i:]),
-				simd.LoadInt32x4Slice(a3[i:])
+				archsimd.LoadInt32x4Slice(a0[i:]),
+				archsimd.LoadInt32x4Slice(a1[i:]),
+				archsimd.LoadInt32x4Slice(a2[i:]),
+				archsimd.LoadInt32x4Slice(a3[i:])
 
 			u0, u1, u2, u3 = Transpose4(u0, u1, u2, u3)
 
-			l0 := simd.LoadInt32x4Slice(r0[j:])
+			l0 := archsimd.LoadInt32x4Slice(r0[j:])
 			u0.StoreSlice(r0[j:])
-			l1 := simd.LoadInt32x4Slice(r1[j:])
+			l1 := archsimd.LoadInt32x4Slice(r1[j:])
 			u1.StoreSlice(r1[j:])
-			l2 := simd.LoadInt32x4Slice(r2[j:])
+			l2 := archsimd.LoadInt32x4Slice(r2[j:])
 			u2.StoreSlice(r2[j:])
-			l3 := simd.LoadInt32x4Slice(r3[j:])
+			l3 := archsimd.LoadInt32x4Slice(r3[j:])
 			u3.StoreSlice(r3[j:])
 
 			u0, u1, u2, u3 = Transpose4(l0, l1, l2, l3)
@@ -790,14 +790,14 @@ func transposeTiled8(m [][]int32) {
 		}
 		// transpose diagonal
		d0, d1, d2, d3, d4, d5, d6, d7 :=
-			simd.LoadInt32x8Slice(r0[i:]),
-			simd.LoadInt32x8Slice(r1[i:]),
-			simd.LoadInt32x8Slice(r2[i:]),
-			simd.LoadInt32x8Slice(r3[i:]),
-			simd.LoadInt32x8Slice(r4[i:]),
-			simd.LoadInt32x8Slice(r5[i:]),
-			simd.LoadInt32x8Slice(r6[i:]),
-			simd.LoadInt32x8Slice(r7[i:])
+			archsimd.LoadInt32x8Slice(r0[i:]),
+			archsimd.LoadInt32x8Slice(r1[i:]),
+			archsimd.LoadInt32x8Slice(r2[i:]),
+			archsimd.LoadInt32x8Slice(r3[i:]),
+			archsimd.LoadInt32x8Slice(r4[i:]),
+			archsimd.LoadInt32x8Slice(r5[i:]),
+			archsimd.LoadInt32x8Slice(r6[i:]),
+			archsimd.LoadInt32x8Slice(r7[i:])
 
 		d0, d1, d2, d3, d4, d5, d6, d7 = Transpose8(d0, d1, d2, d3, d4, d5, d6, d7)
 
@@ -815,32 +815,32 @@ func transposeTiled8(m [][]int32) {
 		for ; j < i; j += B {
 			a7, a0, a1, a2, a3, a4, a5, a6 := m[j+7], m[j], m[j+1], m[j+2], m[j+3], m[j+4], m[j+5], m[j+6]
 			u0, u1, u2, u3, u4, u5, u6, u7 :=
-				simd.LoadInt32x8Slice(a0[i:]),
-				simd.LoadInt32x8Slice(a1[i:]),
-				simd.LoadInt32x8Slice(a2[i:]),
-				simd.LoadInt32x8Slice(a3[i:]),
-				simd.LoadInt32x8Slice(a4[i:]),
-				simd.LoadInt32x8Slice(a5[i:]),
-				simd.LoadInt32x8Slice(a6[i:]),
-				simd.LoadInt32x8Slice(a7[i:])
+				archsimd.LoadInt32x8Slice(a0[i:]),
+				archsimd.LoadInt32x8Slice(a1[i:]),
+				archsimd.LoadInt32x8Slice(a2[i:]),
+				archsimd.LoadInt32x8Slice(a3[i:]),
+				archsimd.LoadInt32x8Slice(a4[i:]),
+				archsimd.LoadInt32x8Slice(a5[i:]),
+				archsimd.LoadInt32x8Slice(a6[i:]),
+				archsimd.LoadInt32x8Slice(a7[i:])
 
 			u0, u1, u2, u3, u4, u5, u6, u7 = Transpose8(u0, u1, u2, u3, u4, u5, u6, u7)
 
-			l0 := simd.LoadInt32x8Slice(r0[j:])
+			l0 := archsimd.LoadInt32x8Slice(r0[j:])
 			u0.StoreSlice(r0[j:])
-			l1 := simd.LoadInt32x8Slice(r1[j:])
+			l1 := archsimd.LoadInt32x8Slice(r1[j:])
 			u1.StoreSlice(r1[j:])
-			l2 := simd.LoadInt32x8Slice(r2[j:])
+			l2 := archsimd.LoadInt32x8Slice(r2[j:])
 			u2.StoreSlice(r2[j:])
-			l3 := simd.LoadInt32x8Slice(r3[j:])
+			l3 := archsimd.LoadInt32x8Slice(r3[j:])
 			u3.StoreSlice(r3[j:])
-			l4 := simd.LoadInt32x8Slice(r4[j:])
+			l4 := archsimd.LoadInt32x8Slice(r4[j:])
 			u4.StoreSlice(r4[j:])
-			l5 := simd.LoadInt32x8Slice(r5[j:])
+			l5 := archsimd.LoadInt32x8Slice(r5[j:])
 			u5.StoreSlice(r5[j:])
-			l6 := simd.LoadInt32x8Slice(r6[j:])
+			l6 := archsimd.LoadInt32x8Slice(r6[j:])
 			u6.StoreSlice(r6[j:])
-			l7 := simd.LoadInt32x8Slice(r7[j:])
+			l7 := archsimd.LoadInt32x8Slice(r7[j:])
 			u7.StoreSlice(r7[j:])
 
 			u0, u1, u2, u3, u4, u5, u6, u7 = Transpose8(l0, l1, l2, l3, l4, l5, l6, l7)
diff --git a/src/simd/internal/simd_test/unary_helpers_test.go b/src/simd/archsimd/internal/simd_test/unary_helpers_test.go
similarity index 72%
rename from src/simd/internal/simd_test/unary_helpers_test.go
rename to src/simd/archsimd/internal/simd_test/unary_helpers_test.go
index d99fd3c505..e2610ad98b 100644
--- a/src/simd/internal/simd_test/unary_helpers_test.go
+++ b/src/simd/archsimd/internal/simd_test/unary_helpers_test.go
@@ -9,17 +9,17 @@
 package simd_test
 
 import (
-	"simd"
+	"simd/archsimd"
 	"testing"
 )
 
 // testInt8x16Unary tests the simd unary method f against the expected behavior generated by want
-func testInt8x16Unary(t *testing.T, f func(_ simd.Int8x16) simd.Int8x16, want func(_ []int8) []int8) {
+func testInt8x16Unary(t *testing.T, f func(_ archsimd.Int8x16) archsimd.Int8x16, want func(_ []int8) []int8) {
 	n := 16
 	t.Helper()
 	forSlice(t, int8s, n, func(x []int8) bool {
 		t.Helper()
-		a := simd.LoadInt8x16Slice(x)
+		a := archsimd.LoadInt8x16Slice(x)
 		g := make([]int8, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -28,12 +28,12 @@ func testInt8x16Unary(t *testing.T, f func(_ simd.Int8x16) simd.Int8x16, want fu
 }
 
 // testInt16x8Unary tests the simd unary method f against the expected behavior generated by want
-func testInt16x8Unary(t *testing.T, f func(_ simd.Int16x8) simd.Int16x8, want func(_ []int16) []int16) {
+func testInt16x8Unary(t *testing.T, f func(_ archsimd.Int16x8) archsimd.Int16x8, want func(_ []int16) []int16) {
 	n := 8
 	t.Helper()
 	forSlice(t, int16s, n, func(x []int16) bool {
 		t.Helper()
-		a := simd.LoadInt16x8Slice(x)
+		a := archsimd.LoadInt16x8Slice(x)
 		g := make([]int16, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -42,12 +42,12 @@ func testInt16x8Unary(t *testing.T, f func(_ simd.Int16x8) simd.Int16x8, want fu
 }
 
 // testInt32x4Unary tests the simd unary method f against the expected behavior generated by want
-func testInt32x4Unary(t *testing.T, f func(_ simd.Int32x4) simd.Int32x4, want func(_ []int32) []int32) {
+func testInt32x4Unary(t *testing.T, f func(_ archsimd.Int32x4) archsimd.Int32x4, want func(_ []int32) []int32) {
 	n := 4
 	t.Helper()
 	forSlice(t, int32s, n, func(x []int32) bool {
 		t.Helper()
-		a := simd.LoadInt32x4Slice(x)
+		a := archsimd.LoadInt32x4Slice(x)
 		g := make([]int32, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -56,12 +56,12 @@ func testInt32x4Unary(t *testing.T, f func(_ simd.Int32x4) simd.Int32x4, want fu
 }
 
 // testInt64x2Unary tests the simd unary method f against the expected behavior generated by want
-func testInt64x2Unary(t *testing.T, f func(_ simd.Int64x2) simd.Int64x2, want func(_ []int64) []int64) {
+func testInt64x2Unary(t *testing.T, f func(_ archsimd.Int64x2) archsimd.Int64x2, want func(_ []int64) []int64) {
 	n := 2
 	t.Helper()
 	forSlice(t, int64s, n, func(x []int64) bool {
 		t.Helper()
-		a := simd.LoadInt64x2Slice(x)
+		a := archsimd.LoadInt64x2Slice(x)
 		g := make([]int64, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -70,12 +70,12 @@ func testInt64x2Unary(t *testing.T, f func(_ simd.Int64x2) simd.Int64x2, want fu
 }
 
 // testUint8x16Unary tests the simd unary method f against the expected behavior generated by want
-func testUint8x16Unary(t *testing.T, f func(_ simd.Uint8x16) simd.Uint8x16, want func(_ []uint8) []uint8) {
+func testUint8x16Unary(t *testing.T, f func(_ archsimd.Uint8x16) archsimd.Uint8x16, want func(_ []uint8) []uint8) {
 	n := 16
 	t.Helper()
 	forSlice(t, uint8s, n, func(x []uint8) bool {
 		t.Helper()
-		a := simd.LoadUint8x16Slice(x)
+		a := archsimd.LoadUint8x16Slice(x)
 		g := make([]uint8, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -84,12 +84,12 @@ func testUint8x16Unary(t *testing.T, f func(_ simd.Uint8x16) simd.Uint8x16, want
 }
 
 // testUint16x8Unary tests the simd unary method f against the expected behavior generated by want
-func testUint16x8Unary(t *testing.T, f func(_ simd.Uint16x8) simd.Uint16x8, want func(_ []uint16) []uint16) {
+func testUint16x8Unary(t *testing.T, f func(_ archsimd.Uint16x8) archsimd.Uint16x8, want func(_ []uint16) []uint16) {
 	n := 8
 	t.Helper()
 	forSlice(t, uint16s, n, func(x []uint16) bool {
 		t.Helper()
-		a := simd.LoadUint16x8Slice(x)
+		a := archsimd.LoadUint16x8Slice(x)
 		g := make([]uint16, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -98,12 +98,12 @@ func testUint16x8Unary(t *testing.T, f func(_ simd.Uint16x8) simd.Uint16x8, want
 }
 
 // testUint32x4Unary tests the simd unary method f against the expected behavior generated by want
-func testUint32x4Unary(t *testing.T, f func(_ simd.Uint32x4) simd.Uint32x4, want func(_ []uint32) []uint32) {
+func testUint32x4Unary(t *testing.T, f func(_ archsimd.Uint32x4) archsimd.Uint32x4, want func(_ []uint32) []uint32) {
 	n := 4
 	t.Helper()
 	forSlice(t, uint32s, n, func(x []uint32) bool {
 		t.Helper()
-		a := simd.LoadUint32x4Slice(x)
+		a := archsimd.LoadUint32x4Slice(x)
 		g := make([]uint32, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -112,12 +112,12 @@ func testUint32x4Unary(t *testing.T, f func(_ simd.Uint32x4) simd.Uint32x4, want
 }
 
 // testUint64x2Unary tests the simd unary method f against the expected behavior generated by want
-func testUint64x2Unary(t *testing.T, f func(_ simd.Uint64x2) simd.Uint64x2, want func(_ []uint64) []uint64) {
+func testUint64x2Unary(t *testing.T, f func(_ archsimd.Uint64x2) archsimd.Uint64x2, want func(_ []uint64) []uint64) {
 	n := 2
 	t.Helper()
 	forSlice(t, uint64s, n, func(x []uint64) bool {
 		t.Helper()
-		a := simd.LoadUint64x2Slice(x)
+		a := archsimd.LoadUint64x2Slice(x)
 		g := make([]uint64, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -126,12 +126,12 @@ func testUint64x2Unary(t *testing.T, f func(_ simd.Uint64x2) simd.Uint64x2, want
 }
 
 // testFloat32x4Unary tests the simd unary method f against the expected behavior generated by want
-func testFloat32x4Unary(t *testing.T, f func(_ simd.Float32x4) simd.Float32x4, want func(_ []float32) []float32) {
+func testFloat32x4Unary(t *testing.T, f func(_ archsimd.Float32x4) archsimd.Float32x4, want func(_ []float32) []float32) {
 	n := 4
 	t.Helper()
 	forSlice(t, float32s, n, func(x []float32) bool {
 		t.Helper()
-		a := simd.LoadFloat32x4Slice(x)
+		a := archsimd.LoadFloat32x4Slice(x)
 		g := make([]float32, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -140,12 +140,12 @@ func testFloat32x4Unary(t *testing.T, f func(_ simd.Float32x4) simd.Float32x4, w
 }
 
 // testFloat64x2Unary tests the simd unary method f against the expected behavior generated by want
-func testFloat64x2Unary(t *testing.T, f func(_ simd.Float64x2) simd.Float64x2, want func(_ []float64) []float64) {
+func testFloat64x2Unary(t *testing.T, f func(_ archsimd.Float64x2) archsimd.Float64x2, want func(_ []float64) []float64) {
 	n := 2
 	t.Helper()
 	forSlice(t, float64s, n, func(x []float64) bool {
 		t.Helper()
-		a := simd.LoadFloat64x2Slice(x)
+		a := archsimd.LoadFloat64x2Slice(x)
 		g := make([]float64, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -154,12 +154,12 @@ func testFloat64x2Unary(t *testing.T, f func(_ simd.Float64x2) simd.Float64x2, w
 }
 
 // testInt8x32Unary tests the simd unary method f against the expected behavior generated by want
-func testInt8x32Unary(t *testing.T, f func(_ simd.Int8x32) simd.Int8x32, want func(_ []int8) []int8) {
+func testInt8x32Unary(t *testing.T, f func(_ archsimd.Int8x32) archsimd.Int8x32, want func(_ []int8) []int8) {
 	n := 32
 	t.Helper()
 	forSlice(t, int8s, n, func(x []int8) bool {
 		t.Helper()
-		a := simd.LoadInt8x32Slice(x)
+		a := archsimd.LoadInt8x32Slice(x)
 		g := make([]int8, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -168,12 +168,12 @@ func testInt8x32Unary(t *testing.T, f func(_ simd.Int8x32) simd.Int8x32, want fu
 }
 
 // testInt16x16Unary tests the simd unary method f against the expected behavior generated by want
-func testInt16x16Unary(t *testing.T, f func(_ simd.Int16x16) simd.Int16x16, want func(_ []int16) []int16) {
+func testInt16x16Unary(t *testing.T, f func(_ archsimd.Int16x16) archsimd.Int16x16, want func(_ []int16) []int16) {
 	n := 16
 	t.Helper()
 	forSlice(t, int16s, n, func(x []int16) bool {
 		t.Helper()
-		a := simd.LoadInt16x16Slice(x)
+		a := archsimd.LoadInt16x16Slice(x)
 		g := make([]int16, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -182,12 +182,12 @@ func testInt16x16Unary(t *testing.T, f func(_ simd.Int16x16) simd.Int16x16, want
 }
 
 // testInt32x8Unary tests the simd unary method f against the expected behavior generated by want
-func testInt32x8Unary(t *testing.T, f func(_ simd.Int32x8) simd.Int32x8, want func(_ []int32) []int32) {
+func testInt32x8Unary(t *testing.T, f func(_ archsimd.Int32x8) archsimd.Int32x8, want func(_ []int32) []int32) {
 	n := 8
 	t.Helper()
 	forSlice(t, int32s, n, func(x []int32) bool {
 		t.Helper()
-		a := simd.LoadInt32x8Slice(x)
+		a := archsimd.LoadInt32x8Slice(x)
 		g := make([]int32, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -196,12 +196,12 @@ func testInt32x8Unary(t *testing.T, f func(_ simd.Int32x8) simd.Int32x8, want fu
 }
 
 // testInt64x4Unary tests the simd unary method f against the expected behavior generated by want
-func testInt64x4Unary(t *testing.T, f func(_ simd.Int64x4) simd.Int64x4, want func(_ []int64) []int64) {
+func testInt64x4Unary(t *testing.T, f func(_ archsimd.Int64x4) archsimd.Int64x4, want func(_ []int64) []int64) {
 	n := 4
 	t.Helper()
 	forSlice(t, int64s, n, func(x []int64) bool {
 		t.Helper()
-		a := simd.LoadInt64x4Slice(x)
+		a := archsimd.LoadInt64x4Slice(x)
 		g := make([]int64, n)
 		f(a).StoreSlice(g)
 		w := want(x)
@@ -210,12 +210,12 @@ func testInt64x4Unary(t *testing.T, f func(_ simd.Int64x4) simd.Int64x4, want fu
 }
 
 // testUint8x32Unary tests the simd unary method f against the expected behavior generated by want
-func testUint8x32Unary(t *testing.T, f func(_ simd.Uint8x32) simd.Uint8x32, want func(_ []uint8) []uint8) {
+func testUint8x32Unary(t *testing.T, f func(_ archsimd.Uint8x32) archsimd.Uint8x32, want func(_ []uint8) []uint8) {
 	n := 32
 	t.Helper()
 	forSlice(t, uint8s, n, func(x []uint8) bool {
 		t.Helper()
-		a := simd.LoadUint8x32Slice(x)
+		a := archsimd.LoadUint8x32Slice(x)
 		g := make([]uint8, n)
f(a).StoreSlice(g) w := want(x) @@ -224,12 +224,12 @@ func testUint8x32Unary(t *testing.T, f func(_ simd.Uint8x32) simd.Uint8x32, want } // testUint16x16Unary tests the simd unary method f against the expected behavior generated by want -func testUint16x16Unary(t *testing.T, f func(_ simd.Uint16x16) simd.Uint16x16, want func(_ []uint16) []uint16) { +func testUint16x16Unary(t *testing.T, f func(_ archsimd.Uint16x16) archsimd.Uint16x16, want func(_ []uint16) []uint16) { n := 16 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() - a := simd.LoadUint16x16Slice(x) + a := archsimd.LoadUint16x16Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -238,12 +238,12 @@ func testUint16x16Unary(t *testing.T, f func(_ simd.Uint16x16) simd.Uint16x16, w } // testUint32x8Unary tests the simd unary method f against the expected behavior generated by want -func testUint32x8Unary(t *testing.T, f func(_ simd.Uint32x8) simd.Uint32x8, want func(_ []uint32) []uint32) { +func testUint32x8Unary(t *testing.T, f func(_ archsimd.Uint32x8) archsimd.Uint32x8, want func(_ []uint32) []uint32) { n := 8 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x8Slice(x) + a := archsimd.LoadUint32x8Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -252,12 +252,12 @@ func testUint32x8Unary(t *testing.T, f func(_ simd.Uint32x8) simd.Uint32x8, want } // testUint64x4Unary tests the simd unary method f against the expected behavior generated by want -func testUint64x4Unary(t *testing.T, f func(_ simd.Uint64x4) simd.Uint64x4, want func(_ []uint64) []uint64) { +func testUint64x4Unary(t *testing.T, f func(_ archsimd.Uint64x4) archsimd.Uint64x4, want func(_ []uint64) []uint64) { n := 4 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() - a := simd.LoadUint64x4Slice(x) + a := archsimd.LoadUint64x4Slice(x) g := make([]uint64, n) f(a).StoreSlice(g) w := want(x) @@ -266,12 +266,12 @@ func testUint64x4Unary(t *testing.T, f func(_ simd.Uint64x4) simd.Uint64x4, want } // testFloat32x8Unary tests the simd unary method f against the expected behavior generated by want -func testFloat32x8Unary(t *testing.T, f func(_ simd.Float32x8) simd.Float32x8, want func(_ []float32) []float32) { +func testFloat32x8Unary(t *testing.T, f func(_ archsimd.Float32x8) archsimd.Float32x8, want func(_ []float32) []float32) { n := 8 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x8Slice(x) + a := archsimd.LoadFloat32x8Slice(x) g := make([]float32, n) f(a).StoreSlice(g) w := want(x) @@ -280,12 +280,12 @@ func testFloat32x8Unary(t *testing.T, f func(_ simd.Float32x8) simd.Float32x8, w } // testFloat64x4Unary tests the simd unary method f against the expected behavior generated by want -func testFloat64x4Unary(t *testing.T, f func(_ simd.Float64x4) simd.Float64x4, want func(_ []float64) []float64) { +func testFloat64x4Unary(t *testing.T, f func(_ archsimd.Float64x4) archsimd.Float64x4, want func(_ []float64) []float64) { n := 4 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() - a := simd.LoadFloat64x4Slice(x) + a := archsimd.LoadFloat64x4Slice(x) g := make([]float64, n) f(a).StoreSlice(g) w := want(x) @@ -294,12 +294,12 @@ func testFloat64x4Unary(t *testing.T, f func(_ simd.Float64x4) simd.Float64x4, w } // testInt8x64Unary tests the simd unary method f against the expected behavior generated by want -func testInt8x64Unary(t *testing.T, f func(_ simd.Int8x64) simd.Int8x64, want func(_ []int8) 
[]int8) { +func testInt8x64Unary(t *testing.T, f func(_ archsimd.Int8x64) archsimd.Int8x64, want func(_ []int8) []int8) { n := 64 t.Helper() forSlice(t, int8s, n, func(x []int8) bool { t.Helper() - a := simd.LoadInt8x64Slice(x) + a := archsimd.LoadInt8x64Slice(x) g := make([]int8, n) f(a).StoreSlice(g) w := want(x) @@ -308,12 +308,12 @@ func testInt8x64Unary(t *testing.T, f func(_ simd.Int8x64) simd.Int8x64, want fu } // testInt16x32Unary tests the simd unary method f against the expected behavior generated by want -func testInt16x32Unary(t *testing.T, f func(_ simd.Int16x32) simd.Int16x32, want func(_ []int16) []int16) { +func testInt16x32Unary(t *testing.T, f func(_ archsimd.Int16x32) archsimd.Int16x32, want func(_ []int16) []int16) { n := 32 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() - a := simd.LoadInt16x32Slice(x) + a := archsimd.LoadInt16x32Slice(x) g := make([]int16, n) f(a).StoreSlice(g) w := want(x) @@ -322,12 +322,12 @@ func testInt16x32Unary(t *testing.T, f func(_ simd.Int16x32) simd.Int16x32, want } // testInt32x16Unary tests the simd unary method f against the expected behavior generated by want -func testInt32x16Unary(t *testing.T, f func(_ simd.Int32x16) simd.Int32x16, want func(_ []int32) []int32) { +func testInt32x16Unary(t *testing.T, f func(_ archsimd.Int32x16) archsimd.Int32x16, want func(_ []int32) []int32) { n := 16 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() - a := simd.LoadInt32x16Slice(x) + a := archsimd.LoadInt32x16Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -336,12 +336,12 @@ func testInt32x16Unary(t *testing.T, f func(_ simd.Int32x16) simd.Int32x16, want } // testInt64x8Unary tests the simd unary method f against the expected behavior generated by want -func testInt64x8Unary(t *testing.T, f func(_ simd.Int64x8) simd.Int64x8, want func(_ []int64) []int64) { +func testInt64x8Unary(t *testing.T, f func(_ archsimd.Int64x8) archsimd.Int64x8, want func(_ []int64) []int64) { n := 8 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() - a := simd.LoadInt64x8Slice(x) + a := archsimd.LoadInt64x8Slice(x) g := make([]int64, n) f(a).StoreSlice(g) w := want(x) @@ -350,12 +350,12 @@ func testInt64x8Unary(t *testing.T, f func(_ simd.Int64x8) simd.Int64x8, want fu } // testUint8x64Unary tests the simd unary method f against the expected behavior generated by want -func testUint8x64Unary(t *testing.T, f func(_ simd.Uint8x64) simd.Uint8x64, want func(_ []uint8) []uint8) { +func testUint8x64Unary(t *testing.T, f func(_ archsimd.Uint8x64) archsimd.Uint8x64, want func(_ []uint8) []uint8) { n := 64 t.Helper() forSlice(t, uint8s, n, func(x []uint8) bool { t.Helper() - a := simd.LoadUint8x64Slice(x) + a := archsimd.LoadUint8x64Slice(x) g := make([]uint8, n) f(a).StoreSlice(g) w := want(x) @@ -364,12 +364,12 @@ func testUint8x64Unary(t *testing.T, f func(_ simd.Uint8x64) simd.Uint8x64, want } // testUint16x32Unary tests the simd unary method f against the expected behavior generated by want -func testUint16x32Unary(t *testing.T, f func(_ simd.Uint16x32) simd.Uint16x32, want func(_ []uint16) []uint16) { +func testUint16x32Unary(t *testing.T, f func(_ archsimd.Uint16x32) archsimd.Uint16x32, want func(_ []uint16) []uint16) { n := 32 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() - a := simd.LoadUint16x32Slice(x) + a := archsimd.LoadUint16x32Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -378,12 +378,12 @@ func testUint16x32Unary(t *testing.T, f func(_ 
simd.Uint16x32) simd.Uint16x32, w } // testUint32x16Unary tests the simd unary method f against the expected behavior generated by want -func testUint32x16Unary(t *testing.T, f func(_ simd.Uint32x16) simd.Uint32x16, want func(_ []uint32) []uint32) { +func testUint32x16Unary(t *testing.T, f func(_ archsimd.Uint32x16) archsimd.Uint32x16, want func(_ []uint32) []uint32) { n := 16 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x16Slice(x) + a := archsimd.LoadUint32x16Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -392,12 +392,12 @@ func testUint32x16Unary(t *testing.T, f func(_ simd.Uint32x16) simd.Uint32x16, w } // testUint64x8Unary tests the simd unary method f against the expected behavior generated by want -func testUint64x8Unary(t *testing.T, f func(_ simd.Uint64x8) simd.Uint64x8, want func(_ []uint64) []uint64) { +func testUint64x8Unary(t *testing.T, f func(_ archsimd.Uint64x8) archsimd.Uint64x8, want func(_ []uint64) []uint64) { n := 8 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() - a := simd.LoadUint64x8Slice(x) + a := archsimd.LoadUint64x8Slice(x) g := make([]uint64, n) f(a).StoreSlice(g) w := want(x) @@ -406,12 +406,12 @@ func testUint64x8Unary(t *testing.T, f func(_ simd.Uint64x8) simd.Uint64x8, want } // testFloat32x16Unary tests the simd unary method f against the expected behavior generated by want -func testFloat32x16Unary(t *testing.T, f func(_ simd.Float32x16) simd.Float32x16, want func(_ []float32) []float32) { +func testFloat32x16Unary(t *testing.T, f func(_ archsimd.Float32x16) archsimd.Float32x16, want func(_ []float32) []float32) { n := 16 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x16Slice(x) + a := archsimd.LoadFloat32x16Slice(x) g := make([]float32, n) f(a).StoreSlice(g) w := want(x) @@ -420,12 +420,12 @@ func testFloat32x16Unary(t *testing.T, f func(_ simd.Float32x16) simd.Float32x16 } // testFloat64x8Unary tests the simd unary method f against the expected behavior generated by want -func testFloat64x8Unary(t *testing.T, f func(_ simd.Float64x8) simd.Float64x8, want func(_ []float64) []float64) { +func testFloat64x8Unary(t *testing.T, f func(_ archsimd.Float64x8) archsimd.Float64x8, want func(_ []float64) []float64) { n := 8 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() - a := simd.LoadFloat64x8Slice(x) + a := archsimd.LoadFloat64x8Slice(x) g := make([]float64, n) f(a).StoreSlice(g) w := want(x) @@ -435,12 +435,12 @@ func testFloat64x8Unary(t *testing.T, f func(_ simd.Float64x8) simd.Float64x8, w // testInt8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testInt8x16ConvertToInt32(t *testing.T, f func(x simd.Int8x16) simd.Int32x16, want func(x []int8) []int32) { +func testInt8x16ConvertToInt32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int32x16, want func(x []int8) []int32) { n := 16 t.Helper() forSlice(t, int8s, n, func(x []int8) bool { t.Helper() - a := simd.LoadInt8x16Slice(x) + a := archsimd.LoadInt8x16Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -450,12 +450,12 @@ func testInt8x16ConvertToInt32(t *testing.T, f func(x simd.Int8x16) simd.Int32x1 // testInt16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt16x8ConvertToInt32(t *testing.T, f func(x simd.Int16x8) simd.Int32x8, want func(x []int16) []int32) { +func testInt16x8ConvertToInt32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int32x8, want func(x []int16) []int32) { n := 8 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() - a := simd.LoadInt16x8Slice(x) + a := archsimd.LoadInt16x8Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -465,12 +465,12 @@ func testInt16x8ConvertToInt32(t *testing.T, f func(x simd.Int16x8) simd.Int32x8 // testInt32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt32x4ConvertToInt32(t *testing.T, f func(x simd.Int32x4) simd.Int32x4, want func(x []int32) []int32) { +func testInt32x4ConvertToInt32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int32x4, want func(x []int32) []int32) { n := 4 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() - a := simd.LoadInt32x4Slice(x) + a := archsimd.LoadInt32x4Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -480,12 +480,12 @@ func testInt32x4ConvertToInt32(t *testing.T, f func(x simd.Int32x4) simd.Int32x4 // testUint8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint8x16ConvertToInt32(t *testing.T, f func(x simd.Uint8x16) simd.Int32x16, want func(x []uint8) []int32) { +func testUint8x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int32x16, want func(x []uint8) []int32) { n := 16 t.Helper() forSlice(t, uint8s, n, func(x []uint8) bool { t.Helper() - a := simd.LoadUint8x16Slice(x) + a := archsimd.LoadUint8x16Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -495,12 +495,12 @@ func testUint8x16ConvertToInt32(t *testing.T, f func(x simd.Uint8x16) simd.Int32 // testUint16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testUint16x8ConvertToInt32(t *testing.T, f func(x simd.Uint16x8) simd.Int32x8, want func(x []uint16) []int32) { +func testUint16x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int32x8, want func(x []uint16) []int32) { n := 8 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() - a := simd.LoadUint16x8Slice(x) + a := archsimd.LoadUint16x8Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -510,12 +510,12 @@ func testUint16x8ConvertToInt32(t *testing.T, f func(x simd.Uint16x8) simd.Int32 // testUint32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint32x4ConvertToInt32(t *testing.T, f func(x simd.Uint32x4) simd.Int32x4, want func(x []uint32) []int32) { +func testUint32x4ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int32x4, want func(x []uint32) []int32) { n := 4 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x4Slice(x) + a := archsimd.LoadUint32x4Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -525,12 +525,12 @@ func testUint32x4ConvertToInt32(t *testing.T, f func(x simd.Uint32x4) simd.Int32 // testFloat32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat32x4ConvertToInt32(t *testing.T, f func(x simd.Float32x4) simd.Int32x4, want func(x []float32) []int32) { +func testFloat32x4ConvertToInt32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int32x4, want func(x []float32) []int32) { n := 4 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x4Slice(x) + a := archsimd.LoadFloat32x4Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -540,12 +540,12 @@ func testFloat32x4ConvertToInt32(t *testing.T, f func(x simd.Float32x4) simd.Int // testInt16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt16x16ConvertToInt32(t *testing.T, f func(x simd.Int16x16) simd.Int32x16, want func(x []int16) []int32) { +func testInt16x16ConvertToInt32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int32x16, want func(x []int16) []int32) { n := 16 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() - a := simd.LoadInt16x16Slice(x) + a := archsimd.LoadInt16x16Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -555,12 +555,12 @@ func testInt16x16ConvertToInt32(t *testing.T, f func(x simd.Int16x16) simd.Int32 // testInt32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testInt32x8ConvertToInt32(t *testing.T, f func(x simd.Int32x8) simd.Int32x8, want func(x []int32) []int32) { +func testInt32x8ConvertToInt32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int32x8, want func(x []int32) []int32) { n := 8 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() - a := simd.LoadInt32x8Slice(x) + a := archsimd.LoadInt32x8Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -570,12 +570,12 @@ func testInt32x8ConvertToInt32(t *testing.T, f func(x simd.Int32x8) simd.Int32x8 // testInt64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt64x4ConvertToInt32(t *testing.T, f func(x simd.Int64x4) simd.Int32x4, want func(x []int64) []int32) { +func testInt64x4ConvertToInt32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int32x4, want func(x []int64) []int32) { n := 4 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() - a := simd.LoadInt64x4Slice(x) + a := archsimd.LoadInt64x4Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -585,12 +585,12 @@ func testInt64x4ConvertToInt32(t *testing.T, f func(x simd.Int64x4) simd.Int32x4 // testUint16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint16x16ConvertToInt32(t *testing.T, f func(x simd.Uint16x16) simd.Int32x16, want func(x []uint16) []int32) { +func testUint16x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int32x16, want func(x []uint16) []int32) { n := 16 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() - a := simd.LoadUint16x16Slice(x) + a := archsimd.LoadUint16x16Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -600,12 +600,12 @@ func testUint16x16ConvertToInt32(t *testing.T, f func(x simd.Uint16x16) simd.Int // testUint32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint32x8ConvertToInt32(t *testing.T, f func(x simd.Uint32x8) simd.Int32x8, want func(x []uint32) []int32) { +func testUint32x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int32x8, want func(x []uint32) []int32) { n := 8 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x8Slice(x) + a := archsimd.LoadUint32x8Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -615,12 +615,12 @@ func testUint32x8ConvertToInt32(t *testing.T, f func(x simd.Uint32x8) simd.Int32 // testUint64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testUint64x4ConvertToInt32(t *testing.T, f func(x simd.Uint64x4) simd.Int32x4, want func(x []uint64) []int32) { +func testUint64x4ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int32x4, want func(x []uint64) []int32) { n := 4 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() - a := simd.LoadUint64x4Slice(x) + a := archsimd.LoadUint64x4Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -630,12 +630,12 @@ func testUint64x4ConvertToInt32(t *testing.T, f func(x simd.Uint64x4) simd.Int32 // testFloat32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat32x8ConvertToInt32(t *testing.T, f func(x simd.Float32x8) simd.Int32x8, want func(x []float32) []int32) { +func testFloat32x8ConvertToInt32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int32x8, want func(x []float32) []int32) { n := 8 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x8Slice(x) + a := archsimd.LoadFloat32x8Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -645,12 +645,12 @@ func testFloat32x8ConvertToInt32(t *testing.T, f func(x simd.Float32x8) simd.Int // testFloat64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat64x4ConvertToInt32(t *testing.T, f func(x simd.Float64x4) simd.Int32x4, want func(x []float64) []int32) { +func testFloat64x4ConvertToInt32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int32x4, want func(x []float64) []int32) { n := 4 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() - a := simd.LoadFloat64x4Slice(x) + a := archsimd.LoadFloat64x4Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -660,12 +660,12 @@ func testFloat64x4ConvertToInt32(t *testing.T, f func(x simd.Float64x4) simd.Int // testInt32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt32x16ConvertToInt32(t *testing.T, f func(x simd.Int32x16) simd.Int32x16, want func(x []int32) []int32) { +func testInt32x16ConvertToInt32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int32x16, want func(x []int32) []int32) { n := 16 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() - a := simd.LoadInt32x16Slice(x) + a := archsimd.LoadInt32x16Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -675,12 +675,12 @@ func testInt32x16ConvertToInt32(t *testing.T, f func(x simd.Int32x16) simd.Int32 // testInt64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testInt64x8ConvertToInt32(t *testing.T, f func(x simd.Int64x8) simd.Int32x8, want func(x []int64) []int32) { +func testInt64x8ConvertToInt32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int32x8, want func(x []int64) []int32) { n := 8 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() - a := simd.LoadInt64x8Slice(x) + a := archsimd.LoadInt64x8Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -690,12 +690,12 @@ func testInt64x8ConvertToInt32(t *testing.T, f func(x simd.Int64x8) simd.Int32x8 // testUint32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint32x16ConvertToInt32(t *testing.T, f func(x simd.Uint32x16) simd.Int32x16, want func(x []uint32) []int32) { +func testUint32x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int32x16, want func(x []uint32) []int32) { n := 16 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x16Slice(x) + a := archsimd.LoadUint32x16Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -705,12 +705,12 @@ func testUint32x16ConvertToInt32(t *testing.T, f func(x simd.Uint32x16) simd.Int // testUint64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint64x8ConvertToInt32(t *testing.T, f func(x simd.Uint64x8) simd.Int32x8, want func(x []uint64) []int32) { +func testUint64x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int32x8, want func(x []uint64) []int32) { n := 8 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() - a := simd.LoadUint64x8Slice(x) + a := archsimd.LoadUint64x8Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -720,12 +720,12 @@ func testUint64x8ConvertToInt32(t *testing.T, f func(x simd.Uint64x8) simd.Int32 // testFloat32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat32x16ConvertToInt32(t *testing.T, f func(x simd.Float32x16) simd.Int32x16, want func(x []float32) []int32) { +func testFloat32x16ConvertToInt32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int32x16, want func(x []float32) []int32) { n := 16 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x16Slice(x) + a := archsimd.LoadFloat32x16Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -735,12 +735,12 @@ func testFloat32x16ConvertToInt32(t *testing.T, f func(x simd.Float32x16) simd.I // testFloat64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testFloat64x8ConvertToInt32(t *testing.T, f func(x simd.Float64x8) simd.Int32x8, want func(x []float64) []int32) { +func testFloat64x8ConvertToInt32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int32x8, want func(x []float64) []int32) { n := 8 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() - a := simd.LoadFloat64x8Slice(x) + a := archsimd.LoadFloat64x8Slice(x) g := make([]int32, n) f(a).StoreSlice(g) w := want(x) @@ -750,12 +750,12 @@ func testFloat64x8ConvertToInt32(t *testing.T, f func(x simd.Float64x8) simd.Int // testInt8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt8x16ConvertToUint32(t *testing.T, f func(x simd.Int8x16) simd.Uint32x16, want func(x []int8) []uint32) { +func testInt8x16ConvertToUint32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint32x16, want func(x []int8) []uint32) { n := 16 t.Helper() forSlice(t, int8s, n, func(x []int8) bool { t.Helper() - a := simd.LoadInt8x16Slice(x) + a := archsimd.LoadInt8x16Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -765,12 +765,12 @@ func testInt8x16ConvertToUint32(t *testing.T, f func(x simd.Int8x16) simd.Uint32 // testInt16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt16x8ConvertToUint32(t *testing.T, f func(x simd.Int16x8) simd.Uint32x8, want func(x []int16) []uint32) { +func testInt16x8ConvertToUint32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint32x8, want func(x []int16) []uint32) { n := 8 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() - a := simd.LoadInt16x8Slice(x) + a := archsimd.LoadInt16x8Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -780,12 +780,12 @@ func testInt16x8ConvertToUint32(t *testing.T, f func(x simd.Int16x8) simd.Uint32 // testInt32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt32x4ConvertToUint32(t *testing.T, f func(x simd.Int32x4) simd.Uint32x4, want func(x []int32) []uint32) { +func testInt32x4ConvertToUint32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint32x4, want func(x []int32) []uint32) { n := 4 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() - a := simd.LoadInt32x4Slice(x) + a := archsimd.LoadInt32x4Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -795,12 +795,12 @@ func testInt32x4ConvertToUint32(t *testing.T, f func(x simd.Int32x4) simd.Uint32 // testUint8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testUint8x16ConvertToUint32(t *testing.T, f func(x simd.Uint8x16) simd.Uint32x16, want func(x []uint8) []uint32) { +func testUint8x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint32x16, want func(x []uint8) []uint32) { n := 16 t.Helper() forSlice(t, uint8s, n, func(x []uint8) bool { t.Helper() - a := simd.LoadUint8x16Slice(x) + a := archsimd.LoadUint8x16Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -810,12 +810,12 @@ func testUint8x16ConvertToUint32(t *testing.T, f func(x simd.Uint8x16) simd.Uint // testUint16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint16x8ConvertToUint32(t *testing.T, f func(x simd.Uint16x8) simd.Uint32x8, want func(x []uint16) []uint32) { +func testUint16x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint32x8, want func(x []uint16) []uint32) { n := 8 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() - a := simd.LoadUint16x8Slice(x) + a := archsimd.LoadUint16x8Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -825,12 +825,12 @@ func testUint16x8ConvertToUint32(t *testing.T, f func(x simd.Uint16x8) simd.Uint // testUint32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint32x4ConvertToUint32(t *testing.T, f func(x simd.Uint32x4) simd.Uint32x4, want func(x []uint32) []uint32) { +func testUint32x4ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint32x4, want func(x []uint32) []uint32) { n := 4 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x4Slice(x) + a := archsimd.LoadUint32x4Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -840,12 +840,12 @@ func testUint32x4ConvertToUint32(t *testing.T, f func(x simd.Uint32x4) simd.Uint // testFloat32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat32x4ConvertToUint32(t *testing.T, f func(x simd.Float32x4) simd.Uint32x4, want func(x []float32) []uint32) { +func testFloat32x4ConvertToUint32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint32x4, want func(x []float32) []uint32) { n := 4 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x4Slice(x) + a := archsimd.LoadFloat32x4Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -855,12 +855,12 @@ func testFloat32x4ConvertToUint32(t *testing.T, f func(x simd.Float32x4) simd.Ui // testInt16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testInt16x16ConvertToUint32(t *testing.T, f func(x simd.Int16x16) simd.Uint32x16, want func(x []int16) []uint32) { +func testInt16x16ConvertToUint32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint32x16, want func(x []int16) []uint32) { n := 16 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() - a := simd.LoadInt16x16Slice(x) + a := archsimd.LoadInt16x16Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -870,12 +870,12 @@ func testInt16x16ConvertToUint32(t *testing.T, f func(x simd.Int16x16) simd.Uint // testInt32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt32x8ConvertToUint32(t *testing.T, f func(x simd.Int32x8) simd.Uint32x8, want func(x []int32) []uint32) { +func testInt32x8ConvertToUint32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint32x8, want func(x []int32) []uint32) { n := 8 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() - a := simd.LoadInt32x8Slice(x) + a := archsimd.LoadInt32x8Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -885,12 +885,12 @@ func testInt32x8ConvertToUint32(t *testing.T, f func(x simd.Int32x8) simd.Uint32 // testInt64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt64x4ConvertToUint32(t *testing.T, f func(x simd.Int64x4) simd.Uint32x4, want func(x []int64) []uint32) { +func testInt64x4ConvertToUint32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint32x4, want func(x []int64) []uint32) { n := 4 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() - a := simd.LoadInt64x4Slice(x) + a := archsimd.LoadInt64x4Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -900,12 +900,12 @@ func testInt64x4ConvertToUint32(t *testing.T, f func(x simd.Int64x4) simd.Uint32 // testUint16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint16x16ConvertToUint32(t *testing.T, f func(x simd.Uint16x16) simd.Uint32x16, want func(x []uint16) []uint32) { +func testUint16x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint32x16, want func(x []uint16) []uint32) { n := 16 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() - a := simd.LoadUint16x16Slice(x) + a := archsimd.LoadUint16x16Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -915,12 +915,12 @@ func testUint16x16ConvertToUint32(t *testing.T, f func(x simd.Uint16x16) simd.Ui // testUint32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testUint32x8ConvertToUint32(t *testing.T, f func(x simd.Uint32x8) simd.Uint32x8, want func(x []uint32) []uint32) { +func testUint32x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint32x8, want func(x []uint32) []uint32) { n := 8 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x8Slice(x) + a := archsimd.LoadUint32x8Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -930,12 +930,12 @@ func testUint32x8ConvertToUint32(t *testing.T, f func(x simd.Uint32x8) simd.Uint // testUint64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint64x4ConvertToUint32(t *testing.T, f func(x simd.Uint64x4) simd.Uint32x4, want func(x []uint64) []uint32) { +func testUint64x4ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint32x4, want func(x []uint64) []uint32) { n := 4 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() - a := simd.LoadUint64x4Slice(x) + a := archsimd.LoadUint64x4Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -945,12 +945,12 @@ func testUint64x4ConvertToUint32(t *testing.T, f func(x simd.Uint64x4) simd.Uint // testFloat32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat32x8ConvertToUint32(t *testing.T, f func(x simd.Float32x8) simd.Uint32x8, want func(x []float32) []uint32) { +func testFloat32x8ConvertToUint32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint32x8, want func(x []float32) []uint32) { n := 8 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x8Slice(x) + a := archsimd.LoadFloat32x8Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -960,12 +960,12 @@ func testFloat32x8ConvertToUint32(t *testing.T, f func(x simd.Float32x8) simd.Ui // testFloat64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat64x4ConvertToUint32(t *testing.T, f func(x simd.Float64x4) simd.Uint32x4, want func(x []float64) []uint32) { +func testFloat64x4ConvertToUint32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint32x4, want func(x []float64) []uint32) { n := 4 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() - a := simd.LoadFloat64x4Slice(x) + a := archsimd.LoadFloat64x4Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -975,12 +975,12 @@ func testFloat64x4ConvertToUint32(t *testing.T, f func(x simd.Float64x4) simd.Ui // testInt32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testInt32x16ConvertToUint32(t *testing.T, f func(x simd.Int32x16) simd.Uint32x16, want func(x []int32) []uint32) { +func testInt32x16ConvertToUint32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint32x16, want func(x []int32) []uint32) { n := 16 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() - a := simd.LoadInt32x16Slice(x) + a := archsimd.LoadInt32x16Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -990,12 +990,12 @@ func testInt32x16ConvertToUint32(t *testing.T, f func(x simd.Int32x16) simd.Uint // testInt64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt64x8ConvertToUint32(t *testing.T, f func(x simd.Int64x8) simd.Uint32x8, want func(x []int64) []uint32) { +func testInt64x8ConvertToUint32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint32x8, want func(x []int64) []uint32) { n := 8 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() - a := simd.LoadInt64x8Slice(x) + a := archsimd.LoadInt64x8Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -1005,12 +1005,12 @@ func testInt64x8ConvertToUint32(t *testing.T, f func(x simd.Int64x8) simd.Uint32 // testUint32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint32x16ConvertToUint32(t *testing.T, f func(x simd.Uint32x16) simd.Uint32x16, want func(x []uint32) []uint32) { +func testUint32x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint32x16, want func(x []uint32) []uint32) { n := 16 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x16Slice(x) + a := archsimd.LoadUint32x16Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -1020,12 +1020,12 @@ func testUint32x16ConvertToUint32(t *testing.T, f func(x simd.Uint32x16) simd.Ui // testUint64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint64x8ConvertToUint32(t *testing.T, f func(x simd.Uint64x8) simd.Uint32x8, want func(x []uint64) []uint32) { +func testUint64x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint32x8, want func(x []uint64) []uint32) { n := 8 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() - a := simd.LoadUint64x8Slice(x) + a := archsimd.LoadUint64x8Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -1035,12 +1035,12 @@ func testUint64x8ConvertToUint32(t *testing.T, f func(x simd.Uint64x8) simd.Uint // testFloat32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testFloat32x16ConvertToUint32(t *testing.T, f func(x simd.Float32x16) simd.Uint32x16, want func(x []float32) []uint32) { +func testFloat32x16ConvertToUint32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint32x16, want func(x []float32) []uint32) { n := 16 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x16Slice(x) + a := archsimd.LoadFloat32x16Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -1050,12 +1050,12 @@ func testFloat32x16ConvertToUint32(t *testing.T, f func(x simd.Float32x16) simd. // testFloat64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat64x8ConvertToUint32(t *testing.T, f func(x simd.Float64x8) simd.Uint32x8, want func(x []float64) []uint32) { +func testFloat64x8ConvertToUint32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint32x8, want func(x []float64) []uint32) { n := 8 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() - a := simd.LoadFloat64x8Slice(x) + a := archsimd.LoadFloat64x8Slice(x) g := make([]uint32, n) f(a).StoreSlice(g) w := want(x) @@ -1065,12 +1065,12 @@ func testFloat64x8ConvertToUint32(t *testing.T, f func(x simd.Float64x8) simd.Ui // testInt8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt8x16ConvertToUint16(t *testing.T, f func(x simd.Int8x16) simd.Uint16x16, want func(x []int8) []uint16) { +func testInt8x16ConvertToUint16(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint16x16, want func(x []int8) []uint16) { n := 16 t.Helper() forSlice(t, int8s, n, func(x []int8) bool { t.Helper() - a := simd.LoadInt8x16Slice(x) + a := archsimd.LoadInt8x16Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1080,12 +1080,12 @@ func testInt8x16ConvertToUint16(t *testing.T, f func(x simd.Int8x16) simd.Uint16 // testInt16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt16x8ConvertToUint16(t *testing.T, f func(x simd.Int16x8) simd.Uint16x8, want func(x []int16) []uint16) { +func testInt16x8ConvertToUint16(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint16x8, want func(x []int16) []uint16) { n := 8 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() - a := simd.LoadInt16x8Slice(x) + a := archsimd.LoadInt16x8Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1095,12 +1095,12 @@ func testInt16x8ConvertToUint16(t *testing.T, f func(x simd.Int16x8) simd.Uint16 // testUint8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testUint8x16ConvertToUint16(t *testing.T, f func(x simd.Uint8x16) simd.Uint16x16, want func(x []uint8) []uint16) { +func testUint8x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint16x16, want func(x []uint8) []uint16) { n := 16 t.Helper() forSlice(t, uint8s, n, func(x []uint8) bool { t.Helper() - a := simd.LoadUint8x16Slice(x) + a := archsimd.LoadUint8x16Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1110,12 +1110,12 @@ func testUint8x16ConvertToUint16(t *testing.T, f func(x simd.Uint8x16) simd.Uint // testUint16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint16x8ConvertToUint16(t *testing.T, f func(x simd.Uint16x8) simd.Uint16x8, want func(x []uint16) []uint16) { +func testUint16x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint16x8, want func(x []uint16) []uint16) { n := 8 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() - a := simd.LoadUint16x8Slice(x) + a := archsimd.LoadUint16x8Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1125,12 +1125,12 @@ func testUint16x8ConvertToUint16(t *testing.T, f func(x simd.Uint16x8) simd.Uint // testInt8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt8x32ConvertToUint16(t *testing.T, f func(x simd.Int8x32) simd.Uint16x32, want func(x []int8) []uint16) { +func testInt8x32ConvertToUint16(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint16x32, want func(x []int8) []uint16) { n := 32 t.Helper() forSlice(t, int8s, n, func(x []int8) bool { t.Helper() - a := simd.LoadInt8x32Slice(x) + a := archsimd.LoadInt8x32Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1140,12 +1140,12 @@ func testInt8x32ConvertToUint16(t *testing.T, f func(x simd.Int8x32) simd.Uint16 // testInt16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt16x16ConvertToUint16(t *testing.T, f func(x simd.Int16x16) simd.Uint16x16, want func(x []int16) []uint16) { +func testInt16x16ConvertToUint16(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint16x16, want func(x []int16) []uint16) { n := 16 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() - a := simd.LoadInt16x16Slice(x) + a := archsimd.LoadInt16x16Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1155,12 +1155,12 @@ func testInt16x16ConvertToUint16(t *testing.T, f func(x simd.Int16x16) simd.Uint // testInt32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testInt32x8ConvertToUint16(t *testing.T, f func(x simd.Int32x8) simd.Uint16x8, want func(x []int32) []uint16) { +func testInt32x8ConvertToUint16(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint16x8, want func(x []int32) []uint16) { n := 8 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() - a := simd.LoadInt32x8Slice(x) + a := archsimd.LoadInt32x8Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1170,12 +1170,12 @@ func testInt32x8ConvertToUint16(t *testing.T, f func(x simd.Int32x8) simd.Uint16 // testUint8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint8x32ConvertToUint16(t *testing.T, f func(x simd.Uint8x32) simd.Uint16x32, want func(x []uint8) []uint16) { +func testUint8x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint16x32, want func(x []uint8) []uint16) { n := 32 t.Helper() forSlice(t, uint8s, n, func(x []uint8) bool { t.Helper() - a := simd.LoadUint8x32Slice(x) + a := archsimd.LoadUint8x32Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1185,12 +1185,12 @@ func testUint8x32ConvertToUint16(t *testing.T, f func(x simd.Uint8x32) simd.Uint // testUint16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint16x16ConvertToUint16(t *testing.T, f func(x simd.Uint16x16) simd.Uint16x16, want func(x []uint16) []uint16) { +func testUint16x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint16x16, want func(x []uint16) []uint16) { n := 16 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() - a := simd.LoadUint16x16Slice(x) + a := archsimd.LoadUint16x16Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1200,12 +1200,12 @@ func testUint16x16ConvertToUint16(t *testing.T, f func(x simd.Uint16x16) simd.Ui // testUint32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint32x8ConvertToUint16(t *testing.T, f func(x simd.Uint32x8) simd.Uint16x8, want func(x []uint32) []uint16) { +func testUint32x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint16x8, want func(x []uint32) []uint16) { n := 8 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x8Slice(x) + a := archsimd.LoadUint32x8Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1215,12 +1215,12 @@ func testUint32x8ConvertToUint16(t *testing.T, f func(x simd.Uint32x8) simd.Uint // testFloat32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testFloat32x8ConvertToUint16(t *testing.T, f func(x simd.Float32x8) simd.Uint16x8, want func(x []float32) []uint16) { +func testFloat32x8ConvertToUint16(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint16x8, want func(x []float32) []uint16) { n := 8 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x8Slice(x) + a := archsimd.LoadFloat32x8Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1230,12 +1230,12 @@ func testFloat32x8ConvertToUint16(t *testing.T, f func(x simd.Float32x8) simd.Ui // testInt16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt16x32ConvertToUint16(t *testing.T, f func(x simd.Int16x32) simd.Uint16x32, want func(x []int16) []uint16) { +func testInt16x32ConvertToUint16(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint16x32, want func(x []int16) []uint16) { n := 32 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() - a := simd.LoadInt16x32Slice(x) + a := archsimd.LoadInt16x32Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1245,12 +1245,12 @@ func testInt16x32ConvertToUint16(t *testing.T, f func(x simd.Int16x32) simd.Uint // testInt32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt32x16ConvertToUint16(t *testing.T, f func(x simd.Int32x16) simd.Uint16x16, want func(x []int32) []uint16) { +func testInt32x16ConvertToUint16(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint16x16, want func(x []int32) []uint16) { n := 16 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() - a := simd.LoadInt32x16Slice(x) + a := archsimd.LoadInt32x16Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1260,12 +1260,12 @@ func testInt32x16ConvertToUint16(t *testing.T, f func(x simd.Int32x16) simd.Uint // testInt64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt64x8ConvertToUint16(t *testing.T, f func(x simd.Int64x8) simd.Uint16x8, want func(x []int64) []uint16) { +func testInt64x8ConvertToUint16(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint16x8, want func(x []int64) []uint16) { n := 8 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() - a := simd.LoadInt64x8Slice(x) + a := archsimd.LoadInt64x8Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1275,12 +1275,12 @@ func testInt64x8ConvertToUint16(t *testing.T, f func(x simd.Int64x8) simd.Uint16 // testUint16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testUint16x32ConvertToUint16(t *testing.T, f func(x simd.Uint16x32) simd.Uint16x32, want func(x []uint16) []uint16) { +func testUint16x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint16x32, want func(x []uint16) []uint16) { n := 32 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() - a := simd.LoadUint16x32Slice(x) + a := archsimd.LoadUint16x32Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1290,12 +1290,12 @@ func testUint16x32ConvertToUint16(t *testing.T, f func(x simd.Uint16x32) simd.Ui // testUint32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint32x16ConvertToUint16(t *testing.T, f func(x simd.Uint32x16) simd.Uint16x16, want func(x []uint32) []uint16) { +func testUint32x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint16x16, want func(x []uint32) []uint16) { n := 16 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() - a := simd.LoadUint32x16Slice(x) + a := archsimd.LoadUint32x16Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1305,12 +1305,12 @@ func testUint32x16ConvertToUint16(t *testing.T, f func(x simd.Uint32x16) simd.Ui // testUint64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint64x8ConvertToUint16(t *testing.T, f func(x simd.Uint64x8) simd.Uint16x8, want func(x []uint64) []uint16) { +func testUint64x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint16x8, want func(x []uint64) []uint16) { n := 8 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() - a := simd.LoadUint64x8Slice(x) + a := archsimd.LoadUint64x8Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1320,12 +1320,12 @@ func testUint64x8ConvertToUint16(t *testing.T, f func(x simd.Uint64x8) simd.Uint // testFloat32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat32x16ConvertToUint16(t *testing.T, f func(x simd.Float32x16) simd.Uint16x16, want func(x []float32) []uint16) { +func testFloat32x16ConvertToUint16(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint16x16, want func(x []float32) []uint16) { n := 16 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x16Slice(x) + a := archsimd.LoadFloat32x16Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1335,12 +1335,12 @@ func testFloat32x16ConvertToUint16(t *testing.T, f func(x simd.Float32x16) simd. // testFloat64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want // This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testFloat64x8ConvertToUint16(t *testing.T, f func(x simd.Float64x8) simd.Uint16x8, want func(x []float64) []uint16) { +func testFloat64x8ConvertToUint16(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint16x8, want func(x []float64) []uint16) { n := 8 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() - a := simd.LoadFloat64x8Slice(x) + a := archsimd.LoadFloat64x8Slice(x) g := make([]uint16, n) f(a).StoreSlice(g) w := want(x) @@ -1350,12 +1350,12 @@ func testFloat64x8ConvertToUint16(t *testing.T, f func(x simd.Float64x8) simd.Ui // testFloat32x4UnaryFlaky tests the simd unary method f against the expected behavior generated by want, // but using a flakiness parameter because we haven't exactly figured out how simd floating point works -func testFloat32x4UnaryFlaky(t *testing.T, f func(x simd.Float32x4) simd.Float32x4, want func(x []float32) []float32, flakiness float64) { +func testFloat32x4UnaryFlaky(t *testing.T, f func(x archsimd.Float32x4) archsimd.Float32x4, want func(x []float32) []float32, flakiness float64) { n := 4 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x4Slice(x) + a := archsimd.LoadFloat32x4Slice(x) g := make([]float32, n) f(a).StoreSlice(g) w := want(x) @@ -1365,12 +1365,12 @@ func testFloat32x4UnaryFlaky(t *testing.T, f func(x simd.Float32x4) simd.Float32 // testFloat64x2UnaryFlaky tests the simd unary method f against the expected behavior generated by want, // but using a flakiness parameter because we haven't exactly figured out how simd floating point works -func testFloat64x2UnaryFlaky(t *testing.T, f func(x simd.Float64x2) simd.Float64x2, want func(x []float64) []float64, flakiness float64) { +func testFloat64x2UnaryFlaky(t *testing.T, f func(x archsimd.Float64x2) archsimd.Float64x2, want func(x []float64) []float64, flakiness float64) { n := 2 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() - a := simd.LoadFloat64x2Slice(x) + a := archsimd.LoadFloat64x2Slice(x) g := make([]float64, n) f(a).StoreSlice(g) w := want(x) @@ -1380,12 +1380,12 @@ func testFloat64x2UnaryFlaky(t *testing.T, f func(x simd.Float64x2) simd.Float64 // testFloat32x8UnaryFlaky tests the simd unary method f against the expected behavior generated by want, // but using a flakiness parameter because we haven't exactly figured out how simd floating point works -func testFloat32x8UnaryFlaky(t *testing.T, f func(x simd.Float32x8) simd.Float32x8, want func(x []float32) []float32, flakiness float64) { +func testFloat32x8UnaryFlaky(t *testing.T, f func(x archsimd.Float32x8) archsimd.Float32x8, want func(x []float32) []float32, flakiness float64) { n := 8 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x8Slice(x) + a := archsimd.LoadFloat32x8Slice(x) g := make([]float32, n) f(a).StoreSlice(g) w := want(x) @@ -1395,12 +1395,12 @@ func testFloat32x8UnaryFlaky(t *testing.T, f func(x simd.Float32x8) simd.Float32 // testFloat64x4UnaryFlaky tests the simd unary method f against the expected behavior generated by want, // but using a flakiness parameter because we haven't exactly figured out how simd floating point works -func testFloat64x4UnaryFlaky(t *testing.T, f func(x simd.Float64x4) simd.Float64x4, want func(x []float64) []float64, flakiness float64) { +func testFloat64x4UnaryFlaky(t *testing.T, f func(x archsimd.Float64x4) archsimd.Float64x4, want func(x []float64) []float64, flakiness float64) { n := 4 t.Helper() forSlice(t, float64s, n, func(x 
[]float64) bool { t.Helper() - a := simd.LoadFloat64x4Slice(x) + a := archsimd.LoadFloat64x4Slice(x) g := make([]float64, n) f(a).StoreSlice(g) w := want(x) @@ -1410,12 +1410,12 @@ func testFloat64x4UnaryFlaky(t *testing.T, f func(x simd.Float64x4) simd.Float64 // testFloat32x16UnaryFlaky tests the simd unary method f against the expected behavior generated by want, // but using a flakiness parameter because we haven't exactly figured out how simd floating point works -func testFloat32x16UnaryFlaky(t *testing.T, f func(x simd.Float32x16) simd.Float32x16, want func(x []float32) []float32, flakiness float64) { +func testFloat32x16UnaryFlaky(t *testing.T, f func(x archsimd.Float32x16) archsimd.Float32x16, want func(x []float32) []float32, flakiness float64) { n := 16 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() - a := simd.LoadFloat32x16Slice(x) + a := archsimd.LoadFloat32x16Slice(x) g := make([]float32, n) f(a).StoreSlice(g) w := want(x) @@ -1425,12 +1425,12 @@ func testFloat32x16UnaryFlaky(t *testing.T, f func(x simd.Float32x16) simd.Float // testFloat64x8UnaryFlaky tests the simd unary method f against the expected behavior generated by want, // but using a flakiness parameter because we haven't exactly figured out how simd floating point works -func testFloat64x8UnaryFlaky(t *testing.T, f func(x simd.Float64x8) simd.Float64x8, want func(x []float64) []float64, flakiness float64) { +func testFloat64x8UnaryFlaky(t *testing.T, f func(x archsimd.Float64x8) archsimd.Float64x8, want func(x []float64) []float64, flakiness float64) { n := 8 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() - a := simd.LoadFloat64x8Slice(x) + a := archsimd.LoadFloat64x8Slice(x) g := make([]float64, n) f(a).StoreSlice(g) w := want(x) diff --git a/src/simd/archsimd/internal/simd_test/unary_test.go b/src/simd/archsimd/internal/simd_test/unary_test.go new file mode 100644 index 0000000000..6b53669d78 --- /dev/null +++ b/src/simd/archsimd/internal/simd_test/unary_test.go @@ -0,0 +1,137 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build goexperiment.simd && amd64 + +package simd_test + +import ( + "math" + "simd/archsimd" + "testing" +) + +func TestCeil(t *testing.T) { + testFloat32x4Unary(t, archsimd.Float32x4.Ceil, ceilSlice[float32]) + testFloat32x8Unary(t, archsimd.Float32x8.Ceil, ceilSlice[float32]) + testFloat64x2Unary(t, archsimd.Float64x2.Ceil, ceilSlice[float64]) + testFloat64x4Unary(t, archsimd.Float64x4.Ceil, ceilSlice[float64]) + if archsimd.X86.AVX512() { + // testFloat32x16Unary(t, archsimd.Float32x16.Ceil, ceilSlice[float32]) // missing + // testFloat64x8Unary(t, archsimd.Float64x8.Ceil, ceilSlice[float64]) // missing + } +} + +func TestFloor(t *testing.T) { + testFloat32x4Unary(t, archsimd.Float32x4.Floor, floorSlice[float32]) + testFloat32x8Unary(t, archsimd.Float32x8.Floor, floorSlice[float32]) + testFloat64x2Unary(t, archsimd.Float64x2.Floor, floorSlice[float64]) + testFloat64x4Unary(t, archsimd.Float64x4.Floor, floorSlice[float64]) + if archsimd.X86.AVX512() { + // testFloat32x16Unary(t, archsimd.Float32x16.Floor, floorSlice[float32]) // missing + // testFloat64x8Unary(t, archsimd.Float64x8.Floor, floorSlice[float64]) // missing + } +} + +func TestTrunc(t *testing.T) { + testFloat32x4Unary(t, archsimd.Float32x4.Trunc, truncSlice[float32]) + testFloat32x8Unary(t, archsimd.Float32x8.Trunc, truncSlice[float32]) + testFloat64x2Unary(t, archsimd.Float64x2.Trunc, truncSlice[float64]) + testFloat64x4Unary(t, archsimd.Float64x4.Trunc, truncSlice[float64]) + if archsimd.X86.AVX512() { + // testFloat32x16Unary(t, archsimd.Float32x16.Trunc, truncSlice[float32]) // missing + // testFloat64x8Unary(t, archsimd.Float64x8.Trunc, truncSlice[float64]) // missing + } +} + +func TestRound(t *testing.T) { + testFloat32x4Unary(t, archsimd.Float32x4.RoundToEven, roundSlice[float32]) + testFloat32x8Unary(t, archsimd.Float32x8.RoundToEven, roundSlice[float32]) + testFloat64x2Unary(t, archsimd.Float64x2.RoundToEven, roundSlice[float64]) + testFloat64x4Unary(t, archsimd.Float64x4.RoundToEven, roundSlice[float64]) + if archsimd.X86.AVX512() { + // testFloat32x16Unary(t, archsimd.Float32x16.Round, roundSlice[float32]) // missing + // testFloat64x8Unary(t, archsimd.Float64x8.Round, roundSlice[float64]) // missing + } +} + +func TestSqrt(t *testing.T) { + testFloat32x4Unary(t, archsimd.Float32x4.Sqrt, sqrtSlice[float32]) + testFloat32x8Unary(t, archsimd.Float32x8.Sqrt, sqrtSlice[float32]) + testFloat64x2Unary(t, archsimd.Float64x2.Sqrt, sqrtSlice[float64]) + testFloat64x4Unary(t, archsimd.Float64x4.Sqrt, sqrtSlice[float64]) + if archsimd.X86.AVX512() { + testFloat32x16Unary(t, archsimd.Float32x16.Sqrt, sqrtSlice[float32]) + testFloat64x8Unary(t, archsimd.Float64x8.Sqrt, sqrtSlice[float64]) + } +} + +func TestNot(t *testing.T) { + testInt8x16Unary(t, archsimd.Int8x16.Not, map1[int8](not)) + testInt8x32Unary(t, archsimd.Int8x32.Not, map1[int8](not)) + testInt16x8Unary(t, archsimd.Int16x8.Not, map1[int16](not)) + testInt16x16Unary(t, archsimd.Int16x16.Not, map1[int16](not)) + testInt32x4Unary(t, archsimd.Int32x4.Not, map1[int32](not)) + testInt32x8Unary(t, archsimd.Int32x8.Not, map1[int32](not)) +} + +func TestAbsolute(t *testing.T) { + testInt8x16Unary(t, archsimd.Int8x16.Abs, map1[int8](abs)) + testInt8x32Unary(t, archsimd.Int8x32.Abs, map1[int8](abs)) + testInt16x8Unary(t, archsimd.Int16x8.Abs, map1[int16](abs)) + testInt16x16Unary(t, archsimd.Int16x16.Abs, map1[int16](abs)) + testInt32x4Unary(t, archsimd.Int32x4.Abs, map1[int32](abs)) + testInt32x8Unary(t, archsimd.Int32x8.Abs, map1[int32](abs)) + if 
archsimd.X86.AVX512() { + testInt8x64Unary(t, archsimd.Int8x64.Abs, map1[int8](abs)) + testInt16x32Unary(t, archsimd.Int16x32.Abs, map1[int16](abs)) + testInt32x16Unary(t, archsimd.Int32x16.Abs, map1[int32](abs)) + testInt64x2Unary(t, archsimd.Int64x2.Abs, map1[int64](abs)) + testInt64x4Unary(t, archsimd.Int64x4.Abs, map1[int64](abs)) + testInt64x8Unary(t, archsimd.Int64x8.Abs, map1[int64](abs)) + } +} + +func TestCeilScaledResidue(t *testing.T) { + if !archsimd.X86.AVX512() { + t.Skip("Needs AVX512") + } + testFloat64x8UnaryFlaky(t, + func(x archsimd.Float64x8) archsimd.Float64x8 { return x.CeilScaledResidue(0) }, + map1(ceilResidueForPrecision[float64](0)), + 0.001) + testFloat64x8UnaryFlaky(t, + func(x archsimd.Float64x8) archsimd.Float64x8 { return x.CeilScaledResidue(1) }, + map1(ceilResidueForPrecision[float64](1)), + 0.001) + testFloat64x8Unary(t, + func(x archsimd.Float64x8) archsimd.Float64x8 { return x.Sub(x.CeilScaled(0)) }, + map1[float64](func(x float64) float64 { return x - math.Ceil(x) })) +} + +func TestToUint32(t *testing.T) { + if !archsimd.X86.AVX512() { + t.Skip("Needs AVX512") + } + testFloat32x4ConvertToUint32(t, archsimd.Float32x4.ConvertToUint32, map1[float32](toUint32)) + testFloat32x8ConvertToUint32(t, archsimd.Float32x8.ConvertToUint32, map1[float32](toUint32)) + testFloat32x16ConvertToUint32(t, archsimd.Float32x16.ConvertToUint32, map1[float32](toUint32)) +} + +func TestToInt32(t *testing.T) { + testFloat32x4ConvertToInt32(t, archsimd.Float32x4.ConvertToInt32, map1[float32](toInt32)) + testFloat32x8ConvertToInt32(t, archsimd.Float32x8.ConvertToInt32, map1[float32](toInt32)) +} + +func TestConverts(t *testing.T) { + testUint8x16ConvertToUint16(t, archsimd.Uint8x16.ExtendToUint16, map1[uint8](toUint16)) + testUint16x8ConvertToUint32(t, archsimd.Uint16x8.ExtendToUint32, map1[uint16](toUint32)) +} + +func TestConvertsAVX512(t *testing.T) { + if !archsimd.X86.AVX512() { + t.Skip("Needs AVX512") + } + testUint8x32ConvertToUint16(t, archsimd.Uint8x32.ExtendToUint16, map1[uint8](toUint16)) +} diff --git a/src/simd/internal/test_helpers/checkslices.go b/src/simd/archsimd/internal/test_helpers/checkslices.go similarity index 100% rename from src/simd/internal/test_helpers/checkslices.go rename to src/simd/archsimd/internal/test_helpers/checkslices.go diff --git a/src/simd/maskmerge_gen_amd64.go b/src/simd/archsimd/maskmerge_gen_amd64.go similarity index 99% rename from src/simd/maskmerge_gen_amd64.go rename to src/simd/archsimd/maskmerge_gen_amd64.go index 71a617c425..47bd260929 100644 --- a/src/simd/maskmerge_gen_amd64.go +++ b/src/simd/archsimd/maskmerge_gen_amd64.go @@ -2,7 +2,7 @@ //go:build goexperiment.simd -package simd +package archsimd // Masked returns x but with elements zeroed where mask is false. 
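// A scalar model of Masked's semantics, as a sketch (maskedScalar is a
// hypothetical illustration, not part of the API; the real mask is a vector
// type, treated here as per-element booleans):
//
//	func maskedScalar(x [16]int8, mask [16]bool) (out [16]int8) {
//		for i := range x {
//			if mask[i] {
//				out[i] = x[i] // element kept where mask is true
//			}
//			// out[i] stays zero where mask is false
//		}
//		return out
//	}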
func (x Int8x16) Masked(mask Mask8x16) Int8x16 { diff --git a/src/simd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go similarity index 99% rename from src/simd/ops_amd64.go rename to src/simd/archsimd/ops_amd64.go index 82774e05ad..45b8d6ee17 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -2,7 +2,7 @@ //go:build goexperiment.simd -package simd +package archsimd /* AESDecryptLastRound */ diff --git a/src/simd/ops_internal_amd64.go b/src/simd/archsimd/ops_internal_amd64.go similarity index 99% rename from src/simd/ops_internal_amd64.go rename to src/simd/archsimd/ops_internal_amd64.go index 6d6e84ffff..566b88d510 100644 --- a/src/simd/ops_internal_amd64.go +++ b/src/simd/archsimd/ops_internal_amd64.go @@ -2,7 +2,7 @@ //go:build goexperiment.simd -package simd +package archsimd /* blend */ diff --git a/src/simd/other_gen_amd64.go b/src/simd/archsimd/other_gen_amd64.go similarity index 99% rename from src/simd/other_gen_amd64.go rename to src/simd/archsimd/other_gen_amd64.go index da11b227df..a9f2b1b977 100644 --- a/src/simd/other_gen_amd64.go +++ b/src/simd/archsimd/other_gen_amd64.go @@ -2,7 +2,7 @@ //go:build goexperiment.simd -package simd +package archsimd // BroadcastInt8x16 returns a vector with the input // x assigned to all elements of the output. diff --git a/src/simd/archsimd/pkginternal_test.go b/src/simd/archsimd/pkginternal_test.go new file mode 100644 index 0000000000..a20da340af --- /dev/null +++ b/src/simd/archsimd/pkginternal_test.go @@ -0,0 +1,258 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.simd && amd64 + +package archsimd_test + +import ( + "simd/archsimd" + "simd/archsimd/internal/test_helpers" + "testing" +) + +func TestConcatSelectedConstant64(t *testing.T) { + a := make([]int64, 2) + x := archsimd.LoadInt64x2Slice([]int64{4, 5}) + y := archsimd.LoadInt64x2Slice([]int64{6, 7}) + z := x.ExportTestConcatSelectedConstant(0b10, y) + z.StoreSlice(a) + test_helpers.CheckSlices[int64](t, a, []int64{4, 7}) +} + +func TestConcatSelectedConstantGrouped64(t *testing.T) { + a := make([]float64, 4) + x := archsimd.LoadFloat64x4Slice([]float64{4, 5, 8, 9}) + y := archsimd.LoadFloat64x4Slice([]float64{6, 7, 10, 11}) + z := x.ExportTestConcatSelectedConstantGrouped(0b_11_10, y) + z.StoreSlice(a) + test_helpers.CheckSlices[float64](t, a, []float64{4, 7, 9, 11}) +} + +func TestConcatSelectedConstant32(t *testing.T) { + a := make([]float32, 4) + x := archsimd.LoadFloat32x4Slice([]float32{4, 5, 8, 9}) + y := archsimd.LoadFloat32x4Slice([]float32{6, 7, 10, 11}) + z := x.ExportTestConcatSelectedConstant(0b_11_01_10_00, y) + z.StoreSlice(a) + test_helpers.CheckSlices[float32](t, a, []float32{4, 8, 7, 11}) +} + +func TestConcatSelectedConstantGrouped32(t *testing.T) { + a := make([]uint32, 8) + x := archsimd.LoadUint32x8Slice([]uint32{0, 1, 2, 3, 8, 9, 10, 11}) + y := archsimd.LoadUint32x8Slice([]uint32{4, 5, 6, 7, 12, 13, 14, 15}) + z := x.ExportTestConcatSelectedConstantGrouped(0b_11_01_00_10, y) + z.StoreSlice(a) + test_helpers.CheckSlices[uint32](t, a, []uint32{2, 0, 5, 7, 10, 8, 13, 15}) +} + +func TestTern(t *testing.T) { + if !archsimd.X86.AVX512() { + t.Skip("This test needs AVX512") + } + x := archsimd.LoadInt32x8Slice([]int32{0, 0, 0, 0, 1, 1, 1, 1}) + y := archsimd.LoadInt32x8Slice([]int32{0, 0, 1, 1, 0, 0, 1, 1}) + z := archsimd.LoadInt32x8Slice([]int32{0, 1, 0, 1, 0, 1, 0, 1}) + + foo := func(w 
archsimd.Int32x8, k uint8) { + a := make([]int32, 8) + w.StoreSlice(a) + t.Logf("For k=%0b, w=%v", k, a) + for i, b := range a { + if (int32(k)>>i)&1 != b { + t.Errorf("Element %d of stored slice (=%d) did not match corresponding bit in 0b%b", + i, b, k) + } + } + } + + foo(x.ExportTestTern(0b1111_0000, y, z), 0b1111_0000) + foo(x.ExportTestTern(0b1100_1100, y, z), 0b1100_1100) + foo(x.ExportTestTern(0b1010_1010, y, z), 0b1010_1010) +} + +func TestSelect2x4x32(t *testing.T) { + for a := range uint8(8) { + for b := range uint8(8) { + for c := range uint8(8) { + for d := range uint8(8) { + x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) + y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) + z := select2x4x32(x, a, b, c, d, y) + w := make([]int32, 4, 4) + z.StoreSlice(w) + if w[0] != int32(a) || w[1] != int32(b) || + w[2] != int32(c) || w[3] != int32(d) { + t.Errorf("Expected [%d %d %d %d] got %v", a, b, c, d, w) + } + } + } + } + } +} + +func TestSelect2x8x32Grouped(t *testing.T) { + for a := range uint8(8) { + for b := range uint8(8) { + for c := range uint8(8) { + for d := range uint8(8) { + x := archsimd.LoadInt32x8Slice([]int32{0, 1, 2, 3, 10, 11, 12, 13}) + y := archsimd.LoadInt32x8Slice([]int32{4, 5, 6, 7, 14, 15, 16, 17}) + z := select2x8x32Grouped(x, a, b, c, d, y) + w := make([]int32, 8, 8) + z.StoreSlice(w) + if w[0] != int32(a) || w[1] != int32(b) || + w[2] != int32(c) || w[3] != int32(d) || + w[4] != int32(10+a) || w[5] != int32(10+b) || + w[6] != int32(10+c) || w[7] != int32(10+d) { + t.Errorf("Expected [%d %d %d %d %d %d %d %d] got %v", a, b, c, d, 10+a, 10+b, 10+c, 10+d, w) + } + } + } + } + } +} + +// select2x4x32 returns a selection of 4 elements in x and y, numbered +// 0-7, where 0-3 are the four elements of x and 4-7 are the four elements +// of y. 
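// Two encodings in this test file are easy to misread, so worked notes follow.
//
// TestTern above drives a ternary-logic (VPTERNLOG-style) operation: the x, y,
// and z lanes enumerate the bit triples 0..7 with x as the high bit, so for
// the immediates used there (all with bit 0 clear, which keeps the upper bits
// of each lane zero) lane i of the result must equal bit i of k: k =
// 0b1111_0000 is the truth table for "copy x", 0b1100_1100 for "copy y", and
// 0b1010_1010 for "copy z".
//
// In select2x4x32 below, each selector in a..d picks from x (L, index 0-3) or
// from y (H, index 4-7); bit 2 of each selector is packed into a 4-bit L/H
// pattern (a in bit 0 through d in bit 3) and the low two bits become per-lane
// source indices. Judging from the inline /* =archsimd.ExportTestCscImm4(...) */
// annotations, the immediate packs the four 2-bit indices lowest lane first;
// a hypothetical mirror:
//
//	func cscImm4(a, b, c, d uint8) int64 {
//		// assumes the packing implied by the annotations, e.g.
//		// cscImm4(0, 2, 1, 3) == 0b11_01_10_00
//		return int64(a) | int64(b)<<2 | int64(c)<<4 | int64(d)<<6
//	}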
+func select2x4x32(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 {
+	pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
+
+	a, b, c, d = a&3, b&3, c&3, d&3
+
+	switch pattern {
+	case archsimd.LLLL:
+		return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), x)
+	case archsimd.HHHH:
+		return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), y)
+	case archsimd.LLHH:
+		return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), y)
+	case archsimd.HHLL:
+		return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, c, d), x)
+
+	case archsimd.HLLL:
+		z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), x)
+		return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), x)
+	case archsimd.LHLL:
+		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), y)
+		return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), x)
+
+	case archsimd.HLHH:
+		z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), x)
+		return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), y)
+	case archsimd.LHHH:
+		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, a, b, b), y)
+		return z.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(0, 2, c, d), y)
+
+	case archsimd.LLLH:
+		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), y)
+		return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+	case archsimd.LLHL:
+		z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), x)
+		return x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+	case archsimd.HHLH:
+		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), y)
+		return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+	case archsimd.HHHL:
+		z := y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(c, c, d, d), x)
+		return y.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, b, 0, 2), z)
+
+	case archsimd.LHLH:
+		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, c, b, d), y)
+		return z.ExportTestConcatSelectedConstant(0b11_01_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 1, 3) */, z)
+	case archsimd.HLHL:
+		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(b, d, a, c), y)
+		return z.ExportTestConcatSelectedConstant(0b01_11_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 3, 1) */, z)
+	case archsimd.HLLH:
+		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(b, c, a, d), y)
+		return z.ExportTestConcatSelectedConstant(0b11_01_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 1, 3) */, z)
+	case archsimd.LHHL:
+		z := x.ExportTestConcatSelectedConstant(archsimd.ExportTestCscImm4(a, d, b, c), y)
+		return z.ExportTestConcatSelectedConstant(0b01_11_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 3, 1) */, z)
+	}
+	panic("missing case, switch should be exhaustive")
+}
+
+// select2x8x32Grouped returns a pair of selections of 4 elements in x and y,
+// numbered 0-7, where 0-3 are the four elements of x's two groups (lower and
+// upper 128 bits) and 4-7 are the four elements of y's two groups.
+
+func select2x8x32Grouped(x archsimd.Int32x8, a, b, c, d uint8, y archsimd.Int32x8) archsimd.Int32x8 {
+	// The comments below classify selections as being expressible in the ExportTestConcatSelectedConstant pattern,
+	// or not.
Classification is by H and L, where H is a selection from 4-7 + // and L is a selection from 0-3. + // archsimd.LLHH -> CSC(x,y, a, b, c&3, d&3) + // archsimd.HHLL -> CSC(y,x, a&3, b&3, c, d) + // archsimd.LLLL -> CSC(x,x, a, b, c, d) + // archsimd.HHHH -> CSC(y,y, a&3, b&3, c&3, d&3) + + // archsimd.LLLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(x, z, a, b, 0, 2) + // archsimd.LLHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(x, z, a, b, 0, 2) + // archsimd.HHLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(y, z, a&3, b&3, 0, 2) + // archsimd.HHHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(y, z, a&3, b&3, 0, 2) + + // archsimd.LHLL -> z = CSC(x, y, a, a, b&3, b&3); CSC(z, x, 0, 2, c, d) + // etc + + // archsimd.LHLH -> z = CSC(x, y, a, c, b&3, d&3); CSC(z, z, 0, 2, 1, 3) + // archsimd.HLHL -> z = CSC(x, y, b, d, a&3, c&3); CSC(z, z, 2, 0, 3, 1) + + pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1 + + a, b, c, d = a&3, b&3, c&3, d&3 + + switch pattern { + case archsimd.LLLL: + return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), x) + case archsimd.HHHH: + return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), y) + case archsimd.LLHH: + return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), y) + case archsimd.HHLL: + return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, c, d), x) + + case archsimd.HLLL: + z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), x) + return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), x) + case archsimd.LHLL: + z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), y) + return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), x) + + case archsimd.HLHH: + z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), x) + return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), y) + case archsimd.LHHH: + z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, a, b, b), y) + return z.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(0, 2, c, d), y) + + case archsimd.LLLH: + z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), y) + return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z) + case archsimd.LLHL: + z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), x) + return x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z) + case archsimd.HHLH: + z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), y) + return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z) + case archsimd.HHHL: + z := y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(c, c, d, d), x) + return y.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, b, 0, 2), z) + + case archsimd.LHLH: + z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, c, b, d), y) + return z.ExportTestConcatSelectedConstantGrouped(0b11_01_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 1, 3) */, z) + case archsimd.HLHL: + z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(b, d, a, c), y) + return z.ExportTestConcatSelectedConstantGrouped(0b01_11_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 3, 1) */, z) + case 
archsimd.HLLH: + z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(b, c, a, d), y) + return z.ExportTestConcatSelectedConstantGrouped(0b11_01_00_10 /* =archsimd.ExportTestCscImm4(2, 0, 1, 3) */, z) + case archsimd.LHHL: + z := x.ExportTestConcatSelectedConstantGrouped(archsimd.ExportTestCscImm4(a, d, b, c), y) + return z.ExportTestConcatSelectedConstantGrouped(0b01_11_10_00 /* =archsimd.ExportTestCscImm4(0, 2, 3, 1) */, z) + } + panic("missing case, switch should be exhaustive") +} diff --git a/src/simd/shuffles_amd64.go b/src/simd/archsimd/shuffles_amd64.go similarity index 99% rename from src/simd/shuffles_amd64.go rename to src/simd/archsimd/shuffles_amd64.go index 96323002a4..2bbd89c725 100644 --- a/src/simd/shuffles_amd64.go +++ b/src/simd/archsimd/shuffles_amd64.go @@ -4,7 +4,7 @@ //go:build goexperiment.simd && amd64 -package simd +package archsimd // These constants represent the source pattern for the four parameters // (a, b, c, d) passed to SelectFromPair and SelectFromPairGrouped. diff --git a/src/simd/slice_gen_amd64.go b/src/simd/archsimd/slice_gen_amd64.go similarity index 99% rename from src/simd/slice_gen_amd64.go rename to src/simd/archsimd/slice_gen_amd64.go index 7d70cfb94d..c03e28206d 100644 --- a/src/simd/slice_gen_amd64.go +++ b/src/simd/archsimd/slice_gen_amd64.go @@ -2,7 +2,7 @@ //go:build goexperiment.simd -package simd +package archsimd import "unsafe" diff --git a/src/simd/slicepart_amd64.go b/src/simd/archsimd/slicepart_amd64.go similarity index 99% rename from src/simd/slicepart_amd64.go rename to src/simd/archsimd/slicepart_amd64.go index 206d3b98cb..a3188e4a5d 100644 --- a/src/simd/slicepart_amd64.go +++ b/src/simd/archsimd/slicepart_amd64.go @@ -4,7 +4,7 @@ //go:build goexperiment.simd -package simd +package archsimd import "unsafe" diff --git a/src/simd/string.go b/src/simd/archsimd/string.go similarity index 98% rename from src/simd/string.go rename to src/simd/archsimd/string.go index a692653aa0..77500ade1c 100644 --- a/src/simd/string.go +++ b/src/simd/archsimd/string.go @@ -4,7 +4,7 @@ //go:build goexperiment.simd && amd64 -package simd +package archsimd import ( "internal/strconv" diff --git a/src/simd/testdata/sample.go b/src/simd/archsimd/testdata/sample.go similarity index 85% rename from src/simd/testdata/sample.go rename to src/simd/archsimd/testdata/sample.go index b8e3697b6b..9c4695d949 100644 --- a/src/simd/testdata/sample.go +++ b/src/simd/archsimd/testdata/sample.go @@ -7,20 +7,20 @@ package main import ( "fmt" "os" - "simd" + "simd/archsimd" "unsafe" ) -func load(s []float64) simd.Float64x4 { - return simd.LoadFloat64x4((*[4]float64)(s[:4])) +func load(s []float64) archsimd.Float64x4 { + return archsimd.LoadFloat64x4((*[4]float64)(s[:4])) } -type S1 = simd.Float64x4 +type S1 = archsimd.Float64x4 -type S2 simd.Float64x4 +type S2 archsimd.Float64x4 func (s S2) Len() int { - return simd.Float64x4(s).Len() + return archsimd.Float64x4(s).Len() } func (s S2) Load(a []float64) S2 { @@ -28,19 +28,19 @@ func (s S2) Load(a []float64) S2 { } func (s S2) Store(a *[4]float64) { - simd.Float64x4(s).Store(a) + archsimd.Float64x4(s).Store(a) } func (s S2) Add(a S2) S2 { - return S2(simd.Float64x4(s).Add(simd.Float64x4(a))) + return S2(archsimd.Float64x4(s).Add(archsimd.Float64x4(a))) } func (s S2) Mul(a S2) S2 { - return S2(simd.Float64x4(s).Mul(simd.Float64x4(a))) + return S2(archsimd.Float64x4(s).Mul(archsimd.Float64x4(a))) } type S3 struct { - simd.Float64x4 + archsimd.Float64x4 } func ip64_0(a, b []float64) float64 { @@ -82,7 
+82,7 @@ func ip64_1a(a, b []float64) float64 { } //go:noinline -func FMA(a, b, c simd.Float64x4) simd.Float64x4 { +func FMA(a, b, c archsimd.Float64x4) archsimd.Float64x4 { return a.Add(b.Mul(c)) } diff --git a/src/simd/types_amd64.go b/src/simd/archsimd/types_amd64.go similarity index 99% rename from src/simd/types_amd64.go rename to src/simd/archsimd/types_amd64.go index dfa864b802..556383b380 100644 --- a/src/simd/types_amd64.go +++ b/src/simd/archsimd/types_amd64.go @@ -2,7 +2,7 @@ //go:build goexperiment.simd -package simd +package archsimd // v128 is a tag type that tells the compiler that this is really 128-bit SIMD type v128 struct { diff --git a/src/simd/unsafe_helpers.go b/src/simd/archsimd/unsafe_helpers.go similarity index 99% rename from src/simd/unsafe_helpers.go rename to src/simd/archsimd/unsafe_helpers.go index c6ea50d551..0123ad77c5 100644 --- a/src/simd/unsafe_helpers.go +++ b/src/simd/archsimd/unsafe_helpers.go @@ -2,7 +2,7 @@ //go:build goexperiment.simd -package simd +package archsimd import "unsafe" diff --git a/src/simd/internal/simd_test/binary_test.go b/src/simd/internal/simd_test/binary_test.go deleted file mode 100644 index 04dca3e2e2..0000000000 --- a/src/simd/internal/simd_test/binary_test.go +++ /dev/null @@ -1,361 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build goexperiment.simd && amd64 - -package simd_test - -import ( - "simd" - "testing" -) - -func TestAdd(t *testing.T) { - testFloat32x4Binary(t, simd.Float32x4.Add, addSlice[float32]) - testFloat32x8Binary(t, simd.Float32x8.Add, addSlice[float32]) - testFloat64x2Binary(t, simd.Float64x2.Add, addSlice[float64]) - testFloat64x4Binary(t, simd.Float64x4.Add, addSlice[float64]) - - testInt16x16Binary(t, simd.Int16x16.Add, addSlice[int16]) - testInt16x8Binary(t, simd.Int16x8.Add, addSlice[int16]) - testInt32x4Binary(t, simd.Int32x4.Add, addSlice[int32]) - testInt32x8Binary(t, simd.Int32x8.Add, addSlice[int32]) - testInt64x2Binary(t, simd.Int64x2.Add, addSlice[int64]) - testInt64x4Binary(t, simd.Int64x4.Add, addSlice[int64]) - testInt8x16Binary(t, simd.Int8x16.Add, addSlice[int8]) - testInt8x32Binary(t, simd.Int8x32.Add, addSlice[int8]) - - testUint16x16Binary(t, simd.Uint16x16.Add, addSlice[uint16]) - testUint16x8Binary(t, simd.Uint16x8.Add, addSlice[uint16]) - testUint32x4Binary(t, simd.Uint32x4.Add, addSlice[uint32]) - testUint32x8Binary(t, simd.Uint32x8.Add, addSlice[uint32]) - testUint64x2Binary(t, simd.Uint64x2.Add, addSlice[uint64]) - testUint64x4Binary(t, simd.Uint64x4.Add, addSlice[uint64]) - testUint8x16Binary(t, simd.Uint8x16.Add, addSlice[uint8]) - testUint8x32Binary(t, simd.Uint8x32.Add, addSlice[uint8]) - - if simd.X86.AVX512() { - testFloat32x16Binary(t, simd.Float32x16.Add, addSlice[float32]) - testFloat64x8Binary(t, simd.Float64x8.Add, addSlice[float64]) - testInt8x64Binary(t, simd.Int8x64.Add, addSlice[int8]) - testInt16x32Binary(t, simd.Int16x32.Add, addSlice[int16]) - testInt32x16Binary(t, simd.Int32x16.Add, addSlice[int32]) - testInt64x8Binary(t, simd.Int64x8.Add, addSlice[int64]) - testUint8x64Binary(t, simd.Uint8x64.Add, addSlice[uint8]) - testUint16x32Binary(t, simd.Uint16x32.Add, addSlice[uint16]) - testUint32x16Binary(t, simd.Uint32x16.Add, addSlice[uint32]) - testUint64x8Binary(t, simd.Uint64x8.Add, addSlice[uint64]) - } -} - -func TestSub(t *testing.T) { - testFloat32x4Binary(t, simd.Float32x4.Sub, subSlice[float32]) - testFloat32x8Binary(t, 
simd.Float32x8.Sub, subSlice[float32]) - testFloat64x2Binary(t, simd.Float64x2.Sub, subSlice[float64]) - testFloat64x4Binary(t, simd.Float64x4.Sub, subSlice[float64]) - - testInt16x16Binary(t, simd.Int16x16.Sub, subSlice[int16]) - testInt16x8Binary(t, simd.Int16x8.Sub, subSlice[int16]) - testInt32x4Binary(t, simd.Int32x4.Sub, subSlice[int32]) - testInt32x8Binary(t, simd.Int32x8.Sub, subSlice[int32]) - testInt64x2Binary(t, simd.Int64x2.Sub, subSlice[int64]) - testInt64x4Binary(t, simd.Int64x4.Sub, subSlice[int64]) - testInt8x16Binary(t, simd.Int8x16.Sub, subSlice[int8]) - testInt8x32Binary(t, simd.Int8x32.Sub, subSlice[int8]) - - testUint16x16Binary(t, simd.Uint16x16.Sub, subSlice[uint16]) - testUint16x8Binary(t, simd.Uint16x8.Sub, subSlice[uint16]) - testUint32x4Binary(t, simd.Uint32x4.Sub, subSlice[uint32]) - testUint32x8Binary(t, simd.Uint32x8.Sub, subSlice[uint32]) - testUint64x2Binary(t, simd.Uint64x2.Sub, subSlice[uint64]) - testUint64x4Binary(t, simd.Uint64x4.Sub, subSlice[uint64]) - testUint8x16Binary(t, simd.Uint8x16.Sub, subSlice[uint8]) - testUint8x32Binary(t, simd.Uint8x32.Sub, subSlice[uint8]) - - if simd.X86.AVX512() { - testFloat32x16Binary(t, simd.Float32x16.Sub, subSlice[float32]) - testFloat64x8Binary(t, simd.Float64x8.Sub, subSlice[float64]) - testInt8x64Binary(t, simd.Int8x64.Sub, subSlice[int8]) - testInt16x32Binary(t, simd.Int16x32.Sub, subSlice[int16]) - testInt32x16Binary(t, simd.Int32x16.Sub, subSlice[int32]) - testInt64x8Binary(t, simd.Int64x8.Sub, subSlice[int64]) - testUint8x64Binary(t, simd.Uint8x64.Sub, subSlice[uint8]) - testUint16x32Binary(t, simd.Uint16x32.Sub, subSlice[uint16]) - testUint32x16Binary(t, simd.Uint32x16.Sub, subSlice[uint32]) - testUint64x8Binary(t, simd.Uint64x8.Sub, subSlice[uint64]) - } -} - -func TestMax(t *testing.T) { - // testFloat32x4Binary(t, simd.Float32x4.Max, maxSlice[float32]) // nan is wrong - // testFloat32x8Binary(t, simd.Float32x8.Max, maxSlice[float32]) // nan is wrong - // testFloat64x2Binary(t, simd.Float64x2.Max, maxSlice[float64]) // nan is wrong - // testFloat64x4Binary(t, simd.Float64x4.Max, maxSlice[float64]) // nan is wrong - - testInt16x16Binary(t, simd.Int16x16.Max, maxSlice[int16]) - testInt16x8Binary(t, simd.Int16x8.Max, maxSlice[int16]) - testInt32x4Binary(t, simd.Int32x4.Max, maxSlice[int32]) - testInt32x8Binary(t, simd.Int32x8.Max, maxSlice[int32]) - - if simd.X86.AVX512() { - testInt64x2Binary(t, simd.Int64x2.Max, maxSlice[int64]) - testInt64x4Binary(t, simd.Int64x4.Max, maxSlice[int64]) - } - - testInt8x16Binary(t, simd.Int8x16.Max, maxSlice[int8]) - testInt8x32Binary(t, simd.Int8x32.Max, maxSlice[int8]) - - testUint16x16Binary(t, simd.Uint16x16.Max, maxSlice[uint16]) - testUint16x8Binary(t, simd.Uint16x8.Max, maxSlice[uint16]) - testUint32x4Binary(t, simd.Uint32x4.Max, maxSlice[uint32]) - testUint32x8Binary(t, simd.Uint32x8.Max, maxSlice[uint32]) - - if simd.X86.AVX512() { - testUint64x2Binary(t, simd.Uint64x2.Max, maxSlice[uint64]) - testUint64x4Binary(t, simd.Uint64x4.Max, maxSlice[uint64]) - } - - testUint8x16Binary(t, simd.Uint8x16.Max, maxSlice[uint8]) - testUint8x32Binary(t, simd.Uint8x32.Max, maxSlice[uint8]) - - if simd.X86.AVX512() { - // testFloat32x16Binary(t, simd.Float32x16.Max, maxSlice[float32]) // nan is wrong - // testFloat64x8Binary(t, simd.Float64x8.Max, maxSlice[float64]) // nan is wrong - testInt8x64Binary(t, simd.Int8x64.Max, maxSlice[int8]) - testInt16x32Binary(t, simd.Int16x32.Max, maxSlice[int16]) - testInt32x16Binary(t, simd.Int32x16.Max, maxSlice[int32]) - testInt64x8Binary(t, 
simd.Int64x8.Max, maxSlice[int64]) - testUint8x64Binary(t, simd.Uint8x64.Max, maxSlice[uint8]) - testUint16x32Binary(t, simd.Uint16x32.Max, maxSlice[uint16]) - testUint32x16Binary(t, simd.Uint32x16.Max, maxSlice[uint32]) - testUint64x8Binary(t, simd.Uint64x8.Max, maxSlice[uint64]) - } -} - -func TestMin(t *testing.T) { - // testFloat32x4Binary(t, simd.Float32x4.Min, minSlice[float32]) // nan is wrong - // testFloat32x8Binary(t, simd.Float32x8.Min, minSlice[float32]) // nan is wrong - // testFloat64x2Binary(t, simd.Float64x2.Min, minSlice[float64]) // nan is wrong - // testFloat64x4Binary(t, simd.Float64x4.Min, minSlice[float64]) // nan is wrong - - testInt16x16Binary(t, simd.Int16x16.Min, minSlice[int16]) - testInt16x8Binary(t, simd.Int16x8.Min, minSlice[int16]) - testInt32x4Binary(t, simd.Int32x4.Min, minSlice[int32]) - testInt32x8Binary(t, simd.Int32x8.Min, minSlice[int32]) - - if simd.X86.AVX512() { - testInt64x2Binary(t, simd.Int64x2.Min, minSlice[int64]) - testInt64x4Binary(t, simd.Int64x4.Min, minSlice[int64]) - } - - testInt8x16Binary(t, simd.Int8x16.Min, minSlice[int8]) - testInt8x32Binary(t, simd.Int8x32.Min, minSlice[int8]) - - testUint16x16Binary(t, simd.Uint16x16.Min, minSlice[uint16]) - testUint16x8Binary(t, simd.Uint16x8.Min, minSlice[uint16]) - testUint32x4Binary(t, simd.Uint32x4.Min, minSlice[uint32]) - testUint32x8Binary(t, simd.Uint32x8.Min, minSlice[uint32]) - - if simd.X86.AVX512() { - testUint64x2Binary(t, simd.Uint64x2.Min, minSlice[uint64]) - testUint64x4Binary(t, simd.Uint64x4.Min, minSlice[uint64]) - } - - testUint8x16Binary(t, simd.Uint8x16.Min, minSlice[uint8]) - testUint8x32Binary(t, simd.Uint8x32.Min, minSlice[uint8]) - - if simd.X86.AVX512() { - // testFloat32x16Binary(t, simd.Float32x16.Min, minSlice[float32]) // nan is wrong - // testFloat64x8Binary(t, simd.Float64x8.Min, minSlice[float64]) // nan is wrong - testInt8x64Binary(t, simd.Int8x64.Min, minSlice[int8]) - testInt16x32Binary(t, simd.Int16x32.Min, minSlice[int16]) - testInt32x16Binary(t, simd.Int32x16.Min, minSlice[int32]) - testInt64x8Binary(t, simd.Int64x8.Min, minSlice[int64]) - testUint8x64Binary(t, simd.Uint8x64.Min, minSlice[uint8]) - testUint16x32Binary(t, simd.Uint16x32.Min, minSlice[uint16]) - testUint32x16Binary(t, simd.Uint32x16.Min, minSlice[uint32]) - testUint64x8Binary(t, simd.Uint64x8.Min, minSlice[uint64]) - } -} - -func TestAnd(t *testing.T) { - testInt16x16Binary(t, simd.Int16x16.And, andSlice[int16]) - testInt16x8Binary(t, simd.Int16x8.And, andSlice[int16]) - testInt32x4Binary(t, simd.Int32x4.And, andSlice[int32]) - testInt32x8Binary(t, simd.Int32x8.And, andSlice[int32]) - testInt64x2Binary(t, simd.Int64x2.And, andSlice[int64]) - testInt64x4Binary(t, simd.Int64x4.And, andSlice[int64]) - testInt8x16Binary(t, simd.Int8x16.And, andSlice[int8]) - testInt8x32Binary(t, simd.Int8x32.And, andSlice[int8]) - - testUint16x16Binary(t, simd.Uint16x16.And, andSlice[uint16]) - testUint16x8Binary(t, simd.Uint16x8.And, andSlice[uint16]) - testUint32x4Binary(t, simd.Uint32x4.And, andSlice[uint32]) - testUint32x8Binary(t, simd.Uint32x8.And, andSlice[uint32]) - testUint64x2Binary(t, simd.Uint64x2.And, andSlice[uint64]) - testUint64x4Binary(t, simd.Uint64x4.And, andSlice[uint64]) - testUint8x16Binary(t, simd.Uint8x16.And, andSlice[uint8]) - testUint8x32Binary(t, simd.Uint8x32.And, andSlice[uint8]) - - if simd.X86.AVX512() { - // testInt8x64Binary(t, simd.Int8x64.And, andISlice[int8]) // missing - // testInt16x32Binary(t, simd.Int16x32.And, andISlice[int16]) // missing - testInt32x16Binary(t, 
simd.Int32x16.And, andSlice[int32]) - testInt64x8Binary(t, simd.Int64x8.And, andSlice[int64]) - // testUint8x64Binary(t, simd.Uint8x64.And, andISlice[uint8]) // missing - // testUint16x32Binary(t, simd.Uint16x32.And, andISlice[uint16]) // missing - testUint32x16Binary(t, simd.Uint32x16.And, andSlice[uint32]) - testUint64x8Binary(t, simd.Uint64x8.And, andSlice[uint64]) - } -} - -func TestAndNot(t *testing.T) { - testInt16x16Binary(t, simd.Int16x16.AndNot, andNotSlice[int16]) - testInt16x8Binary(t, simd.Int16x8.AndNot, andNotSlice[int16]) - testInt32x4Binary(t, simd.Int32x4.AndNot, andNotSlice[int32]) - testInt32x8Binary(t, simd.Int32x8.AndNot, andNotSlice[int32]) - testInt64x2Binary(t, simd.Int64x2.AndNot, andNotSlice[int64]) - testInt64x4Binary(t, simd.Int64x4.AndNot, andNotSlice[int64]) - testInt8x16Binary(t, simd.Int8x16.AndNot, andNotSlice[int8]) - testInt8x32Binary(t, simd.Int8x32.AndNot, andNotSlice[int8]) - - testUint16x16Binary(t, simd.Uint16x16.AndNot, andNotSlice[uint16]) - testUint16x8Binary(t, simd.Uint16x8.AndNot, andNotSlice[uint16]) - testUint32x4Binary(t, simd.Uint32x4.AndNot, andNotSlice[uint32]) - testUint32x8Binary(t, simd.Uint32x8.AndNot, andNotSlice[uint32]) - testUint64x2Binary(t, simd.Uint64x2.AndNot, andNotSlice[uint64]) - testUint64x4Binary(t, simd.Uint64x4.AndNot, andNotSlice[uint64]) - testUint8x16Binary(t, simd.Uint8x16.AndNot, andNotSlice[uint8]) - testUint8x32Binary(t, simd.Uint8x32.AndNot, andNotSlice[uint8]) - - if simd.X86.AVX512() { - testInt8x64Binary(t, simd.Int8x64.AndNot, andNotSlice[int8]) - testInt16x32Binary(t, simd.Int16x32.AndNot, andNotSlice[int16]) - testInt32x16Binary(t, simd.Int32x16.AndNot, andNotSlice[int32]) - testInt64x8Binary(t, simd.Int64x8.AndNot, andNotSlice[int64]) - testUint8x64Binary(t, simd.Uint8x64.AndNot, andNotSlice[uint8]) - testUint16x32Binary(t, simd.Uint16x32.AndNot, andNotSlice[uint16]) - testUint32x16Binary(t, simd.Uint32x16.AndNot, andNotSlice[uint32]) - testUint64x8Binary(t, simd.Uint64x8.AndNot, andNotSlice[uint64]) - } -} - -func TestXor(t *testing.T) { - testInt16x16Binary(t, simd.Int16x16.Xor, xorSlice[int16]) - testInt16x8Binary(t, simd.Int16x8.Xor, xorSlice[int16]) - testInt32x4Binary(t, simd.Int32x4.Xor, xorSlice[int32]) - testInt32x8Binary(t, simd.Int32x8.Xor, xorSlice[int32]) - testInt64x2Binary(t, simd.Int64x2.Xor, xorSlice[int64]) - testInt64x4Binary(t, simd.Int64x4.Xor, xorSlice[int64]) - testInt8x16Binary(t, simd.Int8x16.Xor, xorSlice[int8]) - testInt8x32Binary(t, simd.Int8x32.Xor, xorSlice[int8]) - - testUint16x16Binary(t, simd.Uint16x16.Xor, xorSlice[uint16]) - testUint16x8Binary(t, simd.Uint16x8.Xor, xorSlice[uint16]) - testUint32x4Binary(t, simd.Uint32x4.Xor, xorSlice[uint32]) - testUint32x8Binary(t, simd.Uint32x8.Xor, xorSlice[uint32]) - testUint64x2Binary(t, simd.Uint64x2.Xor, xorSlice[uint64]) - testUint64x4Binary(t, simd.Uint64x4.Xor, xorSlice[uint64]) - testUint8x16Binary(t, simd.Uint8x16.Xor, xorSlice[uint8]) - testUint8x32Binary(t, simd.Uint8x32.Xor, xorSlice[uint8]) - - if simd.X86.AVX512() { - // testInt8x64Binary(t, simd.Int8x64.Xor, andISlice[int8]) // missing - // testInt16x32Binary(t, simd.Int16x32.Xor, andISlice[int16]) // missing - testInt32x16Binary(t, simd.Int32x16.Xor, xorSlice[int32]) - testInt64x8Binary(t, simd.Int64x8.Xor, xorSlice[int64]) - // testUint8x64Binary(t, simd.Uint8x64.Xor, andISlice[uint8]) // missing - // testUint16x32Binary(t, simd.Uint16x32.Xor, andISlice[uint16]) // missing - testUint32x16Binary(t, simd.Uint32x16.Xor, xorSlice[uint32]) - testUint64x8Binary(t, 
simd.Uint64x8.Xor, xorSlice[uint64]) - } -} - -func TestOr(t *testing.T) { - testInt16x16Binary(t, simd.Int16x16.Or, orSlice[int16]) - testInt16x8Binary(t, simd.Int16x8.Or, orSlice[int16]) - testInt32x4Binary(t, simd.Int32x4.Or, orSlice[int32]) - testInt32x8Binary(t, simd.Int32x8.Or, orSlice[int32]) - testInt64x2Binary(t, simd.Int64x2.Or, orSlice[int64]) - testInt64x4Binary(t, simd.Int64x4.Or, orSlice[int64]) - testInt8x16Binary(t, simd.Int8x16.Or, orSlice[int8]) - testInt8x32Binary(t, simd.Int8x32.Or, orSlice[int8]) - - testUint16x16Binary(t, simd.Uint16x16.Or, orSlice[uint16]) - testUint16x8Binary(t, simd.Uint16x8.Or, orSlice[uint16]) - testUint32x4Binary(t, simd.Uint32x4.Or, orSlice[uint32]) - testUint32x8Binary(t, simd.Uint32x8.Or, orSlice[uint32]) - testUint64x2Binary(t, simd.Uint64x2.Or, orSlice[uint64]) - testUint64x4Binary(t, simd.Uint64x4.Or, orSlice[uint64]) - testUint8x16Binary(t, simd.Uint8x16.Or, orSlice[uint8]) - testUint8x32Binary(t, simd.Uint8x32.Or, orSlice[uint8]) - - if simd.X86.AVX512() { - // testInt8x64Binary(t, simd.Int8x64.Or, andISlice[int8]) // missing - // testInt16x32Binary(t, simd.Int16x32.Or, andISlice[int16]) // missing - testInt32x16Binary(t, simd.Int32x16.Or, orSlice[int32]) - testInt64x8Binary(t, simd.Int64x8.Or, orSlice[int64]) - // testUint8x64Binary(t, simd.Uint8x64.Or, andISlice[uint8]) // missing - // testUint16x32Binary(t, simd.Uint16x32.Or, andISlice[uint16]) // missing - testUint32x16Binary(t, simd.Uint32x16.Or, orSlice[uint32]) - testUint64x8Binary(t, simd.Uint64x8.Or, orSlice[uint64]) - } -} - -func TestMul(t *testing.T) { - testFloat32x4Binary(t, simd.Float32x4.Mul, mulSlice[float32]) - testFloat32x8Binary(t, simd.Float32x8.Mul, mulSlice[float32]) - testFloat64x2Binary(t, simd.Float64x2.Mul, mulSlice[float64]) - testFloat64x4Binary(t, simd.Float64x4.Mul, mulSlice[float64]) - - testInt16x16Binary(t, simd.Int16x16.Mul, mulSlice[int16]) - testInt16x8Binary(t, simd.Int16x8.Mul, mulSlice[int16]) - testInt32x4Binary(t, simd.Int32x4.Mul, mulSlice[int32]) - testInt32x8Binary(t, simd.Int32x8.Mul, mulSlice[int32]) - - // testInt8x16Binary(t, simd.Int8x16.Mul, mulSlice[int8]) // nope - // testInt8x32Binary(t, simd.Int8x32.Mul, mulSlice[int8]) - - // TODO we should be able to do these, there's no difference between signed/unsigned Mul - // testUint16x16Binary(t, simd.Uint16x16.Mul, mulSlice[uint16]) - // testUint16x8Binary(t, simd.Uint16x8.Mul, mulSlice[uint16]) - // testUint32x4Binary(t, simd.Uint32x4.Mul, mulSlice[uint32]) - // testUint32x8Binary(t, simd.Uint32x8.Mul, mulSlice[uint32]) - // testUint64x2Binary(t, simd.Uint64x2.Mul, mulSlice[uint64]) - // testUint64x4Binary(t, simd.Uint64x4.Mul, mulSlice[uint64]) - - // testUint8x16Binary(t, simd.Uint8x16.Mul, mulSlice[uint8]) // nope - // testUint8x32Binary(t, simd.Uint8x32.Mul, mulSlice[uint8]) - - if simd.X86.AVX512() { - testInt64x2Binary(t, simd.Int64x2.Mul, mulSlice[int64]) // avx512 only - testInt64x4Binary(t, simd.Int64x4.Mul, mulSlice[int64]) - - testFloat32x16Binary(t, simd.Float32x16.Mul, mulSlice[float32]) - testFloat64x8Binary(t, simd.Float64x8.Mul, mulSlice[float64]) - - // testInt8x64Binary(t, simd.Int8x64.Mul, mulSlice[int8]) // nope - testInt16x32Binary(t, simd.Int16x32.Mul, mulSlice[int16]) - testInt32x16Binary(t, simd.Int32x16.Mul, mulSlice[int32]) - testInt64x8Binary(t, simd.Int64x8.Mul, mulSlice[int64]) - // testUint8x64Binary(t, simd.Uint8x64.Mul, mulSlice[uint8]) // nope - - // TODO signed should do the job - // testUint16x32Binary(t, simd.Uint16x32.Mul, mulSlice[uint16]) - // 
testUint32x16Binary(t, simd.Uint32x16.Mul, mulSlice[uint32]) - // testUint64x8Binary(t, simd.Uint64x8.Mul, mulSlice[uint64]) - } -} - -func TestDiv(t *testing.T) { - testFloat32x4Binary(t, simd.Float32x4.Div, divSlice[float32]) - testFloat32x8Binary(t, simd.Float32x8.Div, divSlice[float32]) - testFloat64x2Binary(t, simd.Float64x2.Div, divSlice[float64]) - testFloat64x4Binary(t, simd.Float64x4.Div, divSlice[float64]) - - if simd.X86.AVX512() { - testFloat32x16Binary(t, simd.Float32x16.Div, divSlice[float32]) - testFloat64x8Binary(t, simd.Float64x8.Div, divSlice[float64]) - } -} diff --git a/src/simd/internal/simd_test/compare_test.go b/src/simd/internal/simd_test/compare_test.go deleted file mode 100644 index 09b3bfc0d9..0000000000 --- a/src/simd/internal/simd_test/compare_test.go +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build goexperiment.simd && amd64 - -package simd_test - -import ( - "simd" - "testing" -) - -// AVX 2 lacks most comparisons, but they can be synthesized -// from > and = -var comparisonFixed bool = simd.X86.AVX512() - -func TestLess(t *testing.T) { - testFloat32x4Compare(t, simd.Float32x4.Less, lessSlice[float32]) - testFloat32x8Compare(t, simd.Float32x8.Less, lessSlice[float32]) - testFloat64x2Compare(t, simd.Float64x2.Less, lessSlice[float64]) - testFloat64x4Compare(t, simd.Float64x4.Less, lessSlice[float64]) - - testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8]) - - testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8]) - - testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16]) - testUint16x8Compare(t, simd.Uint16x8.Less, lessSlice[uint16]) - testUint32x4Compare(t, simd.Uint32x4.Less, lessSlice[uint32]) - testUint32x8Compare(t, simd.Uint32x8.Less, lessSlice[uint32]) - testUint64x2Compare(t, simd.Uint64x2.Less, lessSlice[uint64]) - testUint64x4Compare(t, simd.Uint64x4.Less, lessSlice[uint64]) - testUint8x16Compare(t, simd.Uint8x16.Less, lessSlice[uint8]) - testUint8x32Compare(t, simd.Uint8x32.Less, lessSlice[uint8]) - - if simd.X86.AVX512() { - testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16]) - testUint16x8Compare(t, simd.Uint16x8.Less, lessSlice[uint16]) - testUint32x4Compare(t, simd.Uint32x4.Less, lessSlice[uint32]) - testUint32x8Compare(t, simd.Uint32x8.Less, lessSlice[uint32]) - testUint64x2Compare(t, simd.Uint64x2.Less, lessSlice[uint64]) - testUint64x4Compare(t, simd.Uint64x4.Less, lessSlice[uint64]) - testUint8x16Compare(t, simd.Uint8x16.Less, lessSlice[uint8]) - testUint8x32Compare(t, simd.Uint8x32.Less, lessSlice[uint8]) - - 
testFloat32x16Compare(t, simd.Float32x16.Less, lessSlice[float32]) - testFloat64x8Compare(t, simd.Float64x8.Less, lessSlice[float64]) - testInt8x64Compare(t, simd.Int8x64.Less, lessSlice[int8]) - testInt16x32Compare(t, simd.Int16x32.Less, lessSlice[int16]) - testInt32x16Compare(t, simd.Int32x16.Less, lessSlice[int32]) - testInt64x8Compare(t, simd.Int64x8.Less, lessSlice[int64]) - testUint8x64Compare(t, simd.Uint8x64.Less, lessSlice[uint8]) - testUint16x32Compare(t, simd.Uint16x32.Less, lessSlice[uint16]) - testUint32x16Compare(t, simd.Uint32x16.Less, lessSlice[uint32]) - testUint64x8Compare(t, simd.Uint64x8.Less, lessSlice[uint64]) - } -} - -func TestLessEqual(t *testing.T) { - testFloat32x4Compare(t, simd.Float32x4.LessEqual, lessEqualSlice[float32]) - testFloat32x8Compare(t, simd.Float32x8.LessEqual, lessEqualSlice[float32]) - testFloat64x2Compare(t, simd.Float64x2.LessEqual, lessEqualSlice[float64]) - testFloat64x4Compare(t, simd.Float64x4.LessEqual, lessEqualSlice[float64]) - - testInt16x16Compare(t, simd.Int16x16.LessEqual, lessEqualSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.LessEqual, lessEqualSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.LessEqual, lessEqualSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.LessEqual, lessEqualSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.LessEqual, lessEqualSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.LessEqual, lessEqualSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.LessEqual, lessEqualSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.LessEqual, lessEqualSlice[int8]) - - testUint16x16Compare(t, simd.Uint16x16.LessEqual, lessEqualSlice[uint16]) - testUint16x8Compare(t, simd.Uint16x8.LessEqual, lessEqualSlice[uint16]) - testUint32x4Compare(t, simd.Uint32x4.LessEqual, lessEqualSlice[uint32]) - testUint32x8Compare(t, simd.Uint32x8.LessEqual, lessEqualSlice[uint32]) - testUint64x2Compare(t, simd.Uint64x2.LessEqual, lessEqualSlice[uint64]) - testUint64x4Compare(t, simd.Uint64x4.LessEqual, lessEqualSlice[uint64]) - testUint8x16Compare(t, simd.Uint8x16.LessEqual, lessEqualSlice[uint8]) - testUint8x32Compare(t, simd.Uint8x32.LessEqual, lessEqualSlice[uint8]) - - if simd.X86.AVX512() { - testFloat32x16Compare(t, simd.Float32x16.LessEqual, lessEqualSlice[float32]) - testFloat64x8Compare(t, simd.Float64x8.LessEqual, lessEqualSlice[float64]) - testInt8x64Compare(t, simd.Int8x64.LessEqual, lessEqualSlice[int8]) - testInt16x32Compare(t, simd.Int16x32.LessEqual, lessEqualSlice[int16]) - testInt32x16Compare(t, simd.Int32x16.LessEqual, lessEqualSlice[int32]) - testInt64x8Compare(t, simd.Int64x8.LessEqual, lessEqualSlice[int64]) - testUint8x64Compare(t, simd.Uint8x64.LessEqual, lessEqualSlice[uint8]) - testUint16x32Compare(t, simd.Uint16x32.LessEqual, lessEqualSlice[uint16]) - testUint32x16Compare(t, simd.Uint32x16.LessEqual, lessEqualSlice[uint32]) - testUint64x8Compare(t, simd.Uint64x8.LessEqual, lessEqualSlice[uint64]) - } -} - -func TestGreater(t *testing.T) { - testFloat32x4Compare(t, simd.Float32x4.Greater, greaterSlice[float32]) - testFloat32x8Compare(t, simd.Float32x8.Greater, greaterSlice[float32]) - testFloat64x2Compare(t, simd.Float64x2.Greater, greaterSlice[float64]) - testFloat64x4Compare(t, simd.Float64x4.Greater, greaterSlice[float64]) - - testInt16x16Compare(t, simd.Int16x16.Greater, greaterSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.Greater, greaterSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.Greater, greaterSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.Greater, greaterSlice[int32]) - - 
testInt64x2Compare(t, simd.Int64x2.Greater, greaterSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.Greater, greaterSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.Greater, greaterSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.Greater, greaterSlice[int8]) - - testUint16x16Compare(t, simd.Uint16x16.Greater, greaterSlice[uint16]) - testUint16x8Compare(t, simd.Uint16x8.Greater, greaterSlice[uint16]) - testUint32x4Compare(t, simd.Uint32x4.Greater, greaterSlice[uint32]) - testUint32x8Compare(t, simd.Uint32x8.Greater, greaterSlice[uint32]) - - testUint64x2Compare(t, simd.Uint64x2.Greater, greaterSlice[uint64]) - testUint64x4Compare(t, simd.Uint64x4.Greater, greaterSlice[uint64]) - testUint8x16Compare(t, simd.Uint8x16.Greater, greaterSlice[uint8]) - testUint8x32Compare(t, simd.Uint8x32.Greater, greaterSlice[uint8]) - - if simd.X86.AVX512() { - - testFloat32x16Compare(t, simd.Float32x16.Greater, greaterSlice[float32]) - testFloat64x8Compare(t, simd.Float64x8.Greater, greaterSlice[float64]) - testInt8x64Compare(t, simd.Int8x64.Greater, greaterSlice[int8]) - testInt16x32Compare(t, simd.Int16x32.Greater, greaterSlice[int16]) - testInt32x16Compare(t, simd.Int32x16.Greater, greaterSlice[int32]) - testInt64x8Compare(t, simd.Int64x8.Greater, greaterSlice[int64]) - testUint8x64Compare(t, simd.Uint8x64.Greater, greaterSlice[uint8]) - testUint16x32Compare(t, simd.Uint16x32.Greater, greaterSlice[uint16]) - testUint32x16Compare(t, simd.Uint32x16.Greater, greaterSlice[uint32]) - testUint64x8Compare(t, simd.Uint64x8.Greater, greaterSlice[uint64]) - } -} - -func TestGreaterEqual(t *testing.T) { - testFloat32x4Compare(t, simd.Float32x4.GreaterEqual, greaterEqualSlice[float32]) - testFloat32x8Compare(t, simd.Float32x8.GreaterEqual, greaterEqualSlice[float32]) - testFloat64x2Compare(t, simd.Float64x2.GreaterEqual, greaterEqualSlice[float64]) - testFloat64x4Compare(t, simd.Float64x4.GreaterEqual, greaterEqualSlice[float64]) - - testInt16x16Compare(t, simd.Int16x16.GreaterEqual, greaterEqualSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.GreaterEqual, greaterEqualSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.GreaterEqual, greaterEqualSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.GreaterEqual, greaterEqualSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.GreaterEqual, greaterEqualSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.GreaterEqual, greaterEqualSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.GreaterEqual, greaterEqualSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.GreaterEqual, greaterEqualSlice[int8]) - - testUint16x16Compare(t, simd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16]) - testUint16x8Compare(t, simd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16]) - testUint32x4Compare(t, simd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32]) - testUint32x8Compare(t, simd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32]) - testUint64x2Compare(t, simd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64]) - testUint64x4Compare(t, simd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64]) - testUint8x16Compare(t, simd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8]) - testUint8x32Compare(t, simd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8]) - - if simd.X86.AVX512() { - testFloat32x16Compare(t, simd.Float32x16.GreaterEqual, greaterEqualSlice[float32]) - testFloat64x8Compare(t, simd.Float64x8.GreaterEqual, greaterEqualSlice[float64]) - testInt8x64Compare(t, simd.Int8x64.GreaterEqual, greaterEqualSlice[int8]) - testInt16x32Compare(t, simd.Int16x32.GreaterEqual, greaterEqualSlice[int16]) - 
testInt32x16Compare(t, simd.Int32x16.GreaterEqual, greaterEqualSlice[int32]) - testInt64x8Compare(t, simd.Int64x8.GreaterEqual, greaterEqualSlice[int64]) - testUint8x64Compare(t, simd.Uint8x64.GreaterEqual, greaterEqualSlice[uint8]) - testUint16x32Compare(t, simd.Uint16x32.GreaterEqual, greaterEqualSlice[uint16]) - testUint32x16Compare(t, simd.Uint32x16.GreaterEqual, greaterEqualSlice[uint32]) - testUint64x8Compare(t, simd.Uint64x8.GreaterEqual, greaterEqualSlice[uint64]) - } -} - -func TestEqual(t *testing.T) { - testFloat32x4Compare(t, simd.Float32x4.Equal, equalSlice[float32]) - testFloat32x8Compare(t, simd.Float32x8.Equal, equalSlice[float32]) - testFloat64x2Compare(t, simd.Float64x2.Equal, equalSlice[float64]) - testFloat64x4Compare(t, simd.Float64x4.Equal, equalSlice[float64]) - - testInt16x16Compare(t, simd.Int16x16.Equal, equalSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.Equal, equalSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.Equal, equalSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.Equal, equalSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.Equal, equalSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.Equal, equalSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.Equal, equalSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.Equal, equalSlice[int8]) - - testUint16x16Compare(t, simd.Uint16x16.Equal, equalSlice[uint16]) - testUint16x8Compare(t, simd.Uint16x8.Equal, equalSlice[uint16]) - testUint32x4Compare(t, simd.Uint32x4.Equal, equalSlice[uint32]) - testUint32x8Compare(t, simd.Uint32x8.Equal, equalSlice[uint32]) - testUint64x2Compare(t, simd.Uint64x2.Equal, equalSlice[uint64]) - testUint64x4Compare(t, simd.Uint64x4.Equal, equalSlice[uint64]) - testUint8x16Compare(t, simd.Uint8x16.Equal, equalSlice[uint8]) - testUint8x32Compare(t, simd.Uint8x32.Equal, equalSlice[uint8]) - - if simd.X86.AVX512() { - testFloat32x16Compare(t, simd.Float32x16.Equal, equalSlice[float32]) - testFloat64x8Compare(t, simd.Float64x8.Equal, equalSlice[float64]) - testInt8x64Compare(t, simd.Int8x64.Equal, equalSlice[int8]) - testInt16x32Compare(t, simd.Int16x32.Equal, equalSlice[int16]) - testInt32x16Compare(t, simd.Int32x16.Equal, equalSlice[int32]) - testInt64x8Compare(t, simd.Int64x8.Equal, equalSlice[int64]) - testUint8x64Compare(t, simd.Uint8x64.Equal, equalSlice[uint8]) - testUint16x32Compare(t, simd.Uint16x32.Equal, equalSlice[uint16]) - testUint32x16Compare(t, simd.Uint32x16.Equal, equalSlice[uint32]) - testUint64x8Compare(t, simd.Uint64x8.Equal, equalSlice[uint64]) - } -} - -func TestNotEqual(t *testing.T) { - testFloat32x4Compare(t, simd.Float32x4.NotEqual, notEqualSlice[float32]) - testFloat32x8Compare(t, simd.Float32x8.NotEqual, notEqualSlice[float32]) - testFloat64x2Compare(t, simd.Float64x2.NotEqual, notEqualSlice[float64]) - testFloat64x4Compare(t, simd.Float64x4.NotEqual, notEqualSlice[float64]) - - testInt16x16Compare(t, simd.Int16x16.NotEqual, notEqualSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.NotEqual, notEqualSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8]) - - testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16]) - testUint16x8Compare(t, simd.Uint16x8.NotEqual, 
-	testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32])
-	testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32])
-	testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64])
-	testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64])
-	testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8])
-	testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8])
-
-	if simd.X86.AVX512() {
-		testFloat32x16Compare(t, simd.Float32x16.NotEqual, notEqualSlice[float32])
-		testFloat64x8Compare(t, simd.Float64x8.NotEqual, notEqualSlice[float64])
-		testInt8x64Compare(t, simd.Int8x64.NotEqual, notEqualSlice[int8])
-		testInt16x32Compare(t, simd.Int16x32.NotEqual, notEqualSlice[int16])
-		testInt32x16Compare(t, simd.Int32x16.NotEqual, notEqualSlice[int32])
-		testInt64x8Compare(t, simd.Int64x8.NotEqual, notEqualSlice[int64])
-		testUint8x64Compare(t, simd.Uint8x64.NotEqual, notEqualSlice[uint8])
-		testUint16x32Compare(t, simd.Uint16x32.NotEqual, notEqualSlice[uint16])
-		testUint32x16Compare(t, simd.Uint32x16.NotEqual, notEqualSlice[uint32])
-		testUint64x8Compare(t, simd.Uint64x8.NotEqual, notEqualSlice[uint64])
-	}
-}
diff --git a/src/simd/internal/simd_test/ternary_test.go b/src/simd/internal/simd_test/ternary_test.go
deleted file mode 100644
index 6b563cef75..0000000000
--- a/src/simd/internal/simd_test/ternary_test.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build goexperiment.simd && amd64
-
-package simd_test
-
-import (
-	"simd"
-	"testing"
-)
-
-func TestFMA(t *testing.T) {
-	if simd.X86.AVX512() {
-		testFloat32x4TernaryFlaky(t, simd.Float32x4.MulAdd, fmaSlice[float32], 0.001)
-		testFloat32x8TernaryFlaky(t, simd.Float32x8.MulAdd, fmaSlice[float32], 0.001)
-		testFloat32x16TernaryFlaky(t, simd.Float32x16.MulAdd, fmaSlice[float32], 0.001)
-		testFloat64x2Ternary(t, simd.Float64x2.MulAdd, fmaSlice[float64])
-		testFloat64x4Ternary(t, simd.Float64x4.MulAdd, fmaSlice[float64])
-		testFloat64x8Ternary(t, simd.Float64x8.MulAdd, fmaSlice[float64])
-	}
-}
diff --git a/src/simd/internal/simd_test/unary_test.go b/src/simd/internal/simd_test/unary_test.go
deleted file mode 100644
index ea4c114992..0000000000
--- a/src/simd/internal/simd_test/unary_test.go
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
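
For orientation, after this CL the FMA coverage deleted just above would be written against the new import path. A hypothetical smoke test (not part of this change; it assumes MulAdd computes x*y+z per lane, which is what the deleted fmaSlice-based tests checked, and it mirrors their AVX512 gate):

	//go:build goexperiment.simd && amd64

	package archsimd_test

	import (
		"simd/archsimd"
		"testing"
	)

	func TestMulAddSmoke(t *testing.T) {
		if !archsimd.X86.AVX512() {
			t.Skip("needs AVX512")
		}
		x := archsimd.LoadFloat64x4Slice([]float64{1, 2, 3, 4})
		y := archsimd.LoadFloat64x4Slice([]float64{5, 6, 7, 8})
		z := archsimd.LoadFloat64x4Slice([]float64{9, 10, 11, 12})
		got := make([]float64, 4)
		x.MulAdd(y, z).StoreSlice(got) // lane i should be x[i]*y[i] + z[i]
		want := []float64{14, 22, 32, 44}
		for i := range want {
			if got[i] != want[i] {
				t.Errorf("lane %d: got %v, want %v", i, got[i], want[i])
			}
		}
	}
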
-
-//go:build goexperiment.simd && amd64
-
-package simd_test
-
-import (
-	"math"
-	"simd"
-	"testing"
-)
-
-func TestCeil(t *testing.T) {
-	testFloat32x4Unary(t, simd.Float32x4.Ceil, ceilSlice[float32])
-	testFloat32x8Unary(t, simd.Float32x8.Ceil, ceilSlice[float32])
-	testFloat64x2Unary(t, simd.Float64x2.Ceil, ceilSlice[float64])
-	testFloat64x4Unary(t, simd.Float64x4.Ceil, ceilSlice[float64])
-	if simd.X86.AVX512() {
-		// testFloat32x16Unary(t, simd.Float32x16.Ceil, ceilSlice[float32]) // missing
-		// testFloat64x8Unary(t, simd.Float64x8.Ceil, ceilSlice[float64]) // missing
-	}
-}
-
-func TestFloor(t *testing.T) {
-	testFloat32x4Unary(t, simd.Float32x4.Floor, floorSlice[float32])
-	testFloat32x8Unary(t, simd.Float32x8.Floor, floorSlice[float32])
-	testFloat64x2Unary(t, simd.Float64x2.Floor, floorSlice[float64])
-	testFloat64x4Unary(t, simd.Float64x4.Floor, floorSlice[float64])
-	if simd.X86.AVX512() {
-		// testFloat32x16Unary(t, simd.Float32x16.Floor, floorSlice[float32]) // missing
-		// testFloat64x8Unary(t, simd.Float64x8.Floor, floorSlice[float64]) // missing
-	}
-}
-
-func TestTrunc(t *testing.T) {
-	testFloat32x4Unary(t, simd.Float32x4.Trunc, truncSlice[float32])
-	testFloat32x8Unary(t, simd.Float32x8.Trunc, truncSlice[float32])
-	testFloat64x2Unary(t, simd.Float64x2.Trunc, truncSlice[float64])
-	testFloat64x4Unary(t, simd.Float64x4.Trunc, truncSlice[float64])
-	if simd.X86.AVX512() {
-		// testFloat32x16Unary(t, simd.Float32x16.Trunc, truncSlice[float32]) // missing
-		// testFloat64x8Unary(t, simd.Float64x8.Trunc, truncSlice[float64]) // missing
-	}
-}
-
-func TestRound(t *testing.T) {
-	testFloat32x4Unary(t, simd.Float32x4.RoundToEven, roundSlice[float32])
-	testFloat32x8Unary(t, simd.Float32x8.RoundToEven, roundSlice[float32])
-	testFloat64x2Unary(t, simd.Float64x2.RoundToEven, roundSlice[float64])
-	testFloat64x4Unary(t, simd.Float64x4.RoundToEven, roundSlice[float64])
-	if simd.X86.AVX512() {
-		// testFloat32x16Unary(t, simd.Float32x16.Round, roundSlice[float32]) // missing
-		// testFloat64x8Unary(t, simd.Float64x8.Round, roundSlice[float64]) // missing
-	}
-}
-
-func TestSqrt(t *testing.T) {
-	testFloat32x4Unary(t, simd.Float32x4.Sqrt, sqrtSlice[float32])
-	testFloat32x8Unary(t, simd.Float32x8.Sqrt, sqrtSlice[float32])
-	testFloat64x2Unary(t, simd.Float64x2.Sqrt, sqrtSlice[float64])
-	testFloat64x4Unary(t, simd.Float64x4.Sqrt, sqrtSlice[float64])
-	if simd.X86.AVX512() {
-		testFloat32x16Unary(t, simd.Float32x16.Sqrt, sqrtSlice[float32])
-		testFloat64x8Unary(t, simd.Float64x8.Sqrt, sqrtSlice[float64])
-	}
-}
-
-func TestNot(t *testing.T) {
-	testInt8x16Unary(t, simd.Int8x16.Not, map1[int8](not))
-	testInt8x32Unary(t, simd.Int8x32.Not, map1[int8](not))
-	testInt16x8Unary(t, simd.Int16x8.Not, map1[int16](not))
-	testInt16x16Unary(t, simd.Int16x16.Not, map1[int16](not))
-	testInt32x4Unary(t, simd.Int32x4.Not, map1[int32](not))
-	testInt32x8Unary(t, simd.Int32x8.Not, map1[int32](not))
-}
-
-func TestAbsolute(t *testing.T) {
-	testInt8x16Unary(t, simd.Int8x16.Abs, map1[int8](abs))
-	testInt8x32Unary(t, simd.Int8x32.Abs, map1[int8](abs))
-	testInt16x8Unary(t, simd.Int16x8.Abs, map1[int16](abs))
-	testInt16x16Unary(t, simd.Int16x16.Abs, map1[int16](abs))
-	testInt32x4Unary(t, simd.Int32x4.Abs, map1[int32](abs))
-	testInt32x8Unary(t, simd.Int32x8.Abs, map1[int32](abs))
-	if simd.X86.AVX512() {
-		testInt8x64Unary(t, simd.Int8x64.Abs, map1[int8](abs))
-		testInt16x32Unary(t, simd.Int16x32.Abs, map1[int16](abs))
-		testInt32x16Unary(t, simd.Int32x16.Abs, map1[int32](abs))
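
The map1 helper used throughout this deleted file is defined elsewhere in the package; presumably it lifts a scalar reference function to an element-wise slice function. A sketch under that assumption, with abs as the scalar reference the Abs cases above would use (both bodies are illustrative, not the deleted file's actual code):

	// map1 lifts a scalar function f to an element-wise slice function.
	func map1[T any](f func(T) T) func([]T) []T {
		return func(xs []T) []T {
			out := make([]T, len(xs))
			for i, x := range xs {
				out[i] = f(x)
			}
			return out
		}
	}

	// abs is a plausible scalar reference for the vector Abs methods.
	func abs[T int8 | int16 | int32 | int64](x T) T {
		if x < 0 {
			return -x
		}
		return x
	}

With these definitions, map1[int32](abs) yields a func([]int32) []int32 that the harness can compare lane-by-lane against the SIMD result.
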
-		testInt64x2Unary(t, simd.Int64x2.Abs, map1[int64](abs))
-		testInt64x4Unary(t, simd.Int64x4.Abs, map1[int64](abs))
-		testInt64x8Unary(t, simd.Int64x8.Abs, map1[int64](abs))
-	}
-}
-
-func TestCeilScaledResidue(t *testing.T) {
-	if !simd.X86.AVX512() {
-		t.Skip("Needs AVX512")
-	}
-	testFloat64x8UnaryFlaky(t,
-		func(x simd.Float64x8) simd.Float64x8 { return x.CeilScaledResidue(0) },
-		map1(ceilResidueForPrecision[float64](0)),
-		0.001)
-	testFloat64x8UnaryFlaky(t,
-		func(x simd.Float64x8) simd.Float64x8 { return x.CeilScaledResidue(1) },
-		map1(ceilResidueForPrecision[float64](1)),
-		0.001)
-	testFloat64x8Unary(t,
-		func(x simd.Float64x8) simd.Float64x8 { return x.Sub(x.CeilScaled(0)) },
-		map1[float64](func(x float64) float64 { return x - math.Ceil(x) }))
-}
-
-func TestToUint32(t *testing.T) {
-	if !simd.X86.AVX512() {
-		t.Skip("Needs AVX512")
-	}
-	testFloat32x4ConvertToUint32(t, simd.Float32x4.ConvertToUint32, map1[float32](toUint32))
-	testFloat32x8ConvertToUint32(t, simd.Float32x8.ConvertToUint32, map1[float32](toUint32))
-	testFloat32x16ConvertToUint32(t, simd.Float32x16.ConvertToUint32, map1[float32](toUint32))
-}
-
-func TestToInt32(t *testing.T) {
-	testFloat32x4ConvertToInt32(t, simd.Float32x4.ConvertToInt32, map1[float32](toInt32))
-	testFloat32x8ConvertToInt32(t, simd.Float32x8.ConvertToInt32, map1[float32](toInt32))
-}
-
-func TestConverts(t *testing.T) {
-	testUint8x16ConvertToUint16(t, simd.Uint8x16.ExtendToUint16, map1[uint8](toUint16))
-	testUint16x8ConvertToUint32(t, simd.Uint16x8.ExtendToUint32, map1[uint16](toUint32))
-}
-
-func TestConvertsAVX512(t *testing.T) {
-	if !simd.X86.AVX512() {
-		t.Skip("Needs AVX512")
-	}
-	testUint8x32ConvertToUint16(t, simd.Uint8x32.ExtendToUint16, map1[uint8](toUint16))
-}
diff --git a/src/simd/pkginternal_test.go b/src/simd/pkginternal_test.go
deleted file mode 100644
index abaa8330e4..0000000000
--- a/src/simd/pkginternal_test.go
+++ /dev/null
@@ -1,258 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
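
The ExportTest* identifiers exercised below follow the usual export_test.go convention: a test-only file inside the package re-exports unexported internals. A sketch of what such a file presumably contains; the body shown is an assumption, but its encoding (two bits per lane, lane a in the low bits) is consistent with the inline /* =simd.ExportTestCscImm4(...) */ comments later in this deleted file:

	// export_test.go, compiled into package simd only for its tests.
	package simd

	// ExportTestCscImm4 re-exports the shuffle-immediate encoder used by
	// ConcatSelectedConstant: each of a, b, c, d contributes two bits.
	func ExportTestCscImm4(a, b, c, d uint8) uint8 {
		return a&3 | (b&3)<<2 | (c&3)<<4 | (d&3)<<6
	}

As a check, ExportTestCscImm4(0, 2, 1, 3) evaluates to 0b11_01_10_00, matching the constant annotated with that comment below.
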
-
-//go:build goexperiment.simd && amd64
-
-package simd_test
-
-import (
-	"simd"
-	"simd/internal/test_helpers"
-	"testing"
-)
-
-func TestConcatSelectedConstant64(t *testing.T) {
-	a := make([]int64, 2)
-	x := simd.LoadInt64x2Slice([]int64{4, 5})
-	y := simd.LoadInt64x2Slice([]int64{6, 7})
-	z := x.ExportTestConcatSelectedConstant(0b10, y)
-	z.StoreSlice(a)
-	test_helpers.CheckSlices[int64](t, a, []int64{4, 7})
-}
-
-func TestConcatSelectedConstantGrouped64(t *testing.T) {
-	a := make([]float64, 4)
-	x := simd.LoadFloat64x4Slice([]float64{4, 5, 8, 9})
-	y := simd.LoadFloat64x4Slice([]float64{6, 7, 10, 11})
-	z := x.ExportTestConcatSelectedConstantGrouped(0b_11_10, y)
-	z.StoreSlice(a)
-	test_helpers.CheckSlices[float64](t, a, []float64{4, 7, 9, 11})
-}
-
-func TestConcatSelectedConstant32(t *testing.T) {
-	a := make([]float32, 4)
-	x := simd.LoadFloat32x4Slice([]float32{4, 5, 8, 9})
-	y := simd.LoadFloat32x4Slice([]float32{6, 7, 10, 11})
-	z := x.ExportTestConcatSelectedConstant(0b_11_01_10_00, y)
-	z.StoreSlice(a)
-	test_helpers.CheckSlices[float32](t, a, []float32{4, 8, 7, 11})
-}
-
-func TestConcatSelectedConstantGrouped32(t *testing.T) {
-	a := make([]uint32, 8)
-	x := simd.LoadUint32x8Slice([]uint32{0, 1, 2, 3, 8, 9, 10, 11})
-	y := simd.LoadUint32x8Slice([]uint32{4, 5, 6, 7, 12, 13, 14, 15})
-	z := x.ExportTestConcatSelectedConstantGrouped(0b_11_01_00_10, y)
-	z.StoreSlice(a)
-	test_helpers.CheckSlices[uint32](t, a, []uint32{2, 0, 5, 7, 10, 8, 13, 15})
-}
-
-func TestTern(t *testing.T) {
-	if !simd.X86.AVX512() {
-		t.Skip("This test needs AVX512")
-	}
-	x := simd.LoadInt32x8Slice([]int32{0, 0, 0, 0, 1, 1, 1, 1})
-	y := simd.LoadInt32x8Slice([]int32{0, 0, 1, 1, 0, 0, 1, 1})
-	z := simd.LoadInt32x8Slice([]int32{0, 1, 0, 1, 0, 1, 0, 1})
-
-	foo := func(w simd.Int32x8, k uint8) {
-		a := make([]int32, 8)
-		w.StoreSlice(a)
-		t.Logf("For k=%0b, w=%v", k, a)
-		for i, b := range a {
-			if (int32(k)>>i)&1 != b {
-				t.Errorf("Element %d of stored slice (=%d) did not match corresponding bit in 0b%b",
-					i, b, k)
-			}
-		}
-	}
-
-	foo(x.ExportTestTern(0b1111_0000, y, z), 0b1111_0000)
-	foo(x.ExportTestTern(0b1100_1100, y, z), 0b1100_1100)
-	foo(x.ExportTestTern(0b1010_1010, y, z), 0b1010_1010)
-}
-
-func TestSelect2x4x32(t *testing.T) {
-	for a := range uint8(8) {
-		for b := range uint8(8) {
-			for c := range uint8(8) {
-				for d := range uint8(8) {
-					x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
-					y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
-					z := select2x4x32(x, a, b, c, d, y)
-					w := make([]int32, 4, 4)
-					z.StoreSlice(w)
-					if w[0] != int32(a) || w[1] != int32(b) ||
-						w[2] != int32(c) || w[3] != int32(d) {
-						t.Errorf("Expected [%d %d %d %d] got %v", a, b, c, d, w)
-					}
-				}
-			}
-		}
-	}
-}
-
-func TestSelect2x8x32Grouped(t *testing.T) {
-	for a := range uint8(8) {
-		for b := range uint8(8) {
-			for c := range uint8(8) {
-				for d := range uint8(8) {
-					x := simd.LoadInt32x8Slice([]int32{0, 1, 2, 3, 10, 11, 12, 13})
-					y := simd.LoadInt32x8Slice([]int32{4, 5, 6, 7, 14, 15, 16, 17})
-					z := select2x8x32Grouped(x, a, b, c, d, y)
-					w := make([]int32, 8, 8)
-					z.StoreSlice(w)
-					if w[0] != int32(a) || w[1] != int32(b) ||
-						w[2] != int32(c) || w[3] != int32(d) ||
-						w[4] != int32(10+a) || w[5] != int32(10+b) ||
-						w[6] != int32(10+c) || w[7] != int32(10+d) {
-						t.Errorf("Expected [%d %d %d %d %d %d %d %d] got %v", a, b, c, d, 10+a, 10+b, 10+c, 10+d, w)
-					}
-				}
-			}
-		}
-	}
-}
-
-// select2x4x32 returns a selection of 4 elements in x and y, numbered
-// 0-7, where 0-3 are the four elements of x and 4-7 are the four elements
-// of y.
-func select2x4x32(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) simd.Int32x4 {
-	pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
-
-	a, b, c, d = a&3, b&3, c&3, d&3
-
-	switch pattern {
-	case simd.LLLL:
-		return x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, c, d), x)
-	case simd.HHHH:
-		return y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, c, d), y)
-	case simd.LLHH:
-		return x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, c, d), y)
-	case simd.HHLL:
-		return y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, c, d), x)
-
-	case simd.HLLL:
-		z := y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, a, b, b), x)
-		return z.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(0, 2, c, d), x)
-	case simd.LHLL:
-		z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, a, b, b), y)
-		return z.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(0, 2, c, d), x)
-
-	case simd.HLHH:
-		z := y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, a, b, b), x)
-		return z.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(0, 2, c, d), y)
-	case simd.LHHH:
-		z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, a, b, b), y)
-		return z.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(0, 2, c, d), y)
-
-	case simd.LLLH:
-		z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(c, c, d, d), y)
-		return x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, 0, 2), z)
-	case simd.LLHL:
-		z := y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(c, c, d, d), x)
-		return x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, 0, 2), z)
-	case simd.HHLH:
-		z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(c, c, d, d), y)
-		return y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, 0, 2), z)
-	case simd.HHHL:
-		z := y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(c, c, d, d), x)
-		return y.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, b, 0, 2), z)
-
-	case simd.LHLH:
-		z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, c, b, d), y)
-		return z.ExportTestConcatSelectedConstant(0b11_01_10_00 /* =simd.ExportTestCscImm4(0, 2, 1, 3) */, z)
-	case simd.HLHL:
-		z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(b, d, a, c), y)
-		return z.ExportTestConcatSelectedConstant(0b01_11_00_10 /* =simd.ExportTestCscImm4(2, 0, 3, 1) */, z)
-	case simd.HLLH:
-		z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(b, c, a, d), y)
-		return z.ExportTestConcatSelectedConstant(0b11_01_00_10 /* =simd.ExportTestCscImm4(2, 0, 1, 3) */, z)
-	case simd.LHHL:
-		z := x.ExportTestConcatSelectedConstant(simd.ExportTestCscImm4(a, d, b, c), y)
-		return z.ExportTestConcatSelectedConstant(0b01_11_10_00 /* =simd.ExportTestCscImm4(0, 2, 3, 1) */, z)
-	}
-	panic("missing case, switch should be exhaustive")
-}
-
-// select2x8x32Grouped returns a pair of selection of 4 elements in x and y,
-// numbered 0-7, where 0-3 are the four elements of x's two groups (lower and
-// upper 128 bits) and 4-7 are the four elements of y's two groups.
-
-func select2x8x32Grouped(x simd.Int32x8, a, b, c, d uint8, y simd.Int32x8) simd.Int32x8 {
-	// selections as being expressible in the ExportTestConcatSelectedConstant pattern,
-	// or not. Classification is by H and L, where H is a selection from 4-7
-	// and L is a selection from 0-3.
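
The pattern expression used here (and in select2x4x32 above) packs bit 2 of each selector, i.e. which source half the lane comes from, into a four-bit L/H code with a's bit in position 0; this numbering is consistent with the switch cases below, assuming the simd.LLLL..HHHH constants are encoded the same way. A worked example:

	a, b, c, d := uint8(5), uint8(1), uint8(6), uint8(2) // halves H, L, H, L
	pattern := a>>2 + (b&4)>>1 + (c&4) + (d&4)<<1       // 1 + 0 + 4 + 0 = 0b0101, i.e. HLHL
	a, b, c, d = a&3, b&3, c&3, d&3                     // lane index within each half: 1, 1, 2, 2
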
-
-	// simd.LLHH -> CSC(x,y, a, b, c&3, d&3)
-	// simd.HHLL -> CSC(y,x, a&3, b&3, c, d)
-	// simd.LLLL -> CSC(x,x, a, b, c, d)
-	// simd.HHHH -> CSC(y,y, a&3, b&3, c&3, d&3)
-
-	// simd.LLLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(x, z, a, b, 0, 2)
-	// simd.LLHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(x, z, a, b, 0, 2)
-	// simd.HHLH -> z = CSC(x, y, c, c, d&3, d&3); CSC(y, z, a&3, b&3, 0, 2)
-	// simd.HHHL -> z = CSC(x, y, c&3, c&3, d, d); CSC(y, z, a&3, b&3, 0, 2)
-
-	// simd.LHLL -> z = CSC(x, y, a, a, b&3, b&3); CSC(z, x, 0, 2, c, d)
-	// etc
-
-	// simd.LHLH -> z = CSC(x, y, a, c, b&3, d&3); CSC(z, z, 0, 2, 1, 3)
-	// simd.HLHL -> z = CSC(x, y, b, d, a&3, c&3); CSC(z, z, 2, 0, 3, 1)
-
-	pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
-
-	a, b, c, d = a&3, b&3, c&3, d&3
-
-	switch pattern {
-	case simd.LLLL:
-		return x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, c, d), x)
-	case simd.HHHH:
-		return y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, c, d), y)
-	case simd.LLHH:
-		return x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, c, d), y)
-	case simd.HHLL:
-		return y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, c, d), x)
-
-	case simd.HLLL:
-		z := y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, a, b, b), x)
-		return z.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(0, 2, c, d), x)
-	case simd.LHLL:
-		z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, a, b, b), y)
-		return z.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(0, 2, c, d), x)
-
-	case simd.HLHH:
-		z := y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, a, b, b), x)
-		return z.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(0, 2, c, d), y)
-	case simd.LHHH:
-		z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, a, b, b), y)
-		return z.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(0, 2, c, d), y)
-
-	case simd.LLLH:
-		z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(c, c, d, d), y)
-		return x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, 0, 2), z)
-	case simd.LLHL:
-		z := y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(c, c, d, d), x)
-		return x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, 0, 2), z)
-	case simd.HHLH:
-		z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(c, c, d, d), y)
-		return y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, 0, 2), z)
-	case simd.HHHL:
-		z := y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(c, c, d, d), x)
-		return y.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, b, 0, 2), z)
-
-	case simd.LHLH:
-		z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, c, b, d), y)
-		return z.ExportTestConcatSelectedConstantGrouped(0b11_01_10_00 /* =simd.ExportTestCscImm4(0, 2, 1, 3) */, z)
-	case simd.HLHL:
-		z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(b, d, a, c), y)
-		return z.ExportTestConcatSelectedConstantGrouped(0b01_11_00_10 /* =simd.ExportTestCscImm4(2, 0, 3, 1) */, z)
-	case simd.HLLH:
-		z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(b, c, a, d), y)
-		return z.ExportTestConcatSelectedConstantGrouped(0b11_01_00_10 /* =simd.ExportTestCscImm4(2, 0, 1, 3) */, z)
-	case simd.LHHL:
-		z := x.ExportTestConcatSelectedConstantGrouped(simd.ExportTestCscImm4(a, d, b, c), y)
-		return z.ExportTestConcatSelectedConstantGrouped(0b01_11_10_00 /* =simd.ExportTestCscImm4(0, 2, 3, 1) */, z)
-	}
-	panic("missing case, switch should be exhaustive")
-}
diff --git a/test/codegen/simd.go b/test/codegen/simd.go
index 63d5bf757a..8f3a1a9f46 100644
--- a/test/codegen/simd.go
+++ b/test/codegen/simd.go
@@ -10,70 +10,70 @@
 
 package codegen
 
-import "simd"
+import "simd/archsimd"
 
 func vptest1() bool {
-	v1 := simd.LoadUint64x2Slice([]uint64{0, 1})
-	v2 := simd.LoadUint64x2Slice([]uint64{0, 0})
+	v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1})
+	v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0})
 	// amd64:`VPTEST\s(.*)(.*)$`
 	// amd64:`SETCS\s(.*)$`
 	return v1.AndNot(v2).IsZero()
 }
 
 func vptest2() bool {
-	v1 := simd.LoadUint64x2Slice([]uint64{0, 1})
-	v2 := simd.LoadUint64x2Slice([]uint64{0, 0})
+	v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1})
+	v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0})
 	// amd64:`VPTEST\s(.*)(.*)$`
 	// amd64:`SETEQ\s(.*)$`
 	return v1.And(v2).IsZero()
}
 
 type Args2 struct {
-	V0 simd.Uint8x32
-	V1 simd.Uint8x32
+	V0 archsimd.Uint8x32
+	V1 archsimd.Uint8x32
 	x  string
 }
 
 //go:noinline
-func simdStructNoSpill(a Args2) simd.Uint8x32 {
+func simdStructNoSpill(a Args2) archsimd.Uint8x32 {
 	// amd64:-`VMOVDQU\s.*$`
 	return a.V0.Xor(a.V1)
 }
 
-func simdStructWrapperNoSpill(a Args2) simd.Uint8x32 {
+func simdStructWrapperNoSpill(a Args2) archsimd.Uint8x32 {
 	// amd64:-`VMOVDQU\s.*$`
 	a.x = "test"
 	return simdStructNoSpill(a)
 }
 
 //go:noinline
-func simdArrayNoSpill(a [1]Args2) simd.Uint8x32 {
+func simdArrayNoSpill(a [1]Args2) archsimd.Uint8x32 {
 	// amd64:-`VMOVDQU\s.*$`
 	return a[0].V0.Xor(a[0].V1)
 }
 
-func simdArrayWrapperNoSpill(a [1]Args2) simd.Uint8x32 {
+func simdArrayWrapperNoSpill(a [1]Args2) archsimd.Uint8x32 {
 	// amd64:-`VMOVDQU\s.*$`
 	a[0].x = "test"
 	return simdArrayNoSpill(a)
 }
 
-func simdFeatureGuardedMaskOpt() simd.Int16x16 {
-	var x, y simd.Int16x16
-	if simd.X86.AVX512() {
-		mask := simd.Mask16x16FromBits(5)
+func simdFeatureGuardedMaskOpt() archsimd.Int16x16 {
+	var x, y archsimd.Int16x16
+	if archsimd.X86.AVX512() {
+		mask := archsimd.Mask16x16FromBits(5)
 		return x.Add(y).Masked(mask) // amd64:`VPADDW.Z\s.*$`
 	}
-	mask := simd.Mask16x16FromBits(5)
+	mask := archsimd.Mask16x16FromBits(5)
 	return x.Add(y).Masked(mask) // amd64:`VPAND\s.*$`
 }
 
-func simdMaskedMerge() simd.Int16x16 {
-	var x, y simd.Int16x16
-	if simd.X86.AVX512() {
-		mask := simd.Mask16x16FromBits(5)
+func simdMaskedMerge() archsimd.Int16x16 {
+	var x, y archsimd.Int16x16
+	if archsimd.X86.AVX512() {
+		mask := archsimd.Mask16x16FromBits(5)
 		return x.Add(y).Merge(x, mask) // amd64:-`VPBLENDVB\s.*$`
 	}
-	mask := simd.Mask16x16FromBits(5)
+	mask := archsimd.Mask16x16FromBits(5)
 	return x.Add(y).Merge(x, mask) // amd64:`VPBLENDVB\s.*$`
 }
diff --git a/test/simd.go b/test/simd.go
index 087f6e3da1..52e5ad6c21 100644
--- a/test/simd.go
+++ b/test/simd.go
@@ -8,33 +8,33 @@
 
 package foo
 
-import "simd"
+import "simd/archsimd"
 
-func f1(x simd.Int8x16) {
+func f1(x archsimd.Int8x16) {
 	return // ERROR "has features avx"
 }
 
-func g1() simd.Int8x16 {
-	var x simd.Int8x16
+func g1() archsimd.Int8x16 {
+	var x archsimd.Int8x16
 	return x // ERROR "has features avx$"
 }
 
-type T1 simd.Int8x16
+type T1 archsimd.Int8x16
 
 func (x T1) h() {
 	return // ERROR "has features avx$"
 }
 
-func f2(x simd.Int8x64) {
+func f2(x archsimd.Int8x64) {
 	return // ERROR "has features avx[+]avx2[+]avx512$"
 }
 
-func g2() simd.Int8x64 {
-	var x simd.Int8x64
+func g2() archsimd.Int8x64 {
+	var x archsimd.Int8x64
 	return x // ERROR "has features avx[+]avx2[+]avx512$"
avx[+]avx2[+]avx512$" } -type T2 simd.Int8x64 +type T2 archsimd.Int8x64 func (x T2) h() { return // ERROR "has features avx[+]avx2[+]avx512$" @@ -44,12 +44,12 @@ var a int func f() { if a == 0 { - if !simd.X86.AVX512() { + if !archsimd.X86.AVX512() { return } println("has avx512") // ERROR "has features avx[+]avx2[+]avx512$" } else { - if !simd.X86.AVX2() { + if !archsimd.X86.AVX2() { return } println("has avx2") // ERROR "has features avx[+]avx2$" @@ -58,7 +58,7 @@ func f() { } // ERROR "has features avx[+]avx2$" func g() { - if simd.X86.AVX2() { // ERROR "has features avx[+]avx2$" + if archsimd.X86.AVX2() { // ERROR "has features avx[+]avx2$" for range 5 { // ERROR "has features avx[+]avx2$" if a < 0 { // ERROR "has features avx[+]avx2$" a++ // ERROR "has features avx[+]avx2$" @@ -77,7 +77,7 @@ func p() bool { } func hasIrreducibleLoop() { - if simd.X86.AVX2() { + if archsimd.X86.AVX2() { goto a // ERROR "has features avx[+]avx2$" } else { goto b @@ -96,8 +96,8 @@ c: println("c") } -func ternRewrite(m, w, x, y, z simd.Int32x16) (t0, t1, t2 simd.Int32x16) { - if !simd.X86.AVX512() { // ERROR "has features avx[+]avx2[+]avx512$" +func ternRewrite(m, w, x, y, z archsimd.Int32x16) (t0, t1, t2 archsimd.Int32x16) { + if !archsimd.X86.AVX512() { // ERROR "has features avx[+]avx2[+]avx512$" return // ERROR "has features avx[+]avx2[+]avx512$" // all blocks have it because of the vector size } t0 = w.Xor(y).Xor(z) // ERROR "Rewriting.*ternInt" @@ -106,12 +106,12 @@ func ternRewrite(m, w, x, y, z simd.Int32x16) (t0, t1, t2 simd.Int32x16) { return // ERROR "has features avx[+]avx2[+]avx512$" } -func ternTricky1(x, y, z simd.Int32x8) simd.Int32x8 { +func ternTricky1(x, y, z archsimd.Int32x8) archsimd.Int32x8 { // Int32x8 is a 256-bit vector and does not guarantee AVX-512 // a is a 3-variable logical expression occurring outside AVX-512 feature check a := x.Xor(y).Xor(z) - var w simd.Int32x8 - if !simd.X86.AVX512() { // ERROR "has features avx$" + var w archsimd.Int32x8 + if !archsimd.X86.AVX512() { // ERROR "has features avx$" // do nothing } else { w = y.AndNot(a) // ERROR "has features avx[+]avx2[+]avx512" "Rewriting.*ternInt" @@ -120,10 +120,10 @@ func ternTricky1(x, y, z simd.Int32x8) simd.Int32x8 { return a.Or(w) // ERROR "has features avx$" } -func ternTricky2(x, y, z simd.Int32x8) simd.Int32x8 { +func ternTricky2(x, y, z archsimd.Int32x8) archsimd.Int32x8 { // Int32x8 is a 256-bit vector and does not guarantee AVX-512 - var a, w simd.Int32x8 - if !simd.X86.AVX512() { // ERROR "has features avx$" + var a, w archsimd.Int32x8 + if !archsimd.X86.AVX512() { // ERROR "has features avx$" // do nothing } else { a = x.Xor(y).Xor(z) @@ -133,11 +133,11 @@ func ternTricky2(x, y, z simd.Int32x8) simd.Int32x8 { return a.Or(w) // ERROR "has features avx$" } -func ternTricky3(x, y, z simd.Int32x8) simd.Int32x8 { +func ternTricky3(x, y, z archsimd.Int32x8) archsimd.Int32x8 { // Int32x8 is a 256-bit vector and does not guarantee AVX-512 a := x.Xor(y).Xor(z) w := y.AndNot(a) - if !simd.X86.AVX512() { // ERROR "has features avx$" + if !archsimd.X86.AVX512() { // ERROR "has features avx$" return a // ERROR "has features avx$" } // a is a common subexpression diff --git a/test/simd/bug1.go b/test/simd/bug1.go index dd450df439..82f086a508 100644 --- a/test/simd/bug1.go +++ b/test/simd/bug1.go @@ -11,7 +11,7 @@ package p import ( - "simd" + "simd/archsimd" "unsafe" ) @@ -20,19 +20,19 @@ func F( tos *[2][4][4]float32, blend int, ) { - tiny := simd.BroadcastFloat32x8(0) + tiny := archsimd.BroadcastFloat32x8(0) for { - 
-		dstCol12 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[0][0:]))))
-		dstCol34 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[0][2:]))))
-		dstCol56 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[1][0:]))))
-		dstCol78 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[1][2:]))))
+		dstCol12 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[0][0:]))))
+		dstCol34 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[0][2:]))))
+		dstCol56 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[1][0:]))))
+		dstCol78 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(dst[1][2:]))))
 
-		tosCol12 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[0][0:]))))
-		tosCol34 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[0][2:]))))
-		tosCol56 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[1][0:]))))
-		tosCol78 := simd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[1][2:]))))
+		tosCol12 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[0][0:]))))
+		tosCol34 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[0][2:]))))
+		tosCol56 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[1][0:]))))
+		tosCol78 := archsimd.LoadFloat32x8((*[8]float32)(unsafe.Pointer((*[2][4]float32)(tos[1][2:]))))
 
-		var Cr0, Cr1, Cr2 simd.Float32x8
+		var Cr0, Cr1, Cr2 archsimd.Float32x8
 		if blend != 0 {
 			invas := tosCol78.Max(tiny)
 			invad := dstCol78.Max(tiny)
@@ -42,7 +42,7 @@
 			Cs0 := tosCol12.Mul(invas)
 			Cs1 := tosCol34.Mul(invas)
 			Cs2 := tosCol56.Mul(invas)
-			var Cm0, Cm1, Cm2 simd.Float32x8
+			var Cm0, Cm1, Cm2 archsimd.Float32x8
 			switch blend {
 			case 4:
 			case 10:
@@ -61,7 +61,7 @@
 			Cr1 = dstCol78.Mul(Cs1).Mul(Cm1)
 			Cr2 = dstCol78.Mul(Cs2).Mul(Cm2)
 		}
-		var resR, resG, resB, resA simd.Float32x8
+		var resR, resG, resB, resA archsimd.Float32x8
 		if blend == 0 {
 			resR = tosCol12
 			resG = tosCol34
diff --git a/test/simd/bug2.go b/test/simd/bug2.go
index 5b7a21176a..ab59fc6db5 100644
--- a/test/simd/bug2.go
+++ b/test/simd/bug2.go
@@ -12,14 +12,14 @@
 package p
 
 import (
-	"simd"
+	"simd/archsimd"
 )
 
 func PackComplex(b bool) {
 	for {
 		if b {
 			var indices [4]uint32
-			simd.Uint32x4{}.ShiftAllRight(20).Store(&indices)
+			archsimd.Uint32x4{}.ShiftAllRight(20).Store(&indices)
 			_ = indices[indices[0]]
 		}
 	}
@@ -34,9 +34,9 @@ func PackComplex2(x0 uint16, src [][4]float32, b, b2 bool) {
 		px := &src[y]
 		if b {
 			var indices [4]uint32
-			fu := simd.LoadFloat32x4(px).AsUint32x4()
+			fu := archsimd.LoadFloat32x4(px).AsUint32x4()
 			fu.ShiftAllRight(0).Store(nil)
-			entry := simd.LoadUint32x4(&[4]uint32{
+			entry := archsimd.LoadUint32x4(&[4]uint32{
 				toSrgbTable[indices[0]],
 			})
 			var res [4]uint32
diff --git a/test/simd_inline.go b/test/simd_inline.go
index b8c4e0de9e..3d1872b626 100644
--- a/test/simd_inline.go
+++ b/test/simd_inline.go
@@ -8,9 +8,9 @@
 
 package foo
 
-import "simd"
+import "simd/archsimd"
 
-func hasClosure(a, b, c, d simd.Int64x4) (w, x, y, z simd.Int64x4) {
+func hasClosure(a, b, c, d archsimd.Int64x4) (w, x, y, z archsimd.Int64x4) {
 	shuf := func() { // ERROR "can inline hasClosure.func1"
 		w = z.RotateAllLeft(1).Xor(a)
 		x = w.RotateAllLeft(3).Xor(b)