--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+package main
+
+// this generates all the code to load and store simd
+// vectors to/from slices.
+
+import (
+ "bytes"
+ "flag"
+ "fmt"
+ "go/format"
+ "io"
+ "os"
+ "strings"
+)
+
+// Example of the generated code, here for 16 uint8 elements:
+//
+//	func LoadUint8x16Slice(s []uint8) Uint8x16 {
+//		return LoadUint8x16((*[16]uint8)(s))
+//	}
+//
+//	func (x Uint8x16) StoreSlice(s []uint8) {
+//		x.Store((*[16]uint8)(s))
+//	}
+
+// slice writes to out the Go source of the slice-load function and the
+// slice-store method for one vector type.
+//
+// e is the lower-case element kind (main passes "int", "uint" and "float"),
+// w is the element width in bits and c is the element count. The vector type
+// is named by capitalizing e and appending w, "x" and c, e.g. Uint8x16; the
+// element type is e followed by w, e.g. uint8.
+//
+// Nothing is written unless the vector size w*c is between 128 and 512 bits
+// inclusive.
+func slice(e string, w, c int, out io.Writer) {
+	b := w * c
+	if b < 128 || b > 512 {
+		return
+	}
+	E := strings.ToUpper(e[:1]) + e[1:]
+	// t is the element type name, v is the vector type name.
+	t := fmt.Sprintf("%s%d", e, w)
+	v := fmt.Sprintf("%s%dx%d", E, w, c)
+
+	// Choose the article by sound, not by spelling: 'u' is deliberately
+	// not in the list because "uint" is read "you-int" ("a Uint8x16"),
+	// whereas "int" starts with a vowel sound ("an Int8x16").
+	a := "a"
+	switch e[0] {
+	case 'a', 'e', 'i', 'o':
+		a = "an"
+	}
+
+	// Explicit argument indexes: [1]=vector type, [2]=article,
+	// [3]=element count, [4]=element type.
+	// The conversion from slice to array pointer in the generated code
+	// panics at run time if the slice is shorter than the array, which is
+	// what the generated doc comments promise.
+	fmt.Fprintf(out,
+		`
+// Load%[1]sSlice loads %[2]s %[1]s from a slice of at least %[3]d %[4]ss.
+// It panics if len(s) < %[3]d.
+func Load%[1]sSlice(s []%[4]s) %[1]s {
+	return Load%[1]s((*[%[3]d]%[4]s)(s))
+}
+
+// StoreSlice stores x into a slice of at least %[3]d %[4]ss.
+// It panics if len(s) < %[3]d.
+func (x %[1]s) StoreSlice(s []%[4]s) {
+	x.Store((*[%[3]d]%[4]s)(s))
+}
+`, v, a, c, t)
+}
+
+// prologue writes the header of the generated file to out: the
+// "Code generated ... DO NOT EDIT" line naming s as the generating command,
+// the goexperiment.simd build constraint together with the comment explaining
+// it, and the "package simd" clause.
+func prologue(s string, out io.Writer) {
+	fmt.Fprintf(out,
+		`// Code generated by '%s'; DO NOT EDIT.
+
+//go:build goexperiment.simd
+
+// The build condition == if the experiment is not on, cmd/api TestCheck will see this and complain
+// see also go/doc/comment, where "simd" is inserted to the package list if the experiment is not on.
+
+package simd
+
+`, s)
+}
+
+func main() {
+ filename := flag.String("o", "", "write generated code to this file")
+ flag.Parse()
+
+ ofile := os.Stdout
+
+ if *filename != "" {
+ var err error
+ ofile, err = os.Create(*filename)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Could not create the output file for the generated code, %v", err)
+ os.Exit(1)
+ }
+ }
+
+ out := new(bytes.Buffer)
+
+ prologue("go run genslice.go -o slice_amd64.go", out)
+
+ vecs := []int{128, 256, 512}
+ ints := []int{8, 16, 32, 64}
+ floats := []int{32, 64}
+ for _, v := range vecs {
+ for _, w := range ints {
+ c := v / w
+ slice("int", w, c, out)
+ slice("uint", w, c, out)
+ }
+ for _, w := range floats {
+ c := v / w
+ slice("float", w, c, out)
+ }
+ }
+ b, err := format.Source(out.Bytes())
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code, %v", err)
+ os.Exit(1)
+ } else {
+ ofile.Write(b)
+ ofile.Close()
+ }
+}
}
}
}
+
+// checkInt8Slices reports a test error for every index of b at which a and b
+// differ; elements of a beyond len(b) are ignored.
+//
+// If a is shorter than b, that is reported as an error as well and only the
+// common prefix is compared. The helper therefore never panics with an index
+// out of range, which a caller that is waiting for a panic with recover could
+// otherwise mistake for the panic it expects.
+//
+// It also serves to use the slices, to prevent accidental optimization.
+func checkInt8Slices(t *testing.T, a, b []int8) {
+	t.Helper()
+	if len(a) < len(b) {
+		t.Errorf("a is shorter than b, len(a)=%d, len(b)=%d", len(a), len(b))
+		b = b[:len(a)]
+	}
+	for i := range b {
+		if a[i] != b[i] {
+			t.Errorf("a and b differ at index %d, a=%d, b=%d", i, a[i], b[i])
+		}
+	}
+}
+
+// TestSlicesInt8 loads the values 1..32 with LoadInt8x32Slice, stores the
+// vector into a fresh 32-element slice with StoreSlice, and checks that the
+// stored values equal the loaded ones.
+func TestSlicesInt8(t *testing.T) {
+	src := make([]int8, 32)
+	for i := range src {
+		src[i] = int8(i + 1) // 1, 2, ..., 32
+	}
+	vec := simd.LoadInt8x32Slice(src)
+	dst := make([]int8, len(src))
+	vec.StoreSlice(dst)
+	checkInt8Slices(t, src, dst)
+}
+
+// TestSlicesInt8TooShortLoad checks that LoadInt8x32Slice panics when given a
+// slice of only 31 elements. The deferred function turns a missing panic into
+// a test error.
+func TestSlicesInt8TooShortLoad(t *testing.T) {
+	defer func() {
+		if r := recover(); r != nil {
+			t.Logf("Saw EXPECTED panic %v", r)
+		} else {
+			t.Errorf("Did not see expected panic")
+		}
+	}()
+	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic
+	v := simd.LoadInt8x32Slice(a)
+	b := make([]int8, 32, 32)
+	v.StoreSlice(b)
+	// Only reached if the load wrongly did not panic. Compare just the
+	// first len(a) elements: indexing a over all of b would itself panic
+	// with an index out of range, and the recover above would then report
+	// that unrelated panic as the expected one.
+	checkInt8Slices(t, a, b[:len(a)])
+}
+
+// TestSlicesInt8TooShortStore checks that StoreSlice on an Int8x32 panics when
+// the destination slice has only 31 elements. The deferred function turns a
+// missing panic into a test error.
+func TestSlicesInt8TooShortStore(t *testing.T) {
+	defer func() {
+		r := recover()
+		if r == nil {
+			t.Errorf("Did not see expected panic")
+			return
+		}
+		t.Logf("Saw EXPECTED panic %v", r)
+	}()
+	src := make([]int8, 32)
+	for i := range src {
+		src[i] = int8(i + 1) // 1, 2, ..., 32
+	}
+	vec := simd.LoadInt8x32Slice(src)
+	dst := make([]int8, 31) // TOO SHORT, should panic
+	vec.StoreSlice(dst)
+	checkInt8Slices(t, src, dst)
+}
+
+// TestSlicesFloat64 loads a Float64x4 from a slice that is longer than needed
+// (8 elements), stores it into a 4-element slice, and checks that the four
+// stored values equal the first four source values.
+func TestSlicesFloat64(t *testing.T) {
+	src := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine
+	vec := simd.LoadFloat64x4Slice(src)
+	dst := make([]float64, 4)
+	vec.StoreSlice(dst)
+	for i, got := range dst {
+		if want := src[i]; want != got {
+			t.Errorf("a and b differ at index %d, a=%f, b=%f", i, want, got)
+		}
+	}
+}
--- /dev/null
+// Code generated by 'go run genslice.go -o slice_amd64.go'; DO NOT EDIT.
+
+//go:build goexperiment.simd
+
+// The build condition == if the experiment is not on, cmd/api TestCheck will see this and complain
+// see also go/doc/comment, where "simd" is inserted to the package list if the experiment is not on.
+
+package simd
+
+// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s.
+// It panics if len(s) < 16.
+func LoadInt8x16Slice(s []int8) Int8x16 {
+ return LoadInt8x16((*[16]int8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 int8s.
+// It panics if len(s) < 16.
+func (x Int8x16) StoreSlice(s []int8) {
+ x.Store((*[16]int8)(s))
+}
+
+// LoadUint8x16Slice loads a Uint8x16 from a slice of at least 16 uint8s.
+// It panics if len(s) < 16.
+func LoadUint8x16Slice(s []uint8) Uint8x16 {
+ return LoadUint8x16((*[16]uint8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 uint8s.
+// It panics if len(s) < 16.
+func (x Uint8x16) StoreSlice(s []uint8) {
+ x.Store((*[16]uint8)(s))
+}
+
+// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s.
+// It panics if len(s) < 8.
+func LoadInt16x8Slice(s []int16) Int16x8 {
+ return LoadInt16x8((*[8]int16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 int16s.
+// It panics if len(s) < 8.
+func (x Int16x8) StoreSlice(s []int16) {
+ x.Store((*[8]int16)(s))
+}
+
+// LoadUint16x8Slice loads a Uint16x8 from a slice of at least 8 uint16s.
+// It panics if len(s) < 8.
+func LoadUint16x8Slice(s []uint16) Uint16x8 {
+ return LoadUint16x8((*[8]uint16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 uint16s.
+// It panics if len(s) < 8.
+func (x Uint16x8) StoreSlice(s []uint16) {
+ x.Store((*[8]uint16)(s))
+}
+
+// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s.
+// It panics if len(s) < 4.
+func LoadInt32x4Slice(s []int32) Int32x4 {
+ return LoadInt32x4((*[4]int32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 int32s.
+// It panics if len(s) < 4.
+func (x Int32x4) StoreSlice(s []int32) {
+ x.Store((*[4]int32)(s))
+}
+
+// LoadUint32x4Slice loads a Uint32x4 from a slice of at least 4 uint32s.
+// It panics if len(s) < 4.
+func LoadUint32x4Slice(s []uint32) Uint32x4 {
+ return LoadUint32x4((*[4]uint32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 uint32s.
+// It panics if len(s) < 4.
+func (x Uint32x4) StoreSlice(s []uint32) {
+ x.Store((*[4]uint32)(s))
+}
+
+// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s.
+// It panics if len(s) < 2.
+func LoadInt64x2Slice(s []int64) Int64x2 {
+ return LoadInt64x2((*[2]int64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 2 int64s.
+// It panics if len(s) < 2.
+func (x Int64x2) StoreSlice(s []int64) {
+ x.Store((*[2]int64)(s))
+}
+
+// LoadUint64x2Slice loads a Uint64x2 from a slice of at least 2 uint64s.
+// It panics if len(s) < 2.
+func LoadUint64x2Slice(s []uint64) Uint64x2 {
+ return LoadUint64x2((*[2]uint64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 2 uint64s.
+// It panics if len(s) < 2.
+func (x Uint64x2) StoreSlice(s []uint64) {
+ x.Store((*[2]uint64)(s))
+}
+
+// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s.
+// It panics if len(s) < 4.
+func LoadFloat32x4Slice(s []float32) Float32x4 {
+ return LoadFloat32x4((*[4]float32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 float32s.
+// It panics if len(s) < 4.
+func (x Float32x4) StoreSlice(s []float32) {
+ x.Store((*[4]float32)(s))
+}
+
+// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s.
+// It panics if len(s) < 2.
+func LoadFloat64x2Slice(s []float64) Float64x2 {
+ return LoadFloat64x2((*[2]float64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 2 float64s.
+// It panics if len(s) < 2.
+func (x Float64x2) StoreSlice(s []float64) {
+ x.Store((*[2]float64)(s))
+}
+
+// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s.
+// It panics if len(s) < 32.
+func LoadInt8x32Slice(s []int8) Int8x32 {
+ return LoadInt8x32((*[32]int8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 32 int8s.
+// It panics if len(s) < 32.
+func (x Int8x32) StoreSlice(s []int8) {
+ x.Store((*[32]int8)(s))
+}
+
+// LoadUint8x32Slice loads a Uint8x32 from a slice of at least 32 uint8s.
+// It panics if len(s) < 32.
+func LoadUint8x32Slice(s []uint8) Uint8x32 {
+ return LoadUint8x32((*[32]uint8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 32 uint8s.
+// It panics if len(s) < 32.
+func (x Uint8x32) StoreSlice(s []uint8) {
+ x.Store((*[32]uint8)(s))
+}
+
+// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s.
+// It panics if len(s) < 16.
+func LoadInt16x16Slice(s []int16) Int16x16 {
+ return LoadInt16x16((*[16]int16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 int16s.
+// It panics if len(s) < 16.
+func (x Int16x16) StoreSlice(s []int16) {
+ x.Store((*[16]int16)(s))
+}
+
+// LoadUint16x16Slice loads a Uint16x16 from a slice of at least 16 uint16s.
+// It panics if len(s) < 16.
+func LoadUint16x16Slice(s []uint16) Uint16x16 {
+ return LoadUint16x16((*[16]uint16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 uint16s.
+// It panics if len(s) < 16.
+func (x Uint16x16) StoreSlice(s []uint16) {
+ x.Store((*[16]uint16)(s))
+}
+
+// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s.
+// It panics if len(s) < 8.
+func LoadInt32x8Slice(s []int32) Int32x8 {
+ return LoadInt32x8((*[8]int32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 int32s.
+// It panics if len(s) < 8.
+func (x Int32x8) StoreSlice(s []int32) {
+ x.Store((*[8]int32)(s))
+}
+
+// LoadUint32x8Slice loads a Uint32x8 from a slice of at least 8 uint32s.
+// It panics if len(s) < 8.
+func LoadUint32x8Slice(s []uint32) Uint32x8 {
+ return LoadUint32x8((*[8]uint32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 uint32s.
+// It panics if len(s) < 8.
+func (x Uint32x8) StoreSlice(s []uint32) {
+ x.Store((*[8]uint32)(s))
+}
+
+// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s.
+// It panics if len(s) < 4.
+func LoadInt64x4Slice(s []int64) Int64x4 {
+ return LoadInt64x4((*[4]int64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 int64s.
+// It panics if len(s) < 4.
+func (x Int64x4) StoreSlice(s []int64) {
+ x.Store((*[4]int64)(s))
+}
+
+// LoadUint64x4Slice loads a Uint64x4 from a slice of at least 4 uint64s.
+// It panics if len(s) < 4.
+func LoadUint64x4Slice(s []uint64) Uint64x4 {
+ return LoadUint64x4((*[4]uint64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 uint64s.
+// It panics if len(s) < 4.
+func (x Uint64x4) StoreSlice(s []uint64) {
+ x.Store((*[4]uint64)(s))
+}
+
+// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s.
+// It panics if len(s) < 8.
+func LoadFloat32x8Slice(s []float32) Float32x8 {
+ return LoadFloat32x8((*[8]float32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 float32s.
+// It panics if len(s) < 8.
+func (x Float32x8) StoreSlice(s []float32) {
+ x.Store((*[8]float32)(s))
+}
+
+// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s.
+// It panics if len(s) < 4.
+func LoadFloat64x4Slice(s []float64) Float64x4 {
+ return LoadFloat64x4((*[4]float64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 float64s.
+// It panics if len(s) < 4.
+func (x Float64x4) StoreSlice(s []float64) {
+ x.Store((*[4]float64)(s))
+}
+
+// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s.
+// It panics if len(s) < 64.
+func LoadInt8x64Slice(s []int8) Int8x64 {
+ return LoadInt8x64((*[64]int8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 64 int8s.
+// It panics if len(s) < 64.
+func (x Int8x64) StoreSlice(s []int8) {
+ x.Store((*[64]int8)(s))
+}
+
+// LoadUint8x64Slice loads a Uint8x64 from a slice of at least 64 uint8s.
+// It panics if len(s) < 64.
+func LoadUint8x64Slice(s []uint8) Uint8x64 {
+ return LoadUint8x64((*[64]uint8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 64 uint8s.
+// It panics if len(s) < 64.
+func (x Uint8x64) StoreSlice(s []uint8) {
+ x.Store((*[64]uint8)(s))
+}
+
+// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s.
+// It panics if len(s) < 32.
+func LoadInt16x32Slice(s []int16) Int16x32 {
+ return LoadInt16x32((*[32]int16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 32 int16s.
+// It panics if len(s) < 32.
+func (x Int16x32) StoreSlice(s []int16) {
+ x.Store((*[32]int16)(s))
+}
+
+// LoadUint16x32Slice loads a Uint16x32 from a slice of at least 32 uint16s.
+// It panics if len(s) < 32.
+func LoadUint16x32Slice(s []uint16) Uint16x32 {
+ return LoadUint16x32((*[32]uint16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 32 uint16s.
+// It panics if len(s) < 32.
+func (x Uint16x32) StoreSlice(s []uint16) {
+ x.Store((*[32]uint16)(s))
+}
+
+// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s.
+// It panics if len(s) < 16.
+func LoadInt32x16Slice(s []int32) Int32x16 {
+ return LoadInt32x16((*[16]int32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 int32s.
+// It panics if len(s) < 16.
+func (x Int32x16) StoreSlice(s []int32) {
+ x.Store((*[16]int32)(s))
+}
+
+// LoadUint32x16Slice loads a Uint32x16 from a slice of at least 16 uint32s.
+// It panics if len(s) < 16.
+func LoadUint32x16Slice(s []uint32) Uint32x16 {
+ return LoadUint32x16((*[16]uint32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 uint32s.
+// It panics if len(s) < 16.
+func (x Uint32x16) StoreSlice(s []uint32) {
+ x.Store((*[16]uint32)(s))
+}
+
+// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s.
+// It panics if len(s) < 8.
+func LoadInt64x8Slice(s []int64) Int64x8 {
+ return LoadInt64x8((*[8]int64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 int64s.
+// It panics if len(s) < 8.
+func (x Int64x8) StoreSlice(s []int64) {
+ x.Store((*[8]int64)(s))
+}
+
+// LoadUint64x8Slice loads a Uint64x8 from a slice of at least 8 uint64s.
+// It panics if len(s) < 8.
+func LoadUint64x8Slice(s []uint64) Uint64x8 {
+ return LoadUint64x8((*[8]uint64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 uint64s.
+// It panics if len(s) < 8.
+func (x Uint64x8) StoreSlice(s []uint64) {
+ x.Store((*[8]uint64)(s))
+}
+
+// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s.
+// It panics if len(s) < 16.
+func LoadFloat32x16Slice(s []float32) Float32x16 {
+ return LoadFloat32x16((*[16]float32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 float32s.
+// It panics if len(s) < 16.
+func (x Float32x16) StoreSlice(s []float32) {
+ x.Store((*[16]float32)(s))
+}
+
+// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s.
+// It panics if len(s) < 8.
+func LoadFloat64x8Slice(s []float64) Float64x8 {
+ return LoadFloat64x8((*[8]float64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 float64s.
+// It panics if len(s) < 8.
+func (x Float64x8) StoreSlice(s []float64) {
+ x.Store((*[8]float64)(s))
+}