--- /dev/null
+// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+
+//go:build goexperiment.simd
+
+package simd
+
+// Less reports, for each element, whether x < y.
+// It is computed as Greater with the operands swapped.
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) Less(y Int8x16) Mask8x16 {
+ lt := y.Greater(x)
+ return lt
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// It is computed by inverting y > x with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 {
+ allTrue := x.Equal(x).AsInt8x16()
+ less := y.Greater(x).AsInt8x16()
+ return less.Xor(allTrue).AsMask8x16()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// It is computed by inverting x > y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) LessEqual(y Int8x16) Mask8x16 {
+ allTrue := x.Equal(x).AsInt8x16()
+ greater := x.Greater(y).AsInt8x16()
+ return greater.Xor(allTrue).AsMask8x16()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// It is computed by inverting x == y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) NotEqual(y Int8x16) Mask8x16 {
+ allTrue := x.Equal(x).AsInt8x16()
+ same := x.Equal(y).AsInt8x16()
+ return same.Xor(allTrue).AsMask8x16()
+}
+
+// Less reports, for each element, whether x < y.
+// It is computed as Greater with the operands swapped.
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) Less(y Int16x8) Mask16x8 {
+ lt := y.Greater(x)
+ return lt
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// It is computed by inverting y > x with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 {
+ allTrue := x.Equal(x).AsInt16x8()
+ less := y.Greater(x).AsInt16x8()
+ return less.Xor(allTrue).AsMask16x8()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// It is computed by inverting x > y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) LessEqual(y Int16x8) Mask16x8 {
+ allTrue := x.Equal(x).AsInt16x8()
+ greater := x.Greater(y).AsInt16x8()
+ return greater.Xor(allTrue).AsMask16x8()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// It is computed by inverting x == y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) NotEqual(y Int16x8) Mask16x8 {
+ allTrue := x.Equal(x).AsInt16x8()
+ same := x.Equal(y).AsInt16x8()
+ return same.Xor(allTrue).AsMask16x8()
+}
+
+// Less reports, for each element, whether x < y.
+// It is computed as Greater with the operands swapped.
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) Less(y Int32x4) Mask32x4 {
+ lt := y.Greater(x)
+ return lt
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// It is computed by inverting y > x with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 {
+ allTrue := x.Equal(x).AsInt32x4()
+ less := y.Greater(x).AsInt32x4()
+ return less.Xor(allTrue).AsMask32x4()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// It is computed by inverting x > y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) LessEqual(y Int32x4) Mask32x4 {
+ allTrue := x.Equal(x).AsInt32x4()
+ greater := x.Greater(y).AsInt32x4()
+ return greater.Xor(allTrue).AsMask32x4()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// It is computed by inverting x == y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) NotEqual(y Int32x4) Mask32x4 {
+ allTrue := x.Equal(x).AsInt32x4()
+ same := x.Equal(y).AsInt32x4()
+ return same.Xor(allTrue).AsMask32x4()
+}
+
+// Less reports, for each element, whether x < y.
+// It is computed as Greater with the operands swapped.
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) Less(y Int64x2) Mask64x2 {
+ lt := y.Greater(x)
+ return lt
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// It is computed by inverting y > x with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 {
+ allTrue := x.Equal(x).AsInt64x2()
+ less := y.Greater(x).AsInt64x2()
+ return less.Xor(allTrue).AsMask64x2()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// It is computed by inverting x > y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) LessEqual(y Int64x2) Mask64x2 {
+ allTrue := x.Equal(x).AsInt64x2()
+ greater := x.Greater(y).AsInt64x2()
+ return greater.Xor(allTrue).AsMask64x2()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// It is computed by inverting x == y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) NotEqual(y Int64x2) Mask64x2 {
+ allTrue := x.Equal(x).AsInt64x2()
+ same := x.Equal(y).AsInt64x2()
+ return same.Xor(allTrue).AsMask64x2()
+}
+
+// Less reports, for each element, whether x < y.
+// It is computed as Greater with the operands swapped.
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) Less(y Int8x32) Mask8x32 {
+ lt := y.Greater(x)
+ return lt
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// It is computed by inverting y > x with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 {
+ allTrue := x.Equal(x).AsInt8x32()
+ less := y.Greater(x).AsInt8x32()
+ return less.Xor(allTrue).AsMask8x32()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// It is computed by inverting x > y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) LessEqual(y Int8x32) Mask8x32 {
+ allTrue := x.Equal(x).AsInt8x32()
+ greater := x.Greater(y).AsInt8x32()
+ return greater.Xor(allTrue).AsMask8x32()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// It is computed by inverting x == y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) NotEqual(y Int8x32) Mask8x32 {
+ allTrue := x.Equal(x).AsInt8x32()
+ same := x.Equal(y).AsInt8x32()
+ return same.Xor(allTrue).AsMask8x32()
+}
+
+// Less reports, for each element, whether x < y.
+// It is computed as Greater with the operands swapped.
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) Less(y Int16x16) Mask16x16 {
+ lt := y.Greater(x)
+ return lt
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// It is computed by inverting y > x with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 {
+ allTrue := x.Equal(x).AsInt16x16()
+ less := y.Greater(x).AsInt16x16()
+ return less.Xor(allTrue).AsMask16x16()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// It is computed by inverting x > y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) LessEqual(y Int16x16) Mask16x16 {
+ allTrue := x.Equal(x).AsInt16x16()
+ greater := x.Greater(y).AsInt16x16()
+ return greater.Xor(allTrue).AsMask16x16()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// It is computed by inverting x == y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) NotEqual(y Int16x16) Mask16x16 {
+ allTrue := x.Equal(x).AsInt16x16()
+ same := x.Equal(y).AsInt16x16()
+ return same.Xor(allTrue).AsMask16x16()
+}
+
+// Less reports, for each element, whether x < y.
+// It is computed as Greater with the operands swapped.
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) Less(y Int32x8) Mask32x8 {
+ lt := y.Greater(x)
+ return lt
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// It is computed by inverting y > x with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 {
+ allTrue := x.Equal(x).AsInt32x8()
+ less := y.Greater(x).AsInt32x8()
+ return less.Xor(allTrue).AsMask32x8()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// It is computed by inverting x > y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) LessEqual(y Int32x8) Mask32x8 {
+ allTrue := x.Equal(x).AsInt32x8()
+ greater := x.Greater(y).AsInt32x8()
+ return greater.Xor(allTrue).AsMask32x8()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// It is computed by inverting x == y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) NotEqual(y Int32x8) Mask32x8 {
+ allTrue := x.Equal(x).AsInt32x8()
+ same := x.Equal(y).AsInt32x8()
+ return same.Xor(allTrue).AsMask32x8()
+}
+
+// Less reports, for each element, whether x < y.
+// It is computed as Greater with the operands swapped.
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) Less(y Int64x4) Mask64x4 {
+ lt := y.Greater(x)
+ return lt
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// It is computed by inverting y > x with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 {
+ allTrue := x.Equal(x).AsInt64x4()
+ less := y.Greater(x).AsInt64x4()
+ return less.Xor(allTrue).AsMask64x4()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// It is computed by inverting x > y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) LessEqual(y Int64x4) Mask64x4 {
+ allTrue := x.Equal(x).AsInt64x4()
+ greater := x.Greater(y).AsInt64x4()
+ return greater.Xor(allTrue).AsMask64x4()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// It is computed by inverting x == y with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) NotEqual(y Int64x4) Mask64x4 {
+ allTrue := x.Equal(x).AsInt64x4()
+ same := x.Equal(y).AsInt64x4()
+ return same.Xor(allTrue).AsMask64x4()
+}
+
+// Greater reports, for each element, whether x > y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) Greater(y Uint8x16) Mask8x16 {
+ signBit := BroadcastInt8x16(-1 << (8 - 1))
+ xs := x.AsInt8x16().Xor(signBit)
+ ys := y.AsInt8x16().Xor(signBit)
+ return xs.Greater(ys)
+}
+
+// Less reports, for each element, whether x < y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater, operands swapped.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) Less(y Uint8x16) Mask8x16 {
+ signBit := BroadcastInt8x16(-1 << (8 - 1))
+ xs := x.AsInt8x16().Xor(signBit)
+ ys := y.AsInt8x16().Xor(signBit)
+ return ys.Greater(xs)
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// Both operands have their top bit flipped, y > x is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 {
+ allTrue := x.Equal(x).AsInt8x16()
+ signBit := BroadcastInt8x16(-1 << (8 - 1))
+ xs := x.AsInt8x16().Xor(signBit)
+ ys := y.AsInt8x16().Xor(signBit)
+ less := ys.Greater(xs).AsInt8x16()
+ return less.Xor(allTrue).AsMask8x16()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// Both operands have their top bit flipped, x > y is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 {
+ allTrue := x.Equal(x).AsInt8x16()
+ signBit := BroadcastInt8x16(-1 << (8 - 1))
+ xs := x.AsInt8x16().Xor(signBit)
+ ys := y.AsInt8x16().Xor(signBit)
+ greater := xs.Greater(ys).AsInt8x16()
+ return greater.Xor(allTrue).AsMask8x16()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// The operands are compared for equality through their Int8x16 view and the
+// result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 {
+ allTrue := x.Equal(x).AsInt8x16()
+ same := x.AsInt8x16().Equal(y.AsInt8x16()).AsInt8x16()
+ return same.Xor(allTrue).AsMask8x16()
+}
+
+// Greater reports, for each element, whether x > y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater.
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) Greater(y Uint16x8) Mask16x8 {
+ allTrue := x.Equal(x).AsInt16x8()
+ signBit := allTrue.ShiftAllLeft(16 - 1)
+ xs := x.AsInt16x8().Xor(signBit)
+ ys := y.AsInt16x8().Xor(signBit)
+ return xs.Greater(ys)
+}
+
+// Less reports, for each element, whether x < y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater, operands swapped.
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) Less(y Uint16x8) Mask16x8 {
+ allTrue := x.Equal(x).AsInt16x8()
+ signBit := allTrue.ShiftAllLeft(16 - 1)
+ xs := x.AsInt16x8().Xor(signBit)
+ ys := y.AsInt16x8().Xor(signBit)
+ return ys.Greater(xs)
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// Both operands have their top bit flipped, y > x is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 {
+ allTrue := x.Equal(x).AsInt16x8()
+ signBit := allTrue.ShiftAllLeft(16 - 1)
+ xs := x.AsInt16x8().Xor(signBit)
+ ys := y.AsInt16x8().Xor(signBit)
+ less := ys.Greater(xs).AsInt16x8()
+ return less.Xor(allTrue).AsMask16x8()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// Both operands have their top bit flipped, x > y is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 {
+ allTrue := x.Equal(x).AsInt16x8()
+ signBit := allTrue.ShiftAllLeft(16 - 1)
+ xs := x.AsInt16x8().Xor(signBit)
+ ys := y.AsInt16x8().Xor(signBit)
+ greater := xs.Greater(ys).AsInt16x8()
+ return greater.Xor(allTrue).AsMask16x8()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// The operands are compared for equality through their Int16x8 view and the
+// result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 {
+ allTrue := x.Equal(x).AsInt16x8()
+ same := x.AsInt16x8().Equal(y.AsInt16x8()).AsInt16x8()
+ return same.Xor(allTrue).AsMask16x8()
+}
+
+// Greater reports, for each element, whether x > y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater.
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) Greater(y Uint32x4) Mask32x4 {
+ allTrue := x.Equal(x).AsInt32x4()
+ signBit := allTrue.ShiftAllLeft(32 - 1)
+ xs := x.AsInt32x4().Xor(signBit)
+ ys := y.AsInt32x4().Xor(signBit)
+ return xs.Greater(ys)
+}
+
+// Less reports, for each element, whether x < y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater, operands swapped.
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) Less(y Uint32x4) Mask32x4 {
+ allTrue := x.Equal(x).AsInt32x4()
+ signBit := allTrue.ShiftAllLeft(32 - 1)
+ xs := x.AsInt32x4().Xor(signBit)
+ ys := y.AsInt32x4().Xor(signBit)
+ return ys.Greater(xs)
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// Both operands have their top bit flipped, y > x is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 {
+ allTrue := x.Equal(x).AsInt32x4()
+ signBit := allTrue.ShiftAllLeft(32 - 1)
+ xs := x.AsInt32x4().Xor(signBit)
+ ys := y.AsInt32x4().Xor(signBit)
+ less := ys.Greater(xs).AsInt32x4()
+ return less.Xor(allTrue).AsMask32x4()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// Both operands have their top bit flipped, x > y is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 {
+ allTrue := x.Equal(x).AsInt32x4()
+ signBit := allTrue.ShiftAllLeft(32 - 1)
+ xs := x.AsInt32x4().Xor(signBit)
+ ys := y.AsInt32x4().Xor(signBit)
+ greater := xs.Greater(ys).AsInt32x4()
+ return greater.Xor(allTrue).AsMask32x4()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// The operands are compared for equality through their Int32x4 view and the
+// result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 {
+ allTrue := x.Equal(x).AsInt32x4()
+ same := x.AsInt32x4().Equal(y.AsInt32x4()).AsInt32x4()
+ return same.Xor(allTrue).AsMask32x4()
+}
+
+// Greater reports, for each element, whether x > y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater.
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
+ allTrue := x.Equal(x).AsInt64x2()
+ signBit := allTrue.ShiftAllLeft(64 - 1)
+ xs := x.AsInt64x2().Xor(signBit)
+ ys := y.AsInt64x2().Xor(signBit)
+ return xs.Greater(ys)
+}
+
+// Less reports, for each element, whether x < y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater, operands swapped.
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
+ allTrue := x.Equal(x).AsInt64x2()
+ signBit := allTrue.ShiftAllLeft(64 - 1)
+ xs := x.AsInt64x2().Xor(signBit)
+ ys := y.AsInt64x2().Xor(signBit)
+ return ys.Greater(xs)
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// Both operands have their top bit flipped, y > x is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
+ allTrue := x.Equal(x).AsInt64x2()
+ signBit := allTrue.ShiftAllLeft(64 - 1)
+ xs := x.AsInt64x2().Xor(signBit)
+ ys := y.AsInt64x2().Xor(signBit)
+ less := ys.Greater(xs).AsInt64x2()
+ return less.Xor(allTrue).AsMask64x2()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// Both operands have their top bit flipped, x > y is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
+ allTrue := x.Equal(x).AsInt64x2()
+ signBit := allTrue.ShiftAllLeft(64 - 1)
+ xs := x.AsInt64x2().Xor(signBit)
+ ys := y.AsInt64x2().Xor(signBit)
+ greater := xs.Greater(ys).AsInt64x2()
+ return greater.Xor(allTrue).AsMask64x2()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// The operands are compared for equality through their Int64x2 view and the
+// result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 {
+ allTrue := x.Equal(x).AsInt64x2()
+ same := x.AsInt64x2().Equal(y.AsInt64x2()).AsInt64x2()
+ return same.Xor(allTrue).AsMask64x2()
+}
+
+// Greater reports, for each element, whether x > y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) Greater(y Uint8x32) Mask8x32 {
+ signBit := BroadcastInt8x32(-1 << (8 - 1))
+ xs := x.AsInt8x32().Xor(signBit)
+ ys := y.AsInt8x32().Xor(signBit)
+ return xs.Greater(ys)
+}
+
+// Less reports, for each element, whether x < y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater, operands swapped.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) Less(y Uint8x32) Mask8x32 {
+ signBit := BroadcastInt8x32(-1 << (8 - 1))
+ xs := x.AsInt8x32().Xor(signBit)
+ ys := y.AsInt8x32().Xor(signBit)
+ return ys.Greater(xs)
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// Both operands have their top bit flipped, y > x is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 {
+ allTrue := x.Equal(x).AsInt8x32()
+ signBit := BroadcastInt8x32(-1 << (8 - 1))
+ xs := x.AsInt8x32().Xor(signBit)
+ ys := y.AsInt8x32().Xor(signBit)
+ less := ys.Greater(xs).AsInt8x32()
+ return less.Xor(allTrue).AsMask8x32()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// Both operands have their top bit flipped, x > y is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 {
+ allTrue := x.Equal(x).AsInt8x32()
+ signBit := BroadcastInt8x32(-1 << (8 - 1))
+ xs := x.AsInt8x32().Xor(signBit)
+ ys := y.AsInt8x32().Xor(signBit)
+ greater := xs.Greater(ys).AsInt8x32()
+ return greater.Xor(allTrue).AsMask8x32()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// The operands are compared for equality through their Int8x32 view and the
+// result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 {
+ allTrue := x.Equal(x).AsInt8x32()
+ same := x.AsInt8x32().Equal(y.AsInt8x32()).AsInt8x32()
+ return same.Xor(allTrue).AsMask8x32()
+}
+
+// Greater reports, for each element, whether x > y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) Greater(y Uint16x16) Mask16x16 {
+ allTrue := x.Equal(x).AsInt16x16()
+ signBit := allTrue.ShiftAllLeft(16 - 1)
+ xs := x.AsInt16x16().Xor(signBit)
+ ys := y.AsInt16x16().Xor(signBit)
+ return xs.Greater(ys)
+}
+
+// Less reports, for each element, whether x < y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater, operands swapped.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) Less(y Uint16x16) Mask16x16 {
+ allTrue := x.Equal(x).AsInt16x16()
+ signBit := allTrue.ShiftAllLeft(16 - 1)
+ xs := x.AsInt16x16().Xor(signBit)
+ ys := y.AsInt16x16().Xor(signBit)
+ return ys.Greater(xs)
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// Both operands have their top bit flipped, y > x is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 {
+ allTrue := x.Equal(x).AsInt16x16()
+ signBit := allTrue.ShiftAllLeft(16 - 1)
+ xs := x.AsInt16x16().Xor(signBit)
+ ys := y.AsInt16x16().Xor(signBit)
+ less := ys.Greater(xs).AsInt16x16()
+ return less.Xor(allTrue).AsMask16x16()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// Both operands have their top bit flipped, x > y is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 {
+ allTrue := x.Equal(x).AsInt16x16()
+ signBit := allTrue.ShiftAllLeft(16 - 1)
+ xs := x.AsInt16x16().Xor(signBit)
+ ys := y.AsInt16x16().Xor(signBit)
+ greater := xs.Greater(ys).AsInt16x16()
+ return greater.Xor(allTrue).AsMask16x16()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// The operands are compared for equality through their Int16x16 view and the
+// result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 {
+ allTrue := x.Equal(x).AsInt16x16()
+ same := x.AsInt16x16().Equal(y.AsInt16x16()).AsInt16x16()
+ return same.Xor(allTrue).AsMask16x16()
+}
+
+// Greater reports, for each element, whether x > y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) Greater(y Uint32x8) Mask32x8 {
+ allTrue := x.Equal(x).AsInt32x8()
+ signBit := allTrue.ShiftAllLeft(32 - 1)
+ xs := x.AsInt32x8().Xor(signBit)
+ ys := y.AsInt32x8().Xor(signBit)
+ return xs.Greater(ys)
+}
+
+// Less reports, for each element, whether x < y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater, operands swapped.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) Less(y Uint32x8) Mask32x8 {
+ allTrue := x.Equal(x).AsInt32x8()
+ signBit := allTrue.ShiftAllLeft(32 - 1)
+ xs := x.AsInt32x8().Xor(signBit)
+ ys := y.AsInt32x8().Xor(signBit)
+ return ys.Greater(xs)
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// Both operands have their top bit flipped, y > x is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 {
+ allTrue := x.Equal(x).AsInt32x8()
+ signBit := allTrue.ShiftAllLeft(32 - 1)
+ xs := x.AsInt32x8().Xor(signBit)
+ ys := y.AsInt32x8().Xor(signBit)
+ less := ys.Greater(xs).AsInt32x8()
+ return less.Xor(allTrue).AsMask32x8()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// Both operands have their top bit flipped, x > y is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 {
+ allTrue := x.Equal(x).AsInt32x8()
+ signBit := allTrue.ShiftAllLeft(32 - 1)
+ xs := x.AsInt32x8().Xor(signBit)
+ ys := y.AsInt32x8().Xor(signBit)
+ greater := xs.Greater(ys).AsInt32x8()
+ return greater.Xor(allTrue).AsMask32x8()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// The operands are compared for equality through their Int32x8 view and the
+// result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 {
+ allTrue := x.Equal(x).AsInt32x8()
+ same := x.AsInt32x8().Equal(y.AsInt32x8()).AsInt32x8()
+ return same.Xor(allTrue).AsMask32x8()
+}
+
+// Greater reports, for each element, whether x > y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) Greater(y Uint64x4) Mask64x4 {
+ allTrue := x.Equal(x).AsInt64x4()
+ signBit := allTrue.ShiftAllLeft(64 - 1)
+ xs := x.AsInt64x4().Xor(signBit)
+ ys := y.AsInt64x4().Xor(signBit)
+ return xs.Greater(ys)
+}
+
+// Less reports, for each element, whether x < y.
+// Both operands have their top bit flipped and are then compared with the
+// signed Greater, operands swapped.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) Less(y Uint64x4) Mask64x4 {
+ allTrue := x.Equal(x).AsInt64x4()
+ signBit := allTrue.ShiftAllLeft(64 - 1)
+ xs := x.AsInt64x4().Xor(signBit)
+ ys := y.AsInt64x4().Xor(signBit)
+ return ys.Greater(xs)
+}
+
+// GreaterEqual reports, for each element, whether x >= y.
+// Both operands have their top bit flipped, y > x is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 {
+ allTrue := x.Equal(x).AsInt64x4()
+ signBit := allTrue.ShiftAllLeft(64 - 1)
+ xs := x.AsInt64x4().Xor(signBit)
+ ys := y.AsInt64x4().Xor(signBit)
+ less := ys.Greater(xs).AsInt64x4()
+ return less.Xor(allTrue).AsMask64x4()
+}
+
+// LessEqual reports, for each element, whether x <= y.
+// Both operands have their top bit flipped, x > y is evaluated with the
+// signed Greater, and the result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 {
+ allTrue := x.Equal(x).AsInt64x4()
+ signBit := allTrue.ShiftAllLeft(64 - 1)
+ xs := x.AsInt64x4().Xor(signBit)
+ ys := y.AsInt64x4().Xor(signBit)
+ greater := xs.Greater(ys).AsInt64x4()
+ return greater.Xor(allTrue).AsMask64x4()
+}
+
+// NotEqual reports, for each element, whether x != y.
+// The operands are compared for equality through their Int64x4 view and the
+// result is inverted with the all-true mask x == x.
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 {
+ allTrue := x.Equal(x).AsInt64x4()
+ same := x.AsInt64x4().Equal(y.AsInt64x4()).AsInt64x4()
+ return same.Xor(allTrue).AsMask64x4()
+}
package simd
-import "unsafe"
-
`, s)
}
`)
func main() {
- sl := flag.String("sl", "slice_amd64.go", "file name for slice operations")
+ sl := flag.String("sl", "slice_gen_amd64.go", "file name for slice operations")
+ cm := flag.String("cm", "compare_gen_amd64.go", "file name for comparison operations")
+ mm := flag.String("mm", "maskmerge_gen_amd64.go", "file name for mask/merge operations")
+ op := flag.String("op", "other_gen_amd64.go", "file name for other operations")
ush := flag.String("ush", "unsafe_helpers.go", "file name for unsafe helpers")
bh := flag.String("bh", "binary_helpers_test.go", "file name for binary test helpers")
uh := flag.String("uh", "unary_helpers_test.go", "file name for unary test helpers")
flag.Parse()
if *sl != "" {
- one(*sl, prologue,
+ one(*sl, unsafePrologue,
sliceTemplate,
avx512MaskedLoadSlicePartTemplate,
avx2MaskedLoadSlicePartTemplate,
avx2SmallLoadSlicePartTemplate,
- avx2MaskedTemplate,
- avx512MaskedTemplate,
+ )
+ }
+ if *cm != "" {
+ one(*cm, prologue,
avx2SignedComparisonsTemplate,
avx2UnsignedComparisonsTemplate,
+ )
+ }
+ if *mm != "" {
+ one(*mm, prologue,
+ avx2MaskedTemplate,
+ avx512MaskedTemplate,
+ )
+ }
+ if *op != "" {
+ one(*op, prologue,
broadcastTemplate,
)
}
--- /dev/null
+// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+
+//go:build goexperiment.simd
+
+package simd
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int8x16 view of mask.
+func (x Int8x16) Masked(mask Mask8x16) Int8x16 {
+ return mask.AsInt8x16().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+func (x Int8x16) Merge(y Int8x16, mask Mask8x16) Int8x16 {
+ return y.blend(x, mask.AsInt8x16())
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int16x8 view of mask.
+func (x Int16x8) Masked(mask Mask16x8) Int16x8 {
+ return mask.AsInt16x8().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x16 views of x, y and mask.
+func (x Int16x8) Merge(y Int16x8, mask Mask16x8) Int16x8 {
+ sel := mask.AsInt16x8().AsInt8x16()
+ merged := y.AsInt8x16().blend(x.AsInt8x16(), sel)
+ return merged.AsInt16x8()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int32x4 view of mask.
+func (x Int32x4) Masked(mask Mask32x4) Int32x4 {
+ return mask.AsInt32x4().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x16 views of x, y and mask.
+func (x Int32x4) Merge(y Int32x4, mask Mask32x4) Int32x4 {
+ sel := mask.AsInt32x4().AsInt8x16()
+ merged := y.AsInt8x16().blend(x.AsInt8x16(), sel)
+ return merged.AsInt32x4()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int64x2 view of mask.
+func (x Int64x2) Masked(mask Mask64x2) Int64x2 {
+ return mask.AsInt64x2().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x16 views of x, y and mask.
+func (x Int64x2) Merge(y Int64x2, mask Mask64x2) Int64x2 {
+ sel := mask.AsInt64x2().AsInt8x16()
+ merged := y.AsInt8x16().blend(x.AsInt8x16(), sel)
+ return merged.AsInt64x2()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int8x16 views of x and mask.
+func (x Uint8x16) Masked(mask Mask8x16) Uint8x16 {
+ kept := x.AsInt8x16().And(mask.AsInt8x16())
+ return kept.AsUint8x16()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x16 views of x, y and mask.
+func (x Uint8x16) Merge(y Uint8x16, mask Mask8x16) Uint8x16 {
+ sel := mask.AsInt8x16()
+ merged := y.AsInt8x16().blend(x.AsInt8x16(), sel)
+ return merged.AsUint8x16()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int16x8 views of x and mask.
+func (x Uint16x8) Masked(mask Mask16x8) Uint16x8 {
+ kept := x.AsInt16x8().And(mask.AsInt16x8())
+ return kept.AsUint16x8()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x16 views of x, y and mask.
+func (x Uint16x8) Merge(y Uint16x8, mask Mask16x8) Uint16x8 {
+ sel := mask.AsInt16x8().AsInt8x16()
+ merged := y.AsInt8x16().blend(x.AsInt8x16(), sel)
+ return merged.AsUint16x8()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int32x4 views of x and mask.
+func (x Uint32x4) Masked(mask Mask32x4) Uint32x4 {
+ kept := x.AsInt32x4().And(mask.AsInt32x4())
+ return kept.AsUint32x4()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x16 views of x, y and mask.
+func (x Uint32x4) Merge(y Uint32x4, mask Mask32x4) Uint32x4 {
+ sel := mask.AsInt32x4().AsInt8x16()
+ merged := y.AsInt8x16().blend(x.AsInt8x16(), sel)
+ return merged.AsUint32x4()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int64x2 views of x and mask.
+func (x Uint64x2) Masked(mask Mask64x2) Uint64x2 {
+ kept := x.AsInt64x2().And(mask.AsInt64x2())
+ return kept.AsUint64x2()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x16 views of x, y and mask.
+func (x Uint64x2) Merge(y Uint64x2, mask Mask64x2) Uint64x2 {
+ sel := mask.AsInt64x2().AsInt8x16()
+ merged := y.AsInt8x16().blend(x.AsInt8x16(), sel)
+ return merged.AsUint64x2()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int32x4 views of x and mask.
+func (x Float32x4) Masked(mask Mask32x4) Float32x4 {
+ kept := x.AsInt32x4().And(mask.AsInt32x4())
+ return kept.AsFloat32x4()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x16 views of x, y and mask.
+func (x Float32x4) Merge(y Float32x4, mask Mask32x4) Float32x4 {
+ sel := mask.AsInt32x4().AsInt8x16()
+ merged := y.AsInt8x16().blend(x.AsInt8x16(), sel)
+ return merged.AsFloat32x4()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int64x2 views of x and mask.
+func (x Float64x2) Masked(mask Mask64x2) Float64x2 {
+ kept := x.AsInt64x2().And(mask.AsInt64x2())
+ return kept.AsFloat64x2()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x16 views of x, y and mask.
+func (x Float64x2) Merge(y Float64x2, mask Mask64x2) Float64x2 {
+ sel := mask.AsInt64x2().AsInt8x16()
+ merged := y.AsInt8x16().blend(x.AsInt8x16(), sel)
+ return merged.AsFloat64x2()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int8x32 view of mask.
+func (x Int8x32) Masked(mask Mask8x32) Int8x32 {
+ return mask.AsInt8x32().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+func (x Int8x32) Merge(y Int8x32, mask Mask8x32) Int8x32 {
+ return y.blend(x, mask.AsInt8x32())
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int16x16 view of mask.
+func (x Int16x16) Masked(mask Mask16x16) Int16x16 {
+ return mask.AsInt16x16().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x32 views of x, y and mask.
+func (x Int16x16) Merge(y Int16x16, mask Mask16x16) Int16x16 {
+ sel := mask.AsInt16x16().AsInt8x32()
+ merged := y.AsInt8x32().blend(x.AsInt8x32(), sel)
+ return merged.AsInt16x16()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int32x8 view of mask.
+func (x Int32x8) Masked(mask Mask32x8) Int32x8 {
+ return mask.AsInt32x8().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x32 views of x, y and mask.
+func (x Int32x8) Merge(y Int32x8, mask Mask32x8) Int32x8 {
+ sel := mask.AsInt32x8().AsInt8x32()
+ merged := y.AsInt8x32().blend(x.AsInt8x32(), sel)
+ return merged.AsInt32x8()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int64x4 view of mask.
+func (x Int64x4) Masked(mask Mask64x4) Int64x4 {
+ return mask.AsInt64x4().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x32 views of x, y and mask.
+func (x Int64x4) Merge(y Int64x4, mask Mask64x4) Int64x4 {
+ sel := mask.AsInt64x4().AsInt8x32()
+ merged := y.AsInt8x32().blend(x.AsInt8x32(), sel)
+ return merged.AsInt64x4()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int8x32 views of x and mask.
+func (x Uint8x32) Masked(mask Mask8x32) Uint8x32 {
+ kept := x.AsInt8x32().And(mask.AsInt8x32())
+ return kept.AsUint8x32()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x32 views of x, y and mask.
+func (x Uint8x32) Merge(y Uint8x32, mask Mask8x32) Uint8x32 {
+ sel := mask.AsInt8x32()
+ merged := y.AsInt8x32().blend(x.AsInt8x32(), sel)
+ return merged.AsUint8x32()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int16x16 views of x and mask.
+func (x Uint16x16) Masked(mask Mask16x16) Uint16x16 {
+ kept := x.AsInt16x16().And(mask.AsInt16x16())
+ return kept.AsUint16x16()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x32 views of x, y and mask.
+func (x Uint16x16) Merge(y Uint16x16, mask Mask16x16) Uint16x16 {
+ sel := mask.AsInt16x16().AsInt8x32()
+ merged := y.AsInt8x32().blend(x.AsInt8x32(), sel)
+ return merged.AsUint16x16()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int32x8 views of x and mask.
+func (x Uint32x8) Masked(mask Mask32x8) Uint32x8 {
+ kept := x.AsInt32x8().And(mask.AsInt32x8())
+ return kept.AsUint32x8()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x32 views of x, y and mask.
+func (x Uint32x8) Merge(y Uint32x8, mask Mask32x8) Uint32x8 {
+ sel := mask.AsInt32x8().AsInt8x32()
+ merged := y.AsInt8x32().blend(x.AsInt8x32(), sel)
+ return merged.AsUint32x8()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int64x4 views of x and mask.
+func (x Uint64x4) Masked(mask Mask64x4) Uint64x4 {
+ kept := x.AsInt64x4().And(mask.AsInt64x4())
+ return kept.AsUint64x4()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x32 views of x, y and mask.
+func (x Uint64x4) Merge(y Uint64x4, mask Mask64x4) Uint64x4 {
+ sel := mask.AsInt64x4().AsInt8x32()
+ merged := y.AsInt8x32().blend(x.AsInt8x32(), sel)
+ return merged.AsUint64x4()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int32x8 views of x and mask.
+func (x Float32x8) Masked(mask Mask32x8) Float32x8 {
+ kept := x.AsInt32x8().And(mask.AsInt32x8())
+ return kept.AsFloat32x8()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x32 views of x, y and mask.
+func (x Float32x8) Merge(y Float32x8, mask Mask32x8) Float32x8 {
+ sel := mask.AsInt32x8().AsInt8x32()
+ merged := y.AsInt8x32().blend(x.AsInt8x32(), sel)
+ return merged.AsFloat32x8()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int64x4 views of x and mask.
+func (x Float64x4) Masked(mask Mask64x4) Float64x4 {
+ kept := x.AsInt64x4().And(mask.AsInt64x4())
+ return kept.AsFloat64x4()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x32 views of x, y and mask.
+func (x Float64x4) Merge(y Float64x4, mask Mask64x4) Float64x4 {
+ sel := mask.AsInt64x4().AsInt8x32()
+ merged := y.AsInt8x32().blend(x.AsInt8x32(), sel)
+ return merged.AsFloat64x4()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int8x64 view of mask.
+func (x Int8x64) Masked(mask Mask8x64) Int8x64 {
+ return mask.AsInt8x64().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+func (x Int8x64) Merge(y Int8x64, mask Mask8x64) Int8x64 {
+ merged := y.blendMasked(x, mask)
+ return merged
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int16x32 view of mask.
+func (x Int16x32) Masked(mask Mask16x32) Int16x32 {
+ return mask.AsInt16x32().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+func (x Int16x32) Merge(y Int16x32, mask Mask16x32) Int16x32 {
+ merged := y.blendMasked(x, mask)
+ return merged
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int32x16 view of mask.
+func (x Int32x16) Masked(mask Mask32x16) Int32x16 {
+ return mask.AsInt32x16().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+func (x Int32x16) Merge(y Int32x16, mask Mask32x16) Int32x16 {
+ merged := y.blendMasked(x, mask)
+ return merged
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs x with the Int64x8 view of mask.
+func (x Int64x8) Masked(mask Mask64x8) Int64x8 {
+ return mask.AsInt64x8().And(x)
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+func (x Int64x8) Merge(y Int64x8, mask Mask64x8) Int64x8 {
+ merged := y.blendMasked(x, mask)
+ return merged
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int8x64 views of x and mask.
+func (x Uint8x64) Masked(mask Mask8x64) Uint8x64 {
+ kept := x.AsInt8x64().And(mask.AsInt8x64())
+ return kept.AsUint8x64()
+}
+
+// Merge returns a vector holding the elements of x where mask is true and
+// the elements of y where mask is false.
+// The blend is performed on the Int8x64 views of x and y.
+func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 {
+ merged := y.AsInt8x64().blendMasked(x.AsInt8x64(), mask)
+ return merged.AsUint8x64()
+}
+
+// Masked returns a copy of x in which every element whose mask entry is
+// false is zero. It ANDs the Int16x32 views of x and mask.
+func (x Uint16x32) Masked(mask Mask16x32) Uint16x32 {
+ kept := x.AsInt16x32().And(mask.AsInt16x32())
+ return kept.AsUint16x32()
+}
+
+// Merge returns x with every element replaced by the corresponding
+// element of y where mask is false.
+func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 {
+	// blendMasked is called on the Int16x32 views of y and x.
+	merged := y.AsInt16x32().blendMasked(x.AsInt16x32(), mask)
+	return merged.AsUint16x32()
+}
+
+// Masked returns x with every element zeroed where mask is false.
+func (x Uint32x16) Masked(mask Mask32x16) Uint32x16 {
+	// The AND is performed on the Int32x16 views of the mask and of x.
+	keep := mask.AsInt32x16()
+	bits := x.AsInt32x16()
+	return bits.And(keep).AsUint32x16()
+}
+
+// Merge returns x with every element replaced by the corresponding
+// element of y where mask is false.
+func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 {
+	// blendMasked is called on the Int32x16 views of y and x.
+	merged := y.AsInt32x16().blendMasked(x.AsInt32x16(), mask)
+	return merged.AsUint32x16()
+}
+
+// Masked returns x with every element zeroed where mask is false.
+func (x Uint64x8) Masked(mask Mask64x8) Uint64x8 {
+	// The AND is performed on the Int64x8 views of the mask and of x.
+	keep := mask.AsInt64x8()
+	bits := x.AsInt64x8()
+	return bits.And(keep).AsUint64x8()
+}
+
+// Merge returns x with every element replaced by the corresponding
+// element of y where mask is false.
+func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 {
+	// blendMasked is called on the Int64x8 views of y and x.
+	merged := y.AsInt64x8().blendMasked(x.AsInt64x8(), mask)
+	return merged.AsUint64x8()
+}
+
+// Masked returns x with every element zeroed where mask is false.
+func (x Float32x16) Masked(mask Mask32x16) Float32x16 {
+	// The AND is performed on the Int32x16 views of the mask and of x.
+	keep := mask.AsInt32x16()
+	bits := x.AsInt32x16()
+	return bits.And(keep).AsFloat32x16()
+}
+
+// Merge returns x with every element replaced by the corresponding
+// element of y where mask is false.
+func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 {
+	// blendMasked is called on the Int32x16 views of y and x.
+	merged := y.AsInt32x16().blendMasked(x.AsInt32x16(), mask)
+	return merged.AsFloat32x16()
+}
+
+// Masked returns x with every element zeroed where mask is false.
+func (x Float64x8) Masked(mask Mask64x8) Float64x8 {
+	// The AND is performed on the Int64x8 views of the mask and of x.
+	keep := mask.AsInt64x8()
+	bits := x.AsInt64x8()
+	return bits.And(keep).AsFloat64x8()
+}
+
+// Merge returns x with every element replaced by the corresponding
+// element of y where mask is false.
+func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 {
+	// blendMasked is called on the Int64x8 views of y and x.
+	merged := y.AsInt64x8().blendMasked(x.AsInt64x8(), mask)
+	return merged.AsFloat64x8()
+}
--- /dev/null
+// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+
+//go:build goexperiment.simd
+
+package simd
+
+// BroadcastInt8x16 returns an Int8x16 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt8x16(x int8) Int8x16 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Int8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastInt16x8 returns an Int16x8 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt16x8(x int16) Int16x8 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Int16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastInt32x4 returns an Int32x4 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt32x4(x int32) Int32x4 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Int32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastInt64x2 returns an Int64x2 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt64x2(x int64) Int64x2 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Int64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastUint8x16 returns a Uint8x16 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint8x16(x uint8) Uint8x16 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Uint8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastUint16x8 returns a Uint16x8 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint16x8(x uint16) Uint16x8 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Uint16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastUint32x4 returns a Uint32x4 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint32x4(x uint32) Uint32x4 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Uint32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastUint64x2 returns a Uint64x2 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint64x2(x uint64) Uint64x2 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Uint64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastFloat32x4 returns a Float32x4 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastFloat32x4(x float32) Float32x4 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Float32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastFloat64x2 returns a Float64x2 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastFloat64x2(x float64) Float64x2 {
+	// Set element 0 of a zero-valued vector to x and broadcast from it.
+	var seed Float64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast128()
+}
+
+// BroadcastInt8x32 returns an Int8x32 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt8x32(x int8) Int8x32 {
+	// The seed is an Int8x16 with x in element 0; Broadcast256 yields the
+	// Int8x32 result.
+	var seed Int8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastInt16x16 returns an Int16x16 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt16x16(x int16) Int16x16 {
+	// The seed is an Int16x8 with x in element 0; Broadcast256 yields the
+	// Int16x16 result.
+	var seed Int16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastInt32x8 returns an Int32x8 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt32x8(x int32) Int32x8 {
+	// The seed is an Int32x4 with x in element 0; Broadcast256 yields the
+	// Int32x8 result.
+	var seed Int32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastInt64x4 returns an Int64x4 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastInt64x4(x int64) Int64x4 {
+	// The seed is an Int64x2 with x in element 0; Broadcast256 yields the
+	// Int64x4 result.
+	var seed Int64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastUint8x32 returns a Uint8x32 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint8x32(x uint8) Uint8x32 {
+	// The seed is a Uint8x16 with x in element 0; Broadcast256 yields the
+	// Uint8x32 result.
+	var seed Uint8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastUint16x16 returns a Uint16x16 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint16x16(x uint16) Uint16x16 {
+	// The seed is a Uint16x8 with x in element 0; Broadcast256 yields the
+	// Uint16x16 result.
+	var seed Uint16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastUint32x8 returns a Uint32x8 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint32x8(x uint32) Uint32x8 {
+	// The seed is a Uint32x4 with x in element 0; Broadcast256 yields the
+	// Uint32x8 result.
+	var seed Uint32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastUint64x4 returns a Uint64x4 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastUint64x4(x uint64) Uint64x4 {
+	// The seed is a Uint64x2 with x in element 0; Broadcast256 yields the
+	// Uint64x4 result.
+	var seed Uint64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastFloat32x8 returns a Float32x8 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastFloat32x8(x float32) Float32x8 {
+	// The seed is a Float32x4 with x in element 0; Broadcast256 yields the
+	// Float32x8 result.
+	var seed Float32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastFloat64x4 returns a Float64x4 in which every element is x.
+//
+// Emulated, CPU Feature AVX2
+func BroadcastFloat64x4(x float64) Float64x4 {
+	// The seed is a Float64x2 with x in element 0; Broadcast256 yields the
+	// Float64x4 result.
+	var seed Float64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast256()
+}
+
+// BroadcastInt8x64 returns an Int8x64 in which every element is x.
+//
+// Emulated, CPU Feature AVX512BW
+func BroadcastInt8x64(x int8) Int8x64 {
+	// The seed is an Int8x16 with x in element 0; Broadcast512 yields the
+	// Int8x64 result.
+	var seed Int8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastInt16x32 returns an Int16x32 in which every element is x.
+//
+// Emulated, CPU Feature AVX512BW
+func BroadcastInt16x32(x int16) Int16x32 {
+	// The seed is an Int16x8 with x in element 0; Broadcast512 yields the
+	// Int16x32 result.
+	var seed Int16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastInt32x16 returns an Int32x16 in which every element is x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastInt32x16(x int32) Int32x16 {
+	// The seed is an Int32x4 with x in element 0; Broadcast512 yields the
+	// Int32x16 result.
+	var seed Int32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastInt64x8 returns an Int64x8 in which every element is x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastInt64x8(x int64) Int64x8 {
+	// The seed is an Int64x2 with x in element 0; Broadcast512 yields the
+	// Int64x8 result.
+	var seed Int64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastUint8x64 returns a Uint8x64 in which every element is x.
+//
+// Emulated, CPU Feature AVX512BW
+func BroadcastUint8x64(x uint8) Uint8x64 {
+	// The seed is a Uint8x16 with x in element 0; Broadcast512 yields the
+	// Uint8x64 result.
+	var seed Uint8x16
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastUint16x32 returns a Uint16x32 in which every element is x.
+//
+// Emulated, CPU Feature AVX512BW
+func BroadcastUint16x32(x uint16) Uint16x32 {
+	// The seed is a Uint16x8 with x in element 0; Broadcast512 yields the
+	// Uint16x32 result.
+	var seed Uint16x8
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastUint32x16 returns a Uint32x16 in which every element is x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastUint32x16(x uint32) Uint32x16 {
+	// The seed is a Uint32x4 with x in element 0; Broadcast512 yields the
+	// Uint32x16 result.
+	var seed Uint32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastUint64x8 returns a Uint64x8 in which every element is x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastUint64x8(x uint64) Uint64x8 {
+	// The seed is a Uint64x2 with x in element 0; Broadcast512 yields the
+	// Uint64x8 result.
+	var seed Uint64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastFloat32x16 returns a Float32x16 in which every element is x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastFloat32x16(x float32) Float32x16 {
+	// The seed is a Float32x4 with x in element 0; Broadcast512 yields the
+	// Float32x16 result.
+	var seed Float32x4
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+
+// BroadcastFloat64x8 returns a Float64x8 in which every element is x.
+//
+// Emulated, CPU Feature AVX512F
+func BroadcastFloat64x8(x float64) Float64x8 {
+	// The seed is a Float64x2 with x in element 0; Broadcast512 yields the
+	// Float64x8 result.
+	var seed Float64x2
+	seed = seed.SetElem(0, x)
+	return seed.Broadcast512()
+}
+++ /dev/null
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
-
-//go:build goexperiment.simd
-
-package simd
-
-import "unsafe"
-
-// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt8x16Slice(s []int8) Int8x16 {
-	head := (*[16]int8)(s)
-	return LoadInt8x16(head)
-}
-
-// StoreSlice stores x into a slice of at least 16 int8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int8x16) StoreSlice(s []int8) {
-	dst := (*[16]int8)(s)
-	x.Store(dst)
-}
-
-// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt16x8Slice(s []int16) Int16x8 {
-	head := (*[8]int16)(s)
-	return LoadInt16x8(head)
-}
-
-// StoreSlice stores x into a slice of at least 8 int16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int16x8) StoreSlice(s []int16) {
-	dst := (*[8]int16)(s)
-	x.Store(dst)
-}
-
-// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt32x4Slice(s []int32) Int32x4 {
-	head := (*[4]int32)(s)
-	return LoadInt32x4(head)
-}
-
-// StoreSlice stores x into a slice of at least 4 int32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int32x4) StoreSlice(s []int32) {
-	dst := (*[4]int32)(s)
-	x.Store(dst)
-}
-
-// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt64x2Slice(s []int64) Int64x2 {
-	head := (*[2]int64)(s)
-	return LoadInt64x2(head)
-}
-
-// StoreSlice stores x into a slice of at least 2 int64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int64x2) StoreSlice(s []int64) {
-	dst := (*[2]int64)(s)
-	x.Store(dst)
-}
-
-// LoadUint8x16Slice loads a Uint8x16 from a slice of at least 16 uint8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint8x16Slice(s []uint8) Uint8x16 {
-	head := (*[16]uint8)(s)
-	return LoadUint8x16(head)
-}
-
-// StoreSlice stores x into a slice of at least 16 uint8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint8x16) StoreSlice(s []uint8) {
-	dst := (*[16]uint8)(s)
-	x.Store(dst)
-}
-
-// LoadUint16x8Slice loads a Uint16x8 from a slice of at least 8 uint16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint16x8Slice(s []uint16) Uint16x8 {
-	head := (*[8]uint16)(s)
-	return LoadUint16x8(head)
-}
-
-// StoreSlice stores x into a slice of at least 8 uint16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint16x8) StoreSlice(s []uint16) {
-	dst := (*[8]uint16)(s)
-	x.Store(dst)
-}
-
-// LoadUint32x4Slice loads a Uint32x4 from a slice of at least 4 uint32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint32x4Slice(s []uint32) Uint32x4 {
-	head := (*[4]uint32)(s)
-	return LoadUint32x4(head)
-}
-
-// StoreSlice stores x into a slice of at least 4 uint32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint32x4) StoreSlice(s []uint32) {
-	dst := (*[4]uint32)(s)
-	x.Store(dst)
-}
-
-// LoadUint64x2Slice loads a Uint64x2 from a slice of at least 2 uint64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint64x2Slice(s []uint64) Uint64x2 {
-	head := (*[2]uint64)(s)
-	return LoadUint64x2(head)
-}
-
-// StoreSlice stores x into a slice of at least 2 uint64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint64x2) StoreSlice(s []uint64) {
-	dst := (*[2]uint64)(s)
-	x.Store(dst)
-}
-
-// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadFloat32x4Slice(s []float32) Float32x4 {
-	head := (*[4]float32)(s)
-	return LoadFloat32x4(head)
-}
-
-// StoreSlice stores x into a slice of at least 4 float32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Float32x4) StoreSlice(s []float32) {
-	dst := (*[4]float32)(s)
-	x.Store(dst)
-}
-
-// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadFloat64x2Slice(s []float64) Float64x2 {
-	head := (*[2]float64)(s)
-	return LoadFloat64x2(head)
-}
-
-// StoreSlice stores x into a slice of at least 2 float64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Float64x2) StoreSlice(s []float64) {
-	dst := (*[2]float64)(s)
-	x.Store(dst)
-}
-
-// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt8x32Slice(s []int8) Int8x32 {
-	head := (*[32]int8)(s)
-	return LoadInt8x32(head)
-}
-
-// StoreSlice stores x into a slice of at least 32 int8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int8x32) StoreSlice(s []int8) {
-	dst := (*[32]int8)(s)
-	x.Store(dst)
-}
-
-// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt16x16Slice(s []int16) Int16x16 {
-	head := (*[16]int16)(s)
-	return LoadInt16x16(head)
-}
-
-// StoreSlice stores x into a slice of at least 16 int16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int16x16) StoreSlice(s []int16) {
-	dst := (*[16]int16)(s)
-	x.Store(dst)
-}
-
-// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt32x8Slice(s []int32) Int32x8 {
-	head := (*[8]int32)(s)
-	return LoadInt32x8(head)
-}
-
-// StoreSlice stores x into a slice of at least 8 int32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int32x8) StoreSlice(s []int32) {
-	dst := (*[8]int32)(s)
-	x.Store(dst)
-}
-
-// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt64x4Slice(s []int64) Int64x4 {
-	head := (*[4]int64)(s)
-	return LoadInt64x4(head)
-}
-
-// StoreSlice stores x into a slice of at least 4 int64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int64x4) StoreSlice(s []int64) {
-	dst := (*[4]int64)(s)
-	x.Store(dst)
-}
-
-// LoadUint8x32Slice loads a Uint8x32 from a slice of at least 32 uint8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint8x32Slice(s []uint8) Uint8x32 {
-	head := (*[32]uint8)(s)
-	return LoadUint8x32(head)
-}
-
-// StoreSlice stores x into a slice of at least 32 uint8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint8x32) StoreSlice(s []uint8) {
-	dst := (*[32]uint8)(s)
-	x.Store(dst)
-}
-
-// LoadUint16x16Slice loads a Uint16x16 from a slice of at least 16 uint16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint16x16Slice(s []uint16) Uint16x16 {
-	head := (*[16]uint16)(s)
-	return LoadUint16x16(head)
-}
-
-// StoreSlice stores x into a slice of at least 16 uint16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint16x16) StoreSlice(s []uint16) {
-	dst := (*[16]uint16)(s)
-	x.Store(dst)
-}
-
-// LoadUint32x8Slice loads a Uint32x8 from a slice of at least 8 uint32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint32x8Slice(s []uint32) Uint32x8 {
-	head := (*[8]uint32)(s)
-	return LoadUint32x8(head)
-}
-
-// StoreSlice stores x into a slice of at least 8 uint32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint32x8) StoreSlice(s []uint32) {
-	dst := (*[8]uint32)(s)
-	x.Store(dst)
-}
-
-// LoadUint64x4Slice loads a Uint64x4 from a slice of at least 4 uint64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint64x4Slice(s []uint64) Uint64x4 {
-	head := (*[4]uint64)(s)
-	return LoadUint64x4(head)
-}
-
-// StoreSlice stores x into a slice of at least 4 uint64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint64x4) StoreSlice(s []uint64) {
-	dst := (*[4]uint64)(s)
-	x.Store(dst)
-}
-
-// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadFloat32x8Slice(s []float32) Float32x8 {
-	head := (*[8]float32)(s)
-	return LoadFloat32x8(head)
-}
-
-// StoreSlice stores x into a slice of at least 8 float32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Float32x8) StoreSlice(s []float32) {
-	dst := (*[8]float32)(s)
-	x.Store(dst)
-}
-
-// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadFloat64x4Slice(s []float64) Float64x4 {
-	head := (*[4]float64)(s)
-	return LoadFloat64x4(head)
-}
-
-// StoreSlice stores x into a slice of at least 4 float64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Float64x4) StoreSlice(s []float64) {
-	dst := (*[4]float64)(s)
-	x.Store(dst)
-}
-
-// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt8x64Slice(s []int8) Int8x64 {
-	head := (*[64]int8)(s)
-	return LoadInt8x64(head)
-}
-
-// StoreSlice stores x into a slice of at least 64 int8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int8x64) StoreSlice(s []int8) {
-	dst := (*[64]int8)(s)
-	x.Store(dst)
-}
-
-// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt16x32Slice(s []int16) Int16x32 {
-	head := (*[32]int16)(s)
-	return LoadInt16x32(head)
-}
-
-// StoreSlice stores x into a slice of at least 32 int16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int16x32) StoreSlice(s []int16) {
-	dst := (*[32]int16)(s)
-	x.Store(dst)
-}
-
-// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt32x16Slice(s []int32) Int32x16 {
-	head := (*[16]int32)(s)
-	return LoadInt32x16(head)
-}
-
-// StoreSlice stores x into a slice of at least 16 int32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int32x16) StoreSlice(s []int32) {
-	dst := (*[16]int32)(s)
-	x.Store(dst)
-}
-
-// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadInt64x8Slice(s []int64) Int64x8 {
-	head := (*[8]int64)(s)
-	return LoadInt64x8(head)
-}
-
-// StoreSlice stores x into a slice of at least 8 int64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Int64x8) StoreSlice(s []int64) {
-	dst := (*[8]int64)(s)
-	x.Store(dst)
-}
-
-// LoadUint8x64Slice loads a Uint8x64 from a slice of at least 64 uint8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint8x64Slice(s []uint8) Uint8x64 {
-	head := (*[64]uint8)(s)
-	return LoadUint8x64(head)
-}
-
-// StoreSlice stores x into a slice of at least 64 uint8s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint8x64) StoreSlice(s []uint8) {
-	dst := (*[64]uint8)(s)
-	x.Store(dst)
-}
-
-// LoadUint16x32Slice loads a Uint16x32 from a slice of at least 32 uint16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint16x32Slice(s []uint16) Uint16x32 {
-	head := (*[32]uint16)(s)
-	return LoadUint16x32(head)
-}
-
-// StoreSlice stores x into a slice of at least 32 uint16s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint16x32) StoreSlice(s []uint16) {
-	dst := (*[32]uint16)(s)
-	x.Store(dst)
-}
-
-// LoadUint32x16Slice loads a Uint32x16 from a slice of at least 16 uint32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint32x16Slice(s []uint32) Uint32x16 {
-	head := (*[16]uint32)(s)
-	return LoadUint32x16(head)
-}
-
-// StoreSlice stores x into a slice of at least 16 uint32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint32x16) StoreSlice(s []uint32) {
-	dst := (*[16]uint32)(s)
-	x.Store(dst)
-}
-
-// LoadUint64x8Slice loads a Uint64x8 from a slice of at least 8 uint64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadUint64x8Slice(s []uint64) Uint64x8 {
-	head := (*[8]uint64)(s)
-	return LoadUint64x8(head)
-}
-
-// StoreSlice stores x into a slice of at least 8 uint64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Uint64x8) StoreSlice(s []uint64) {
-	dst := (*[8]uint64)(s)
-	x.Store(dst)
-}
-
-// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadFloat32x16Slice(s []float32) Float32x16 {
-	head := (*[16]float32)(s)
-	return LoadFloat32x16(head)
-}
-
-// StoreSlice stores x into a slice of at least 16 float32s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Float32x16) StoreSlice(s []float32) {
-	dst := (*[16]float32)(s)
-	x.Store(dst)
-}
-
-// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func LoadFloat64x8Slice(s []float64) Float64x8 {
-	head := (*[8]float64)(s)
-	return LoadFloat64x8(head)
-}
-
-// StoreSlice stores x into a slice of at least 8 float64s.
-// The slice-to-array-pointer conversion panics if s is shorter than that.
-func (x Float64x8) StoreSlice(s []float64) {
-	dst := (*[8]float64)(s)
-	x.Store(dst)
-}
-
-// LoadInt8x64SlicePart loads an Int8x64 from the slice s.
-// When s holds fewer than 64 elements, the remaining vector elements are zero.
-// When s holds 64 or more elements, it is equivalent to LoadInt8x64Slice.
-func LoadInt8x64SlicePart(s []int8) Int8x64 {
-	n := len(s)
-	switch {
-	case n >= 64:
-		return LoadInt8x64Slice(s)
-	case n == 0:
-		var zero Int8x64
-		return zero
-	}
-	// Here 0 < n < 64, so the shift leaves exactly the low n bits set.
-	lanes := Mask8x64FromBits(0xffffffffffffffff >> (64 - n))
-	return LoadMaskedInt8x64(paInt8x64(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 64 or more elements, it is equivalent to x.StoreSlice.
-func (x Int8x64) StoreSlicePart(s []int8) {
-	n := len(s)
-	switch {
-	case n >= 64:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 64, so the shift leaves exactly the low n bits set.
-		lanes := Mask8x64FromBits(0xffffffffffffffff >> (64 - n))
-		x.StoreMasked(paInt8x64(s), lanes)
-	}
-}
-
-// LoadInt16x32SlicePart loads an Int16x32 from the slice s.
-// When s holds fewer than 32 elements, the remaining vector elements are zero.
-// When s holds 32 or more elements, it is equivalent to LoadInt16x32Slice.
-func LoadInt16x32SlicePart(s []int16) Int16x32 {
-	n := len(s)
-	switch {
-	case n >= 32:
-		return LoadInt16x32Slice(s)
-	case n == 0:
-		var zero Int16x32
-		return zero
-	}
-	// Here 0 < n < 32, so the shift leaves exactly the low n bits set.
-	lanes := Mask16x32FromBits(0xffffffff >> (32 - n))
-	return LoadMaskedInt16x32(paInt16x32(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 32 or more elements, it is equivalent to x.StoreSlice.
-func (x Int16x32) StoreSlicePart(s []int16) {
-	n := len(s)
-	switch {
-	case n >= 32:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 32, so the shift leaves exactly the low n bits set.
-		lanes := Mask16x32FromBits(0xffffffff >> (32 - n))
-		x.StoreMasked(paInt16x32(s), lanes)
-	}
-}
-
-// LoadInt32x16SlicePart loads an Int32x16 from the slice s.
-// When s holds fewer than 16 elements, the remaining vector elements are zero.
-// When s holds 16 or more elements, it is equivalent to LoadInt32x16Slice.
-func LoadInt32x16SlicePart(s []int32) Int32x16 {
-	n := len(s)
-	switch {
-	case n >= 16:
-		return LoadInt32x16Slice(s)
-	case n == 0:
-		var zero Int32x16
-		return zero
-	}
-	// Here 0 < n < 16, so the shift leaves exactly the low n bits set.
-	lanes := Mask32x16FromBits(0xffff >> (16 - n))
-	return LoadMaskedInt32x16(paInt32x16(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 16 or more elements, it is equivalent to x.StoreSlice.
-func (x Int32x16) StoreSlicePart(s []int32) {
-	n := len(s)
-	switch {
-	case n >= 16:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 16, so the shift leaves exactly the low n bits set.
-		lanes := Mask32x16FromBits(0xffff >> (16 - n))
-		x.StoreMasked(paInt32x16(s), lanes)
-	}
-}
-
-// LoadInt64x8SlicePart loads an Int64x8 from the slice s.
-// When s holds fewer than 8 elements, the remaining vector elements are zero.
-// When s holds 8 or more elements, it is equivalent to LoadInt64x8Slice.
-func LoadInt64x8SlicePart(s []int64) Int64x8 {
-	n := len(s)
-	switch {
-	case n >= 8:
-		return LoadInt64x8Slice(s)
-	case n == 0:
-		var zero Int64x8
-		return zero
-	}
-	// Here 0 < n < 8, so the shift leaves exactly the low n bits set.
-	lanes := Mask64x8FromBits(0xff >> (8 - n))
-	return LoadMaskedInt64x8(paInt64x8(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 8 or more elements, it is equivalent to x.StoreSlice.
-func (x Int64x8) StoreSlicePart(s []int64) {
-	n := len(s)
-	switch {
-	case n >= 8:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 8, so the shift leaves exactly the low n bits set.
-		lanes := Mask64x8FromBits(0xff >> (8 - n))
-		x.StoreMasked(paInt64x8(s), lanes)
-	}
-}
-
-// LoadUint8x64SlicePart loads a Uint8x64 from the slice s.
-// When s holds fewer than 64 elements, the remaining vector elements are zero.
-// When s holds 64 or more elements, it is equivalent to LoadUint8x64Slice.
-func LoadUint8x64SlicePart(s []uint8) Uint8x64 {
-	n := len(s)
-	switch {
-	case n >= 64:
-		return LoadUint8x64Slice(s)
-	case n == 0:
-		var zero Uint8x64
-		return zero
-	}
-	// Here 0 < n < 64, so the shift leaves exactly the low n bits set.
-	lanes := Mask8x64FromBits(0xffffffffffffffff >> (64 - n))
-	return LoadMaskedUint8x64(paUint8x64(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 64 or more elements, it is equivalent to x.StoreSlice.
-func (x Uint8x64) StoreSlicePart(s []uint8) {
-	n := len(s)
-	switch {
-	case n >= 64:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 64, so the shift leaves exactly the low n bits set.
-		lanes := Mask8x64FromBits(0xffffffffffffffff >> (64 - n))
-		x.StoreMasked(paUint8x64(s), lanes)
-	}
-}
-
-// LoadUint16x32SlicePart loads a Uint16x32 from the slice s.
-// When s holds fewer than 32 elements, the remaining vector elements are zero.
-// When s holds 32 or more elements, it is equivalent to LoadUint16x32Slice.
-func LoadUint16x32SlicePart(s []uint16) Uint16x32 {
-	n := len(s)
-	switch {
-	case n >= 32:
-		return LoadUint16x32Slice(s)
-	case n == 0:
-		var zero Uint16x32
-		return zero
-	}
-	// Here 0 < n < 32, so the shift leaves exactly the low n bits set.
-	lanes := Mask16x32FromBits(0xffffffff >> (32 - n))
-	return LoadMaskedUint16x32(paUint16x32(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 32 or more elements, it is equivalent to x.StoreSlice.
-func (x Uint16x32) StoreSlicePart(s []uint16) {
-	n := len(s)
-	switch {
-	case n >= 32:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 32, so the shift leaves exactly the low n bits set.
-		lanes := Mask16x32FromBits(0xffffffff >> (32 - n))
-		x.StoreMasked(paUint16x32(s), lanes)
-	}
-}
-
-// LoadUint32x16SlicePart loads a Uint32x16 from the slice s.
-// When s holds fewer than 16 elements, the remaining vector elements are zero.
-// When s holds 16 or more elements, it is equivalent to LoadUint32x16Slice.
-func LoadUint32x16SlicePart(s []uint32) Uint32x16 {
-	n := len(s)
-	switch {
-	case n >= 16:
-		return LoadUint32x16Slice(s)
-	case n == 0:
-		var zero Uint32x16
-		return zero
-	}
-	// Here 0 < n < 16, so the shift leaves exactly the low n bits set.
-	lanes := Mask32x16FromBits(0xffff >> (16 - n))
-	return LoadMaskedUint32x16(paUint32x16(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 16 or more elements, it is equivalent to x.StoreSlice.
-func (x Uint32x16) StoreSlicePart(s []uint32) {
-	n := len(s)
-	switch {
-	case n >= 16:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 16, so the shift leaves exactly the low n bits set.
-		lanes := Mask32x16FromBits(0xffff >> (16 - n))
-		x.StoreMasked(paUint32x16(s), lanes)
-	}
-}
-
-// LoadUint64x8SlicePart loads a Uint64x8 from the slice s.
-// When s holds fewer than 8 elements, the remaining vector elements are zero.
-// When s holds 8 or more elements, it is equivalent to LoadUint64x8Slice.
-func LoadUint64x8SlicePart(s []uint64) Uint64x8 {
-	n := len(s)
-	switch {
-	case n >= 8:
-		return LoadUint64x8Slice(s)
-	case n == 0:
-		var zero Uint64x8
-		return zero
-	}
-	// Here 0 < n < 8, so the shift leaves exactly the low n bits set.
-	lanes := Mask64x8FromBits(0xff >> (8 - n))
-	return LoadMaskedUint64x8(paUint64x8(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 8 or more elements, it is equivalent to x.StoreSlice.
-func (x Uint64x8) StoreSlicePart(s []uint64) {
-	n := len(s)
-	switch {
-	case n >= 8:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 8, so the shift leaves exactly the low n bits set.
-		lanes := Mask64x8FromBits(0xff >> (8 - n))
-		x.StoreMasked(paUint64x8(s), lanes)
-	}
-}
-
-// LoadFloat32x16SlicePart loads a Float32x16 from the slice s.
-// When s holds fewer than 16 elements, the remaining vector elements are zero.
-// When s holds 16 or more elements, it is equivalent to LoadFloat32x16Slice.
-func LoadFloat32x16SlicePart(s []float32) Float32x16 {
-	n := len(s)
-	switch {
-	case n >= 16:
-		return LoadFloat32x16Slice(s)
-	case n == 0:
-		var zero Float32x16
-		return zero
-	}
-	// Here 0 < n < 16, so the shift leaves exactly the low n bits set.
-	lanes := Mask32x16FromBits(0xffff >> (16 - n))
-	return LoadMaskedFloat32x16(paFloat32x16(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 16 or more elements, it is equivalent to x.StoreSlice.
-func (x Float32x16) StoreSlicePart(s []float32) {
-	n := len(s)
-	switch {
-	case n >= 16:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 16, so the shift leaves exactly the low n bits set.
-		lanes := Mask32x16FromBits(0xffff >> (16 - n))
-		x.StoreMasked(paFloat32x16(s), lanes)
-	}
-}
-
-// LoadFloat64x8SlicePart loads a Float64x8 from the slice s.
-// When s holds fewer than 8 elements, the remaining vector elements are zero.
-// When s holds 8 or more elements, it is equivalent to LoadFloat64x8Slice.
-func LoadFloat64x8SlicePart(s []float64) Float64x8 {
-	n := len(s)
-	switch {
-	case n >= 8:
-		return LoadFloat64x8Slice(s)
-	case n == 0:
-		var zero Float64x8
-		return zero
-	}
-	// Here 0 < n < 8, so the shift leaves exactly the low n bits set.
-	lanes := Mask64x8FromBits(0xff >> (8 - n))
-	return LoadMaskedFloat64x8(paFloat64x8(s), lanes)
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 8 or more elements, it is equivalent to x.StoreSlice.
-func (x Float64x8) StoreSlicePart(s []float64) {
-	n := len(s)
-	switch {
-	case n >= 8:
-		x.StoreSlice(s)
-	case n > 0:
-		// Here 0 < n < 8, so the shift leaves exactly the low n bits set.
-		lanes := Mask64x8FromBits(0xff >> (8 - n))
-		x.StoreMasked(paFloat64x8(s), lanes)
-	}
-}
-
-// LoadInt32x4SlicePart loads an Int32x4 from the slice s.
-// When s holds fewer than 4 elements, the remaining vector elements are zero.
-// When s holds 4 or more elements, it is equivalent to LoadInt32x4Slice.
-func LoadInt32x4SlicePart(s []int32) Int32x4 {
-	n := len(s)
-	switch {
-	case n >= 4:
-		return LoadInt32x4Slice(s)
-	case n == 0:
-		var zero Int32x4
-		return zero
-	}
-	// The mask is loaded from vecMask32 starting n elements before its midpoint.
-	// NOTE(review): presumably the first half of vecMask32 is all-ones and the
-	// second half zero, giving n leading true lanes; confirm at its definition.
-	window := vecMask32[len(vecMask32)/2-n:]
-	return LoadMaskedInt32x4(paInt32x4(s), LoadInt32x4Slice(window).AsMask32x4())
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 4 or more elements, it is equivalent to x.StoreSlice.
-func (x Int32x4) StoreSlicePart(s []int32) {
-	n := len(s)
-	switch {
-	case n >= 4:
-		x.StoreSlice(s)
-	case n > 0:
-		// The mask is loaded from vecMask32 starting n elements before its midpoint.
-		// NOTE(review): presumably the first half of vecMask32 is all-ones and the
-		// second half zero, giving n leading true lanes; confirm at its definition.
-		window := vecMask32[len(vecMask32)/2-n:]
-		x.StoreMasked(paInt32x4(s), LoadInt32x4Slice(window).AsMask32x4())
-	}
-}
-
-// LoadInt64x2SlicePart loads an Int64x2 from the slice s.
-// When s holds fewer than 2 elements, the remaining vector elements are zero.
-// When s holds 2 or more elements, it is equivalent to LoadInt64x2Slice.
-func LoadInt64x2SlicePart(s []int64) Int64x2 {
-	n := len(s)
-	switch {
-	case n >= 2:
-		return LoadInt64x2Slice(s)
-	case n == 0:
-		var zero Int64x2
-		return zero
-	}
-	// The mask is loaded from vecMask64 starting n elements before its midpoint.
-	// NOTE(review): presumably the first half of vecMask64 is all-ones and the
-	// second half zero, giving n leading true lanes; confirm at its definition.
-	window := vecMask64[len(vecMask64)/2-n:]
-	return LoadMaskedInt64x2(paInt64x2(s), LoadInt64x2Slice(window).AsMask64x2())
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 2 or more elements, it is equivalent to x.StoreSlice.
-func (x Int64x2) StoreSlicePart(s []int64) {
-	n := len(s)
-	switch {
-	case n >= 2:
-		x.StoreSlice(s)
-	case n > 0:
-		// The mask is loaded from vecMask64 starting n elements before its midpoint.
-		// NOTE(review): presumably the first half of vecMask64 is all-ones and the
-		// second half zero, giving n leading true lanes; confirm at its definition.
-		window := vecMask64[len(vecMask64)/2-n:]
-		x.StoreMasked(paInt64x2(s), LoadInt64x2Slice(window).AsMask64x2())
-	}
-}
-
-// LoadUint32x4SlicePart loads a Uint32x4 from the slice s.
-// When s holds fewer than 4 elements, the remaining vector elements are zero.
-// When s holds 4 or more elements, it is equivalent to LoadUint32x4Slice.
-func LoadUint32x4SlicePart(s []uint32) Uint32x4 {
-	n := len(s)
-	switch {
-	case n >= 4:
-		return LoadUint32x4Slice(s)
-	case n == 0:
-		var zero Uint32x4
-		return zero
-	}
-	// The mask is loaded from vecMask32 starting n elements before its midpoint.
-	// NOTE(review): presumably the first half of vecMask32 is all-ones and the
-	// second half zero, giving n leading true lanes; confirm at its definition.
-	window := vecMask32[len(vecMask32)/2-n:]
-	return LoadMaskedUint32x4(paUint32x4(s), LoadInt32x4Slice(window).AsMask32x4())
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 4 or more elements, it is equivalent to x.StoreSlice.
-func (x Uint32x4) StoreSlicePart(s []uint32) {
-	n := len(s)
-	switch {
-	case n >= 4:
-		x.StoreSlice(s)
-	case n > 0:
-		// The mask is loaded from vecMask32 starting n elements before its midpoint.
-		// NOTE(review): presumably the first half of vecMask32 is all-ones and the
-		// second half zero, giving n leading true lanes; confirm at its definition.
-		window := vecMask32[len(vecMask32)/2-n:]
-		x.StoreMasked(paUint32x4(s), LoadInt32x4Slice(window).AsMask32x4())
-	}
-}
-
-// LoadUint64x2SlicePart loads a Uint64x2 from the slice s.
-// When s holds fewer than 2 elements, the remaining vector elements are zero.
-// When s holds 2 or more elements, it is equivalent to LoadUint64x2Slice.
-func LoadUint64x2SlicePart(s []uint64) Uint64x2 {
-	n := len(s)
-	switch {
-	case n >= 2:
-		return LoadUint64x2Slice(s)
-	case n == 0:
-		var zero Uint64x2
-		return zero
-	}
-	// The mask is loaded from vecMask64 starting n elements before its midpoint.
-	// NOTE(review): presumably the first half of vecMask64 is all-ones and the
-	// second half zero, giving n leading true lanes; confirm at its definition.
-	window := vecMask64[len(vecMask64)/2-n:]
-	return LoadMaskedUint64x2(paUint64x2(s), LoadInt64x2Slice(window).AsMask64x2())
-}
-
-// StoreSlicePart stores the elements of x into the slice s, writing only
-// as many elements as fit in s.
-// When s holds 2 or more elements, it is equivalent to x.StoreSlice.
-func (x Uint64x2) StoreSlicePart(s []uint64) {
-	n := len(s)
-	switch {
-	case n >= 2:
-		x.StoreSlice(s)
-	case n > 0:
-		// The mask is loaded from vecMask64 starting n elements before its midpoint.
-		// NOTE(review): presumably the first half of vecMask64 is all-ones and the
-		// second half zero, giving n leading true lanes; confirm at its definition.
-		window := vecMask64[len(vecMask64)/2-n:]
-		x.StoreMasked(paUint64x2(s), LoadInt64x2Slice(window).AsMask64x2())
-	}
-}
-
-// LoadFloat32x4SlicePart loads a Float32x4 from the slice s.
-// When s holds fewer than 4 elements, the remaining vector elements are zero.
-// When s holds 4 or more elements, it is equivalent to LoadFloat32x4Slice.
-func LoadFloat32x4SlicePart(s []float32) Float32x4 {
-	n := len(s)
-	switch {
-	case n >= 4:
-		return LoadFloat32x4Slice(s)
-	case n == 0:
-		var zero Float32x4
-		return zero
-	}
-	// The mask is loaded from vecMask32 starting n elements before its midpoint.
-	// NOTE(review): presumably the first half of vecMask32 is all-ones and the
-	// second half zero, giving n leading true lanes; confirm at its definition.
-	window := vecMask32[len(vecMask32)/2-n:]
-	return LoadMaskedFloat32x4(paFloat32x4(s), LoadInt32x4Slice(window).AsMask32x4())
-}
-
-// StoreSlicePart stores the 4 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 4 or more elements, the method is equivalent to x.StoreSlice.
-func (x Float32x4) StoreSlicePart(s []float32) {
- l := len(s)
- if l >= 4 {
- x.StoreSlice(s)
- return
- }
- if l == 0 {
- return
- }
- mask := vecMask32[len(vecMask32)/2-l:]
- x.StoreMasked(paFloat32x4(s), LoadInt32x4Slice(mask).AsMask32x4())
-}
-
-// LoadFloat64x2SlicePart loads a Float64x2 from the slice s.
-// If s has fewer than 2 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 2 or more elements, the function is equivalent to LoadFloat64x2Slice.
-func LoadFloat64x2SlicePart(s []float64) Float64x2 {
- l := len(s)
- if l >= 2 {
- return LoadFloat64x2Slice(s)
- }
- if l == 0 {
- var x Float64x2
- return x
- }
- mask := vecMask64[len(vecMask64)/2-l:]
- return LoadMaskedFloat64x2(paFloat64x2(s), LoadInt64x2Slice(mask).AsMask64x2())
-}
-
-// StoreSlicePart stores the 2 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 2 or more elements, the method is equivalent to x.StoreSlice.
-func (x Float64x2) StoreSlicePart(s []float64) {
- l := len(s)
- if l >= 2 {
- x.StoreSlice(s)
- return
- }
- if l == 0 {
- return
- }
- mask := vecMask64[len(vecMask64)/2-l:]
- x.StoreMasked(paFloat64x2(s), LoadInt64x2Slice(mask).AsMask64x2())
-}
-
-// LoadInt32x8SlicePart loads a Int32x8 from the slice s.
-// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 8 or more elements, the function is equivalent to LoadInt32x8Slice.
-func LoadInt32x8SlicePart(s []int32) Int32x8 {
- l := len(s)
- if l >= 8 {
- return LoadInt32x8Slice(s)
- }
- if l == 0 {
- var x Int32x8
- return x
- }
- mask := vecMask32[len(vecMask32)/2-l:]
- return LoadMaskedInt32x8(paInt32x8(s), LoadInt32x8Slice(mask).AsMask32x8())
-}
-
-// StoreSlicePart stores the 8 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 8 or more elements, the method is equivalent to x.StoreSlice.
-func (x Int32x8) StoreSlicePart(s []int32) {
- l := len(s)
- if l >= 8 {
- x.StoreSlice(s)
- return
- }
- if l == 0 {
- return
- }
- mask := vecMask32[len(vecMask32)/2-l:]
- x.StoreMasked(paInt32x8(s), LoadInt32x8Slice(mask).AsMask32x8())
-}
-
-// LoadInt64x4SlicePart loads a Int64x4 from the slice s.
-// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 4 or more elements, the function is equivalent to LoadInt64x4Slice.
-func LoadInt64x4SlicePart(s []int64) Int64x4 {
- l := len(s)
- if l >= 4 {
- return LoadInt64x4Slice(s)
- }
- if l == 0 {
- var x Int64x4
- return x
- }
- mask := vecMask64[len(vecMask64)/2-l:]
- return LoadMaskedInt64x4(paInt64x4(s), LoadInt64x4Slice(mask).AsMask64x4())
-}
-
-// StoreSlicePart stores the 4 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 4 or more elements, the method is equivalent to x.StoreSlice.
-func (x Int64x4) StoreSlicePart(s []int64) {
- l := len(s)
- if l >= 4 {
- x.StoreSlice(s)
- return
- }
- if l == 0 {
- return
- }
- mask := vecMask64[len(vecMask64)/2-l:]
- x.StoreMasked(paInt64x4(s), LoadInt64x4Slice(mask).AsMask64x4())
-}
-
-// LoadUint32x8SlicePart loads a Uint32x8 from the slice s.
-// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 8 or more elements, the function is equivalent to LoadUint32x8Slice.
-func LoadUint32x8SlicePart(s []uint32) Uint32x8 {
- l := len(s)
- if l >= 8 {
- return LoadUint32x8Slice(s)
- }
- if l == 0 {
- var x Uint32x8
- return x
- }
- mask := vecMask32[len(vecMask32)/2-l:]
- return LoadMaskedUint32x8(paUint32x8(s), LoadInt32x8Slice(mask).AsMask32x8())
-}
-
-// StoreSlicePart stores the 8 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 8 or more elements, the method is equivalent to x.StoreSlice.
-func (x Uint32x8) StoreSlicePart(s []uint32) {
- l := len(s)
- if l >= 8 {
- x.StoreSlice(s)
- return
- }
- if l == 0 {
- return
- }
- mask := vecMask32[len(vecMask32)/2-l:]
- x.StoreMasked(paUint32x8(s), LoadInt32x8Slice(mask).AsMask32x8())
-}
-
-// LoadUint64x4SlicePart loads a Uint64x4 from the slice s.
-// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 4 or more elements, the function is equivalent to LoadUint64x4Slice.
-func LoadUint64x4SlicePart(s []uint64) Uint64x4 {
- l := len(s)
- if l >= 4 {
- return LoadUint64x4Slice(s)
- }
- if l == 0 {
- var x Uint64x4
- return x
- }
- mask := vecMask64[len(vecMask64)/2-l:]
- return LoadMaskedUint64x4(paUint64x4(s), LoadInt64x4Slice(mask).AsMask64x4())
-}
-
-// StoreSlicePart stores the 4 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 4 or more elements, the method is equivalent to x.StoreSlice.
-func (x Uint64x4) StoreSlicePart(s []uint64) {
- l := len(s)
- if l >= 4 {
- x.StoreSlice(s)
- return
- }
- if l == 0 {
- return
- }
- mask := vecMask64[len(vecMask64)/2-l:]
- x.StoreMasked(paUint64x4(s), LoadInt64x4Slice(mask).AsMask64x4())
-}
-
-// LoadFloat32x8SlicePart loads a Float32x8 from the slice s.
-// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 8 or more elements, the function is equivalent to LoadFloat32x8Slice.
-func LoadFloat32x8SlicePart(s []float32) Float32x8 {
- l := len(s)
- if l >= 8 {
- return LoadFloat32x8Slice(s)
- }
- if l == 0 {
- var x Float32x8
- return x
- }
- mask := vecMask32[len(vecMask32)/2-l:]
- return LoadMaskedFloat32x8(paFloat32x8(s), LoadInt32x8Slice(mask).AsMask32x8())
-}
-
-// StoreSlicePart stores the 8 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 8 or more elements, the method is equivalent to x.StoreSlice.
-func (x Float32x8) StoreSlicePart(s []float32) {
- l := len(s)
- if l >= 8 {
- x.StoreSlice(s)
- return
- }
- if l == 0 {
- return
- }
- mask := vecMask32[len(vecMask32)/2-l:]
- x.StoreMasked(paFloat32x8(s), LoadInt32x8Slice(mask).AsMask32x8())
-}
-
-// LoadFloat64x4SlicePart loads a Float64x4 from the slice s.
-// If s has fewer than 4 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 4 or more elements, the function is equivalent to LoadFloat64x4Slice.
-func LoadFloat64x4SlicePart(s []float64) Float64x4 {
- l := len(s)
- if l >= 4 {
- return LoadFloat64x4Slice(s)
- }
- if l == 0 {
- var x Float64x4
- return x
- }
- mask := vecMask64[len(vecMask64)/2-l:]
- return LoadMaskedFloat64x4(paFloat64x4(s), LoadInt64x4Slice(mask).AsMask64x4())
-}
-
-// StoreSlicePart stores the 4 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 4 or more elements, the method is equivalent to x.StoreSlice.
-func (x Float64x4) StoreSlicePart(s []float64) {
- l := len(s)
- if l >= 4 {
- x.StoreSlice(s)
- return
- }
- if l == 0 {
- return
- }
- mask := vecMask64[len(vecMask64)/2-l:]
- x.StoreMasked(paFloat64x4(s), LoadInt64x4Slice(mask).AsMask64x4())
-}
-
-// LoadUint8x16SlicePart loads a Uint8x16 from the slice s.
-// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 16 or more elements, the function is equivalent to LoadUint8x16Slice.
-func LoadUint8x16SlicePart(s []uint8) Uint8x16 {
- if len(s) == 0 {
- var zero Uint8x16
- return zero
- }
- t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s))
- return LoadInt8x16SlicePart(t).AsUint8x16()
-}
-
-// StoreSlicePart stores the 16 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 16 or more elements, the method is equivalent to x.StoreSlice.
-func (x Uint8x16) StoreSlicePart(s []uint8) {
- if len(s) == 0 {
- return
- }
- t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s))
- x.AsInt8x16().StoreSlicePart(t)
-}
-
-// LoadUint16x8SlicePart loads a Uint16x8 from the slice s.
-// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 8 or more elements, the function is equivalent to LoadUint16x8Slice.
-func LoadUint16x8SlicePart(s []uint16) Uint16x8 {
- if len(s) == 0 {
- var zero Uint16x8
- return zero
- }
- t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s))
- return LoadInt16x8SlicePart(t).AsUint16x8()
-}
-
-// StoreSlicePart stores the 8 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 8 or more elements, the method is equivalent to x.StoreSlice.
-func (x Uint16x8) StoreSlicePart(s []uint16) {
- if len(s) == 0 {
- return
- }
- t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s))
- x.AsInt16x8().StoreSlicePart(t)
-}
-
-// LoadUint8x32SlicePart loads a Uint8x32 from the slice s.
-// If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 32 or more elements, the function is equivalent to LoadUint8x32Slice.
-func LoadUint8x32SlicePart(s []uint8) Uint8x32 {
- if len(s) == 0 {
- var zero Uint8x32
- return zero
- }
- t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s))
- return LoadInt8x32SlicePart(t).AsUint8x32()
-}
-
-// StoreSlicePart stores the 32 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 32 or more elements, the method is equivalent to x.StoreSlice.
-func (x Uint8x32) StoreSlicePart(s []uint8) {
- if len(s) == 0 {
- return
- }
- t := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), len(s))
- x.AsInt8x32().StoreSlicePart(t)
-}
-
-// LoadUint16x16SlicePart loads a Uint16x16 from the slice s.
-// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes.
-// If s has 16 or more elements, the function is equivalent to LoadUint16x16Slice.
-func LoadUint16x16SlicePart(s []uint16) Uint16x16 {
- if len(s) == 0 {
- var zero Uint16x16
- return zero
- }
- t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s))
- return LoadInt16x16SlicePart(t).AsUint16x16()
-}
-
-// StoreSlicePart stores the 16 elements of x into the slice s.
-// It stores as many elements as will fit in s.
-// If s has 16 or more elements, the method is equivalent to x.StoreSlice.
-func (x Uint16x16) StoreSlicePart(s []uint16) {
- if len(s) == 0 {
- return
- }
- t := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), len(s))
- x.AsInt16x16().StoreSlicePart(t)
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int8x16) Masked(mask Mask8x16) Int8x16 {
- im := mask.AsInt8x16()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Int8x16) Merge(y Int8x16, mask Mask8x16) Int8x16 {
- im := mask.AsInt8x16()
- return y.blend(x, im)
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int16x8) Masked(mask Mask16x8) Int16x8 {
- im := mask.AsInt16x8()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Int16x8) Merge(y Int16x8, mask Mask16x8) Int16x8 {
- im := mask.AsInt16x8().AsInt8x16()
- ix := x.AsInt8x16()
- iy := y.AsInt8x16()
- return iy.blend(ix, im).AsInt16x8()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int32x4) Masked(mask Mask32x4) Int32x4 {
- im := mask.AsInt32x4()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Int32x4) Merge(y Int32x4, mask Mask32x4) Int32x4 {
- im := mask.AsInt32x4().AsInt8x16()
- ix := x.AsInt8x16()
- iy := y.AsInt8x16()
- return iy.blend(ix, im).AsInt32x4()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int64x2) Masked(mask Mask64x2) Int64x2 {
- im := mask.AsInt64x2()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Int64x2) Merge(y Int64x2, mask Mask64x2) Int64x2 {
- im := mask.AsInt64x2().AsInt8x16()
- ix := x.AsInt8x16()
- iy := y.AsInt8x16()
- return iy.blend(ix, im).AsInt64x2()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint8x16) Masked(mask Mask8x16) Uint8x16 {
- im := mask.AsInt8x16()
- return x.AsInt8x16().And(im).AsUint8x16()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Uint8x16) Merge(y Uint8x16, mask Mask8x16) Uint8x16 {
- im := mask.AsInt8x16()
- ix := x.AsInt8x16()
- iy := y.AsInt8x16()
- return iy.blend(ix, im).AsUint8x16()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint16x8) Masked(mask Mask16x8) Uint16x8 {
- im := mask.AsInt16x8()
- return x.AsInt16x8().And(im).AsUint16x8()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Uint16x8) Merge(y Uint16x8, mask Mask16x8) Uint16x8 {
- im := mask.AsInt16x8().AsInt8x16()
- ix := x.AsInt8x16()
- iy := y.AsInt8x16()
- return iy.blend(ix, im).AsUint16x8()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint32x4) Masked(mask Mask32x4) Uint32x4 {
- im := mask.AsInt32x4()
- return x.AsInt32x4().And(im).AsUint32x4()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Uint32x4) Merge(y Uint32x4, mask Mask32x4) Uint32x4 {
- im := mask.AsInt32x4().AsInt8x16()
- ix := x.AsInt8x16()
- iy := y.AsInt8x16()
- return iy.blend(ix, im).AsUint32x4()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint64x2) Masked(mask Mask64x2) Uint64x2 {
- im := mask.AsInt64x2()
- return x.AsInt64x2().And(im).AsUint64x2()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Uint64x2) Merge(y Uint64x2, mask Mask64x2) Uint64x2 {
- im := mask.AsInt64x2().AsInt8x16()
- ix := x.AsInt8x16()
- iy := y.AsInt8x16()
- return iy.blend(ix, im).AsUint64x2()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Float32x4) Masked(mask Mask32x4) Float32x4 {
- im := mask.AsInt32x4()
- return x.AsInt32x4().And(im).AsFloat32x4()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Float32x4) Merge(y Float32x4, mask Mask32x4) Float32x4 {
- im := mask.AsInt32x4().AsInt8x16()
- ix := x.AsInt8x16()
- iy := y.AsInt8x16()
- return iy.blend(ix, im).AsFloat32x4()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Float64x2) Masked(mask Mask64x2) Float64x2 {
- im := mask.AsInt64x2()
- return x.AsInt64x2().And(im).AsFloat64x2()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Float64x2) Merge(y Float64x2, mask Mask64x2) Float64x2 {
- im := mask.AsInt64x2().AsInt8x16()
- ix := x.AsInt8x16()
- iy := y.AsInt8x16()
- return iy.blend(ix, im).AsFloat64x2()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int8x32) Masked(mask Mask8x32) Int8x32 {
- im := mask.AsInt8x32()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Int8x32) Merge(y Int8x32, mask Mask8x32) Int8x32 {
- im := mask.AsInt8x32()
- return y.blend(x, im)
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int16x16) Masked(mask Mask16x16) Int16x16 {
- im := mask.AsInt16x16()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Int16x16) Merge(y Int16x16, mask Mask16x16) Int16x16 {
- im := mask.AsInt16x16().AsInt8x32()
- ix := x.AsInt8x32()
- iy := y.AsInt8x32()
- return iy.blend(ix, im).AsInt16x16()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int32x8) Masked(mask Mask32x8) Int32x8 {
- im := mask.AsInt32x8()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Int32x8) Merge(y Int32x8, mask Mask32x8) Int32x8 {
- im := mask.AsInt32x8().AsInt8x32()
- ix := x.AsInt8x32()
- iy := y.AsInt8x32()
- return iy.blend(ix, im).AsInt32x8()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int64x4) Masked(mask Mask64x4) Int64x4 {
- im := mask.AsInt64x4()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Int64x4) Merge(y Int64x4, mask Mask64x4) Int64x4 {
- im := mask.AsInt64x4().AsInt8x32()
- ix := x.AsInt8x32()
- iy := y.AsInt8x32()
- return iy.blend(ix, im).AsInt64x4()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint8x32) Masked(mask Mask8x32) Uint8x32 {
- im := mask.AsInt8x32()
- return x.AsInt8x32().And(im).AsUint8x32()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Uint8x32) Merge(y Uint8x32, mask Mask8x32) Uint8x32 {
- im := mask.AsInt8x32()
- ix := x.AsInt8x32()
- iy := y.AsInt8x32()
- return iy.blend(ix, im).AsUint8x32()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint16x16) Masked(mask Mask16x16) Uint16x16 {
- im := mask.AsInt16x16()
- return x.AsInt16x16().And(im).AsUint16x16()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Uint16x16) Merge(y Uint16x16, mask Mask16x16) Uint16x16 {
- im := mask.AsInt16x16().AsInt8x32()
- ix := x.AsInt8x32()
- iy := y.AsInt8x32()
- return iy.blend(ix, im).AsUint16x16()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint32x8) Masked(mask Mask32x8) Uint32x8 {
- im := mask.AsInt32x8()
- return x.AsInt32x8().And(im).AsUint32x8()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Uint32x8) Merge(y Uint32x8, mask Mask32x8) Uint32x8 {
- im := mask.AsInt32x8().AsInt8x32()
- ix := x.AsInt8x32()
- iy := y.AsInt8x32()
- return iy.blend(ix, im).AsUint32x8()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint64x4) Masked(mask Mask64x4) Uint64x4 {
- im := mask.AsInt64x4()
- return x.AsInt64x4().And(im).AsUint64x4()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Uint64x4) Merge(y Uint64x4, mask Mask64x4) Uint64x4 {
- im := mask.AsInt64x4().AsInt8x32()
- ix := x.AsInt8x32()
- iy := y.AsInt8x32()
- return iy.blend(ix, im).AsUint64x4()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Float32x8) Masked(mask Mask32x8) Float32x8 {
- im := mask.AsInt32x8()
- return x.AsInt32x8().And(im).AsFloat32x8()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Float32x8) Merge(y Float32x8, mask Mask32x8) Float32x8 {
- im := mask.AsInt32x8().AsInt8x32()
- ix := x.AsInt8x32()
- iy := y.AsInt8x32()
- return iy.blend(ix, im).AsFloat32x8()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Float64x4) Masked(mask Mask64x4) Float64x4 {
- im := mask.AsInt64x4()
- return x.AsInt64x4().And(im).AsFloat64x4()
-}
-
-// Merge returns x but with elements set to y where mask is false.
-func (x Float64x4) Merge(y Float64x4, mask Mask64x4) Float64x4 {
- im := mask.AsInt64x4().AsInt8x32()
- ix := x.AsInt8x32()
- iy := y.AsInt8x32()
- return iy.blend(ix, im).AsFloat64x4()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int8x64) Masked(mask Mask8x64) Int8x64 {
- im := mask.AsInt8x64()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Int8x64) Merge(y Int8x64, mask Mask8x64) Int8x64 {
- return y.blendMasked(x, mask)
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int16x32) Masked(mask Mask16x32) Int16x32 {
- im := mask.AsInt16x32()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Int16x32) Merge(y Int16x32, mask Mask16x32) Int16x32 {
- return y.blendMasked(x, mask)
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int32x16) Masked(mask Mask32x16) Int32x16 {
- im := mask.AsInt32x16()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Int32x16) Merge(y Int32x16, mask Mask32x16) Int32x16 {
- return y.blendMasked(x, mask)
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Int64x8) Masked(mask Mask64x8) Int64x8 {
- im := mask.AsInt64x8()
- return im.And(x)
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Int64x8) Merge(y Int64x8, mask Mask64x8) Int64x8 {
- return y.blendMasked(x, mask)
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint8x64) Masked(mask Mask8x64) Uint8x64 {
- im := mask.AsInt8x64()
- return x.AsInt8x64().And(im).AsUint8x64()
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 {
- ix := x.AsInt8x64()
- iy := y.AsInt8x64()
- return iy.blendMasked(ix, mask).AsUint8x64()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint16x32) Masked(mask Mask16x32) Uint16x32 {
- im := mask.AsInt16x32()
- return x.AsInt16x32().And(im).AsUint16x32()
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 {
- ix := x.AsInt16x32()
- iy := y.AsInt16x32()
- return iy.blendMasked(ix, mask).AsUint16x32()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint32x16) Masked(mask Mask32x16) Uint32x16 {
- im := mask.AsInt32x16()
- return x.AsInt32x16().And(im).AsUint32x16()
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 {
- ix := x.AsInt32x16()
- iy := y.AsInt32x16()
- return iy.blendMasked(ix, mask).AsUint32x16()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Uint64x8) Masked(mask Mask64x8) Uint64x8 {
- im := mask.AsInt64x8()
- return x.AsInt64x8().And(im).AsUint64x8()
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 {
- ix := x.AsInt64x8()
- iy := y.AsInt64x8()
- return iy.blendMasked(ix, mask).AsUint64x8()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Float32x16) Masked(mask Mask32x16) Float32x16 {
- im := mask.AsInt32x16()
- return x.AsInt32x16().And(im).AsFloat32x16()
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 {
- ix := x.AsInt32x16()
- iy := y.AsInt32x16()
- return iy.blendMasked(ix, mask).AsFloat32x16()
-}
-
-// Masked returns x but with elements zeroed where mask is false.
-func (x Float64x8) Masked(mask Mask64x8) Float64x8 {
- im := mask.AsInt64x8()
- return x.AsInt64x8().And(im).AsFloat64x8()
-}
-
-// Merge returns x but with elements set to y where m is false.
-func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 {
- ix := x.AsInt64x8()
- iy := y.AsInt64x8()
- return iy.blendMasked(ix, mask).AsFloat64x8()
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX
-func (x Int8x16) Less(y Int8x16) Mask8x16 {
- return y.Greater(x)
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX
-func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 {
- ones := x.Equal(x).AsInt8x16()
- return y.Greater(x).AsInt8x16().Xor(ones).AsMask8x16()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX
-func (x Int8x16) LessEqual(y Int8x16) Mask8x16 {
- ones := x.Equal(x).AsInt8x16()
- return x.Greater(y).AsInt8x16().Xor(ones).AsMask8x16()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX
-func (x Int8x16) NotEqual(y Int8x16) Mask8x16 {
- ones := x.Equal(x).AsInt8x16()
- return x.Equal(y).AsInt8x16().Xor(ones).AsMask8x16()
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX
-func (x Int16x8) Less(y Int16x8) Mask16x8 {
- return y.Greater(x)
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX
-func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 {
- ones := x.Equal(x).AsInt16x8()
- return y.Greater(x).AsInt16x8().Xor(ones).AsMask16x8()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX
-func (x Int16x8) LessEqual(y Int16x8) Mask16x8 {
- ones := x.Equal(x).AsInt16x8()
- return x.Greater(y).AsInt16x8().Xor(ones).AsMask16x8()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX
-func (x Int16x8) NotEqual(y Int16x8) Mask16x8 {
- ones := x.Equal(x).AsInt16x8()
- return x.Equal(y).AsInt16x8().Xor(ones).AsMask16x8()
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX
-func (x Int32x4) Less(y Int32x4) Mask32x4 {
- return y.Greater(x)
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX
-func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 {
- ones := x.Equal(x).AsInt32x4()
- return y.Greater(x).AsInt32x4().Xor(ones).AsMask32x4()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX
-func (x Int32x4) LessEqual(y Int32x4) Mask32x4 {
- ones := x.Equal(x).AsInt32x4()
- return x.Greater(y).AsInt32x4().Xor(ones).AsMask32x4()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX
-func (x Int32x4) NotEqual(y Int32x4) Mask32x4 {
- ones := x.Equal(x).AsInt32x4()
- return x.Equal(y).AsInt32x4().Xor(ones).AsMask32x4()
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX
-func (x Int64x2) Less(y Int64x2) Mask64x2 {
- return y.Greater(x)
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX
-func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 {
- ones := x.Equal(x).AsInt64x2()
- return y.Greater(x).AsInt64x2().Xor(ones).AsMask64x2()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX
-func (x Int64x2) LessEqual(y Int64x2) Mask64x2 {
- ones := x.Equal(x).AsInt64x2()
- return x.Greater(y).AsInt64x2().Xor(ones).AsMask64x2()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX
-func (x Int64x2) NotEqual(y Int64x2) Mask64x2 {
- ones := x.Equal(x).AsInt64x2()
- return x.Equal(y).AsInt64x2().Xor(ones).AsMask64x2()
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX2
-func (x Int8x32) Less(y Int8x32) Mask8x32 {
- return y.Greater(x)
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX2
-func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 {
- ones := x.Equal(x).AsInt8x32()
- return y.Greater(x).AsInt8x32().Xor(ones).AsMask8x32()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX2
-func (x Int8x32) LessEqual(y Int8x32) Mask8x32 {
- ones := x.Equal(x).AsInt8x32()
- return x.Greater(y).AsInt8x32().Xor(ones).AsMask8x32()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX2
-func (x Int8x32) NotEqual(y Int8x32) Mask8x32 {
- ones := x.Equal(x).AsInt8x32()
- return x.Equal(y).AsInt8x32().Xor(ones).AsMask8x32()
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX2
-func (x Int16x16) Less(y Int16x16) Mask16x16 {
- return y.Greater(x)
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX2
-func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 {
- ones := x.Equal(x).AsInt16x16()
- return y.Greater(x).AsInt16x16().Xor(ones).AsMask16x16()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX2
-func (x Int16x16) LessEqual(y Int16x16) Mask16x16 {
- ones := x.Equal(x).AsInt16x16()
- return x.Greater(y).AsInt16x16().Xor(ones).AsMask16x16()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX2
-func (x Int16x16) NotEqual(y Int16x16) Mask16x16 {
- ones := x.Equal(x).AsInt16x16()
- return x.Equal(y).AsInt16x16().Xor(ones).AsMask16x16()
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX2
-func (x Int32x8) Less(y Int32x8) Mask32x8 {
- return y.Greater(x)
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX2
-func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 {
- ones := x.Equal(x).AsInt32x8()
- return y.Greater(x).AsInt32x8().Xor(ones).AsMask32x8()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX2
-func (x Int32x8) LessEqual(y Int32x8) Mask32x8 {
- ones := x.Equal(x).AsInt32x8()
- return x.Greater(y).AsInt32x8().Xor(ones).AsMask32x8()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX2
-func (x Int32x8) NotEqual(y Int32x8) Mask32x8 {
- ones := x.Equal(x).AsInt32x8()
- return x.Equal(y).AsInt32x8().Xor(ones).AsMask32x8()
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX2
-func (x Int64x4) Less(y Int64x4) Mask64x4 {
- return y.Greater(x)
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX2
-func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 {
- ones := x.Equal(x).AsInt64x4()
- return y.Greater(x).AsInt64x4().Xor(ones).AsMask64x4()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX2
-func (x Int64x4) LessEqual(y Int64x4) Mask64x4 {
- ones := x.Equal(x).AsInt64x4()
- return x.Greater(y).AsInt64x4().Xor(ones).AsMask64x4()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX2
-func (x Int64x4) NotEqual(y Int64x4) Mask64x4 {
- ones := x.Equal(x).AsInt64x4()
- return x.Equal(y).AsInt64x4().Xor(ones).AsMask64x4()
-}
-
-// Greater returns a mask whose elements indicate whether x > y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint8x16) Greater(y Uint8x16) Mask8x16 {
- a, b := x.AsInt8x16(), y.AsInt8x16()
- signs := BroadcastInt8x16(-1 << (8 - 1))
- return a.Xor(signs).Greater(b.Xor(signs))
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint8x16) Less(y Uint8x16) Mask8x16 {
- a, b := x.AsInt8x16(), y.AsInt8x16()
- signs := BroadcastInt8x16(-1 << (8 - 1))
- return b.Xor(signs).Greater(a.Xor(signs))
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 {
- a, b := x.AsInt8x16(), y.AsInt8x16()
- ones := x.Equal(x).AsInt8x16()
- signs := BroadcastInt8x16(-1 << (8 - 1))
- return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 {
- a, b := x.AsInt8x16(), y.AsInt8x16()
- ones := x.Equal(x).AsInt8x16()
- signs := BroadcastInt8x16(-1 << (8 - 1))
- return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX
-func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 {
- a, b := x.AsInt8x16(), y.AsInt8x16()
- ones := x.Equal(x).AsInt8x16()
- return a.Equal(b).AsInt8x16().Xor(ones).AsMask8x16()
-}
-
-// Greater returns a mask whose elements indicate whether x > y
-//
-// Emulated, CPU Feature AVX
-func (x Uint16x8) Greater(y Uint16x8) Mask16x8 {
- a, b := x.AsInt16x8(), y.AsInt16x8()
- ones := x.Equal(x).AsInt16x8()
- signs := ones.ShiftAllLeft(16 - 1)
- return a.Xor(signs).Greater(b.Xor(signs))
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX
-func (x Uint16x8) Less(y Uint16x8) Mask16x8 {
- a, b := x.AsInt16x8(), y.AsInt16x8()
- ones := x.Equal(x).AsInt16x8()
- signs := ones.ShiftAllLeft(16 - 1)
- return b.Xor(signs).Greater(a.Xor(signs))
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX
-func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 {
- a, b := x.AsInt16x8(), y.AsInt16x8()
- ones := x.Equal(x).AsInt16x8()
- signs := ones.ShiftAllLeft(16 - 1)
- return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX
-func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 {
- a, b := x.AsInt16x8(), y.AsInt16x8()
- ones := x.Equal(x).AsInt16x8()
- signs := ones.ShiftAllLeft(16 - 1)
- return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX
-func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 {
- a, b := x.AsInt16x8(), y.AsInt16x8()
- ones := x.Equal(x).AsInt16x8()
- return a.Equal(b).AsInt16x8().Xor(ones).AsMask16x8()
-}
-
-// Greater returns a mask whose elements indicate whether x > y
-//
-// Emulated, CPU Feature AVX
-func (x Uint32x4) Greater(y Uint32x4) Mask32x4 {
- a, b := x.AsInt32x4(), y.AsInt32x4()
- ones := x.Equal(x).AsInt32x4()
- signs := ones.ShiftAllLeft(32 - 1)
- return a.Xor(signs).Greater(b.Xor(signs))
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX
-func (x Uint32x4) Less(y Uint32x4) Mask32x4 {
- a, b := x.AsInt32x4(), y.AsInt32x4()
- ones := x.Equal(x).AsInt32x4()
- signs := ones.ShiftAllLeft(32 - 1)
- return b.Xor(signs).Greater(a.Xor(signs))
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX
-func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 {
- a, b := x.AsInt32x4(), y.AsInt32x4()
- ones := x.Equal(x).AsInt32x4()
- signs := ones.ShiftAllLeft(32 - 1)
- return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX
-func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 {
- a, b := x.AsInt32x4(), y.AsInt32x4()
- ones := x.Equal(x).AsInt32x4()
- signs := ones.ShiftAllLeft(32 - 1)
- return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX
-func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 {
- a, b := x.AsInt32x4(), y.AsInt32x4()
- ones := x.Equal(x).AsInt32x4()
- return a.Equal(b).AsInt32x4().Xor(ones).AsMask32x4()
-}
-
-// Greater returns a mask whose elements indicate whether x > y
-//
-// Emulated, CPU Feature AVX
-func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
- a, b := x.AsInt64x2(), y.AsInt64x2()
- ones := x.Equal(x).AsInt64x2()
- signs := ones.ShiftAllLeft(64 - 1)
- return a.Xor(signs).Greater(b.Xor(signs))
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX
-func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
- a, b := x.AsInt64x2(), y.AsInt64x2()
- ones := x.Equal(x).AsInt64x2()
- signs := ones.ShiftAllLeft(64 - 1)
- return b.Xor(signs).Greater(a.Xor(signs))
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX
-func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
- a, b := x.AsInt64x2(), y.AsInt64x2()
- ones := x.Equal(x).AsInt64x2()
- signs := ones.ShiftAllLeft(64 - 1)
- return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX
-func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
- a, b := x.AsInt64x2(), y.AsInt64x2()
- ones := x.Equal(x).AsInt64x2()
- signs := ones.ShiftAllLeft(64 - 1)
- return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX
-func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 {
- a, b := x.AsInt64x2(), y.AsInt64x2()
- ones := x.Equal(x).AsInt64x2()
- return a.Equal(b).AsInt64x2().Xor(ones).AsMask64x2()
-}
-
-// Greater returns a mask whose elements indicate whether x > y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint8x32) Greater(y Uint8x32) Mask8x32 {
- a, b := x.AsInt8x32(), y.AsInt8x32()
- signs := BroadcastInt8x32(-1 << (8 - 1))
- return a.Xor(signs).Greater(b.Xor(signs))
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint8x32) Less(y Uint8x32) Mask8x32 {
- a, b := x.AsInt8x32(), y.AsInt8x32()
- signs := BroadcastInt8x32(-1 << (8 - 1))
- return b.Xor(signs).Greater(a.Xor(signs))
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 {
- a, b := x.AsInt8x32(), y.AsInt8x32()
- ones := x.Equal(x).AsInt8x32()
- signs := BroadcastInt8x32(-1 << (8 - 1))
- return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 {
- a, b := x.AsInt8x32(), y.AsInt8x32()
- ones := x.Equal(x).AsInt8x32()
- signs := BroadcastInt8x32(-1 << (8 - 1))
- return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 {
- a, b := x.AsInt8x32(), y.AsInt8x32()
- ones := x.Equal(x).AsInt8x32()
- return a.Equal(b).AsInt8x32().Xor(ones).AsMask8x32()
-}
-
-// Greater returns a mask whose elements indicate whether x > y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint16x16) Greater(y Uint16x16) Mask16x16 {
- a, b := x.AsInt16x16(), y.AsInt16x16()
- ones := x.Equal(x).AsInt16x16()
- signs := ones.ShiftAllLeft(16 - 1)
- return a.Xor(signs).Greater(b.Xor(signs))
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint16x16) Less(y Uint16x16) Mask16x16 {
- a, b := x.AsInt16x16(), y.AsInt16x16()
- ones := x.Equal(x).AsInt16x16()
- signs := ones.ShiftAllLeft(16 - 1)
- return b.Xor(signs).Greater(a.Xor(signs))
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 {
- a, b := x.AsInt16x16(), y.AsInt16x16()
- ones := x.Equal(x).AsInt16x16()
- signs := ones.ShiftAllLeft(16 - 1)
- return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 {
- a, b := x.AsInt16x16(), y.AsInt16x16()
- ones := x.Equal(x).AsInt16x16()
- signs := ones.ShiftAllLeft(16 - 1)
- return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 {
- a, b := x.AsInt16x16(), y.AsInt16x16()
- ones := x.Equal(x).AsInt16x16()
- return a.Equal(b).AsInt16x16().Xor(ones).AsMask16x16()
-}
-
-// Greater returns a mask whose elements indicate whether x > y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint32x8) Greater(y Uint32x8) Mask32x8 {
- a, b := x.AsInt32x8(), y.AsInt32x8()
- ones := x.Equal(x).AsInt32x8()
- signs := ones.ShiftAllLeft(32 - 1)
- return a.Xor(signs).Greater(b.Xor(signs))
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint32x8) Less(y Uint32x8) Mask32x8 {
- a, b := x.AsInt32x8(), y.AsInt32x8()
- ones := x.Equal(x).AsInt32x8()
- signs := ones.ShiftAllLeft(32 - 1)
- return b.Xor(signs).Greater(a.Xor(signs))
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 {
- a, b := x.AsInt32x8(), y.AsInt32x8()
- ones := x.Equal(x).AsInt32x8()
- signs := ones.ShiftAllLeft(32 - 1)
- return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 {
- a, b := x.AsInt32x8(), y.AsInt32x8()
- ones := x.Equal(x).AsInt32x8()
- signs := ones.ShiftAllLeft(32 - 1)
- return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 {
- a, b := x.AsInt32x8(), y.AsInt32x8()
- ones := x.Equal(x).AsInt32x8()
- return a.Equal(b).AsInt32x8().Xor(ones).AsMask32x8()
-}
-
-// Greater returns a mask whose elements indicate whether x > y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint64x4) Greater(y Uint64x4) Mask64x4 {
- a, b := x.AsInt64x4(), y.AsInt64x4()
- ones := x.Equal(x).AsInt64x4()
- signs := ones.ShiftAllLeft(64 - 1)
- return a.Xor(signs).Greater(b.Xor(signs))
-}
-
-// Less returns a mask whose elements indicate whether x < y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint64x4) Less(y Uint64x4) Mask64x4 {
- a, b := x.AsInt64x4(), y.AsInt64x4()
- ones := x.Equal(x).AsInt64x4()
- signs := ones.ShiftAllLeft(64 - 1)
- return b.Xor(signs).Greater(a.Xor(signs))
-}
-
-// GreaterEqual returns a mask whose elements indicate whether x >= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 {
- a, b := x.AsInt64x4(), y.AsInt64x4()
- ones := x.Equal(x).AsInt64x4()
- signs := ones.ShiftAllLeft(64 - 1)
- return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4()
-}
-
-// LessEqual returns a mask whose elements indicate whether x <= y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 {
- a, b := x.AsInt64x4(), y.AsInt64x4()
- ones := x.Equal(x).AsInt64x4()
- signs := ones.ShiftAllLeft(64 - 1)
- return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4()
-}
-
-// NotEqual returns a mask whose elements indicate whether x != y
-//
-// Emulated, CPU Feature AVX2
-func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 {
- a, b := x.AsInt64x4(), y.AsInt64x4()
- ones := x.Equal(x).AsInt64x4()
- return a.Equal(b).AsInt64x4().Xor(ones).AsMask64x4()
-}
-
-// BroadcastInt8x16 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastInt8x16(x int8) Int8x16 {
- var z Int8x16
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastInt16x8 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastInt16x8(x int16) Int16x8 {
- var z Int16x8
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastInt32x4 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastInt32x4(x int32) Int32x4 {
- var z Int32x4
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastInt64x2 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastInt64x2(x int64) Int64x2 {
- var z Int64x2
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastUint8x16 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastUint8x16(x uint8) Uint8x16 {
- var z Uint8x16
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastUint16x8 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastUint16x8(x uint16) Uint16x8 {
- var z Uint16x8
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastUint32x4 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastUint32x4(x uint32) Uint32x4 {
- var z Uint32x4
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastUint64x2 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastUint64x2(x uint64) Uint64x2 {
- var z Uint64x2
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastFloat32x4 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastFloat32x4(x float32) Float32x4 {
- var z Float32x4
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastFloat64x2 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastFloat64x2(x float64) Float64x2 {
- var z Float64x2
- return z.SetElem(0, x).Broadcast128()
-}
-
-// BroadcastInt8x32 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastInt8x32(x int8) Int8x32 {
- var z Int8x16
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastInt16x16 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastInt16x16(x int16) Int16x16 {
- var z Int16x8
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastInt32x8 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastInt32x8(x int32) Int32x8 {
- var z Int32x4
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastInt64x4 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastInt64x4(x int64) Int64x4 {
- var z Int64x2
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastUint8x32 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastUint8x32(x uint8) Uint8x32 {
- var z Uint8x16
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastUint16x16 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastUint16x16(x uint16) Uint16x16 {
- var z Uint16x8
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastUint32x8 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastUint32x8(x uint32) Uint32x8 {
- var z Uint32x4
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastUint64x4 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastUint64x4(x uint64) Uint64x4 {
- var z Uint64x2
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastFloat32x8 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastFloat32x8(x float32) Float32x8 {
- var z Float32x4
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastFloat64x4 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX2
-func BroadcastFloat64x4(x float64) Float64x4 {
- var z Float64x2
- return z.SetElem(0, x).Broadcast256()
-}
-
-// BroadcastInt8x64 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512BW
-func BroadcastInt8x64(x int8) Int8x64 {
- var z Int8x16
- return z.SetElem(0, x).Broadcast512()
-}
-
-// BroadcastInt16x32 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512BW
-func BroadcastInt16x32(x int16) Int16x32 {
- var z Int16x8
- return z.SetElem(0, x).Broadcast512()
-}
-
-// BroadcastInt32x16 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512F
-func BroadcastInt32x16(x int32) Int32x16 {
- var z Int32x4
- return z.SetElem(0, x).Broadcast512()
-}
-
-// BroadcastInt64x8 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512F
-func BroadcastInt64x8(x int64) Int64x8 {
- var z Int64x2
- return z.SetElem(0, x).Broadcast512()
-}
-
-// BroadcastUint8x64 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512BW
-func BroadcastUint8x64(x uint8) Uint8x64 {
- var z Uint8x16
- return z.SetElem(0, x).Broadcast512()
-}
-
-// BroadcastUint16x32 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512BW
-func BroadcastUint16x32(x uint16) Uint16x32 {
- var z Uint16x8
- return z.SetElem(0, x).Broadcast512()
-}
-
-// BroadcastUint32x16 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512F
-func BroadcastUint32x16(x uint32) Uint32x16 {
- var z Uint32x4
- return z.SetElem(0, x).Broadcast512()
-}
-
-// BroadcastUint64x8 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512F
-func BroadcastUint64x8(x uint64) Uint64x8 {
- var z Uint64x2
- return z.SetElem(0, x).Broadcast512()
-}
-
-// BroadcastFloat32x16 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512F
-func BroadcastFloat32x16(x float32) Float32x16 {
- var z Float32x4
- return z.SetElem(0, x).Broadcast512()
-}
-
-// BroadcastFloat64x8 returns a vector with the input
-// x assigned to all elements of the output.
-//
-// Emulated, CPU Feature AVX512F
-func BroadcastFloat64x8(x float64) Float64x8 {
- var z Float64x2
- return z.SetElem(0, x).Broadcast512()
-}
--- /dev/null
+// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+
+//go:build goexperiment.simd
+
+package simd
+
+import "unsafe"
+
+// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s; it panics if len(s) < 16.
+func LoadInt8x16Slice(s []int8) Int8x16 {
+ return LoadInt8x16((*[16]int8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 int8s; it panics if len(s) < 16.
+func (x Int8x16) StoreSlice(s []int8) {
+ x.Store((*[16]int8)(s))
+}
+
+// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s; it panics if len(s) < 8.
+func LoadInt16x8Slice(s []int16) Int16x8 {
+ return LoadInt16x8((*[8]int16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 int16s; it panics if len(s) < 8.
+func (x Int16x8) StoreSlice(s []int16) {
+ x.Store((*[8]int16)(s))
+}
+
+// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s; it panics if len(s) < 4.
+func LoadInt32x4Slice(s []int32) Int32x4 {
+ return LoadInt32x4((*[4]int32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 int32s; it panics if len(s) < 4.
+func (x Int32x4) StoreSlice(s []int32) {
+ x.Store((*[4]int32)(s))
+}
+
+// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s; it panics if len(s) < 2.
+func LoadInt64x2Slice(s []int64) Int64x2 {
+ return LoadInt64x2((*[2]int64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 2 int64s; it panics if len(s) < 2.
+func (x Int64x2) StoreSlice(s []int64) {
+ x.Store((*[2]int64)(s))
+}
+
+// LoadUint8x16Slice loads a Uint8x16 from a slice of at least 16 uint8s; it panics if len(s) < 16.
+func LoadUint8x16Slice(s []uint8) Uint8x16 {
+ return LoadUint8x16((*[16]uint8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 uint8s; it panics if len(s) < 16.
+func (x Uint8x16) StoreSlice(s []uint8) {
+ x.Store((*[16]uint8)(s))
+}
+
+// LoadUint16x8Slice loads a Uint16x8 from a slice of at least 8 uint16s; it panics if len(s) < 8.
+func LoadUint16x8Slice(s []uint16) Uint16x8 {
+ return LoadUint16x8((*[8]uint16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 uint16s; it panics if len(s) < 8.
+func (x Uint16x8) StoreSlice(s []uint16) {
+ x.Store((*[8]uint16)(s))
+}
+
+// LoadUint32x4Slice loads a Uint32x4 from a slice of at least 4 uint32s; it panics if len(s) < 4.
+func LoadUint32x4Slice(s []uint32) Uint32x4 {
+ return LoadUint32x4((*[4]uint32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 uint32s; it panics if len(s) < 4.
+func (x Uint32x4) StoreSlice(s []uint32) {
+ x.Store((*[4]uint32)(s))
+}
+
+// LoadUint64x2Slice loads a Uint64x2 from a slice of at least 2 uint64s; it panics if len(s) < 2.
+func LoadUint64x2Slice(s []uint64) Uint64x2 {
+ return LoadUint64x2((*[2]uint64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 2 uint64s; it panics if len(s) < 2.
+func (x Uint64x2) StoreSlice(s []uint64) {
+ x.Store((*[2]uint64)(s))
+}
+
+// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s; it panics if len(s) < 4.
+func LoadFloat32x4Slice(s []float32) Float32x4 {
+ return LoadFloat32x4((*[4]float32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 float32s; it panics if len(s) < 4.
+func (x Float32x4) StoreSlice(s []float32) {
+ x.Store((*[4]float32)(s))
+}
+
+// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s; it panics if len(s) < 2.
+func LoadFloat64x2Slice(s []float64) Float64x2 {
+ return LoadFloat64x2((*[2]float64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 2 float64s; it panics if len(s) < 2.
+func (x Float64x2) StoreSlice(s []float64) {
+ x.Store((*[2]float64)(s))
+}
+
+// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s; it panics if len(s) < 32.
+func LoadInt8x32Slice(s []int8) Int8x32 {
+ return LoadInt8x32((*[32]int8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 32 int8s; it panics if len(s) < 32.
+func (x Int8x32) StoreSlice(s []int8) {
+ x.Store((*[32]int8)(s))
+}
+
+// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s; it panics if len(s) < 16.
+func LoadInt16x16Slice(s []int16) Int16x16 {
+ return LoadInt16x16((*[16]int16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 int16s; it panics if len(s) < 16.
+func (x Int16x16) StoreSlice(s []int16) {
+ x.Store((*[16]int16)(s))
+}
+
+// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s; it panics if len(s) < 8.
+func LoadInt32x8Slice(s []int32) Int32x8 {
+ return LoadInt32x8((*[8]int32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 int32s; it panics if len(s) < 8.
+func (x Int32x8) StoreSlice(s []int32) {
+ x.Store((*[8]int32)(s))
+}
+
+// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s; it panics if len(s) < 4.
+func LoadInt64x4Slice(s []int64) Int64x4 {
+ return LoadInt64x4((*[4]int64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 int64s; it panics if len(s) < 4.
+func (x Int64x4) StoreSlice(s []int64) {
+ x.Store((*[4]int64)(s))
+}
+
+// LoadUint8x32Slice loads a Uint8x32 from a slice of at least 32 uint8s; it panics if len(s) < 32.
+func LoadUint8x32Slice(s []uint8) Uint8x32 {
+ return LoadUint8x32((*[32]uint8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 32 uint8s; it panics if len(s) < 32.
+func (x Uint8x32) StoreSlice(s []uint8) {
+ x.Store((*[32]uint8)(s))
+}
+
+// LoadUint16x16Slice loads a Uint16x16 from a slice of at least 16 uint16s; it panics if len(s) < 16.
+func LoadUint16x16Slice(s []uint16) Uint16x16 {
+ return LoadUint16x16((*[16]uint16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 uint16s; it panics if len(s) < 16.
+func (x Uint16x16) StoreSlice(s []uint16) {
+ x.Store((*[16]uint16)(s))
+}
+
+// LoadUint32x8Slice loads a Uint32x8 from a slice of at least 8 uint32s; it panics if len(s) < 8.
+func LoadUint32x8Slice(s []uint32) Uint32x8 {
+ return LoadUint32x8((*[8]uint32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 uint32s; it panics if len(s) < 8.
+func (x Uint32x8) StoreSlice(s []uint32) {
+ x.Store((*[8]uint32)(s))
+}
+
+// LoadUint64x4Slice loads a Uint64x4 from a slice of at least 4 uint64s; it panics if len(s) < 4.
+func LoadUint64x4Slice(s []uint64) Uint64x4 {
+ return LoadUint64x4((*[4]uint64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 uint64s; it panics if len(s) < 4.
+func (x Uint64x4) StoreSlice(s []uint64) {
+ x.Store((*[4]uint64)(s))
+}
+
+// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s; it panics if len(s) < 8.
+func LoadFloat32x8Slice(s []float32) Float32x8 {
+ return LoadFloat32x8((*[8]float32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 float32s; it panics if len(s) < 8.
+func (x Float32x8) StoreSlice(s []float32) {
+ x.Store((*[8]float32)(s))
+}
+
+// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s; it panics if len(s) < 4.
+func LoadFloat64x4Slice(s []float64) Float64x4 {
+ return LoadFloat64x4((*[4]float64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 4 float64s; it panics if len(s) < 4.
+func (x Float64x4) StoreSlice(s []float64) {
+ x.Store((*[4]float64)(s))
+}
+
+// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s; it panics if len(s) < 64.
+func LoadInt8x64Slice(s []int8) Int8x64 {
+ return LoadInt8x64((*[64]int8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 64 int8s; it panics if len(s) < 64.
+func (x Int8x64) StoreSlice(s []int8) {
+ x.Store((*[64]int8)(s))
+}
+
+// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s; it panics if len(s) < 32.
+func LoadInt16x32Slice(s []int16) Int16x32 {
+ return LoadInt16x32((*[32]int16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 32 int16s; it panics if len(s) < 32.
+func (x Int16x32) StoreSlice(s []int16) {
+ x.Store((*[32]int16)(s))
+}
+
+// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s; it panics if len(s) < 16.
+func LoadInt32x16Slice(s []int32) Int32x16 {
+ return LoadInt32x16((*[16]int32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 int32s; it panics if len(s) < 16.
+func (x Int32x16) StoreSlice(s []int32) {
+ x.Store((*[16]int32)(s))
+}
+
+// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s; it panics if len(s) < 8.
+func LoadInt64x8Slice(s []int64) Int64x8 {
+ return LoadInt64x8((*[8]int64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 int64s; it panics if len(s) < 8.
+func (x Int64x8) StoreSlice(s []int64) {
+ x.Store((*[8]int64)(s))
+}
+
+// LoadUint8x64Slice loads a Uint8x64 from a slice of at least 64 uint8s; it panics if len(s) < 64.
+func LoadUint8x64Slice(s []uint8) Uint8x64 {
+ return LoadUint8x64((*[64]uint8)(s))
+}
+
+// StoreSlice stores x into a slice of at least 64 uint8s; it panics if len(s) < 64.
+func (x Uint8x64) StoreSlice(s []uint8) {
+ x.Store((*[64]uint8)(s))
+}
+
+// LoadUint16x32Slice loads a Uint16x32 from a slice of at least 32 uint16s; it panics if len(s) < 32.
+func LoadUint16x32Slice(s []uint16) Uint16x32 {
+ return LoadUint16x32((*[32]uint16)(s))
+}
+
+// StoreSlice stores x into a slice of at least 32 uint16s; it panics if len(s) < 32.
+func (x Uint16x32) StoreSlice(s []uint16) {
+ x.Store((*[32]uint16)(s))
+}
+
+// LoadUint32x16Slice loads a Uint32x16 from a slice of at least 16 uint32s; it panics if len(s) < 16.
+func LoadUint32x16Slice(s []uint32) Uint32x16 {
+ return LoadUint32x16((*[16]uint32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 uint32s; it panics if len(s) < 16.
+func (x Uint32x16) StoreSlice(s []uint32) {
+ x.Store((*[16]uint32)(s))
+}
+
+// LoadUint64x8Slice loads a Uint64x8 from a slice of at least 8 uint64s; it panics if len(s) < 8.
+func LoadUint64x8Slice(s []uint64) Uint64x8 {
+ return LoadUint64x8((*[8]uint64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 uint64s; it panics if len(s) < 8.
+func (x Uint64x8) StoreSlice(s []uint64) {
+ x.Store((*[8]uint64)(s))
+}
+
+// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s; it panics if len(s) < 16.
+func LoadFloat32x16Slice(s []float32) Float32x16 {
+ return LoadFloat32x16((*[16]float32)(s))
+}
+
+// StoreSlice stores x into a slice of at least 16 float32s; it panics if len(s) < 16.
+func (x Float32x16) StoreSlice(s []float32) {
+ x.Store((*[16]float32)(s))
+}
+
+// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s; it panics if len(s) < 8.
+func LoadFloat64x8Slice(s []float64) Float64x8 {
+ return LoadFloat64x8((*[8]float64)(s))
+}
+
+// StoreSlice stores x into a slice of at least 8 float64s; it panics if len(s) < 8.
+func (x Float64x8) StoreSlice(s []float64) {
+ x.Store((*[8]float64)(s))
+}
+
+// LoadInt8x64SlicePart loads an Int8x64 from the slice s.
+// If s has fewer than 64 elements, the remaining elements of the vector are filled with zeroes.
+// If s has 64 or more elements, the function is equivalent to LoadInt8x64Slice.
+func LoadInt8x64SlicePart(s []int8) Int8x64 {
+ l := len(s)
+ if l >= 64 {
+ return LoadInt8x64Slice(s)
+ }
+ if l == 0 { // empty slice: return the zero-valued vector without touching s
+ var x Int8x64
+ return x
+ }
+ mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) // here 0 < l < 64, so exactly the low l bits are set
+ return LoadMaskedInt8x64(paInt8x64(s), mask)
+}
+
+// StoreSlicePart stores the 64 elements of x into the slice s.
+// It stores as many elements as will fit in s.
+// If s has 64 or more elements, the method is equivalent to x.StoreSlice.
+func (x Int8x64) StoreSlicePart(s []int8) {
+ l := len(s)
+ if l >= 64 {
+ x.StoreSlice(s)
+ return
+ }
+ if l == 0 { // empty slice: nothing to store
+ return
+ }
+ mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) // here 0 < l < 64, so exactly the low l bits are set
+ x.StoreMasked(paInt8x64(s), mask)
+}
+
+// LoadInt16x32SlicePart loads an Int16x32 from the slice s.
+// If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes.
+// If s has 32 or more elements, the function is equivalent to LoadInt16x32Slice.
+func LoadInt16x32SlicePart(s []int16) Int16x32 {
+ l := len(s)
+ if l >= 32 {
+ return LoadInt16x32Slice(s)
+ }
+ if l == 0 { // empty slice: return the zero-valued vector without touching s
+ var x Int16x32
+ return x
+ }
+ mask := Mask16x32FromBits(0xffffffff >> (32 - l)) // here 0 < l < 32, so exactly the low l bits are set
+ return LoadMaskedInt16x32(paInt16x32(s), mask)
+}
+
+// StoreSlicePart stores the 32 elements of x into the slice s.
+// It stores as many elements as will fit in s.
+// If s has 32 or more elements, the method is equivalent to x.StoreSlice.
+func (x Int16x32) StoreSlicePart(s []int16) {
+ l := len(s)
+ if l >= 32 {
+ x.StoreSlice(s)
+ return
+ }
+ if l == 0 { // empty slice: nothing to store
+ return
+ }
+ mask := Mask16x32FromBits(0xffffffff >> (32 - l)) // here 0 < l < 32, so exactly the low l bits are set
+ x.StoreMasked(paInt16x32(s), mask)
+}
+
+// LoadInt32x16SlicePart loads an Int32x16 from the slice s.
+// If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes.
+// If s has 16 or more elements, the function is equivalent to LoadInt32x16Slice.
+func LoadInt32x16SlicePart(s []int32) Int32x16 {
+ l := len(s)
+ if l >= 16 {
+ return LoadInt32x16Slice(s)
+ }
+ if l == 0 { // empty slice: return the zero-valued vector without touching s
+ var x Int32x16
+ return x
+ }
+ mask := Mask32x16FromBits(0xffff >> (16 - l)) // here 0 < l < 16, so exactly the low l bits are set
+ return LoadMaskedInt32x16(paInt32x16(s), mask)
+}
+
+// StoreSlicePart stores the 16 elements of x into the slice s.
+// It stores as many elements as will fit in s.
+// If s has 16 or more elements, the method is equivalent to x.StoreSlice.
+func (x Int32x16) StoreSlicePart(s []int32) {
+ l := len(s)
+ if l >= 16 {
+ x.StoreSlice(s)
+ return
+ }
+ if l == 0 { // empty slice: nothing to store
+ return
+ }
+ mask := Mask32x16FromBits(0xffff >> (16 - l)) // here 0 < l < 16, so exactly the low l bits are set
+ x.StoreMasked(paInt32x16(s), mask)
+}
+
+// LoadInt64x8SlicePart loads an Int64x8 from the slice s.
+// If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes.
+// If s has 8 or more elements, the function is equivalent to LoadInt64x8Slice.
+func LoadInt64x8SlicePart(s []int64) Int64x8 {
+ l := len(s)
+ if l >= 8 {
+ return LoadInt64x8Slice(s)
+ }
+ if l == 0 { // empty slice: return the zero-valued vector without touching s
+ var x Int64x8
+ return x
+ }
+ mask := Mask64x8FromBits(0xff >> (8 - l)) // here 0 < l < 8, so exactly the low l bits are set
+ return LoadMaskedInt64x8(paInt64x8(s), mask)
+}
+
+// StoreSlicePart stores the 8 elements of x into the slice s.
+// It stores as many elements as will fit in s.
+// If s has 8 or more elements, the method is equivalent to x.StoreSlice.
+func (x Int64x8) StoreSlicePart(s []int64) {
+ l := len(s)
+ if l >= 8 {
+ x.StoreSlice(s)
+ return
+ }
+ if l == 0 { // empty slice: nothing to store
+ return
+ }
+ mask := Mask64x8FromBits(0xff >> (8 - l)) // here 0 < l < 8, so exactly the low l bits are set
+ x.StoreMasked(paInt64x8(s), mask)
+}
+
+// LoadUint8x64SlicePart loads a Uint8x64 from the slice s.
+// If s has fewer than 64 elements, the remaining elements of the vector are filled with zeroes.
+// If s has 64 or more elements, the function is equivalent to LoadUint8x64Slice.
+func LoadUint8x64SlicePart(s []uint8) Uint8x64 {
+ l := len(s)
+ if l >= 64 {
+ return LoadUint8x64Slice(s)
+ }
+ if l == 0 { // empty slice: return the zero-valued vector without touching s
+ var x Uint8x64
+ return x
+ }
+ mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) // here 0 < l < 64, so exactly the low l bits are set
+ return LoadMaskedUint8x64(paUint8x64(s), mask)
+}
+
+// StoreSlicePart stores the 64 elements of x into the slice s.
+// It stores as many elements as will fit in s.
+// If s has 64 or more elements, the method is equivalent to x.StoreSlice.
+func (x Uint8x64) StoreSlicePart(s []uint8) {
+ l := len(s)
+ if l >= 64 {
+ x.StoreSlice(s)
+ return
+ }
+ if l == 0 { // empty slice: nothing to store
+ return
+ }
+ mask := Mask8x64FromBits(0xffffffffffffffff >> (64 - l)) // here 0 < l < 64, so exactly the low l bits are set
+ x.StoreMasked(paUint8x64(s), mask)
+}
+
+// LoadUint16x32SlicePart builds a Uint16x32 from the first elements of s.
+// When len(s) >= 32 it is the same as LoadUint16x32Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadUint16x32SlicePart(s []uint16) Uint16x32 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Uint16x32
+		return zero
+	case n >= 32:
+		return LoadUint16x32Slice(s)
+	}
+	// 1 <= n <= 31 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask16x32FromBits(0xffffffff >> (32 - n))
+	return LoadMaskedUint16x32(paUint16x32(s), m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 32 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Uint16x32) StoreSlicePart(s []uint16) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 32:
+		x.StoreSlice(s)
+		return
+	}
+	// 1 <= n <= 31 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask16x32FromBits(0xffffffff >> (32 - n))
+	x.StoreMasked(paUint16x32(s), m)
+}
+
+// LoadUint32x16SlicePart builds a Uint32x16 from the first elements of s.
+// When len(s) >= 16 it is the same as LoadUint32x16Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadUint32x16SlicePart(s []uint32) Uint32x16 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Uint32x16
+		return zero
+	case n >= 16:
+		return LoadUint32x16Slice(s)
+	}
+	// 1 <= n <= 15 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask32x16FromBits(0xffff >> (16 - n))
+	return LoadMaskedUint32x16(paUint32x16(s), m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 16 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Uint32x16) StoreSlicePart(s []uint32) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 16:
+		x.StoreSlice(s)
+		return
+	}
+	// 1 <= n <= 15 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask32x16FromBits(0xffff >> (16 - n))
+	x.StoreMasked(paUint32x16(s), m)
+}
+
+// LoadUint64x8SlicePart builds a Uint64x8 from the first elements of s.
+// When len(s) >= 8 it is the same as LoadUint64x8Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadUint64x8SlicePart(s []uint64) Uint64x8 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Uint64x8
+		return zero
+	case n >= 8:
+		return LoadUint64x8Slice(s)
+	}
+	// 1 <= n <= 7 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask64x8FromBits(0xff >> (8 - n))
+	return LoadMaskedUint64x8(paUint64x8(s), m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 8 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Uint64x8) StoreSlicePart(s []uint64) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 8:
+		x.StoreSlice(s)
+		return
+	}
+	// 1 <= n <= 7 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask64x8FromBits(0xff >> (8 - n))
+	x.StoreMasked(paUint64x8(s), m)
+}
+
+// LoadFloat32x16SlicePart builds a Float32x16 from the first elements of s.
+// When len(s) >= 16 it is the same as LoadFloat32x16Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadFloat32x16SlicePart(s []float32) Float32x16 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Float32x16
+		return zero
+	case n >= 16:
+		return LoadFloat32x16Slice(s)
+	}
+	// 1 <= n <= 15 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask32x16FromBits(0xffff >> (16 - n))
+	return LoadMaskedFloat32x16(paFloat32x16(s), m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 16 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Float32x16) StoreSlicePart(s []float32) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 16:
+		x.StoreSlice(s)
+		return
+	}
+	// 1 <= n <= 15 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask32x16FromBits(0xffff >> (16 - n))
+	x.StoreMasked(paFloat32x16(s), m)
+}
+
+// LoadFloat64x8SlicePart builds a Float64x8 from the first elements of s.
+// When len(s) >= 8 it is the same as LoadFloat64x8Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadFloat64x8SlicePart(s []float64) Float64x8 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Float64x8
+		return zero
+	case n >= 8:
+		return LoadFloat64x8Slice(s)
+	}
+	// 1 <= n <= 7 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask64x8FromBits(0xff >> (8 - n))
+	return LoadMaskedFloat64x8(paFloat64x8(s), m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 8 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Float64x8) StoreSlicePart(s []float64) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 8:
+		x.StoreSlice(s)
+		return
+	}
+	// 1 <= n <= 7 here, so the shifted constant keeps exactly its low n bits set.
+	m := Mask64x8FromBits(0xff >> (8 - n))
+	x.StoreMasked(paFloat64x8(s), m)
+}
+
+// LoadInt32x4SlicePart builds an Int32x4 from the first elements of s.
+// When len(s) >= 4 it is the same as LoadInt32x4Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadInt32x4SlicePart(s []int32) Int32x4 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Int32x4
+		return zero
+	case n >= 4:
+		return LoadInt32x4Slice(s)
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	src := paInt32x4(s)
+	m := LoadInt32x4Slice(window).AsMask32x4()
+	return LoadMaskedInt32x4(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 4 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Int32x4) StoreSlicePart(s []int32) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 4:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	dst := paInt32x4(s)
+	m := LoadInt32x4Slice(window).AsMask32x4()
+	x.StoreMasked(dst, m)
+}
+
+// LoadInt64x2SlicePart builds an Int64x2 from the first elements of s.
+// When len(s) >= 2 it is the same as LoadInt64x2Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadInt64x2SlicePart(s []int64) Int64x2 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Int64x2
+		return zero
+	case n >= 2:
+		return LoadInt64x2Slice(s)
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	src := paInt64x2(s)
+	m := LoadInt64x2Slice(window).AsMask64x2()
+	return LoadMaskedInt64x2(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 2 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Int64x2) StoreSlicePart(s []int64) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 2:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	dst := paInt64x2(s)
+	m := LoadInt64x2Slice(window).AsMask64x2()
+	x.StoreMasked(dst, m)
+}
+
+// LoadUint32x4SlicePart builds a Uint32x4 from the first elements of s.
+// When len(s) >= 4 it is the same as LoadUint32x4Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadUint32x4SlicePart(s []uint32) Uint32x4 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Uint32x4
+		return zero
+	case n >= 4:
+		return LoadUint32x4Slice(s)
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	src := paUint32x4(s)
+	m := LoadInt32x4Slice(window).AsMask32x4()
+	return LoadMaskedUint32x4(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 4 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Uint32x4) StoreSlicePart(s []uint32) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 4:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	dst := paUint32x4(s)
+	m := LoadInt32x4Slice(window).AsMask32x4()
+	x.StoreMasked(dst, m)
+}
+
+// LoadUint64x2SlicePart builds a Uint64x2 from the first elements of s.
+// When len(s) >= 2 it is the same as LoadUint64x2Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadUint64x2SlicePart(s []uint64) Uint64x2 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Uint64x2
+		return zero
+	case n >= 2:
+		return LoadUint64x2Slice(s)
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	src := paUint64x2(s)
+	m := LoadInt64x2Slice(window).AsMask64x2()
+	return LoadMaskedUint64x2(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 2 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Uint64x2) StoreSlicePart(s []uint64) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 2:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	dst := paUint64x2(s)
+	m := LoadInt64x2Slice(window).AsMask64x2()
+	x.StoreMasked(dst, m)
+}
+
+// LoadFloat32x4SlicePart builds a Float32x4 from the first elements of s.
+// When len(s) >= 4 it is the same as LoadFloat32x4Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadFloat32x4SlicePart(s []float32) Float32x4 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Float32x4
+		return zero
+	case n >= 4:
+		return LoadFloat32x4Slice(s)
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	src := paFloat32x4(s)
+	m := LoadInt32x4Slice(window).AsMask32x4()
+	return LoadMaskedFloat32x4(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 4 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Float32x4) StoreSlicePart(s []float32) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 4:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	dst := paFloat32x4(s)
+	m := LoadInt32x4Slice(window).AsMask32x4()
+	x.StoreMasked(dst, m)
+}
+
+// LoadFloat64x2SlicePart builds a Float64x2 from the first elements of s.
+// When len(s) >= 2 it is the same as LoadFloat64x2Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadFloat64x2SlicePart(s []float64) Float64x2 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Float64x2
+		return zero
+	case n >= 2:
+		return LoadFloat64x2Slice(s)
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	src := paFloat64x2(s)
+	m := LoadInt64x2Slice(window).AsMask64x2()
+	return LoadMaskedFloat64x2(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 2 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Float64x2) StoreSlicePart(s []float64) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 2:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	dst := paFloat64x2(s)
+	m := LoadInt64x2Slice(window).AsMask64x2()
+	x.StoreMasked(dst, m)
+}
+
+// LoadInt32x8SlicePart builds an Int32x8 from the first elements of s.
+// When len(s) >= 8 it is the same as LoadInt32x8Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadInt32x8SlicePart(s []int32) Int32x8 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Int32x8
+		return zero
+	case n >= 8:
+		return LoadInt32x8Slice(s)
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	src := paInt32x8(s)
+	m := LoadInt32x8Slice(window).AsMask32x8()
+	return LoadMaskedInt32x8(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 8 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Int32x8) StoreSlicePart(s []int32) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 8:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	dst := paInt32x8(s)
+	m := LoadInt32x8Slice(window).AsMask32x8()
+	x.StoreMasked(dst, m)
+}
+
+// LoadInt64x4SlicePart builds an Int64x4 from the first elements of s.
+// When len(s) >= 4 it is the same as LoadInt64x4Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadInt64x4SlicePart(s []int64) Int64x4 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Int64x4
+		return zero
+	case n >= 4:
+		return LoadInt64x4Slice(s)
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	src := paInt64x4(s)
+	m := LoadInt64x4Slice(window).AsMask64x4()
+	return LoadMaskedInt64x4(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 4 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Int64x4) StoreSlicePart(s []int64) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 4:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	dst := paInt64x4(s)
+	m := LoadInt64x4Slice(window).AsMask64x4()
+	x.StoreMasked(dst, m)
+}
+
+// LoadUint32x8SlicePart builds a Uint32x8 from the first elements of s.
+// When len(s) >= 8 it is the same as LoadUint32x8Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadUint32x8SlicePart(s []uint32) Uint32x8 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Uint32x8
+		return zero
+	case n >= 8:
+		return LoadUint32x8Slice(s)
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	src := paUint32x8(s)
+	m := LoadInt32x8Slice(window).AsMask32x8()
+	return LoadMaskedUint32x8(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 8 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Uint32x8) StoreSlicePart(s []uint32) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 8:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	dst := paUint32x8(s)
+	m := LoadInt32x8Slice(window).AsMask32x8()
+	x.StoreMasked(dst, m)
+}
+
+// LoadUint64x4SlicePart builds a Uint64x4 from the first elements of s.
+// When len(s) >= 4 it is the same as LoadUint64x4Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadUint64x4SlicePart(s []uint64) Uint64x4 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Uint64x4
+		return zero
+	case n >= 4:
+		return LoadUint64x4Slice(s)
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	src := paUint64x4(s)
+	m := LoadInt64x4Slice(window).AsMask64x4()
+	return LoadMaskedUint64x4(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 4 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Uint64x4) StoreSlicePart(s []uint64) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 4:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	dst := paUint64x4(s)
+	m := LoadInt64x4Slice(window).AsMask64x4()
+	x.StoreMasked(dst, m)
+}
+
+// LoadFloat32x8SlicePart builds a Float32x8 from the first elements of s.
+// When len(s) >= 8 it is the same as LoadFloat32x8Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadFloat32x8SlicePart(s []float32) Float32x8 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Float32x8
+		return zero
+	case n >= 8:
+		return LoadFloat32x8Slice(s)
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	src := paFloat32x8(s)
+	m := LoadInt32x8Slice(window).AsMask32x8()
+	return LoadMaskedFloat32x8(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 8 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Float32x8) StoreSlicePart(s []float32) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 8:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask32 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask32's layout, defined elsewhere.
+	window := vecMask32[len(vecMask32)/2-n:]
+	dst := paFloat32x8(s)
+	m := LoadInt32x8Slice(window).AsMask32x8()
+	x.StoreMasked(dst, m)
+}
+
+// LoadFloat64x4SlicePart builds a Float64x4 from the first elements of s.
+// When len(s) >= 4 it is the same as LoadFloat64x4Slice; when s is shorter,
+// the vector elements with no counterpart in s are zero.
+func LoadFloat64x4SlicePart(s []float64) Float64x4 {
+	n := len(s)
+	switch {
+	case n == 0:
+		var zero Float64x4
+		return zero
+	case n >= 4:
+		return LoadFloat64x4Slice(s)
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	src := paFloat64x4(s)
+	m := LoadInt64x4Slice(window).AsMask64x4()
+	return LoadMaskedFloat64x4(src, m)
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold.
+// When len(s) >= 4 it simply calls x.StoreSlice; an empty s is left untouched.
+func (x Float64x4) StoreSlicePart(s []float64) {
+	n := len(s)
+	switch {
+	case n == 0:
+		return
+	case n >= 4:
+		x.StoreSlice(s)
+		return
+	}
+	// The mask is read from a window of vecMask64 that begins n entries before
+	// its midpoint. NOTE(review): relies on vecMask64's layout, defined elsewhere.
+	window := vecMask64[len(vecMask64)/2-n:]
+	dst := paFloat64x4(s)
+	m := LoadInt64x4Slice(window).AsMask64x4()
+	x.StoreMasked(dst, m)
+}
+
+// LoadUint8x16SlicePart builds a Uint8x16 from the first elements of s.
+// When s holds fewer than 16 elements the rest of the vector is zero; with 16
+// or more it matches LoadUint8x16Slice.
+// The work is done by LoadInt8x16SlicePart on an int8 view of the same memory.
+func LoadUint8x16SlicePart(s []uint8) Uint8x16 {
+	if n := len(s); n > 0 {
+		// &s[0] is only safe to take because s is known to be non-empty here.
+		signed := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), n)
+		return LoadInt8x16SlicePart(signed).AsUint8x16()
+	}
+	var zero Uint8x16
+	return zero
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold; with 16 or more elements in s it matches
+// x.StoreSlice. An empty s is left untouched.
+// The work is done by Int8x16.StoreSlicePart on an int8 view of the same memory.
+func (x Uint8x16) StoreSlicePart(s []uint8) {
+	if n := len(s); n > 0 {
+		// &s[0] is only safe to take because s is known to be non-empty here.
+		signed := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), n)
+		x.AsInt8x16().StoreSlicePart(signed)
+	}
+}
+
+// LoadUint16x8SlicePart builds a Uint16x8 from the first elements of s.
+// When s holds fewer than 8 elements the rest of the vector is zero; with 8
+// or more it matches LoadUint16x8Slice.
+// The work is done by LoadInt16x8SlicePart on an int16 view of the same memory.
+func LoadUint16x8SlicePart(s []uint16) Uint16x8 {
+	if n := len(s); n > 0 {
+		// &s[0] is only safe to take because s is known to be non-empty here.
+		signed := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), n)
+		return LoadInt16x8SlicePart(signed).AsUint16x8()
+	}
+	var zero Uint16x8
+	return zero
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold; with 8 or more elements in s it matches
+// x.StoreSlice. An empty s is left untouched.
+// The work is done by Int16x8.StoreSlicePart on an int16 view of the same memory.
+func (x Uint16x8) StoreSlicePart(s []uint16) {
+	if n := len(s); n > 0 {
+		// &s[0] is only safe to take because s is known to be non-empty here.
+		signed := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), n)
+		x.AsInt16x8().StoreSlicePart(signed)
+	}
+}
+
+// LoadUint8x32SlicePart builds a Uint8x32 from the first elements of s.
+// When s holds fewer than 32 elements the rest of the vector is zero; with 32
+// or more it matches LoadUint8x32Slice.
+// The work is done by LoadInt8x32SlicePart on an int8 view of the same memory.
+func LoadUint8x32SlicePart(s []uint8) Uint8x32 {
+	if n := len(s); n > 0 {
+		// &s[0] is only safe to take because s is known to be non-empty here.
+		signed := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), n)
+		return LoadInt8x32SlicePart(signed).AsUint8x32()
+	}
+	var zero Uint8x32
+	return zero
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold; with 32 or more elements in s it matches
+// x.StoreSlice. An empty s is left untouched.
+// The work is done by Int8x32.StoreSlicePart on an int8 view of the same memory.
+func (x Uint8x32) StoreSlicePart(s []uint8) {
+	if n := len(s); n > 0 {
+		// &s[0] is only safe to take because s is known to be non-empty here.
+		signed := unsafe.Slice((*int8)(unsafe.Pointer(&s[0])), n)
+		x.AsInt8x32().StoreSlicePart(signed)
+	}
+}
+
+// LoadUint16x16SlicePart builds a Uint16x16 from the first elements of s.
+// When s holds fewer than 16 elements the rest of the vector is zero; with 16
+// or more it matches LoadUint16x16Slice.
+// The work is done by LoadInt16x16SlicePart on an int16 view of the same memory.
+func LoadUint16x16SlicePart(s []uint16) Uint16x16 {
+	if n := len(s); n > 0 {
+		// &s[0] is only safe to take because s is known to be non-empty here.
+		signed := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), n)
+		return LoadInt16x16SlicePart(signed).AsUint16x16()
+	}
+	var zero Uint16x16
+	return zero
+}
+
+// StoreSlicePart writes the leading elements of x into s, storing no more
+// elements than s can hold; with 16 or more elements in s it matches
+// x.StoreSlice. An empty s is left untouched.
+// The work is done by Int16x16.StoreSlicePart on an int16 view of the same memory.
+func (x Uint16x16) StoreSlicePart(s []uint16) {
+	if n := len(s); n > 0 {
+		// &s[0] is only safe to take because s is known to be non-empty here.
+		signed := unsafe.Slice((*int16)(unsafe.Pointer(&s[0])), n)
+		x.AsInt16x16().StoreSlicePart(signed)
+	}
+}