if eax7 >= 1 {
eax71, _, _, _ := cpuid(7, 1)
if X86.HasAVX {
- X86.HasAVXVNNI = isSet(4, eax71)
+ X86.HasAVXVNNI = isSet(eax71, cpuid_AVXVNNI)
}
}
wanted2 := make([]int32, 4)
res1 := make([]int32, 4)
res2 := make([]int32, 4)
+ for i := range 16 {
+ xd[i] = int8(i + 112) // 112+15 = 127
+ yd[i] = uint8(i + 240) // 240+15 = 255
+ }
for i := range 4 {
- xd[i] = 5
- yd[i] = 6
- zd[i] = 3
- wanted1[i] = 30
- wanted2[i] = 30
+ i4 := 4 * i
+ wanted1[i] = int32(xd[i4])*int32(yd[i4]) + int32(xd[i4+1])*int32(yd[i4+1]) + int32(xd[i4+2])*int32(yd[i4+2]) + int32(xd[i4+3])*int32(yd[i4+3])
+ zd[i] = int32(i + 1)
+ wanted2[i] = wanted1[i] + zd[i]
}
+
x := archsimd.LoadInt8x16Slice(xd)
y := archsimd.LoadUint8x16Slice(yd)
z := archsimd.LoadInt32x4Slice(zd)
x.DotProductQuadruple(y).StoreSlice(res1)
- x.DotProductQuadruple(y).Add(z).StoreSlice(res1)
+ x.DotProductQuadruple(y).Add(z).StoreSlice(res2)
for i := range 4 {
if res1[i] != wanted1[i] {
t.Errorf("got %d wanted %d", res1[i], wanted1[i])