From d00e96d3aeccb11c373d125418316f0b019f4fb0 Mon Sep 17 00:00:00 2001 From: David Chase Date: Thu, 18 Dec 2025 13:18:14 -0500 Subject: [PATCH] internal/cpu: repair VNNI feature check This is a pain to test. Also the original test was never executed, because it was wrong. It looks like processors that might lack this feature include Intel 11th generation and AMD Zen 4. These might or might not have bit 2 set in the 7th cpuid "leaf" (SM4) which is what the incorrect test was checking; the bug is triggered by ^VNNI & SM4. Apparently the SM4 bit is not usually set, else we would have seen a test failure. The "Lion Cove" microarchitecture (Arrow Lake, Lunar Lake) appears to trigger this problem; it's not clear if there are others. It was hard to verify this from online information. Fixes #76881. Change-Id: I21be6b4f47134d81e89799b0f06f89fcb6563264 Reviewed-on: https://go-review.googlesource.com/c/go/+/731240 TryBot-Bypass: David Chase Reviewed-by: Cherry Mui --- src/internal/cpu/cpu_x86.go | 2 +- .../archsimd/internal/simd_test/simd_test.go | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go index 4610ce807e..711fb045c3 100644 --- a/src/internal/cpu/cpu_x86.go +++ b/src/internal/cpu/cpu_x86.go @@ -219,7 +219,7 @@ func doinit() { if eax7 >= 1 { eax71, _, _, _ := cpuid(7, 1) if X86.HasAVX { - X86.HasAVXVNNI = isSet(4, eax71) + X86.HasAVXVNNI = isSet(eax71, cpuid_AVXVNNI) } } diff --git a/src/simd/archsimd/internal/simd_test/simd_test.go b/src/simd/archsimd/internal/simd_test/simd_test.go index 83925ae789..5fd7407dbe 100644 --- a/src/simd/archsimd/internal/simd_test/simd_test.go +++ b/src/simd/archsimd/internal/simd_test/simd_test.go @@ -1135,18 +1135,22 @@ func TestDotProductQuadruple(t *testing.T) { wanted2 := make([]int32, 4) res1 := make([]int32, 4) res2 := make([]int32, 4) + for i := range 16 { + xd[i] = int8(i + 112) // 112+15 = 127 + yd[i] = uint8(i + 240) // 240+15 = 255 + 
} for i := range 4 { - xd[i] = 5 - yd[i] = 6 - zd[i] = 3 - wanted1[i] = 30 - wanted2[i] = 30 + i4 := 4 * i + wanted1[i] = int32(xd[i4])*int32(yd[i4]) + int32(xd[i4+1])*int32(yd[i4+1]) + int32(xd[i4+2])*int32(yd[i4+2]) + int32(xd[i4+3])*int32(yd[i4+3]) + zd[i] = int32(i + 1) + wanted2[i] = wanted1[i] + zd[i] } + x := archsimd.LoadInt8x16Slice(xd) y := archsimd.LoadUint8x16Slice(yd) z := archsimd.LoadInt32x4Slice(zd) x.DotProductQuadruple(y).StoreSlice(res1) - x.DotProductQuadruple(y).Add(z).StoreSlice(res1) + x.DotProductQuadruple(y).Add(z).StoreSlice(res2) for i := range 4 { if res1[i] != wanted1[i] { t.Errorf("got %d wanted %d", res1[i], wanted1[i]) -- 2.52.0