Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile, simd: add ClearAVXUpperBits
author Cherry Mui <cherryyz@google.com>
Thu, 4 Sep 2025 21:15:14 +0000 (17:15 -0400)
committer Cherry Mui <cherryyz@google.com>
Fri, 5 Sep 2025 17:19:03 +0000 (10:19 -0700)
ClearAVXUpperBits is intended for transitioning from AVX to SSE code.
It helps early adopters with benchmarking. One day the compiler should
insert the instruction automatically.

Change-Id: I9d7413f22f30f8dc0c632e8e806386d9ca8e8308
Reviewed-on: https://go-review.googlesource.com/c/go/+/701199
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssagen/intrinsics.go
src/simd/extra_amd64.go [new file with mode: 0644]
src/simd/internal/simd_test/simd_test.go

index 96001e203f1fe34ac0a442ef8951b4b853ad903c..ff6235839be24fb180f6b5452999e9e21ea25ae8 100644 (file)
@@ -1397,8 +1397,8 @@ func init() {
                {name: "VMOVSSconst", reg: fp01, asm: "VMOVSS", aux: "Float32", rematerializeable: true},
                {name: "VMOVSDconst", reg: fp01, asm: "VMOVSD", aux: "Float64", rematerializeable: true},
 
-               {name: "VZEROUPPER", argLength: 0, asm: "VZEROUPPER"},
-               {name: "VZEROALL", argLength: 0, asm: "VZEROALL"},
+               {name: "VZEROUPPER", argLength: 1, reg: regInfo{clobbers: v}, asm: "VZEROUPPER"}, // arg=mem, returns mem
+               {name: "VZEROALL", argLength: 1, reg: regInfo{clobbers: v}, asm: "VZEROALL"},     // arg=mem, returns mem
 
                {name: "KMOVQload", argLength: 2, reg: kload, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
                {name: "KMOVQstore", argLength: 3, reg: kstore, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"},
index e7f06fccf7ee6608f3ed767a6262255fc61c66d8..9fc60598656beb3a34dcf63896258af621ae60a9 100644 (file)
@@ -19070,15 +19070,19 @@ var opcodeTable = [...]opInfo{
        },
        {
                name:   "VZEROUPPER",
-               argLen: 0,
+               argLen: 1,
                asm:    x86.AVZEROUPPER,
-               reg:    regInfo{},
+               reg: regInfo{
+                       clobbers: 2147418112, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+               },
        },
        {
                name:   "VZEROALL",
-               argLen: 0,
+               argLen: 1,
                asm:    x86.AVZEROALL,
-               reg:    regInfo{},
+               reg: regInfo{
+                       clobbers: 2147418112, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+               },
        },
        {
                name:           "KMOVQload",
index f5b5b9bb7cd82874e57f27a2f038cbaf4f781d61..4d1b762f7d4311494380ef6491339ae71541a751 100644 (file)
@@ -1607,6 +1607,13 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
        if buildcfg.Experiment.SIMD {
                // Only enable intrinsics, if SIMD experiment.
                simdIntrinsics(addF)
+
+               addF("simd", "ClearAVXUpperBits",
+                       func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+                               s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem())
+                               return nil
+                       },
+                       sys.AMD64)
        }
 }
 
diff --git a/src/simd/extra_amd64.go b/src/simd/extra_amd64.go
new file mode 100644 (file)
index 0000000..6d09f04
--- /dev/null
@@ -0,0 +1,17 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.simd && amd64
+
+package simd
+
+// ClearAVXUpperBits clears the high bits of the Y0-Y15 and Z0-Z15 registers.
+// It is intended for use when transitioning from AVX code to SSE code, where
+// it eliminates the performance penalties caused by false dependencies.
+//
+// Note: in the future the compiler may generate the instruction
+// automatically, making this function unnecessary.
+//
+// Asm: VZEROUPPER, CPU Feature: AVX
+func ClearAVXUpperBits()
index 98cfd55ac5cce98bf0a8fa5bdc4432458b9c8aee..1d4311d75c52fde439dd59a09027844b85bc9965 100644 (file)
@@ -518,3 +518,25 @@ func TestFlattenedTranspose(t *testing.T) {
        checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4})
 
 }
+
+func TestClearAVXUpperBits(t *testing.T) {
+       // Check that calling ClearAVXUpperBits is safe even while SIMD values
+       // are live across the call (although usually one should not do this).
+       if !simd.HasAVX2() {
+               t.Skip("Test requires HasAVX2, not available on this hardware")
+               return
+       }
+
+       r := make([]int64, 4) // receives x+y, computed before the call
+       s := make([]int64, 4) // receives x-y, computed after the call
+
+       x := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
+       y := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
+
+       x.Add(y).StoreSlice(r)
+       simd.ClearAVXUpperBits() // x and y are both still needed below
+       x.Sub(y).StoreSlice(s)
+
+       checkSlices[int64](t, r, []int64{11, 22, 33, 44})
+       checkSlices[int64](t, s, []int64{9, 18, 27, 36})
+}