Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: fix SIMD const rematerialization condition
author: Junyang Shao <shaojunyang@google.com>
Fri, 19 Sep 2025 18:38:25 +0000 (18:38 +0000)
committer: Junyang Shao <shaojunyang@google.com>
Mon, 22 Sep 2025 23:19:02 +0000 (16:19 -0700)
This CL corrects a condition from the previous fix, CL 704056.

Change-Id: I1f1f8c6f72870403cb3dff14755c43385dc0c933
Reviewed-on: https://go-review.googlesource.com/c/go/+/705499
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/ssa/regalloc.go
test/simd/bug2.go

index fe30b89cdd660444aa7304eed3900e3a0dbfa35b..bcb5dec09d335949b7aa58b8b42e0093da76ec38 100644 (file)
@@ -2576,22 +2576,25 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP
                        e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
                }
                if dstReg {
-                       // Handle incompatible registers.
+                       // We want to rematerialize v into a register that is incompatible with v's op's register mask.
+                       // Instead of setting the wrong register for the rematerialized v, we should find the right register
+                       // for it and emit an additional copy to move to the desired register.
                        // For #70451.
-                       if e.s.regspec(v).outputs[0].regs&regMask(1<<register(loc.(*Register).num)) == 0 && c != nil {
+                       if e.s.regspec(v).outputs[0].regs&regMask(1<<register(loc.(*Register).num)) == 0 {
                                _, srcReg := src.(*Register)
-                               if !srcReg {
+                               if srcReg {
+                                       // It exists in a valid register already, so just copy it to the desired register
+                                       // If src is a Register, c must have already been set.
+                                       x = e.p.NewValue1(pos, OpCopy, c.Type, c)
+                               } else {
                                        // We need a tmp register
                                        x = v.copyInto(e.p)
                                        r := e.findRegFor(x.Type)
                                        e.erase(r)
-                                       // Rematerialize to a tmp register
+                                       // Rematerialize to the tmp register
                                        e.set(r, vid, x, false, pos)
                                        // Copy from tmp to the desired register
                                        x = e.p.NewValue1(pos, OpCopy, x.Type, x)
-                               } else {
-                                       // It exist in a valid register already, so just copy it to the desired register
-                                       x = e.p.NewValue1(pos, OpCopy, c.Type, c)
                                }
                        } else {
                                x = v.copyInto(e.p)
index 2d2094b5a42acfcc75a8023e9dd75a5dabe2b081..5b7a21176a7c2f54b018b81efa912d423cb4ff66 100644 (file)
@@ -24,3 +24,46 @@ func PackComplex(b bool) {
                }
        }
 }
+
+func PackComplex2(x0 uint16, src [][4]float32, b, b2 bool) {
+       var out [][4]byte
+       if b2 {
+               for y := range x0 {
+                       row := out[:x0]
+                       for x := range row {
+                               px := &src[y]
+                               if b {
+                                       var indices [4]uint32
+                                       fu := simd.LoadFloat32x4(px).AsUint32x4()
+                                       fu.ShiftAllRight(0).Store(nil)
+                                       entry := simd.LoadUint32x4(&[4]uint32{
+                                               toSrgbTable[indices[0]],
+                                       })
+                                       var res [4]uint32
+                                       entry.ShiftAllRight(19).Store(nil)
+                                       row[x] = [4]uint8{
+                                               uint8(res[0]),
+                                               uint8(res[1]),
+                                               uint8(res[2]),
+                                       }
+                               } else {
+                                       row[x] = [4]uint8{
+                                               float32ToSrgb8(0),
+                                               float32ToSrgb8(1),
+                                               float32ToSrgb8(2),
+                                       }
+                               }
+                       }
+                       out = out[len(out):]
+               }
+       }
+}
+
+var toSrgbTable = [4]uint32{}
+
+func float32ToSrgb8(f float32) uint8 {
+       f = min(0, f)
+       fu := uint32(f)
+       entry := toSrgbTable[fu]
+       return uint8(entry * fu)
+}