Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile/internal/ssa: re-adjust CarryChainTail scheduling priority
author: Paul E. Murphy <murp@ibm.com>
Fri, 28 Oct 2022 20:59:43 +0000 (15:59 -0500)
committer: Paul Murphy <murp@ibm.com>
Thu, 3 Nov 2022 19:59:19 +0000 (19:59 +0000)
This needs to be as low as possible while not breaking priority
assumptions of other scores to correctly schedule carry chains.

Prior to the arm64 changes, it was set below ReadTuple. At the time,
this prevented the MulHiLo implementation on PPC64 from occluding
the scheduling of a full carry chain.

Memory scores can also prevent better scheduling, as can be observed
with crypto/internal/edwards25519/field.feMulGeneric.

Fixes #56497

Change-Id: Ia4b54e6dffcce584faf46b1b8d7cea18a3913887
Reviewed-on: https://go-review.googlesource.com/c/go/+/447435
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/schedule.go
test/codegen/mathbits.go

index 6e570aa82a675c241351e8978d40508b5187fc9d..4e762f7b3fd223a4808f2103b3e1d64f743b63a1 100644 (file)
@@ -16,8 +16,8 @@ const (
        ScoreNilCheck
        ScoreReadTuple
        ScoreVarDef
-       ScoreMemory
        ScoreCarryChainTail
+       ScoreMemory
        ScoreReadFlags
        ScoreDefault
        ScoreFlags
index acc9930c61d4573ef71447fb99f8ee552914e6c1..b506a370062d5c5026bdb3aab5b00b8b90764cc0 100644 (file)
@@ -620,6 +620,39 @@ func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
        return r
 }
 
+// Verify independent carry chain operations are scheduled efficiently
+// and do not cause unnecessary save/restore of the CA bit.
+//
+// This is an example of why CarryChainTail priority must be lower
+// (earlier in the block) than Memory. The store d[0]=d1 could be scheduled
+// after the first two lower 64 bit limb adds, but before either of the
+// high 64 bit limbs is added.
+//
+// This is what happened on PPC64 when compiling
+// crypto/internal/edwards25519/field.feMulGeneric.
+func Add64MultipleChains(a, b, c, d [2]uint64) {
+       var cx, d1, d2 uint64
+       a1, a2 := a[0], a[1]
+       b1, b2 := b[0], b[1]
+       c1, c2 := c[0], c[1]
+
+       // ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
+       // ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
+       d1, cx = bits.Add64(a1, b1, 0)
+       // ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER"
+       // ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER"
+       d2, _ = bits.Add64(a2, b2, cx)
+
+       // ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
+       // ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
+       d1, cx = bits.Add64(c1, d1, 0)
+       // ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER"
+       // ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER"
+       d2, _ = bits.Add64(c2, d2, cx)
+       d[0] = d1
+       d[1] = d2
+}
+
 // --------------- //
 //    bits.Sub*    //
 // --------------- //