From: Paul E. Murphy Date: Fri, 28 Oct 2022 20:59:43 +0000 (-0500) Subject: cmd/compile/internal/ssa: re-adjust CarryChainTail scheduling priority X-Git-Tag: go1.20rc1~433 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=d031e9e07a07afef8d16576fd7079a739a7e4394;p=gostls13.git cmd/compile/internal/ssa: re-adjust CarryChainTail scheduling priority This needs to be as low as possible while not breaking priority assumptions of other scores to correctly schedule carry chains. Prior to the arm64 changes, it was set below ReadTuple. At the time, this prevented the MulHiLo implementation on PPC64 from occluding the scheduling of a full carry chain. Memory scores can also prevent better scheduling, as can be observed with crypto/internal/edwards25519/field.feMulGeneric. Fixes #56497 Change-Id: Ia4b54e6dffcce584faf46b1b8d7cea18a3913887 Reviewed-on: https://go-review.googlesource.com/c/go/+/447435 Reviewed-by: Cherry Mui Reviewed-by: Keith Randall Run-TryBot: Paul Murphy TryBot-Result: Gopher Robot Reviewed-by: Bryan Mills Reviewed-by: Keith Randall --- diff --git a/src/cmd/compile/internal/ssa/schedule.go b/src/cmd/compile/internal/ssa/schedule.go index 6e570aa82a..4e762f7b3f 100644 --- a/src/cmd/compile/internal/ssa/schedule.go +++ b/src/cmd/compile/internal/ssa/schedule.go @@ -16,8 +16,8 @@ const ( ScoreNilCheck ScoreReadTuple ScoreVarDef - ScoreMemory ScoreCarryChainTail + ScoreMemory ScoreReadFlags ScoreDefault ScoreFlags diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index acc9930c61..b506a37006 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -620,6 +620,39 @@ func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 { return r } +// Verify independent carry chain operations are scheduled efficiently +// and do not cause unnecessary save/restore of the CA bit. +// +// This is an example of why CarryChainTail priority must be lower +// (earlier in the block) than Memory. f[0]=f1 could be scheduled +// after the first two lower 64 bit limb adds, but before either +// high 64 bit limbs are added. +// +// This is what happened on PPC64 when compiling +// crypto/internal/edwards25519/field.feMulGeneric. +func Add64MultipleChains(a, b, c, d [2]uint64) { + var cx, d1, d2 uint64 + a1, a2 := a[0], a[1] + b1, b2 := b[0], b[1] + c1, c2 := c[0], c[1] + + // ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + // ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + d1, cx = bits.Add64(a1, b1, 0) + // ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER" + // ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER" + d2, _ = bits.Add64(a2, b2, cx) + + // ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + // ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + d1, cx = bits.Add64(c1, d1, 0) + // ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER" + // ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER" + d2, _ = bits.Add64(c2, d2, cx) + d[0] = d1 + d[1] = d2 +} + // --------------- // // bits.Sub* // // --------------- //