From d031e9e07a07afef8d16576fd7079a739a7e4394 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Fri, 28 Oct 2022 15:59:43 -0500 Subject: [PATCH] cmd/compile/internal/ssa: re-adjust CarryChainTail scheduling priority This needs to be as low as possible while not breaking priority assumptions of other scores to correctly schedule carry chains. Prior to the arm64 changes, it was set below ReadTuple. At the time, this prevented the MulHiLo implementation on PPC64 from occluding the scheduling of a full carry chain. Memory scores can also prevent better scheduling, as can be observed with crypto/internal/edwards25519/field.feMulGeneric. Fixes #56497 Change-Id: Ia4b54e6dffcce584faf46b1b8d7cea18a3913887 Reviewed-on: https://go-review.googlesource.com/c/go/+/447435 Reviewed-by: Cherry Mui Reviewed-by: Keith Randall Run-TryBot: Paul Murphy TryBot-Result: Gopher Robot Reviewed-by: Bryan Mills Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/schedule.go | 2 +- test/codegen/mathbits.go | 33 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/cmd/compile/internal/ssa/schedule.go b/src/cmd/compile/internal/ssa/schedule.go index 6e570aa82a..4e762f7b3f 100644 --- a/src/cmd/compile/internal/ssa/schedule.go +++ b/src/cmd/compile/internal/ssa/schedule.go @@ -16,8 +16,8 @@ const ( ScoreNilCheck ScoreReadTuple ScoreVarDef - ScoreMemory ScoreCarryChainTail + ScoreMemory ScoreReadFlags ScoreDefault ScoreFlags diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index acc9930c61..b506a37006 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -620,6 +620,39 @@ func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 { return r } +// Verify independent carry chain operations are scheduled efficiently +// and do not cause unnecessary save/restore of the CA bit. +// +// This is an example of why CarryChainTail priority must be lower +// (earlier in the block) than Memory. f[0]=f1 could be scheduled +// after the first two lower 64 bit limb adds, but before either +// high 64 bit limbs are added. +// +// This is what happened on PPC64 when compiling +// crypto/internal/edwards25519/field.feMulGeneric. +func Add64MultipleChains(a, b, c, d [2]uint64) { + var cx, d1, d2 uint64 + a1, a2 := a[0], a[1] + b1, b2 := b[0], b[1] + c1, c2 := c[0], c[1] + + // ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + // ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + d1, cx = bits.Add64(a1, b1, 0) + // ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER" + // ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER" + d2, _ = bits.Add64(a2, b2, cx) + + // ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + // ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + d1, cx = bits.Add64(c1, d1, 0) + // ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER" + // ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER" + d2, _ = bits.Add64(c2, d2, cx) + d[0] = d1 + d[1] = d2 +} + // --------------- // // bits.Sub* // // --------------- // -- 2.50.0