From 6e5398bad16aba15db365b72cb70d1177330b603 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Fri, 16 Feb 2024 13:29:16 -0600 Subject: [PATCH] cmd/asm,cmd/compile: generate less instructions for most 32 bit constant adds on ppc64x For GOPPC64 < 10 targets, most large 32 bit constants (those exceeding int16 capacity) can be added using two instructions instead of 3. This cannot be done for values greater than 0x7FFF7FFF, so this must be done during asm preprocessing as the optab matching rules cannot differentiate this special case. Likewise, constants 0x8000 <= x < 0x10000 are not converted. The assembler currently generates 2 instructions sequences for these constants. Change-Id: I1ccc839c6c28fc32f15d286b2e52e2d22a2a06d4 Reviewed-on: https://go-review.googlesource.com/c/go/+/568116 Reviewed-by: Cherry Mui Reviewed-by: Michael Knyszek LUCI-TryBot-Result: Go LUCI Run-TryBot: Paul Murphy Reviewed-by: Lynn Boger TryBot-Result: Gopher Robot --- src/cmd/asm/internal/asm/testdata/ppc64.s | 6 ++-- src/cmd/internal/obj/ppc64/obj9.go | 42 ++++++++++++++++++++--- test/codegen/arithmetic.go | 26 ++++++++++++++ 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index da0b25c1ac..57060a3c10 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -192,8 +192,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 // this is OK since r0 == $0, but the latter is preferred. ADD $0, R6, R5 // 7ca60214 - ADD $1234567, R5 // 641f001263ffd6877cbf2a14 or 0600001238a5d687 - ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 or 0600001238c5d687 + //TODO: the assembler rewrites these into ADDIS $19, R5, Rx and ADD $-10617, Rx, Rx, but the test only sees the first ADDIS + ADD $1234567, R5 // 3ca50013 or 0600001238a5d687 + ADD $1234567, R5, R6 // 3cc50013 or 0600001238c5d687 + ADDEX R3, R5, $3, R6 // 7cc32f54 ADDEX R3, $3, R5, R6 // 7cc32f54 ADDIS $8, R3 // 3c630008 diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go index a3d392d62c..ab7e0f6a77 100644 --- a/src/cmd/internal/obj/ppc64/obj9.go +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -35,6 +35,7 @@ import ( "cmd/internal/src" "cmd/internal/sys" "internal/abi" + "internal/buildcfg" "log" "math/bits" ) @@ -203,17 +204,48 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { } case ASUB: - if p.From.Type == obj.TYPE_CONST { - p.From.Offset = -p.From.Offset - p.As = AADD + if p.From.Type != obj.TYPE_CONST { + break } + // Rewrite SUB $const,... into ADD $-const,... + p.From.Offset = -p.From.Offset + p.As = AADD + // This is now an ADD opcode, try simplifying it below. + fallthrough // Rewrite ADD/OR/XOR/ANDCC $const,... forms into ADDIS/ORIS/XORIS/ANDISCC case AADD: - // AADD can encode signed 34b values, ensure it is a valid signed 32b integer too. - if p.From.Type == obj.TYPE_CONST && p.From.Offset&0xFFFF == 0 && int64(int32(p.From.Offset)) == p.From.Offset && p.From.Offset != 0 { + // Don't rewrite if this is not adding a constant value, or is not an int32 + if p.From.Type != obj.TYPE_CONST || p.From.Offset == 0 || int64(int32(p.From.Offset)) != p.From.Offset { + break + } + if p.From.Offset&0xFFFF == 0 { + // The constant can be added using ADDIS p.As = AADDIS p.From.Offset >>= 16 + } else if buildcfg.GOPPC64 >= 10 { + // Let the assembler generate paddi for large constants. + break + } else if (p.From.Offset < -0x8000 && int64(int32(p.From.Offset)) == p.From.Offset) || (p.From.Offset > 0xFFFF && p.From.Offset < 0x7FFF8000) { + // For a constant x, 0xFFFF (UINT16_MAX) < x < 0x7FFF8000 or -0x80000000 (INT32_MIN) <= x < -0x8000 (INT16_MIN) + // This is not done for 0x7FFF < x < 0x10000; the assembler will generate a slightly faster instruction sequence. + // + // The constant x can be rewritten as ADDIS + ADD as follows: + // ADDIS $x>>16 + (x>>15)&1, rX, rY + // ADD $int64(int16(x)), rY, rY + // The range is slightly asymmetric as 0x7FFF8000 and above overflow the sign bit, whereas for + // negative values, this would happen with constant values between -1 and -32768 which can + // assemble into a single addi. + is := p.From.Offset>>16 + (p.From.Offset>>15)&1 + i := int64(int16(p.From.Offset)) + p.As = AADDIS + p.From.Offset = is + q := obj.Appendp(p, c.newprog) + q.As = AADD + q.From.SetConst(i) + q.Reg = p.To.Reg + q.To = p.To + p = q } case AOR: if p.From.Type == obj.TYPE_CONST && uint64(p.From.Offset)&0xFFFFFFFF0000FFFF == 0 && p.From.Offset != 0 { diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 174c2dbcc9..dc3bab7be9 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -23,6 +23,32 @@ func AddLargeConst(a uint64, out []uint64) { // ppc64x/power9:"MOVD\t[$]-1", "SLD\t[$]33" "ADD\tR[0-9]*" // ppc64x/power8:"MOVD\t[$]-1", "SLD\t[$]33" "ADD\tR[0-9]*" out[1] = a + 0xFFFFFFFE00000000 + // ppc64x/power10:"ADD\t[$]1234567," + // ppc64x/power9:"ADDIS\t[$]19,", "ADD\t[$]-10617," + // ppc64x/power8:"ADDIS\t[$]19,", "ADD\t[$]-10617," + out[2] = a + 1234567 + // ppc64x/power10:"ADD\t[$]-1234567," + // ppc64x/power9:"ADDIS\t[$]-19,", "ADD\t[$]10617," + // ppc64x/power8:"ADDIS\t[$]-19,", "ADD\t[$]10617," + out[3] = a - 1234567 + // ppc64x/power10:"ADD\t[$]2147450879," + // ppc64x/power9:"ADDIS\t[$]32767,", "ADD\t[$]32767," + // ppc64x/power8:"ADDIS\t[$]32767,", "ADD\t[$]32767," + out[4] = a + 0x7FFF7FFF + // ppc64x/power10:"ADD\t[$]-2147483647," + // ppc64x/power9:"ADDIS\t[$]-32768,", "ADD\t[$]1," + // ppc64x/power8:"ADDIS\t[$]-32768,", "ADD\t[$]1," + out[5] = a - 2147483647 + // ppc64x:"ADDIS\t[$]-32768,", ^"ADD\t" + out[6] = a - 2147483648 + // ppc64x:"ADD\t[$]2147450880,", ^"ADDIS\t" + out[7] = a + 0x7FFF8000 + // ppc64x:"ADD\t[$]-32768,", ^"ADDIS\t" + out[8] = a - 32768 + // ppc64x/power10:"ADD\t[$]-32769," + // ppc64x/power9:"ADDIS\t[$]-1,", "ADD\t[$]32767," + // ppc64x/power8:"ADDIS\t[$]-1,", "ADD\t[$]32767," + out[9] = a - 32769 } // ----------------- // -- 2.48.1