Cypherpunks repositories - gostls13.git/commitdiff
cmd/asm,cmd/compile: generate fewer instructions for most 32-bit constant adds on...
author Paul E. Murphy <murp@ibm.com>
Fri, 16 Feb 2024 19:29:16 +0000 (13:29 -0600)
committer Paul Murphy <murp@ibm.com>
Wed, 13 Mar 2024 13:58:44 +0000 (13:58 +0000)
For GOPPC64 < 10 targets, most large 32-bit constants (those
exceeding int16 capacity) can be added using two instructions
instead of three.

This cannot be done for values greater than 0x7FFF7FFF, so this
must be done during asm preprocessing as the optab matching
rules cannot differentiate this special case.

Likewise, constants 0x8000 <= x < 0x10000 are not converted. The
assembler currently generates two-instruction sequences for these
constants.

Change-Id: I1ccc839c6c28fc32f15d286b2e52e2d22a2a06d4
Reviewed-on: https://go-review.googlesource.com/c/go/+/568116
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/cmd/asm/internal/asm/testdata/ppc64.s
src/cmd/internal/obj/ppc64/obj9.go
test/codegen/arithmetic.go

index da0b25c1ac4da8e8d5406deede010c2ae885115b..57060a3c107a6367dcd4f2ba756035dcd1b4ff95 100644 (file)
@@ -192,8 +192,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
        //      this is OK since r0 == $0, but the latter is preferred.
        ADD $0, R6, R5                  // 7ca60214
 
-       ADD $1234567, R5                // 641f001263ffd6877cbf2a14 or 0600001238a5d687
-       ADD $1234567, R5, R6            // 641f001263ffd6877cdf2a14 or 0600001238c5d687
+        //TODO: the assembler rewrites these into ADDIS $19, R5, Rx and ADD $-10617, Rx, Rx, but the test only sees the first ADDIS
+       ADD $1234567, R5                // 3ca50013 or 0600001238a5d687
+       ADD $1234567, R5, R6            // 3cc50013 or 0600001238c5d687
+
        ADDEX R3, R5, $3, R6            // 7cc32f54
        ADDEX R3, $3, R5, R6            // 7cc32f54
        ADDIS $8, R3                    // 3c630008
index a3d392d62c263e7c1b4e2ba617485746ba1272a0..ab7e0f6a7739d9e920b0b76195e6e54399605214 100644 (file)
@@ -35,6 +35,7 @@ import (
        "cmd/internal/src"
        "cmd/internal/sys"
        "internal/abi"
+       "internal/buildcfg"
        "log"
        "math/bits"
 )
@@ -203,17 +204,48 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
                }
 
        case ASUB:
-               if p.From.Type == obj.TYPE_CONST {
-                       p.From.Offset = -p.From.Offset
-                       p.As = AADD
+               if p.From.Type != obj.TYPE_CONST {
+                       break
                }
+               // Rewrite SUB $const,... into ADD $-const,...
+               p.From.Offset = -p.From.Offset
+               p.As = AADD
+               // This is now an ADD opcode, try simplifying it below.
+               fallthrough
 
        // Rewrite ADD/OR/XOR/ANDCC $const,... forms into ADDIS/ORIS/XORIS/ANDISCC
        case AADD:
-               // AADD can encode signed 34b values, ensure it is a valid signed 32b integer too.
-               if p.From.Type == obj.TYPE_CONST && p.From.Offset&0xFFFF == 0 && int64(int32(p.From.Offset)) == p.From.Offset && p.From.Offset != 0 {
+               // Don't rewrite if this is not adding a constant value, or is not an int32
+               if p.From.Type != obj.TYPE_CONST || p.From.Offset == 0 || int64(int32(p.From.Offset)) != p.From.Offset {
+                       break
+               }
+               if p.From.Offset&0xFFFF == 0 {
+                       // The constant can be added using ADDIS
                        p.As = AADDIS
                        p.From.Offset >>= 16
+               } else if buildcfg.GOPPC64 >= 10 {
+                       // Let the assembler generate paddi for large constants.
+                       break
+               } else if (p.From.Offset < -0x8000 && int64(int32(p.From.Offset)) == p.From.Offset) || (p.From.Offset > 0xFFFF && p.From.Offset < 0x7FFF8000) {
+                       // For a constant x, 0xFFFF (UINT16_MAX) < x < 0x7FFF8000 or -0x80000000 (INT32_MIN) <= x < -0x8000 (INT16_MIN)
+                       // This is not done for 0x7FFF < x < 0x10000; the assembler will generate a slightly faster instruction sequence.
+                       //
+                       // The constant x can be rewritten as ADDIS + ADD as follows:
+                       //     ADDIS $x>>16 + (x>>15)&1, rX, rY
+                       //     ADD   $int64(int16(x)), rY, rY
+                       // The range is slightly asymmetric as 0x7FFF8000 and above overflow the sign bit, whereas for
+                       // negative values, this would happen with constant values between -1 and -32768 which can
+                       // assemble into a single addi.
+                       is := p.From.Offset>>16 + (p.From.Offset>>15)&1
+                       i := int64(int16(p.From.Offset))
+                       p.As = AADDIS
+                       p.From.Offset = is
+                       q := obj.Appendp(p, c.newprog)
+                       q.As = AADD
+                       q.From.SetConst(i)
+                       q.Reg = p.To.Reg
+                       q.To = p.To
+                       p = q
                }
        case AOR:
                if p.From.Type == obj.TYPE_CONST && uint64(p.From.Offset)&0xFFFFFFFF0000FFFF == 0 && p.From.Offset != 0 {
index 174c2dbcc90eaf8e4b597f42e31ca04164cf656f..dc3bab7be954df33849d77dbfdae3905f058444b 100644 (file)
@@ -23,6 +23,32 @@ func AddLargeConst(a uint64, out []uint64) {
        // ppc64x/power9:"MOVD\t[$]-1", "SLD\t[$]33" "ADD\tR[0-9]*"
        // ppc64x/power8:"MOVD\t[$]-1", "SLD\t[$]33" "ADD\tR[0-9]*"
        out[1] = a + 0xFFFFFFFE00000000
+       // ppc64x/power10:"ADD\t[$]1234567,"
+       // ppc64x/power9:"ADDIS\t[$]19,", "ADD\t[$]-10617,"
+       // ppc64x/power8:"ADDIS\t[$]19,", "ADD\t[$]-10617,"
+       out[2] = a + 1234567
+       // ppc64x/power10:"ADD\t[$]-1234567,"
+       // ppc64x/power9:"ADDIS\t[$]-19,", "ADD\t[$]10617,"
+       // ppc64x/power8:"ADDIS\t[$]-19,", "ADD\t[$]10617,"
+       out[3] = a - 1234567
+       // ppc64x/power10:"ADD\t[$]2147450879,"
+       // ppc64x/power9:"ADDIS\t[$]32767,", "ADD\t[$]32767,"
+       // ppc64x/power8:"ADDIS\t[$]32767,", "ADD\t[$]32767,"
+       out[4] = a + 0x7FFF7FFF
+       // ppc64x/power10:"ADD\t[$]-2147483647,"
+       // ppc64x/power9:"ADDIS\t[$]-32768,", "ADD\t[$]1,"
+       // ppc64x/power8:"ADDIS\t[$]-32768,", "ADD\t[$]1,"
+       out[5] = a - 2147483647
+       // ppc64x:"ADDIS\t[$]-32768,", ^"ADD\t"
+       out[6] = a - 2147483648
+       // ppc64x:"ADD\t[$]2147450880,", ^"ADDIS\t"
+       out[7] = a + 0x7FFF8000
+       // ppc64x:"ADD\t[$]-32768,", ^"ADDIS\t"
+       out[8] = a - 32768
+       // ppc64x/power10:"ADD\t[$]-32769,"
+       // ppc64x/power9:"ADDIS\t[$]-1,", "ADD\t[$]32767,"
+       // ppc64x/power8:"ADDIS\t[$]-1,", "ADD\t[$]32767,"
+       out[9] = a - 32769
 }
 
 // ----------------- //