From 481a6beba5a76403cd1c4be2fe7a6662439984c7 Mon Sep 17 00:00:00 2001 From: Wayne Zuo Date: Tue, 8 Nov 2022 15:39:27 +0800 Subject: [PATCH] cmd/internal/obj/riscv: optimize loading large immediate with trailing zeros MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This CL can avoid a memory load for some immediate. Reduce binary size slightly. compilecmp HEAD~1 -> HEAD HEAD~1 (9860faa512): math/big: remove underscores from Binomial docs HEAD (ac85312f10): cmd/internal/obj/riscv: optimize loading large immediate with many trailing zeros platform: linux/riscv64 file before after Δ % addr2line 3892904 3889844 -3060 -0.079% api 5558527 5555073 -3454 -0.062% asm 5191725 5187853 -3872 -0.075% buildid 2631958 2629356 -2602 -0.099% cgo 4722642 4718516 -4126 -0.087% compile 25579930 25564496 -15434 -0.060% cover 4989560 4985580 -3980 -0.080% dist 3538991 3536299 -2692 -0.076% doc 3990845 3987605 -3240 -0.081% fix 3410945 3407097 -3848 -0.113% link 6701702 6697926 -3776 -0.056% nm 3802030 3798664 -3366 -0.089% objdump 4211373 4207651 -3722 -0.088% pack 2423248 2421048 -2200 -0.091% pprof 14302052 14296714 -5338 -0.037% test2json 2724359 2721881 -2478 -0.091% trace 13598607 13590879 -7728 -0.057% vet 7612108 7607722 -4386 -0.058% go 15092773 15087323 -5450 -0.036% total 133976279 133891527 -84752 -0.063% Change-Id: I8615c7830ebfee0386f95d0c0fc4d29dc0b4c7fb Reviewed-on: https://go-review.googlesource.com/c/go/+/448635 Reviewed-by: Cherry Mui Run-TryBot: Wayne Zuo TryBot-Result: Gopher Robot Reviewed-by: Michael Pratt Reviewed-by: Joel Sing --- src/cmd/asm/internal/asm/testdata/riscv64.s | 4 ++- src/cmd/internal/obj/riscv/obj.go | 32 ++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s index 79d6054869..6f303858b4 100644 --- a/src/cmd/asm/internal/asm/testdata/riscv64.s +++ b/src/cmd/asm/internal/asm/testdata/riscv64.s @@ -318,7 +318,9 @@ start: MOV $-2147483647, X5 // b70200809b821200 // Converted to load of symbol (AUIPC + LD) - MOV $4294967296, X5 // 9702000083b20200 + MOV $4294967295, X5 // 9702000083b20200 + // Converted to MOV $1, X5 + SLLI $32, X5 + MOV $4294967296, X5 // 9302100093920202 MOV (X5), X6 // 03b30200 MOV 4(X5), X6 // 03b34200 diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index cbf894817d..8aa76a670d 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -26,6 +26,7 @@ import ( "cmd/internal/sys" "fmt" "log" + "math/bits" ) func buildop(ctxt *obj.Link) {} @@ -140,8 +141,14 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { p.As = AEBREAK case AMOV: - // Put >32-bit constants in memory and load them. if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == obj.REG_NONE && int64(int32(p.From.Offset)) != p.From.Offset { + ctz := bits.TrailingZeros64(uint64(p.From.Offset)) + val := p.From.Offset >> ctz + if int64(int32(val)) == val { + // It's ok. We can handle constants with many trailing zeros. + break + } + // Put >32-bit constants in memory and load them. p.From.Type = obj.TYPE_MEM p.From.Sym = ctxt.Int64Sym(p.From.Offset) p.From.Name = obj.NAME_EXTERN @@ -1838,6 +1845,23 @@ func instructionsForMOV(p *obj.Prog) []*instruction { return nil } + // For constants larger than 32 bits in size that have trailing zeros, + // use the value with the trailing zeros removed and then use a SLLI + // instruction to restore the original constant. + // For example: + // MOV $0x8000000000000000, X10 + // becomes + // MOV $1, X10 + // SLLI $63, X10, X10 + var insSLLI *instruction + if !immIFits(ins.imm, 32) { + ctz := bits.TrailingZeros64(uint64(ins.imm)) + if immIFits(ins.imm>>ctz, 32) { + ins.imm = ins.imm >> ctz + insSLLI = &instruction{as: ASLLI, rd: ins.rd, rs1: ins.rd, imm: int64(ctz)} + } + } + low, high, err := Split32BitImmediate(ins.imm) if err != nil { p.Ctxt.Diag("%v: constant %d too large: %v", p, ins.imm, err) @@ -1849,6 +1873,9 @@ func instructionsForMOV(p *obj.Prog) []*instruction { // LUI is only necessary if the constant does not fit in 12 bits. if high == 0 { + if insSLLI != nil { + inss = append(inss, insSLLI) + } break } @@ -1860,6 +1887,9 @@ func instructionsForMOV(p *obj.Prog) []*instruction { ins.as, ins.rs1 = AADDIW, ins.rd inss = append(inss, ins) } + if insSLLI != nil { + inss = append(inss, insSLLI) + } case p.From.Type == obj.TYPE_CONST && p.To.Type != obj.TYPE_REG: p.Ctxt.Diag("%v: constant load must target register", p) -- 2.50.0