]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/loong64: assemble BEQ/BNEs comparing with 0 as beqz/bnez
authorWANG Xuerui <git@xen0n.name>
Wed, 22 Mar 2023 05:56:38 +0000 (13:56 +0800)
committerGopher Robot <gobot@golang.org>
Fri, 31 Mar 2023 02:56:19 +0000 (02:56 +0000)
LoongArch (except for the extremely reduced LA32 Primary subset) has
dedicated beqz/bnez instructions as alternative encodings for beq/bne
with one of the source registers being R0, that allow the offset field
to occupy 5 more bits, giving 21 bits in total (equal to the FP
branches). Make use of them instead of beq/bne if one source operand is
omitted in asm, or if one of the registers being compared is R0.

Multiple go1 benchmark runs indicate the change is not perf-sensitive.

Change-Id: If6267623c82092e81d75578091fb4e013658b9f3
Reviewed-on: https://go-review.googlesource.com/c/go/+/478377
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Run-TryBot: Ben Shi <powerman1st@163.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Wayne Zuo <wdvxdr@golangcn.org>

src/cmd/asm/internal/asm/testdata/loong64enc1.s
src/cmd/internal/obj/loong64/asm.go

index f5a80d5d172232868709c6653236e5a896ba8056..ea6c569f9dface39837fe02613da0e57357ae9c5 100644 (file)
@@ -116,7 +116,13 @@ lable2:
        ROTRV   $4, R4                  // 84104d00
        SYSCALL                         // 00002b00
        BEQ     R4, R5, 1(PC)           // 85040058
-       BEQ     R4, 1(PC)               // 80040058
+       BEQ     R4, 1(PC)               // 80040040
+       BEQ     R4, R0, 1(PC)           // 80040040
+       BEQ     R0, R4, 1(PC)           // 80040040
+       BNE     R4, R5, 1(PC)           // 8504005c
+       BNE     R4, 1(PC)               // 80040044
+       BNE     R4, R0, 1(PC)           // 80040044
+       BNE     R0, R4, 1(PC)           // 80040044
        BLTU    R4, 1(PC)               // 80040068
        MOVW    y+8(FP), F4             // 6440002b
        MOVF    y+8(FP), F4             // 6440002b
index 77ffbfb4c345ca692117e57c8c7c327542a7e0ed..5cdbed31acb0d925d6c7c3e97cf192a536a706f9 100644 (file)
@@ -1095,7 +1095,7 @@ func OP_RR(op uint32, r2 uint32, r3 uint32) uint32 {
 }
 
 func OP_16IR_5I(op uint32, i uint32, r2 uint32) uint32 {
-       return op | (i&0xFFFF)<<10 | (r2&0x7)<<5 | ((i >> 16) & 0x1F)
+       return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | ((i >> 16) & 0x1F)
 }
 
 func OP_16IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
@@ -1182,23 +1182,38 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
                if p.To.Target() != nil {
                        v = int32(p.To.Target().Pc-p.Pc) >> 2
                }
-               rd, rj := p.Reg, p.From.Reg
-               if p.As == ABGTZ || p.As == ABLEZ {
+               as, rd, rj, width := p.As, p.Reg, p.From.Reg, 16
+               switch as {
+               case ABGTZ, ABLEZ:
                        rd, rj = rj, rd
-               }
-               switch p.As {
                case ABFPT, ABFPF:
+                       width = 21
+                       // FCC0 is the implicit source operand, now that we
+                       // don't register-allocate from the FCC bank.
+                       rd = REG_FCC0
+               case ABEQ, ABNE:
+                       if rd == 0 || rd == REGZERO || rj == REGZERO {
+                               // BEQZ/BNEZ can be encoded with 21-bit offsets.
+                               width = 21
+                               as = -as
+                               if rj == 0 || rj == REGZERO {
+                                       rj = rd
+                               }
+                       }
+               }
+               switch width {
+               case 21:
                        if (v<<11)>>11 != v {
                                c.ctxt.Diag("21 bit-width, short branch too far\n%v", p)
                        }
-                       // FCC0 is the implicit source operand, now that we
-                       // don't register-allocate from the FCC bank.
-                       o1 = OP_16IR_5I(c.opirr(p.As), uint32(v), uint32(REG_FCC0))
-               default:
+                       o1 = OP_16IR_5I(c.opirr(as), uint32(v), uint32(rj))
+               case 16:
                        if (v<<16)>>16 != v {
                                c.ctxt.Diag("16 bit-width, short branch too far\n%v", p)
                        }
-                       o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(rj), uint32(rd))
+                       o1 = OP_16IRR(c.opirr(as), uint32(v), uint32(rj), uint32(rd))
+               default:
+                       c.ctxt.Diag("unexpected branch encoding\n%v", p)
                }
 
        case 7: // mov r, soreg
@@ -1902,6 +1917,10 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
                return 0x1b << 26
        case ABGE, ABGEZ, ABLEZ:
                return 0x19 << 26
+       case -ABEQ: // beqz
+               return 0x10 << 26
+       case -ABNE: // bnez
+               return 0x11 << 26
        case ABEQ:
                return 0x16 << 26
        case ABNE: