From: WANG Xuerui Date: Wed, 22 Mar 2023 05:56:38 +0000 (+0800) Subject: cmd/internal/obj/loong64: assemble BEQ/BNEs comparing with 0 as beqz/bnez X-Git-Tag: go1.21rc1~1084 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=22f9317f205efc0c86ec5107a19a53150d02e439;p=gostls13.git cmd/internal/obj/loong64: assemble BEQ/BNEs comparing with 0 as beqz/bnez LoongArch (except for the extremely reduced LA32 Primary subset) has dedicated beqz/bnez instructions as alternative encodings for beq/bne with one of the source registers being R0, that allow the offset field to occupy 5 more bits, giving 21 bits in total (equal to the FP branches). Make use of them instead of beq/bne if one source operand is omitted in asm, or if one of the registers being compared is R0. Multiple go1 benchmark runs indicate the change is not perf-sensitive. Change-Id: If6267623c82092e81d75578091fb4e013658b9f3 Reviewed-on: https://go-review.googlesource.com/c/go/+/478377 Reviewed-by: Ian Lance Taylor TryBot-Result: Gopher Robot Reviewed-by: abner chenc Run-TryBot: Ben Shi Reviewed-by: Cherry Mui Auto-Submit: Wayne Zuo --- diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index f5a80d5d17..ea6c569f9d 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -116,7 +116,13 @@ lable2: ROTRV $4, R4 // 84104d00 SYSCALL // 00002b00 BEQ R4, R5, 1(PC) // 85040058 - BEQ R4, 1(PC) // 80040058 + BEQ R4, 1(PC) // 80040040 + BEQ R4, R0, 1(PC) // 80040040 + BEQ R0, R4, 1(PC) // 80040040 + BNE R4, R5, 1(PC) // 8504005c + BNE R4, 1(PC) // 80040044 + BNE R4, R0, 1(PC) // 80040044 + BNE R0, R4, 1(PC) // 80040044 BLTU R4, 1(PC) // 80040068 MOVW y+8(FP), F4 // 6440002b MOVF y+8(FP), F4 // 6440002b diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 77ffbfb4c3..5cdbed31ac 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -1095,7 +1095,7 @@ func OP_RR(op uint32, r2 uint32, r3 uint32) uint32 { } func OP_16IR_5I(op uint32, i uint32, r2 uint32) uint32 { - return op | (i&0xFFFF)<<10 | (r2&0x7)<<5 | ((i >> 16) & 0x1F) + return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | ((i >> 16) & 0x1F) } func OP_16IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { @@ -1182,23 +1182,38 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { if p.To.Target() != nil { v = int32(p.To.Target().Pc-p.Pc) >> 2 } - rd, rj := p.Reg, p.From.Reg - if p.As == ABGTZ || p.As == ABLEZ { + as, rd, rj, width := p.As, p.Reg, p.From.Reg, 16 + switch as { + case ABGTZ, ABLEZ: rd, rj = rj, rd - } - switch p.As { case ABFPT, ABFPF: + width = 21 + // FCC0 is the implicit source operand, now that we + // don't register-allocate from the FCC bank. + rd = REG_FCC0 + case ABEQ, ABNE: + if rd == 0 || rd == REGZERO || rj == REGZERO { + // BEQZ/BNEZ can be encoded with 21-bit offsets. + width = 21 + as = -as + if rj == 0 || rj == REGZERO { + rj = rd + } + } + } + switch width { + case 21: if (v<<11)>>11 != v { c.ctxt.Diag("21 bit-width, short branch too far\n%v", p) } - // FCC0 is the implicit source operand, now that we - // don't register-allocate from the FCC bank. - o1 = OP_16IR_5I(c.opirr(p.As), uint32(v), uint32(REG_FCC0)) - default: + o1 = OP_16IR_5I(c.opirr(as), uint32(v), uint32(rj)) + case 16: if (v<<16)>>16 != v { c.ctxt.Diag("16 bit-width, short branch too far\n%v", p) } - o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(rj), uint32(rd)) + o1 = OP_16IRR(c.opirr(as), uint32(v), uint32(rj), uint32(rd)) + default: + c.ctxt.Diag("unexpected branch encoding\n%v", p) } case 7: // mov r, soreg @@ -1902,6 +1917,10 @@ func (c *ctxt0) opirr(a obj.As) uint32 { return 0x1b << 26 case ABGE, ABGEZ, ABLEZ: return 0x19 << 26 + case -ABEQ: // beqz + return 0x10 << 26 + case -ABNE: // bnez + return 0x11 << 26 case ABEQ: return 0x16 << 26 case ABNE: