From: Paul E. Murphy Date: Mon, 27 Nov 2023 23:05:56 +0000 (-0600) Subject: cmd/internal/obj/ppc64: generate smaller machine code for OR/XOR of uint32 values X-Git-Tag: go1.23rc1~1047 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=288615ddb57a79fa78c6f13b829bd8daeeff8fa1;p=gostls13.git cmd/internal/obj/ppc64: generate smaller machine code for OR/XOR of uint32 values These binary operations can be done in two sequential instructions instead of loading a constant into REGTMP and doing the binary op. Change-Id: Ie0ab863f9e81afad140b92b265bca4d3f0fe90b1 Reviewed-on: https://go-review.googlesource.com/c/go/+/565215 Reviewed-by: Lynn Boger Reviewed-by: Carlos Amedee Reviewed-by: Michael Pratt TryBot-Result: Gopher Robot LUCI-TryBot-Result: Go LUCI Run-TryBot: Paul Murphy --- diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index 01052b49e7..da0b25c1ac 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -230,10 +230,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 OR $-32767, R5, R6 // 3be080017fe62b78 OR $-32768, R6 // 3be080007fe63378 OR $-32768, R6, R7 // 3be080007fe73378 - OR $1234567, R5 // 641f001263ffd6877fe52b78 - OR $1234567, R5, R3 // 641f001263ffd6877fe32b78 + OR $1234567, R5 // 64a5001260a5d687 + OR $1234567, R5, R3 // 64a300126063d687 OR $2147483648, R5, R3 // 64a38000 - OR $2147483649, R5, R3 // 641f800063ff00017fe32b78 + OR $2147483649, R5, R3 // 64a3800060630001 ORIS $255, R3, R4 // 646400ff OR $16711680, R3, R4 // 646400ff @@ -249,8 +249,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 XOR $-32767, R5, R6 // 3be080017fe62a78 XOR $-32768, R6 // 3be080007fe63278 XOR $-32768, R6, R7 // 3be080007fe73278 - XOR $1234567, R5 // 641f001263ffd6877fe52a78 - XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78 + XOR $1234567, R5 // 6ca5001268a5d687 + XOR $1234567, R5, R3 // 6ca300126863d687 XORIS $15, R3, R4 // 6c64000f XOR $983040, R3, R4 // 6c64000f diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 189b0fb5a8..2793600cd0 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -156,6 +156,8 @@ var optabBase = []Optab{ {as: AOR, a1: C_U16CON, a2: C_REG, a6: C_REG, type_: 58, size: 4}, {as: AOR, a1: C_S16CON, a6: C_REG, type_: 23, size: 8}, {as: AOR, a1: C_S16CON, a2: C_REG, a6: C_REG, type_: 23, size: 8}, + {as: AOR, a1: C_U32CON, a2: C_REG, a6: C_REG, type_: 21, size: 8}, + {as: AOR, a1: C_U32CON, a6: C_REG, type_: 21, size: 8}, {as: AOR, a1: C_32CON, a6: C_REG, type_: 23, size: 12}, {as: AOR, a1: C_32CON, a2: C_REG, a6: C_REG, type_: 23, size: 12}, {as: AORIS, a1: C_U16CON, a6: C_REG, type_: 58, size: 4}, @@ -2284,6 +2286,8 @@ const ( OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 + OP_XORI = 26<<26 | 0<<1 | 0<<10 | 0 + OP_XORIS = 27<<26 | 0<<1 | 0<<10 | 0 OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 @@ -2866,6 +2870,23 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { } o1 = AOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v)) + case 21: /* or $u32con,rx[,ry] => oris + ori (similar for xor) */ + var opu, opl uint32 + r := uint32(p.Reg) + if r == 0 { + r = uint32(p.To.Reg) + } + switch p.As { + case AOR: + opu, opl = OP_ORIS, OP_ORI + case AXOR: + opu, opl = OP_XORIS, OP_XORI + default: + c.ctxt.Diag("unhandled opcode.\n%v", p) + } + o1 = LOP_IRR(opu, uint32(p.To.Reg), r, uint32(p.From.Offset>>16)) + o2 = LOP_IRR(opl, uint32(p.To.Reg), uint32(p.To.Reg), uint32(p.From.Offset)&0xFFFF) + case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add, add $s34con,r1 ==> addis+ori+slw+ori+add */ if p.To.Reg == REGTMP || p.Reg == REGTMP { c.ctxt.Diag("can't synthesize large constant\n%v", p)