--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Initial set of opcode combinations based on
+// improvements to processing of constant
+// operands.
+
+// Full set will be added at a later date.
+
+#include "../../../../../runtime/textflag.h"
+
+TEXT asmtest(SB),DUPOK|NOSPLIT,$0
+ // move constants
+ MOVD $1, R3 // 38600001
+ MOVD $-1, R4 // 3880ffff
+ MOVD $65535, R5 // 6005ffff
+ MOVD $65536, R6 // 64060001
+ MOVD $-32767, R5 // 38a08001
+ MOVD $-32768, R6 // 38c08000
+ MOVD $1234567, R5 // 6405001260a5d687
+
+ // add constants
+ ADD $1, R3 // 38630001
+ ADD $1, R3, R4 // 38830001
+ ADD $-1, R4 // 3884ffff
+ ADD $-1, R4, R5 // 38a4ffff
+ ADD $65535, R5 // 601fffff7cbf2a14
+ ADD $65535, R5, R6 // 601fffff7cdf2a14
+ ADD $65536, R6 // 3cc60001
+ ADD $65536, R6, R7 // 3ce60001
+ ADD $-32767, R5 // 38a58001
+ ADD $-32767, R5, R4 // 38858001
+ ADD $-32768, R6 // 38c68000
+ ADD $-32768, R6, R5 // 38a68000
+ ADD $1234567, R5 // 641f001263ffd6877cbf2a14
+ ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14
+
+ // and constants
+ ANDCC $1, R3 // 70630001
+ ANDCC $1, R3, R4 // 70640001
+ ANDCC $-1, R4 // 3be0ffff7fe42039
+ ANDCC $-1, R4, R5 // 3be0ffff7fe52039
+ ANDCC $65535, R5 // 70a5ffff
+ ANDCC $65535, R5, R6 // 70a6ffff
+ ANDCC $65536, R6 // 74c60001
+ ANDCC $65536, R6, R7 // 74c70001
+ ANDCC $-32767, R5 // 3be080017fe52839
+ ANDCC $-32767, R5, R4 // 3be080017fe42839
+ ANDCC $-32768, R6 // 3be080007fe63039
+ ANDCC $-32768, R5, R6 // 3be080007fe62839
+ ANDCC $1234567, R5 // 641f001263ffd6877fe52839
+ ANDCC $1234567, R5, R6 // 641f001263ffd6877fe62839
+
+ // or constants
+ OR $1, R3 // 60630001
+ OR $1, R3, R4 // 60640001
+ OR $-1, R4 // 3be0ffff7fe42378
+ OR $-1, R4, R5 // 3be0ffff7fe52378
+ OR $65535, R5 // 60a5ffff
+ OR $65535, R5, R6 // 60a6ffff
+ OR $65536, R6 // 64c60001
+ OR $65536, R6, R7 // 64c70001
+ OR $-32767, R5 // 3be080017fe52b78
+ OR $-32767, R5, R6 // 3be080017fe62b78
+ OR $-32768, R6 // 3be080007fe63378
+ OR $-32768, R6, R7 // 3be080007fe73378
+ OR $1234567, R5 // 641f001263ffd6877fe52b78
+ OR $1234567, R5, R3 // 641f001263ffd6877fe32b78
+
+ // or constants
+ XOR $1, R3 // 68630001
+ XOR $1, R3, R4 // 68640001
+ XOR $-1, R4 // 3be0ffff7fe42278
+ XOR $-1, R4, R5 // 3be0ffff7fe52278
+ XOR $65535, R5 // 68a5ffff
+ XOR $65535, R5, R6 // 68a6ffff
+ XOR $65536, R6 // 6cc60001
+ XOR $65536, R6, R7 // 6cc70001
+ XOR $-32767, R5 // 3be080017fe52a78
+ XOR $-32767, R5, R6 // 3be080017fe62a78
+ XOR $-32768, R6 // 3be080007fe63278
+ XOR $-32768, R6, R7 // 3be080007fe73278
+ XOR $1234567, R5 // 641f001263ffd6877fe52a78
+ XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78
+
+ RET
param int16
}
+// This optab contains a list of opcodes with the operand
+// combinations that are implemented. Not all opcodes are in this
+// table, but are added later in buildop by calling opset for those
+// opcodes which allow the same operand combinations as an opcode
+// already in the table.
+//
+// The type field in the Optabl identifies the case in asmout where
+// the instruction word is assembled.
var optab = []Optab{
{obj.ATEXT, C_LEXT, C_NONE, C_NONE, C_TEXTSIZE, 0, 0, 0},
{obj.ATEXT, C_LEXT, C_NONE, C_LCON, C_TEXTSIZE, 0, 0, 0},
{AMOVWZ, C_REG, C_NONE, C_NONE, C_REG, 13, 4, 0},
{AADD, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0},
{AADD, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
+ {AADD, C_SCON, C_REG, C_NONE, C_REG, 4, 4, 0},
+ {AADD, C_SCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
{AADD, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
{AADD, C_ADDCON, C_NONE, C_NONE, C_REG, 4, 4, 0},
{AADD, C_UCON, C_REG, C_NONE, C_REG, 20, 4, 0},
{AADD, C_UCON, C_NONE, C_NONE, C_REG, 20, 4, 0},
- {AADDIS, C_ADDCON, C_REG, C_NONE, C_REG, 20, 4, 0},
- {AADDIS, C_ADDCON, C_NONE, C_NONE, C_REG, 20, 4, 0},
+ {AADD, C_ANDCON, C_REG, C_NONE, C_REG, 22, 8, 0},
+ {AADD, C_ANDCON, C_NONE, C_NONE, C_REG, 22, 8, 0},
{AADD, C_LCON, C_REG, C_NONE, C_REG, 22, 12, 0},
{AADD, C_LCON, C_NONE, C_NONE, C_REG, 22, 12, 0},
+ {AADDIS, C_ADDCON, C_REG, C_NONE, C_REG, 20, 4, 0},
+ {AADDIS, C_ADDCON, C_NONE, C_NONE, C_REG, 20, 4, 0},
{AADDC, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0},
{AADDC, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
{AADDC, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
{AANDCC, C_ANDCON, C_REG, C_NONE, C_REG, 58, 4, 0},
{AANDCC, C_UCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
{AANDCC, C_UCON, C_REG, C_NONE, C_REG, 59, 4, 0},
- {AANDISCC, C_ANDCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
- {AANDISCC, C_ANDCON, C_REG, C_NONE, C_REG, 59, 4, 0},
+ {AANDCC, C_ADDCON, C_NONE, C_NONE, C_REG, 23, 8, 0},
+ {AANDCC, C_ADDCON, C_REG, C_NONE, C_REG, 23, 8, 0},
{AANDCC, C_LCON, C_NONE, C_NONE, C_REG, 23, 12, 0},
{AANDCC, C_LCON, C_REG, C_NONE, C_REG, 23, 12, 0},
+ {AANDISCC, C_ANDCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
+ {AANDISCC, C_ANDCON, C_REG, C_NONE, C_REG, 59, 4, 0},
{AMULLW, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0},
{AMULLW, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
{AMULLW, C_ADDCON, C_REG, C_NONE, C_REG, 4, 4, 0},
{AOR, C_ANDCON, C_REG, C_NONE, C_REG, 58, 4, 0},
{AOR, C_UCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
{AOR, C_UCON, C_REG, C_NONE, C_REG, 59, 4, 0},
- {AORIS, C_ANDCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
- {AORIS, C_ANDCON, C_REG, C_NONE, C_REG, 59, 4, 0},
+ {AOR, C_ADDCON, C_NONE, C_NONE, C_REG, 23, 8, 0},
+ {AOR, C_ADDCON, C_REG, C_NONE, C_REG, 23, 8, 0},
{AOR, C_LCON, C_NONE, C_NONE, C_REG, 23, 12, 0},
{AOR, C_LCON, C_REG, C_NONE, C_REG, 23, 12, 0},
+ {AORIS, C_ANDCON, C_NONE, C_NONE, C_REG, 59, 4, 0},
+ {AORIS, C_ANDCON, C_REG, C_NONE, C_REG, 59, 4, 0},
{ADIVW, C_REG, C_REG, C_NONE, C_REG, 2, 4, 0}, /* op r1[,r2],r3 */
{ADIVW, C_REG, C_NONE, C_NONE, C_REG, 2, 4, 0},
{ASUB, C_REG, C_REG, C_NONE, C_REG, 10, 4, 0}, /* op r2[,r1],r3 */
{AMOVD, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB},
{AMOVD, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP},
{AMOVD, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
+ {AMOVD, C_ANDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
{AMOVW, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, /* TO DO: check */
{AMOVW, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP},
{AMOVW, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB},
{AMOVW, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP},
{AMOVW, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
+ {AMOVW, C_ANDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
{AMOVWZ, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, /* TO DO: check */
{AMOVWZ, C_SACON, C_NONE, C_NONE, C_REG, 3, 4, REGSP},
{AMOVWZ, C_LECON, C_NONE, C_NONE, C_REG, 26, 8, REGSB},
{AMOVWZ, C_LACON, C_NONE, C_NONE, C_REG, 26, 8, REGSP},
{AMOVWZ, C_ADDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
+ {AMOVWZ, C_ANDCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
/* load unsigned/long constants (TO DO: check) */
{AMOVD, C_UCON, C_NONE, C_NONE, C_REG, 3, 4, REGZERO},
x[i], x[j] = x[j], x[i]
}
+// Used when sorting the optab. Sorting is
+// done in a way so that the best choice of
+// opcode/operand combination is considered first.
func (x ocmp) Less(i, j int) bool {
p1 := &x[i]
p2 := &x[j]
n := int(p1.as) - int(p2.as)
+ // same opcode
+ if n != 0 {
+ return n < 0
+ }
+ // Consider those that generate fewer
+ // instructions first.
+ n = int(p1.size) - int(p2.size)
if n != 0 {
return n < 0
}
+ // operand order should match
+ // better choices first
n = int(p1.a1) - int(p2.a1)
if n != 0 {
return n < 0
}
return false
}
+
+// Add an entry to the opcode table for
+// a new opcode b0 with the same operand combinations
+// as opcode a.
func opset(a, b0 obj.As) {
oprange[a&obj.AMask] = oprange[b0]
}
+// Build the opcode table
func buildop(ctxt *obj.Link) {
if oprange[AANDN&obj.AMask] != nil {
// Already initialized; stop now.
}
o1 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
- case 3: /* mov $soreg/addcon/ucon, r ==> addis/addi $i,reg',r */
+ case 3: /* mov $soreg/addcon/andcon/ucon, r ==> addis/oris/addi/ori $i,reg',r */
d := c.vregoff(&p.From)
v := int32(d)
if d&0xffff != 0 {
log.Fatalf("invalid handling of %v", p)
}
+ // For UCON operands the value is right shifted 16, using ADDIS if the
+ // value should be signed, ORIS if unsigned.
v >>= 16
if r == REGZERO && isuint32(uint64(d)) {
o1 = LOP_IRR(OP_ORIS, uint32(p.To.Reg), REGZERO, uint32(v))
}
a = OP_ADDIS
- } else {
- if int64(int16(d)) != d {
+ } else if int64(int16(d)) != d {
+ // Operand is 16 bit value with sign bit set
+ if o.a1 == C_ANDCON {
+ // Needs unsigned 16 bit so use ORI
+ if r == 0 || r == REGZERO {
+ o1 = LOP_IRR(uint32(OP_ORI), uint32(p.To.Reg), uint32(0), uint32(v))
+ break
+ }
+ // With ADDCON, needs signed 16 bit value, fall through to use ADDI
+ } else if o.a1 != C_ADDCON {
log.Fatalf("invalid handling of %v", p)
}
}
o1, o2 = c.symbolAccess(p.From.Sym, d, p.To.Reg, OP_ADDI)
}
- //if(dlm) reloc(&p->from, p->pc, 0);
-
case 20: /* add $ucon,,r | addis $addcon,r,r */
v := c.regoff(&p.From)
o1 = AOP_IRR(c.opirr(AADDIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
}
- case 22: /* add $lcon,r1,r2 ==> cau+or+add */ /* could do add/sub more efficiently */
+ case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add */
if p.To.Reg == REGTMP || p.Reg == REGTMP {
c.ctxt.Diag("can't synthesize large constant\n%v", p)
}
d := c.vregoff(&p.From)
- o1 = loadu32(REGTMP, d)
- o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d)))
r := int(p.Reg)
if r == 0 {
r = int(p.To.Reg)
}
- o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
if p.From.Sym != nil {
c.ctxt.Diag("%v is not supported", p)
}
+ // If operand is ANDCON, generate 2 instructions using
+ // ORI for unsigned value; with LCON 3 instructions.
+ if o.size == 8 {
+ o1 = LOP_IRR(OP_ORI, REGTMP, REGZERO, uint32(int32(d)))
+ o2 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
+ } else {
+ o1 = loadu32(REGTMP, d)
+ o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d)))
+ o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
+ }
- //if(dlm) reloc(&p->from, p->pc, 0);
-
- case 23: /* and $lcon,r1,r2 ==> cau+or+and */ /* masks could be done using rlnm etc. */
+ case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */
if p.To.Reg == REGTMP || p.Reg == REGTMP {
c.ctxt.Diag("can't synthesize large constant\n%v", p)
}
d := c.vregoff(&p.From)
- o1 = loadu32(REGTMP, d)
- o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d)))
r := int(p.Reg)
if r == 0 {
r = int(p.To.Reg)
}
- o3 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
+
+ // With ADDCON operand, generate 2 instructions using ADDI for signed value,
+ // with LCON operand generate 3 instructions.
+ if o.size == 8 {
+ o1 = LOP_IRR(OP_ADDI, REGZERO, REGTMP, uint32(int32(d)))
+ o2 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
+ } else {
+ o1 = loadu32(REGTMP, d)
+ o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d)))
+ o3 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
+ }
if p.From.Sym != nil {
c.ctxt.Diag("%v is not supported", p)
}
- //if(dlm) reloc(&p->from, p->pc, 0);
-
- /*24*/
case 25:
/* sld[.] $sh,rS,rA -> rldicr[.] $sh,rS,mask(0,63-sh),rA; srd[.] -> rldicl */
v := c.regoff(&p.From)
case AXOR:
o1 = LOP_IRR(c.opirr(AXORIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
case AANDCC:
- o1 = LOP_IRR(c.opirr(AANDCC), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
+ o1 = LOP_IRR(c.opirr(AANDISCC), uint32(p.To.Reg), uint32(r), uint32(v)>>16)
default:
o1 = LOP_IRR(c.opirr(p.As), uint32(p.To.Reg), uint32(r), uint32(v))
}