p.To.Reg = v.Reg()
p.To.Type = obj.TYPE_REG
- case ssa.OpPPC64MOVDload:
-
- // MOVDload uses a DS instruction which requires the offset value of the data to be a multiple of 4.
- // For offsets known at compile time, a MOVDload won't be selected, but in the case of a go.string,
- // the offset is not known until link time. If the load of a go.string uses relocation for the
- // offset field of the instruction, and if the offset is not aligned to 4, then a link error will occur.
- // To avoid this problem, the full address of the go.string is computed and loaded into the base register,
- // and that base register is used for the MOVDload using a 0 offset. This problem can only occur with
- // go.string types because other types will have proper alignment.
-
- gostring := false
- switch n := v.Aux.(type) {
- case *obj.LSym:
- gostring = strings.HasPrefix(n.Name, "go.string.")
+ case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload:
+
+ // MOVDload and MOVWload are DS form instructions that are restricted to
+ // offsets that are a multiple of 4. If the offset is not a multiple of 4,
+ // the full address of the load target is computed (base + offset), placed
+ // in a temporary register which becomes the new base register, and the
+ // offset field in the instruction is set to zero.
+
+ // The same problem can occur with gostrings: the final offset is not known
+ // until link time, so it may be unaligned once the relocation is resolved.
+ // Gostrings are therefore handled the same way.
+
+ // This allows the MOVDload and MOVWload to be generated in more cases and
+ // eliminates some offset and alignment checking in the rules file.
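+
+ // Roughly, the code below then emits (register names are illustrative;
+ // REGTMP is R31 on ppc64):
+ //	MOVD $offset(Rbase), R31   // materialize the full address in REGTMP
+ //	MOVD (R31), Rresult        // DS form load with a zero offset (MOVW for MOVWload)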
+
+ fromAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
+ ssagen.AddAux(&fromAddr, v)
+
+ genAddr := false
+
+ switch fromAddr.Name {
+ case obj.NAME_EXTERN, obj.NAME_STATIC:
+ // Special case for a rule that combines the bytes of a gostring.
+ // The alignment of v might look fine, but we don't want to load it
+ // using an offset because the relocation is applied later.
+ genAddr = strings.HasPrefix(fromAddr.Sym.Name, "go.string") || v.Type.Alignment()%4 != 0 || fromAddr.Offset%4 != 0
+ default:
+ genAddr = fromAddr.Offset%4 != 0
}
- if gostring {
- // Generate full addr of the go.string const
- // including AuxInt
+ if genAddr {
+ // Load full address into the temp register.
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_ADDR
p.From.Reg = v.Args[0].Reg()
ssagen.AddAux(&p.From, v)
+ // Load target using temp as base register
+ // and offset zero. Setting NAME_NONE
+ // prevents any extra offsets from being
+ // added.
p.To.Type = obj.TYPE_REG
- p.To.Reg = v.Reg()
- // Load go.string using 0 offset
- p = s.Prog(v.Op.Asm())
- p.From.Type = obj.TYPE_MEM
- p.From.Reg = v.Reg()
- p.To.Type = obj.TYPE_REG
- p.To.Reg = v.Reg()
- break
+ p.To.Reg = ppc64.REGTMP
+ fromAddr.Reg = ppc64.REGTMP
+ // Clear the offset field and other
+ // information that might be used
+ // by the assembler to add to the
+ // final offset value.
+ fromAddr.Offset = 0
+ fromAddr.Name = obj.NAME_NONE
+ fromAddr.Sym = nil
}
- // Not a go.string, generate a normal load
- fallthrough
+ p := s.Prog(v.Op.Asm())
+ p.From = fromAddr
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg()
+ break
- case ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
+ case ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload, ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
- case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
+ case ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REGZERO
p.To.Reg = v.Args[0].Reg()
ssagen.AddAux(&p.To, v)
- case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
+ case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVDstorezero:
+
+ // MOVDstore and MOVDstorezero become DS form instructions that are restricted
+ // to offset values that are a multiple of 4. If the offset is not a multiple
+ // of 4, the full address of the store target is computed (base + offset),
+ // placed in a temporary register which becomes the new base register, and the
+ // offset in the instruction is set to 0.
+
+ // This allows the MOVDstore and MOVDstorezero to be generated in more cases,
+ // and removes the offset and alignment checks from the rules.
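+
+ // Roughly, the code below then emits (register names are illustrative;
+ // REGTMP is R31, REGZERO is R0):
+ //	MOVD $offset(Rbase), R31   // materialize the full store address in REGTMP
+ //	MOVD Rval, (R31)           // DS form store with a zero offset (R0 for storezero)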
+
+ toAddr := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
+ ssagen.AddAux(&toAddr, v)
+
+ if toAddr.Offset%4 != 0 {
+ p := s.Prog(ppc64.AMOVD)
+ p.From.Type = obj.TYPE_ADDR
+ p.From.Reg = v.Args[0].Reg()
+ ssagen.AddAux(&p.From, v)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ppc64.REGTMP
+ toAddr.Reg = ppc64.REGTMP
+ // Clear the offset field and other
+ // information that might be used
+ // by the assembler to add to the
+ // final offset value.
+ toAddr.Offset = 0
+ toAddr.Name = obj.NAME_NONE
+ toAddr.Sym = nil
+ }
+ p := s.Prog(v.Op.Asm())
+ p.To = toAddr
+ p.From.Type = obj.TYPE_REG
+ if v.Op == ssa.OpPPC64MOVDstorezero {
+ p.From.Reg = ppc64.REGZERO
+ } else {
+ p.From.Reg = v.Args[1].Reg()
+ }
+
+ case ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore, ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
case rem >= 8:
op, size = ppc64.AMOVD, 8
case rem >= 4:
- op, size = ppc64.AMOVW, 4
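+ // MOVWZ is used rather than MOVW because a MOVW load assembles to the
+ // DS form lwa, whose offset must be a multiple of 4; MOVWZ (lwz) has no
+ // such restriction.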
+ op, size = ppc64.AMOVWZ, 4
case rem >= 2:
op, size = ppc64.AMOVH, 2
}
case rem >= 8:
op, size = ppc64.AMOVD, 8
case rem >= 4:
- op, size = ppc64.AMOVW, 4
+ op, size = ppc64.AMOVWZ, 4
case rem >= 2:
op, size = ppc64.AMOVH, 2
}
(MOVHstorezero [4] destptr
(MOVWstorezero destptr mem)))
-// MOVD for store with DS must have offsets that are multiple of 4
-(Zero [8] {t} destptr mem) && t.Alignment()%4 == 0 =>
- (MOVDstorezero destptr mem)
-(Zero [8] destptr mem) =>
- (MOVWstorezero [4] destptr
- (MOVWstorezero [0] destptr mem))
-// Handle these cases only if aligned properly, otherwise use general case below
-(Zero [12] {t} destptr mem) && t.Alignment()%4 == 0 =>
+(Zero [8] {t} destptr mem) => (MOVDstorezero destptr mem)
+(Zero [12] {t} destptr mem) =>
(MOVWstorezero [8] destptr
(MOVDstorezero [0] destptr mem))
-(Zero [16] {t} destptr mem) && t.Alignment()%4 == 0 =>
+(Zero [16] {t} destptr mem) =>
(MOVDstorezero [8] destptr
(MOVDstorezero [0] destptr mem))
-(Zero [24] {t} destptr mem) && t.Alignment()%4 == 0 =>
+(Zero [24] {t} destptr mem) =>
(MOVDstorezero [16] destptr
(MOVDstorezero [8] destptr
(MOVDstorezero [0] destptr mem)))
-(Zero [32] {t} destptr mem) && t.Alignment()%4 == 0 =>
+(Zero [32] {t} destptr mem) =>
(MOVDstorezero [24] destptr
(MOVDstorezero [16] destptr
(MOVDstorezero [8] destptr
(Zero [s] ptr mem) && objabi.GOPPC64 >= 9 => (LoweredQuadZero [s] ptr mem)
// moves
-// Only the MOVD and MOVW instructions require 4 byte
-// alignment in the offset field. The other MOVx instructions
-// allow any alignment.
(Move [0] _ _ mem) => mem
(Move [1] dst src mem) => (MOVBstore dst (MOVBZload src mem) mem)
(Move [2] dst src mem) =>
(Move [4] dst src mem) =>
(MOVWstore dst (MOVWZload src mem) mem)
// MOVD for load and store must have offsets that are multiple of 4
-(Move [8] {t} dst src mem) && t.Alignment()%4 == 0 =>
+(Move [8] {t} dst src mem) =>
(MOVDstore dst (MOVDload src mem) mem)
-(Move [8] dst src mem) =>
- (MOVWstore [4] dst (MOVWZload [4] src mem)
- (MOVWstore dst (MOVWZload src mem) mem))
(Move [3] dst src mem) =>
(MOVBstore [2] dst (MOVBZload [2] src mem)
(MOVHstore dst (MOVHload src mem) mem))
(MFVSRD x:(FMOVDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVDload [off] {sym} ptr mem)
// Fold offsets for stores.
-(MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0 => (MOVDstore [off1+int32(off2)] {sym} x val mem)
+(MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(int64(off1)+off2) => (MOVDstore [off1+int32(off2)] {sym} x val mem)
(MOVWstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(int64(off1)+off2) => (MOVWstore [off1+int32(off2)] {sym} x val mem)
(MOVHstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(int64(off1)+off2) => (MOVHstore [off1+int32(off2)] {sym} x val mem)
(MOVBstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(int64(off1)+off2) => (MOVBstore [off1+int32(off2)] {sym} x val mem)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVDstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
- && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 =>
+ && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(FMOVSstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
(MOVHZload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
- && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 =>
+ && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
(MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWZload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
(MOVWZload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
- && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 =>
+ && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
(MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(FMOVSload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is16Bit(int64(off1)+off2) => (FMOVSload [off1+int32(off2)] {sym} ptr mem)
(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is16Bit(int64(off1)+off2) => (FMOVDload [off1+int32(off2)] {sym} ptr mem)
-(MOVDload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0 => (MOVDload [off1+int32(off2)] {sym} x mem)
-(MOVWload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0 => (MOVWload [off1+int32(off2)] {sym} x mem)
+(MOVDload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) => (MOVDload [off1+int32(off2)] {sym} x mem)
+(MOVWload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) => (MOVWload [off1+int32(off2)] {sym} x mem)
(MOVWZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) => (MOVWZload [off1+int32(off2)] {sym} x mem)
(MOVHload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) => (MOVHload [off1+int32(off2)] {sym} x mem)
(MOVHZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) => (MOVHZload [off1+int32(off2)] {sym} x mem)
// Determine load + addressing that can be done as a register indexed load
(MOV(D|W|WZ|H|HZ|BZ)load [0] {sym} p:(ADD ptr idx) mem) && sym == nil && p.Uses == 1 => (MOV(D|W|WZ|H|HZ|BZ)loadidx ptr idx mem)
-// Determine indexed loads with constant values that can be done without index
+// Determine if there is a benefit to using a non-indexed load, which saves the
+// instruction that materializes the constant index. With MOVDload and MOVWload,
+// there is no benefit if the offset is not a multiple of 4, since that would
+// require an extra instruction to compute the base register address.
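+// For example, (MOVDloadidx ptr (MOVDconst [6]) mem) is left in indexed form: rewriting it
+// to (MOVDload [6] ptr mem) would only move the address computation into the code generated
+// for MOVDload (see ssa.go above).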
(MOV(D|W)loadidx ptr (MOVDconst [c]) mem) && is16Bit(c) && c%4 == 0 => (MOV(D|W)load [int32(c)] ptr mem)
(MOV(WZ|H|HZ|BZ)loadidx ptr (MOVDconst [c]) mem) && is16Bit(c) => (MOV(WZ|H|HZ|BZ)load [int32(c)] ptr mem)
(MOV(D|W)loadidx (MOVDconst [c]) ptr mem) && is16Bit(c) && c%4 == 0 => (MOV(D|W)load [int32(c)] ptr mem)
(MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem)
// Fold offsets for storezero
-(MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0 =>
+(MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) =>
(MOVDstorezero [off1+int32(off2)] {sym} x mem)
(MOVWstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(int64(off1)+off2) =>
(MOVWstorezero [off1+int32(off2)] {sym} x mem)
(MOV(D|W|H|B)store [0] {sym} p:(ADD ptr idx) val mem) && sym == nil && p.Uses == 1 => (MOV(D|W|H|B)storeidx ptr idx val mem)
// Stores with constant index values can be done without indexed instructions
+// There is no benefit to lowering the indexed cases when c%4 != 0, since MOVDstore
+// is a DS form instruction.
(MOVDstoreidx ptr (MOVDconst [c]) val mem) && is16Bit(c) && c%4 == 0 => (MOVDstore [int32(c)] ptr val mem)
(MOV(W|H|B)storeidx ptr (MOVDconst [c]) val mem) && is16Bit(c) => (MOV(W|H|B)store [int32(c)] ptr val mem)
(MOVDstoreidx (MOVDconst [c]) ptr val mem) && is16Bit(c) && c%4 == 0 => (MOVDstore [int32(c)] ptr val mem)
// Fold symbols into storezero
(MOVDstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem) && canMergeSym(sym1,sym2)
- && (x.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 =>
+ && (x.Op != OpSB || p.Uses == 1) =>
(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} x mem)
(MOVWstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem) && canMergeSym(sym1,sym2)
&& (x.Op != OpSB || p.Uses == 1) =>
o3:(OR <t> s3:(SLDconst x4:(MOVBZload [i4] {s} p mem) [32])
x0:(MOVWZload {s} [i0] p mem)))))
&& !config.BigEndian
- && i0%4 == 0
&& i4 == i0+4
&& i5 == i0+5
&& i6 == i0+6
x2:(MOVBstore [i4] {s} p (SRDconst w [32])
x3:(MOVWstore [i0] {s} p w mem)))))
&& !config.BigEndian
- && i0%4 == 0
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7
&& clobber(x0, x1, x2, x3)
return true
}
// match: (Move [8] {t} dst src mem)
- // cond: t.Alignment()%4 == 0
// result: (MOVDstore dst (MOVDload src mem) mem)
for {
if auxIntToInt64(v.AuxInt) != 8 {
break
}
- t := auxToType(v.Aux)
dst := v_0
src := v_1
mem := v_2
- if !(t.Alignment()%4 == 0) {
- break
- }
v.reset(OpPPC64MOVDstore)
v0 := b.NewValue0(v.Pos, OpPPC64MOVDload, typ.Int64)
v0.AddArg2(src, mem)
v.AddArg3(dst, v0, mem)
return true
}
- // match: (Move [8] dst src mem)
- // result: (MOVWstore [4] dst (MOVWZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem))
- for {
- if auxIntToInt64(v.AuxInt) != 8 {
- break
- }
- dst := v_0
- src := v_1
- mem := v_2
- v.reset(OpPPC64MOVWstore)
- v.AuxInt = int32ToAuxInt(4)
- v0 := b.NewValue0(v.Pos, OpPPC64MOVWZload, typ.UInt32)
- v0.AuxInt = int32ToAuxInt(4)
- v0.AddArg2(src, mem)
- v1 := b.NewValue0(v.Pos, OpPPC64MOVWstore, types.TypeMem)
- v2 := b.NewValue0(v.Pos, OpPPC64MOVWZload, typ.UInt32)
- v2.AddArg2(src, mem)
- v1.AddArg3(dst, v2, mem)
- v.AddArg3(dst, v0, v1)
- return true
- }
// match: (Move [3] dst src mem)
// result: (MOVBstore [2] dst (MOVBZload [2] src mem) (MOVHstore dst (MOVHload src mem) mem))
for {
return true
}
// match: (MOVBstore [i7] {s} p (SRDconst w [56]) x0:(MOVBstore [i6] {s} p (SRDconst w [48]) x1:(MOVBstore [i5] {s} p (SRDconst w [40]) x2:(MOVBstore [i4] {s} p (SRDconst w [32]) x3:(MOVWstore [i0] {s} p w mem)))))
- // cond: !config.BigEndian && i0%4 == 0 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && clobber(x0, x1, x2, x3)
+ // cond: !config.BigEndian && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && clobber(x0, x1, x2, x3)
// result: (MOVDstore [i0] {s} p w mem)
for {
i7 := auxIntToInt32(v.AuxInt)
break
}
mem := x3.Args[2]
- if p != x3.Args[0] || w != x3.Args[1] || !(!config.BigEndian && i0%4 == 0 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && clobber(x0, x1, x2, x3)) {
+ if p != x3.Args[0] || w != x3.Args[1] || !(!config.BigEndian && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && clobber(x0, x1, x2, x3)) {
break
}
v.reset(OpPPC64MOVDstore)
return true
}
// match: (MOVDload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem)
- // cond: canMergeSym(sym1,sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0
+ // cond: canMergeSym(sym1,sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)
// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym2 := auxToSym(p.Aux)
ptr := p.Args[0]
mem := v_1
- if !(canMergeSym(sym1, sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0) {
+ if !(canMergeSym(sym1, sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)) {
break
}
v.reset(OpPPC64MOVDload)
return true
}
// match: (MOVDload [off1] {sym} (ADDconst [off2] x) mem)
- // cond: is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0
+ // cond: is16Bit(int64(off1)+off2)
// result: (MOVDload [off1+int32(off2)] {sym} x mem)
for {
off1 := auxIntToInt32(v.AuxInt)
off2 := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
mem := v_1
- if !(is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0) {
+ if !(is16Bit(int64(off1) + off2)) {
break
}
v.reset(OpPPC64MOVDload)
return true
}
// match: (MOVDstore [off1] {sym} (ADDconst [off2] x) val mem)
- // cond: is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0
+ // cond: is16Bit(int64(off1)+off2)
// result: (MOVDstore [off1+int32(off2)] {sym} x val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
x := v_0.Args[0]
val := v_1
mem := v_2
- if !(is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0) {
+ if !(is16Bit(int64(off1) + off2)) {
break
}
v.reset(OpPPC64MOVDstore)
return true
}
// match: (MOVDstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem)
- // cond: canMergeSym(sym1,sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0
+ // cond: canMergeSym(sym1,sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)
// result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
ptr := p.Args[0]
val := v_1
mem := v_2
- if !(canMergeSym(sym1, sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0) {
+ if !(canMergeSym(sym1, sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)) {
break
}
v.reset(OpPPC64MOVDstore)
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem)
- // cond: is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0
+ // cond: is16Bit(int64(off1)+off2)
// result: (MOVDstorezero [off1+int32(off2)] {sym} x mem)
for {
off1 := auxIntToInt32(v.AuxInt)
off2 := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
mem := v_1
- if !(is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0) {
+ if !(is16Bit(int64(off1) + off2)) {
break
}
v.reset(OpPPC64MOVDstorezero)
return true
}
// match: (MOVDstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem)
- // cond: canMergeSym(sym1,sym2) && (x.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0
+ // cond: canMergeSym(sym1,sym2) && (x.Op != OpSB || p.Uses == 1)
// result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} x mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym2 := auxToSym(p.Aux)
x := p.Args[0]
mem := v_1
- if !(canMergeSym(sym1, sym2) && (x.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0) {
+ if !(canMergeSym(sym1, sym2) && (x.Op != OpSB || p.Uses == 1)) {
break
}
v.reset(OpPPC64MOVDstorezero)
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MOVWload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem)
- // cond: canMergeSym(sym1,sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0
+ // cond: canMergeSym(sym1,sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)
// result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym2 := auxToSym(p.Aux)
ptr := p.Args[0]
mem := v_1
- if !(canMergeSym(sym1, sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0) {
+ if !(canMergeSym(sym1, sym2) && is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1)) {
break
}
v.reset(OpPPC64MOVWload)
return true
}
// match: (MOVWload [off1] {sym} (ADDconst [off2] x) mem)
- // cond: is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0
+ // cond: is16Bit(int64(off1)+off2)
// result: (MOVWload [off1+int32(off2)] {sym} x mem)
for {
off1 := auxIntToInt32(v.AuxInt)
off2 := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
mem := v_1
- if !(is16Bit(int64(off1)+off2) && (int64(off1)+off2)%4 == 0) {
+ if !(is16Bit(int64(off1) + off2)) {
break
}
v.reset(OpPPC64MOVWload)
break
}
// match: (OR <t> s6:(SLDconst x7:(MOVBZload [i7] {s} p mem) [56]) o5:(OR <t> s5:(SLDconst x6:(MOVBZload [i6] {s} p mem) [48]) o4:(OR <t> s4:(SLDconst x5:(MOVBZload [i5] {s} p mem) [40]) o3:(OR <t> s3:(SLDconst x4:(MOVBZload [i4] {s} p mem) [32]) x0:(MOVWZload {s} [i0] p mem)))))
- // cond: !config.BigEndian && i0%4 == 0 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses ==1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x0, x4, x5, x6, x7) != nil && clobber(x0, x4, x5, x6, x7, s3, s4, s5, s6, o3, o4, o5)
+ // cond: !config.BigEndian && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses ==1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x0, x4, x5, x6, x7) != nil && clobber(x0, x4, x5, x6, x7, s3, s4, s5, s6, o3, o4, o5)
// result: @mergePoint(b,x0,x4,x5,x6,x7) (MOVDload <t> {s} [i0] p mem)
for {
t := v.Type
continue
}
_ = x0.Args[1]
- if p != x0.Args[0] || mem != x0.Args[1] || !(!config.BigEndian && i0%4 == 0 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x0, x4, x5, x6, x7) != nil && clobber(x0, x4, x5, x6, x7, s3, s4, s5, s6, o3, o4, o5)) {
+ if p != x0.Args[0] || mem != x0.Args[1] || !(!config.BigEndian && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && mergePoint(b, x0, x4, x5, x6, x7) != nil && clobber(x0, x4, x5, x6, x7, s3, s4, s5, s6, o3, o4, o5)) {
continue
}
b = mergePoint(b, x0, x4, x5, x6, x7)
return true
}
// match: (Zero [8] {t} destptr mem)
- // cond: t.Alignment()%4 == 0
// result: (MOVDstorezero destptr mem)
for {
if auxIntToInt64(v.AuxInt) != 8 {
break
}
- t := auxToType(v.Aux)
destptr := v_0
mem := v_1
- if !(t.Alignment()%4 == 0) {
- break
- }
v.reset(OpPPC64MOVDstorezero)
v.AddArg2(destptr, mem)
return true
}
- // match: (Zero [8] destptr mem)
- // result: (MOVWstorezero [4] destptr (MOVWstorezero [0] destptr mem))
- for {
- if auxIntToInt64(v.AuxInt) != 8 {
- break
- }
- destptr := v_0
- mem := v_1
- v.reset(OpPPC64MOVWstorezero)
- v.AuxInt = int32ToAuxInt(4)
- v0 := b.NewValue0(v.Pos, OpPPC64MOVWstorezero, types.TypeMem)
- v0.AuxInt = int32ToAuxInt(0)
- v0.AddArg2(destptr, mem)
- v.AddArg2(destptr, v0)
- return true
- }
// match: (Zero [12] {t} destptr mem)
- // cond: t.Alignment()%4 == 0
// result: (MOVWstorezero [8] destptr (MOVDstorezero [0] destptr mem))
for {
if auxIntToInt64(v.AuxInt) != 12 {
break
}
- t := auxToType(v.Aux)
destptr := v_0
mem := v_1
- if !(t.Alignment()%4 == 0) {
- break
- }
v.reset(OpPPC64MOVWstorezero)
v.AuxInt = int32ToAuxInt(8)
v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
return true
}
// match: (Zero [16] {t} destptr mem)
- // cond: t.Alignment()%4 == 0
// result: (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))
for {
if auxIntToInt64(v.AuxInt) != 16 {
break
}
- t := auxToType(v.Aux)
destptr := v_0
mem := v_1
- if !(t.Alignment()%4 == 0) {
- break
- }
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = int32ToAuxInt(8)
v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
return true
}
// match: (Zero [24] {t} destptr mem)
- // cond: t.Alignment()%4 == 0
// result: (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem)))
for {
if auxIntToInt64(v.AuxInt) != 24 {
break
}
- t := auxToType(v.Aux)
destptr := v_0
mem := v_1
- if !(t.Alignment()%4 == 0) {
- break
- }
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = int32ToAuxInt(16)
v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
return true
}
// match: (Zero [32] {t} destptr mem)
- // cond: t.Alignment()%4 == 0
// result: (MOVDstorezero [24] destptr (MOVDstorezero [16] destptr (MOVDstorezero [8] destptr (MOVDstorezero [0] destptr mem))))
for {
if auxIntToInt64(v.AuxInt) != 32 {
break
}
- t := auxToType(v.Aux)
destptr := v_0
mem := v_1
- if !(t.Alignment()%4 == 0) {
- break
- }
v.reset(OpPPC64MOVDstorezero)
v.AuxInt = int32ToAuxInt(24)
v0 := b.NewValue0(v.Pos, OpPPC64MOVDstorezero, types.TypeMem)
--- /dev/null
+// compile
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// issue 44739: cmd/compile: incorrect offset in MOVD
+// load/store on ppc64/ppc64le causes assembler error.
+
+// Test other 8 byte loads and stores where the
+// compile time offset is not aligned to 8, as
+// well as cases where the offset is not known
+// until link time (e.g. gostrings).
+
+package main
+
+import (
+ "fmt"
+)
+
+type T struct {
+ x [4]byte
+ y [8]byte
+}
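+
+// y starts at offset 4 within T, so 8 byte accesses to y are not 8 byte
+// aligned relative to the start of T.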
+
+var st T
+
+const (
+ gostring1 = "abc"
+ gostring2 = "defghijk"
+ gostring3 = "lmnopqrs"
+)
+
+func f(a T, _ byte, b T) bool {
+ // initialization of a, b
+ // tests unaligned stores
+ return a.y == b.y
+}
+
+func g(a T) {
+ // test load of unaligned
+ // 8 byte gostring, store
+ // to unaligned static
+ copy(a.y[:], gostring2)
+}
+
+func main() {
+ var t1, t2 T
+
+ // test copy to automatic storage,
+ // load of unaligned gostring.
+ copy(st.y[:], gostring2)
+ copy(t1.y[:], st.y[:])
+ copy(t2.y[:], gostring3)
+ // test initialization of params
+ if !f(t1, 'a', t2) {
+ // gostring1 added so it has a use
+ fmt.Printf("FAIL: %s\n", gostring1)
+ }
+}
+