}
}
+//go:noinline
+func tu2copy_ssa(docopy bool, data [2]byte, x *[2]byte) {
+ if docopy {
+ *x = data
+ }
+}
+func testUnalignedCopy2() {
+ var a [2]byte
+ t2 := [2]byte{2, 3}
+ tu2copy_ssa(true, t2, &a)
+ want2 := [2]byte{2, 3}
+ if a != want2 {
+ fmt.Printf("tu2copy got=%v, want %v\n", a, want2)
+ failed = true
+ }
+}
+
+//go:noinline
+func tu3copy_ssa(docopy bool, data [3]byte, x *[3]byte) {
+ if docopy {
+ *x = data
+ }
+}
+func testUnalignedCopy3() {
+ var a [3]byte
+ t3 := [3]byte{3, 4, 5}
+ tu3copy_ssa(true, t3, &a)
+ want3 := [3]byte{3, 4, 5}
+ if a != want3 {
+ fmt.Printf("tu3copy got=%v, want %v\n", a, want3)
+ failed = true
+ }
+}
+
+//go:noinline
+func tu4copy_ssa(docopy bool, data [4]byte, x *[4]byte) {
+ if docopy {
+ *x = data
+ }
+}
+func testUnalignedCopy4() {
+ var a [4]byte
+ t4 := [4]byte{4, 5, 6, 7}
+ tu4copy_ssa(true, t4, &a)
+ want4 := [4]byte{4, 5, 6, 7}
+ if a != want4 {
+ fmt.Printf("tu4copy got=%v, want %v\n", a, want4)
+ failed = true
+ }
+}
+
+//go:noinline
+func tu5copy_ssa(docopy bool, data [5]byte, x *[5]byte) {
+ if docopy {
+ *x = data
+ }
+}
+func testUnalignedCopy5() {
+ var a [5]byte
+ t5 := [5]byte{5, 6, 7, 8, 9}
+ tu5copy_ssa(true, t5, &a)
+ want5 := [5]byte{5, 6, 7, 8, 9}
+ if a != want5 {
+ fmt.Printf("tu5copy got=%v, want %v\n", a, want5)
+ failed = true
+ }
+}
+
+//go:noinline
+func tu6copy_ssa(docopy bool, data [6]byte, x *[6]byte) {
+ if docopy {
+ *x = data
+ }
+}
+func testUnalignedCopy6() {
+ var a [6]byte
+ t6 := [6]byte{6, 7, 8, 9, 10, 11}
+ tu6copy_ssa(true, t6, &a)
+ want6 := [6]byte{6, 7, 8, 9, 10, 11}
+ if a != want6 {
+ fmt.Printf("tu6copy got=%v, want %v\n", a, want6)
+ failed = true
+ }
+}
+
+//go:noinline
+func tu7copy_ssa(docopy bool, data [7]byte, x *[7]byte) {
+ if docopy {
+ *x = data
+ }
+}
+func testUnalignedCopy7() {
+ var a [7]byte
+ t7 := [7]byte{7, 8, 9, 10, 11, 12, 13}
+ tu7copy_ssa(true, t7, &a)
+ want7 := [7]byte{7, 8, 9, 10, 11, 12, 13}
+ if a != want7 {
+ fmt.Printf("tu7copy got=%v, want %v\n", a, want7)
+ failed = true
+ }
+}
+
var failed bool
func main() {
testCopy1039()
testCopy1040()
testCopy1041()
+ testUnalignedCopy2()
+ testUnalignedCopy3()
+ testUnalignedCopy4()
+ testUnalignedCopy5()
+ testUnalignedCopy6()
+ testUnalignedCopy7()
if failed {
panic("failed")
}
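
Each generated test above follows the same pattern: //go:noinline keeps the copy inside a separate function, and the copy is guarded by a bool parameter the callee only sees at run time, so the compiler cannot fold the store away and must lower a real SSA Move of a byte-array type (alignment 1). A minimal self-contained version of the size-2 case, with a standalone main added here purely for illustration:

	package main

	import "fmt"

	//go:noinline
	func tu2copy_ssa(docopy bool, data [2]byte, x *[2]byte) {
		// [2]byte has alignment 1, so this assignment lowers to
		// an SSA Move with no alignment guarantee.
		if docopy {
			*x = data
		}
	}

	func main() {
		var a [2]byte
		tu2copy_ssa(true, [2]byte{2, 3}, &a)
		if a != [2]byte{2, 3} {
			panic(fmt.Sprintf("tu2copy got %v, want [2 3]", a))
		}
	}

The generator that produces these tests follows.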
var sizes = [...]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 23, 24, 25, 31, 32, 33, 63, 64, 65, 1023, 1024, 1025, 1024 + 7, 1024 + 8, 1024 + 9, 1024 + 15, 1024 + 16, 1024 + 17}
+var usizes = [...]int{2, 3, 4, 5, 6, 7}
+
func main() {
w := new(bytes.Buffer)
fmt.Fprintf(w, "// run\n")
fmt.Fprintf(w, "}\n")
}
+ for _, s := range usizes {
+ // function being tested
+ fmt.Fprintf(w, "//go:noinline\n")
+ fmt.Fprintf(w, "func tu%dcopy_ssa(docopy bool, data [%d]byte, x *[%d]byte) {\n", s, s, s)
+ fmt.Fprintf(w, " if docopy {\n")
+ fmt.Fprintf(w, " *x = data\n")
+ fmt.Fprintf(w, " }\n")
+ fmt.Fprintf(w, "}\n")
+
+ // testing harness
+ fmt.Fprintf(w, "func testUnalignedCopy%d() {\n", s)
+ fmt.Fprintf(w, " var a [%d]byte\n", s)
+ fmt.Fprintf(w, " t%d := [%d]byte{", s, s)
+ for i := 0; i < s; i++ {
+ fmt.Fprintf(w, " %d,", s+i)
+ }
+ fmt.Fprintf(w, "}\n")
+ fmt.Fprintf(w, " tu%dcopy_ssa(true, t%d, &a)\n", s, s)
+ fmt.Fprintf(w, " want%d := [%d]byte{", s, s)
+ for i := 0; i < s; i++ {
+ fmt.Fprintf(w, " %d,", s+i)
+ }
+ fmt.Fprintf(w, "}\n")
+ fmt.Fprintf(w, " if a != want%d {\n", s)
+ fmt.Fprintf(w, " fmt.Printf(\"tu%dcopy got=%%v, want %%v\\n\", a, want%d)\n", s, s)
+ fmt.Fprintf(w, " failed=true\n")
+ fmt.Fprintf(w, " }\n")
+ fmt.Fprintf(w, "}\n")
+ }
+
// boilerplate at end
fmt.Fprintf(w, "var failed bool\n")
fmt.Fprintf(w, "func main() {\n")
for _, s := range sizes {
fmt.Fprintf(w, " testCopy%d()\n", s)
}
+ for _, s := range usizes {
+ fmt.Fprintf(w, " testUnalignedCopy%d()\n", s)
+ }
fmt.Fprintf(w, " if failed {\n")
fmt.Fprintf(w, " panic(\"failed\")\n")
fmt.Fprintf(w, " }\n")
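
This hunk does not show how the generator finishes. Generators like this one typically gofmt the buffer and write the result to a file; the following tail is a sketch under that assumption (the output filename, and the imports of go/format and io/ioutil it relies on, are guesses, not part of the diff):

	// Sketch only: gofmt the generated source and write it out.
	src, err := format.Source(w.Bytes())
	if err != nil {
		fmt.Printf("%s\n", w.Bytes()) // dump the raw source to aid debugging
		panic(err)
	}
	if err := ioutil.WriteFile("copy.go", src, 0666); err != nil {
		panic(err)
	}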
(Zero [s] ptr mem) -> (LoweredZero [s] ptr mem)
// moves
+// Only the MOVD and MOVW instructions require 4-byte
+// alignment of the offset field; the other MOVx instructions
+// allow any alignment (see the example after these rules).
(Move [0] _ _ mem) -> mem
(Move [1] dst src mem) -> (MOVBstore dst (MOVBZload src mem) mem)
(Move [2] dst src mem) ->
(MOVHstore dst (MOVHZload src mem) mem)
-(Move [4] {t} dst src mem) && t.(Type).Alignment()%4 == 0 ->
- (MOVWstore dst (MOVWload src mem) mem)
-(Move [4] {t} dst src mem) && t.(Type).Alignment()%2 == 0 ->
- (MOVHstore [2] dst (MOVHZload [2] src mem)
- (MOVHstore dst (MOVHZload src mem) mem))
(Move [4] dst src mem) ->
- (MOVBstore [3] dst (MOVBZload [3] src mem)
- (MOVBstore [2] dst (MOVBZload [2] src mem)
- (MOVBstore [1] dst (MOVBZload [1] src mem)
- (MOVBstore dst (MOVBZload src mem) mem))))
-
-(Move [8] {t} dst src mem) && t.(Type).Alignment()%8 == 0 ->
- (MOVDstore dst (MOVDload src mem) mem)
+ (MOVWstore dst (MOVWZload src mem) mem)
+// MOVD loads and stores must have an offset that is a multiple of 4
(Move [8] {t} dst src mem) && t.(Type).Alignment()%4 == 0 ->
+ (MOVDstore dst (MOVDload src mem) mem)
+(Move [8] dst src mem) ->
(MOVWstore [4] dst (MOVWZload [4] src mem)
(MOVWstore dst (MOVWZload src mem) mem))
-(Move [8] {t} dst src mem) && t.(Type).Alignment()%2 == 0 ->
- (MOVHstore [6] dst (MOVHZload [6] src mem)
- (MOVHstore [4] dst (MOVHZload [4] src mem)
- (MOVHstore [2] dst (MOVHZload [2] src mem)
- (MOVHstore dst (MOVHZload src mem) mem))))
-
(Move [3] dst src mem) ->
(MOVBstore [2] dst (MOVBZload [2] src mem)
(MOVHstore dst (MOVHload src mem) mem))
-(Move [4] dst src mem) ->
- (MOVWstore dst (MOVWload src mem) mem)
(Move [5] dst src mem) ->
(MOVBstore [4] dst (MOVBZload [4] src mem)
- (MOVWstore dst (MOVWload src mem) mem))
+ (MOVWstore dst (MOVWZload src mem) mem))
(Move [6] dst src mem) ->
(MOVHstore [4] dst (MOVHZload [4] src mem)
- (MOVWstore dst (MOVWload src mem) mem))
+ (MOVWstore dst (MOVWZload src mem) mem))
(Move [7] dst src mem) ->
(MOVBstore [6] dst (MOVBZload [6] src mem)
(MOVHstore [4] dst (MOVHZload [4] src mem)
- (MOVWstore dst (MOVWload src mem) mem)))
-(Move [8] dst src mem) ->
- (MOVDstore dst (MOVDload src mem) mem)
+ (MOVWstore dst (MOVWZload src mem) mem)))
-// Large move uses a loop
+// Large moves use a loop. Since the address is computed and the
+// offset field is zero, any alignment can be used.
(Move [s] dst src mem) && s > 8 ->
(LoweredMove [s] dst src mem)
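
The practical effect of dropping the alignment conditions: a [4]byte has alignment 1, so under the old rules a copy of one was lowered to four byte-sized loads and stores unless the type happened to be 2- or 4-byte aligned; with the rules above it always becomes a single MOVWZload/MOVWstore pair, and the hardware tolerates the unaligned address. A hedged sketch of Go code that reaches this path (the type and names are invented for illustration):

	package main

	import "fmt"

	// b sits at offset 1, and [4]byte itself has alignment 1, so the
	// compiler sees a 4-byte Move with no alignment guarantee.
	type T struct {
		pad byte
		b   [4]byte
	}

	//go:noinline
	func setB(t *T, src [4]byte) {
		t.b = src // with the rules above: one MOVWZload + one MOVWstore
	}

	func main() {
		var t T
		setB(&t, [4]byte{1, 2, 3, 4})
		fmt.Println(t.b) // [1 2 3 4]
	}

Copies larger than 8 bytes take the LoweredMove loop in all cases; as the comment above notes, the loop computes addresses and uses a zero offset field, so alignment does not matter there either.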
v.AddArg(mem)
return true
}
- // match: (Move [4] {t} dst src mem)
- // cond: t.(Type).Alignment()%4 == 0
- // result: (MOVWstore dst (MOVWload src mem) mem)
- for {
- if v.AuxInt != 4 {
- break
- }
- t := v.Aux
- dst := v.Args[0]
- src := v.Args[1]
- mem := v.Args[2]
- if !(t.(Type).Alignment()%4 == 0) {
- break
- }
- v.reset(OpPPC64MOVWstore)
- v.AddArg(dst)
- v0 := b.NewValue0(v.Pos, OpPPC64MOVWload, types.Int32)
- v0.AddArg(src)
- v0.AddArg(mem)
- v.AddArg(v0)
- v.AddArg(mem)
- return true
- }
- // match: (Move [4] {t} dst src mem)
- // cond: t.(Type).Alignment()%2 == 0
- // result: (MOVHstore [2] dst (MOVHZload [2] src mem) (MOVHstore dst (MOVHZload src mem) mem))
- for {
- if v.AuxInt != 4 {
- break
- }
- t := v.Aux
- dst := v.Args[0]
- src := v.Args[1]
- mem := v.Args[2]
- if !(t.(Type).Alignment()%2 == 0) {
- break
- }
- v.reset(OpPPC64MOVHstore)
- v.AuxInt = 2
- v.AddArg(dst)
- v0 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
- v0.AuxInt = 2
- v0.AddArg(src)
- v0.AddArg(mem)
- v.AddArg(v0)
- v1 := b.NewValue0(v.Pos, OpPPC64MOVHstore, TypeMem)
- v1.AddArg(dst)
- v2 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
- v2.AddArg(src)
- v2.AddArg(mem)
- v1.AddArg(v2)
- v1.AddArg(mem)
- v.AddArg(v1)
- return true
- }
// match: (Move [4] dst src mem)
// cond:
- // result: (MOVBstore [3] dst (MOVBZload [3] src mem) (MOVBstore [2] dst (MOVBZload [2] src mem) (MOVBstore [1] dst (MOVBZload [1] src mem) (MOVBstore dst (MOVBZload src mem) mem))))
+ // result: (MOVWstore dst (MOVWZload src mem) mem)
for {
if v.AuxInt != 4 {
break
dst := v.Args[0]
src := v.Args[1]
mem := v.Args[2]
- v.reset(OpPPC64MOVBstore)
- v.AuxInt = 3
+ v.reset(OpPPC64MOVWstore)
v.AddArg(dst)
- v0 := b.NewValue0(v.Pos, OpPPC64MOVBZload, types.UInt8)
- v0.AuxInt = 3
+ v0 := b.NewValue0(v.Pos, OpPPC64MOVWZload, types.UInt32)
v0.AddArg(src)
v0.AddArg(mem)
v.AddArg(v0)
- v1 := b.NewValue0(v.Pos, OpPPC64MOVBstore, TypeMem)
- v1.AuxInt = 2
- v1.AddArg(dst)
- v2 := b.NewValue0(v.Pos, OpPPC64MOVBZload, types.UInt8)
- v2.AuxInt = 2
- v2.AddArg(src)
- v2.AddArg(mem)
- v1.AddArg(v2)
- v3 := b.NewValue0(v.Pos, OpPPC64MOVBstore, TypeMem)
- v3.AuxInt = 1
- v3.AddArg(dst)
- v4 := b.NewValue0(v.Pos, OpPPC64MOVBZload, types.UInt8)
- v4.AuxInt = 1
- v4.AddArg(src)
- v4.AddArg(mem)
- v3.AddArg(v4)
- v5 := b.NewValue0(v.Pos, OpPPC64MOVBstore, TypeMem)
- v5.AddArg(dst)
- v6 := b.NewValue0(v.Pos, OpPPC64MOVBZload, types.UInt8)
- v6.AddArg(src)
- v6.AddArg(mem)
- v5.AddArg(v6)
- v5.AddArg(mem)
- v3.AddArg(v5)
- v1.AddArg(v3)
- v.AddArg(v1)
+ v.AddArg(mem)
return true
}
// match: (Move [8] {t} dst src mem)
- // cond: t.(Type).Alignment()%8 == 0
+ // cond: t.(Type).Alignment()%4 == 0
// result: (MOVDstore dst (MOVDload src mem) mem)
for {
if v.AuxInt != 8 {
dst := v.Args[0]
src := v.Args[1]
mem := v.Args[2]
- if !(t.(Type).Alignment()%8 == 0) {
+ if !(t.(Type).Alignment()%4 == 0) {
break
}
v.reset(OpPPC64MOVDstore)
v.AddArg(mem)
return true
}
- // match: (Move [8] {t} dst src mem)
- // cond: t.(Type).Alignment()%4 == 0
+ // match: (Move [8] dst src mem)
+ // cond:
// result: (MOVWstore [4] dst (MOVWZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem))
for {
if v.AuxInt != 8 {
break
}
- t := v.Aux
dst := v.Args[0]
src := v.Args[1]
mem := v.Args[2]
- if !(t.(Type).Alignment()%4 == 0) {
- break
- }
v.reset(OpPPC64MOVWstore)
v.AuxInt = 4
v.AddArg(dst)
v.AddArg(v1)
return true
}
- // match: (Move [8] {t} dst src mem)
- // cond: t.(Type).Alignment()%2 == 0
- // result: (MOVHstore [6] dst (MOVHZload [6] src mem) (MOVHstore [4] dst (MOVHZload [4] src mem) (MOVHstore [2] dst (MOVHZload [2] src mem) (MOVHstore dst (MOVHZload src mem) mem))))
- for {
- if v.AuxInt != 8 {
- break
- }
- t := v.Aux
- dst := v.Args[0]
- src := v.Args[1]
- mem := v.Args[2]
- if !(t.(Type).Alignment()%2 == 0) {
- break
- }
- v.reset(OpPPC64MOVHstore)
- v.AuxInt = 6
- v.AddArg(dst)
- v0 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
- v0.AuxInt = 6
- v0.AddArg(src)
- v0.AddArg(mem)
- v.AddArg(v0)
- v1 := b.NewValue0(v.Pos, OpPPC64MOVHstore, TypeMem)
- v1.AuxInt = 4
- v1.AddArg(dst)
- v2 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
- v2.AuxInt = 4
- v2.AddArg(src)
- v2.AddArg(mem)
- v1.AddArg(v2)
- v3 := b.NewValue0(v.Pos, OpPPC64MOVHstore, TypeMem)
- v3.AuxInt = 2
- v3.AddArg(dst)
- v4 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
- v4.AuxInt = 2
- v4.AddArg(src)
- v4.AddArg(mem)
- v3.AddArg(v4)
- v5 := b.NewValue0(v.Pos, OpPPC64MOVHstore, TypeMem)
- v5.AddArg(dst)
- v6 := b.NewValue0(v.Pos, OpPPC64MOVHZload, types.UInt16)
- v6.AddArg(src)
- v6.AddArg(mem)
- v5.AddArg(v6)
- v5.AddArg(mem)
- v3.AddArg(v5)
- v1.AddArg(v3)
- v.AddArg(v1)
- return true
- }
// match: (Move [3] dst src mem)
// cond:
// result: (MOVBstore [2] dst (MOVBZload [2] src mem) (MOVHstore dst (MOVHload src mem) mem))
v.AddArg(v1)
return true
}
- // match: (Move [4] dst src mem)
- // cond:
- // result: (MOVWstore dst (MOVWload src mem) mem)
- for {
- if v.AuxInt != 4 {
- break
- }
- dst := v.Args[0]
- src := v.Args[1]
- mem := v.Args[2]
- v.reset(OpPPC64MOVWstore)
- v.AddArg(dst)
- v0 := b.NewValue0(v.Pos, OpPPC64MOVWload, types.Int32)
- v0.AddArg(src)
- v0.AddArg(mem)
- v.AddArg(v0)
- v.AddArg(mem)
- return true
- }
// match: (Move [5] dst src mem)
// cond:
- // result: (MOVBstore [4] dst (MOVBZload [4] src mem) (MOVWstore dst (MOVWload src mem) mem))
+ // result: (MOVBstore [4] dst (MOVBZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem))
for {
if v.AuxInt != 5 {
break
v.AddArg(v0)
v1 := b.NewValue0(v.Pos, OpPPC64MOVWstore, TypeMem)
v1.AddArg(dst)
- v2 := b.NewValue0(v.Pos, OpPPC64MOVWload, types.Int32)
+ v2 := b.NewValue0(v.Pos, OpPPC64MOVWZload, types.UInt32)
v2.AddArg(src)
v2.AddArg(mem)
v1.AddArg(v2)
}
// match: (Move [6] dst src mem)
// cond:
- // result: (MOVHstore [4] dst (MOVHZload [4] src mem) (MOVWstore dst (MOVWload src mem) mem))
+ // result: (MOVHstore [4] dst (MOVHZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem))
for {
if v.AuxInt != 6 {
break
v.AddArg(v0)
v1 := b.NewValue0(v.Pos, OpPPC64MOVWstore, TypeMem)
v1.AddArg(dst)
- v2 := b.NewValue0(v.Pos, OpPPC64MOVWload, types.Int32)
+ v2 := b.NewValue0(v.Pos, OpPPC64MOVWZload, types.UInt32)
v2.AddArg(src)
v2.AddArg(mem)
v1.AddArg(v2)
}
// match: (Move [7] dst src mem)
// cond:
- // result: (MOVBstore [6] dst (MOVBZload [6] src mem) (MOVHstore [4] dst (MOVHZload [4] src mem) (MOVWstore dst (MOVWload src mem) mem)))
+ // result: (MOVBstore [6] dst (MOVBZload [6] src mem) (MOVHstore [4] dst (MOVHZload [4] src mem) (MOVWstore dst (MOVWZload src mem) mem)))
for {
if v.AuxInt != 7 {
break
v1.AddArg(v2)
v3 := b.NewValue0(v.Pos, OpPPC64MOVWstore, TypeMem)
v3.AddArg(dst)
- v4 := b.NewValue0(v.Pos, OpPPC64MOVWload, types.Int32)
+ v4 := b.NewValue0(v.Pos, OpPPC64MOVWZload, types.UInt32)
v4.AddArg(src)
v4.AddArg(mem)
v3.AddArg(v4)
v.AddArg(v1)
return true
}
- // match: (Move [8] dst src mem)
- // cond:
- // result: (MOVDstore dst (MOVDload src mem) mem)
- for {
- if v.AuxInt != 8 {
- break
- }
- dst := v.Args[0]
- src := v.Args[1]
- mem := v.Args[2]
- v.reset(OpPPC64MOVDstore)
- v.AddArg(dst)
- v0 := b.NewValue0(v.Pos, OpPPC64MOVDload, types.Int64)
- v0.AddArg(src)
- v0.AddArg(mem)
- v.AddArg(v0)
- v.AddArg(mem)
- return true
- }
// match: (Move [s] dst src mem)
// cond: s > 8
// result: (LoweredMove [s] dst src mem)
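
The rest of the change is mechanical: rewritePPC64.go is generated from PPC64.rules by the rulegen tool in the compiler's ssa/gen directory, so each match/result block added or deleted above corresponds one-to-one with a rule edited earlier in the diff.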