From: Todd Neal
Date: Tue, 8 Mar 2016 00:43:05 +0000 (-0600)
Subject: cmd/compile: fix load combining from a non-zero pointer offset
X-Git-Tag: go1.7beta1~1493
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=481fe5901232583340159415f3f5c83af28c1fa4;p=gostls13.git

cmd/compile: fix load combining from a non-zero pointer offset

When the pointer offset is non-zero in the small loads, we need to add
the offset when converting to the larger load.

Fixes #14694

Change-Id: I5ba8bcb3b9ce26c7fae0c4951500b9ef0fed54cd
Reviewed-on: https://go-review.googlesource.com/20333
Reviewed-by: Keith Randall
---

diff --git a/src/cmd/compile/internal/gc/testdata/arith_ssa.go b/src/cmd/compile/internal/gc/testdata/arith_ssa.go
index f4bea0ed11..3eb02ff17e 100644
--- a/src/cmd/compile/internal/gc/testdata/arith_ssa.go
+++ b/src/cmd/compile/internal/gc/testdata/arith_ssa.go
@@ -14,6 +14,40 @@ const (
 	y = 0x0fffFFFF
 )
 
+//go:noinline
+func parseLE64(b []byte) uint64 {
+	// skip the first two bytes, and parse the remaining 8 as a uint64
+	return uint64(b[2]) | uint64(b[3])<<8 | uint64(b[4])<<16 | uint64(b[5])<<24 |
+		uint64(b[6])<<32 | uint64(b[7])<<40 | uint64(b[8])<<48 | uint64(b[9])<<56
+}
+
+//go:noinline
+func parseLE32(b []byte) uint32 {
+	return uint32(b[2]) | uint32(b[3])<<8 | uint32(b[4])<<16 | uint32(b[5])<<24
+}
+
+//go:noinline
+func parseLE16(b []byte) uint16 {
+	return uint16(b[2]) | uint16(b[3])<<8
+}
+
+// testLoadCombine tests for issue #14694 where load combining didn't respect the pointer offset.
+func testLoadCombine() {
+	testData := []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09}
+	if want, got := uint64(0x0908070605040302), parseLE64(testData); want != got {
+		println("testLargeConst add failed, wanted", want, "got", got)
+		failed = true
+	}
+	if want, got := uint32(0x05040302), parseLE32(testData); want != got {
+		println("testLargeConst add failed, wanted", want, "got", got)
+		failed = true
+	}
+	if want, got := uint16(0x0302), parseLE16(testData); want != got {
+		println("testLargeConst add failed, wanted", want, "got", got)
+		failed = true
+	}
+}
+
 //go:noinline
 func invalidAdd_ssa(x uint32) uint32 {
 	return x + y + y + y + y + y + y + y + y + y + y + y + y + y + y + y + y + y
@@ -431,6 +465,7 @@ func main() {
 	testArithConstShift()
 	testArithRshConst()
 	testLargeConst()
+	testLoadCombine()
 
 	if failed {
 		panic("failed")
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 35d0848233..a98301a303 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1167,13 +1167,13 @@
 // There are many ways these combinations could occur. This is
 // designed to match the way encoding/binary.LittleEndian does it.
 (ORW                (MOVBQZXload [i]   {s} p mem)
-    (SHLWconst [8]  (MOVBQZXload [i+1] {s} p mem))) -> (MOVWload p mem)
+    (SHLWconst [8]  (MOVBQZXload [i+1] {s} p mem))) -> (MOVWload (ADDQconst [i] p) mem)
 
 (ORL (ORL (ORL
                     (MOVBQZXload [i]   {s} p mem)
     (SHLLconst [8]  (MOVBQZXload [i+1] {s} p mem)))
     (SHLLconst [16] (MOVBQZXload [i+2] {s} p mem)))
-    (SHLLconst [24] (MOVBQZXload [i+3] {s} p mem))) -> (MOVLload p mem)
+    (SHLLconst [24] (MOVBQZXload [i+3] {s} p mem))) -> (MOVLload (ADDQconst [i] p) mem)
 
 (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ
                     (MOVBQZXload [i]   {s} p mem)
@@ -1183,4 +1183,4 @@
     (SHLQconst [32] (MOVBQZXload [i+4] {s} p mem)))
     (SHLQconst [40] (MOVBQZXload [i+5] {s} p mem)))
     (SHLQconst [48] (MOVBQZXload [i+6] {s} p mem)))
-    (SHLQconst [56] (MOVBQZXload [i+7] {s} p mem))) -> (MOVQload p mem)
+    (SHLQconst [56] (MOVBQZXload [i+7] {s} p mem))) -> (MOVQload (ADDQconst [i] p) mem)
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 0675d86d3f..698e6ab167 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -9465,7 +9465,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool {
 	}
 	// match: (ORL (ORL (ORL (MOVBQZXload [i] {s} p mem) (SHLLconst [8] (MOVBQZXload [i+1] {s} p mem))) (SHLLconst [16] (MOVBQZXload [i+2] {s} p mem))) (SHLLconst [24] (MOVBQZXload [i+3] {s} p mem)))
 	// cond:
-	// result: (MOVLload p mem)
+	// result: (MOVLload (ADDQconst [i] p) mem)
 	for {
 		if v.Args[0].Op != OpAMD64ORL {
 			break
@@ -9544,7 +9544,10 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool {
 			break
 		}
 		v.reset(OpAMD64MOVLload)
-		v.AddArg(p)
+		v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, config.fe.TypeUInt64())
+		v0.AuxInt = i
+		v0.AddArg(p)
+		v.AddArg(v0)
 		v.AddArg(mem)
 		return true
 	}
@@ -9646,7 +9649,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool {
 	}
 	// match: (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (MOVBQZXload [i] {s} p mem) (SHLQconst [8] (MOVBQZXload [i+1] {s} p mem))) (SHLQconst [16] (MOVBQZXload [i+2] {s} p mem))) (SHLQconst [24] (MOVBQZXload [i+3] {s} p mem))) (SHLQconst [32] (MOVBQZXload [i+4] {s} p mem))) (SHLQconst [40] (MOVBQZXload [i+5] {s} p mem))) (SHLQconst [48] (MOVBQZXload [i+6] {s} p mem))) (SHLQconst [56] (MOVBQZXload [i+7] {s} p mem)))
 	// cond:
-	// result: (MOVQload p mem)
+	// result: (MOVQload (ADDQconst [i] p) mem)
 	for {
 		if v.Args[0].Op != OpAMD64ORQ {
 			break
@@ -9821,7 +9824,10 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool {
 			break
 		}
 		v.reset(OpAMD64MOVQload)
-		v.AddArg(p)
+		v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, config.fe.TypeUInt64())
+		v0.AuxInt = i
+		v0.AddArg(p)
+		v.AddArg(v0)
 		v.AddArg(mem)
 		return true
 	}
@@ -9915,7 +9921,7 @@ func rewriteValueAMD64_OpAMD64ORW(v *Value, config *Config) bool {
 	}
 	// match: (ORW (MOVBQZXload [i] {s} p mem) (SHLWconst [8] (MOVBQZXload [i+1] {s} p mem)))
 	// cond:
-	// result: (MOVWload p mem)
+	// result: (MOVWload (ADDQconst [i] p) mem)
 	for {
 		if v.Args[0].Op != OpAMD64MOVBQZXload {
 			break
@@ -9946,7 +9952,10 @@ func rewriteValueAMD64_OpAMD64ORW(v *Value, config *Config) bool {
 			break
 		}
 		v.reset(OpAMD64MOVWload)
-		v.AddArg(p)
+		v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, config.fe.TypeUInt64())
+		v0.AuxInt = i
+		v0.AddArg(p)
+		v.AddArg(v0)
 		v.AddArg(mem)
 		return true
 	}
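
As an illustration (not part of the patch above), the following self-contained Go
program shows the byte-assembly shape these rules target; the names readU32 and
data are made up for this sketch. It mirrors the new parseLE32 test: four
zero-extended byte loads at constant offsets i..i+3 ORed together, which the ORL
rule combines into a single 32-bit load. Before this change the combined load used
the base pointer p rather than p+i, so a read that should start at offset 2 came
back with the bytes at offset 0.

package main

import "fmt"

// readU32 assembles a little-endian uint32 starting at byte offset 2, the
// same pattern as parseLE32 in the test above. On amd64 the ORL rule may
// rewrite the four byte loads into one 32-bit load; whether that happens
// also depends on bounds-check elimination, so this is an illustration of
// the pattern, not a guarantee about the generated code.
//go:noinline
func readU32(b []byte) uint32 {
	return uint32(b[2]) | uint32(b[3])<<8 | uint32(b[4])<<16 | uint32(b[5])<<24
}

func main() {
	data := []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05}
	got := readU32(data)
	// With the fix the combined load reads from data[2:], giving 0x05040302;
	// the broken rewrite read from data[0:] and gave 0x03020100 instead.
	fmt.Printf("got %#x, want %#x\n", got, uint32(0x05040302))
}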