From f3884680fcbda5640e14a01d17aa38d06c70891f Mon Sep 17 00:00:00 2001
From: Ilya Tocar
Date: Wed, 9 Aug 2017 14:00:38 -0500
Subject: [PATCH] cmd/compile/internal/ssa: inline memmove with known size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace calls to memmove that have a known (constant) size with OpMove.
Do this only when it is safe from an aliasing point of view.

Helps with code like this:

	append(buf, "const str"...)

In strconv this provides a nice benefit:

Quote-6            731ns ± 2%   647ns ± 3%   -11.41%  (p=0.000 n=10+10)
QuoteRune-6        117ns ± 5%   111ns ± 1%    -4.54%  (p=0.000 n=10+10)
AppendQuote-6      475ns ± 0%   396ns ± 0%   -16.59%  (p=0.000 n=9+10)
AppendQuoteRune-6  32.0ns ± 0%  27.4ns ± 0%  -14.41%  (p=0.000 n=8+9)

Change-Id: I7704f5c51b46aed2d8f033de74c75140fc35036c
Reviewed-on: https://go-review.googlesource.com/54394
Run-TryBot: Ilya Tocar
Reviewed-by: Keith Randall
TryBot-Result: Gobot Gobot
---
 src/cmd/compile/internal/gc/asm_test.go       | 50 +++++++++++
 .../compile/internal/ssa/gen/generic.rules    |  8 ++
 .../compile/internal/ssa/gen/genericOps.go    |  7 +-
 src/cmd/compile/internal/ssa/rewrite.go       | 15 ++++
 src/cmd/compile/internal/ssa/rewrite_test.go  | 22 +++++
 .../compile/internal/ssa/rewritegeneric.go    | 89 +++++++++++++++++++
 6 files changed, 189 insertions(+), 2 deletions(-)

diff --git a/src/cmd/compile/internal/gc/asm_test.go b/src/cmd/compile/internal/gc/asm_test.go
index c609e0088f..edcdf04286 100644
--- a/src/cmd/compile/internal/gc/asm_test.go
+++ b/src/cmd/compile/internal/gc/asm_test.go
@@ -1191,6 +1191,36 @@ var linuxAMD64Tests = []*asmTest{
 		`,
 		pos: []string{"\tANDQ\t\\$4095,"},
 	},
+	{
+		// Test that a small memmove is replaced with direct MOVs.
+		fn: `
+		func $() {
+			x := [...]byte{1, 2, 3, 4, 5, 6, 7}
+			copy(x[1:], x[:])
+		}
+		`,
+		neg: []string{"memmove"},
+	},
+	{
+		// Same as above, but with a different size.
+		fn: `
+		func $() {
+			x := [...]byte{1, 2, 3, 4}
+			copy(x[1:], x[:])
+		}
+		`,
+		neg: []string{"memmove"},
+	},
+	{
+		// Same as above, but with a different size.
+		fn: `
+		func $() {
+			x := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+			copy(x[1:], x[:])
+		}
+		`,
+		neg: []string{"memmove"},
+	},
 }
 
 var linux386Tests = []*asmTest{
@@ -1322,6 +1352,26 @@ var linux386Tests = []*asmTest{
 		`,
 		pos: []string{"\tANDL\t\\$4095,"},
 	},
+	{
+		// Test that a small memmove is replaced with direct MOVs.
+		fn: `
+		func $() {
+			x := [...]byte{1, 2, 3, 4, 5, 6, 7}
+			copy(x[1:], x[:])
+		}
+		`,
+		neg: []string{"memmove"},
+	},
+	{
+		// Same as above, but with a different size.
+		fn: `
+		func $() {
+			x := [...]byte{1, 2, 3, 4}
+			copy(x[1:], x[:])
+		}
+		`,
+		neg: []string{"memmove"},
+	},
 }
 
 var linuxS390XTests = []*asmTest{
diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 5150eec0ef..9b004be9f1 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -1443,6 +1443,14 @@
 (EqPtr x x) -> (ConstBool [1])
 (EqPtr (Addr {a} x) (Addr {b} x)) -> (ConstBool [b2i(a == b)])
 
+// Inline small runtime.memmove calls with constant length.
+(StaticCall {sym} s1:(Store _ (Const64 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
+  && isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
+  -> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
+(StaticCall {sym} s1:(Store _ (Const32 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
+  && isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
+  -> (Move {t.(*types.Type).Elem()} [sz] dst src mem)
+
 // De-virtualize interface calls into static calls.
 // Note that (ITab (IMake)) doesn't get
 // rewritten until after the first opt pass,
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index d36910e7da..5ce11c7e87 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -321,8 +321,11 @@ var genericOps = []opData{
 	{name: "Invalid"}, // unused value
 
 	// Memory operations
-	{name: "Load", argLength: 2},                          // Load from arg0. arg1=memory
-	{name: "Store", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	{name: "Load", argLength: 2},                          // Load from arg0. arg1=memory
+	{name: "Store", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+	// The source and destination of Move may overlap in some cases. See e.g.
+	// memmove inlining in generic.rules. When isInlinableMemmoveSize (in ../rewrite.go)
+	// returns true, we must do all loads before all stores when lowering Move.
 	{name: "Move", argLength: 3, typ: "Mem", aux: "TypSize"}, // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size, aux=type. Returns memory.
 	{name: "Zero", argLength: 2, typ: "Mem", aux: "TypSize"}, // arg0=destptr, arg1=mem, auxint=size, aux=type. Returns memory.
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index b214f92bb9..6d77da079b 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -668,3 +668,18 @@ func zeroUpper32Bits(x *Value, depth int) bool {
 	}
 	return false
 }
+
+// isInlinableMemmoveSize reports whether the given arch performs an OpMove of the given size
+// faster than memmove, and in a safe way when src and dst overlap.
+// This is used as a check for replacing memmove with OpMove.
+func isInlinableMemmoveSize(sz int64, c *Config) bool {
+	switch c.arch {
+	case "amd64", "amd64p32":
+		return sz <= 16
+	case "386", "ppc64", "s390x", "ppc64le":
+		return sz <= 8
+	case "arm", "mips", "mips64", "mipsle", "mips64le":
+		return sz <= 4
+	}
+	return false
+}
diff --git a/src/cmd/compile/internal/ssa/rewrite_test.go b/src/cmd/compile/internal/ssa/rewrite_test.go
index c21c64bb7b..8a097b04f6 100644
--- a/src/cmd/compile/internal/ssa/rewrite_test.go
+++ b/src/cmd/compile/internal/ssa/rewrite_test.go
@@ -103,3 +103,25 @@ func TestLog2(t *testing.T) {
 		}
 	}
 }
+
+// We generate memmove for copy(x[1:], x[:]); however, we may change it to OpMove
+// because the size is known. Check that OpMove is alias-safe, or that we really called memmove.
+func TestMove(t *testing.T) {
+	x := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40}
+	copy(x[1:], x[:])
+	for i := 1; i < len(x); i++ {
+		if int(x[i]) != i {
+			t.Errorf("Memmove got converted to OpMove in an alias-unsafe way. Got %d instead of %d in position %d", int(x[i]), i, i+1)
+		}
+	}
+}
+
+func TestMoveSmall(t *testing.T) {
+	x := [...]byte{1, 2, 3, 4, 5, 6, 7}
+	copy(x[1:], x[:])
+	for i := 1; i < len(x); i++ {
+		if int(x[i]) != i {
+			t.Errorf("Memmove got converted to OpMove in an alias-unsafe way. Got %d instead of %d in position %d", int(x[i]), i, i+1)
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index 8def2dbcc8..def2cca9fc 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -409,6 +409,8 @@ func rewriteValuegeneric(v *Value) bool {
 		return rewriteValuegeneric_OpSlicemask_0(v)
 	case OpSqrt:
 		return rewriteValuegeneric_OpSqrt_0(v)
+	case OpStaticCall:
+		return rewriteValuegeneric_OpStaticCall_0(v)
 	case OpStore:
 		return rewriteValuegeneric_OpStore_0(v) || rewriteValuegeneric_OpStore_10(v)
 	case OpStringLen:
@@ -23732,6 +23734,93 @@ func rewriteValuegeneric_OpSqrt_0(v *Value) bool {
 	}
 	return false
 }
+func rewriteValuegeneric_OpStaticCall_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	config := b.Func.Config
+	_ = config
+	// match: (StaticCall {sym} s1:(Store _ (Const64 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
+	// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
+	// result: (Move {t.(*types.Type).Elem()} [sz] dst src mem)
+	for {
+		sym := v.Aux
+		s1 := v.Args[0]
+		if s1.Op != OpStore {
+			break
+		}
+		_ = s1.Args[2]
+		s1_1 := s1.Args[1]
+		if s1_1.Op != OpConst64 {
+			break
+		}
+		sz := s1_1.AuxInt
+		s2 := s1.Args[2]
+		if s2.Op != OpStore {
+			break
+		}
+		_ = s2.Args[2]
+		src := s2.Args[1]
+		s3 := s2.Args[2]
+		if s3.Op != OpStore {
+			break
+		}
+		t := s3.Aux
+		_ = s3.Args[2]
+		dst := s3.Args[1]
+		mem := s3.Args[2]
+		if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz, config)) {
+			break
+		}
+		v.reset(OpMove)
+		v.AuxInt = sz
+		v.Aux = t.(*types.Type).Elem()
+		v.AddArg(dst)
+		v.AddArg(src)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (StaticCall {sym} s1:(Store _ (Const32 [sz]) s2:(Store _ src s3:(Store {t} _ dst mem))))
+	// cond: isSameSym(sym,"runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz,config)
+	// result: (Move {t.(*types.Type).Elem()} [sz] dst src mem)
+	for {
+		sym := v.Aux
+		s1 := v.Args[0]
+		if s1.Op != OpStore {
+			break
+		}
+		_ = s1.Args[2]
+		s1_1 := s1.Args[1]
+		if s1_1.Op != OpConst32 {
+			break
+		}
+		sz := s1_1.AuxInt
+		s2 := s1.Args[2]
+		if s2.Op != OpStore {
+			break
+		}
+		_ = s2.Args[2]
+		src := s2.Args[1]
+		s3 := s2.Args[2]
+		if s3.Op != OpStore {
+			break
+		}
+		t := s3.Aux
+		_ = s3.Args[2]
+		dst := s3.Args[1]
+		mem := s3.Args[2]
+		if !(isSameSym(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmoveSize(sz, config)) {
+			break
+		}
+		v.reset(OpMove)
+		v.AuxInt = sz
+		v.Aux = t.(*types.Type).Elem()
+		v.AddArg(dst)
+		v.AddArg(src)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValuegeneric_OpStore_0(v *Value) bool {
 	b := v.Block
 	_ = b
-- 
2.48.1
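
Note (illustration, not part of the patch): for readers following the transformation end to end, here is a minimal, self-contained sketch that mirrors the new asm_test.go and rewrite_test.go cases. The 16-byte limit mentioned in the comments comes from isInlinableMemmoveSize above, and the expected output assumes the alias-safe, memmove-like overlap semantics that the patch requires when lowering Move.

	package main

	import "fmt"

	func main() {
		// The copy length here is a compile-time constant (6 bytes), below
		// the 16-byte amd64 limit in isInlinableMemmoveSize, so the
		// runtime.memmove call is eligible to be rewritten into an OpMove.
		x := [...]byte{1, 2, 3, 4, 5, 6, 7}

		// Source x[:] and destination x[1:] overlap, so a correct lowering
		// must perform all loads before all stores, exactly like memmove.
		copy(x[1:], x[:])

		// Under memmove semantics each byte shifts right by one position.
		fmt.Println(x) // Output: [1 1 2 3 4 5 6]
	}

The program must print the same result whether the compiler emits a runtime.memmove call or the inlined OpMove; TestMove and TestMoveSmall in rewrite_test.go assert exactly this property at sizes above and below the threshold.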