From a65b4904fc2a179289a241aeb010b5a33e377d2c Mon Sep 17 00:00:00 2001 From: Jakub Ciolek Date: Mon, 21 Aug 2023 10:09:54 +0200 Subject: [PATCH] cmd/compile: use MOVQ + MOVL for 11 byte moves MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Instead of using two MOVQ, use a shorter MOVL for the remaining 3 bytes. compilecmp (linux/amd64): runtime/pprof runtime/pprof.printCountProfile.func1 875 -> 869 (-0.69%) fmt [cmd/compile] fmt.Sprintf 5009 -> 4977 (-0.64%) runtime/pprof [cmd/compile] runtime/pprof.printCountProfile.func1 875 -> 869 (-0.69%) cmd/vendor/github.com/ianlancetaylor/demangle cmd/vendor/github.com/ianlancetaylor/demangle.(*GlobalCDtor).print 957 -> 938 (-1.99%) cmd/vendor/github.com/ianlancetaylor/demangle.(*ImaginaryType).printInner 270 -> 261 (-3.33%) cmd/vendor/github.com/ianlancetaylor/demangle.(*TemplateTemplateParam).print 876 -> 874 (-0.23%) cmd/cgo/internal/test cmd/cgo/internal/test.test18146.func1 73 -> 66 (-9.59%) cmd/go/internal/work cmd/go/internal/work.(*Builder).gccSupportsFlag 3469 -> 3461 (-0.23%) file before after Δ % runtime/pprof.s 79729 79723 -6 -0.008% fmt [cmd/compile].s 83679 83647 -32 -0.038% runtime/pprof [cmd/compile].s 79729 79723 -6 -0.008% cmd/vendor/github.com/ianlancetaylor/demangle.s 254999 254969 -30 -0.012% cmd/cgo/internal/test.s 212272 212265 -7 -0.003% cmd/go/internal/work.s 287700 287692 -8 -0.003% cmd/compile/internal/ssa.s 3193233 3193553 +320 +0.010% cmd/compile/internal/ssa [cmd/compile].s 3362126 3362446 +320 +0.010% total 30810158 30810709 +551 +0.002% Change-Id: Iad09a42fb247c85ca1c649c367aaf75a620dbbc5 Reviewed-on: https://go-review.googlesource.com/c/go/+/521475 Reviewed-by: Matthew Dempsky Run-TryBot: Jakub Ciolek TryBot-Result: Gopher Robot Reviewed-by: Keith Randall Auto-Submit: Matthew Dempsky Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/_gen/AMD64.rules | 5 +++- src/cmd/compile/internal/ssa/rewriteAMD64.go | 25 +++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index 9ffb196880..7840600ef6 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -303,10 +303,13 @@ (Move [10] dst src mem) => (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) +(Move [11] dst src mem) => + (MOVLstore [7] dst (MOVLload [7] src mem) + (MOVQstore dst (MOVQload src mem) mem)) (Move [12] dst src mem) => (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) -(Move [s] dst src mem) && s == 11 || s >= 13 && s <= 15 => +(Move [s] dst src mem) && s >= 13 && s <= 15 => (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem)) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index d3e009d644..5cf5425fdc 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -27456,6 +27456,27 @@ func rewriteValueAMD64_OpMove(v *Value) bool { v.AddArg3(dst, v0, v1) return true } + // match: (Move [11] dst src mem) + // result: (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 11 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(7) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(7) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } // match: (Move [12] dst src mem) // result: (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) for { @@ -27478,14 +27499,14 @@ func rewriteValueAMD64_OpMove(v *Value) bool { return true } // match: (Move [s] dst src mem) - // cond: s == 11 || s >= 13 && s <= 15 + // cond: s >= 13 && s <= 15 // result: (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem)) for { s := auxIntToInt64(v.AuxInt) dst := v_0 src := v_1 mem := v_2 - if !(s == 11 || s >= 13 && s <= 15) { + if !(s >= 13 && s <= 15) { break } v.reset(OpAMD64MOVQstore) -- 2.48.1