From 09d76e59d2e4e1c8d2c2ea99570647890057966f Mon Sep 17 00:00:00 2001 From: limeidan Date: Tue, 25 Mar 2025 15:02:03 +0800 Subject: [PATCH] cmd/compile: set unalignedOK to make memcombine work properly on loong64 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit goos: linux goarch: loong64 pkg: unicode/utf8 cpu: Loongson-3A6000-HV @ 2500.00MHz │ old │ new │ │ sec/op │ sec/op vs base │ ValidTenASCIIChars 7.604n ± 0% 6.805n ± 0% -10.51% (p=0.000 n=10) Valid100KASCIIChars 37.41µ ± 0% 16.58µ ± 0% -55.67% (p=0.000 n=10) ValidTenJapaneseChars 60.84n ± 0% 58.62n ± 0% -3.64% (p=0.000 n=10) ValidLongMostlyASCII 113.5µ ± 0% 113.5µ ± 0% ~ (p=0.303 n=10) ValidLongJapanese 204.6µ ± 0% 206.8µ ± 0% +1.07% (p=0.000 n=10) ValidStringTenASCIIChars 7.604n ± 0% 6.803n ± 0% -10.53% (p=0.000 n=10) ValidString100KASCIIChars 38.05µ ± 0% 17.14µ ± 0% -54.97% (p=0.000 n=10) ValidStringTenJapaneseChars 60.58n ± 0% 59.48n ± 0% -1.82% (p=0.000 n=10) ValidStringLongMostlyASCII 113.5µ ± 0% 113.4µ ± 0% -0.10% (p=0.000 n=10) ValidStringLongJapanese 205.9µ ± 0% 207.3µ ± 0% +0.67% (p=0.000 n=10) geomean 3.324µ 2.756µ -17.08% Change-Id: Id43b6e2e41907bd4b92f421dacde31f048db47d6 Reviewed-on: https://go-review.googlesource.com/c/go/+/662495 LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: Keith Randall Auto-Submit: Keith Randall Reviewed-by: abner chenc Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/config.go | 1 + src/cmd/internal/sys/arch.go | 2 +- test/codegen/memcombine.go | 12 ++++++------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 09c1ebb107..a3131efa41 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -283,6 +283,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo c.FPReg = framepointerRegLOONG64 c.LinkReg = linkRegLOONG64 c.hasGReg = true + c.unalignedOK = true case "s390x": c.PtrSize = 8 c.RegSize = 8 diff --git a/src/cmd/internal/sys/arch.go b/src/cmd/internal/sys/arch.go index 3c28ff0405..484538f28f 100644 --- a/src/cmd/internal/sys/arch.go +++ b/src/cmd/internal/sys/arch.go @@ -144,7 +144,7 @@ var ArchLoong64 = &Arch{ RegSize: 8, MinLC: 4, Alignment: 8, // Unaligned accesses are not guaranteed to be fast - CanMergeLoads: false, + CanMergeLoads: true, HasLR: true, FixedFrameSize: 8, // LR } diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index c5744bf8d7..9ede80132c 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -19,7 +19,7 @@ func load_le64(b []byte) uint64 { // amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR` // s390x:`MOVDBR\s\(.*\),` // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]` - // loong64:`MOVBU\s\(R[0-9]+\),` + // loong64:`MOVV\s\(R[0-9]+\),` // ppc64le:`MOVD\s`,-`MOV[BHW]Z` // ppc64:`MOVDBR\s`,-`MOV[BHW]Z` return binary.LittleEndian.Uint64(b) @@ -29,7 +29,7 @@ func load_le64_idx(b []byte, idx int) uint64 { // amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR` // s390x:`MOVDBR\s\(.*\)\(.*\*1\),` // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]` - // loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),` + // loong64:`MOVV\s\(R[0-9]+\)\(R[0-9]+\),` // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s` // ppc64:`MOVDBR\s`,-`MOV[BHW]Z\s` return binary.LittleEndian.Uint64(b[idx:]) @@ -40,7 +40,7 @@ func load_le32(b []byte) uint32 { // 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR` // s390x:`MOVWBR\s\(.*\),` // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]` - // loong64:`MOVBU\s\(R[0-9]+\),` + // loong64:`MOVWU\s\(R[0-9]+\),` // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s` // ppc64:`MOVWBR\s`,-`MOV[BH]Z\s` return binary.LittleEndian.Uint32(b) @@ -51,7 +51,7 @@ func load_le32_idx(b []byte, idx int) uint32 { // 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR` // s390x:`MOVWBR\s\(.*\)\(.*\*1\),` // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]` - // loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),` + // loong64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),` // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s` // ppc64:`MOVWBR\s`,-`MOV[BH]Z\s' return binary.LittleEndian.Uint32(b[idx:]) @@ -61,7 +61,7 @@ func load_le16(b []byte) uint16 { // amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR` // ppc64le:`MOVHZ\s`,-`MOVBZ` // arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB` - // loong64:`MOVBU\s\(R[0-9]+\),` + // loong64:`MOVHU\s\(R[0-9]+\),` // s390x:`MOVHBR\s\(.*\),` // ppc64:`MOVHBR\s`,-`MOVBZ` return binary.LittleEndian.Uint16(b) @@ -72,7 +72,7 @@ func load_le16_idx(b []byte, idx int) uint16 { // ppc64le:`MOVHZ\s`,-`MOVBZ` // ppc64:`MOVHBR\s`,-`MOVBZ` // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB` - // loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),` + // loong64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),` // s390x:`MOVHBR\s\(.*\)\(.*\*1\),` return binary.LittleEndian.Uint16(b[idx:]) } -- 2.51.0