Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: set unalignedOK to make memcombine work properly on loong64
author limeidan <limeidan@loongson.cn>
Tue, 25 Mar 2025 07:02:03 +0000 (15:02 +0800)
committer Gopher Robot <gobot@golang.org>
Wed, 9 Apr 2025 16:18:20 +0000 (09:18 -0700)
goos: linux
goarch: loong64
pkg: unicode/utf8
cpu: Loongson-3A6000-HV @ 2500.00MHz
                            │     old     │                 new                 │
                            │   sec/op    │   sec/op     vs base                │
ValidTenASCIIChars            7.604n ± 0%   6.805n ± 0%  -10.51% (p=0.000 n=10)
Valid100KASCIIChars           37.41µ ± 0%   16.58µ ± 0%  -55.67% (p=0.000 n=10)
ValidTenJapaneseChars         60.84n ± 0%   58.62n ± 0%   -3.64% (p=0.000 n=10)
ValidLongMostlyASCII          113.5µ ± 0%   113.5µ ± 0%        ~ (p=0.303 n=10)
ValidLongJapanese             204.6µ ± 0%   206.8µ ± 0%   +1.07% (p=0.000 n=10)
ValidStringTenASCIIChars      7.604n ± 0%   6.803n ± 0%  -10.53% (p=0.000 n=10)
ValidString100KASCIIChars     38.05µ ± 0%   17.14µ ± 0%  -54.97% (p=0.000 n=10)
ValidStringTenJapaneseChars   60.58n ± 0%   59.48n ± 0%   -1.82% (p=0.000 n=10)
ValidStringLongMostlyASCII    113.5µ ± 0%   113.4µ ± 0%   -0.10% (p=0.000 n=10)
ValidStringLongJapanese       205.9µ ± 0%   207.3µ ± 0%   +0.67% (p=0.000 n=10)
geomean                       3.324µ        2.756µ       -17.08%

Change-Id: Id43b6e2e41907bd4b92f421dacde31f048db47d6
Reviewed-on: https://go-review.googlesource.com/c/go/+/662495
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Keith Randall <khr@google.com>
src/cmd/compile/internal/ssa/config.go
src/cmd/internal/sys/arch.go
test/codegen/memcombine.go

index 09c1ebb1077fbf495a1a7a4849fa630beb00f436..a3131efa41c73dbc64cda6f57d7c89f27936d680 100644 (file)
@@ -283,6 +283,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
                c.FPReg = framepointerRegLOONG64
                c.LinkReg = linkRegLOONG64
                c.hasGReg = true
+               c.unalignedOK = true
        case "s390x":
                c.PtrSize = 8
                c.RegSize = 8
index 3c28ff04058c48a733b70da17bf49b7ed883b287..484538f28f0dd53f62d84cdacd7ca4da5116f096 100644 (file)
@@ -144,7 +144,7 @@ var ArchLoong64 = &Arch{
        RegSize:        8,
        MinLC:          4,
        Alignment:      8, // Unaligned accesses are not guaranteed to be fast
-       CanMergeLoads:  false,
+       CanMergeLoads:  true,
        HasLR:          true,
        FixedFrameSize: 8, // LR
 }
index c5744bf8d7f1a2391619e04fdf03475ca3e36a5f..9ede80132cddaac7af39355d9b4b7f41350721a7 100644 (file)
@@ -19,7 +19,7 @@ func load_le64(b []byte) uint64 {
        // amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR`
        // s390x:`MOVDBR\s\(.*\),`
        // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
-       // loong64:`MOVBU\s\(R[0-9]+\),`
+       // loong64:`MOVV\s\(R[0-9]+\),`
        // ppc64le:`MOVD\s`,-`MOV[BHW]Z`
        // ppc64:`MOVDBR\s`,-`MOV[BHW]Z`
        return binary.LittleEndian.Uint64(b)
@@ -29,7 +29,7 @@ func load_le64_idx(b []byte, idx int) uint64 {
        // amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR`
        // s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
        // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
-       // loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),`
+       // loong64:`MOVV\s\(R[0-9]+\)\(R[0-9]+\),`
        // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
        // ppc64:`MOVDBR\s`,-`MOV[BHW]Z\s`
        return binary.LittleEndian.Uint64(b[idx:])
@@ -40,7 +40,7 @@ func load_le32(b []byte) uint32 {
        // 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
        // s390x:`MOVWBR\s\(.*\),`
        // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
-       // loong64:`MOVBU\s\(R[0-9]+\),`
+       // loong64:`MOVWU\s\(R[0-9]+\),`
        // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
        // ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
        return binary.LittleEndian.Uint32(b)
@@ -51,7 +51,7 @@ func load_le32_idx(b []byte, idx int) uint32 {
        // 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
        // s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
        // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
-       // loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),`
+       // loong64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`
        // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
        // ppc64:`MOVWBR\s`,-`MOV[BH]Z\s'
        return binary.LittleEndian.Uint32(b[idx:])
@@ -61,7 +61,7 @@ func load_le16(b []byte) uint16 {
        // amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
        // ppc64le:`MOVHZ\s`,-`MOVBZ`
        // arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
-       // loong64:`MOVBU\s\(R[0-9]+\),`
+       // loong64:`MOVHU\s\(R[0-9]+\),`
        // s390x:`MOVHBR\s\(.*\),`
        // ppc64:`MOVHBR\s`,-`MOVBZ`
        return binary.LittleEndian.Uint16(b)
@@ -72,7 +72,7 @@ func load_le16_idx(b []byte, idx int) uint16 {
        // ppc64le:`MOVHZ\s`,-`MOVBZ`
        // ppc64:`MOVHBR\s`,-`MOVBZ`
        // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
-       // loong64:`MOVBU\s\(R[0-9]+\)\(R[0-9]+\),`
+       // loong64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`
        // s390x:`MOVHBR\s\(.*\)\(.*\*1\),`
        return binary.LittleEndian.Uint16(b[idx:])
 }