Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile/internal: intrinsify publicationBarrier on loong64
author: Guoqi Chen <chenguoqi@loongson.cn>
Thu, 19 Sep 2024 11:50:23 +0000 (19:50 +0800)
committer: abner chenc <chenguoqi@loongson.cn>
Fri, 8 Nov 2024 01:04:43 +0000 (01:04 +0000)
The publication barrier is a StoreStore barrier, which is implemented
by "DBAR 0x1A" [1] on loong64.

goos: linux
goarch: loong64
pkg: runtime
cpu: Loongson-3A6000 @ 2500.00MHz
                     |   bench.old   |  bench.new                            |
                     |    sec/op     |   sec/op        vs base               |
Malloc8                 31.76n ± 0%     22.79n ± 0%   -28.24% (p=0.000 n=20)
Malloc8-2               25.46n ± 0%     18.33n ± 0%   -28.00% (p=0.000 n=20)
Malloc8-4               25.75n ± 0%     18.43n ± 0%   -28.41% (p=0.000 n=20)
Malloc16                62.97n ± 0%     42.41n ± 0%   -32.65% (p=0.000 n=20)
Malloc16-2              49.11n ± 0%     31.68n ± 0%   -35.50% (p=0.000 n=20)
Malloc16-4              49.64n ± 1%     31.95n ± 0%   -35.62% (p=0.000 n=20)
MallocTypeInfo8         58.57n ± 0%     46.51n ± 0%   -20.61% (p=0.000 n=20)
MallocTypeInfo8-2       51.43n ± 0%     38.01n ± 0%   -26.09% (p=0.000 n=20)
MallocTypeInfo8-4       51.65n ± 0%     38.15n ± 0%   -26.13% (p=0.000 n=20)
MallocTypeInfo16        68.07n ± 0%     51.62n ± 0%   -24.17% (p=0.000 n=20)
MallocTypeInfo16-2      54.73n ± 0%     41.13n ± 0%   -24.85% (p=0.000 n=20)
MallocTypeInfo16-4      55.05n ± 0%     41.28n ± 0%   -25.02% (p=0.000 n=20)
MallocLargeStruct       491.5n ± 0%     454.8n ± 0%    -7.47% (p=0.000 n=20)
MallocLargeStruct-2     351.8n ± 1%     323.8n ± 0%    -7.94% (p=0.000 n=20)
MallocLargeStruct-4     333.6n ± 0%     316.7n ± 0%    -5.10% (p=0.000 n=20)
geomean                 71.01n          53.78n        -24.26%

[1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html

Change-Id: Ica0c89db6f2bebd55d9b3207a1c462a9454e9268
Reviewed-on: https://go-review.googlesource.com/c/go/+/577515
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Carlos Amedee <carlos@golang.org>
src/cmd/compile/internal/loong64/ssa.go
src/cmd/compile/internal/ssa/_gen/LOONG64.rules
src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteLOONG64.go
src/cmd/compile/internal/ssagen/intrinsics.go
src/cmd/compile/internal/ssagen/intrinsics_test.go
src/runtime/atomic_loong64.s

index bec76843786fcf386eca547d038db1f1e791901d..02286b8de82f9436659c37febbc6f35eb52dbe97 100644 (file)
@@ -589,6 +589,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Name = obj.NAME_EXTERN
                // AuxInt encodes how many buffer entries we need.
                p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
+
+       case ssa.OpLOONG64LoweredPubBarrier:
+               // DBAR 0x1A
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = 0x1A
+
        case ssa.OpLOONG64LoweredPanicBoundsA, ssa.OpLOONG64LoweredPanicBoundsB, ssa.OpLOONG64LoweredPanicBoundsC:
                p := s.Prog(obj.ACALL)
                p.To.Type = obj.TYPE_MEM
index 383cac40aba73421aa83ef6bb06173aed2fb54ac..69119f1d96d46026918ce1862e1338095282f00d 100644 (file)
 // Write barrier.
 (WB ...) => (LoweredWB ...)
 
+// Publication barrier as intrinsic
+(PubBarrier ...) => (LoweredPubBarrier ...)
+
 (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
 (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
 (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
index 2d8d87fa4ae76b7e983a73885e16c8b48aa2e371..5789760683c43a454592a2f881243bde001255cd 100644 (file)
@@ -497,6 +497,9 @@ func init() {
                // Returns a pointer to a write barrier buffer in R29.
                {name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R1"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"},
 
+               // Do data barrier. arg0=memory
+               {name: "LoweredPubBarrier", argLength: 1, asm: "DBAR", hasSideEffects: true},
+
                // There are three of these functions so that they can have three different register inputs.
                // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
                // default registers to match so we don't need to copy registers around unnecessarily.
index 61d3b0462fc7239d014cf27f2fc84442c5011d63..b18a4385d2be4dc0e8e2927135c013e4b56947c1 100644 (file)
@@ -1917,6 +1917,7 @@ const (
        OpLOONG64LoweredGetCallerSP
        OpLOONG64LoweredGetCallerPC
        OpLOONG64LoweredWB
+       OpLOONG64LoweredPubBarrier
        OpLOONG64LoweredPanicBoundsA
        OpLOONG64LoweredPanicBoundsB
        OpLOONG64LoweredPanicBoundsC
@@ -25711,6 +25712,13 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "LoweredPubBarrier",
+               argLen:         1,
+               hasSideEffects: true,
+               asm:            loong64.ADBAR,
+               reg:            regInfo{},
+       },
        {
                name:    "LoweredPanicBoundsA",
                auxType: auxInt64,
index 14cbd25ee22ee5bfeb5e4a23365c690e4e9a8d9c..fedcd196d469cb5f7d75175435c30c0bcad007b3 100644 (file)
@@ -584,6 +584,9 @@ func rewriteValueLOONG64(v *Value) bool {
                return true
        case OpPanicBounds:
                return rewriteValueLOONG64_OpPanicBounds(v)
+       case OpPubBarrier:
+               v.Op = OpLOONG64LoweredPubBarrier
+               return true
        case OpRotateLeft16:
                return rewriteValueLOONG64_OpRotateLeft16(v)
        case OpRotateLeft32:
index 9084c2f690969631c275d74ed57b6b3020e6444b..fda273b3e561f09c0cc43172c51e584d01b37a23 100644 (file)
@@ -162,7 +162,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
                        s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem())
                        return nil
                },
-               sys.ARM64, sys.PPC64, sys.RISCV64)
+               sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64)
 
        /******** internal/runtime/sys ********/
        add("internal/runtime/sys", "GetCallerPC",
index 5e71639a29ae7061b5124cb7587c459bb09edb71..4e59714ce7fe22782e5c3198389eccf6400d4f6c 100644 (file)
@@ -419,6 +419,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"loong64", "math/bits", "Sub"}:                            struct{}{},
        {"loong64", "math/bits", "Sub64"}:                          struct{}{},
        {"loong64", "runtime", "KeepAlive"}:                        struct{}{},
+       {"loong64", "runtime", "publicationBarrier"}:               struct{}{},
        {"loong64", "runtime", "slicebytetostringtmp"}:             struct{}{},
        {"loong64", "sync", "runtime_LoadAcquintptr"}:              struct{}{},
        {"loong64", "sync", "runtime_StoreReluintptr"}:             struct{}{},
index 4818a827de4d20a90e1e8d7bf55e4085c100ba52..5332d36fadb1fb3fb223867c73a6ec9d08611032 100644 (file)
@@ -5,5 +5,5 @@
 #include "textflag.h"
 
 TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
-       DBAR
+       DBAR    $0x1A // StoreStore barrier
        RET