]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify publicationBarrier
authorRuslan Andreev <kels9009@gmail.com>
Fri, 11 Jun 2021 09:27:09 +0000 (09:27 +0000)
committerCherry Mui <cherryyz@google.com>
Mon, 4 Oct 2021 22:00:19 +0000 (22:00 +0000)
This CL intrinsify asm call for publicationBarrier on ARM64. As for x86
we may completly removes any instructions due to strong memory
oredering, but decided to leave it as is for compiler barrier.

Benchmarks Go1 ARM64:
name                     old time/op    new time/op    delta
BinaryTree17-8              3.38s ± 1%     3.36s ± 1%    ~     (p=0.095 n=5+5)
Fannkuch11-8                2.93s ± 0%     2.84s ± 0%  -3.26%  (p=0.008 n=5+5)
FmtFprintfEmpty-8          54.2ns ± 1%    54.0ns ± 1%    ~     (p=0.690 n=5+5)
FmtFprintfString-8          111ns ± 0%     109ns ± 0%  -1.48%  (p=0.008 n=5+5)
FmtFprintfInt-8             140ns ± 0%     138ns ± 0%  -1.10%  (p=0.000 n=4+5)
FmtFprintfIntInt-8          168ns ± 0%     169ns ± 0%  +0.66%  (p=0.008 n=5+5)
FmtFprintfPrefixedInt-8     206ns ± 1%     195ns ± 0%  -5.12%  (p=0.008 n=5+5)
FmtFprintfFloat-8           270ns ± 0%     269ns ± 0%  -0.20%  (p=0.024 n=5+5)
FmtManyArgs-8               721ns ± 0%     733ns ± 0%  +1.69%  (p=0.008 n=5+5)
GobDecode-8                9.75ms ± 1%    9.28ms ± 3%  -4.88%  (p=0.008 n=5+5)
GobEncode-8                6.38ms ± 1%    6.34ms ± 1%    ~     (p=0.095 n=5+5)
Gzip-8                      255ms ± 0%     254ms ± 0%  -0.44%  (p=0.008 n=5+5)
Gunzip-8                   41.8ms ± 1%    40.8ms ± 0%  -2.40%  (p=0.008 n=5+5)
HTTPClientServer-8         65.1µs ± 1%    65.1µs ± 1%    ~     (p=0.690 n=5+5)
JSONEncode-8               11.7ms ± 0%    11.7ms ± 1%    ~     (p=0.841 n=5+5)
JSONDecode-8               60.2ms ± 1%    60.0ms ± 0%    ~     (p=0.841 n=5+5)
Mandelbrot200-8            5.85ms ± 0%    5.86ms ± 0%  +0.22%  (p=0.016 n=4+5)
GoParse-8                  4.38ms ± 0%    4.35ms ± 0%  -0.60%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-8      87.1ns ± 2%    88.3ns ± 1%    ~     (p=0.151 n=5+5)
RegexpMatchEasy0_1K-8       306ns ± 0%     306ns ± 1%    ~     (p=0.143 n=5+5)
RegexpMatchEasy1_32-8      86.3ns ± 0%    84.8ns ± 0%  -1.81%  (p=0.008 n=5+5)
RegexpMatchEasy1_1K-8       491ns ± 2%     487ns ± 1%    ~     (p=0.548 n=5+5)
RegexpMatchMedium_32-8     7.50ns ± 0%    7.49ns ± 1%    ~     (p=0.817 n=5+5)
RegexpMatchMedium_1K-8     40.3µs ± 1%    39.9µs ± 0%  -1.02%  (p=0.008 n=5+5)
RegexpMatchHard_32-8       2.10µs ± 1%    2.10µs ± 1%    ~     (p=0.548 n=5+5)
RegexpMatchHard_1K-8       62.4µs ± 1%    62.5µs ± 2%    ~     (p=0.690 n=5+5)
Revcomp-8                   504ms ± 1%     502ms ± 1%    ~     (p=0.095 n=5+5)
Template-8                 86.8ms ± 1%    86.5ms ± 1%    ~     (p=0.222 n=5+5)
TimeParse-8                 330ns ± 0%     327ns ± 0%  -0.84%  (p=0.008 n=5+5)
TimeFormat-8                383ns ± 1%     392ns ± 1%  +2.42%  (p=0.008 n=5+5)
[Geo mean]                 54.3µs         53.9µs       -0.67%

name                     old speed      new speed      delta
GobDecode-8              78.7MB/s ± 1%  82.8MB/s ± 4%  +5.16%  (p=0.008 n=5+5)
GobEncode-8               120MB/s ± 1%   121MB/s ± 1%    ~     (p=0.095 n=5+5)
Gzip-8                   76.2MB/s ± 0%  76.5MB/s ± 0%  +0.44%  (p=0.008 n=5+5)
Gunzip-8                  464MB/s ± 1%   475MB/s ± 0%  +2.45%  (p=0.008 n=5+5)
JSONEncode-8              166MB/s ± 0%   166MB/s ± 1%    ~     (p=0.841 n=5+5)
JSONDecode-8             32.2MB/s ± 1%  32.3MB/s ± 0%    ~     (p=0.714 n=5+5)
GoParse-8                13.2MB/s ± 0%  13.3MB/s ± 0%  +0.59%  (p=0.008 n=5+5)
RegexpMatchEasy0_32-8     368MB/s ± 2%   362MB/s ± 1%    ~     (p=0.151 n=5+5)
RegexpMatchEasy0_1K-8    3.34GB/s ± 0%  3.34GB/s ± 1%    ~     (p=0.127 n=5+5)
RegexpMatchEasy1_32-8     371MB/s ± 0%   378MB/s ± 0%  +1.84%  (p=0.008 n=5+5)
RegexpMatchEasy1_1K-8    2.09GB/s ± 2%  2.10GB/s ± 1%    ~     (p=0.548 n=5+5)
RegexpMatchMedium_32-8    133MB/s ± 0%   134MB/s ± 1%    ~     (p=0.952 n=5+5)
RegexpMatchMedium_1K-8   25.4MB/s ± 1%  25.6MB/s ± 0%  +1.04%  (p=0.008 n=5+5)
RegexpMatchHard_32-8     15.3MB/s ± 1%  15.2MB/s ± 1%    ~     (p=0.500 n=5+5)
RegexpMatchHard_1K-8     16.4MB/s ± 1%  16.4MB/s ± 2%    ~     (p=0.595 n=5+5)
Revcomp-8                 504MB/s ± 1%   506MB/s ± 1%    ~     (p=0.095 n=5+5)
Template-8               22.4MB/s ± 1%  22.4MB/s ± 1%    ~     (p=0.206 n=5+5)
[Geo mean]                120MB/s        121MB/s       +0.71%

Change-Id: I9cc10840b5c0d6bf759150f052c79f4c499e35e1
Reviewed-on: https://go-review.googlesource.com/c/go/+/328290
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Cherry Mui <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: David Chase <drchase@google.com>

src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/compile/internal/ssagen/ssa.go

index 9c26d90fd046f83f7fa8d7d314ab91a3a691895a..96a29224bf71d50b0201457ae965f54a55381eb6 100644 (file)
@@ -1132,6 +1132,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p := s.Prog(obj.AGETCALLERPC)
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
+       case ssa.OpARM64DMB:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = v.AuxInt
        case ssa.OpARM64FlagConstant:
                v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString())
        case ssa.OpARM64InvertFlags:
index 4b66883f266dc39913db799b3114d0fdbb0ced66..02fb4e19905cb5b0416b2386756f216b97dd5a5c 100644 (file)
 // Write barrier.
 (WB ...) => (LoweredWB ...)
 
+// Publication barrier (0xe is ST option)
+(PubBarrier mem) => (DMB [0xe] mem)
+
 (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
 (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
 (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
index e3ebb6e1afb563b8265477d17644924f864ac0d5..a4a5b9bdcdafc98dad7b47a88cca1ba178d1cfa0 100644 (file)
@@ -743,6 +743,9 @@ func init() {
                // Prefetch instruction
                // Do prefetch arg0 address with option aux. arg0=addr, arg1=memory, aux=option.
                {name: "PRFM", argLength: 2, aux: "Int64", reg: prefreg, asm: "PRFM", hasSideEffects: true},
+
+               // Publication barrier
+               {name: "DMB", argLength: 1, aux: "Int64", asm: "DMB", hasSideEffects: true}, // Do data barrier. arg0=memory, aux=option.
        }
 
        blocks := []blockData{
index 984552900fe5e8e2f418231c195a65b760f96bcb..4f133b1ff6a1b85de79dec35a4360d9cb2d2a231 100644 (file)
@@ -617,6 +617,9 @@ var genericOps = []opData{
        {name: "AtomicOr8Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                     // *arg0 |= arg1.  arg2=memory.  Returns memory.
        {name: "AtomicOr32Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                    // *arg0 |= arg1.  arg2=memory.  Returns memory.
 
+       // Publication barrier
+       {name: "PubBarrier", argLength: 1, hasSideEffects: true}, // Do data barrier. arg0=memory.
+
        // Clobber experiment op
        {name: "Clobber", argLength: 0, typ: "Void", aux: "SymOff", symEffect: "None"}, // write an invalid pointer value to the given pointer slot of a stack variable
        {name: "ClobberReg", argLength: 0, typ: "Void"},                                // clobber a register
index 32ffd28b6a3496b7044e56ab46e02dbb931d6b50..128ec1f049d4ed95c65ccde71cf2eea51828f539 100644 (file)
@@ -1633,6 +1633,7 @@ const (
        OpARM64LoweredPanicBoundsB
        OpARM64LoweredPanicBoundsC
        OpARM64PRFM
+       OpARM64DMB
 
        OpMIPSADD
        OpMIPSADDconst
@@ -2949,6 +2950,7 @@ const (
        OpAtomicAnd32Variant
        OpAtomicOr8Variant
        OpAtomicOr32Variant
+       OpPubBarrier
        OpClobber
        OpClobberReg
        OpPrefetchCache
@@ -21790,6 +21792,14 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "DMB",
+               auxType:        auxInt64,
+               argLen:         1,
+               hasSideEffects: true,
+               asm:            arm64.ADMB,
+               reg:            regInfo{},
+       },
 
        {
                name:        "ADD",
@@ -36739,6 +36749,12 @@ var opcodeTable = [...]opInfo{
                hasSideEffects: true,
                generic:        true,
        },
+       {
+               name:           "PubBarrier",
+               argLen:         1,
+               hasSideEffects: true,
+               generic:        true,
+       },
        {
                name:      "Clobber",
                auxType:   auxSymOff,
index 614e71f8521ef982375dbc78aed0af47649b1787..8ad9e400eb056b1462936708551b30d41e708a25 100644 (file)
@@ -920,6 +920,8 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpPrefetchCache(v)
        case OpPrefetchCacheStreamed:
                return rewriteValueARM64_OpPrefetchCacheStreamed(v)
+       case OpPubBarrier:
+               return rewriteValueARM64_OpPubBarrier(v)
        case OpRotateLeft16:
                return rewriteValueARM64_OpRotateLeft16(v)
        case OpRotateLeft32:
@@ -25613,6 +25615,18 @@ func rewriteValueARM64_OpPrefetchCacheStreamed(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM64_OpPubBarrier(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (PubBarrier mem)
+       // result: (DMB [0xe] mem)
+       for {
+               mem := v_0
+               v.reset(OpARM64DMB)
+               v.AuxInt = int64ToAuxInt(0xe)
+               v.AddArg(mem)
+               return true
+       }
+}
 func rewriteValueARM64_OpRotateLeft16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 56fc191e5aad8639bdf6b1a5213c038bf08fcde4..1bfbe7ce65a83809b52ff345263d162c5ced1037 100644 (file)
@@ -3856,6 +3856,13 @@ func InitTables() {
                },
                all...)
 
+       addF("runtime", "publicationBarrier",
+               func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+                       s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem())
+                       return nil
+               },
+               sys.ARM64)
+
        /******** runtime/internal/sys ********/
        addF("runtime/internal/sys", "Ctz32",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {