From: Ruslan Andreev Date: Fri, 11 Jun 2021 09:27:09 +0000 (+0000) Subject: cmd/compile: intrinsify publicationBarrier X-Git-Tag: go1.18beta1~1072 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=739328c694d5e608faa66d17192f0a59f6e01d04;p=gostls13.git cmd/compile: intrinsify publicationBarrier This CL intrinsify asm call for publicationBarrier on ARM64. As for x86 we may completly removes any instructions due to strong memory oredering, but decided to leave it as is for compiler barrier. Benchmarks Go1 ARM64: name old time/op new time/op delta BinaryTree17-8 3.38s ± 1% 3.36s ± 1% ~ (p=0.095 n=5+5) Fannkuch11-8 2.93s ± 0% 2.84s ± 0% -3.26% (p=0.008 n=5+5) FmtFprintfEmpty-8 54.2ns ± 1% 54.0ns ± 1% ~ (p=0.690 n=5+5) FmtFprintfString-8 111ns ± 0% 109ns ± 0% -1.48% (p=0.008 n=5+5) FmtFprintfInt-8 140ns ± 0% 138ns ± 0% -1.10% (p=0.000 n=4+5) FmtFprintfIntInt-8 168ns ± 0% 169ns ± 0% +0.66% (p=0.008 n=5+5) FmtFprintfPrefixedInt-8 206ns ± 1% 195ns ± 0% -5.12% (p=0.008 n=5+5) FmtFprintfFloat-8 270ns ± 0% 269ns ± 0% -0.20% (p=0.024 n=5+5) FmtManyArgs-8 721ns ± 0% 733ns ± 0% +1.69% (p=0.008 n=5+5) GobDecode-8 9.75ms ± 1% 9.28ms ± 3% -4.88% (p=0.008 n=5+5) GobEncode-8 6.38ms ± 1% 6.34ms ± 1% ~ (p=0.095 n=5+5) Gzip-8 255ms ± 0% 254ms ± 0% -0.44% (p=0.008 n=5+5) Gunzip-8 41.8ms ± 1% 40.8ms ± 0% -2.40% (p=0.008 n=5+5) HTTPClientServer-8 65.1µs ± 1% 65.1µs ± 1% ~ (p=0.690 n=5+5) JSONEncode-8 11.7ms ± 0% 11.7ms ± 1% ~ (p=0.841 n=5+5) JSONDecode-8 60.2ms ± 1% 60.0ms ± 0% ~ (p=0.841 n=5+5) Mandelbrot200-8 5.85ms ± 0% 5.86ms ± 0% +0.22% (p=0.016 n=4+5) GoParse-8 4.38ms ± 0% 4.35ms ± 0% -0.60% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 87.1ns ± 2% 88.3ns ± 1% ~ (p=0.151 n=5+5) RegexpMatchEasy0_1K-8 306ns ± 0% 306ns ± 1% ~ (p=0.143 n=5+5) RegexpMatchEasy1_32-8 86.3ns ± 0% 84.8ns ± 0% -1.81% (p=0.008 n=5+5) RegexpMatchEasy1_1K-8 491ns ± 2% 487ns ± 1% ~ (p=0.548 n=5+5) RegexpMatchMedium_32-8 7.50ns ± 0% 7.49ns ± 1% ~ (p=0.817 n=5+5) RegexpMatchMedium_1K-8 40.3µs ± 1% 39.9µs ± 0% -1.02% (p=0.008 n=5+5) RegexpMatchHard_32-8 2.10µs ± 1% 2.10µs ± 1% ~ (p=0.548 n=5+5) RegexpMatchHard_1K-8 62.4µs ± 1% 62.5µs ± 2% ~ (p=0.690 n=5+5) Revcomp-8 504ms ± 1% 502ms ± 1% ~ (p=0.095 n=5+5) Template-8 86.8ms ± 1% 86.5ms ± 1% ~ (p=0.222 n=5+5) TimeParse-8 330ns ± 0% 327ns ± 0% -0.84% (p=0.008 n=5+5) TimeFormat-8 383ns ± 1% 392ns ± 1% +2.42% (p=0.008 n=5+5) [Geo mean] 54.3µs 53.9µs -0.67% name old speed new speed delta GobDecode-8 78.7MB/s ± 1% 82.8MB/s ± 4% +5.16% (p=0.008 n=5+5) GobEncode-8 120MB/s ± 1% 121MB/s ± 1% ~ (p=0.095 n=5+5) Gzip-8 76.2MB/s ± 0% 76.5MB/s ± 0% +0.44% (p=0.008 n=5+5) Gunzip-8 464MB/s ± 1% 475MB/s ± 0% +2.45% (p=0.008 n=5+5) JSONEncode-8 166MB/s ± 0% 166MB/s ± 1% ~ (p=0.841 n=5+5) JSONDecode-8 32.2MB/s ± 1% 32.3MB/s ± 0% ~ (p=0.714 n=5+5) GoParse-8 13.2MB/s ± 0% 13.3MB/s ± 0% +0.59% (p=0.008 n=5+5) RegexpMatchEasy0_32-8 368MB/s ± 2% 362MB/s ± 1% ~ (p=0.151 n=5+5) RegexpMatchEasy0_1K-8 3.34GB/s ± 0% 3.34GB/s ± 1% ~ (p=0.127 n=5+5) RegexpMatchEasy1_32-8 371MB/s ± 0% 378MB/s ± 0% +1.84% (p=0.008 n=5+5) RegexpMatchEasy1_1K-8 2.09GB/s ± 2% 2.10GB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchMedium_32-8 133MB/s ± 0% 134MB/s ± 1% ~ (p=0.952 n=5+5) RegexpMatchMedium_1K-8 25.4MB/s ± 1% 25.6MB/s ± 0% +1.04% (p=0.008 n=5+5) RegexpMatchHard_32-8 15.3MB/s ± 1% 15.2MB/s ± 1% ~ (p=0.500 n=5+5) RegexpMatchHard_1K-8 16.4MB/s ± 1% 16.4MB/s ± 2% ~ (p=0.595 n=5+5) Revcomp-8 504MB/s ± 1% 506MB/s ± 1% ~ (p=0.095 n=5+5) Template-8 22.4MB/s ± 1% 22.4MB/s ± 1% ~ (p=0.206 n=5+5) [Geo mean] 120MB/s 121MB/s +0.71% Change-Id: I9cc10840b5c0d6bf759150f052c79f4c499e35e1 Reviewed-on: https://go-review.googlesource.com/c/go/+/328290 Reviewed-by: Cherry Mui Run-TryBot: Cherry Mui TryBot-Result: Go Bot Trust: David Chase --- diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 9c26d90fd0..96a29224bf 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -1132,6 +1132,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p := s.Prog(obj.AGETCALLERPC) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() + case ssa.OpARM64DMB: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt case ssa.OpARM64FlagConstant: v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString()) case ssa.OpARM64InvertFlags: diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index 4b66883f26..02fb4e1990 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -568,6 +568,9 @@ // Write barrier. (WB ...) => (LoweredWB ...) +// Publication barrier (0xe is ST option) +(PubBarrier mem) => (DMB [0xe] mem) + (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index e3ebb6e1af..a4a5b9bdcd 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -743,6 +743,9 @@ func init() { // Prefetch instruction // Do prefetch arg0 address with option aux. arg0=addr, arg1=memory, aux=option. {name: "PRFM", argLength: 2, aux: "Int64", reg: prefreg, asm: "PRFM", hasSideEffects: true}, + + // Publication barrier + {name: "DMB", argLength: 1, aux: "Int64", asm: "DMB", hasSideEffects: true}, // Do data barrier. arg0=memory, aux=option. } blocks := []blockData{ diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index 984552900f..4f133b1ff6 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -617,6 +617,9 @@ var genericOps = []opData{ {name: "AtomicOr8Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory. {name: "AtomicOr32Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory. + // Publication barrier + {name: "PubBarrier", argLength: 1, hasSideEffects: true}, // Do data barrier. arg0=memory. + // Clobber experiment op {name: "Clobber", argLength: 0, typ: "Void", aux: "SymOff", symEffect: "None"}, // write an invalid pointer value to the given pointer slot of a stack variable {name: "ClobberReg", argLength: 0, typ: "Void"}, // clobber a register diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 32ffd28b6a..128ec1f049 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1633,6 +1633,7 @@ const ( OpARM64LoweredPanicBoundsB OpARM64LoweredPanicBoundsC OpARM64PRFM + OpARM64DMB OpMIPSADD OpMIPSADDconst @@ -2949,6 +2950,7 @@ const ( OpAtomicAnd32Variant OpAtomicOr8Variant OpAtomicOr32Variant + OpPubBarrier OpClobber OpClobberReg OpPrefetchCache @@ -21790,6 +21792,14 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "DMB", + auxType: auxInt64, + argLen: 1, + hasSideEffects: true, + asm: arm64.ADMB, + reg: regInfo{}, + }, { name: "ADD", @@ -36739,6 +36749,12 @@ var opcodeTable = [...]opInfo{ hasSideEffects: true, generic: true, }, + { + name: "PubBarrier", + argLen: 1, + hasSideEffects: true, + generic: true, + }, { name: "Clobber", auxType: auxSymOff, diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 614e71f852..8ad9e400eb 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -920,6 +920,8 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpPrefetchCache(v) case OpPrefetchCacheStreamed: return rewriteValueARM64_OpPrefetchCacheStreamed(v) + case OpPubBarrier: + return rewriteValueARM64_OpPubBarrier(v) case OpRotateLeft16: return rewriteValueARM64_OpRotateLeft16(v) case OpRotateLeft32: @@ -25613,6 +25615,18 @@ func rewriteValueARM64_OpPrefetchCacheStreamed(v *Value) bool { return true } } +func rewriteValueARM64_OpPubBarrier(v *Value) bool { + v_0 := v.Args[0] + // match: (PubBarrier mem) + // result: (DMB [0xe] mem) + for { + mem := v_0 + v.reset(OpARM64DMB) + v.AuxInt = int64ToAuxInt(0xe) + v.AddArg(mem) + return true + } +} func rewriteValueARM64_OpRotateLeft16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index 56fc191e5a..1bfbe7ce65 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -3856,6 +3856,13 @@ func InitTables() { }, all...) + addF("runtime", "publicationBarrier", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem()) + return nil + }, + sys.ARM64) + /******** runtime/internal/sys ********/ addF("runtime/internal/sys", "Ctz32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {