]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: buffered write barrier for ppc64
authorAustin Clements <austin@google.com>
Wed, 15 Nov 2017 22:54:24 +0000 (14:54 -0800)
committerAustin Clements <austin@google.com>
Tue, 13 Feb 2018 16:34:23 +0000 (16:34 +0000)
Updates #22460.

Change-Id: I6040c4024111c80361c81eb7eec5071ec9efb4f9
Reviewed-on: https://go-review.googlesource.com/92702
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/gc/main.go
src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/vet/all/whitelist/ppc64x.txt
src/runtime/asm_ppc64x.s

index 2eef7b7c7b0a595aac44eca7edcda68ceeab50d5..a463b222e1f30d85b3edd520c2cd85faa01ba170 100644 (file)
@@ -408,7 +408,7 @@ func Main(archInit func(*Arch)) {
        }
 
        switch objabi.GOARCH {
-       case "amd64", "amd64p32", "386", "arm", "arm64", "mips64", "mips64le", "mips", "mipsle":
+       case "amd64", "amd64p32", "386", "arm", "arm64", "ppc64", "ppc64le", "mips64", "mips64le", "mips", "mipsle":
        default:
                // Other architectures don't support the buffered
                // write barrier yet.
index 008d9658f4cca719bc7efdb8c86382807e920736..7a2e2c187832658d89c756a1e33a53530f407ca8 100644 (file)
@@ -1086,6 +1086,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                        q.To.Reg = ppc64.REG_R2
                }
 
+       case ssa.OpPPC64LoweredWB:
+               p := s.Prog(obj.ACALL)
+               p.To.Type = obj.TYPE_MEM
+               p.To.Name = obj.NAME_EXTERN
+               p.To.Sym = v.Aux.(*obj.LSym)
+
        case ssa.OpPPC64LoweredNilCheck:
                // Issue a load which will fault if arg is nil.
                p := s.Prog(ppc64.AMOVBZ)
index 4fd6a5a102220f9ccaf4b451fdd0089e58e17e47..b9587b148dad21423b2086394fe922169241c7b7 100644 (file)
 (IsSliceInBounds idx len) -> (LessEqual (CMPU idx len))
 (NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
 
+// Write barrier.
+(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
+
 // Optimizations
 // Note that PPC "logical" immediates come in 0:15 and 16:31 unsigned immediate forms,
 // so ORconst, XORconst easily expand into a pair.
index c6269e0f48cccd50f13df6cc13576e70409b3f47..2043887a7850ad08b5c81055629aedcfd6e08754 100644 (file)
@@ -474,6 +474,11 @@ func init() {
                {name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
                {name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
 
+               // LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
+               // It preserves R0 through R15, g, and its arguments R20 and R21,
+               // but may clobber anything else, including R31 (REGTMP).
+               {name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
+
                // (InvertFlags (CMP a b)) == (CMP b a)
                // So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,
                // then we do (LessThan (InvertFlags (CMP b a))) instead.
index 4246048e639fb285fc052cff5cdb41f0965c1a73..6859f36e517af32481db73bc9fd2f8d211401a5f 100644 (file)
@@ -1492,6 +1492,7 @@ const (
        OpPPC64LoweredAtomicCas32
        OpPPC64LoweredAtomicAnd8
        OpPPC64LoweredAtomicOr8
+       OpPPC64LoweredWB
        OpPPC64InvertFlags
        OpPPC64FlagEQ
        OpPPC64FlagLT
@@ -19243,6 +19244,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "LoweredWB",
+               auxType:      auxSym,
+               argLen:       3,
+               clobberFlags: true,
+               symEffect:    SymNone,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1048576}, // R20
+                               {1, 2097152}, // R21
+                       },
+                       clobbers: 576460746931503104, // R16 R17 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
+               },
+       },
        {
                name:   "InvertFlags",
                argLen: 1,
index 6a000f4431188a6b86960a9fdb5a1c6fbd445608..ac7df6b9972e4f5b5f5f85de6b240fb59eac7a13 100644 (file)
@@ -623,6 +623,8 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpTrunc64to32_0(v)
        case OpTrunc64to8:
                return rewriteValuePPC64_OpTrunc64to8_0(v)
+       case OpWB:
+               return rewriteValuePPC64_OpWB_0(v)
        case OpXor16:
                return rewriteValuePPC64_OpXor16_0(v)
        case OpXor32:
@@ -43015,6 +43017,24 @@ func rewriteValuePPC64_OpTrunc64to8_0(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpWB_0(v *Value) bool {
+       // match: (WB {fn} destptr srcptr mem)
+       // cond:
+       // result: (LoweredWB {fn} destptr srcptr mem)
+       for {
+               fn := v.Aux
+               _ = v.Args[2]
+               destptr := v.Args[0]
+               srcptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpPPC64LoweredWB)
+               v.Aux = fn
+               v.AddArg(destptr)
+               v.AddArg(srcptr)
+               v.AddArg(mem)
+               return true
+       }
+}
 func rewriteValuePPC64_OpXor16_0(v *Value) bool {
        // match: (Xor16 x y)
        // cond:
index 4f6444e10264acf929da079ee0064faf4c49b590..84b8f18b5332fc202ff5b009e069479acc1b6cb6 100644 (file)
@@ -10,3 +10,5 @@ runtime/asm_ppc64x.s: [GOARCH] addmoduledata: function addmoduledata missing Go
 runtime/duff_ppc64x.s: [GOARCH] duffzero: function duffzero missing Go declaration
 runtime/tls_ppc64x.s: [GOARCH] save_g: function save_g missing Go declaration
 runtime/tls_ppc64x.s: [GOARCH] load_g: function load_g missing Go declaration
+
+runtime/asm_ARCHSUFF.s: [GOARCH] gcWriteBarrier: function gcWriteBarrier missing Go declaration
index e02ca169071d4495fc127f87d2b98f40dcf5c71e..c0e872f7a91c3f3934e2fa6bcd0116173201249c 100644 (file)
@@ -1495,3 +1495,74 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1
        MOVW    $1, R3
        MOVB    R3, ret+0(FP)
        RET
+
+// gcWriteBarrier performs a heap pointer write and informs the GC.
+//
+// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
+// - R20 is the destination of the write
+// - R21 is the value being written at R20.
+// It clobbers condition codes.
+// It does not clobber R0 through R15,
+// but may clobber any other register, *including* R31.
+TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$112
+       // The standard prologue clobbers R31.
+       // We use R16 and R17 as scratch registers.
+       MOVD    g_m(g), R16
+       MOVD    m_p(R16), R16
+       MOVD    (p_wbBuf+wbBuf_next)(R16), R17
+       // Increment wbBuf.next position.
+       ADD     $16, R17
+       MOVD    R17, (p_wbBuf+wbBuf_next)(R16)
+       MOVD    (p_wbBuf+wbBuf_end)(R16), R16
+       CMP     R16, R17
+       // Record the write.
+       MOVD    R21, -16(R17)   // Record value
+       MOVD    (R20), R16      // TODO: This turns bad writes into bad reads.
+       MOVD    R16, -8(R17)    // Record *slot
+       // Is the buffer full? (flags set in CMP above)
+       BEQ     flush
+ret:
+       // Do the write.
+       MOVD    R21, (R20)
+       RET
+
+flush:
+       // Save registers R0 through R15 since these were not saved by the caller.
+       // We don't save all registers on ppc64 because it takes too much space.
+       MOVD    R20, (FIXED_FRAME+0)(R1)        // Also first argument to wbBufFlush
+       MOVD    R21, (FIXED_FRAME+8)(R1)        // Also second argument to wbBufFlush
+       // R0 is always 0, so no need to spill.
+       // R1 is SP.
+       // R2 is SB.
+       MOVD    R3, (FIXED_FRAME+16)(R1)
+       MOVD    R4, (FIXED_FRAME+24)(R1)
+       MOVD    R5, (FIXED_FRAME+32)(R1)
+       MOVD    R6, (FIXED_FRAME+40)(R1)
+       MOVD    R7, (FIXED_FRAME+48)(R1)
+       MOVD    R8, (FIXED_FRAME+56)(R1)
+       MOVD    R9, (FIXED_FRAME+64)(R1)
+       MOVD    R10, (FIXED_FRAME+72)(R1)
+       MOVD    R11, (FIXED_FRAME+80)(R1)
+       MOVD    R12, (FIXED_FRAME+88)(R1)
+       // R13 is REGTLS
+       MOVD    R14, (FIXED_FRAME+96)(R1)
+       MOVD    R15, (FIXED_FRAME+104)(R1)
+
+       // This takes arguments R20 and R21.
+       CALL    runtime·wbBufFlush(SB)
+
+       MOVD    (FIXED_FRAME+0)(R1), R20
+       MOVD    (FIXED_FRAME+8)(R1), R21
+       MOVD    (FIXED_FRAME+16)(R1), R3
+       MOVD    (FIXED_FRAME+24)(R1), R4
+       MOVD    (FIXED_FRAME+32)(R1), R5
+       MOVD    (FIXED_FRAME+40)(R1), R6
+       MOVD    (FIXED_FRAME+48)(R1), R7
+       MOVD    (FIXED_FRAME+56)(R1), R8
+       MOVD    (FIXED_FRAME+64)(R1), R9
+       MOVD    (FIXED_FRAME+72)(R1), R10
+       MOVD    (FIXED_FRAME+80)(R1), R11
+       MOVD    (FIXED_FRAME+88)(R1), R12
+       MOVD    (FIXED_FRAME+96)(R1), R14
+       MOVD    (FIXED_FRAME+104)(R1), R15
+       JMP     ret