This CL intrinsifies atomic{And,Or} on mips64x, which are already intrinsified on mipsx.
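
For context, the operations being intrinsified are the bitwise And/Or helpers in runtime/internal/atomic. A minimal semantic sketch follows (illustrative only: the package is internal, the stand-in functions below are not atomic, and the real versions now compile to inline LL/SC loops on mips64x instead of assembly calls):

	package atomicref // hypothetical name; runtime/internal/atomic cannot be imported directly

	// Each function atomically applies the operator to *ptr in the real package.
	func And8(ptr *uint8, val uint8)  { *ptr &= val }
	func Or8(ptr *uint8, val uint8)   { *ptr |= val }
	func And(ptr *uint32, val uint32) { *ptr &= val }
	func Or(ptr *uint32, val uint32)  { *ptr |= val }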
goos: linux
goarch: mips64le
pkg: runtime/internal/atomic
                  │  oldatomic  │              newatomic              │
                  │   sec/op    │    sec/op     vs base               │
AtomicLoad64-4       27.96n ± 0%    28.02n ± 0%   +0.20% (p=0.026 n=8)
AtomicStore64-4      29.14n ± 0%    29.21n ± 0%   +0.22% (p=0.004 n=8)
AtomicLoad-4         27.96n ± 0%    28.02n ± 0%        ~ (p=0.220 n=8)
AtomicStore-4        29.15n ± 0%    29.21n ± 0%   +0.19% (p=0.002 n=8)
And8-4               53.09n ± 0%    41.71n ± 0%  -21.44% (p=0.000 n=8)
And-4                49.87n ± 0%    39.93n ± 0%  -19.93% (p=0.000 n=8)
And8Parallel-4       70.45n ± 0%    68.58n ± 0%   -2.65% (p=0.000 n=8)
AndParallel-4        70.40n ± 0%    67.95n ± 0%   -3.47% (p=0.000 n=8)
Or8-4                52.09n ± 0%    41.11n ± 0%  -21.08% (p=0.000 n=8)
Or-4                 49.80n ± 0%    39.87n ± 0%  -19.93% (p=0.000 n=8)
Or8Parallel-4        70.43n ± 0%    68.25n ± 0%   -3.08% (p=0.000 n=8)
OrParallel-4         70.42n ± 0%    67.94n ± 0%   -3.51% (p=0.000 n=8)
Xadd-4               67.83n ± 0%    67.92n ± 0%   +0.13% (p=0.003 n=8)
Xadd64-4             67.85n ± 0%    67.92n ± 0%   +0.09% (p=0.021 n=8)
Cas-4                81.34n ± 0%    81.37n ± 0%        ~ (p=0.859 n=8)
Cas64-4              81.43n ± 0%    81.53n ± 0%   +0.13% (p=0.001 n=8)
Xchg-4               67.15n ± 0%    67.18n ± 0%        ~ (p=0.367 n=8)
Xchg64-4             67.16n ± 0%    67.21n ± 0%   +0.08% (p=0.008 n=8)
geomean              54.04n         51.01n        -5.61%
Change-Id: I9a4353f4b14134f1e9cf0dcf99db3feb951328ed
Reviewed-on: https://go-review.googlesource.com/c/go/+/494875
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Joel Sing <joel@sing.id.au>
Reviewed-by: Junxian Zhu <zhujunxian@oss.cipunited.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
p4.Reg = v.Reg0()
p4.To.Type = obj.TYPE_REG
p4.To.Reg = v.Reg0()
+ case ssa.OpMIPS64LoweredAtomicAnd32,
+ ssa.OpMIPS64LoweredAtomicOr32:
+ // SYNC
+ // LL (Rarg0), Rtmp
+ // AND/OR Rarg1, Rtmp
+ // SC Rtmp, (Rarg0)
+ // BEQ Rtmp, -3(PC)
+ // SYNC
+ s.Prog(mips.ASYNC)
+
+ p := s.Prog(mips.ALL)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = v.Args[0].Reg()
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = mips.REGTMP
+
+ p1 := s.Prog(v.Op.Asm())
+ p1.From.Type = obj.TYPE_REG
+ p1.From.Reg = v.Args[1].Reg()
+ p1.Reg = mips.REGTMP
+ p1.To.Type = obj.TYPE_REG
+ p1.To.Reg = mips.REGTMP
+
+ p2 := s.Prog(mips.ASC)
+ p2.From.Type = obj.TYPE_REG
+ p2.From.Reg = mips.REGTMP
+ p2.To.Type = obj.TYPE_MEM
+ p2.To.Reg = v.Args[0].Reg()
+
+ p3 := s.Prog(mips.ABEQ)
+ p3.From.Type = obj.TYPE_REG
+ p3.From.Reg = mips.REGTMP
+ p3.To.Type = obj.TYPE_BRANCH
+ p3.To.SetTarget(p)
+
+ s.Prog(mips.ASYNC)
+
case ssa.OpMIPS64LoweredAtomicCas32, ssa.OpMIPS64LoweredAtomicCas64:
// MOVV $0, Rout
// SYNC
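
The code generation added above turns LoweredAtomicAnd32/LoweredAtomicOr32 into an LL/SC retry loop. As a hedged, runnable Go model of that loop's semantics (using sync/atomic's CompareAndSwapUint32 in place of the store-conditional that fails when another writer intervenes; this is not the generated code):

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	func atomicAnd32(ptr *uint32, val uint32) {
		for {
			old := atomic.LoadUint32(ptr)                      // LL  (Rarg0), Rtmp
			newVal := old & val                                // AND Rarg1, Rtmp
			if atomic.CompareAndSwapUint32(ptr, old, newVal) { // SC  Rtmp, (Rarg0)
				return
			}
			// SC failed (BEQ Rtmp, -3(PC)): another CPU wrote *ptr; retry.
		}
	}

	func main() {
		x := uint32(0b1111)
		atomicAnd32(&x, 0b0110)
		fmt.Printf("%04b\n", x) // prints 0110
	}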
(AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem)
(AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...)
+// AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3,uint32(val) << ((ptr & 3) * 8))
+(AtomicOr8 ptr val mem) && !config.BigEndian =>
+ (LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr)
+ (SLLV <typ.UInt32> (ZeroExt8to32 val)
+ (SLLVconst <typ.UInt64> [3]
+ (ANDconst <typ.UInt64> [3] ptr))) mem)
+
+// AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3,(uint32(val) << ((ptr & 3) * 8)) | ^(uint32(0xFF) << ((ptr & 3) * 8)))
+(AtomicAnd8 ptr val mem) && !config.BigEndian =>
+ (LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr)
+ (OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val)
+ (SLLVconst <typ.UInt64> [3]
+ (ANDconst <typ.UInt64> [3] ptr)))
+ (NORconst [0] <typ.UInt64> (SLLV <typ.UInt64>
+ (MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3]
+ (ANDconst <typ.UInt64> [3] ptr))))) mem)
+
+// AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3,uint32(val) << (((ptr^3) & 3) * 8))
+(AtomicOr8 ptr val mem) && config.BigEndian =>
+ (LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr)
+ (SLLV <typ.UInt32> (ZeroExt8to32 val)
+ (SLLVconst <typ.UInt64> [3]
+ (ANDconst <typ.UInt64> [3]
+ (XORconst <typ.UInt64> [3] ptr)))) mem)
+
+// AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3,(uint32(val) << (((ptr^3) & 3) * 8)) | ^(uint32(0xFF) << (((ptr^3) & 3) * 8)))
+(AtomicAnd8 ptr val mem) && config.BigEndian =>
+ (LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr)
+ (OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val)
+ (SLLVconst <typ.UInt64> [3]
+ (ANDconst <typ.UInt64> [3]
+ (XORconst <typ.UInt64> [3] ptr))))
+ (NORconst [0] <typ.UInt64> (SLLV <typ.UInt64>
+ (MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3]
+ (ANDconst <typ.UInt64> [3]
+ (XORconst <typ.UInt64> [3] ptr)))))) mem)
+
+(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...)
+(AtomicOr32 ...) => (LoweredAtomicOr32 ...)
+
// checks
(NilCheck ...) => (LoweredNilCheck ...)
(IsNonNil ptr) => (SGTU ptr (MOVVconst [0]))
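
Before the op definitions, here is the byte-within-word arithmetic that the AtomicAnd8/AtomicOr8 rules above encode, as a hedged, runnable sketch (the address and operand are made-up example inputs; the compiler performs these steps with SSA ops, not Go code):

	package main

	import "fmt"

	func main() {
		const bigEndian = false // models the !config.BigEndian rules

		ptr := uintptr(0x1003) // address of the target byte (example input)
		val := uint32(0xAB)    // zero-extended 8-bit operand (example input)

		word := ptr &^ 3 // aligned 32-bit word containing the byte
		lane := ptr & 3
		if bigEndian {
			lane = (ptr ^ 3) & 3 // byte order within the word is reversed
		}
		shift := lane * 8

		orArg := val << shift                               // second arg of LoweredAtomicOr32
		andArg := (val << shift) | ^(uint32(0xFF) << shift) // other lanes are all 1s, so AND leaves them unchanged

		fmt.Printf("word=%#x shift=%d or=%#010x and=%#010x\n", word, shift, orArg, andArg)
		// word=0x1000 shift=24 or=0xab000000 and=0xabffffff
	}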
faultOnNilArg1: true,
},
+ // atomic and/or.
+ // *arg0 &= (|=) arg1. arg2=mem. returns memory.
+ // SYNC
+ // LL (Rarg0), Rtmp
+ // AND Rarg1, Rtmp
+ // SC Rtmp, (Rarg0)
+ // BEQ Rtmp, -3(PC)
+ // SYNC
+ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+
// atomic loads.
// load from arg0. arg1=mem.
// returns <value,memory> so they can be properly ordered with other loads.
OpMIPS64DUFFCOPY
OpMIPS64LoweredZero
OpMIPS64LoweredMove
+ OpMIPS64LoweredAtomicAnd32
+ OpMIPS64LoweredAtomicOr32
OpMIPS64LoweredAtomicLoad8
OpMIPS64LoweredAtomicLoad32
OpMIPS64LoweredAtomicLoad64
clobbers: 6, // R1 R2
},
},
+ {
+ name: "LoweredAtomicAnd32",
+ argLen: 3,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ unsafePoint: true,
+ asm: mips.AAND,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 234881022}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 g R31
+ {0, 4611686018695823358}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 SP g R31 SB
+ },
+ },
+ },
+ {
+ name: "LoweredAtomicOr32",
+ argLen: 3,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ unsafePoint: true,
+ asm: mips.AOR,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 234881022}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 g R31
+ {0, 4611686018695823358}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 SP g R31 SB
+ },
+ },
+ },
{
name: "LoweredAtomicLoad8",
argLen: 2,
case OpAtomicAdd64:
v.Op = OpMIPS64LoweredAtomicAdd64
return true
+ case OpAtomicAnd32:
+ v.Op = OpMIPS64LoweredAtomicAnd32
+ return true
+ case OpAtomicAnd8:
+ return rewriteValueMIPS64_OpAtomicAnd8(v)
case OpAtomicCompareAndSwap32:
return rewriteValueMIPS64_OpAtomicCompareAndSwap32(v)
case OpAtomicCompareAndSwap64:
case OpAtomicLoadPtr:
v.Op = OpMIPS64LoweredAtomicLoad64
return true
+ case OpAtomicOr32:
+ v.Op = OpMIPS64LoweredAtomicOr32
+ return true
+ case OpAtomicOr8:
+ return rewriteValueMIPS64_OpAtomicOr8(v)
case OpAtomicStore32:
v.Op = OpMIPS64LoweredAtomicStore32
return true
return true
}
}
+func rewriteValueMIPS64_OpAtomicAnd8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ config := b.Func.Config
+ typ := &b.Func.Config.Types
+ // match: (AtomicAnd8 ptr val mem)
+ // cond: !config.BigEndian
+ // result: (LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] ptr))) (NORconst [0] <typ.UInt64> (SLLV <typ.UInt64> (MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] ptr))))) mem)
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ if !(!config.BigEndian) {
+ break
+ }
+ v.reset(OpMIPS64LoweredAtomicAnd32)
+ v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
+ v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+ v1.AuxInt = int64ToAuxInt(^3)
+ v0.AddArg2(v1, ptr)
+ v2 := b.NewValue0(v.Pos, OpMIPS64OR, typ.UInt64)
+ v3 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
+ v4 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+ v4.AddArg(val)
+ v5 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
+ v5.AuxInt = int64ToAuxInt(3)
+ v6 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
+ v6.AuxInt = int64ToAuxInt(3)
+ v6.AddArg(ptr)
+ v5.AddArg(v6)
+ v3.AddArg2(v4, v5)
+ v7 := b.NewValue0(v.Pos, OpMIPS64NORconst, typ.UInt64)
+ v7.AuxInt = int64ToAuxInt(0)
+ v8 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt64)
+ v9 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+ v9.AuxInt = int64ToAuxInt(0xff)
+ v8.AddArg2(v9, v5)
+ v7.AddArg(v8)
+ v2.AddArg2(v3, v7)
+ v.AddArg3(v0, v2, mem)
+ return true
+ }
+ // match: (AtomicAnd8 ptr val mem)
+ // cond: config.BigEndian
+ // result: (LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] (XORconst <typ.UInt64> [3] ptr)))) (NORconst [0] <typ.UInt64> (SLLV <typ.UInt64> (MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] (XORconst <typ.UInt64> [3] ptr)))))) mem)
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ if !(config.BigEndian) {
+ break
+ }
+ v.reset(OpMIPS64LoweredAtomicAnd32)
+ v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
+ v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+ v1.AuxInt = int64ToAuxInt(^3)
+ v0.AddArg2(v1, ptr)
+ v2 := b.NewValue0(v.Pos, OpMIPS64OR, typ.UInt64)
+ v3 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
+ v4 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+ v4.AddArg(val)
+ v5 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
+ v5.AuxInt = int64ToAuxInt(3)
+ v6 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
+ v6.AuxInt = int64ToAuxInt(3)
+ v7 := b.NewValue0(v.Pos, OpMIPS64XORconst, typ.UInt64)
+ v7.AuxInt = int64ToAuxInt(3)
+ v7.AddArg(ptr)
+ v6.AddArg(v7)
+ v5.AddArg(v6)
+ v3.AddArg2(v4, v5)
+ v8 := b.NewValue0(v.Pos, OpMIPS64NORconst, typ.UInt64)
+ v8.AuxInt = int64ToAuxInt(0)
+ v9 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt64)
+ v10 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+ v10.AuxInt = int64ToAuxInt(0xff)
+ v9.AddArg2(v10, v5)
+ v8.AddArg(v9)
+ v2.AddArg2(v3, v8)
+ v.AddArg3(v0, v2, mem)
+ return true
+ }
+ return false
+}
func rewriteValueMIPS64_OpAtomicCompareAndSwap32(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
return true
}
}
+func rewriteValueMIPS64_OpAtomicOr8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ config := b.Func.Config
+ typ := &b.Func.Config.Types
+ // match: (AtomicOr8 ptr val mem)
+ // cond: !config.BigEndian
+ // result: (LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] ptr))) mem)
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ if !(!config.BigEndian) {
+ break
+ }
+ v.reset(OpMIPS64LoweredAtomicOr32)
+ v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
+ v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+ v1.AuxInt = int64ToAuxInt(^3)
+ v0.AddArg2(v1, ptr)
+ v2 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
+ v3 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+ v3.AddArg(val)
+ v4 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
+ v4.AuxInt = int64ToAuxInt(3)
+ v5 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
+ v5.AuxInt = int64ToAuxInt(3)
+ v5.AddArg(ptr)
+ v4.AddArg(v5)
+ v2.AddArg2(v3, v4)
+ v.AddArg3(v0, v2, mem)
+ return true
+ }
+ // match: (AtomicOr8 ptr val mem)
+ // cond: config.BigEndian
+ // result: (LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] (XORconst <typ.UInt64> [3] ptr)))) mem)
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ if !(config.BigEndian) {
+ break
+ }
+ v.reset(OpMIPS64LoweredAtomicOr32)
+ v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
+ v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+ v1.AuxInt = int64ToAuxInt(^3)
+ v0.AddArg2(v1, ptr)
+ v2 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
+ v3 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+ v3.AddArg(val)
+ v4 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
+ v4.AuxInt = int64ToAuxInt(3)
+ v5 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
+ v5.AuxInt = int64ToAuxInt(3)
+ v6 := b.NewValue0(v.Pos, OpMIPS64XORconst, typ.UInt64)
+ v6.AuxInt = int64ToAuxInt(3)
+ v6.AddArg(ptr)
+ v5.AddArg(v6)
+ v4.AddArg(v5)
+ v2.AddArg2(v3, v4)
+ v.AddArg3(v0, v2, mem)
+ return true
+ }
+ return false
+}
func rewriteValueMIPS64_OpAvg64u(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
+ sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("runtime/internal/atomic", "And",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
+ sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("runtime/internal/atomic", "Or8",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
+ sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("runtime/internal/atomic", "Or",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
+ sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
atomicAndOrEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) {
s.vars[memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem())