cmd/compile: inline atomics from runtime/internal/atomic on amd64
author    Keith Randall <khr@golang.org>
          Tue, 23 Aug 2016 23:49:28 +0000 (16:49 -0700)
committer Keith Randall <khr@golang.org>
          Thu, 25 Aug 2016 20:09:04 +0000 (20:09 +0000)
Inline atomic reads and writes on amd64.  There's no reason
to pay the overhead of a call for these.

To keep atomic loads from being reordered, we make them
return a <value,memory> tuple.
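
For illustration, the SSA construction for an inlined Load64 takes this
shape (a sketch distilled from the gc/ssa.go hunk below; ptr stands for
the evaluated pointer argument):

	tuple := s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), ptr, s.mem())
	s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, tuple) // thread the new memory state
	val := s.newValue1(ssa.OpSelect0, Types[TUINT64], tuple)         // the loaded value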

Change the meaning of resultInArg0 for tuple-generating ops
to mean the first part of the result tuple, not the second.
This means we can always put the store part of the tuple last,
matching how arguments are laid out.  This requires reordering
the outputs of add32carry and sub32carry and their descendants
in various architectures.
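
Concretely, the register specs for the carry-producing ops flip from
carry-first to value-first; from the 386Ops.go hunk below:

	// old: carry (flags) in output 0, value in output 1
	gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{0, gp}}
	// new: value in output 0 (which resultInArg0 now ties to input 0), flags last
	gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}

and the Select0/Select1 users (e.g. in dec64.rules) swap to match.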

benchmark                    old ns/op     new ns/op     delta
BenchmarkAtomicLoad64-8      2.09          0.26          -87.56%
BenchmarkAtomicStore64-8     7.54          5.72          -24.14%
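
The numbers are from the new bench_test.go listed below; its contents are
not shown in this diff, but a minimal sketch of such a benchmark (the
package layout here is an assumption) is:

	package atomic_test

	import (
		"runtime/internal/atomic"
		"testing"
	)

	var x uint64

	func BenchmarkAtomicLoad64(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = atomic.Load64(&x) // a plain MOVQ after this CL, a CALL before
		}
	}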

TBD (in a different CL): Cas, Or8, ...

Change-Id: I713ea88e7da3026c44ea5bdb56ed094b20bc5207
Reviewed-on: https://go-review.googlesource.com/27641
Reviewed-by: Cherry Zhang <cherryyz@google.com>
20 files changed:
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/arm/ssa.go
src/cmd/compile/internal/gc/inl.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/deadstore.go
src/cmd/compile/internal/ssa/gen/386Ops.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/gen/ARMOps.go
src/cmd/compile/internal/ssa/gen/dec64.rules
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/gen/main.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/regalloc.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssa/rewritedec64.go
src/cmd/compile/internal/ssa/type.go
src/cmd/compile/internal/x86/ssa.go
src/runtime/internal/atomic/asm_amd64.s
src/runtime/internal/atomic/bench_test.go [new file with mode: 0644]

src/cmd/compile/internal/amd64/ssa.go
index 472b86b38ab699f169fca9274bbbc2066b951d2c..eed9b2e3d75b4b60a9b926d2ade6b4974d44349d 100644 (file)
@@ -935,7 +935,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                                ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
                                ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
                                ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
-                               ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore:
+                               ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore,
+                               ssa.OpAMD64MOVQatomicload, ssa.OpAMD64MOVLatomicload:
                                if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
                                        if gc.Debug_checknil != 0 && int(v.Line) > 1 {
                                                gc.Warnl(v.Line, "removed nil check")
@@ -951,7 +952,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                                        return
                                }
                        }
-                       if w.Type.IsMemory() {
+                       if w.Type.IsMemory() || w.Type.IsTuple() && w.Type.FieldType(1).IsMemory() {
                                if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
                                        // these ops are OK
                                        mem = w
@@ -976,6 +977,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
                        gc.Warnl(v.Line, "generated nil check")
                }
+       case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
+               p := gc.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = gc.SSARegNum(v.Args[0])
+               gc.AddAux(&p.From, v)
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = gc.SSARegNum0(v)
+       case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
+               r := gc.SSARegNum0(v)
+               if r != gc.SSARegNum(v.Args[0]) {
+                       v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
+               }
+               p := gc.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = gc.SSARegNum(v.Args[1])
+               gc.AddAux(&p.To, v)
        default:
                v.Unimplementedf("genValue not implemented: %s", v.LongString())
        }
src/cmd/compile/internal/arm/ssa.go
index f16dc0f95f2ce6396f195f87b77efb8620f05a05..d6a501f90762c32c2148def42d1872db2e505655 100644 (file)
@@ -283,7 +283,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Reg = r
        case ssa.OpARMADDS,
                ssa.OpARMSUBS:
-               r := gc.SSARegNum1(v)
+               r := gc.SSARegNum0(v)
                r1 := gc.SSARegNum(v.Args[0])
                r2 := gc.SSARegNum(v.Args[1])
                p := gc.Prog(v.Op.Asm())
@@ -356,7 +356,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Offset = v.AuxInt
                p.Reg = gc.SSARegNum(v.Args[0])
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = gc.SSARegNum1(v)
+               p.To.Reg = gc.SSARegNum0(v)
        case ssa.OpARMSRRconst:
                genshift(arm.AMOVW, 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_RR, v.AuxInt)
        case ssa.OpARMADDshiftLL,
@@ -373,7 +373,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftLL,
                ssa.OpARMSUBSshiftLL,
                ssa.OpARMRSBSshiftLL:
-               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LL, v.AuxInt)
+               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_LL, v.AuxInt)
                p.Scond = arm.C_SBIT
        case ssa.OpARMADDshiftRL,
                ssa.OpARMADCshiftRL,
@@ -389,7 +389,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftRL,
                ssa.OpARMSUBSshiftRL,
                ssa.OpARMRSBSshiftRL:
-               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LR, v.AuxInt)
+               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_LR, v.AuxInt)
                p.Scond = arm.C_SBIT
        case ssa.OpARMADDshiftRA,
                ssa.OpARMADCshiftRA,
@@ -405,7 +405,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftRA,
                ssa.OpARMSUBSshiftRA,
                ssa.OpARMRSBSshiftRA:
-               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_AR, v.AuxInt)
+               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_AR, v.AuxInt)
                p.Scond = arm.C_SBIT
        case ssa.OpARMMVNshiftLL:
                genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt)
@@ -433,7 +433,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftLLreg,
                ssa.OpARMSUBSshiftLLreg,
                ssa.OpARMRSBSshiftLLreg:
-               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LL)
+               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_LL)
                p.Scond = arm.C_SBIT
        case ssa.OpARMADDshiftRLreg,
                ssa.OpARMADCshiftRLreg,
@@ -449,7 +449,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftRLreg,
                ssa.OpARMSUBSshiftRLreg,
                ssa.OpARMRSBSshiftRLreg:
-               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LR)
+               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_LR)
                p.Scond = arm.C_SBIT
        case ssa.OpARMADDshiftRAreg,
                ssa.OpARMADCshiftRAreg,
@@ -465,7 +465,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftRAreg,
                ssa.OpARMSUBSshiftRAreg,
                ssa.OpARMRSBSshiftRAreg:
-               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_AR)
+               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_AR)
                p.Scond = arm.C_SBIT
        case ssa.OpARMHMUL,
                ssa.OpARMHMULU:
src/cmd/compile/internal/gc/inl.go
index 0bdabb8c911cd4cbd1c9430d91140208b0a47077..4fa8395940d4b22a00ff66922f29848b9e3f4a5b 100644 (file)
@@ -477,7 +477,7 @@ func inlnode(n *Node) *Node {
                if Debug['m'] > 3 {
                        fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign))
                }
-               if n.Left.Func != nil && n.Left.Func.Inl.Len() != 0 && !isIntrinsicCall1(n) { // normal case
+               if n.Left.Func != nil && n.Left.Func.Inl.Len() != 0 && !isIntrinsicCall(n) { // normal case
                        n = mkinlcall(n, n.Left, n.Isddd)
                } else if n.isMethodCalledAsFunction() && n.Left.Sym.Def != nil {
                        n = mkinlcall(n, n.Left.Sym.Def, n.Isddd)
src/cmd/compile/internal/gc/ssa.go
index 928575c3fa22d4425751e23bedce1e91d2e8f754..9dcdb661cd0b842d8f621224573592434eacd084 100644 (file)
@@ -571,7 +571,14 @@ func (s *state) stmt(n *Node) {
        case OEMPTY, ODCLCONST, ODCLTYPE, OFALL:
 
        // Expression statements
-       case OCALLFUNC, OCALLMETH, OCALLINTER:
+       case OCALLFUNC:
+               if isIntrinsicCall(n) {
+                       s.intrinsicCall(n)
+                       return
+               }
+               fallthrough
+
+       case OCALLMETH, OCALLINTER:
                s.call(n, callNormal)
                if n.Op == OCALLFUNC && n.Left.Op == ONAME && n.Left.Class == PFUNC &&
                        (compiling_runtime && n.Left.Sym.Name == "throw" ||
@@ -2107,8 +2114,8 @@ func (s *state) expr(n *Node) *ssa.Value {
                return s.newValue2(ssa.OpStringMake, n.Type, p, l)
 
        case OCALLFUNC:
-               if isIntrinsicCall1(n) {
-                       return s.intrinsicCall1(n)
+               if isIntrinsicCall(n) {
+                       return s.intrinsicCall(n)
                }
                fallthrough
 
@@ -2516,12 +2523,12 @@ const (
        callGo
 )
 
-// isSSAIntrinsic1 returns true if n is a call to a recognized 1-arg intrinsic
+// isSSAIntrinsic returns true if n is a call to a recognized intrinsic
 // that can be handled by the SSA backend.
 // SSA uses this, but so does the front end to see if should not
 // inline a function because it is a candidate for intrinsic
 // substitution.
-func isSSAIntrinsic1(s *Sym) bool {
+func isSSAIntrinsic(s *Sym) bool {
        // The test below is not quite accurate -- in the event that
        // a function is disabled on a per-function basis, for example
        // because of hash-keyed binary failure search, SSA might be
@@ -2541,38 +2548,74 @@ func isSSAIntrinsic1(s *Sym) bool {
                        return true
                }
        }
+       if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/atomic" {
+               switch s.Name {
+               case "Load", "Load64", "Loadint64", "Loadp", "Loaduint", "Loaduintptr":
+                       return true
+               case "Store", "Store64", "StorepNoWB", "Storeuintptr":
+                       return true
+               }
+       }
        return false
 }
 
-func isIntrinsicCall1(n *Node) bool {
+func isIntrinsicCall(n *Node) bool {
        if n == nil || n.Left == nil {
                return false
        }
-       return isSSAIntrinsic1(n.Left.Sym)
+       return isSSAIntrinsic(n.Left.Sym)
 }
 
-// intrinsicFirstArg extracts arg from n.List and eval
-func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
-       x := n.List.First()
+// intrinsicArg extracts the ith arg from n.List and returns its value.
+func (s *state) intrinsicArg(n *Node, i int) *ssa.Value {
+       x := n.List.Slice()[i]
        if x.Op == OAS {
                x = x.Right
        }
        return s.expr(x)
 }
+func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
+       return s.intrinsicArg(n, 0)
+}
 
-// intrinsicCall1 converts a call to a recognized 1-arg intrinsic
-// into the intrinsic
-func (s *state) intrinsicCall1(n *Node) *ssa.Value {
+// intrinsicCall converts a call to a recognized intrinsic function into the intrinsic SSA operation.
+func (s *state) intrinsicCall(n *Node) (ret *ssa.Value) {
        var result *ssa.Value
-       switch n.Left.Sym.Name {
-       case "Ctz64":
+       name := n.Left.Sym.Name
+       switch {
+       case name == "Ctz64":
                result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
-       case "Ctz32":
+               ret = result
+       case name == "Ctz32":
                result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
-       case "Bswap64":
+               ret = result
+       case name == "Bswap64":
                result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
-       case "Bswap32":
+               ret = result
+       case name == "Bswap32":
                result = s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
+               ret = result
+       case name == "Load" || name == "Loaduint" && s.config.IntSize == 4 || name == "Loaduintptr" && s.config.PtrSize == 4:
+               result = s.newValue2(ssa.OpAtomicLoad32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
+               s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
+               ret = s.newValue1(ssa.OpSelect0, Types[TUINT32], result)
+       case name == "Load64" || name == "Loadint64" || name == "Loaduint" && s.config.IntSize == 8 || name == "Loaduintptr" && s.config.PtrSize == 8:
+               result = s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
+               s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
+               ret = s.newValue1(ssa.OpSelect0, Types[TUINT64], result)
+       case name == "Loadp":
+               result = s.newValue2(ssa.OpAtomicLoadPtr, ssa.MakeTuple(Ptrto(Types[TUINT8]), ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
+               s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
+               ret = s.newValue1(ssa.OpSelect0, Ptrto(Types[TUINT8]), result)
+       case name == "Store" || name == "Storeuintptr" && s.config.PtrSize == 4:
+               result = s.newValue3(ssa.OpAtomicStore32, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
+               s.vars[&memVar] = result
+       case name == "Store64" || name == "Storeuintptr" && s.config.PtrSize == 8:
+               result = s.newValue3(ssa.OpAtomicStore64, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
+               s.vars[&memVar] = result
+       case name == "StorepNoWB":
+               result = s.newValue3(ssa.OpAtomicStorePtrNoWB, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
+               s.vars[&memVar] = result
        }
        if result == nil {
                Fatalf("Unknown special call: %v", n.Left.Sym)
@@ -2580,7 +2623,7 @@ func (s *state) intrinsicCall1(n *Node) *ssa.Value {
        if ssa.IntrinsicsDebug > 0 {
                Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString())
        }
-       return result
+       return
 }
 
 // Calls the function n using the specified call type.
src/cmd/compile/internal/ssa/deadstore.go
index 7acc3b09d498ac92eabcda92bf85c43a6b0acad8..3386a227ed6b01f1cb460392cb4fd467a45bf54e 100644 (file)
@@ -29,6 +29,10 @@ func dse(f *Func) {
                        }
                        if v.Type.IsMemory() {
                                stores = append(stores, v)
+                               if v.Op == OpSelect1 {
+                                       // Use the args of the tuple-generating op.
+                                       v = v.Args[0]
+                               }
                                for _, a := range v.Args {
                                        if a.Block == b && a.Type.IsMemory() {
                                                storeUse.add(a.ID)
src/cmd/compile/internal/ssa/gen/386Ops.go
index 1013adf4a619dfe3d5c4893aa873f5e703aa7f28..c0cd7c6b2009876af03979c0e7e420c62bc6ca3c 100644 (file)
@@ -106,8 +106,8 @@ func init() {
                gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
                gp11sb    = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
                gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-               gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{0, gp}}
-               gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{0, gp}}
+               gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
+               gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
                gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly}
                gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
                gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
src/cmd/compile/internal/ssa/gen/AMD64.rules
index ea37f07cc349ad48f1490daf23d01ece3c12c75a..a412604b5952a52b89b68af80f7f824ab50cb9db 100644 (file)
 
 (If cond yes no) -> (NE (TESTB cond cond) yes no)
 
+// Atomic loads.  Other than preserving their ordering with respect to other loads, nothing special here.
+(AtomicLoad32 ptr mem) -> (MOVLatomicload ptr mem)
+(AtomicLoad64 ptr mem) -> (MOVQatomicload ptr mem)
+(AtomicLoadPtr ptr mem) && config.PtrSize == 8 -> (MOVQatomicload ptr mem)
+(AtomicLoadPtr ptr mem) && config.PtrSize == 4 -> (MOVLatomicload ptr mem)
+
+// Atomic stores.  We use XCHG to prevent the hardware reordering a subsequent load.
+// TODO: most runtime uses of atomic stores don't need that property.  Use normal stores for those?
+(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeUInt32(),TypeMem)> val ptr mem))
+(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeUInt64(),TypeMem)> val ptr mem))
+(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 8 -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
+(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 4 -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
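
Why XCHG rather than a plain MOV store: x86 allows a later load to
complete before an earlier plain store drains from the store buffer, so
a Dekker-style handshake (a schematic example, using the runtime's
atomic package):

	// goroutine 1:          // goroutine 2:
	atomic.Store(&x, 1)      atomic.Store(&y, 1)
	r1 := atomic.Load(&y)    r2 := atomic.Load(&x)

could observe r1 == 0 && r2 == 0 unless the store is an implicitly
LOCKed instruction such as XCHG.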
+
 // ***************************
 // Above: lowering rules
 // Below: optimizations
        (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
 (MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
        (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+
+// Merge ADDQconst and LEAQ into atomic loads.
+(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+       (MOVQatomicload [off1+off2] {sym} ptr mem)
+(MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+       (MOVLatomicload [off1+off2] {sym} ptr mem)
+(MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+       (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+       (MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+
+// Merge ADDQconst and LEAQ into atomic stores.
+(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+       (XCHGQ [off1+off2] {sym} val ptr mem)
+(XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
+       (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+(XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+       (XCHGL [off1+off2] {sym} val ptr mem)
+(XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
+       (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 9359e6d027468f82d3f57696cae72e392d58b469..f30785032479107decd9da0031befded05530536 100644 (file)
@@ -134,6 +134,7 @@ func init() {
                gpstoreconst    = regInfo{inputs: []regMask{gpspsb, 0}}
                gpstoreidx      = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
                gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
+               gpstorexchg     = regInfo{inputs: []regMask{gp, gp, 0}, outputs: []regMask{gp}}
 
                fp01    = regInfo{inputs: nil, outputs: fponly}
                fp21    = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
@@ -509,6 +510,20 @@ func init() {
                {name: "FlagLT_UGT"}, // signed < and unsigned >
                {name: "FlagGT_UGT"}, // signed > and unsigned <
                {name: "FlagGT_ULT"}, // signed > and unsigned >
+
+               // Atomic loads.  These are just normal loads but return <value,memory> tuples
+               // so they can be properly ordered with other loads.
+               // load from arg0+auxint+aux.  arg1=mem.
+               {name: "MOVLatomicload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff"},
+               {name: "MOVQatomicload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff"},
+               // Atomic stores.  We use XCHG to get the right memory ordering semantics.
+               // These ops return a tuple of <old memory contents, memory>.  The old contents are
+               // ignored for now but they are allocated to a register so that the argument register
+               // is properly clobbered (together with resultInArg0).
+               // store arg0 to arg1+auxint+aux, arg2=mem.
+               // Note: arg0 and arg1 are backwards compared to MOVLstore (to facilitate resultInArg0)!
+               {name: "XCHGL", argLength: 3, reg: gpstorexchg, asm: "XCHGL", aux: "SymOff", resultInArg0: true},
+               {name: "XCHGQ", argLength: 3, reg: gpstorexchg, asm: "XCHGQ", aux: "SymOff", resultInArg0: true},
        }
 
        var AMD64blocks = []blockData{
src/cmd/compile/internal/ssa/gen/ARMOps.go
index f1774c6be045565176efdb35585047f96b6bbd6d..b0114328b3d2b630c5fd9298e9d692c277dc1c28 100644 (file)
@@ -99,17 +99,17 @@ func init() {
        var (
                gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
                gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-               gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{0, gp}}
+               gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}}
                gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
                gp1flags  = regInfo{inputs: []regMask{gpg}}
                gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
                gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
-               gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{0, gp}}
+               gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}}
                gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
                gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
                gp22      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
                gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-               gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{0, gp}}
+               gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}}
                gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
                gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
                gpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
src/cmd/compile/internal/ssa/gen/dec64.rules
index e419c741b664b2735039c1394ef7b4aa58e3a1d8..8f0227af664cdcf7cbf56b0ad63dbab844286383 100644 (file)
                (Add32withcarry <config.fe.TypeInt32()>
                        (Int64Hi x)
                        (Int64Hi y)
-                       (Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
-               (Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
+                       (Select1 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
+               (Select0 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
 
 (Sub64 x y) ->
        (Int64Make
                (Sub32withcarry <config.fe.TypeInt32()>
                        (Int64Hi x)
                        (Int64Hi y)
-                       (Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
-               (Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
+                       (Select1 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
+               (Select0 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
 
 (Mul64 x y) ->
        (Int64Make
src/cmd/compile/internal/ssa/gen/genericOps.go
index 1668f6a39040fb044e156da5db9ec0ecd16d1d06..dfa5ed6de35e885e351498a493e06c7bb1ba806d 100644 (file)
@@ -417,10 +417,10 @@ var genericOps = []opData{
        {name: "Int64Hi", argLength: 1, typ: "UInt32"},   // high 32-bit of arg0
        {name: "Int64Lo", argLength: 1, typ: "UInt32"},   // low 32-bit of arg0
 
-       {name: "Add32carry", argLength: 2, commutative: true, typ: "(Flags,UInt32)"}, // arg0 + arg1, returns (carry, value)
+       {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry)
        {name: "Add32withcarry", argLength: 3, commutative: true},                    // arg0 + arg1 + arg2, arg2=carry (0 or 1)
 
-       {name: "Sub32carry", argLength: 2, typ: "(Flags,UInt32)"}, // arg0 - arg1, returns (carry, value)
+       {name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry)
        {name: "Sub32withcarry", argLength: 3},                    // arg0 - arg1 - arg2, arg2=carry (0 or 1)
 
        {name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
@@ -440,6 +440,17 @@ var genericOps = []opData{
        // pseudo-ops for breaking Tuple
        {name: "Select0", argLength: 1}, // the first component of a tuple
        {name: "Select1", argLength: 1}, // the second component of a tuple
+
+       // Atomic operations used for semantically inlining runtime/internal/atomic.
+       // Atomic loads return a new memory so that the loads are properly ordered
+       // with respect to other loads and stores.
+       // TODO: use for sync/atomic at some point.
+       {name: "AtomicLoad32", argLength: 2, typ: "(UInt32,Mem)"},   // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
+       {name: "AtomicLoad64", argLength: 2, typ: "(UInt64,Mem)"},   // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
+       {name: "AtomicLoadPtr", argLength: 2, typ: "(BytePtr,Mem)"}, // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
+       {name: "AtomicStore32", argLength: 3, typ: "Mem"},           // Store arg1 to arg0.  arg2=memory.  Returns memory.
+       {name: "AtomicStore64", argLength: 3, typ: "Mem"},           // Store arg1 to arg0.  arg2=memory.  Returns memory.
+       {name: "AtomicStorePtrNoWB", argLength: 3, typ: "Mem"},      // Store arg1 to arg0.  arg2=memory.  Returns memory.
 }
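
The ordering guarantee falls out of the value graph. A sketch of the
generic SSA an atomic load produces (value numbers are illustrative):

	v1 = AtomicLoad64 <(UInt64,Mem)> ptr mem
	v2 = Select0 <UInt64> v1   // the loaded value
	v3 = Select1 <Mem> v1      // the new memory; later memory ops take v3
	                           // as an argument and so stay ordered after v1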
 
 //     kind           control    successors       implicit exit
src/cmd/compile/internal/ssa/gen/main.go
index 059315542eaa0d79bde38d220f76ad65a5e29157..29f3fa538808c8e12149088f6dbdd492320a99b5 100644 (file)
@@ -43,7 +43,7 @@ type opData struct {
        rematerializeable bool
        argLength         int32 // number of arguments, if -1, then this operation has a variable number of arguments
        commutative       bool  // this operation is commutative on its first 2 arguments (e.g. addition)
-       resultInArg0      bool  // last output of v and v.Args[0] must be allocated to the same register
+       resultInArg0      bool  // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
        clobberFlags      bool  // this op clobbers flags register
 }
 
@@ -161,11 +161,11 @@ func genOp() {
                        }
                        if v.resultInArg0 {
                                fmt.Fprintln(w, "resultInArg0: true,")
-                               if v.reg.inputs[0] != v.reg.outputs[len(v.reg.outputs)-1] {
-                                       log.Fatalf("input[0] and last output register must be equal for %s", v.name)
+                               if v.reg.inputs[0] != v.reg.outputs[0] {
+                                       log.Fatalf("input[0] and output[0] must use the same registers for %s", v.name)
                                }
-                               if v.commutative && v.reg.inputs[1] != v.reg.outputs[len(v.reg.outputs)-1] {
-                                       log.Fatalf("input[1] and last output register must be equal for %s", v.name)
+                               if v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
+                                       log.Fatalf("input[1] and output[0] must use the same registers for %s", v.name)
                                }
                        }
                        if v.clobberFlags {
src/cmd/compile/internal/ssa/opGen.go
index 61386126772f97af0b9ab5659ac087cefff97ae1..f8e2d8979f084cb2228f25b2473538ae961e5aa6 100644 (file)
@@ -586,6 +586,10 @@ const (
        OpAMD64FlagLT_UGT
        OpAMD64FlagGT_UGT
        OpAMD64FlagGT_ULT
+       OpAMD64MOVLatomicload
+       OpAMD64MOVQatomicload
+       OpAMD64XCHGL
+       OpAMD64XCHGQ
 
        OpARMADD
        OpARMADDconst
@@ -1491,6 +1495,12 @@ const (
        OpCvt64Fto64U
        OpSelect0
        OpSelect1
+       OpAtomicLoad32
+       OpAtomicLoad64
+       OpAtomicLoadPtr
+       OpAtomicStore32
+       OpAtomicStore64
+       OpAtomicStorePtrNoWB
 )
 
 var opcodeTable = [...]opInfo{
@@ -1855,8 +1865,8 @@ var opcodeTable = [...]opInfo{
                                {1, 239}, // AX CX DX BX BP SI DI
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 239}, // AX CX DX BX BP SI DI
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
                        },
                },
        },
@@ -1871,8 +1881,8 @@ var opcodeTable = [...]opInfo{
                                {0, 239}, // AX CX DX BX BP SI DI
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 239}, // AX CX DX BX BP SI DI
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
                        },
                },
        },
@@ -1952,8 +1962,8 @@ var opcodeTable = [...]opInfo{
                                {1, 239}, // AX CX DX BX BP SI DI
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 239}, // AX CX DX BX BP SI DI
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
                        },
                },
        },
@@ -1968,8 +1978,8 @@ var opcodeTable = [...]opInfo{
                                {0, 239}, // AX CX DX BX BP SI DI
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 239}, // AX CX DX BX BP SI DI
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
                        },
                },
        },
@@ -6789,6 +6799,66 @@ var opcodeTable = [...]opInfo{
                argLen: 0,
                reg:    regInfo{},
        },
+       {
+               name:    "MOVLatomicload",
+               auxType: auxSymOff,
+               argLen:  2,
+               asm:     x86.AMOVL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:    "MOVQatomicload",
+               auxType: auxSymOff,
+               argLen:  2,
+               asm:     x86.AMOVQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "XCHGL",
+               auxType:      auxSymOff,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AXCHGL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "XCHGQ",
+               auxType:      auxSymOff,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AXCHGQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
 
        {
                name:        "ADD",
@@ -6991,8 +7061,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -7006,8 +7076,8 @@ var opcodeTable = [...]opInfo{
                                {0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -7050,8 +7120,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -7065,8 +7135,8 @@ var opcodeTable = [...]opInfo{
                                {0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -7080,8 +7150,8 @@ var opcodeTable = [...]opInfo{
                                {0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8041,8 +8111,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8057,8 +8127,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8073,8 +8143,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8089,8 +8159,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8105,8 +8175,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8121,8 +8191,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8137,8 +8207,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8153,8 +8223,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8169,8 +8239,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8677,8 +8747,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8693,8 +8763,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8709,8 +8779,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8725,8 +8795,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8741,8 +8811,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8757,8 +8827,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8773,8 +8843,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8789,8 +8859,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8805,8 +8875,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -16156,6 +16226,36 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "AtomicLoad32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AtomicLoad64",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AtomicLoadPtr",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AtomicStore32",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "AtomicStore64",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "AtomicStorePtrNoWB",
+               argLen:  3,
+               generic: true,
+       },
 }
 
 func (o Op) Asm() obj.As    { return opcodeTable[o].asm }
src/cmd/compile/internal/ssa/regalloc.go
index 3dc9fad28a9f95fcb1aee3488734fbb5e61a46de..e853f6631632cf1945b8b123330296cb3d542541 100644 (file)
@@ -1204,7 +1204,7 @@ func (s *regAllocState) regalloc(f *Func) {
                                        if mask == 0 {
                                                continue
                                        }
-                                       if opcodeTable[v.Op].resultInArg0 && out.idx == len(regspec.outputs)-1 {
+                                       if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
                                                if !opcodeTable[v.Op].commutative {
                                                        // Output must use the same register as input 0.
                                                        r := register(s.f.getHome(args[0].ID).(*Register).Num)
src/cmd/compile/internal/ssa/rewriteAMD64.go
index 6c479bf91f1b35e1b5b17fc948269b1ed9ad4f83..05f01b291633cf9093901d84c4d29d8a5eef8b25 100644 (file)
@@ -76,6 +76,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpAMD64MOVLQSXload(v, config)
        case OpAMD64MOVLQZX:
                return rewriteValueAMD64_OpAMD64MOVLQZX(v, config)
+       case OpAMD64MOVLatomicload:
+               return rewriteValueAMD64_OpAMD64MOVLatomicload(v, config)
        case OpAMD64MOVLload:
                return rewriteValueAMD64_OpAMD64MOVLload(v, config)
        case OpAMD64MOVLloadidx1:
@@ -98,6 +100,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpAMD64MOVOload(v, config)
        case OpAMD64MOVOstore:
                return rewriteValueAMD64_OpAMD64MOVOstore(v, config)
+       case OpAMD64MOVQatomicload:
+               return rewriteValueAMD64_OpAMD64MOVQatomicload(v, config)
        case OpAMD64MOVQload:
                return rewriteValueAMD64_OpAMD64MOVQload(v, config)
        case OpAMD64MOVQloadidx1:
@@ -256,6 +260,10 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpAMD64SUBQ(v, config)
        case OpAMD64SUBQconst:
                return rewriteValueAMD64_OpAMD64SUBQconst(v, config)
+       case OpAMD64XCHGL:
+               return rewriteValueAMD64_OpAMD64XCHGL(v, config)
+       case OpAMD64XCHGQ:
+               return rewriteValueAMD64_OpAMD64XCHGQ(v, config)
        case OpAMD64XORL:
                return rewriteValueAMD64_OpAMD64XORL(v, config)
        case OpAMD64XORLconst:
@@ -290,6 +298,18 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpAnd8(v, config)
        case OpAndB:
                return rewriteValueAMD64_OpAndB(v, config)
+       case OpAtomicLoad32:
+               return rewriteValueAMD64_OpAtomicLoad32(v, config)
+       case OpAtomicLoad64:
+               return rewriteValueAMD64_OpAtomicLoad64(v, config)
+       case OpAtomicLoadPtr:
+               return rewriteValueAMD64_OpAtomicLoadPtr(v, config)
+       case OpAtomicStore32:
+               return rewriteValueAMD64_OpAtomicStore32(v, config)
+       case OpAtomicStore64:
+               return rewriteValueAMD64_OpAtomicStore64(v, config)
+       case OpAtomicStorePtrNoWB:
+               return rewriteValueAMD64_OpAtomicStorePtrNoWB(v, config)
        case OpAvg64u:
                return rewriteValueAMD64_OpAvg64u(v, config)
        case OpBswap32:
@@ -4368,6 +4388,58 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLatomicload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64MOVLload(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -5884,6 +5956,58 @@ func rewriteValueAMD64_OpAMD64MOVOstore(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64MOVQload(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -12657,6 +12781,118 @@ func rewriteValueAMD64_OpAMD64SUBQconst(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64XCHGL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XCHGL [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XCHGL)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64XCHGL)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64XCHGQ(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XCHGQ [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
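
The XCHGL/XCHGQ rewrites are the store-side mirror of the atomic-load
folding above, with one extra guard: the LEAQ variant requires ptr.Op !=
OpSB, so a global's address is never folded directly into the exchange.
Continuing the hypothetical package from the load sketch:

        // Lowered (by the AtomicStore rules below) to Select1(XCHGQ val ptr mem).
        // XCHG with a memory operand is implicitly locked on x86, so the store
        // is a full barrier with no separate fence instruction.
        func storeCount(s *stats, v uint64) {
                atomic.Store64(&s.count, v)
        }
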
 func rewriteValueAMD64_OpAMD64XORL(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -13085,6 +13321,148 @@ func rewriteValueAMD64_OpAndB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpAtomicLoad32(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicLoad32 ptr mem)
+       // cond:
+       // result: (MOVLatomicload ptr mem)
+       for {
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoad64(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicLoad64 ptr mem)
+       // cond:
+       // result: (MOVQatomicload ptr mem)
+       for {
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoadPtr(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicLoadPtr ptr mem)
+       // cond: config.PtrSize == 8
+       // result: (MOVQatomicload ptr mem)
+       for {
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               if !(config.PtrSize == 8) {
+                       break
+               }
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (AtomicLoadPtr ptr mem)
+       // cond: config.PtrSize == 4
+       // result: (MOVLatomicload ptr mem)
+       for {
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               if !(config.PtrSize == 4) {
+                       break
+               }
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
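
AtomicLoadPtr dispatches on the target's pointer size because the AMD64
backend also serves 4-byte-pointer configurations (GOARCH=amd64p32 at the
time). A sketch of the same dispatch as a helper; config.PtrSize is the
real field, the function itself is illustrative:

        func atomicLoadPtrOp(config *Config) Op {
                if config.PtrSize == 8 {
                        return OpAMD64MOVQatomicload
                }
                return OpAMD64MOVLatomicload // 32-bit pointers on the 64-bit backend
        }
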
+func rewriteValueAMD64_OpAtomicStore32(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicStore32 ptr val mem)
+       // cond:
+       // result: (Select1 (XCHGL <MakeTuple(config.Frontend().TypeUInt32(),TypeMem)> val ptr mem))
+       for {
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Line, OpAMD64XCHGL, MakeTuple(config.Frontend().TypeUInt32(), TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicStore64(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicStore64 ptr val mem)
+       // cond:
+       // result: (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeUInt64(),TypeMem)> val ptr mem))
+       for {
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Line, OpAMD64XCHGQ, MakeTuple(config.Frontend().TypeUInt64(), TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // cond: config.PtrSize == 8
+       // result: (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
+       for {
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(config.PtrSize == 8) {
+                       break
+               }
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Line, OpAMD64XCHGQ, MakeTuple(config.Frontend().TypeBytePtr(), TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // cond: config.PtrSize == 4
+       // result: (Select1 (XCHGL <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
+       for {
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(config.PtrSize == 4) {
+                       break
+               }
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Line, OpAMD64XCHGL, MakeTuple(config.Frontend().TypeBytePtr(), TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
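
Each store lowering wraps the exchange in Select1: the XCHG produces an
<old value, memory> tuple and only the memory half is kept, threading the
store into the memory chain. Loads return tuples for the same reason, so
the scheduler cannot float them past other memory operations. A sketch of
consuming both halves of such a tuple; NewValue0/NewValue1, OpSelect0,
OpSelect1, MakeTuple, and TypeMem are the real ssa-package helpers, while
the surrounding function is illustrative:

        func consumeAtomicLoad(b *Block, line int32, fe Frontend, ptr, mem *Value) (*Value, *Value) {
                load := b.NewValue0(line, OpAMD64MOVQatomicload, MakeTuple(fe.TypeUInt64(), TypeMem))
                load.AddArg(ptr)
                load.AddArg(mem)
                val := b.NewValue1(line, OpSelect0, fe.TypeUInt64(), load) // the loaded word
                newmem := b.NewValue1(line, OpSelect1, TypeMem, load)      // memory part is second
                return val, newmem
        }
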
 func rewriteValueAMD64_OpAvg64u(v *Value, config *Config) bool {
        b := v.Block
        _ = b
diff --git a/src/cmd/compile/internal/ssa/rewritedec64.go b/src/cmd/compile/internal/ssa/rewritedec64.go
index 33d90f53414004ae77de999cc70b97298a7be0c6..91103f8475d3ec6cc72827679d571547a8cf8b0c 100644 (file)
@@ -126,7 +126,7 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
        _ = b
        // match: (Add64 x y)
        // cond:
-       // result: (Int64Make           (Add32withcarry <config.fe.TypeInt32()>                         (Int64Hi x)                     (Int64Hi y)                     (Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))             (Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
+       // result: (Int64Make           (Add32withcarry <config.fe.TypeInt32()>                         (Int64Hi x)                     (Int64Hi y)                     (Select1 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))             (Select0 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
        for {
                x := v.Args[0]
                y := v.Args[1]
@@ -138,8 +138,8 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
                v2 := b.NewValue0(v.Line, OpInt64Hi, config.fe.TypeUInt32())
                v2.AddArg(y)
                v0.AddArg(v2)
-               v3 := b.NewValue0(v.Line, OpSelect0, TypeFlags)
-               v4 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+               v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+               v4 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
                v5 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
                v5.AddArg(x)
                v4.AddArg(v5)
@@ -149,8 +149,8 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
                v3.AddArg(v4)
                v0.AddArg(v3)
                v.AddArg(v0)
-               v7 := b.NewValue0(v.Line, OpSelect1, config.fe.TypeUInt32())
-               v8 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+               v7 := b.NewValue0(v.Line, OpSelect0, config.fe.TypeUInt32())
+               v8 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
                v9 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
                v9.AddArg(x)
                v8.AddArg(v9)
@@ -2361,7 +2361,7 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
        _ = b
        // match: (Sub64 x y)
        // cond:
-       // result: (Int64Make           (Sub32withcarry <config.fe.TypeInt32()>                         (Int64Hi x)                     (Int64Hi y)                     (Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))             (Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
+       // result: (Int64Make           (Sub32withcarry <config.fe.TypeInt32()>                         (Int64Hi x)                     (Int64Hi y)                     (Select1 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))             (Select0 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
        for {
                x := v.Args[0]
                y := v.Args[1]
@@ -2373,8 +2373,8 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
                v2 := b.NewValue0(v.Line, OpInt64Hi, config.fe.TypeUInt32())
                v2.AddArg(y)
                v0.AddArg(v2)
-               v3 := b.NewValue0(v.Line, OpSelect0, TypeFlags)
-               v4 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+               v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+               v4 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
                v5 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
                v5.AddArg(x)
                v4.AddArg(v5)
@@ -2384,8 +2384,8 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
                v3.AddArg(v4)
                v0.AddArg(v3)
                v.AddArg(v0)
-               v7 := b.NewValue0(v.Line, OpSelect1, config.fe.TypeUInt32())
-               v8 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+               v7 := b.NewValue0(v.Line, OpSelect0, config.fe.TypeUInt32())
+               v8 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
                v9 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
                v9.AddArg(x)
                v8.AddArg(v9)
diff --git a/src/cmd/compile/internal/ssa/type.go b/src/cmd/compile/internal/ssa/type.go
index 4470dc1010ddac196356f430e7a847152425f1db..3ebee6a8f1dab1c40d358b738f592f18aa0933b1 100644 (file)
@@ -33,7 +33,7 @@ type Type interface {
        PtrTo() Type    // given T, return *T
 
        NumFields() int         // # of fields of a struct
-       FieldType(i int) Type   // type of ith field of the struct
+       FieldType(i int) Type   // type of ith field of the struct or ith part of a tuple
        FieldOff(i int) int64   // offset of ith field of the struct
        FieldName(i int) string // name of ith field of the struct
 
@@ -84,31 +84,41 @@ func (t *CompilerType) NumElem() int64         { panic("not implemented") }
 type TupleType struct {
        first  Type
        second Type
+       // Any tuple with a memory type must put that memory type second.
 }
 
-func (t *TupleType) Size() int64            { panic("not implemented") }
-func (t *TupleType) Alignment() int64       { panic("not implemented") }
-func (t *TupleType) IsBoolean() bool        { return false }
-func (t *TupleType) IsInteger() bool        { return false }
-func (t *TupleType) IsSigned() bool         { return false }
-func (t *TupleType) IsFloat() bool          { return false }
-func (t *TupleType) IsComplex() bool        { return false }
-func (t *TupleType) IsPtrShaped() bool      { return false }
-func (t *TupleType) IsString() bool         { return false }
-func (t *TupleType) IsSlice() bool          { return false }
-func (t *TupleType) IsArray() bool          { return false }
-func (t *TupleType) IsStruct() bool         { return false }
-func (t *TupleType) IsInterface() bool      { return false }
-func (t *TupleType) IsMemory() bool         { return false }
-func (t *TupleType) IsFlags() bool          { return false }
-func (t *TupleType) IsVoid() bool           { return false }
-func (t *TupleType) IsTuple() bool          { return true }
-func (t *TupleType) String() string         { return t.first.String() + "," + t.second.String() }
-func (t *TupleType) SimpleString() string   { return "Tuple" }
-func (t *TupleType) ElemType() Type         { panic("not implemented") }
-func (t *TupleType) PtrTo() Type            { panic("not implemented") }
-func (t *TupleType) NumFields() int         { panic("not implemented") }
-func (t *TupleType) FieldType(i int) Type   { panic("not implemented") }
+func (t *TupleType) Size() int64          { panic("not implemented") }
+func (t *TupleType) Alignment() int64     { panic("not implemented") }
+func (t *TupleType) IsBoolean() bool      { return false }
+func (t *TupleType) IsInteger() bool      { return false }
+func (t *TupleType) IsSigned() bool       { return false }
+func (t *TupleType) IsFloat() bool        { return false }
+func (t *TupleType) IsComplex() bool      { return false }
+func (t *TupleType) IsPtrShaped() bool    { return false }
+func (t *TupleType) IsString() bool       { return false }
+func (t *TupleType) IsSlice() bool        { return false }
+func (t *TupleType) IsArray() bool        { return false }
+func (t *TupleType) IsStruct() bool       { return false }
+func (t *TupleType) IsInterface() bool    { return false }
+func (t *TupleType) IsMemory() bool       { return false }
+func (t *TupleType) IsFlags() bool        { return false }
+func (t *TupleType) IsVoid() bool         { return false }
+func (t *TupleType) IsTuple() bool        { return true }
+func (t *TupleType) String() string       { return t.first.String() + "," + t.second.String() }
+func (t *TupleType) SimpleString() string { return "Tuple" }
+func (t *TupleType) ElemType() Type       { panic("not implemented") }
+func (t *TupleType) PtrTo() Type          { panic("not implemented") }
+func (t *TupleType) NumFields() int       { panic("not implemented") }
+func (t *TupleType) FieldType(i int) Type {
+       switch i {
+       case 0:
+               return t.first
+       case 1:
+               return t.second
+       default:
+               panic("bad tuple index")
+       }
+}
 func (t *TupleType) FieldOff(i int) int64   { panic("not implemented") }
 func (t *TupleType) FieldName(i int) string { panic("not implemented") }
 func (t *TupleType) NumElem() int64         { panic("not implemented") }
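
Implementing FieldType for tuples lets passes inspect a tuple's parts
generically. Combined with the new convention that the memory part, if
present, is always second, a pass can ask whether a value produces memory
with a helper along these lines (illustrative name, real Type methods):

        func producesMemory(t Type) bool {
                return t.IsMemory() || t.IsTuple() && t.FieldType(1).IsMemory()
        }
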
diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go
index 3005a19bfdd6f7faadae3970b4749afcacc9866c..42e5df163cfb0c5d614f03b1ae28080eb5e6a125 100644 (file)
@@ -196,17 +196,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 
        case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
-               // output 0 is carry/borrow, output 1 is the low 32 bits.
+               // output 0 is the low 32 bits, output 1 is carry/borrow.
-               r := gc.SSARegNum1(v)
+               r := gc.SSARegNum0(v)
                if r != gc.SSARegNum(v.Args[0]) {
-                       v.Fatalf("input[0] and output[1] not in same register %s", v.LongString())
+                       v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
                }
                opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))
 
        case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
-               // output 0 is carry/borrow, output 1 is the low 32 bits.
+               // output 0 is the low 32 bits, output 1 is carry/borrow.
-               r := gc.SSARegNum1(v)
+               r := gc.SSARegNum0(v)
                if r != gc.SSARegNum(v.Args[0]) {
-                       v.Fatalf("input[0] and output[1] not in same register %s", v.LongString())
+                       v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
                }
                p := gc.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_CONST
diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s
index 32dbbf763d2ad7ff41ef96a1e8e9c1ec13fb1c90..6fb5211c9cedd225d3e4b472406e5ad936b6f5c5 100644 (file)
@@ -2,6 +2,9 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+// Note: some of these functions are semantically inlined
+// by the compiler (in src/cmd/compile/internal/gc/ssa.go).
+
 #include "textflag.h"
 
 // bool Cas(int32 *val, int32 old, int32 new)
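
The new note records that the SSA backend now emits these operations
directly rather than calling into this file: on amd64 an atomic load is an
ordinary MOVQ (kept ordered by x86's memory model and, in the compiler, by
the <value,memory> tuple), and an atomic store remains an implicitly
locked XCHGQ. The benchmark deltas in the commit message come from
deleting the call and return around one or two instructions; a sketch of
the runtime code shape that benefits (illustrative function, real atomic
API):

        // spinUntilNonzero previously paid a call per iteration; with the
        // load inlined, the loop body is a MOVQ plus a test.
        func spinUntilNonzero(p *uint64) uint64 {
                for {
                        if v := atomic.Load64(p); v != 0 {
                                return v
                        }
                }
        }
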
diff --git a/src/runtime/internal/atomic/bench_test.go b/src/runtime/internal/atomic/bench_test.go
new file mode 100644 (file)
index 0000000..47010e3
--- /dev/null
@@ -0,0 +1,28 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic_test
+
+import (
+       "runtime/internal/atomic"
+       "testing"
+)
+
+var sink interface{}
+
+func BenchmarkAtomicLoad64(b *testing.B) {
+       var x uint64
+       sink = &x
+       for i := 0; i < b.N; i++ {
+               _ = atomic.Load64(&x)
+       }
+}
+
+func BenchmarkAtomicStore64(b *testing.B) {
+       var x uint64
+       sink = &x
+       for i := 0; i < b.N; i++ {
+               atomic.Store64(&x, 0)
+       }
+}
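
The sink assignment makes x escape to the heap, presumably so each
iteration measures a real memory access rather than a register the
compiler could keep local. With a checkout containing this CL, the
benchmarks run with: go test -run=NONE -bench=Atomic runtime/internal/atomic.
A variant without the sink risks measuring less than a load (a sketch of
the pitfall, not part of the CL):

        // Without forcing x to escape, the compiler is freer to keep x in
        // registers or to hoist work out of the loop.
        func BenchmarkAtomicLoad64Naive(b *testing.B) {
                var x uint64
                for i := 0; i < b.N; i++ {
                        _ = atomic.Load64(&x)
                }
        }
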