cmd/compile: inline atomics from runtime/internal/atomic on amd64
author    Keith Randall <khr@golang.org>
          Tue, 23 Aug 2016 23:49:28 +0000 (16:49 -0700)
committer Keith Randall <khr@golang.org>
          Thu, 25 Aug 2016 20:09:04 +0000 (20:09 +0000)
Inline atomic reads and writes on amd64.  There's no reason
to pay the overhead of a call for these.

To keep atomic loads from being reordered, we make them
return a <value,memory> tuple.
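
For illustration, the SSA construction for an inlined Load64 takes this
shape (a sketch distilled from the gc/ssa.go hunk below; ptr stands for
the evaluated pointer argument):

	tuple := s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), ptr, s.mem())
	s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, tuple) // thread the new memory state
	val := s.newValue1(ssa.OpSelect0, Types[TUINT64], tuple)         // the loaded value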

Change the meaning of resultInArg0 for tuple-generating ops
to mean the first part of the result tuple, not the second.
This means we can always put the store part of the tuple last,
matching how arguments are laid out.  This requires reordering
the outputs of add32carry and sub32carry and their descendants
in various architectures.
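
Concretely, the register specs for the carry-producing ops flip from
carry-first to value-first; from the 386Ops.go hunk below:

	// old: carry (flags) in output 0, value in output 1
	gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{0, gp}}
	// new: value in output 0 (which resultInArg0 now ties to input 0), flags last
	gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}

and the Select0/Select1 users (e.g. in dec64.rules) swap to match.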

benchmark                    old ns/op     new ns/op     delta
BenchmarkAtomicLoad64-8      2.09          0.26          -87.56%
BenchmarkAtomicStore64-8     7.54          5.72          -24.14%
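
The numbers are from the new bench_test.go listed below; its contents are
not shown in this diff, but a minimal sketch of such a benchmark (the
package layout here is an assumption) is:

	package atomic_test

	import (
		"runtime/internal/atomic"
		"testing"
	)

	var x uint64

	func BenchmarkAtomicLoad64(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = atomic.Load64(&x) // a plain MOVQ after this CL, a CALL before
		}
	}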

TBD (in a different CL): Cas, Or8, ...

Change-Id: I713ea88e7da3026c44ea5bdb56ed094b20bc5207
Reviewed-on: https://go-review.googlesource.com/27641
Reviewed-by: Cherry Zhang <cherryyz@google.com>
20 files changed:
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/arm/ssa.go
src/cmd/compile/internal/gc/inl.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/deadstore.go
src/cmd/compile/internal/ssa/gen/386Ops.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/gen/ARMOps.go
src/cmd/compile/internal/ssa/gen/dec64.rules
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/gen/main.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/regalloc.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssa/rewritedec64.go
src/cmd/compile/internal/ssa/type.go
src/cmd/compile/internal/x86/ssa.go
src/runtime/internal/atomic/asm_amd64.s
src/runtime/internal/atomic/bench_test.go [new file with mode: 0644]

src/cmd/compile/internal/amd64/ssa.go
index 472b86b38ab699f169fca9274bbbc2066b951d2c..eed9b2e3d75b4b60a9b926d2ade6b4974d44349d 100644 (file)
@@ -935,7 +935,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                                ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
                                ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
                                ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
-                               ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore:
+                               ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore,
+                               ssa.OpAMD64MOVQatomicload, ssa.OpAMD64MOVLatomicload:
                                if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
                                        if gc.Debug_checknil != 0 && int(v.Line) > 1 {
                                                gc.Warnl(v.Line, "removed nil check")
@@ -951,7 +952,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                                        return
                                }
                        }
-                       if w.Type.IsMemory() {
+                       if w.Type.IsMemory() || w.Type.IsTuple() && w.Type.FieldType(1).IsMemory() {
                                if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
                                        // these ops are OK
                                        mem = w
@@ -976,6 +977,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
                        gc.Warnl(v.Line, "generated nil check")
                }
+       case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
+               p := gc.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = gc.SSARegNum(v.Args[0])
+               gc.AddAux(&p.From, v)
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = gc.SSARegNum0(v)
+       case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
+               r := gc.SSARegNum0(v)
+               if r != gc.SSARegNum(v.Args[0]) {
+                       v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
+               }
+               p := gc.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = gc.SSARegNum(v.Args[1])
+               gc.AddAux(&p.To, v)
        default:
                v.Unimplementedf("genValue not implemented: %s", v.LongString())
        }
src/cmd/compile/internal/arm/ssa.go
index f16dc0f95f2ce6396f195f87b77efb8620f05a05..d6a501f90762c32c2148def42d1872db2e505655 100644 (file)
@@ -283,7 +283,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Reg = r
        case ssa.OpARMADDS,
                ssa.OpARMSUBS:
-               r := gc.SSARegNum1(v)
+               r := gc.SSARegNum0(v)
                r1 := gc.SSARegNum(v.Args[0])
                r2 := gc.SSARegNum(v.Args[1])
                p := gc.Prog(v.Op.Asm())
@@ -356,7 +356,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Offset = v.AuxInt
                p.Reg = gc.SSARegNum(v.Args[0])
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = gc.SSARegNum1(v)
+               p.To.Reg = gc.SSARegNum0(v)
        case ssa.OpARMSRRconst:
                genshift(arm.AMOVW, 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_RR, v.AuxInt)
        case ssa.OpARMADDshiftLL,
@@ -373,7 +373,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftLL,
                ssa.OpARMSUBSshiftLL,
                ssa.OpARMRSBSshiftLL:
-               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LL, v.AuxInt)
+               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_LL, v.AuxInt)
                p.Scond = arm.C_SBIT
        case ssa.OpARMADDshiftRL,
                ssa.OpARMADCshiftRL,
@@ -389,7 +389,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftRL,
                ssa.OpARMSUBSshiftRL,
                ssa.OpARMRSBSshiftRL:
-               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LR, v.AuxInt)
+               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_LR, v.AuxInt)
                p.Scond = arm.C_SBIT
        case ssa.OpARMADDshiftRA,
                ssa.OpARMADCshiftRA,
@@ -405,7 +405,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftRA,
                ssa.OpARMSUBSshiftRA,
                ssa.OpARMRSBSshiftRA:
-               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_AR, v.AuxInt)
+               p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_AR, v.AuxInt)
                p.Scond = arm.C_SBIT
        case ssa.OpARMMVNshiftLL:
                genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt)
@@ -433,7 +433,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftLLreg,
                ssa.OpARMSUBSshiftLLreg,
                ssa.OpARMRSBSshiftLLreg:
-               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LL)
+               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_LL)
                p.Scond = arm.C_SBIT
        case ssa.OpARMADDshiftRLreg,
                ssa.OpARMADCshiftRLreg,
@@ -449,7 +449,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftRLreg,
                ssa.OpARMSUBSshiftRLreg,
                ssa.OpARMRSBSshiftRLreg:
-               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LR)
+               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_LR)
                p.Scond = arm.C_SBIT
        case ssa.OpARMADDshiftRAreg,
                ssa.OpARMADCshiftRAreg,
@@ -465,7 +465,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARMADDSshiftRAreg,
                ssa.OpARMSUBSshiftRAreg,
                ssa.OpARMRSBSshiftRAreg:
-               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_AR)
+               p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_AR)
                p.Scond = arm.C_SBIT
        case ssa.OpARMHMUL,
                ssa.OpARMHMULU:
src/cmd/compile/internal/gc/inl.go
index 0bdabb8c911cd4cbd1c9430d91140208b0a47077..4fa8395940d4b22a00ff66922f29848b9e3f4a5b 100644 (file)
@@ -477,7 +477,7 @@ func inlnode(n *Node) *Node {
                if Debug['m'] > 3 {
                        fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign))
                }
-               if n.Left.Func != nil && n.Left.Func.Inl.Len() != 0 && !isIntrinsicCall1(n) { // normal case
+               if n.Left.Func != nil && n.Left.Func.Inl.Len() != 0 && !isIntrinsicCall(n) { // normal case
                        n = mkinlcall(n, n.Left, n.Isddd)
                } else if n.isMethodCalledAsFunction() && n.Left.Sym.Def != nil {
                        n = mkinlcall(n, n.Left.Sym.Def, n.Isddd)
src/cmd/compile/internal/gc/ssa.go
index 928575c3fa22d4425751e23bedce1e91d2e8f754..9dcdb661cd0b842d8f621224573592434eacd084 100644 (file)
@@ -571,7 +571,14 @@ func (s *state) stmt(n *Node) {
        case OEMPTY, ODCLCONST, ODCLTYPE, OFALL:
 
        // Expression statements
-       case OCALLFUNC, OCALLMETH, OCALLINTER:
+       case OCALLFUNC:
+               if isIntrinsicCall(n) {
+                       s.intrinsicCall(n)
+                       return
+               }
+               fallthrough
+
+       case OCALLMETH, OCALLINTER:
                s.call(n, callNormal)
                if n.Op == OCALLFUNC && n.Left.Op == ONAME && n.Left.Class == PFUNC &&
                        (compiling_runtime && n.Left.Sym.Name == "throw" ||
@@ -2107,8 +2114,8 @@ func (s *state) expr(n *Node) *ssa.Value {
                return s.newValue2(ssa.OpStringMake, n.Type, p, l)
 
        case OCALLFUNC:
-               if isIntrinsicCall1(n) {
-                       return s.intrinsicCall1(n)
+               if isIntrinsicCall(n) {
+                       return s.intrinsicCall(n)
                }
                fallthrough
 
@@ -2516,12 +2523,12 @@ const (
        callGo
 )
 
-// isSSAIntrinsic1 returns true if n is a call to a recognized 1-arg intrinsic
+// isSSAIntrinsic returns true if n is a call to a recognized intrinsic
 // that can be handled by the SSA backend.
 // SSA uses this, but so does the front end to see if should not
 // inline a function because it is a candidate for intrinsic
 // substitution.
-func isSSAIntrinsic1(s *Sym) bool {
+func isSSAIntrinsic(s *Sym) bool {
        // The test below is not quite accurate -- in the event that
        // a function is disabled on a per-function basis, for example
        // because of hash-keyed binary failure search, SSA might be
@@ -2541,38 +2548,74 @@ func isSSAIntrinsic1(s *Sym) bool {
                        return true
                }
        }
+       if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/atomic" {
+               switch s.Name {
+               case "Load", "Load64", "Loadint64", "Loadp", "Loaduint", "Loaduintptr":
+                       return true
+               case "Store", "Store64", "StorepNoWB", "Storeuintptr":
+                       return true
+               }
+       }
        return false
 }
 
-func isIntrinsicCall1(n *Node) bool {
+func isIntrinsicCall(n *Node) bool {
        if n == nil || n.Left == nil {
                return false
        }
-       return isSSAIntrinsic1(n.Left.Sym)
+       return isSSAIntrinsic(n.Left.Sym)
 }
 
-// intrinsicFirstArg extracts arg from n.List and eval
-func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
-       x := n.List.First()
+// intrinsicArg extracts the ith arg from n.List and returns its value.
+func (s *state) intrinsicArg(n *Node, i int) *ssa.Value {
+       x := n.List.Slice()[i]
        if x.Op == OAS {
                x = x.Right
        }
        return s.expr(x)
 }
+func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
+       return s.intrinsicArg(n, 0)
+}
 
-// intrinsicCall1 converts a call to a recognized 1-arg intrinsic
-// into the intrinsic
-func (s *state) intrinsicCall1(n *Node) *ssa.Value {
+// intrinsicCall converts a call to a recognized intrinsic function into the intrinsic SSA operation.
+func (s *state) intrinsicCall(n *Node) (ret *ssa.Value) {
        var result *ssa.Value
-       switch n.Left.Sym.Name {
-       case "Ctz64":
+       name := n.Left.Sym.Name
+       switch {
+       case name == "Ctz64":
                result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
-       case "Ctz32":
+               ret = result
+       case name == "Ctz32":
                result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
-       case "Bswap64":
+               ret = result
+       case name == "Bswap64":
                result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
-       case "Bswap32":
+               ret = result
+       case name == "Bswap32":
                result = s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
+               ret = result
+       case name == "Load" || name == "Loaduint" && s.config.IntSize == 4 || name == "Loaduintptr" && s.config.PtrSize == 4:
+               result = s.newValue2(ssa.OpAtomicLoad32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
+               s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
+               ret = s.newValue1(ssa.OpSelect0, Types[TUINT32], result)
+       case name == "Load64" || name == "Loadint64" || name == "Loaduint" && s.config.IntSize == 8 || name == "Loaduintptr" && s.config.PtrSize == 8:
+               result = s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
+               s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
+               ret = s.newValue1(ssa.OpSelect0, Types[TUINT64], result)
+       case name == "Loadp":
+               result = s.newValue2(ssa.OpAtomicLoadPtr, ssa.MakeTuple(Ptrto(Types[TUINT8]), ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
+               s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
+               ret = s.newValue1(ssa.OpSelect0, Ptrto(Types[TUINT8]), result)
+       case name == "Store" || name == "Storeuintptr" && s.config.PtrSize == 4:
+               result = s.newValue3(ssa.OpAtomicStore32, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
+               s.vars[&memVar] = result
+       case name == "Store64" || name == "Storeuintptr" && s.config.PtrSize == 8:
+               result = s.newValue3(ssa.OpAtomicStore64, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
+               s.vars[&memVar] = result
+       case name == "StorepNoWB":
+               result = s.newValue3(ssa.OpAtomicStorePtrNoWB, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
+               s.vars[&memVar] = result
        }
        if result == nil {
                Fatalf("Unknown special call: %v", n.Left.Sym)
@@ -2580,7 +2623,7 @@ func (s *state) intrinsicCall1(n *Node) *ssa.Value {
        if ssa.IntrinsicsDebug > 0 {
                Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString())
        }
-       return result
+       return
 }
 
 // Calls the function n using the specified call type.
src/cmd/compile/internal/ssa/deadstore.go
index 7acc3b09d498ac92eabcda92bf85c43a6b0acad8..3386a227ed6b01f1cb460392cb4fd467a45bf54e 100644 (file)
@@ -29,6 +29,10 @@ func dse(f *Func) {
                        }
                        if v.Type.IsMemory() {
                                stores = append(stores, v)
+                               if v.Op == OpSelect1 {
+                                       // Use the args of the tuple-generating op.
+                                       v = v.Args[0]
+                               }
                                for _, a := range v.Args {
                                        if a.Block == b && a.Type.IsMemory() {
                                                storeUse.add(a.ID)
src/cmd/compile/internal/ssa/gen/386Ops.go
index 1013adf4a619dfe3d5c4893aa873f5e703aa7f28..c0cd7c6b2009876af03979c0e7e420c62bc6ca3c 100644 (file)
@@ -106,8 +106,8 @@ func init() {
                gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
                gp11sb    = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
                gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-               gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{0, gp}}
-               gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{0, gp}}
+               gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
+               gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
                gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly}
                gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
                gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
src/cmd/compile/internal/ssa/gen/AMD64.rules
index ea37f07cc349ad48f1490daf23d01ece3c12c75a..a412604b5952a52b89b68af80f7f824ab50cb9db 100644 (file)
 
 (If cond yes no) -> (NE (TESTB cond cond) yes no)
 
+// Atomic loads.  Other than preserving their ordering with respect to other loads, nothing special here.
+(AtomicLoad32 ptr mem) -> (MOVLatomicload ptr mem)
+(AtomicLoad64 ptr mem) -> (MOVQatomicload ptr mem)
+(AtomicLoadPtr ptr mem) && config.PtrSize == 8 -> (MOVQatomicload ptr mem)
+(AtomicLoadPtr ptr mem) && config.PtrSize == 4 -> (MOVLatomicload ptr mem)
+
+// Atomic stores.  We use XCHG to prevent the hardware reordering a subsequent load.
+// TODO: most runtime uses of atomic stores don't need that property.  Use normal stores for those?
+(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeUInt32(),TypeMem)> val ptr mem))
+(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeUInt64(),TypeMem)> val ptr mem))
+(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 8 -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
+(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 4 -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
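
Why XCHG rather than a plain MOV store: x86 allows a later load to
complete before an earlier plain store drains from the store buffer, so
a Dekker-style handshake (a schematic example, using the runtime's
atomic package):

	// goroutine 1:          // goroutine 2:
	atomic.Store(&x, 1)      atomic.Store(&y, 1)
	r1 := atomic.Load(&y)    r2 := atomic.Load(&x)

could observe r1 == 0 && r2 == 0 unless the store is an implicitly
LOCKed instruction such as XCHG.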
+
 // ***************************
 // Above: lowering rules
 // Below: optimizations
        (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
 (MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
        (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+
+// Merge ADDQconst and LEAQ into atomic loads.
+(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+       (MOVQatomicload [off1+off2] {sym} ptr mem)
+(MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+       (MOVLatomicload [off1+off2] {sym} ptr mem)
+(MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+       (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+       (MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+
+// Merge ADDQconst and LEAQ into atomic stores.
+(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+       (XCHGQ [off1+off2] {sym} val ptr mem)
+(XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
+       (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+(XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+       (XCHGL [off1+off2] {sym} val ptr mem)
+(XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
+       (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 9359e6d027468f82d3f57696cae72e392d58b469..f30785032479107decd9da0031befded05530536 100644 (file)
@@ -134,6 +134,7 @@ func init() {
                gpstoreconst    = regInfo{inputs: []regMask{gpspsb, 0}}
                gpstoreidx      = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
                gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
+               gpstorexchg     = regInfo{inputs: []regMask{gp, gp, 0}, outputs: []regMask{gp}}
 
                fp01    = regInfo{inputs: nil, outputs: fponly}
                fp21    = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
@@ -509,6 +510,20 @@ func init() {
                {name: "FlagLT_UGT"}, // signed < and unsigned >
                {name: "FlagGT_UGT"}, // signed > and unsigned <
                {name: "FlagGT_ULT"}, // signed > and unsigned >
+
+               // Atomic loads.  These are just normal loads but return <value,memory> tuples
+               // so they can be properly ordered with other loads.
+               // load from arg0+auxint+aux.  arg1=mem.
+               {name: "MOVLatomicload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff"},
+               {name: "MOVQatomicload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff"},
+               // Atomic stores.  We use XCHG to get the right memory ordering semantics.
+               // These ops return a tuple of <old memory contents, memory>.  The old contents are
+               // ignored for now but they are allocated to a register so that the argument register
+               // is properly clobbered (together with resultInArg0).
+               // store arg0 to arg1+auxint+aux, arg2=mem.
+               // Note: arg0 and arg1 are backwards compared to MOVLstore (to facilitate resultInArg0)!
+               {name: "XCHGL", argLength: 3, reg: gpstorexchg, asm: "XCHGL", aux: "SymOff", resultInArg0: true},
+               {name: "XCHGQ", argLength: 3, reg: gpstorexchg, asm: "XCHGQ", aux: "SymOff", resultInArg0: true},
        }
 
        var AMD64blocks = []blockData{
src/cmd/compile/internal/ssa/gen/ARMOps.go
index f1774c6be045565176efdb35585047f96b6bbd6d..b0114328b3d2b630c5fd9298e9d692c277dc1c28 100644 (file)
@@ -99,17 +99,17 @@ func init() {
        var (
                gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
                gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-               gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{0, gp}}
+               gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}}
                gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
                gp1flags  = regInfo{inputs: []regMask{gpg}}
                gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
                gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
-               gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{0, gp}}
+               gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}}
                gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
                gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
                gp22      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
                gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-               gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{0, gp}}
+               gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}}
                gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
                gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
                gpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
src/cmd/compile/internal/ssa/gen/dec64.rules
index e419c741b664b2735039c1394ef7b4aa58e3a1d8..8f0227af664cdcf7cbf56b0ad63dbab844286383 100644 (file)
                (Add32withcarry <config.fe.TypeInt32()>
                        (Int64Hi x)
                        (Int64Hi y)
-                       (Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
-               (Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
+                       (Select1 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
+               (Select0 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
 
 (Sub64 x y) ->
        (Int64Make
                (Sub32withcarry <config.fe.TypeInt32()>
                        (Int64Hi x)
                        (Int64Hi y)
-                       (Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
-               (Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
+                       (Select1 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
+               (Select0 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
 
 (Mul64 x y) ->
        (Int64Make
src/cmd/compile/internal/ssa/gen/genericOps.go
index 1668f6a39040fb044e156da5db9ec0ecd16d1d06..dfa5ed6de35e885e351498a493e06c7bb1ba806d 100644 (file)
@@ -417,10 +417,10 @@ var genericOps = []opData{
        {name: "Int64Hi", argLength: 1, typ: "UInt32"},   // high 32-bit of arg0
        {name: "Int64Lo", argLength: 1, typ: "UInt32"},   // low 32-bit of arg0
 
-       {name: "Add32carry", argLength: 2, commutative: true, typ: "(Flags,UInt32)"}, // arg0 + arg1, returns (carry, value)
+       {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry)
        {name: "Add32withcarry", argLength: 3, commutative: true},                    // arg0 + arg1 + arg2, arg2=carry (0 or 1)
 
-       {name: "Sub32carry", argLength: 2, typ: "(Flags,UInt32)"}, // arg0 - arg1, returns (carry, value)
+       {name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry)
        {name: "Sub32withcarry", argLength: 3},                    // arg0 - arg1 - arg2, arg2=carry (0 or 1)
 
        {name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
@@ -440,6 +440,17 @@ var genericOps = []opData{
        // pseudo-ops for breaking Tuple
        {name: "Select0", argLength: 1}, // the first component of a tuple
        {name: "Select1", argLength: 1}, // the second component of a tuple
+
+       // Atomic operations used for semantically inlining runtime/internal/atomic.
+       // Atomic loads return a new memory so that the loads are properly ordered
+       // with respect to other loads and stores.
+       // TODO: use for sync/atomic at some point.
+       {name: "AtomicLoad32", argLength: 2, typ: "(UInt32,Mem)"},   // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
+       {name: "AtomicLoad64", argLength: 2, typ: "(UInt64,Mem)"},   // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
+       {name: "AtomicLoadPtr", argLength: 2, typ: "(BytePtr,Mem)"}, // Load from arg0.  arg1=memory.  Returns loaded value and new memory.
+       {name: "AtomicStore32", argLength: 3, typ: "Mem"},           // Store arg1 to arg0.  arg2=memory.  Returns memory.
+       {name: "AtomicStore64", argLength: 3, typ: "Mem"},           // Store arg1 to arg0.  arg2=memory.  Returns memory.
+       {name: "AtomicStorePtrNoWB", argLength: 3, typ: "Mem"},      // Store arg1 to arg0.  arg2=memory.  Returns memory.
 }
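
The ordering guarantee falls out of the value graph. A sketch of the
generic SSA an atomic load produces (value numbers are illustrative):

	v1 = AtomicLoad64 <(UInt64,Mem)> ptr mem
	v2 = Select0 <UInt64> v1   // the loaded value
	v3 = Select1 <Mem> v1      // the new memory; later memory ops take v3
	                           // as an argument and so stay ordered after v1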
 
 //     kind           control    successors       implicit exit
src/cmd/compile/internal/ssa/gen/main.go
index 059315542eaa0d79bde38d220f76ad65a5e29157..29f3fa538808c8e12149088f6dbdd492320a99b5 100644 (file)
@@ -43,7 +43,7 @@ type opData struct {
        rematerializeable bool
        argLength         int32 // number of arguments, if -1, then this operation has a variable number of arguments
        commutative       bool  // this operation is commutative on its first 2 arguments (e.g. addition)
-       resultInArg0      bool  // last output of v and v.Args[0] must be allocated to the same register
+       resultInArg0      bool  // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
        clobberFlags      bool  // this op clobbers flags register
 }
 
@@ -161,11 +161,11 @@ func genOp() {
                        }
                        if v.resultInArg0 {
                                fmt.Fprintln(w, "resultInArg0: true,")
-                               if v.reg.inputs[0] != v.reg.outputs[len(v.reg.outputs)-1] {
-                                       log.Fatalf("input[0] and last output register must be equal for %s", v.name)
+                               if v.reg.inputs[0] != v.reg.outputs[0] {
+                                       log.Fatalf("input[0] and output[0] must use the same registers for %s", v.name)
                                }
-                               if v.commutative && v.reg.inputs[1] != v.reg.outputs[len(v.reg.outputs)-1] {
-                                       log.Fatalf("input[1] and last output register must be equal for %s", v.name)
+                               if v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
+                                       log.Fatalf("input[1] and output[0] must use the same registers for %s", v.name)
                                }
                        }
                        if v.clobberFlags {
src/cmd/compile/internal/ssa/opGen.go
index 61386126772f97af0b9ab5659ac087cefff97ae1..f8e2d8979f084cb2228f25b2473538ae961e5aa6 100644 (file)
@@ -586,6 +586,10 @@ const (
        OpAMD64FlagLT_UGT
        OpAMD64FlagGT_UGT
        OpAMD64FlagGT_ULT
+       OpAMD64MOVLatomicload
+       OpAMD64MOVQatomicload
+       OpAMD64XCHGL
+       OpAMD64XCHGQ
 
        OpARMADD
        OpARMADDconst
@@ -1491,6 +1495,12 @@ const (
        OpCvt64Fto64U
        OpSelect0
        OpSelect1
+       OpAtomicLoad32
+       OpAtomicLoad64
+       OpAtomicLoadPtr
+       OpAtomicStore32
+       OpAtomicStore64
+       OpAtomicStorePtrNoWB
 )
 
 var opcodeTable = [...]opInfo{
@@ -1855,8 +1865,8 @@ var opcodeTable = [...]opInfo{
                                {1, 239}, // AX CX DX BX BP SI DI
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 239}, // AX CX DX BX BP SI DI
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
                        },
                },
        },
@@ -1871,8 +1881,8 @@ var opcodeTable = [...]opInfo{
                                {0, 239}, // AX CX DX BX BP SI DI
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 239}, // AX CX DX BX BP SI DI
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
                        },
                },
        },
@@ -1952,8 +1962,8 @@ var opcodeTable = [...]opInfo{
                                {1, 239}, // AX CX DX BX BP SI DI
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 239}, // AX CX DX BX BP SI DI
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
                        },
                },
        },
@@ -1968,8 +1978,8 @@ var opcodeTable = [...]opInfo{
                                {0, 239}, // AX CX DX BX BP SI DI
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 239}, // AX CX DX BX BP SI DI
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
                        },
                },
        },
@@ -6789,6 +6799,66 @@ var opcodeTable = [...]opInfo{
                argLen: 0,
                reg:    regInfo{},
        },
+       {
+               name:    "MOVLatomicload",
+               auxType: auxSymOff,
+               argLen:  2,
+               asm:     x86.AMOVL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:    "MOVQatomicload",
+               auxType: auxSymOff,
+               argLen:  2,
+               asm:     x86.AMOVQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "XCHGL",
+               auxType:      auxSymOff,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AXCHGL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "XCHGQ",
+               auxType:      auxSymOff,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AXCHGQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
 
        {
                name:        "ADD",
@@ -6991,8 +7061,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -7006,8 +7076,8 @@ var opcodeTable = [...]opInfo{
                                {0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -7050,8 +7120,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -7065,8 +7135,8 @@ var opcodeTable = [...]opInfo{
                                {0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -7080,8 +7150,8 @@ var opcodeTable = [...]opInfo{
                                {0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8041,8 +8111,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8057,8 +8127,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8073,8 +8143,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8089,8 +8159,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8105,8 +8175,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8121,8 +8191,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8137,8 +8207,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8153,8 +8223,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8169,8 +8239,8 @@ var opcodeTable = [...]opInfo{
                                {1, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8677,8 +8747,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8693,8 +8763,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8709,8 +8779,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8725,8 +8795,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8741,8 +8811,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8757,8 +8827,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8773,8 +8843,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8789,8 +8859,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -8805,8 +8875,8 @@ var opcodeTable = [...]opInfo{
                                {2, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                        outputs: []outputInfo{
-                               {0, 0},
-                               {1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+                               {1, 0},
+                               {0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
                        },
                },
        },
@@ -16156,6 +16226,36 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "AtomicLoad32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AtomicLoad64",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AtomicLoadPtr",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AtomicStore32",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "AtomicStore64",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "AtomicStorePtrNoWB",
+               argLen:  3,
+               generic: true,
+       },
 }
 
 func (o Op) Asm() obj.As    { return opcodeTable[o].asm }
src/cmd/compile/internal/ssa/regalloc.go
index 3dc9fad28a9f95fcb1aee3488734fbb5e61a46de..e853f6631632cf1945b8b123330296cb3d542541 100644 (file)
@@ -1204,7 +1204,7 @@ func (s *regAllocState) regalloc(f *Func) {
                                        if mask == 0 {
                                                continue
                                        }
-                                       if opcodeTable[v.Op].resultInArg0 && out.idx == len(regspec.outputs)-1 {
+                                       if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
                                                if !opcodeTable[v.Op].commutative {
                                                        // Output must use the same register as input 0.
                                                        r := register(s.f.getHome(args[0].ID).(*Register).Num)
src/cmd/compile/internal/ssa/rewriteAMD64.go
index 6c479bf91f1b35e1b5b17fc948269b1ed9ad4f83..05f01b291633cf9093901d84c4d29d8a5eef8b25 100644 (file)
@@ -76,6 +76,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpAMD64MOVLQSXload(v, config)
        case OpAMD64MOVLQZX:
                return rewriteValueAMD64_OpAMD64MOVLQZX(v, config)
+       case OpAMD64MOVLatomicload:
+               return rewriteValueAMD64_OpAMD64MOVLatomicload(v, config)
        case OpAMD64MOVLload:
                return rewriteValueAMD64_OpAMD64MOVLload(v, config)
        case OpAMD64MOVLloadidx1:
@@ -98,6 +100,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpAMD64MOVOload(v, config)
        case OpAMD64MOVOstore:
                return rewriteValueAMD64_OpAMD64MOVOstore(v, config)
+       case OpAMD64MOVQatomicload:
+               return rewriteValueAMD64_OpAMD64MOVQatomicload(v, config)
        case OpAMD64MOVQload:
                return rewriteValueAMD64_OpAMD64MOVQload(v, config)
        case OpAMD64MOVQloadidx1:
@@ -256,6 +260,10 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpAMD64SUBQ(v, config)
        case OpAMD64SUBQconst:
                return rewriteValueAMD64_OpAMD64SUBQconst(v, config)
+       case OpAMD64XCHGL:
+               return rewriteValueAMD64_OpAMD64XCHGL(v, config)
+       case OpAMD64XCHGQ:
+               return rewriteValueAMD64_OpAMD64XCHGQ(v, config)
        case OpAMD64XORL:
                return rewriteValueAMD64_OpAMD64XORL(v, config)
        case OpAMD64XORLconst:
@@ -290,6 +298,18 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpAnd8(v, config)
        case OpAndB:
                return rewriteValueAMD64_OpAndB(v, config)
+       case OpAtomicLoad32:
+               return rewriteValueAMD64_OpAtomicLoad32(v, config)
+       case OpAtomicLoad64:
+               return rewriteValueAMD64_OpAtomicLoad64(v, config)
+       case OpAtomicLoadPtr:
+               return rewriteValueAMD64_OpAtomicLoadPtr(v, config)
+       case OpAtomicStore32:
+               return rewriteValueAMD64_OpAtomicStore32(v, config)
+       case OpAtomicStore64:
+               return rewriteValueAMD64_OpAtomicStore64(v, config)
+       case OpAtomicStorePtrNoWB:
+               return rewriteValueAMD64_OpAtomicStorePtrNoWB(v, config)
        case OpAvg64u:
                return rewriteValueAMD64_OpAvg64u(v, config)
        case OpBswap32:
@@ -4368,6 +4388,58 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVLatomicload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64MOVLload(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -5884,6 +5956,58 @@ func rewriteValueAMD64_OpAMD64MOVOstore(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64MOVQload(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -12657,6 +12781,118 @@ func rewriteValueAMD64_OpAMD64SUBQconst(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64XCHGL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XCHGL [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XCHGL)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64XCHGL)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64XCHGQ(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XCHGQ [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
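
The XCHGL/XCHGQ rewrites are the store-side mirror of the atomic-load
folding above, with one extra guard: the LEAQ variant requires ptr.Op !=
OpSB, so a global's address is never folded directly into the exchange.
Continuing the hypothetical package from the load sketch:

        // Lowered (by the AtomicStore rules below) to Select1(XCHGQ val ptr mem).
        // XCHG with a memory operand is implicitly locked on x86, so the store
        // is a full barrier with no separate fence instruction.
        func storeCount(s *stats, v uint64) {
                atomic.Store64(&s.count, v)
        }
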
 func rewriteValueAMD64_OpAMD64XORL(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -13085,6 +13321,148 @@ func rewriteValueAMD64_OpAndB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpAtomicLoad32(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicLoad32 ptr mem)
+       // cond:
+       // result: (MOVLatomicload ptr mem)
+       for {
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoad64(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicLoad64 ptr mem)
+       // cond:
+       // result: (MOVQatomicload ptr mem)
+       for {
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoadPtr(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicLoadPtr ptr mem)
+       // cond: config.PtrSize == 8
+       // result: (MOVQatomicload ptr mem)
+       for {
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               if !(config.PtrSize == 8) {
+                       break
+               }
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (AtomicLoadPtr ptr mem)
+       // cond: config.PtrSize == 4
+       // result: (MOVLatomicload ptr mem)
+       for {
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               if !(config.PtrSize == 4) {
+                       break
+               }
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
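
AtomicLoadPtr dispatches on the target's pointer size because the AMD64
backend also serves 4-byte-pointer configurations (GOARCH=amd64p32 at the
time). A sketch of the same dispatch as a helper; config.PtrSize is the
real field, the function itself is illustrative:

        func atomicLoadPtrOp(config *Config) Op {
                if config.PtrSize == 8 {
                        return OpAMD64MOVQatomicload
                }
                return OpAMD64MOVLatomicload // 32-bit pointers on the 64-bit backend
        }
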
+func rewriteValueAMD64_OpAtomicStore32(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicStore32 ptr val mem)
+       // cond:
+       // result: (Select1 (XCHGL <MakeTuple(config.Frontend().TypeUInt32(),TypeMem)> val ptr mem))
+       for {
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Line, OpAMD64XCHGL, MakeTuple(config.Frontend().TypeUInt32(), TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicStore64(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicStore64 ptr val mem)
+       // cond:
+       // result: (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeUInt64(),TypeMem)> val ptr mem))
+       for {
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Line, OpAMD64XCHGQ, MakeTuple(config.Frontend().TypeUInt64(), TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // cond: config.PtrSize == 8
+       // result: (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
+       for {
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(config.PtrSize == 8) {
+                       break
+               }
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Line, OpAMD64XCHGQ, MakeTuple(config.Frontend().TypeBytePtr(), TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // cond: config.PtrSize == 4
+       // result: (Select1 (XCHGL <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
+       for {
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(config.PtrSize == 4) {
+                       break
+               }
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Line, OpAMD64XCHGL, MakeTuple(config.Frontend().TypeBytePtr(), TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
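
Each store lowering wraps the exchange in Select1: the XCHG produces an
<old value, memory> tuple and only the memory half is kept, threading the
store into the memory chain. Loads return tuples for the same reason, so
the scheduler cannot float them past other memory operations. A sketch of
consuming both halves of such a tuple; NewValue0/NewValue1, OpSelect0,
OpSelect1, MakeTuple, and TypeMem are the real ssa-package helpers, while
the surrounding function is illustrative:

        func consumeAtomicLoad(b *Block, line int32, fe Frontend, ptr, mem *Value) (*Value, *Value) {
                load := b.NewValue0(line, OpAMD64MOVQatomicload, MakeTuple(fe.TypeUInt64(), TypeMem))
                load.AddArg(ptr)
                load.AddArg(mem)
                val := b.NewValue1(line, OpSelect0, fe.TypeUInt64(), load) // the loaded word
                newmem := b.NewValue1(line, OpSelect1, TypeMem, load)      // memory part is second
                return val, newmem
        }
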
 func rewriteValueAMD64_OpAvg64u(v *Value, config *Config) bool {
        b := v.Block
        _ = b
diff --git a/src/cmd/compile/internal/ssa/rewritedec64.go b/src/cmd/compile/internal/ssa/rewritedec64.go
index 33d90f53414004ae77de999cc70b97298a7be0c6..91103f8475d3ec6cc72827679d571547a8cf8b0c 100644 (file)
@@ -126,7 +126,7 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
        _ = b
        // match: (Add64 x y)
        // cond:
-       // result: (Int64Make           (Add32withcarry <config.fe.TypeInt32()>                         (Int64Hi x)                     (Int64Hi y)                     (Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))             (Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
+       // result: (Int64Make           (Add32withcarry <config.fe.TypeInt32()>                         (Int64Hi x)                     (Int64Hi y)                     (Select1 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))             (Select0 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
        for {
                x := v.Args[0]
                y := v.Args[1]
@@ -138,8 +138,8 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
                v2 := b.NewValue0(v.Line, OpInt64Hi, config.fe.TypeUInt32())
                v2.AddArg(y)
                v0.AddArg(v2)
-               v3 := b.NewValue0(v.Line, OpSelect0, TypeFlags)
-               v4 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+               v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+               v4 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
                v5 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
                v5.AddArg(x)
                v4.AddArg(v5)
@@ -149,8 +149,8 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
                v3.AddArg(v4)
                v0.AddArg(v3)
                v.AddArg(v0)
-               v7 := b.NewValue0(v.Line, OpSelect1, config.fe.TypeUInt32())
-               v8 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+               v7 := b.NewValue0(v.Line, OpSelect0, config.fe.TypeUInt32())
+               v8 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
                v9 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
                v9.AddArg(x)
                v8.AddArg(v9)
@@ -2361,7 +2361,7 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
        _ = b
        // match: (Sub64 x y)
        // cond:
-       // result: (Int64Make           (Sub32withcarry <config.fe.TypeInt32()>                         (Int64Hi x)                     (Int64Hi y)                     (Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))             (Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
+       // result: (Int64Make           (Sub32withcarry <config.fe.TypeInt32()>                         (Int64Hi x)                     (Int64Hi y)                     (Select1 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))             (Select0 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
        for {
                x := v.Args[0]
                y := v.Args[1]
@@ -2373,8 +2373,8 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
                v2 := b.NewValue0(v.Line, OpInt64Hi, config.fe.TypeUInt32())
                v2.AddArg(y)
                v0.AddArg(v2)
-               v3 := b.NewValue0(v.Line, OpSelect0, TypeFlags)
-               v4 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+               v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+               v4 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
                v5 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
                v5.AddArg(x)
                v4.AddArg(v5)
@@ -2384,8 +2384,8 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
                v3.AddArg(v4)
                v0.AddArg(v3)
                v.AddArg(v0)
-               v7 := b.NewValue0(v.Line, OpSelect1, config.fe.TypeUInt32())
-               v8 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+               v7 := b.NewValue0(v.Line, OpSelect0, config.fe.TypeUInt32())
+               v8 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
                v9 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
                v9.AddArg(x)
                v8.AddArg(v9)
diff --git a/src/cmd/compile/internal/ssa/type.go b/src/cmd/compile/internal/ssa/type.go
index 4470dc1010ddac196356f430e7a847152425f1db..3ebee6a8f1dab1c40d358b738f592f18aa0933b1 100644 (file)
@@ -33,7 +33,7 @@ type Type interface {
        PtrTo() Type    // given T, return *T
 
        NumFields() int         // # of fields of a struct
-       FieldType(i int) Type   // type of ith field of the struct
+       FieldType(i int) Type   // type of ith field of the struct or ith part of a tuple
        FieldOff(i int) int64   // offset of ith field of the struct
        FieldName(i int) string // name of ith field of the struct
 
@@ -84,31 +84,41 @@ func (t *CompilerType) NumElem() int64         { panic("not implemented") }
 type TupleType struct {
        first  Type
        second Type
+       // Any tuple with a memory type must put that memory type second.
 }
 
-func (t *TupleType) Size() int64            { panic("not implemented") }
-func (t *TupleType) Alignment() int64       { panic("not implemented") }
-func (t *TupleType) IsBoolean() bool        { return false }
-func (t *TupleType) IsInteger() bool        { return false }
-func (t *TupleType) IsSigned() bool         { return false }
-func (t *TupleType) IsFloat() bool          { return false }
-func (t *TupleType) IsComplex() bool        { return false }
-func (t *TupleType) IsPtrShaped() bool      { return false }
-func (t *TupleType) IsString() bool         { return false }
-func (t *TupleType) IsSlice() bool          { return false }
-func (t *TupleType) IsArray() bool          { return false }
-func (t *TupleType) IsStruct() bool         { return false }
-func (t *TupleType) IsInterface() bool      { return false }
-func (t *TupleType) IsMemory() bool         { return false }
-func (t *TupleType) IsFlags() bool          { return false }
-func (t *TupleType) IsVoid() bool           { return false }
-func (t *TupleType) IsTuple() bool          { return true }
-func (t *TupleType) String() string         { return t.first.String() + "," + t.second.String() }
-func (t *TupleType) SimpleString() string   { return "Tuple" }
-func (t *TupleType) ElemType() Type         { panic("not implemented") }
-func (t *TupleType) PtrTo() Type            { panic("not implemented") }
-func (t *TupleType) NumFields() int         { panic("not implemented") }
-func (t *TupleType) FieldType(i int) Type   { panic("not implemented") }
+func (t *TupleType) Size() int64          { panic("not implemented") }
+func (t *TupleType) Alignment() int64     { panic("not implemented") }
+func (t *TupleType) IsBoolean() bool      { return false }
+func (t *TupleType) IsInteger() bool      { return false }
+func (t *TupleType) IsSigned() bool       { return false }
+func (t *TupleType) IsFloat() bool        { return false }
+func (t *TupleType) IsComplex() bool      { return false }
+func (t *TupleType) IsPtrShaped() bool    { return false }
+func (t *TupleType) IsString() bool       { return false }
+func (t *TupleType) IsSlice() bool        { return false }
+func (t *TupleType) IsArray() bool        { return false }
+func (t *TupleType) IsStruct() bool       { return false }
+func (t *TupleType) IsInterface() bool    { return false }
+func (t *TupleType) IsMemory() bool       { return false }
+func (t *TupleType) IsFlags() bool        { return false }
+func (t *TupleType) IsVoid() bool         { return false }
+func (t *TupleType) IsTuple() bool        { return true }
+func (t *TupleType) String() string       { return t.first.String() + "," + t.second.String() }
+func (t *TupleType) SimpleString() string { return "Tuple" }
+func (t *TupleType) ElemType() Type       { panic("not implemented") }
+func (t *TupleType) PtrTo() Type          { panic("not implemented") }
+func (t *TupleType) NumFields() int       { panic("not implemented") }
+func (t *TupleType) FieldType(i int) Type {
+       switch i {
+       case 0:
+               return t.first
+       case 1:
+               return t.second
+       default:
+               panic("bad tuple index")
+       }
+}
 func (t *TupleType) FieldOff(i int) int64   { panic("not implemented") }
 func (t *TupleType) FieldName(i int) string { panic("not implemented") }
 func (t *TupleType) NumElem() int64         { panic("not implemented") }
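
Implementing FieldType for tuples lets passes inspect a tuple's parts
generically. Combined with the new convention that the memory part, if
present, is always second, a pass can ask whether a value produces memory
with a helper along these lines (illustrative name, real Type methods):

        func producesMemory(t Type) bool {
                return t.IsMemory() || t.IsTuple() && t.FieldType(1).IsMemory()
        }
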
diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go
index 3005a19bfdd6f7faadae3970b4749afcacc9866c..42e5df163cfb0c5d614f03b1ae28080eb5e6a125 100644 (file)
@@ -196,17 +196,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 
        case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
-               // output 0 is carry/borrow, output 1 is the low 32 bits.
+               // output 0 is the low 32 bits, output 1 is carry/borrow.
-               r := gc.SSARegNum1(v)
+               r := gc.SSARegNum0(v)
                if r != gc.SSARegNum(v.Args[0]) {
-                       v.Fatalf("input[0] and output[1] not in same register %s", v.LongString())
+                       v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
                }
                opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))
 
        case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
-               // output 0 is carry/borrow, output 1 is the low 32 bits.
+               // output 0 is the low 32 bits, output 1 is carry/borrow.
-               r := gc.SSARegNum1(v)
+               r := gc.SSARegNum0(v)
                if r != gc.SSARegNum(v.Args[0]) {
-                       v.Fatalf("input[0] and output[1] not in same register %s", v.LongString())
+                       v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
                }
                p := gc.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_CONST
diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s
index 32dbbf763d2ad7ff41ef96a1e8e9c1ec13fb1c90..6fb5211c9cedd225d3e4b472406e5ad936b6f5c5 100644 (file)
@@ -2,6 +2,9 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+// Note: some of these functions are semantically inlined
+// by the compiler (in src/cmd/compile/internal/gc/ssa.go).
+
 #include "textflag.h"
 
 // bool Cas(int32 *val, int32 old, int32 new)
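
The new note records that the SSA backend now emits these operations
directly rather than calling into this file: on amd64 an atomic load is an
ordinary MOVQ (kept ordered by x86's memory model and, in the compiler, by
the <value,memory> tuple), and an atomic store remains an implicitly
locked XCHGQ. The benchmark deltas in the commit message come from
deleting the call and return around one or two instructions; a sketch of
the runtime code shape that benefits (illustrative function, real atomic
API):

        // spinUntilNonzero previously paid a call per iteration; with the
        // load inlined, the loop body is a MOVQ plus a test.
        func spinUntilNonzero(p *uint64) uint64 {
                for {
                        if v := atomic.Load64(p); v != 0 {
                                return v
                        }
                }
        }
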
diff --git a/src/runtime/internal/atomic/bench_test.go b/src/runtime/internal/atomic/bench_test.go
new file mode 100644 (file)
index 0000000..47010e3
--- /dev/null
@@ -0,0 +1,28 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atomic_test
+
+import (
+       "runtime/internal/atomic"
+       "testing"
+)
+
+var sink interface{}
+
+func BenchmarkAtomicLoad64(b *testing.B) {
+       var x uint64
+       sink = &x
+       for i := 0; i < b.N; i++ {
+               _ = atomic.Load64(&x)
+       }
+}
+
+func BenchmarkAtomicStore64(b *testing.B) {
+       var x uint64
+       sink = &x
+       for i := 0; i < b.N; i++ {
+               atomic.Store64(&x, 0)
+       }
+}
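
The sink assignment makes x escape to the heap, presumably so each
iteration measures a real memory access rather than a register the
compiler could keep local. With a checkout containing this CL, the
benchmarks run with: go test -run=NONE -bench=Atomic runtime/internal/atomic.
A variant without the sink risks measuring less than a load (a sketch of
the pitfall, not part of the CL):

        // Without forcing x to escape, the compiler is freer to keep x in
        // registers or to hoist work out of the loop.
        func BenchmarkAtomicLoad64Naive(b *testing.B) {
                var x uint64
                for i := 0; i < b.N; i++ {
                        _ = atomic.Load64(&x)
                }
        }
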