]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile,cmd/go,cmd/internal,runtime: remove dynamic checks for atomics for ARM64...
authorAndrey Bokhanko <andreybokhanko@gmail.com>
Wed, 6 Mar 2024 14:44:03 +0000 (17:44 +0300)
committerCherry Mui <cherryyz@google.com>
Thu, 21 Mar 2024 20:08:06 +0000 (20:08 +0000)
Remove dynamic checks for atomic instructions for ARM64 targets that support LSE extension.

For #66131

Change-Id: I0ec1b183a3f4ea4c8a537430646e6bc4b4f64271
Reviewed-on: https://go-review.googlesource.com/c/go/+/569536
Reviewed-by: Mauri de Souza Meneguzzo <mauri870@gmail.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Fannie Zhang <Fannie.Zhang@arm.com>
Reviewed-by: Shu-Chun Weng <scw@google.com>
src/cmd/compile/internal/ssagen/ssa.go
src/cmd/go/internal/cfg/cfg.go
src/cmd/go/internal/work/gc.go
src/cmd/internal/testdir/testdir_test.go
src/internal/buildcfg/cfg.go
src/internal/buildcfg/cfg_test.go
src/runtime/internal/atomic/atomic_arm64.s
test/codegen/atomics.go [new file with mode: 0644]

index 06180f8deaa9c18d704eea2997e89c3bf5b741c4..5174cf123c2e9ff76cdac27f31f789a51dd1fce5 100644 (file)
@@ -4387,31 +4387,35 @@ func InitTables() {
        makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ, rtyp types.Kind, emit atomicOpEmitter) intrinsicBuilder {
 
                return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-                       // Target Atomic feature is identified by dynamic detection
-                       addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
-                       v := s.load(types.Types[types.TBOOL], addr)
-                       b := s.endBlock()
-                       b.Kind = ssa.BlockIf
-                       b.SetControl(v)
-                       bTrue := s.f.NewBlock(ssa.BlockPlain)
-                       bFalse := s.f.NewBlock(ssa.BlockPlain)
-                       bEnd := s.f.NewBlock(ssa.BlockPlain)
-                       b.AddEdgeTo(bTrue)
-                       b.AddEdgeTo(bFalse)
-                       b.Likely = ssa.BranchLikely
-
-                       // We have atomic instructions - use it directly.
-                       s.startBlock(bTrue)
-                       emit(s, n, args, op1, typ)
-                       s.endBlock().AddEdgeTo(bEnd)
-
-                       // Use original instruction sequence.
-                       s.startBlock(bFalse)
-                       emit(s, n, args, op0, typ)
-                       s.endBlock().AddEdgeTo(bEnd)
-
-                       // Merge results.
-                       s.startBlock(bEnd)
+                       if buildcfg.GOARM64.LSE {
+                               emit(s, n, args, op1, typ)
+                       } else {
+                               // Target Atomic feature is identified by dynamic detection
+                               addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
+                               v := s.load(types.Types[types.TBOOL], addr)
+                               b := s.endBlock()
+                               b.Kind = ssa.BlockIf
+                               b.SetControl(v)
+                               bTrue := s.f.NewBlock(ssa.BlockPlain)
+                               bFalse := s.f.NewBlock(ssa.BlockPlain)
+                               bEnd := s.f.NewBlock(ssa.BlockPlain)
+                               b.AddEdgeTo(bTrue)
+                               b.AddEdgeTo(bFalse)
+                               b.Likely = ssa.BranchLikely
+
+                               // We have atomic instructions - use it directly.
+                               s.startBlock(bTrue)
+                               emit(s, n, args, op1, typ)
+                               s.endBlock().AddEdgeTo(bEnd)
+
+                               // Use original instruction sequence.
+                               s.startBlock(bFalse)
+                               emit(s, n, args, op0, typ)
+                               s.endBlock().AddEdgeTo(bEnd)
+
+                               // Merge results.
+                               s.startBlock(bEnd)
+                       }
                        if rtyp == types.TNIL {
                                return nil
                        } else {
index e0da810c738a54fe1c1a77e525358abdec85ea01..afb595a0c6aa4cb48a8172840e976b5597d5b6da 100644 (file)
@@ -402,6 +402,7 @@ var (
 
        // Used in envcmd.MkEnv and build ID computations.
        GOARM     = envOr("GOARM", fmt.Sprint(buildcfg.GOARM))
+       GOARM64   = envOr("GOARM64", fmt.Sprint(buildcfg.GOARM64))
        GO386     = envOr("GO386", buildcfg.GO386)
        GOAMD64   = envOr("GOAMD64", fmt.Sprintf("%s%d", "v", buildcfg.GOAMD64))
        GOMIPS    = envOr("GOMIPS", buildcfg.GOMIPS)
@@ -429,6 +430,8 @@ func GetArchEnv() (key, val string) {
        switch Goarch {
        case "arm":
                return "GOARM", GOARM
+       case "arm64":
+               return "GOARM64", GOARM64
        case "386":
                return "GO386", GO386
        case "amd64":
index a85b262374a1b3f4832bf6a879addaaba9e2c532..be61a606d52979688798b7b2f9876495d65c594d 100644 (file)
@@ -8,6 +8,7 @@ import (
        "bufio"
        "bytes"
        "fmt"
+       "internal/buildcfg"
        "internal/platform"
        "io"
        "log"
@@ -378,6 +379,13 @@ func asmArgs(a *Action, p *load.Package) []any {
                }
        }
 
+       if cfg.Goarch == "arm64" {
+               g, err := buildcfg.ParseGoarm64(cfg.GOARM64)
+               if err == nil && g.LSE {
+                       args = append(args, "-D", "GOARM64_LSE")
+               }
+       }
+
        return args
 }
 
index 6f1c56eb2dea525f33cd1d09df6389302eb7ddd7..8d68591982a5032df664e81450011b808cbf0ee8 100644 (file)
@@ -1459,7 +1459,19 @@ var (
        // Regexp to extract an architecture check: architecture name (or triplet),
        // followed by semi-colon, followed by a comma-separated list of opcode checks.
        // Extraneous spaces are ignored.
-       rxAsmPlatform = regexp.MustCompile(`(\w+)(/\w+)?(/\w*)?\s*:\s*(` + reMatchCheck + `(?:\s*,\s*` + reMatchCheck + `)*)`)
+       //
+       // An example: arm64/v8.1 : -`ADD` , `SUB`
+       //      "(\w+)" matches "arm64" (architecture name)
+       //      "(/[\w.]+)?" matches "v8.1" (architecture version)
+       //      "(/\w*)?" doesn't match anything here (it's an optional part of the triplet)
+       //      "\s*:\s*" matches " : " (semi-colon)
+       //      "(" starts a capturing group
+       //      first reMatchCheck matches "-`ADD`"
+       //      `(?:" starts a non-capturing group
+       //      "\s*,\s*` matches " , "
+       //      second reMatchCheck matches "`SUB`"
+       //      ")*)" closes started groups; "*" means that there might be other elements in the comma-separated list
+       rxAsmPlatform = regexp.MustCompile(`(\w+)(/[\w.]+)?(/\w*)?\s*:\s*(` + reMatchCheck + `(?:\s*,\s*` + reMatchCheck + `)*)`)
 
        // Regexp to extract a single opcoded check
        rxAsmCheck = regexp.MustCompile(reMatchCheck)
@@ -1471,7 +1483,7 @@ var (
                "386":     {"GO386", "sse2", "softfloat"},
                "amd64":   {"GOAMD64", "v1", "v2", "v3", "v4"},
                "arm":     {"GOARM", "5", "6", "7", "7,softfloat"},
-               "arm64":   {},
+               "arm64":   {"GOARM64", "v8.0", "v8.1"},
                "loong64": {},
                "mips":    {"GOMIPS", "hardfloat", "softfloat"},
                "mips64":  {"GOMIPS64", "hardfloat", "softfloat"},
index b074a36b941f7f6b7f80dba27dba3100d5fa0a22..a16e76b3055845dc4efc773c1fbad32c12bc8a01 100644 (file)
@@ -127,7 +127,7 @@ func goarm() (g goarmFeatures) {
        return
 }
 
-type goarm64Features struct {
+type Goarm64Features struct {
        Version string
        // Large Systems Extension
        LSE bool
@@ -139,7 +139,7 @@ type goarm64Features struct {
        Crypto bool
 }
 
-func (g goarm64Features) String() string {
+func (g Goarm64Features) String() string {
        arm64Str := g.Version
        if g.LSE {
                arm64Str += ",lse"
@@ -150,7 +150,7 @@ func (g goarm64Features) String() string {
        return arm64Str
 }
 
-func parseGoarm64(v string) (g goarm64Features) {
+func ParseGoarm64(v string) (g Goarm64Features, e error) {
        const (
                lseOpt    = ",lse"
                cryptoOpt = ",crypto"
@@ -184,7 +184,7 @@ func parseGoarm64(v string) (g goarm64Features) {
                // LSE extension is mandatory starting from 8.1
                g.LSE = true
        default:
-               Error = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q",
+               e = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q",
                        lseOpt, cryptoOpt)
                g.Version = defaultGOARM64
        }
@@ -192,13 +192,14 @@ func parseGoarm64(v string) (g goarm64Features) {
        return
 }
 
-func goarm64() goarm64Features {
-       return parseGoarm64(envOr("GOARM64", defaultGOARM64))
+func goarm64() (g Goarm64Features) {
+       g, Error = ParseGoarm64(envOr("GOARM64", defaultGOARM64))
+       return
 }
 
 // Returns true if g supports giving ARM64 ISA
 // Note that this function doesn't accept / test suffixes (like ",lse" or ",crypto")
-func (g goarm64Features) Supports(s string) bool {
+func (g Goarm64Features) Supports(s string) bool {
        // We only accept "v{8-9}.{0-9}. Everything else is malformed.
        if len(s) != 4 {
                return false
index 33a9c5e1b8c434f57b099381148a596db689aeb0..d01cdd010961b4fe3c84eae2065ff3171493e07b 100644 (file)
@@ -75,7 +75,7 @@ func TestConfigFlags(t *testing.T) {
 }
 
 func TestGoarm64FeaturesSupports(t *testing.T) {
-       g := parseGoarm64("v9.3")
+       g, _ := ParseGoarm64("v9.3")
 
        if !g.Supports("v9.3") {
                t.Errorf("Wrong goarm64Features.Supports for v9.3, v9.3")
index 3a249d3ed2b04e8e8923f5fdcd92696da39ad3c2..ede56538b880cb748ea964f7d4db41b34018ea82 100644 (file)
@@ -128,17 +128,21 @@ TEXT ·Store64(SB), NOSPLIT, $0-16
 TEXT ·Xchg(SB), NOSPLIT, $0-20
        MOVD    ptr+0(FP), R0
        MOVW    new+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        SWPALW  R1, (R0), R2
        MOVW    R2, ret+16(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXRW  (R0), R2
        STLXRW  R1, (R0), R3
        CBNZ    R3, load_store_loop
        MOVW    R2, ret+16(FP)
        RET
+#endif
 
 // uint64 Xchg64(ptr *uint64, new uint64)
 // Atomically:
@@ -148,17 +152,21 @@ load_store_loop:
 TEXT ·Xchg64(SB), NOSPLIT, $0-24
        MOVD    ptr+0(FP), R0
        MOVD    new+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        SWPALD  R1, (R0), R2
        MOVD    R2, ret+16(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXR   (R0), R2
        STLXR   R1, (R0), R3
        CBNZ    R3, load_store_loop
        MOVD    R2, ret+16(FP)
        RET
+#endif
 
 // bool Cas(uint32 *ptr, uint32 old, uint32 new)
 // Atomically:
@@ -171,14 +179,17 @@ TEXT ·Cas(SB), NOSPLIT, $0-17
        MOVD    ptr+0(FP), R0
        MOVW    old+8(FP), R1
        MOVW    new+12(FP), R2
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        MOVD    R1, R3
        CASALW  R3, (R0), R2
        CMP     R1, R3
        CSET    EQ, R0
        MOVB    R0, ret+16(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXRW  (R0), R3
        CMPW    R1, R3
@@ -189,6 +200,7 @@ ok:
        CSET    EQ, R0
        MOVB    R0, ret+16(FP)
        RET
+#endif
 
 // bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
 // Atomically:
@@ -202,14 +214,17 @@ TEXT ·Cas64(SB), NOSPLIT, $0-25
        MOVD    ptr+0(FP), R0
        MOVD    old+8(FP), R1
        MOVD    new+16(FP), R2
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        MOVD    R1, R3
        CASALD  R3, (R0), R2
        CMP     R1, R3
        CSET    EQ, R0
        MOVB    R0, ret+24(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXR   (R0), R3
        CMP     R1, R3
@@ -220,6 +235,7 @@ ok:
        CSET    EQ, R0
        MOVB    R0, ret+24(FP)
        RET
+#endif
 
 // uint32 xadd(uint32 volatile *ptr, int32 delta)
 // Atomically:
@@ -228,12 +244,15 @@ ok:
 TEXT ·Xadd(SB), NOSPLIT, $0-20
        MOVD    ptr+0(FP), R0
        MOVW    delta+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        LDADDALW        R1, (R0), R2
        ADD     R1, R2
        MOVW    R2, ret+16(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXRW  (R0), R2
        ADDW    R2, R1, R2
@@ -241,6 +260,7 @@ load_store_loop:
        CBNZ    R3, load_store_loop
        MOVW    R2, ret+16(FP)
        RET
+#endif
 
 // uint64 Xadd64(uint64 volatile *ptr, int64 delta)
 // Atomically:
@@ -249,12 +269,15 @@ load_store_loop:
 TEXT ·Xadd64(SB), NOSPLIT, $0-24
        MOVD    ptr+0(FP), R0
        MOVD    delta+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        LDADDALD        R1, (R0), R2
        ADD     R1, R2
        MOVD    R2, ret+16(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXR   (R0), R2
        ADD     R2, R1, R2
@@ -262,6 +285,7 @@ load_store_loop:
        CBNZ    R3, load_store_loop
        MOVD    R2, ret+16(FP)
        RET
+#endif
 
 TEXT ·Xchgint32(SB), NOSPLIT, $0-20
        B       ·Xchg(SB)
@@ -275,72 +299,91 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
 TEXT ·And8(SB), NOSPLIT, $0-9
        MOVD    ptr+0(FP), R0
        MOVB    val+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        MVN     R1, R2
        LDCLRALB        R2, (R0), R3
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXRB  (R0), R2
        AND     R1, R2
        STLXRB  R2, (R0), R3
        CBNZ    R3, load_store_loop
        RET
+#endif
 
 TEXT ·Or8(SB), NOSPLIT, $0-9
        MOVD    ptr+0(FP), R0
        MOVB    val+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        LDORALB R1, (R0), R2
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXRB  (R0), R2
        ORR     R1, R2
        STLXRB  R2, (R0), R3
        CBNZ    R3, load_store_loop
        RET
+#endif
 
 // func And(addr *uint32, v uint32)
 TEXT ·And(SB), NOSPLIT, $0-12
        MOVD    ptr+0(FP), R0
        MOVW    val+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        MVN     R1, R2
        LDCLRALW        R2, (R0), R3
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXRW  (R0), R2
        AND     R1, R2
        STLXRW  R2, (R0), R3
        CBNZ    R3, load_store_loop
        RET
+#endif
 
 // func Or(addr *uint32, v uint32)
 TEXT ·Or(SB), NOSPLIT, $0-12
        MOVD    ptr+0(FP), R0
        MOVW    val+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        LDORALW R1, (R0), R2
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXRW  (R0), R2
        ORR     R1, R2
        STLXRW  R2, (R0), R3
        CBNZ    R3, load_store_loop
        RET
+#endif
 
 // func Or32(addr *uint32, v uint32) old uint32
 TEXT ·Or32(SB), NOSPLIT, $0-20
        MOVD    ptr+0(FP), R0
        MOVW    val+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        LDORALW R1, (R0), R2
        MOVD    R2, ret+16(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXRW  (R0), R2
        ORR     R1, R2, R3
@@ -348,17 +391,21 @@ load_store_loop:
        CBNZ    R4, load_store_loop
        MOVD R2, ret+16(FP)
        RET
+#endif
 
 // func And32(addr *uint32, v uint32) old uint32
 TEXT ·And32(SB), NOSPLIT, $0-20
        MOVD    ptr+0(FP), R0
        MOVW    val+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        MVN     R1, R2
        LDCLRALW        R2, (R0), R3
        MOVD    R3, ret+16(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXRW  (R0), R2
        AND     R1, R2, R3
@@ -366,16 +413,20 @@ load_store_loop:
        CBNZ    R4, load_store_loop
        MOVD R2, ret+16(FP)
        RET
+#endif
 
 // func Or64(addr *uint64, v uint64) old uint64
 TEXT ·Or64(SB), NOSPLIT, $0-24
        MOVD    ptr+0(FP), R0
        MOVD    val+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        LDORALD R1, (R0), R2
        MOVD    R2, ret+16(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXR   (R0), R2
        ORR     R1, R2, R3
@@ -383,17 +434,21 @@ load_store_loop:
        CBNZ    R4, load_store_loop
        MOVD    R2, ret+16(FP)
        RET
+#endif
 
 // func And64(addr *uint64, v uint64) old uint64
 TEXT ·And64(SB), NOSPLIT, $0-24
        MOVD    ptr+0(FP), R0
        MOVD    val+8(FP), R1
+#ifndef GOARM64_LSE
        MOVBU   internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
        CBZ     R4, load_store_loop
+#endif
        MVN     R1, R2
        LDCLRALD        R2, (R0), R3
        MOVD    R3, ret+16(FP)
        RET
+#ifndef GOARM64_LSE
 load_store_loop:
        LDAXR   (R0), R2
        AND     R1, R2, R3
@@ -401,6 +456,7 @@ load_store_loop:
        CBNZ    R4, load_store_loop
        MOVD    R2, ret+16(FP)
        RET
+#endif
 
 // func Anduintptr(addr *uintptr, v uintptr) old uintptr
 TEXT ·Anduintptr(SB), NOSPLIT, $0-24
diff --git a/test/codegen/atomics.go b/test/codegen/atomics.go
new file mode 100644 (file)
index 0000000..feaa31b
--- /dev/null
@@ -0,0 +1,27 @@
+// asmcheck
+
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check that atomic instructions without dynamic checks are
+// generated for architectures that support them
+
+package codegen
+
+import "sync/atomic"
+
+type Counter struct {
+       count int32
+}
+
+func (c *Counter) Increment() {
+       // Check that ARm64 v8.0 has both atomic instruction (LDADDALW) and a dynamic check
+       // (for arm64HasATOMICS), while ARM64 v8.1 has only atomic and no dynamic check.
+       // arm64/v8.0:"LDADDALW"
+       // arm64/v8.1:"LDADDALW"
+       // arm64/v8.0:".*arm64HasATOMICS"
+       // arm64/v8.1:-".*arm64HasATOMICS"
+       atomic.AddInt32(&c.count, 1)
+}
+