Remove dynamic checks for atomic instructions on ARM64 targets that are known at compile time to support the LSE extension (GOARM64 >= v8.1, or v8.0 with the ",lse" suffix).
For #66131
Change-Id: I0ec1b183a3f4ea4c8a537430646e6bc4b4f64271
Reviewed-on: https://go-review.googlesource.com/c/go/+/569536
Reviewed-by: Mauri de Souza Meneguzzo <mauri870@gmail.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Fannie Zhang <Fannie.Zhang@arm.com>
Reviewed-by: Shu-Chun Weng <scw@google.com>
makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ, rtyp types.Kind, emit atomicOpEmitter) intrinsicBuilder {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- // Target Atomic feature is identified by dynamic detection
- addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
- v := s.load(types.Types[types.TBOOL], addr)
- b := s.endBlock()
- b.Kind = ssa.BlockIf
- b.SetControl(v)
- bTrue := s.f.NewBlock(ssa.BlockPlain)
- bFalse := s.f.NewBlock(ssa.BlockPlain)
- bEnd := s.f.NewBlock(ssa.BlockPlain)
- b.AddEdgeTo(bTrue)
- b.AddEdgeTo(bFalse)
- b.Likely = ssa.BranchLikely
-
- // We have atomic instructions - use it directly.
- s.startBlock(bTrue)
- emit(s, n, args, op1, typ)
- s.endBlock().AddEdgeTo(bEnd)
-
- // Use original instruction sequence.
- s.startBlock(bFalse)
- emit(s, n, args, op0, typ)
- s.endBlock().AddEdgeTo(bEnd)
-
- // Merge results.
- s.startBlock(bEnd)
+ if buildcfg.GOARM64.LSE {
+ emit(s, n, args, op1, typ)
+ } else {
+ // Whether the target supports LSE atomics is detected dynamically at run time.
+ addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
+ v := s.load(types.Types[types.TBOOL], addr)
+ b := s.endBlock()
+ b.Kind = ssa.BlockIf
+ b.SetControl(v)
+ bTrue := s.f.NewBlock(ssa.BlockPlain)
+ bFalse := s.f.NewBlock(ssa.BlockPlain)
+ bEnd := s.f.NewBlock(ssa.BlockPlain)
+ b.AddEdgeTo(bTrue)
+ b.AddEdgeTo(bFalse)
+ b.Likely = ssa.BranchLikely
+
+ // We have atomic instructions - use them directly.
+ s.startBlock(bTrue)
+ emit(s, n, args, op1, typ)
+ s.endBlock().AddEdgeTo(bEnd)
+
+ // Use original instruction sequence.
+ s.startBlock(bFalse)
+ emit(s, n, args, op0, typ)
+ s.endBlock().AddEdgeTo(bEnd)
+
+ // Merge results.
+ s.startBlock(bEnd)
+ }
if rtyp == types.TNIL {
return nil
} else {
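The guarded-intrinsic pattern above is easier to see at the source level. The following is purely an illustrative Go sketch (not compiler output): hasLSE stands in for the internal/cpu flag behind ir.Syms.ARM64HasATOMICS, and the branch bodies stand in for the single LSE instruction and the LL/SC retry loop; the sketch itself performs no real atomics.

package main

import "fmt"

// hasLSE models the runtime flag that the guarded intrinsic branches on;
// on real hardware it is initialized from the CPU feature registers.
var hasLSE = true

// xadd sketches the guarded shape: a likely-taken branch selects the LSE
// form, the fallback selects the load-store loop. The bodies here are
// plain placeholders, not actual atomic operations.
func xadd(p *int32, delta int32) int32 {
	if hasLSE {
		*p += delta // would be a single LDADDALW
	} else {
		*p += delta // would be an LDAXRW/STLXRW retry loop
	}
	return *p
}

func main() {
	n := int32(41)
	fmt.Println(xadd(&n, 1)) // 42
}

When buildcfg.GOARM64.LSE is true, the new code skips building the branch and the fallback entirely and emits only the op1 (LSE) form.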
// Used in envcmd.MkEnv and build ID computations.
GOARM = envOr("GOARM", fmt.Sprint(buildcfg.GOARM))
+ GOARM64 = envOr("GOARM64", fmt.Sprint(buildcfg.GOARM64))
GO386 = envOr("GO386", buildcfg.GO386)
GOAMD64 = envOr("GOAMD64", fmt.Sprintf("%s%d", "v", buildcfg.GOAMD64))
GOMIPS = envOr("GOMIPS", buildcfg.GOMIPS)
switch Goarch {
case "arm":
return "GOARM", GOARM
+ case "arm64":
+ return "GOARM64", GOARM64
case "386":
return "GO386", GO386
case "amd64":
"bufio"
"bytes"
"fmt"
+ "internal/buildcfg"
"internal/platform"
"io"
"log"
}
}
+ if cfg.Goarch == "arm64" {
+ g, err := buildcfg.ParseGoarm64(cfg.GOARM64)
+ if err == nil && g.LSE {
+ args = append(args, "-D", "GOARM64_LSE")
+ }
+ }
+
return args
}
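The GOARM64_LSE define is the assembler-side counterpart of buildcfg.GOARM64.LSE: the hand-written atomics in the runtime (see the atomic_arm64.s hunks below) wrap their dynamic-check paths in #ifndef GOARM64_LSE / #endif, so those paths are assembled only when LSE support is not guaranteed at build time.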
// Regexp to extract an architecture check: architecture name (or triplet),
// followed by a colon, followed by a comma-separated list of opcode checks.
// Extraneous spaces are ignored.
- rxAsmPlatform = regexp.MustCompile(`(\w+)(/\w+)?(/\w*)?\s*:\s*(` + reMatchCheck + `(?:\s*,\s*` + reMatchCheck + `)*)`)
+ //
+ // An example: arm64/v8.1 : -`ADD` , `SUB`
+ // "(\w+)" matches "arm64" (architecture name)
+ // "(/[\w.]+)?" matches "v8.1" (architecture version)
+ // "(/\w*)?" doesn't match anything here (it's an optional part of the triplet)
+ // "\s*:\s*" matches " : " (semi-colon)
+ // "(" starts a capturing group
+ // first reMatchCheck matches "-`ADD`"
+ // `(?:" starts a non-capturing group
+ // "\s*,\s*` matches " , "
+ // second reMatchCheck matches "`SUB`"
+ // ")*)" closes started groups; "*" means that there might be other elements in the comma-separated list
+ rxAsmPlatform = regexp.MustCompile(`(\w+)(/[\w.]+)?(/\w*)?\s*:\s*(` + reMatchCheck + `(?:\s*,\s*` + reMatchCheck + `)*)`)
// Regexp to extract a single opcoded check
rxAsmCheck = regexp.MustCompile(reMatchCheck)
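As a quick sanity check, the updated pattern can be exercised in isolation. In this sketch reMatchCheck is a simplified stand-in (an optional "-" followed by a backquoted opcode pattern); the real definition in the test harness is more elaborate.

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Simplified stand-in for reMatchCheck.
	const reMatchCheck = "-?`[^`]+`"
	rxAsmPlatform := regexp.MustCompile(`(\w+)(/[\w.]+)?(/\w*)?\s*:\s*(` +
		reMatchCheck + `(?:\s*,\s*` + reMatchCheck + `)*)`)
	m := rxAsmPlatform.FindStringSubmatch("arm64/v8.1 : -`ADD` , `SUB`")
	for i, s := range m {
		fmt.Printf("%d: %q\n", i, s)
	}
	// Output:
	// 0: "arm64/v8.1 : -`ADD` , `SUB`"
	// 1: "arm64"
	// 2: "/v8.1"
	// 3: ""
	// 4: "-`ADD` , `SUB`"
}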
"386": {"GO386", "sse2", "softfloat"},
"amd64": {"GOAMD64", "v1", "v2", "v3", "v4"},
"arm": {"GOARM", "5", "6", "7", "7,softfloat"},
- "arm64": {},
+ "arm64": {"GOARM64", "v8.0", "v8.1"},
"loong64": {},
"mips": {"GOMIPS", "hardfloat", "softfloat"},
"mips64": {"GOMIPS64", "hardfloat", "softfloat"},
return
}
-type goarm64Features struct {
+type Goarm64Features struct {
Version string
// Large Systems Extension
LSE bool
Crypto bool
}
-func (g goarm64Features) String() string {
+func (g Goarm64Features) String() string {
arm64Str := g.Version
if g.LSE {
arm64Str += ",lse"
}
if g.Crypto {
arm64Str += ",crypto"
}
return arm64Str
}
-func parseGoarm64(v string) (g goarm64Features) {
+func ParseGoarm64(v string) (g Goarm64Features, e error) {
const (
lseOpt = ",lse"
cryptoOpt = ",crypto"
// LSE extension is mandatory starting from 8.1
g.LSE = true
default:
- Error = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q",
+ e = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q",
lseOpt, cryptoOpt)
g.Version = defaultGOARM64
}
return
}
-func goarm64() goarm64Features {
- return parseGoarm64(envOr("GOARM64", defaultGOARM64))
+func goarm64() (g Goarm64Features) {
+ g, Error = ParseGoarm64(envOr("GOARM64", defaultGOARM64))
+ return
}
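A quick illustration of the newly exported API (a sketch: internal/buildcfg is importable only from within GOROOT/src, so this will not build in an ordinary module):

package main

import (
	"fmt"
	"internal/buildcfg" // importable only inside the Go tree
)

func main() {
	g, err := buildcfg.ParseGoarm64("v8.0,lse")
	fmt.Println(g, err) // v8.0,lse <nil> (LSE set by the ",lse" suffix)

	g, _ = buildcfg.ParseGoarm64("v8.1")
	fmt.Println(g.LSE) // true: LSE is mandatory starting from v8.1

	_, err = buildcfg.ParseGoarm64("8.1")
	fmt.Println(err != nil) // true: the version must start with "v8." or "v9."
}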
// Returns true if g supports the given ARM64 ISA
// Note that this function doesn't accept / test suffixes (like ",lse" or ",crypto")
-func (g goarm64Features) Supports(s string) bool {
+func (g Goarm64Features) Supports(s string) bool {
// We only accept "v{8-9}.{0-9}". Everything else is malformed.
if len(s) != 4 {
return false
}
func TestGoarm64FeaturesSupports(t *testing.T) {
- g := parseGoarm64("v9.3")
+ g, _ := ParseGoarm64("v9.3")
if !g.Supports("v9.3") {
t.Errorf("Wrong goarm64Features.Supports for v9.3, v9.3")
TEXT ·Xchg(SB), NOSPLIT, $0-20
MOVD ptr+0(FP), R0
MOVW new+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
SWPALW R1, (R0), R2
MOVW R2, ret+16(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
STLXRW R1, (R0), R3
CBNZ R3, load_store_loop
MOVW R2, ret+16(FP)
RET
+#endif
// uint64 Xchg64(ptr *uint64, new uint64)
// Atomically:
TEXT ·Xchg64(SB), NOSPLIT, $0-24
MOVD ptr+0(FP), R0
MOVD new+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
SWPALD R1, (R0), R2
MOVD R2, ret+16(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R2
STLXR R1, (R0), R3
CBNZ R3, load_store_loop
MOVD R2, ret+16(FP)
RET
+#endif
// bool Cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
MOVD ptr+0(FP), R0
MOVW old+8(FP), R1
MOVW new+12(FP), R2
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
MOVD R1, R3
CASALW R3, (R0), R2
CMP R1, R3
CSET EQ, R0
MOVB R0, ret+16(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R3
CMPW R1, R3
BNE ok
STLXRW R2, (R0), R3
CBNZ R3, load_store_loop
ok:
CSET EQ, R0
MOVB R0, ret+16(FP)
RET
+#endif
// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
MOVD ptr+0(FP), R0
MOVD old+8(FP), R1
MOVD new+16(FP), R2
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
MOVD R1, R3
CASALD R3, (R0), R2
CMP R1, R3
CSET EQ, R0
MOVB R0, ret+24(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R3
CMP R1, R3
BNE ok
STLXR R2, (R0), R3
CBNZ R3, load_store_loop
ok:
CSET EQ, R0
MOVB R0, ret+24(FP)
RET
+#endif
// uint32 xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
TEXT ·Xadd(SB), NOSPLIT, $0-20
MOVD ptr+0(FP), R0
MOVW delta+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
LDADDALW R1, (R0), R2
ADD R1, R2
MOVW R2, ret+16(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
ADDW R2, R1, R2
STLXRW R2, (R0), R3
CBNZ R3, load_store_loop
MOVW R2, ret+16(FP)
RET
+#endif
// uint64 Xadd64(uint64 volatile *ptr, int64 delta)
// Atomically:
TEXT ·Xadd64(SB), NOSPLIT, $0-24
MOVD ptr+0(FP), R0
MOVD delta+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
LDADDALD R1, (R0), R2
ADD R1, R2
MOVD R2, ret+16(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R2
ADD R2, R1, R2
STLXR R2, (R0), R3
CBNZ R3, load_store_loop
MOVD R2, ret+16(FP)
RET
+#endif
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
B ·Xchg(SB)
TEXT ·And8(SB), NOSPLIT, $0-9
MOVD ptr+0(FP), R0
MOVB val+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
MVN R1, R2
LDCLRALB R2, (R0), R3
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXRB (R0), R2
AND R1, R2
STLXRB R2, (R0), R3
CBNZ R3, load_store_loop
RET
+#endif
TEXT ·Or8(SB), NOSPLIT, $0-9
MOVD ptr+0(FP), R0
MOVB val+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
LDORALB R1, (R0), R2
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXRB (R0), R2
ORR R1, R2
STLXRB R2, (R0), R3
CBNZ R3, load_store_loop
RET
+#endif
// func And(addr *uint32, v uint32)
TEXT ·And(SB), NOSPLIT, $0-12
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
MVN R1, R2
LDCLRALW R2, (R0), R3
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
AND R1, R2
STLXRW R2, (R0), R3
CBNZ R3, load_store_loop
RET
+#endif
// func Or(addr *uint32, v uint32)
TEXT ·Or(SB), NOSPLIT, $0-12
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
LDORALW R1, (R0), R2
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
ORR R1, R2
STLXRW R2, (R0), R3
CBNZ R3, load_store_loop
RET
+#endif
// func Or32(addr *uint32, v uint32) old uint32
TEXT ·Or32(SB), NOSPLIT, $0-20
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
LDORALW R1, (R0), R2
MOVD R2, ret+16(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
ORR R1, R2, R3
STLXRW R3, (R0), R4
CBNZ R4, load_store_loop
MOVD R2, ret+16(FP)
RET
+#endif
// func And32(addr *uint32, v uint32) old uint32
TEXT ·And32(SB), NOSPLIT, $0-20
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
MVN R1, R2
LDCLRALW R2, (R0), R3
MOVD R3, ret+16(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXRW (R0), R2
AND R1, R2, R3
STLXRW R3, (R0), R4
CBNZ R4, load_store_loop
MOVD R2, ret+16(FP)
RET
+#endif
// func Or64(addr *uint64, v uint64) old uint64
TEXT ·Or64(SB), NOSPLIT, $0-24
MOVD ptr+0(FP), R0
MOVD val+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
LDORALD R1, (R0), R2
MOVD R2, ret+16(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R2
ORR R1, R2, R3
STLXR R3, (R0), R4
CBNZ R4, load_store_loop
MOVD R2, ret+16(FP)
RET
+#endif
// func And64(addr *uint64, v uint64) old uint64
TEXT ·And64(SB), NOSPLIT, $0-24
MOVD ptr+0(FP), R0
MOVD val+8(FP), R1
+#ifndef GOARM64_LSE
MOVBU internal∕cpu·ARM64+const_offsetARM64HasATOMICS(SB), R4
CBZ R4, load_store_loop
+#endif
MVN R1, R2
LDCLRALD R2, (R0), R3
MOVD R3, ret+16(FP)
RET
+#ifndef GOARM64_LSE
load_store_loop:
LDAXR (R0), R2
AND R1, R2, R3
STLXR R3, (R0), R4
CBNZ R4, load_store_loop
MOVD R2, ret+16(FP)
RET
+#endif
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
TEXT ·Anduintptr(SB), NOSPLIT, $0-24
--- /dev/null
+// asmcheck
+
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check that atomic instructions without dynamic checks are
+// generated for architectures that support them.
+
+package codegen
+
+import "sync/atomic"
+
+type Counter struct {
+ count int32
+}
+
+func (c *Counter) Increment() {
+ // Check that ARM64 v8.0 emits both the atomic instruction (LDADDALW) and a
+ // dynamic check (on arm64HasATOMICS), while ARM64 v8.1 emits only the atomic
+ // instruction and no dynamic check.
+ // arm64/v8.0:"LDADDALW"
+ // arm64/v8.1:"LDADDALW"
+ // arm64/v8.0:".*arm64HasATOMICS"
+ // arm64/v8.1:-".*arm64HasATOMICS"
+ atomic.AddInt32(&c.count, 1)
+}
+