From c6c9634515e6128a5acb8645dced62581f5d1b1b Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Sat, 3 Aug 2024 01:22:58 +1000 Subject: [PATCH] cmd/compile/internal/ssagen: factor out intrinsics code The intrinsic handling code is a good thousand lines in the fairly large ssa.go file. This code is already reasonably self-contained - factor it out into a separate file so that future changes are easier to manage (and it becomes easier to add/change intrinsics for an architecture). Change-Id: I3c18d3d1bb6332f1817d902150e736373bf1ac81 Reviewed-on: https://go-review.googlesource.com/c/go/+/605477 Reviewed-by: Carlos Amedee LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall Reviewed-by: Cherry Mui --- src/cmd/compile/internal/ssagen/intrinsics.go | 1047 +++++++++++++++++ src/cmd/compile/internal/ssagen/ssa.go | 1035 +--------------- 2 files changed, 1051 insertions(+), 1031 deletions(-) create mode 100644 src/cmd/compile/internal/ssagen/intrinsics.go diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go new file mode 100644 index 0000000000..f44531b88c --- /dev/null +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -0,0 +1,1047 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssagen + +import ( + "fmt" + "internal/buildcfg" + + "cmd/compile/internal/base" + "cmd/compile/internal/ir" + "cmd/compile/internal/ssa" + "cmd/compile/internal/types" + "cmd/internal/sys" +) + +var intrinsics map[intrinsicKey]intrinsicBuilder + +// An intrinsicBuilder converts a call node n into an ssa value that +// implements that call as an intrinsic. args is a list of arguments to the func. +type intrinsicBuilder func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value + +type intrinsicKey struct { + arch *sys.Arch + pkg string + fn string +} + +func initIntrinsics() { + intrinsics = map[intrinsicKey]intrinsicBuilder{} + + var p4 []*sys.Arch + var p8 []*sys.Arch + var lwatomics []*sys.Arch + for _, a := range sys.Archs { + if a.PtrSize == 4 { + p4 = append(p4, a) + } else { + p8 = append(p8, a) + } + if a.Family != sys.PPC64 { + lwatomics = append(lwatomics, a) + } + } + all := sys.Archs[:] + + // add adds the intrinsic b for pkg.fn for the given list of architectures. + add := func(pkg, fn string, b intrinsicBuilder, archs ...*sys.Arch) { + for _, a := range archs { + intrinsics[intrinsicKey{a, pkg, fn}] = b + } + } + // addF does the same as add but operates on architecture families. + addF := func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily) { + for _, a := range sys.Archs { + if a.InFamily(archFamilies...) { + intrinsics[intrinsicKey{a, pkg, fn}] = b + } + } + } + // alias defines pkg.fn = pkg2.fn2 for all architectures in archs for which pkg2.fn2 exists. 
+ alias := func(pkg, fn, pkg2, fn2 string, archs ...*sys.Arch) { + aliased := false + for _, a := range archs { + if b, ok := intrinsics[intrinsicKey{a, pkg2, fn2}]; ok { + intrinsics[intrinsicKey{a, pkg, fn}] = b + aliased = true + } + } + if !aliased { + panic(fmt.Sprintf("attempted to alias undefined intrinsic: %s.%s", pkg, fn)) + } + } + + /******** runtime ********/ + if !base.Flag.Cfg.Instrumenting { + add("runtime", "slicebytetostringtmp", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + // Compiler frontend optimizations emit OBYTES2STRTMP nodes + // for the backend instead of slicebytetostringtmp calls + // when not instrumenting. + return s.newValue2(ssa.OpStringMake, n.Type(), args[0], args[1]) + }, + all...) + } + addF("internal/runtime/math", "MulUintptr", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if s.config.PtrSize == 4 { + return s.newValue2(ssa.OpMul32uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1]) + } + return s.newValue2(ssa.OpMul64uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1]) + }, + sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.RISCV64, sys.ARM64) + add("runtime", "KeepAlive", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + data := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, args[0]) + s.vars[memVar] = s.newValue2(ssa.OpKeepAlive, types.TypeMem, data, s.mem()) + return nil + }, + all...) + add("runtime", "getclosureptr", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue0(ssa.OpGetClosurePtr, s.f.Config.Types.Uintptr) + }, + all...) + + add("runtime", "getcallerpc", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue0(ssa.OpGetCallerPC, s.f.Config.Types.Uintptr) + }, + all...) + + add("runtime", "getcallersp", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpGetCallerSP, s.f.Config.Types.Uintptr, s.mem()) + }, + all...) + + addF("runtime", "publicationBarrier", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem()) + return nil + }, + sys.ARM64, sys.PPC64, sys.RISCV64) + + brev_arch := []sys.ArchFamily{sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X} + if buildcfg.GOPPC64 >= 10 { + // Use only on Power10 as the new byte reverse instructions that Power10 provide + // make it worthwhile as an intrinsic + brev_arch = append(brev_arch, sys.PPC64) + } + /******** internal/runtime/sys ********/ + addF("internal/runtime/sys", "Bswap32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0]) + }, + brev_arch...) + addF("internal/runtime/sys", "Bswap64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0]) + }, + brev_arch...) 
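The add/addF/alias helpers above are the entire registration surface: everything else in initIntrinsics is table entries. As a rough, self-contained sketch of the pattern (with the compiler's *sys.Arch, ir, and ssa types replaced by placeholders; builder, key, addIntrinsic, and aliasIntrinsic are illustrative names, not real compiler identifiers):

package main

import "fmt"

// builder stands in for intrinsicBuilder; key mirrors intrinsicKey.
type builder func(args []int) int

type key struct {
	arch, pkg, fn string
}

var table = map[key]builder{}

// addIntrinsic registers b for pkg.fn on each listed architecture,
// like the add/addF helpers.
func addIntrinsic(pkg, fn string, b builder, archs ...string) {
	for _, a := range archs {
		table[key{a, pkg, fn}] = b
	}
}

// aliasIntrinsic defines pkg.fn = pkg2.fn2 wherever pkg2.fn2 exists,
// panicking if it exists nowhere, like the alias helper.
func aliasIntrinsic(pkg, fn, pkg2, fn2 string, archs ...string) {
	aliased := false
	for _, a := range archs {
		if b, ok := table[key{a, pkg2, fn2}]; ok {
			table[key{a, pkg, fn}] = b
			aliased = true
		}
	}
	if !aliased {
		panic(fmt.Sprintf("attempted to alias undefined intrinsic: %s.%s", pkg, fn))
	}
}

func main() {
	addIntrinsic("internal/runtime/sys", "Bswap32",
		func(args []int) int { return args[0] }, "amd64", "arm64")
	aliasIntrinsic("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", "amd64", "arm64")
	fmt.Println(len(table)) // 4: two direct entries plus two alias entries
}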
+
+	/****** Prefetch ******/
+	makePrefetchFunc := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			s.vars[memVar] = s.newValue2(op, types.TypeMem, args[0], s.mem())
+			return nil
+		}
+	}
+
+	// Make Prefetch intrinsics for supported platforms.
+	// On unsupported platforms the stub function will be eliminated.
+	addF("internal/runtime/sys", "Prefetch", makePrefetchFunc(ssa.OpPrefetchCache),
+		sys.AMD64, sys.ARM64, sys.PPC64)
+	addF("internal/runtime/sys", "PrefetchStreamed", makePrefetchFunc(ssa.OpPrefetchCacheStreamed),
+		sys.AMD64, sys.ARM64, sys.PPC64)
+
+	/******** internal/runtime/atomic ********/
+	addF("internal/runtime/atomic", "Load",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			v := s.newValue2(ssa.OpAtomicLoad32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem())
+			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
+		},
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+	addF("internal/runtime/atomic", "Load8",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			v := s.newValue2(ssa.OpAtomicLoad8, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], s.mem())
+			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], v)
+		},
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+	addF("internal/runtime/atomic", "Load64",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			v := s.newValue2(ssa.OpAtomicLoad64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem())
+			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
+		},
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+	addF("internal/runtime/atomic", "LoadAcq",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			v := s.newValue2(ssa.OpAtomicLoadAcq32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem())
+			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
+		},
+		sys.PPC64, sys.S390X)
+	addF("internal/runtime/atomic", "LoadAcq64",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			v := s.newValue2(ssa.OpAtomicLoadAcq64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem())
+			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
+		},
+		sys.PPC64)
+	addF("internal/runtime/atomic", "Loadp",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem())
+			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, s.f.Config.Types.BytePtr, v)
+		},
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+
+	addF("internal/runtime/atomic", "Store",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore32, types.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+	addF("internal/runtime/atomic", "Store8",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore8, types.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+	addF("internal/runtime/atomic", "Store64",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore64, types.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+	addF("internal/runtime/atomic", "StorepNoWB",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			s.vars[memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, types.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.RISCV64, sys.S390X)
+	addF("internal/runtime/atomic", "StoreRel",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.PPC64, sys.S390X)
+	addF("internal/runtime/atomic", "StoreRel64",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel64, types.TypeMem, args[0], args[1], s.mem())
+			return nil
+		},
+		sys.PPC64)
+
+	addF("internal/runtime/atomic", "Xchg",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			v := s.newValue3(ssa.OpAtomicExchange32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
+			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
+		},
+		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+	addF("internal/runtime/atomic", "Xchg64",
+		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			v := s.newValue3(ssa.OpAtomicExchange64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
+			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
+		},
+		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+
+	type atomicOpEmitter func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool)
+
+	makeAtomicGuardedIntrinsicARM64common := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter, needReturn bool) intrinsicBuilder {
+
+		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+			if buildcfg.GOARM64.LSE {
+				emit(s, n, args, op1, typ, needReturn)
+			} else {
+				// The target atomic feature is identified by dynamic detection.
+				addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
+				v := s.load(types.Types[types.TBOOL], addr)
+				b := s.endBlock()
+				b.Kind = ssa.BlockIf
+				b.SetControl(v)
+				bTrue := s.f.NewBlock(ssa.BlockPlain)
+				bFalse := s.f.NewBlock(ssa.BlockPlain)
+				bEnd := s.f.NewBlock(ssa.BlockPlain)
+				b.AddEdgeTo(bTrue)
+				b.AddEdgeTo(bFalse)
+				b.Likely = ssa.BranchLikely
+
+				// We have atomic instructions - use them directly.
+				s.startBlock(bTrue)
+				emit(s, n, args, op1, typ, needReturn)
+				s.endBlock().AddEdgeTo(bEnd)
+
+				// Use original instruction sequence.
+ s.startBlock(bFalse) + emit(s, n, args, op0, typ, needReturn) + s.endBlock().AddEdgeTo(bEnd) + + // Merge results. + s.startBlock(bEnd) + } + if needReturn { + return s.variable(n, types.Types[typ]) + } else { + return nil + } + } + } + makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder { + return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, true) + } + makeAtomicGuardedIntrinsicARM64old := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder { + return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, false) + } + + atomicEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) { + v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem()) + s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) + if needReturn { + s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v) + } + } + addF("internal/runtime/atomic", "Xchg", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, types.TUINT32, atomicEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "Xchg64", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicEmitterARM64), + sys.ARM64) + + addF("internal/runtime/atomic", "Xadd", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem()) + s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) + return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v) + }, + sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) + addF("internal/runtime/atomic", "Xadd64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem()) + s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) + return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v) + }, + sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) + + addF("internal/runtime/atomic", "Xadd", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, types.TUINT32, atomicEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "Xadd64", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, types.TUINT64, atomicEmitterARM64), + sys.ARM64) + + addF("internal/runtime/atomic", "Cas", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem()) + s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) + return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v) + }, + sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) + addF("internal/runtime/atomic", "Cas64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem()) + s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) + return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v) + }, + sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) + 
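makeAtomicGuardedIntrinsicARM64common is the one non-obvious builder here: when GOARM64 does not guarantee LSE, it emits a run-time branch on ir.Syms.ARM64HasATOMICS and lowers both the LSE variant op and the fallback op, merging the arms at a join block. Stripped of the SSA plumbing, the generated control flow has roughly this shape (a sketch only; hasLSE and the two add paths are hypothetical stand-ins and are not actually atomic here):

package main

import "fmt"

// hasLSE stands in for the runtime-detected CPU feature bit that the
// compiled guard loads (ir.Syms.ARM64HasATOMICS in the real code).
var hasLSE = true

// addViaLLSC models op0, the pre-LSE load-linked/store-conditional path.
func addViaLLSC(p *uint64, d uint64) uint64 { *p += d; return *p }

// addViaLSE models op1, the single-instruction LSE path.
func addViaLSE(p *uint64, d uint64) uint64 { *p += d; return *p }

// atomicAdd mirrors the guarded-intrinsic shape: one branch on the
// feature bit (marked BranchLikely in SSA), both arms merging into
// the same result.
func atomicAdd(p *uint64, d uint64) uint64 {
	if hasLSE {
		return addViaLSE(p, d)
	}
	return addViaLLSC(p, d)
}

func main() {
	var x uint64
	fmt.Println(atomicAdd(&x, 3)) // 3
}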
addF("internal/runtime/atomic", "CasRel", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem()) + s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) + return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v) + }, + sys.PPC64) + + atomicCasEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) { + v := s.newValue4(op, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem()) + s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) + if needReturn { + s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v) + } + } + + addF("internal/runtime/atomic", "Cas", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, types.TBOOL, atomicCasEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "Cas64", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, types.TBOOL, atomicCasEmitterARM64), + sys.ARM64) + + // Old-style atomic logical operation API (all supported archs except arm64). + addF("internal/runtime/atomic", "And8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, + sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) + addF("internal/runtime/atomic", "And", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, + sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) + addF("internal/runtime/atomic", "Or8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, + sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) + addF("internal/runtime/atomic", "Or", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, + sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) + + // arm64 always uses the new-style atomic logical operations, for both the + // old and new style API. 
+ addF("internal/runtime/atomic", "And8", + makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd8value, ssa.OpAtomicAnd8valueVariant, types.TUINT8, atomicEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "Or8", + makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr8value, ssa.OpAtomicOr8valueVariant, types.TUINT8, atomicEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "And64", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd64value, ssa.OpAtomicAnd64valueVariant, types.TUINT64, atomicEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "And32", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "And", + makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "Or64", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr64value, ssa.OpAtomicOr64valueVariant, types.TUINT64, atomicEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "Or32", + makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64), + sys.ARM64) + addF("internal/runtime/atomic", "Or", + makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64), + sys.ARM64) + + // New-style atomic logical operations, which return the old memory value. + addF("internal/runtime/atomic", "And64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v := s.newValue3(ssa.OpAtomicAnd64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem()) + p0, p1 := s.split(v) + s.vars[memVar] = p1 + return p0 + }, + sys.AMD64) + addF("internal/runtime/atomic", "And32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v := s.newValue3(ssa.OpAtomicAnd32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem()) + p0, p1 := s.split(v) + s.vars[memVar] = p1 + return p0 + }, + sys.AMD64) + addF("internal/runtime/atomic", "Or64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v := s.newValue3(ssa.OpAtomicOr64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem()) + p0, p1 := s.split(v) + s.vars[memVar] = p1 + return p0 + }, + sys.AMD64) + addF("internal/runtime/atomic", "Or32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + v := s.newValue3(ssa.OpAtomicOr32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem()) + p0, p1 := s.split(v) + s.vars[memVar] = p1 + return p0 + }, + sys.AMD64) + + // Aliases for atomic load operations + alias("internal/runtime/atomic", "Loadint32", "internal/runtime/atomic", "Load", all...) + alias("internal/runtime/atomic", "Loadint64", "internal/runtime/atomic", "Load64", all...) + alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load", p4...) + alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load64", p8...) + alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load", p4...) + alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load64", p8...) + alias("internal/runtime/atomic", "LoadAcq", "internal/runtime/atomic", "Load", lwatomics...) + alias("internal/runtime/atomic", "LoadAcq64", "internal/runtime/atomic", "Load64", lwatomics...) 
+ alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq", p4...) + alias("sync", "runtime_LoadAcquintptr", "internal/runtime/atomic", "LoadAcq", p4...) // linknamed + alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq64", p8...) + alias("sync", "runtime_LoadAcquintptr", "internal/runtime/atomic", "LoadAcq64", p8...) // linknamed + + // Aliases for atomic store operations + alias("internal/runtime/atomic", "Storeint32", "internal/runtime/atomic", "Store", all...) + alias("internal/runtime/atomic", "Storeint64", "internal/runtime/atomic", "Store64", all...) + alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store", p4...) + alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store64", p8...) + alias("internal/runtime/atomic", "StoreRel", "internal/runtime/atomic", "Store", lwatomics...) + alias("internal/runtime/atomic", "StoreRel64", "internal/runtime/atomic", "Store64", lwatomics...) + alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel", p4...) + alias("sync", "runtime_StoreReluintptr", "internal/runtime/atomic", "StoreRel", p4...) // linknamed + alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel64", p8...) + alias("sync", "runtime_StoreReluintptr", "internal/runtime/atomic", "StoreRel64", p8...) // linknamed + + // Aliases for atomic swap operations + alias("internal/runtime/atomic", "Xchgint32", "internal/runtime/atomic", "Xchg", all...) + alias("internal/runtime/atomic", "Xchgint64", "internal/runtime/atomic", "Xchg64", all...) + alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg", p4...) + alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg64", p8...) + + // Aliases for atomic add operations + alias("internal/runtime/atomic", "Xaddint32", "internal/runtime/atomic", "Xadd", all...) + alias("internal/runtime/atomic", "Xaddint64", "internal/runtime/atomic", "Xadd64", all...) + alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd", p4...) + alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd64", p8...) + + // Aliases for atomic CAS operations + alias("internal/runtime/atomic", "Casint32", "internal/runtime/atomic", "Cas", all...) + alias("internal/runtime/atomic", "Casint64", "internal/runtime/atomic", "Cas64", all...) + alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas", p4...) + alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas64", p8...) + alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas", p4...) + alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas64", p8...) + alias("internal/runtime/atomic", "CasRel", "internal/runtime/atomic", "Cas", lwatomics...) 
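The uintptr-sized aliases above are registered twice on purpose: against the 32-bit entry point for the p4 (4-byte-pointer) architectures and against the 64-bit one for p8. The selection logic they encode amounts to this (resolveUintptrFn is a hypothetical name for illustration):

package main

import "fmt"

// resolveUintptrFn picks the concrete intrinsic for a uintptr-sized
// atomic the way the p4/p8 alias pairs do: pointer size decides
// whether the 32-bit or 64-bit entry point backs the generic name.
func resolveUintptrFn(ptrSize int, fn32, fn64 string) string {
	if ptrSize == 4 {
		return fn32
	}
	return fn64
}

func main() {
	fmt.Println(resolveUintptrFn(4, "Cas", "Cas64")) // Cas
	fmt.Println(resolveUintptrFn(8, "Cas", "Cas64")) // Cas64
}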
+ + // Aliases for atomic And/Or operations + alias("internal/runtime/atomic", "Anduintptr", "internal/runtime/atomic", "And64", sys.ArchARM64) + alias("internal/runtime/atomic", "Oruintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64) + + /******** math ********/ + addF("math", "sqrt", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpSqrt, types.Types[types.TFLOAT64], args[0]) + }, + sys.I386, sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm) + addF("math", "Trunc", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpTrunc, types.Types[types.TFLOAT64], args[0]) + }, + sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm) + addF("math", "Ceil", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpCeil, types.Types[types.TFLOAT64], args[0]) + }, + sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm) + addF("math", "Floor", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpFloor, types.Types[types.TFLOAT64], args[0]) + }, + sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm) + addF("math", "Round", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpRound, types.Types[types.TFLOAT64], args[0]) + }, + sys.ARM64, sys.PPC64, sys.S390X) + addF("math", "RoundToEven", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpRoundToEven, types.Types[types.TFLOAT64], args[0]) + }, + sys.ARM64, sys.S390X, sys.Wasm) + addF("math", "Abs", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0]) + }, + sys.ARM64, sys.ARM, sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64) + addF("math", "Copysign", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1]) + }, + sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm) + addF("math", "FMA", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2]) + }, + sys.ARM64, sys.PPC64, sys.RISCV64, sys.S390X) + addF("math", "FMA", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if !s.config.UseFMA { + s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64] + return s.variable(n, types.Types[types.TFLOAT64]) + } + + if buildcfg.GOAMD64 >= 3 { + return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2]) + } + + v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasFMA) + b := s.endBlock() + b.Kind = ssa.BlockIf + b.SetControl(v) + bTrue := s.f.NewBlock(ssa.BlockPlain) + bFalse := s.f.NewBlock(ssa.BlockPlain) + bEnd := s.f.NewBlock(ssa.BlockPlain) + b.AddEdgeTo(bTrue) + b.AddEdgeTo(bFalse) + b.Likely = ssa.BranchLikely // >= haswell cpus are common + + // We have the intrinsic - use it directly. + s.startBlock(bTrue) + s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2]) + s.endBlock().AddEdgeTo(bEnd) + + // Call the pure Go version. + s.startBlock(bFalse) + s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64] + s.endBlock().AddEdgeTo(bEnd) + + // Merge results. 
+ s.startBlock(bEnd) + return s.variable(n, types.Types[types.TFLOAT64]) + }, + sys.AMD64) + addF("math", "FMA", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if !s.config.UseFMA { + s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64] + return s.variable(n, types.Types[types.TFLOAT64]) + } + addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARMHasVFPv4, s.sb) + v := s.load(types.Types[types.TBOOL], addr) + b := s.endBlock() + b.Kind = ssa.BlockIf + b.SetControl(v) + bTrue := s.f.NewBlock(ssa.BlockPlain) + bFalse := s.f.NewBlock(ssa.BlockPlain) + bEnd := s.f.NewBlock(ssa.BlockPlain) + b.AddEdgeTo(bTrue) + b.AddEdgeTo(bFalse) + b.Likely = ssa.BranchLikely + + // We have the intrinsic - use it directly. + s.startBlock(bTrue) + s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2]) + s.endBlock().AddEdgeTo(bEnd) + + // Call the pure Go version. + s.startBlock(bFalse) + s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64] + s.endBlock().AddEdgeTo(bEnd) + + // Merge results. + s.startBlock(bEnd) + return s.variable(n, types.Types[types.TFLOAT64]) + }, + sys.ARM) + + makeRoundAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if buildcfg.GOAMD64 >= 2 { + return s.newValue1(op, types.Types[types.TFLOAT64], args[0]) + } + + v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasSSE41) + b := s.endBlock() + b.Kind = ssa.BlockIf + b.SetControl(v) + bTrue := s.f.NewBlock(ssa.BlockPlain) + bFalse := s.f.NewBlock(ssa.BlockPlain) + bEnd := s.f.NewBlock(ssa.BlockPlain) + b.AddEdgeTo(bTrue) + b.AddEdgeTo(bFalse) + b.Likely = ssa.BranchLikely // most machines have sse4.1 nowadays + + // We have the intrinsic - use it directly. + s.startBlock(bTrue) + s.vars[n] = s.newValue1(op, types.Types[types.TFLOAT64], args[0]) + s.endBlock().AddEdgeTo(bEnd) + + // Call the pure Go version. + s.startBlock(bFalse) + s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64] + s.endBlock().AddEdgeTo(bEnd) + + // Merge results. 
+ s.startBlock(bEnd) + return s.variable(n, types.Types[types.TFLOAT64]) + } + } + addF("math", "RoundToEven", + makeRoundAMD64(ssa.OpRoundToEven), + sys.AMD64) + addF("math", "Floor", + makeRoundAMD64(ssa.OpFloor), + sys.AMD64) + addF("math", "Ceil", + makeRoundAMD64(ssa.OpCeil), + sys.AMD64) + addF("math", "Trunc", + makeRoundAMD64(ssa.OpTrunc), + sys.AMD64) + + /******** math/bits ********/ + addF("math/bits", "TrailingZeros64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) + addF("math/bits", "TrailingZeros32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) + addF("math/bits", "TrailingZeros16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0]) + c := s.constInt32(types.Types[types.TUINT32], 1<<16) + y := s.newValue2(ssa.OpOr32, types.Types[types.TUINT32], x, c) + return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], y) + }, + sys.MIPS) + addF("math/bits", "TrailingZeros16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm) + addF("math/bits", "TrailingZeros16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0]) + c := s.constInt64(types.Types[types.TUINT64], 1<<16) + y := s.newValue2(ssa.OpOr64, types.Types[types.TUINT64], x, c) + return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], y) + }, + sys.S390X, sys.PPC64) + addF("math/bits", "TrailingZeros8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + x := s.newValue1(ssa.OpZeroExt8to32, types.Types[types.TUINT32], args[0]) + c := s.constInt32(types.Types[types.TUINT32], 1<<8) + y := s.newValue2(ssa.OpOr32, types.Types[types.TUINT32], x, c) + return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], y) + }, + sys.MIPS) + addF("math/bits", "TrailingZeros8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm) + addF("math/bits", "TrailingZeros8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0]) + c := s.constInt64(types.Types[types.TUINT64], 1<<8) + y := s.newValue2(ssa.OpOr64, types.Types[types.TUINT64], x, c) + return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], y) + }, + sys.S390X) + alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...) + alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...) + // ReverseBytes inlines correctly, no need to intrinsify it. 
+ // Nothing special is needed for targets where ReverseBytes16 lowers to a rotate + // On Power10, 16-bit rotate is not available so use BRH instruction + if buildcfg.GOPPC64 >= 10 { + addF("math/bits", "ReverseBytes16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT], args[0]) + }, + sys.PPC64) + } + + addF("math/bits", "Len64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) + addF("math/bits", "Len32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.ARM64, sys.PPC64) + addF("math/bits", "Len32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if s.config.PtrSize == 4 { + return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0]) + } + x := s.newValue1(ssa.OpZeroExt32to64, types.Types[types.TUINT64], args[0]) + return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x) + }, + sys.ARM, sys.S390X, sys.MIPS, sys.Wasm) + addF("math/bits", "Len16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if s.config.PtrSize == 4 { + x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0]) + return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x) + } + x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0]) + return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x) + }, + sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) + addF("math/bits", "Len16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0]) + }, + sys.AMD64) + addF("math/bits", "Len8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if s.config.PtrSize == 4 { + x := s.newValue1(ssa.OpZeroExt8to32, types.Types[types.TUINT32], args[0]) + return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x) + } + x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0]) + return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x) + }, + sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) + addF("math/bits", "Len8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0]) + }, + sys.AMD64) + addF("math/bits", "Len", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if s.config.PtrSize == 4 { + return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0]) + } + return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) + // LeadingZeros is handled because it trivially calls Len. 
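The closing comment holds because LeadingZeros is defined in terms of Len: for a w-bit value, LeadingZerosW(x) = w - LenW(x), so intrinsifying Len covers both. A quick check of the identity with math/bits:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// LeadingZeros64 is 64 - Len64, so once Len64 is intrinsified,
	// LeadingZeros64 compiles to the same instruction plus a subtract.
	for _, x := range []uint64{0, 1, 255, 1 << 63} {
		fmt.Println(bits.LeadingZeros64(x) == 64-bits.Len64(x)) // true every time
	}
}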
+ addF("math/bits", "Reverse64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0]) + }, + sys.ARM64) + addF("math/bits", "Reverse32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitRev32, types.Types[types.TINT], args[0]) + }, + sys.ARM64) + addF("math/bits", "Reverse16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitRev16, types.Types[types.TINT], args[0]) + }, + sys.ARM64) + addF("math/bits", "Reverse8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitRev8, types.Types[types.TINT], args[0]) + }, + sys.ARM64) + addF("math/bits", "Reverse", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0]) + }, + sys.ARM64) + addF("math/bits", "RotateLeft8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(ssa.OpRotateLeft8, types.Types[types.TUINT8], args[0], args[1]) + }, + sys.AMD64, sys.RISCV64) + addF("math/bits", "RotateLeft16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(ssa.OpRotateLeft16, types.Types[types.TUINT16], args[0], args[1]) + }, + sys.AMD64, sys.RISCV64) + addF("math/bits", "RotateLeft32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(ssa.OpRotateLeft32, types.Types[types.TUINT32], args[0], args[1]) + }, + sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm) + addF("math/bits", "RotateLeft64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(ssa.OpRotateLeft64, types.Types[types.TUINT64], args[0], args[1]) + }, + sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm) + alias("math/bits", "RotateLeft", "math/bits", "RotateLeft64", p8...) + + makeOnesCountAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if buildcfg.GOAMD64 >= 2 { + return s.newValue1(op, types.Types[types.TINT], args[0]) + } + + v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasPOPCNT) + b := s.endBlock() + b.Kind = ssa.BlockIf + b.SetControl(v) + bTrue := s.f.NewBlock(ssa.BlockPlain) + bFalse := s.f.NewBlock(ssa.BlockPlain) + bEnd := s.f.NewBlock(ssa.BlockPlain) + b.AddEdgeTo(bTrue) + b.AddEdgeTo(bFalse) + b.Likely = ssa.BranchLikely // most machines have popcnt nowadays + + // We have the intrinsic - use it directly. + s.startBlock(bTrue) + s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0]) + s.endBlock().AddEdgeTo(bEnd) + + // Call the pure Go version. + s.startBlock(bFalse) + s.vars[n] = s.callResult(n, callNormal) // types.Types[TINT] + s.endBlock().AddEdgeTo(bEnd) + + // Merge results. 
+ s.startBlock(bEnd) + return s.variable(n, types.Types[types.TINT]) + } + } + addF("math/bits", "OnesCount64", + makeOnesCountAMD64(ssa.OpPopCount64), + sys.AMD64) + addF("math/bits", "OnesCount64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0]) + }, + sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm) + addF("math/bits", "OnesCount32", + makeOnesCountAMD64(ssa.OpPopCount32), + sys.AMD64) + addF("math/bits", "OnesCount32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0]) + }, + sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm) + addF("math/bits", "OnesCount16", + makeOnesCountAMD64(ssa.OpPopCount16), + sys.AMD64) + addF("math/bits", "OnesCount16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0]) + }, + sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm) + addF("math/bits", "OnesCount8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpPopCount8, types.Types[types.TINT], args[0]) + }, + sys.S390X, sys.PPC64, sys.Wasm) + addF("math/bits", "OnesCount", + makeOnesCountAMD64(ssa.OpPopCount64), + sys.AMD64) + addF("math/bits", "Mul64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1]) + }, + sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64, sys.Loong64) + alias("math/bits", "Mul", "math/bits", "Mul64", p8...) + alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...) + addF("math/bits", "Add64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2]) + }, + sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64) + alias("math/bits", "Add", "math/bits", "Add64", p8...) + alias("internal/runtime/math", "Add64", "math/bits", "Add64", all...) + addF("math/bits", "Sub64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2]) + }, + sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64) + alias("math/bits", "Sub", "math/bits", "Sub64", p8...) + addF("math/bits", "Div64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + // check for divide-by-zero/overflow and panic with appropriate message + cmpZero := s.newValue2(s.ssaOp(ir.ONE, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[2], s.zeroVal(types.Types[types.TUINT64])) + s.check(cmpZero, ir.Syms.Panicdivide) + cmpOverflow := s.newValue2(s.ssaOp(ir.OLT, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[0], args[2]) + s.check(cmpOverflow, ir.Syms.Panicoverflow) + return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2]) + }, + sys.AMD64) + alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64) + + alias("internal/runtime/sys", "TrailingZeros8", "math/bits", "TrailingZeros8", all...) + alias("internal/runtime/sys", "TrailingZeros32", "math/bits", "TrailingZeros32", all...) 
+ alias("internal/runtime/sys", "TrailingZeros64", "math/bits", "TrailingZeros64", all...) + alias("internal/runtime/sys", "Len8", "math/bits", "Len8", all...) + alias("internal/runtime/sys", "Len64", "math/bits", "Len64", all...) + alias("internal/runtime/sys", "OnesCount64", "math/bits", "OnesCount64", all...) + + /******** sync/atomic ********/ + + // Note: these are disabled by flag_race in findIntrinsic below. + alias("sync/atomic", "LoadInt32", "internal/runtime/atomic", "Load", all...) + alias("sync/atomic", "LoadInt64", "internal/runtime/atomic", "Load64", all...) + alias("sync/atomic", "LoadPointer", "internal/runtime/atomic", "Loadp", all...) + alias("sync/atomic", "LoadUint32", "internal/runtime/atomic", "Load", all...) + alias("sync/atomic", "LoadUint64", "internal/runtime/atomic", "Load64", all...) + alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load", p4...) + alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load64", p8...) + + alias("sync/atomic", "StoreInt32", "internal/runtime/atomic", "Store", all...) + alias("sync/atomic", "StoreInt64", "internal/runtime/atomic", "Store64", all...) + // Note: not StorePointer, that needs a write barrier. Same below for {CompareAnd}Swap. + alias("sync/atomic", "StoreUint32", "internal/runtime/atomic", "Store", all...) + alias("sync/atomic", "StoreUint64", "internal/runtime/atomic", "Store64", all...) + alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store", p4...) + alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store64", p8...) + + alias("sync/atomic", "SwapInt32", "internal/runtime/atomic", "Xchg", all...) + alias("sync/atomic", "SwapInt64", "internal/runtime/atomic", "Xchg64", all...) + alias("sync/atomic", "SwapUint32", "internal/runtime/atomic", "Xchg", all...) + alias("sync/atomic", "SwapUint64", "internal/runtime/atomic", "Xchg64", all...) + alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg", p4...) + alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg64", p8...) + + alias("sync/atomic", "CompareAndSwapInt32", "internal/runtime/atomic", "Cas", all...) + alias("sync/atomic", "CompareAndSwapInt64", "internal/runtime/atomic", "Cas64", all...) + alias("sync/atomic", "CompareAndSwapUint32", "internal/runtime/atomic", "Cas", all...) + alias("sync/atomic", "CompareAndSwapUint64", "internal/runtime/atomic", "Cas64", all...) + alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas", p4...) + alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas64", p8...) + + alias("sync/atomic", "AddInt32", "internal/runtime/atomic", "Xadd", all...) + alias("sync/atomic", "AddInt64", "internal/runtime/atomic", "Xadd64", all...) + alias("sync/atomic", "AddUint32", "internal/runtime/atomic", "Xadd", all...) + alias("sync/atomic", "AddUint64", "internal/runtime/atomic", "Xadd64", all...) + alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd", p4...) + alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd64", p8...) 
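These aliases are sound because the sync/atomic functions share signatures and semantics with their internal/runtime/atomic counterparts; Xadd, like AddUint32, returns the updated value. What a user observes from the Xadd aliases, for example (assuming the race detector is off, since flag_race disables these intrinsics in findIntrinsic below):

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	// sync/atomic.AddUint32 is aliased to internal/runtime/atomic.Xadd,
	// so on supported architectures the call is intrinsified into an
	// atomic add that yields the new value, rather than a function call.
	var n uint32
	fmt.Println(atomic.AddUint32(&n, 5)) // 5
	fmt.Println(atomic.AddUint32(&n, 5)) // 10
}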
+ + alias("sync/atomic", "AndInt32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64) + alias("sync/atomic", "AndUint32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64) + alias("sync/atomic", "AndInt64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64) + alias("sync/atomic", "AndUint64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64) + alias("sync/atomic", "AndUintptr", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64) + alias("sync/atomic", "OrInt32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64) + alias("sync/atomic", "OrUint32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64) + alias("sync/atomic", "OrInt64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64) + alias("sync/atomic", "OrUint64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64) + alias("sync/atomic", "OrUintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64) + + /******** math/big ********/ + alias("math/big", "mulWW", "math/bits", "Mul64", p8...) +} + +// findIntrinsic returns a function which builds the SSA equivalent of the +// function identified by the symbol sym. If sym is not an intrinsic call, returns nil. +func findIntrinsic(sym *types.Sym) intrinsicBuilder { + if sym == nil || sym.Pkg == nil { + return nil + } + pkg := sym.Pkg.Path + if sym.Pkg == ir.Pkgs.Runtime { + pkg = "runtime" + } + if base.Flag.Race && pkg == "sync/atomic" { + // The race detector needs to be able to intercept these calls. + // We can't intrinsify them. + return nil + } + // Skip intrinsifying math functions (which may contain hard-float + // instructions) when soft-float + if Arch.SoftFloat && pkg == "math" { + return nil + } + + fn := sym.Name + if ssa.IntrinsicsDisable { + if pkg == "runtime" && (fn == "getcallerpc" || fn == "getcallersp" || fn == "getclosureptr") { + // These runtime functions don't have definitions, must be intrinsics. + } else { + return nil + } + } + return intrinsics[intrinsicKey{Arch.LinkArch.Arch, pkg, fn}] +} + +func IsIntrinsicCall(n *ir.CallExpr) bool { + if n == nil { + return false + } + name, ok := n.Fun.(*ir.Name) + if !ok { + return false + } + return findIntrinsic(name.Sym()) != nil +} diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index c02f5f5129..67479ace3b 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -221,6 +221,10 @@ func InitConfig() { ir.Syms.SigPanic = typecheck.LookupRuntimeFunc("sigpanic") } +func InitTables() { + initIntrinsics() +} + // AbiForBodylessFuncStackMap returns the ABI for a bodyless function's stack map. // This is not necessarily the ABI used to call it. // Currently (1.17 dev) such a stack map is always ABI0; @@ -4200,1037 +4204,6 @@ func (s *state) split(v *ssa.Value) (*ssa.Value, *ssa.Value) { return p0, p1 } -var intrinsics map[intrinsicKey]intrinsicBuilder - -// An intrinsicBuilder converts a call node n into an ssa value that -// implements that call as an intrinsic. args is a list of arguments to the func. 
-type intrinsicBuilder func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value - -type intrinsicKey struct { - arch *sys.Arch - pkg string - fn string -} - -func InitTables() { - intrinsics = map[intrinsicKey]intrinsicBuilder{} - - var p4 []*sys.Arch - var p8 []*sys.Arch - var lwatomics []*sys.Arch - for _, a := range sys.Archs { - if a.PtrSize == 4 { - p4 = append(p4, a) - } else { - p8 = append(p8, a) - } - if a.Family != sys.PPC64 { - lwatomics = append(lwatomics, a) - } - } - all := sys.Archs[:] - - // add adds the intrinsic b for pkg.fn for the given list of architectures. - add := func(pkg, fn string, b intrinsicBuilder, archs ...*sys.Arch) { - for _, a := range archs { - intrinsics[intrinsicKey{a, pkg, fn}] = b - } - } - // addF does the same as add but operates on architecture families. - addF := func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily) { - for _, a := range sys.Archs { - if a.InFamily(archFamilies...) { - intrinsics[intrinsicKey{a, pkg, fn}] = b - } - } - } - // alias defines pkg.fn = pkg2.fn2 for all architectures in archs for which pkg2.fn2 exists. - alias := func(pkg, fn, pkg2, fn2 string, archs ...*sys.Arch) { - aliased := false - for _, a := range archs { - if b, ok := intrinsics[intrinsicKey{a, pkg2, fn2}]; ok { - intrinsics[intrinsicKey{a, pkg, fn}] = b - aliased = true - } - } - if !aliased { - panic(fmt.Sprintf("attempted to alias undefined intrinsic: %s.%s", pkg, fn)) - } - } - - /******** runtime ********/ - if !base.Flag.Cfg.Instrumenting { - add("runtime", "slicebytetostringtmp", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - // Compiler frontend optimizations emit OBYTES2STRTMP nodes - // for the backend instead of slicebytetostringtmp calls - // when not instrumenting. - return s.newValue2(ssa.OpStringMake, n.Type(), args[0], args[1]) - }, - all...) - } - addF("internal/runtime/math", "MulUintptr", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - if s.config.PtrSize == 4 { - return s.newValue2(ssa.OpMul32uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1]) - } - return s.newValue2(ssa.OpMul64uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1]) - }, - sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.RISCV64, sys.ARM64) - add("runtime", "KeepAlive", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - data := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, args[0]) - s.vars[memVar] = s.newValue2(ssa.OpKeepAlive, types.TypeMem, data, s.mem()) - return nil - }, - all...) - add("runtime", "getclosureptr", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue0(ssa.OpGetClosurePtr, s.f.Config.Types.Uintptr) - }, - all...) - - add("runtime", "getcallerpc", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue0(ssa.OpGetCallerPC, s.f.Config.Types.Uintptr) - }, - all...) - - add("runtime", "getcallersp", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(ssa.OpGetCallerSP, s.f.Config.Types.Uintptr, s.mem()) - }, - all...) 
- - addF("runtime", "publicationBarrier", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem()) - return nil - }, - sys.ARM64, sys.PPC64, sys.RISCV64) - - brev_arch := []sys.ArchFamily{sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X} - if buildcfg.GOPPC64 >= 10 { - // Use only on Power10 as the new byte reverse instructions that Power10 provide - // make it worthwhile as an intrinsic - brev_arch = append(brev_arch, sys.PPC64) - } - /******** internal/runtime/sys ********/ - addF("internal/runtime/sys", "Bswap32", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0]) - }, - brev_arch...) - addF("internal/runtime/sys", "Bswap64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0]) - }, - brev_arch...) - - /****** Prefetch ******/ - makePrefetchFunc := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue2(op, types.TypeMem, args[0], s.mem()) - return nil - } - } - - // Make Prefetch intrinsics for supported platforms - // On the unsupported platforms stub function will be eliminated - addF("internal/runtime/sys", "Prefetch", makePrefetchFunc(ssa.OpPrefetchCache), - sys.AMD64, sys.ARM64, sys.PPC64) - addF("internal/runtime/sys", "PrefetchStreamed", makePrefetchFunc(ssa.OpPrefetchCacheStreamed), - sys.AMD64, sys.ARM64, sys.PPC64) - - /******** internal/runtime/atomic ********/ - addF("internal/runtime/atomic", "Load", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue2(ssa.OpAtomicLoad32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem()) - s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) - return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v) - }, - sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("internal/runtime/atomic", "Load8", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue2(ssa.OpAtomicLoad8, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], s.mem()) - s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) - return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], v) - }, - sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("internal/runtime/atomic", "Load64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue2(ssa.OpAtomicLoad64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem()) - s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) - return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v) - }, - sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("internal/runtime/atomic", "LoadAcq", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue2(ssa.OpAtomicLoadAcq32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem()) - s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) - return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v) - }, - sys.PPC64, sys.S390X) - addF("internal/runtime/atomic", "LoadAcq64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := 
s.newValue2(ssa.OpAtomicLoadAcq64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem()) - s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) - return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v) - }, - sys.PPC64) - addF("internal/runtime/atomic", "Loadp", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem()) - s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) - return s.newValue1(ssa.OpSelect0, s.f.Config.Types.BytePtr, v) - }, - sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - - addF("internal/runtime/atomic", "Store", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue3(ssa.OpAtomicStore32, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, - sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("internal/runtime/atomic", "Store8", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue3(ssa.OpAtomicStore8, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, - sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("internal/runtime/atomic", "Store64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue3(ssa.OpAtomicStore64, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, - sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("internal/runtime/atomic", "StorepNoWB", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, - sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.RISCV64, sys.S390X) - addF("internal/runtime/atomic", "StoreRel", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, - sys.PPC64, sys.S390X) - addF("internal/runtime/atomic", "StoreRel64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel64, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, - sys.PPC64) - - addF("internal/runtime/atomic", "Xchg", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue3(ssa.OpAtomicExchange32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem()) - s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) - return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v) - }, - sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("internal/runtime/atomic", "Xchg64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue3(ssa.OpAtomicExchange64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem()) - s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v) - return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v) - }, - sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - - type atomicOpEmitter func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) - - makeAtomicGuardedIntrinsicARM64common := func(op0, op1 ssa.Op, typ 
-
-		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			if buildcfg.GOARM64.LSE {
-				emit(s, n, args, op1, typ, needReturn)
-			} else {
-				// Target Atomic feature is identified by dynamic detection
-				addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
-				v := s.load(types.Types[types.TBOOL], addr)
-				b := s.endBlock()
-				b.Kind = ssa.BlockIf
-				b.SetControl(v)
-				bTrue := s.f.NewBlock(ssa.BlockPlain)
-				bFalse := s.f.NewBlock(ssa.BlockPlain)
-				bEnd := s.f.NewBlock(ssa.BlockPlain)
-				b.AddEdgeTo(bTrue)
-				b.AddEdgeTo(bFalse)
-				b.Likely = ssa.BranchLikely
-
-				// We have atomic instructions - use it directly.
-				s.startBlock(bTrue)
-				emit(s, n, args, op1, typ, needReturn)
-				s.endBlock().AddEdgeTo(bEnd)
-
-				// Use original instruction sequence.
-				s.startBlock(bFalse)
-				emit(s, n, args, op0, typ, needReturn)
-				s.endBlock().AddEdgeTo(bEnd)
-
-				// Merge results.
-				s.startBlock(bEnd)
-			}
-			if needReturn {
-				return s.variable(n, types.Types[typ])
-			} else {
-				return nil
-			}
-		}
-	}
-	makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
-		return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, true)
-	}
-	makeAtomicGuardedIntrinsicARM64old := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
-		return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, false)
-	}
-
-	atomicEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
-		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
-		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
-		if needReturn {
-			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
-		}
-	}
-	addF("internal/runtime/atomic", "Xchg",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, types.TUINT32, atomicEmitterARM64),
-		sys.ARM64)
-	addF("internal/runtime/atomic", "Xchg64",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicEmitterARM64),
-		sys.ARM64)
-
-	addF("internal/runtime/atomic", "Xadd",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
-			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
-		},
-		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-	addF("internal/runtime/atomic", "Xadd64",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
-			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
-		},
-		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-
-	addF("internal/runtime/atomic", "Xadd",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, types.TUINT32, atomicEmitterARM64),
-		sys.ARM64)
-	addF("internal/runtime/atomic", "Xadd64",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, types.TUINT64, atomicEmitterARM64),
-		sys.ARM64)
-
-	addF("internal/runtime/atomic", "Cas",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
-			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
-		},
-		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-	addF("internal/runtime/atomic", "Cas64",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
-			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
-		},
-		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-	addF("internal/runtime/atomic", "CasRel",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
-			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
-			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
-		},
-		sys.PPC64)
-
-	atomicCasEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
-		v := s.newValue4(op, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
-		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
-		if needReturn {
-			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
-		}
-	}
-
-	addF("internal/runtime/atomic", "Cas",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, types.TBOOL, atomicCasEmitterARM64),
-		sys.ARM64)
-	addF("internal/runtime/atomic", "Cas64",
-		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, types.TBOOL, atomicCasEmitterARM64),
-		sys.ARM64)
-
-	// Old-style atomic logical operation API (all supported archs except arm64).
-	addF("internal/runtime/atomic", "And8",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
-			return nil
-		},
-		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-	addF("internal/runtime/atomic", "And",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
-			return nil
-		},
-		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-	addF("internal/runtime/atomic", "Or8",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
-			return nil
-		},
-		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-	addF("internal/runtime/atomic", "Or",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
-			return nil
-		},
-		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
-
-	// arm64 always uses the new-style atomic logical operations, for both the
-	// old and new style API.
- addF("internal/runtime/atomic", "And8", - makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd8value, ssa.OpAtomicAnd8valueVariant, types.TUINT8, atomicEmitterARM64), - sys.ARM64) - addF("internal/runtime/atomic", "Or8", - makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr8value, ssa.OpAtomicOr8valueVariant, types.TUINT8, atomicEmitterARM64), - sys.ARM64) - addF("internal/runtime/atomic", "And64", - makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd64value, ssa.OpAtomicAnd64valueVariant, types.TUINT64, atomicEmitterARM64), - sys.ARM64) - addF("internal/runtime/atomic", "And32", - makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64), - sys.ARM64) - addF("internal/runtime/atomic", "And", - makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64), - sys.ARM64) - addF("internal/runtime/atomic", "Or64", - makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr64value, ssa.OpAtomicOr64valueVariant, types.TUINT64, atomicEmitterARM64), - sys.ARM64) - addF("internal/runtime/atomic", "Or32", - makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64), - sys.ARM64) - addF("internal/runtime/atomic", "Or", - makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64), - sys.ARM64) - - // New-style atomic logical operations, which return the old memory value. - addF("internal/runtime/atomic", "And64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue3(ssa.OpAtomicAnd64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem()) - p0, p1 := s.split(v) - s.vars[memVar] = p1 - return p0 - }, - sys.AMD64) - addF("internal/runtime/atomic", "And32", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue3(ssa.OpAtomicAnd32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem()) - p0, p1 := s.split(v) - s.vars[memVar] = p1 - return p0 - }, - sys.AMD64) - addF("internal/runtime/atomic", "Or64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue3(ssa.OpAtomicOr64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem()) - p0, p1 := s.split(v) - s.vars[memVar] = p1 - return p0 - }, - sys.AMD64) - addF("internal/runtime/atomic", "Or32", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - v := s.newValue3(ssa.OpAtomicOr32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem()) - p0, p1 := s.split(v) - s.vars[memVar] = p1 - return p0 - }, - sys.AMD64) - - // Aliases for atomic load operations - alias("internal/runtime/atomic", "Loadint32", "internal/runtime/atomic", "Load", all...) - alias("internal/runtime/atomic", "Loadint64", "internal/runtime/atomic", "Load64", all...) - alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load", p4...) - alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load64", p8...) - alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load", p4...) - alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load64", p8...) - alias("internal/runtime/atomic", "LoadAcq", "internal/runtime/atomic", "Load", lwatomics...) - alias("internal/runtime/atomic", "LoadAcq64", "internal/runtime/atomic", "Load64", lwatomics...) 
- alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq", p4...) - alias("sync", "runtime_LoadAcquintptr", "internal/runtime/atomic", "LoadAcq", p4...) // linknamed - alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq64", p8...) - alias("sync", "runtime_LoadAcquintptr", "internal/runtime/atomic", "LoadAcq64", p8...) // linknamed - - // Aliases for atomic store operations - alias("internal/runtime/atomic", "Storeint32", "internal/runtime/atomic", "Store", all...) - alias("internal/runtime/atomic", "Storeint64", "internal/runtime/atomic", "Store64", all...) - alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store", p4...) - alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store64", p8...) - alias("internal/runtime/atomic", "StoreRel", "internal/runtime/atomic", "Store", lwatomics...) - alias("internal/runtime/atomic", "StoreRel64", "internal/runtime/atomic", "Store64", lwatomics...) - alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel", p4...) - alias("sync", "runtime_StoreReluintptr", "internal/runtime/atomic", "StoreRel", p4...) // linknamed - alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel64", p8...) - alias("sync", "runtime_StoreReluintptr", "internal/runtime/atomic", "StoreRel64", p8...) // linknamed - - // Aliases for atomic swap operations - alias("internal/runtime/atomic", "Xchgint32", "internal/runtime/atomic", "Xchg", all...) - alias("internal/runtime/atomic", "Xchgint64", "internal/runtime/atomic", "Xchg64", all...) - alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg", p4...) - alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg64", p8...) - - // Aliases for atomic add operations - alias("internal/runtime/atomic", "Xaddint32", "internal/runtime/atomic", "Xadd", all...) - alias("internal/runtime/atomic", "Xaddint64", "internal/runtime/atomic", "Xadd64", all...) - alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd", p4...) - alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd64", p8...) - - // Aliases for atomic CAS operations - alias("internal/runtime/atomic", "Casint32", "internal/runtime/atomic", "Cas", all...) - alias("internal/runtime/atomic", "Casint64", "internal/runtime/atomic", "Cas64", all...) - alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas", p4...) - alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas64", p8...) - alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas", p4...) - alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas64", p8...) - alias("internal/runtime/atomic", "CasRel", "internal/runtime/atomic", "Cas", lwatomics...) 
-
-	// Aliases for atomic And/Or operations
-	alias("internal/runtime/atomic", "Anduintptr", "internal/runtime/atomic", "And64", sys.ArchARM64)
-	alias("internal/runtime/atomic", "Oruintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64)
-
-	/******** math ********/
-	addF("math", "sqrt",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpSqrt, types.Types[types.TFLOAT64], args[0])
-		},
-		sys.I386, sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
-	addF("math", "Trunc",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpTrunc, types.Types[types.TFLOAT64], args[0])
-		},
-		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
-	addF("math", "Ceil",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpCeil, types.Types[types.TFLOAT64], args[0])
-		},
-		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
-	addF("math", "Floor",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpFloor, types.Types[types.TFLOAT64], args[0])
-		},
-		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
-	addF("math", "Round",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpRound, types.Types[types.TFLOAT64], args[0])
-		},
-		sys.ARM64, sys.PPC64, sys.S390X)
-	addF("math", "RoundToEven",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpRoundToEven, types.Types[types.TFLOAT64], args[0])
-		},
-		sys.ARM64, sys.S390X, sys.Wasm)
-	addF("math", "Abs",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0])
-		},
-		sys.ARM64, sys.ARM, sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64)
-	addF("math", "Copysign",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1])
-		},
-		sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm)
-	addF("math", "FMA",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
-		},
-		sys.ARM64, sys.PPC64, sys.RISCV64, sys.S390X)
-	addF("math", "FMA",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			if !s.config.UseFMA {
-				s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
-				return s.variable(n, types.Types[types.TFLOAT64])
-			}
-
-			if buildcfg.GOAMD64 >= 3 {
-				return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
-			}
-
-			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasFMA)
-			b := s.endBlock()
-			b.Kind = ssa.BlockIf
-			b.SetControl(v)
-			bTrue := s.f.NewBlock(ssa.BlockPlain)
-			bFalse := s.f.NewBlock(ssa.BlockPlain)
-			bEnd := s.f.NewBlock(ssa.BlockPlain)
-			b.AddEdgeTo(bTrue)
-			b.AddEdgeTo(bFalse)
-			b.Likely = ssa.BranchLikely // >= haswell cpus are common
-
-			// We have the intrinsic - use it directly.
-			s.startBlock(bTrue)
-			s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Call the pure Go version.
-			s.startBlock(bFalse)
-			s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Merge results.
-			s.startBlock(bEnd)
-			return s.variable(n, types.Types[types.TFLOAT64])
-		},
-		sys.AMD64)
-	addF("math", "FMA",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			if !s.config.UseFMA {
-				s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
-				return s.variable(n, types.Types[types.TFLOAT64])
-			}
-			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARMHasVFPv4, s.sb)
-			v := s.load(types.Types[types.TBOOL], addr)
-			b := s.endBlock()
-			b.Kind = ssa.BlockIf
-			b.SetControl(v)
-			bTrue := s.f.NewBlock(ssa.BlockPlain)
-			bFalse := s.f.NewBlock(ssa.BlockPlain)
-			bEnd := s.f.NewBlock(ssa.BlockPlain)
-			b.AddEdgeTo(bTrue)
-			b.AddEdgeTo(bFalse)
-			b.Likely = ssa.BranchLikely
-
-			// We have the intrinsic - use it directly.
-			s.startBlock(bTrue)
-			s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Call the pure Go version.
-			s.startBlock(bFalse)
-			s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Merge results.
-			s.startBlock(bEnd)
-			return s.variable(n, types.Types[types.TFLOAT64])
-		},
-		sys.ARM)
-
-	makeRoundAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			if buildcfg.GOAMD64 >= 2 {
-				return s.newValue1(op, types.Types[types.TFLOAT64], args[0])
-			}
-
-			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasSSE41)
-			b := s.endBlock()
-			b.Kind = ssa.BlockIf
-			b.SetControl(v)
-			bTrue := s.f.NewBlock(ssa.BlockPlain)
-			bFalse := s.f.NewBlock(ssa.BlockPlain)
-			bEnd := s.f.NewBlock(ssa.BlockPlain)
-			b.AddEdgeTo(bTrue)
-			b.AddEdgeTo(bFalse)
-			b.Likely = ssa.BranchLikely // most machines have sse4.1 nowadays
-
-			// We have the intrinsic - use it directly.
-			s.startBlock(bTrue)
-			s.vars[n] = s.newValue1(op, types.Types[types.TFLOAT64], args[0])
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Call the pure Go version.
-			s.startBlock(bFalse)
-			s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Merge results.
-			s.startBlock(bEnd)
-			return s.variable(n, types.Types[types.TFLOAT64])
-		}
-	}
-	addF("math", "RoundToEven",
-		makeRoundAMD64(ssa.OpRoundToEven),
-		sys.AMD64)
-	addF("math", "Floor",
-		makeRoundAMD64(ssa.OpFloor),
-		sys.AMD64)
-	addF("math", "Ceil",
-		makeRoundAMD64(ssa.OpCeil),
-		sys.AMD64)
-	addF("math", "Trunc",
-		makeRoundAMD64(ssa.OpTrunc),
-		sys.AMD64)
-
-	/******** math/bits ********/
-	addF("math/bits", "TrailingZeros64",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
-		},
-		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
-	addF("math/bits", "TrailingZeros32",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
-		},
-		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
-	addF("math/bits", "TrailingZeros16",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0])
-			c := s.constInt32(types.Types[types.TUINT32], 1<<16)
-			y := s.newValue2(ssa.OpOr32, types.Types[types.TUINT32], x, c)
-			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], y)
-		},
-		sys.MIPS)
-	addF("math/bits", "TrailingZeros16",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0])
-		},
-		sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm)
-	addF("math/bits", "TrailingZeros16",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0])
-			c := s.constInt64(types.Types[types.TUINT64], 1<<16)
-			y := s.newValue2(ssa.OpOr64, types.Types[types.TUINT64], x, c)
-			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], y)
-		},
-		sys.S390X, sys.PPC64)
-	addF("math/bits", "TrailingZeros8",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			x := s.newValue1(ssa.OpZeroExt8to32, types.Types[types.TUINT32], args[0])
-			c := s.constInt32(types.Types[types.TUINT32], 1<<8)
-			y := s.newValue2(ssa.OpOr32, types.Types[types.TUINT32], x, c)
-			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], y)
-		},
-		sys.MIPS)
-	addF("math/bits", "TrailingZeros8",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0])
-		},
-		sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm)
-	addF("math/bits", "TrailingZeros8",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0])
-			c := s.constInt64(types.Types[types.TUINT64], 1<<8)
-			y := s.newValue2(ssa.OpOr64, types.Types[types.TUINT64], x, c)
-			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], y)
-		},
-		sys.S390X)
-	alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...)
-	alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...)
-	// ReverseBytes inlines correctly, no need to intrinsify it.
-	// Nothing special is needed for targets where ReverseBytes16 lowers to a rotate
-	// On Power10, 16-bit rotate is not available so use BRH instruction
-	if buildcfg.GOPPC64 >= 10 {
-		addF("math/bits", "ReverseBytes16",
-			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-				return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT], args[0])
-			},
-			sys.PPC64)
-	}
-
-	addF("math/bits", "Len64",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
-		},
-		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
-	addF("math/bits", "Len32",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
-		},
-		sys.AMD64, sys.ARM64, sys.PPC64)
-	addF("math/bits", "Len32",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			if s.config.PtrSize == 4 {
-				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
-			}
-			x := s.newValue1(ssa.OpZeroExt32to64, types.Types[types.TUINT64], args[0])
-			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
-		},
-		sys.ARM, sys.S390X, sys.MIPS, sys.Wasm)
-	addF("math/bits", "Len16",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			if s.config.PtrSize == 4 {
-				x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0])
-				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x)
-			}
-			x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0])
-			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
-		},
-		sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
-	addF("math/bits", "Len16",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
-		},
-		sys.AMD64)
-	addF("math/bits", "Len8",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			if s.config.PtrSize == 4 {
-				x := s.newValue1(ssa.OpZeroExt8to32, types.Types[types.TUINT32], args[0])
-				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x)
-			}
-			x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0])
-			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
-		},
-		sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
-	addF("math/bits", "Len8",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
-		},
-		sys.AMD64)
-	addF("math/bits", "Len",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			if s.config.PtrSize == 4 {
-				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
-			}
-			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
-		},
-		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
-	// LeadingZeros is handled because it trivially calls Len.
- addF("math/bits", "Reverse64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0]) - }, - sys.ARM64) - addF("math/bits", "Reverse32", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(ssa.OpBitRev32, types.Types[types.TINT], args[0]) - }, - sys.ARM64) - addF("math/bits", "Reverse16", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(ssa.OpBitRev16, types.Types[types.TINT], args[0]) - }, - sys.ARM64) - addF("math/bits", "Reverse8", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(ssa.OpBitRev8, types.Types[types.TINT], args[0]) - }, - sys.ARM64) - addF("math/bits", "Reverse", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0]) - }, - sys.ARM64) - addF("math/bits", "RotateLeft8", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue2(ssa.OpRotateLeft8, types.Types[types.TUINT8], args[0], args[1]) - }, - sys.AMD64, sys.RISCV64) - addF("math/bits", "RotateLeft16", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue2(ssa.OpRotateLeft16, types.Types[types.TUINT16], args[0], args[1]) - }, - sys.AMD64, sys.RISCV64) - addF("math/bits", "RotateLeft32", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue2(ssa.OpRotateLeft32, types.Types[types.TUINT32], args[0], args[1]) - }, - sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm) - addF("math/bits", "RotateLeft64", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue2(ssa.OpRotateLeft64, types.Types[types.TUINT64], args[0], args[1]) - }, - sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm) - alias("math/bits", "RotateLeft", "math/bits", "RotateLeft64", p8...) - - makeOnesCountAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - if buildcfg.GOAMD64 >= 2 { - return s.newValue1(op, types.Types[types.TINT], args[0]) - } - - v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasPOPCNT) - b := s.endBlock() - b.Kind = ssa.BlockIf - b.SetControl(v) - bTrue := s.f.NewBlock(ssa.BlockPlain) - bFalse := s.f.NewBlock(ssa.BlockPlain) - bEnd := s.f.NewBlock(ssa.BlockPlain) - b.AddEdgeTo(bTrue) - b.AddEdgeTo(bFalse) - b.Likely = ssa.BranchLikely // most machines have popcnt nowadays - - // We have the intrinsic - use it directly. - s.startBlock(bTrue) - s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0]) - s.endBlock().AddEdgeTo(bEnd) - - // Call the pure Go version. - s.startBlock(bFalse) - s.vars[n] = s.callResult(n, callNormal) // types.Types[TINT] - s.endBlock().AddEdgeTo(bEnd) - - // Merge results. 
-			s.startBlock(bEnd)
-			return s.variable(n, types.Types[types.TINT])
-		}
-	}
-	addF("math/bits", "OnesCount64",
-		makeOnesCountAMD64(ssa.OpPopCount64),
-		sys.AMD64)
-	addF("math/bits", "OnesCount64",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0])
-		},
-		sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
-	addF("math/bits", "OnesCount32",
-		makeOnesCountAMD64(ssa.OpPopCount32),
-		sys.AMD64)
-	addF("math/bits", "OnesCount32",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0])
-		},
-		sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
-	addF("math/bits", "OnesCount16",
-		makeOnesCountAMD64(ssa.OpPopCount16),
-		sys.AMD64)
-	addF("math/bits", "OnesCount16",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0])
-		},
-		sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
-	addF("math/bits", "OnesCount8",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue1(ssa.OpPopCount8, types.Types[types.TINT], args[0])
-		},
-		sys.S390X, sys.PPC64, sys.Wasm)
-	addF("math/bits", "OnesCount",
-		makeOnesCountAMD64(ssa.OpPopCount64),
-		sys.AMD64)
-	addF("math/bits", "Mul64",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1])
-		},
-		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64, sys.Loong64)
-	alias("math/bits", "Mul", "math/bits", "Mul64", p8...)
-	alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...)
-	addF("math/bits", "Add64",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
-		},
-		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
-	alias("math/bits", "Add", "math/bits", "Add64", p8...)
-	alias("internal/runtime/math", "Add64", "math/bits", "Add64", all...)
-	addF("math/bits", "Sub64",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
-		},
-		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
-	alias("math/bits", "Sub", "math/bits", "Sub64", p8...)
-	addF("math/bits", "Div64",
-		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
-			// check for divide-by-zero/overflow and panic with appropriate message
-			cmpZero := s.newValue2(s.ssaOp(ir.ONE, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[2], s.zeroVal(types.Types[types.TUINT64]))
-			s.check(cmpZero, ir.Syms.Panicdivide)
-			cmpOverflow := s.newValue2(s.ssaOp(ir.OLT, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[0], args[2])
-			s.check(cmpOverflow, ir.Syms.Panicoverflow)
-			return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
-		},
-		sys.AMD64)
-	alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64)
-
-	alias("internal/runtime/sys", "TrailingZeros8", "math/bits", "TrailingZeros8", all...)
-	alias("internal/runtime/sys", "TrailingZeros32", "math/bits", "TrailingZeros32", all...)
- alias("internal/runtime/sys", "TrailingZeros64", "math/bits", "TrailingZeros64", all...) - alias("internal/runtime/sys", "Len8", "math/bits", "Len8", all...) - alias("internal/runtime/sys", "Len64", "math/bits", "Len64", all...) - alias("internal/runtime/sys", "OnesCount64", "math/bits", "OnesCount64", all...) - - /******** sync/atomic ********/ - - // Note: these are disabled by flag_race in findIntrinsic below. - alias("sync/atomic", "LoadInt32", "internal/runtime/atomic", "Load", all...) - alias("sync/atomic", "LoadInt64", "internal/runtime/atomic", "Load64", all...) - alias("sync/atomic", "LoadPointer", "internal/runtime/atomic", "Loadp", all...) - alias("sync/atomic", "LoadUint32", "internal/runtime/atomic", "Load", all...) - alias("sync/atomic", "LoadUint64", "internal/runtime/atomic", "Load64", all...) - alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load", p4...) - alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load64", p8...) - - alias("sync/atomic", "StoreInt32", "internal/runtime/atomic", "Store", all...) - alias("sync/atomic", "StoreInt64", "internal/runtime/atomic", "Store64", all...) - // Note: not StorePointer, that needs a write barrier. Same below for {CompareAnd}Swap. - alias("sync/atomic", "StoreUint32", "internal/runtime/atomic", "Store", all...) - alias("sync/atomic", "StoreUint64", "internal/runtime/atomic", "Store64", all...) - alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store", p4...) - alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store64", p8...) - - alias("sync/atomic", "SwapInt32", "internal/runtime/atomic", "Xchg", all...) - alias("sync/atomic", "SwapInt64", "internal/runtime/atomic", "Xchg64", all...) - alias("sync/atomic", "SwapUint32", "internal/runtime/atomic", "Xchg", all...) - alias("sync/atomic", "SwapUint64", "internal/runtime/atomic", "Xchg64", all...) - alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg", p4...) - alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg64", p8...) - - alias("sync/atomic", "CompareAndSwapInt32", "internal/runtime/atomic", "Cas", all...) - alias("sync/atomic", "CompareAndSwapInt64", "internal/runtime/atomic", "Cas64", all...) - alias("sync/atomic", "CompareAndSwapUint32", "internal/runtime/atomic", "Cas", all...) - alias("sync/atomic", "CompareAndSwapUint64", "internal/runtime/atomic", "Cas64", all...) - alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas", p4...) - alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas64", p8...) - - alias("sync/atomic", "AddInt32", "internal/runtime/atomic", "Xadd", all...) - alias("sync/atomic", "AddInt64", "internal/runtime/atomic", "Xadd64", all...) - alias("sync/atomic", "AddUint32", "internal/runtime/atomic", "Xadd", all...) - alias("sync/atomic", "AddUint64", "internal/runtime/atomic", "Xadd64", all...) - alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd", p4...) - alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd64", p8...) 
- - alias("sync/atomic", "AndInt32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64) - alias("sync/atomic", "AndUint32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64) - alias("sync/atomic", "AndInt64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64) - alias("sync/atomic", "AndUint64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64) - alias("sync/atomic", "AndUintptr", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64) - alias("sync/atomic", "OrInt32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64) - alias("sync/atomic", "OrUint32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64) - alias("sync/atomic", "OrInt64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64) - alias("sync/atomic", "OrUint64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64) - alias("sync/atomic", "OrUintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64) - - /******** math/big ********/ - alias("math/big", "mulWW", "math/bits", "Mul64", p8...) -} - -// findIntrinsic returns a function which builds the SSA equivalent of the -// function identified by the symbol sym. If sym is not an intrinsic call, returns nil. -func findIntrinsic(sym *types.Sym) intrinsicBuilder { - if sym == nil || sym.Pkg == nil { - return nil - } - pkg := sym.Pkg.Path - if sym.Pkg == ir.Pkgs.Runtime { - pkg = "runtime" - } - if base.Flag.Race && pkg == "sync/atomic" { - // The race detector needs to be able to intercept these calls. - // We can't intrinsify them. - return nil - } - // Skip intrinsifying math functions (which may contain hard-float - // instructions) when soft-float - if Arch.SoftFloat && pkg == "math" { - return nil - } - - fn := sym.Name - if ssa.IntrinsicsDisable { - if pkg == "runtime" && (fn == "getcallerpc" || fn == "getcallersp" || fn == "getclosureptr") { - // These runtime functions don't have definitions, must be intrinsics. - } else { - return nil - } - } - return intrinsics[intrinsicKey{Arch.LinkArch.Arch, pkg, fn}] -} - -func IsIntrinsicCall(n *ir.CallExpr) bool { - if n == nil { - return false - } - name, ok := n.Fun.(*ir.Name) - if !ok { - return false - } - return findIntrinsic(name.Sym()) != nil -} - // intrinsicCall converts a call to a recognized intrinsic function into the intrinsic SSA operation. func (s *state) intrinsicCall(n *ir.CallExpr) *ssa.Value { v := findIntrinsic(n.Fun.Sym())(s, n, s.intrinsicArgs(n)) -- 2.48.1