]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/loong64: add atomic memory access instructions support
authorGuoqi Chen <chenguoqi@loongson.cn>
Fri, 31 Mar 2023 19:43:20 +0000 (03:43 +0800)
committerCherry Mui <cherryyz@google.com>
Wed, 22 May 2024 02:04:54 +0000 (02:04 +0000)
The AM* atomic access instruction performs a sequence of “read-modify-write”
operations on a memory cell atomically. Specifically, it retrieves the old
value at the specified address in memory and writes it to the general register
rd, performs some simple operations on the old value in memory and the value
in the general register rk, and then write the result of the operation back
to the memory address pointed to by general register rj.

Go asm syntax:
AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB]{W/V} RK, (RJ), RD
AM{MAX/MIN}[DB]{WU/VU} RK, (RJ), RD

Equivalent platform assembler syntax:
am{swap/add/and/or/xor/max/min}[_db].{w/d} rd, rk, rj
am{max/min}[_db].{wu/du} rd, rk, rj

Ref: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html

Change-Id: I99ea4553ae731675180d63691c19ef334e7e7817
Reviewed-on: https://go-review.googlesource.com/c/go/+/481577
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: Mauri de Souza Meneguzzo <mauri870@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn>
Reviewed-by: WANG Xuerui <git@xen0n.name>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/asm/internal/arch/loong64.go
src/cmd/asm/internal/asm/asm.go
src/cmd/asm/internal/asm/testdata/loong64enc1.s
src/cmd/internal/obj/loong64/a.out.go
src/cmd/internal/obj/loong64/anames.go
src/cmd/internal/obj/loong64/asm.go

index 2958ee1a8681789aef426fcca3060ece0587c039..bf34a94f0782b2106edd3a7e7b0137a2efcc761a 100644 (file)
@@ -55,6 +55,10 @@ func IsLoong64RDTIME(op obj.As) bool {
        return false
 }
 
+func IsLoong64AMO(op obj.As) bool {
+       return loong64.IsAtomicInst(op)
+}
+
 func loong64RegisterNumber(name string, n int16) (int16, bool) {
        switch name {
        case "F":
index 949b688bbdbf2731d758832fde4d17bc5c3cfaeb..b2eaa0a28dcaa289419a93bddcd50d868f79c2e6 100644 (file)
@@ -669,9 +669,17 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
                        prog.Reg = p.getRegister(prog, op, &a[1])
                        prog.To = a[2]
                case sys.Loong64:
-                       prog.From = a[0]
-                       prog.Reg = p.getRegister(prog, op, &a[1])
-                       prog.To = a[2]
+                       switch {
+                       // Loong64 atomic instructions with one input and two outputs.
+                       case arch.IsLoong64AMO(op):
+                               prog.From = a[0]
+                               prog.To = a[1]
+                               prog.RegTo2 = a[2].Reg
+                       default:
+                               prog.From = a[0]
+                               prog.Reg = p.getRegister(prog, op, &a[1])
+                               prog.To = a[2]
+                       }
                case sys.ARM:
                        // Special cases.
                        if arch.IsARMSTREX(op) {
index 5191b66cce262b85cbe2f19aebcee6addabb3fe6..41156febc63475e559f3df82bcca97b2ff6a8174 100644 (file)
@@ -231,3 +231,53 @@ lable2:
 
        MOVV    FCC0, R4                // 04dc1401
        MOVV    R4, FCC0                // 80d81401
+
+       // Loong64 atomic memory access instructions
+       AMSWAPB         R14, (R13), R12 // ac395c38
+       AMSWAPH         R14, (R13), R12 // acb95c38
+       AMSWAPW         R14, (R13), R12 // ac396038
+       AMSWAPV         R14, (R13), R12 // acb96038
+       AMCASB          R14, (R13), R12 // ac395838
+       AMCASH          R14, (R13), R12 // acb95838
+       AMCASW          R14, (R13), R12 // ac395938
+       AMCASV          R14, (R13), R12 // acb95938
+       AMADDW          R14, (R13), R12 // ac396138
+       AMADDV          R14, (R13), R12 // acb96138
+       AMANDW          R14, (R13), R12 // ac396238
+       AMANDV          R14, (R13), R12 // acb96238
+       AMORW           R14, (R13), R12 // ac396338
+       AMORV           R14, (R13), R12 // acb96338
+       AMXORW          R14, (R13), R12 // ac396438
+       AMXORV          R14, (R13), R12 // acb96438
+       AMMAXW          R14, (R13), R12 // ac396538
+       AMMAXV          R14, (R13), R12 // acb96538
+       AMMINW          R14, (R13), R12 // ac396638
+       AMMINV          R14, (R13), R12 // acb96638
+       AMMAXWU         R14, (R13), R12 // ac396738
+       AMMAXVU         R14, (R13), R12 // acb96738
+       AMMINWU         R14, (R13), R12 // ac396838
+       AMMINVU         R14, (R13), R12 // acb96838
+       AMSWAPDBB       R14, (R13), R12 // ac395e38
+       AMSWAPDBH       R14, (R13), R12 // acb95e38
+       AMSWAPDBW       R14, (R13), R12 // ac396938
+       AMSWAPDBV       R14, (R13), R12 // acb96938
+       AMCASDBB        R14, (R13), R12 // ac395a38
+       AMCASDBH        R14, (R13), R12 // acb95a38
+       AMCASDBW        R14, (R13), R12 // ac395b38
+       AMCASDBV        R14, (R13), R12 // acb95b38
+       AMADDDBW        R14, (R13), R12 // ac396a38
+       AMADDDBV        R14, (R13), R12 // acb96a38
+       AMANDDBW        R14, (R13), R12 // ac396b38
+       AMANDDBV        R14, (R13), R12 // acb96b38
+       AMORDBW         R14, (R13), R12 // ac396c38
+       AMORDBV         R14, (R13), R12 // acb96c38
+       AMXORDBW        R14, (R13), R12 // ac396d38
+       AMXORDBV        R14, (R13), R12 // acb96d38
+       AMMAXDBW        R14, (R13), R12 // ac396e38
+       AMMAXDBV        R14, (R13), R12 // acb96e38
+       AMMINDBW        R14, (R13), R12 // ac396f38
+       AMMINDBV        R14, (R13), R12 // acb96f38
+       AMMAXDBWU       R14, (R13), R12 // ac397038
+       AMMAXDBVU       R14, (R13), R12 // acb97038
+       AMMINDBWU       R14, (R13), R12 // ac397138
+       AMMINDBVU       R14, (R13), R12 // acb97138
index d944fcfcb8e4c1c4e603ff190d8d857c2c8c26b3..2dd2a085a8358f989b49cc6372ff0c7454e0fb9c 100644 (file)
@@ -394,6 +394,56 @@ const (
        AMOVVF
        AMOVVD
 
+       // 2.2.7. Atomic Memory Access Instructions
+       AAMSWAPB
+       AAMSWAPH
+       AAMSWAPW
+       AAMSWAPV
+       AAMCASB
+       AAMCASH
+       AAMCASW
+       AAMCASV
+       AAMADDW
+       AAMADDV
+       AAMANDW
+       AAMANDV
+       AAMORW
+       AAMORV
+       AAMXORW
+       AAMXORV
+       AAMMAXW
+       AAMMAXV
+       AAMMINW
+       AAMMINV
+       AAMMAXWU
+       AAMMAXVU
+       AAMMINWU
+       AAMMINVU
+       AAMSWAPDBB
+       AAMSWAPDBH
+       AAMSWAPDBW
+       AAMSWAPDBV
+       AAMCASDBB
+       AAMCASDBH
+       AAMCASDBW
+       AAMCASDBV
+       AAMADDDBW
+       AAMADDDBV
+       AAMANDDBW
+       AAMANDDBV
+       AAMORDBW
+       AAMORDBV
+       AAMXORDBW
+       AAMXORDBV
+       AAMMAXDBW
+       AAMMAXDBV
+       AAMMINDBW
+       AAMMINDBV
+       AAMMAXDBWU
+       AAMMAXDBVU
+       AAMMINDBWU
+       AAMMINDBVU
+
        // 2.2.10. Other Miscellaneous Instructions
        ARDTIMELW
        ARDTIMEHW
index f61756e7a809391dd8df90c7c1067397cb40835b..f21e8c963022820205c2b5a4c0e69f5638d18bff 100644 (file)
@@ -131,6 +131,54 @@ var Anames = []string{
        "MOVDV",
        "MOVVF",
        "MOVVD",
+       "AMSWAPB",
+       "AMSWAPH",
+       "AMSWAPW",
+       "AMSWAPV",
+       "AMCASB",
+       "AMCASH",
+       "AMCASW",
+       "AMCASV",
+       "AMADDW",
+       "AMADDV",
+       "AMANDW",
+       "AMANDV",
+       "AMORW",
+       "AMORV",
+       "AMXORW",
+       "AMXORV",
+       "AMMAXW",
+       "AMMAXV",
+       "AMMINW",
+       "AMMINV",
+       "AMMAXWU",
+       "AMMAXVU",
+       "AMMINWU",
+       "AMMINVU",
+       "AMSWAPDBB",
+       "AMSWAPDBH",
+       "AMSWAPDBW",
+       "AMSWAPDBV",
+       "AMCASDBB",
+       "AMCASDBH",
+       "AMCASDBW",
+       "AMCASDBV",
+       "AMADDDBW",
+       "AMADDDBV",
+       "AMANDDBW",
+       "AMANDDBV",
+       "AMORDBW",
+       "AMORDBV",
+       "AMXORDBW",
+       "AMXORDBV",
+       "AMMAXDBW",
+       "AMMAXDBV",
+       "AMMINDBW",
+       "AMMINDBV",
+       "AMMAXDBWU",
+       "AMMAXDBVU",
+       "AMMINDBWU",
+       "AMMINDBVU",
        "RDTIMELW",
        "RDTIMEHW",
        "RDTIMED",
index 5b823c09af2680783462793c5c8389c8239b7976..4eaf8bf5edc48c424ef491a6134e9f07f3349f6f 100644 (file)
@@ -356,7 +356,7 @@ var optab = []Optab{
        {ATEQ, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0},
 
        {ARDTIMELW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0},
-
+       {AAMSWAPW, C_REG, C_NONE, C_NONE, C_ZOREG, C_REG, 66, 4, 0, 0},
        {ANOOP, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0},
 
        {obj.APCALIGN, C_SCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
@@ -374,6 +374,63 @@ var optab = []Optab{
        {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0},
 }
 
+var atomicInst = map[obj.As]uint32{
+       AAMSWAPB:   0x070B8 << 15, // amswap.b
+       AAMSWAPH:   0x070B9 << 15, // amswap.h
+       AAMSWAPW:   0x070C0 << 15, // amswap.w
+       AAMSWAPV:   0x070C1 << 15, // amswap.d
+       AAMCASB:    0x070B0 << 15, // amcas.b
+       AAMCASH:    0x070B1 << 15, // amcas.h
+       AAMCASW:    0x070B2 << 15, // amcas.w
+       AAMCASV:    0x070B3 << 15, // amcas.d
+       AAMADDW:    0x070C2 << 15, // amadd.w
+       AAMADDV:    0x070C3 << 15, // amadd.d
+       AAMANDW:    0x070C4 << 15, // amand.w
+       AAMANDV:    0x070C5 << 15, // amand.d
+       AAMORW:     0x070C6 << 15, // amor.w
+       AAMORV:     0x070C7 << 15, // amor.d
+       AAMXORW:    0x070C8 << 15, // amxor.w
+       AAMXORV:    0x070C9 << 15, // amxor.d
+       AAMMAXW:    0x070CA << 15, // ammax.w
+       AAMMAXV:    0x070CB << 15, // ammax.d
+       AAMMINW:    0x070CC << 15, // ammin.w
+       AAMMINV:    0x070CD << 15, // ammin.d
+       AAMMAXWU:   0x070CE << 15, // ammax.wu
+       AAMMAXVU:   0x070CF << 15, // ammax.du
+       AAMMINWU:   0x070D0 << 15, // ammin.wu
+       AAMMINVU:   0x070D1 << 15, // ammin.du
+       AAMSWAPDBB: 0x070BC << 15, // amswap_db.b
+       AAMSWAPDBH: 0x070BD << 15, // amswap_db.h
+       AAMSWAPDBW: 0x070D2 << 15, // amswap_db.w
+       AAMSWAPDBV: 0x070D3 << 15, // amswap_db.d
+       AAMCASDBB:  0x070B4 << 15, // amcas_db.b
+       AAMCASDBH:  0x070B5 << 15, // amcas_db.h
+       AAMCASDBW:  0x070B6 << 15, // amcas_db.w
+       AAMCASDBV:  0x070B7 << 15, // amcas_db.d
+       AAMADDDBW:  0x070D4 << 15, // amadd_db.w
+       AAMADDDBV:  0x070D5 << 15, // amadd_db.d
+       AAMANDDBW:  0x070D6 << 15, // amand_db.w
+       AAMANDDBV:  0x070D7 << 15, // amand_db.d
+       AAMORDBW:   0x070D8 << 15, // amor_db.w
+       AAMORDBV:   0x070D9 << 15, // amor_db.d
+       AAMXORDBW:  0x070DA << 15, // amxor_db.w
+       AAMXORDBV:  0x070DB << 15, // amxor_db.d
+       AAMMAXDBW:  0x070DC << 15, // ammax_db.w
+       AAMMAXDBV:  0x070DD << 15, // ammax_db.d
+       AAMMINDBW:  0x070DE << 15, // ammin_db.w
+       AAMMINDBV:  0x070DF << 15, // ammin_db.d
+       AAMMAXDBWU: 0x070E0 << 15, // ammax_db.wu
+       AAMMAXDBVU: 0x070E1 << 15, // ammax_db.du
+       AAMMINDBWU: 0x070E2 << 15, // ammin_db.wu
+       AAMMINDBVU: 0x070E3 << 15, // ammin_db.du
+}
+
+func IsAtomicInst(as obj.As) bool {
+       _, ok := atomicInst[as]
+
+       return ok
+}
+
 // pcAlignPadLength returns the number of bytes required to align pc to alignedValue,
 // reporting an error if alignedValue is not a power of two or is out of range.
 func pcAlignPadLength(ctxt *obj.Link, pc int64, alignedValue int64) int {
@@ -1182,6 +1239,14 @@ func buildop(ctxt *obj.Link) {
 
                case ANOOP:
                        opset(obj.AUNDEF, r0)
+
+               case AAMSWAPW:
+                       for i := range atomicInst {
+                               if i == AAMSWAPW {
+                                       continue
+                               }
+                               opset(i, r0)
+                       }
                }
        }
 }
@@ -1817,6 +1882,18 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
                rel2.Sym = p.From.Sym
                rel2.Type = objabi.R_LOONG64_GOT_LO
                rel2.Add = 0x0
+
+       case 66: // am* From, To, RegTo2 ==> am* RegTo2, From, To
+               rk := p.From.Reg
+               rj := p.To.Reg
+               rd := p.RegTo2
+
+               // See section 2.2.7.1 of https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html
+               // for the register usage constraints.
+               if rd == rj || rd == rk {
+                       c.ctxt.Diag("illegal register combination: %v\n", p)
+               }
+               o1 = OP_RRR(atomicInst[p.As], uint32(rk), uint32(rj), uint32(rd))
        }
 
        out[0] = o1