]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/asm: add support for loong64 FMA instructions
authorXiaolin Zhao <zhaoxiaolin@loongson.cn>
Thu, 31 Oct 2024 08:57:23 +0000 (16:57 +0800)
committerabner chenc <chenguoqi@loongson.cn>
Sat, 2 Nov 2024 01:36:19 +0000 (01:36 +0000)
Add support for assembling the FMA instructions present in the LoongArch
base ISA v1.00. This requires adding a new instruction format and making
use of a third source operand, which is put in RestArgs[0].

The single-precision instructions have the `.s` prefix in their official
mnemonics, and similar Go asm instructions all have `S` prefix for the
other architectures having FMA support, but in this change they instead
have `F` prefix in Go asm because loong64 currently follows the mips
backends in the naming convention. This could be changed later because
FMA is fully expressible in pure Go, making it unlikely to have to hand-
write such assembly in the wild.

Example mapping between actual encoding and Go asm syntax:

fmadd.s fd, fj, fk, fa -> FMADDF fa, fk, fj, fd
(prog.From = fa, prog.Reg = fk, prog.RestArgs[0] = fj and prog.To = fd)

fmadd.s fd, fd, fk, fa -> FMADDF fa, fk, fd
(prog.From = fa, prog.Reg = fk and prog.To = fd)

This patch is a copy of CL 477716.
Co-authored-by: WANG Xuerui <git@xen0n.name>
Change-Id: I9b4e4c601d6c5a854ee238f085849666e4faf090
Reviewed-on: https://go-review.googlesource.com/c/go/+/623877
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
src/cmd/asm/internal/asm/testdata/loong64enc1.s
src/cmd/internal/obj/loong64/a.out.go
src/cmd/internal/obj/loong64/anames.go
src/cmd/internal/obj/loong64/asm.go

index 2239fe0c687a937d75ab115ab643bc782ad0743d..8d5d58fcd4b2b1357e0e1ea00f5dfe1b86aa2aba 100644 (file)
@@ -303,6 +303,23 @@ lable2:
        AMMINDBWU       R14, (R13), R12 // ac397138
        AMMINDBVU       R14, (R13), R12 // acb97138
 
+       FMADDF  F2, F14, F9, F16        // 30391108
+       FMADDD  F11, F20, F23, F12      // ecd22508
+       FMSUBF  F3, F11, F31, F22       // f6af5108
+       FMSUBD  F13, F30, F9, F15       // 2ff96608
+       FNMADDF F27, F11, F5, F21       // b5ac9d08
+       FNMADDD F29, F14, F27, F6       // 66bbae08
+       FNMSUBF F17, F8, F12, F8        // 88a1d808
+       FNMSUBD F29, F21, F3, F17       // 71d4ee08
+       FMADDF  F2, F14, F9             // 29391108
+       FMADDD  F11, F20, F23           // f7d22508
+       FMSUBF  F3, F11, F31            // ffaf5108
+       FMSUBD  F13, F30, F9            // 29f96608
+       FNMADDF F27, F11, F5            // a5ac9d08
+       FNMADDD F29, F14, F27           // 7bbbae08
+       FNMSUBF F17, F8, F12            // 8ca1d808
+       FNMSUBD F29, F21, F3            // 63d4ee08
+
        FMINF   F4, F5, F6              // a6900a01
        FMINF   F4, F5                  // a5900a01
        FMIND   F4, F5, F6              // a6100b01
index 9470f9418c7737bda75855447fe25bba597311ca..ed60b28fb89b33e58f5c51963ba58576b13755b6 100644 (file)
@@ -493,6 +493,16 @@ const (
        ARDTIMED
        ACPUCFG
 
+       // 3.2.1.2
+       AFMADDF
+       AFMADDD
+       AFMSUBF
+       AFMSUBD
+       AFNMADDF
+       AFNMADDD
+       AFNMSUBF
+       AFNMSUBD
+
        // 3.2.1.3
        AFMINF
        AFMIND
index 28566d15041192d4a032a1982e6dfbc55665b53c..9893650c9a0eac89851d7ab504932dbd3ae9f28b 100644 (file)
@@ -211,6 +211,14 @@ var Anames = []string{
        "RDTIMEHW",
        "RDTIMED",
        "CPUCFG",
+       "FMADDF",
+       "FMADDD",
+       "FMSUBF",
+       "FMSUBD",
+       "FNMADDF",
+       "FNMADDD",
+       "FNMSUBF",
+       "FNMSUBD",
        "FMINF",
        "FMIND",
        "FMAXF",
index 3f3a352ee11fe557c257743d38263f9daba199c1..8dc787b46bd5195520b17f9c82257670b1f0ec6c 100644 (file)
@@ -92,6 +92,9 @@ var optab = []Optab{
        {AMOVF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},
        {AMOVD, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},
 
+       {AFMADDF, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 39, 4, 0, 0},
+       {AFMADDF, C_FREG, C_FREG, C_FREG, C_FREG, C_NONE, 39, 4, 0, 0},
+
        {AMOVW, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
        {AMOVWU, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
        {AMOVV, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
@@ -1101,6 +1104,15 @@ func buildop(ctxt *obj.Link) {
                        opset(AFSCALEBF, r0)
                        opset(AFSCALEBD, r0)
 
+               case AFMADDF:
+                       opset(AFMADDD, r0)
+                       opset(AFMSUBF, r0)
+                       opset(AFMSUBD, r0)
+                       opset(AFNMADDF, r0)
+                       opset(AFNMADDD, r0)
+                       opset(AFNMSUBF, r0)
+                       opset(AFNMSUBD, r0)
+
                case AAND:
                        opset(AOR, r0)
                        opset(AXOR, r0)
@@ -1257,6 +1269,10 @@ func buildop(ctxt *obj.Link) {
        }
 }
 
+func OP_RRRR(op uint32, r1 uint32, r2 uint32, r3 uint32, r4 uint32) uint32 {
+       return op | (r1&0x1F)<<15 | (r2&0x1F)<<10 | (r3&0x1F)<<5 | (r4 & 0x1F)
+}
+
 // r1 -> rk
 // r2 -> rj
 // r3 -> rd
@@ -1656,6 +1672,13 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
                o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
                o3 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(REGTMP), uint32(p.To.Reg))
 
+       case 39: // fmadd r1, r2, [r3], r4
+               r := int(p.To.Reg)
+               if len(p.RestArgs) > 0 {
+                       r = int(p.GetFrom3().Reg)
+               }
+               o1 = OP_RRRR(c.oprrrr(p.As), uint32(p.From.Reg), uint32(p.Reg), uint32(r), uint32(p.To.Reg))
+
        case 40: // word
                o1 = uint32(c.regoff(&p.From))
 
@@ -1870,6 +1893,30 @@ func (c *ctxt0) regoff(a *obj.Addr) int32 {
        return int32(c.vregoff(a))
 }
 
+func (c *ctxt0) oprrrr(a obj.As) uint32 {
+       switch a {
+       case AFMADDF:
+               return 0x81 << 20 // fmadd.s
+       case AFMADDD:
+               return 0x82 << 20 // fmadd.d
+       case AFMSUBF:
+               return 0x85 << 20 // fmsub.s
+       case AFMSUBD:
+               return 0x86 << 20 // fmsub.d
+       case AFNMADDF:
+               return 0x89 << 20 // fnmadd.f
+       case AFNMADDD:
+               return 0x8a << 20 // fnmadd.d
+       case AFNMSUBF:
+               return 0x8d << 20 // fnmsub.s
+       case AFNMSUBD:
+               return 0x8e << 20 // fnmsub.d
+       }
+
+       c.ctxt.Diag("bad rrrr opcode %v", a)
+       return 0
+}
+
 func (c *ctxt0) oprrr(a obj.As) uint32 {
        switch a {
        case AADD: