Cypherpunks repositories - gostls13.git/commitdiff
cmd/asm, cmd/internal/obj: enable rounding mode suffix for riscv64
author Meng Zhuo <mzh@golangcn.org>
Tue, 20 Jun 2023 03:16:56 +0000 (11:16 +0800)
committer M Zhuo <mzh@golangcn.org>
Wed, 21 Feb 2024 14:34:57 +0000 (14:34 +0000)
This CL adds rounding mode support to the riscv64 floating-point conversion
instructions via an instruction suffix, with 5 modes: RNE, RTZ, RDN, RUP and RMM.

For example, to round to nearest (RNE), we can use `FCVTLD.RNE`.
According to sections 8.7 and 9.5 of the RISC-V manual, we changed these
conversion instructions:

FCVTWS
FCVTLS
FCVTWUS
FCVTLUS
FCVTWD
FCVTLD
FCVTWUD
FCVTLUD

Note: When no suffix is given, all of the instructions above default to round towards zero (RTZ).

Change-Id: I491e522e14d721e24aa7f528ee0c4640c54c5808
Reviewed-on: https://go-review.googlesource.com/c/go/+/504736
Reviewed-by: Joel Sing <joel@sing.id.au>
Run-TryBot: M Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
src/cmd/asm/internal/asm/asm.go
src/cmd/asm/internal/asm/parse.go
src/cmd/asm/internal/asm/testdata/riscv64.s
src/cmd/internal/obj/link.go
src/cmd/internal/obj/riscv/cpu.go
src/cmd/internal/obj/riscv/list.go
src/cmd/internal/obj/riscv/obj.go

index 375ef803bb7b724acd0ccf7c6be3663659768a65..949b688bbdbf2731d758832fde4d17bc5c3cfaeb 100644 (file)
@@ -16,6 +16,7 @@ import (
        "cmd/asm/internal/lex"
        "cmd/internal/obj"
        "cmd/internal/obj/ppc64"
+       "cmd/internal/obj/riscv"
        "cmd/internal/obj/x86"
        "cmd/internal/sys"
 )
@@ -46,7 +47,11 @@ func (p *Parser) append(prog *obj.Prog, cond string, doLabel bool) {
                                p.errorf("%v", err)
                                return
                        }
-
+               case sys.RISCV64:
+                       if err := riscv.ParseSuffix(prog, cond); err != nil {
+                               p.errorf("unrecognized suffix .%q", cond)
+                               return
+                       }
                default:
                        p.errorf("unrecognized suffix .%q", cond)
                        return
index ef6c840dc27c40a9bd275b4557923a94a6acc090..7a52e5409024c0076ecb704b73f3c6df952b2419 100644 (file)
@@ -217,8 +217,8 @@ next:
                for {
                        tok = p.nextToken()
                        if len(operands) == 0 && len(items) == 0 {
-                               if p.arch.InFamily(sys.ARM, sys.ARM64, sys.AMD64, sys.I386) && tok == '.' {
-                                       // Suffixes: ARM conditionals or x86 modifiers.
+                               if p.arch.InFamily(sys.ARM, sys.ARM64, sys.AMD64, sys.I386, sys.RISCV64) && tok == '.' {
+                                       // Suffixes: ARM conditionals, RISCV rounding mode or x86 modifiers.
                                        tok = p.nextToken()
                                        str := p.lex.Text()
                                        if tok != scanner.Ident {
index 072302b2257d72029c274d3d6458bf4dc866e073..a5ab254eaa364bfe60baad670a3085a6b7ff516b 100644 (file)
@@ -233,11 +233,31 @@ start:
 
        // 11.7: Single-Precision Floating-Point Conversion and Move Instructions
        FCVTWS  F0, X5                                  // d31200c0
+       FCVTWS.RNE      F0, X5                          // d30200c0
+       FCVTWS.RTZ      F0, X5                          // d31200c0
+       FCVTWS.RDN      F0, X5                          // d32200c0
+       FCVTWS.RUP      F0, X5                          // d33200c0
+       FCVTWS.RMM      F0, X5                          // d34200c0
        FCVTLS  F0, X5                                  // d31220c0
+       FCVTLS.RNE      F0, X5                          // d30220c0
+       FCVTLS.RTZ      F0, X5                          // d31220c0
+       FCVTLS.RDN      F0, X5                          // d32220c0
+       FCVTLS.RUP      F0, X5                          // d33220c0
+       FCVTLS.RMM      F0, X5                          // d34220c0
        FCVTSW  X5, F0                                  // 538002d0
        FCVTSL  X5, F0                                  // 538022d0
        FCVTWUS F0, X5                                  // d31210c0
+       FCVTWUS.RNE     F0, X5                          // d30210c0
+       FCVTWUS.RTZ     F0, X5                          // d31210c0
+       FCVTWUS.RDN     F0, X5                          // d32210c0
+       FCVTWUS.RUP     F0, X5                          // d33210c0
+       FCVTWUS.RMM     F0, X5                          // d34210c0
        FCVTLUS F0, X5                                  // d31230c0
+       FCVTLUS.RNE     F0, X5                          // d30230c0
+       FCVTLUS.RTZ     F0, X5                          // d31230c0
+       FCVTLUS.RDN     F0, X5                          // d32230c0
+       FCVTLUS.RUP     F0, X5                          // d33230c0
+       FCVTLUS.RMM     F0, X5                          // d34230c0
        FCVTSWU X5, F0                                  // 538012d0
        FCVTSLU X5, F0                                  // 538032d0
        FSGNJS  F1, F0, F2                              // 53011020
@@ -277,11 +297,31 @@ start:
 
        // 12.5: Double-Precision Floating-Point Conversion and Move Instructions
        FCVTWD  F0, X5                                  // d31200c2
+       FCVTWD.RNE      F0, X5                          // d30200c2
+       FCVTWD.RTZ      F0, X5                          // d31200c2
+       FCVTWD.RDN      F0, X5                          // d32200c2
+       FCVTWD.RUP      F0, X5                          // d33200c2
+       FCVTWD.RMM      F0, X5                          // d34200c2
        FCVTLD  F0, X5                                  // d31220c2
+       FCVTLD.RNE      F0, X5                          // d30220c2
+       FCVTLD.RTZ      F0, X5                          // d31220c2
+       FCVTLD.RDN      F0, X5                          // d32220c2
+       FCVTLD.RUP      F0, X5                          // d33220c2
+       FCVTLD.RMM      F0, X5                          // d34220c2
        FCVTDW  X5, F0                                  // 538002d2
        FCVTDL  X5, F0                                  // 538022d2
        FCVTWUD F0, X5                                  // d31210c2
+       FCVTWUD.RNE F0, X5                              // d30210c2
+       FCVTWUD.RTZ F0, X5                              // d31210c2
+       FCVTWUD.RDN F0, X5                              // d32210c2
+       FCVTWUD.RUP F0, X5                              // d33210c2
+       FCVTWUD.RMM F0, X5                              // d34210c2
        FCVTLUD F0, X5                                  // d31230c2
+       FCVTLUD.RNE F0, X5                              // d30230c2
+       FCVTLUD.RTZ F0, X5                              // d31230c2
+       FCVTLUD.RDN F0, X5                              // d32230c2
+       FCVTLUD.RUP F0, X5                              // d33230c2
+       FCVTLUD.RMM F0, X5                              // d34230c2
        FCVTDWU X5, F0                                  // 538012d2
        FCVTDLU X5, F0                                  // 538032d2
        FCVTSD  F0, F1                                  // d3001040
index 0b7878656caf751267704fdf66b0746e2a2540d6..5be493e176727d30141e0aa09b08d13734afb431 100644 (file)
@@ -314,7 +314,7 @@ type Prog struct {
        RegTo2   int16     // 2nd destination operand
        Mark     uint16    // bitmask of arch-specific items
        Optab    uint16    // arch-specific opcode index
-       Scond    uint8     // bits that describe instruction suffixes (e.g. ARM conditions)
+       Scond    uint8     // bits that describe instruction suffixes (e.g. ARM conditions, RISCV Rounding Mode)
        Back     uint8     // for x86 back end: backwards branch state
        Ft       uint8     // for x86 back end: type index of Prog.From
        Tt       uint8     // for x86 back end: type index of Prog.To
index edd1ac820bf03a691007a9832272e4c73e9d8fe3..919f07b1a9727109678dd9b70cc9851f3bfcb94a 100644 (file)
 
 package riscv
 
-import "cmd/internal/obj"
+import (
+       "errors"
+       "fmt"
+
+       "cmd/internal/obj"
+)
 
 //go:generate go run ../stringer.go -i $GOFILE -o anames.go -p riscv
 
@@ -607,6 +612,50 @@ const (
        ALAST
 )
 
+// opSuffix encoding to uint8 which fit into p.Scond
+var rmSuffixSet = map[string]uint8{
+       "RNE": RM_RNE,
+       "RTZ": RM_RTZ,
+       "RDN": RM_RDN,
+       "RUP": RM_RUP,
+       "RMM": RM_RMM,
+}
+
+const rmSuffixBit uint8 = 1 << 7
+
+func rmSuffixEncode(s string) (uint8, error) {
+       if s == "" {
+               return 0, errors.New("empty suffix")
+       }
+       enc, ok := rmSuffixSet[s]
+       if !ok {
+               return 0, fmt.Errorf("invalid encoding for unknown suffix:%q", s)
+       }
+       return enc | rmSuffixBit, nil
+}
+
+func rmSuffixString(u uint8) (string, error) {
+       if u&rmSuffixBit == 0 {
+               return "", fmt.Errorf("invalid suffix, require round mode bit:%x", u)
+       }
+
+       u &^= rmSuffixBit
+       for k, v := range rmSuffixSet {
+               if v == u {
+                       return k, nil
+               }
+       }
+       return "", fmt.Errorf("unknown suffix:%x", u)
+}
+
+const (
+       RM_RNE uint8 = iota // Round to Nearest, ties to Even
+       RM_RTZ              // Round towards Zero
+       RM_RDN              // Round Down
+       RM_RUP              // Round Up
+       RM_RMM              // Round to Nearest, ties to Max Magnitude
+)
+
 // All unary instructions which write to their arguments (as opposed to reading
 // from them) go here. The assembly parser uses this information to populate
 // its AST in a semantically reasonable way.
index de90961e3255202afcff368ed6d5c6832eacc674..bc87539f271b8590d09f6bcb8fad8fc071dde979 100644 (file)
@@ -13,6 +13,7 @@ import (
 func init() {
        obj.RegisterRegister(obj.RBaseRISCV, REG_END, RegName)
        obj.RegisterOpcode(obj.ABaseRISCV, Anames)
+       obj.RegisterOpSuffix("riscv64", opSuffixString)
 }
 
 func RegName(r int) string {
@@ -31,3 +32,18 @@ func RegName(r int) string {
                return fmt.Sprintf("Rgok(%d)", r-obj.RBaseRISCV)
        }
 }
+
+func opSuffixString(s uint8) string {
+       if s&rmSuffixBit == 0 {
+               return ""
+       }
+
+       ss, err := rmSuffixString(s)
+       if err != nil {
+               ss = fmt.Sprintf("<invalid 0x%x>", s)
+       }
+       if ss == "" {
+               return ss
+       }
+       return fmt.Sprintf(".%s", ss)
+}
index 11d6c202ea0e9fb4d55e586da3c2a5d56a1d1604..3ec740f85a155ec46a30c762731a5b06018386b4 100644 (file)
@@ -28,6 +28,7 @@ import (
        "internal/abi"
        "log"
        "math/bits"
+       "strings"
 )
 
 func buildop(ctxt *obj.Link) {}
@@ -2273,8 +2274,12 @@ func instructionsForProg(p *obj.Prog) []*instruction {
                ins.imm = 0x0ff
 
        case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD:
-               // Set the rounding mode in funct3 to round to zero.
-               ins.funct3 = 1
+               // Set the default rounding mode in funct3 to round to zero.
+               if p.Scond&rmSuffixBit == 0 {
+                       ins.funct3 = uint32(RM_RTZ)
+               } else {
+                       ins.funct3 = uint32(p.Scond &^ rmSuffixBit)
+               }
 
        case AFNES, AFNED:
                // Replace FNE[SD] with FEQ[SD] and NOT.
@@ -2478,6 +2483,14 @@ func isUnsafePoint(p *obj.Prog) bool {
        return p.Mark&USES_REG_TMP == USES_REG_TMP || p.From.Reg == REG_TMP || p.To.Reg == REG_TMP || p.Reg == REG_TMP
 }
 
+func ParseSuffix(prog *obj.Prog, cond string) (err error) {
+       switch prog.As {
+       case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD:
+               prog.Scond, err = rmSuffixEncode(strings.TrimPrefix(cond, "."))
+       }
+       return
+}
+
 var LinkRISCV64 = obj.LinkArch{
        Arch:           sys.ArchRISCV64,
        Init:           buildop,