Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimise float <-> int register moves on riscv64
author Michael Munday <mike.munday@lowrisc.org>
Wed, 17 Jul 2024 22:54:43 +0000 (23:54 +0100)
committer Gopher Robot <gobot@golang.org>
Tue, 5 Aug 2025 15:27:15 +0000 (08:27 -0700)
Use the FMV* instructions to move values between the floating point and
integer register files.

Note: I'm unsure why there is a slowdown in the Float32bits benchmark.
I've checked, and an FMVXS instruction is being used as expected. There
are multiple loads and other instructions in the main loop.

goos: linux
goarch: riscv64
pkg: math
cpu: Spacemit(R) X60
                    │ fmv-before.txt │            fmv-after.txt            │
                    │     sec/op     │   sec/op     vs base                │
Acos                     122.7n ± 0%   122.7n ± 0%        ~ (p=1.000 n=10)
Acosh                    197.2n ± 0%   191.5n ± 0%   -2.89% (p=0.000 n=10)
Asin                     122.7n ± 0%   122.7n ± 0%        ~ (p=0.474 n=10)
Asinh                    231.0n ± 0%   224.1n ± 0%   -2.99% (p=0.000 n=10)
Atan                     91.39n ± 0%   91.41n ± 0%        ~ (p=0.465 n=10)
Atanh                    210.3n ± 0%   203.4n ± 0%   -3.26% (p=0.000 n=10)
Atan2                    149.6n ± 0%   149.6n ± 0%        ~ (p=0.721 n=10)
Cbrt                     176.5n ± 0%   165.9n ± 0%   -6.01% (p=0.000 n=10)
Ceil                     25.67n ± 0%   24.42n ± 0%   -4.87% (p=0.000 n=10)
Copysign                 3.756n ± 0%   3.756n ± 0%        ~ (p=0.149 n=10)
Cos                      95.15n ± 0%   95.15n ± 0%        ~ (p=0.374 n=10)
Cosh                     228.6n ± 0%   224.7n ± 0%   -1.71% (p=0.000 n=10)
Erf                      115.2n ± 0%   115.2n ± 0%        ~ (p=0.474 n=10)
Erfc                     116.4n ± 0%   116.4n ± 0%        ~ (p=0.628 n=10)
Erfinv                   133.3n ± 0%   133.3n ± 0%        ~ (p=1.000 n=10)
Erfcinv                  133.3n ± 0%   133.3n ± 0%        ~ (p=1.000 n=10)
Exp                      194.1n ± 0%   190.3n ± 0%   -1.93% (p=0.000 n=10)
ExpGo                    204.7n ± 0%   200.3n ± 0%   -2.15% (p=0.000 n=10)
Expm1                    137.7n ± 0%   135.2n ± 0%   -1.82% (p=0.000 n=10)
Exp2                     173.4n ± 0%   169.0n ± 0%   -2.54% (p=0.000 n=10)
Exp2Go                   182.8n ± 0%   178.4n ± 0%   -2.41% (p=0.000 n=10)
Abs                      3.756n ± 0%   3.756n ± 0%        ~ (p=0.157 n=10)
Dim                      12.52n ± 0%   12.52n ± 0%        ~ (p=0.737 n=10)
Floor                    25.67n ± 0%   24.42n ± 0%   -4.87% (p=0.000 n=10)
Max                      21.29n ± 0%   20.03n ± 0%   -5.92% (p=0.000 n=10)
Min                      21.28n ± 0%   20.04n ± 0%   -5.85% (p=0.000 n=10)
Mod                      344.9n ± 0%   319.2n ± 0%   -7.45% (p=0.000 n=10)
Frexp                    55.71n ± 0%   48.85n ± 0%  -12.30% (p=0.000 n=10)
Gamma                    165.9n ± 0%   167.8n ± 0%   +1.15% (p=0.000 n=10)
Hypot                    73.24n ± 0%   70.74n ± 0%   -3.41% (p=0.000 n=10)
HypotGo                  84.50n ± 0%   82.63n ± 0%   -2.21% (p=0.000 n=10)
Ilogb                    49.45n ± 0%   45.70n ± 0%   -7.59% (p=0.000 n=10)
J0                       556.5n ± 0%   544.0n ± 0%   -2.25% (p=0.000 n=10)
J1                       555.3n ± 0%   542.8n ± 0%   -2.24% (p=0.000 n=10)
Jn                       1.181µ ± 0%   1.156µ ± 0%   -2.12% (p=0.000 n=10)
Ldexp                    59.47n ± 0%   53.84n ± 0%   -9.47% (p=0.000 n=10)
Lgamma                   167.2n ± 0%   154.6n ± 0%   -7.51% (p=0.000 n=10)
Log                      160.9n ± 0%   154.6n ± 0%   -3.92% (p=0.000 n=10)
Logb                     49.45n ± 0%   45.70n ± 0%   -7.58% (p=0.000 n=10)
Log1p                    147.1n ± 0%   137.1n ± 0%   -6.80% (p=0.000 n=10)
Log10                    162.1n ± 1%   154.6n ± 0%   -4.63% (p=0.000 n=10)
Log2                     66.99n ± 0%   60.72n ± 0%   -9.36% (p=0.000 n=10)
Modf                     29.42n ± 0%   26.29n ± 0%  -10.64% (p=0.000 n=10)
Nextafter32              41.95n ± 0%   37.88n ± 0%   -9.70% (p=0.000 n=10)
Nextafter64              38.82n ± 0%   33.49n ± 0%  -13.73% (p=0.000 n=10)
PowInt                   252.3n ± 0%   237.3n ± 0%   -5.95% (p=0.000 n=10)
PowFrac                  615.5n ± 0%   589.7n ± 0%   -4.19% (p=0.000 n=10)
Pow10Pos                 10.64n ± 0%   10.64n ± 0%        ~ (p=1.000 n=10)
Pow10Neg                 24.42n ± 0%   15.02n ± 0%  -38.49% (p=0.000 n=10)
Round                    21.91n ± 0%   18.16n ± 0%  -17.12% (p=0.000 n=10)
RoundToEven              24.42n ± 0%   21.29n ± 0%  -12.84% (p=0.000 n=10)
Remainder                308.0n ± 0%   291.2n ± 0%   -5.44% (p=0.000 n=10)
Signbit                  10.02n ± 0%   10.02n ± 0%        ~ (p=1.000 n=10)
Sin                      102.7n ± 0%   102.7n ± 0%        ~ (p=0.211 n=10)
Sincos                   124.0n ± 1%   123.3n ± 0%   -0.56% (p=0.002 n=10)
Sinh                     239.1n ± 0%   234.7n ± 0%   -1.84% (p=0.000 n=10)
SqrtIndirect             2.504n ± 0%   2.504n ± 0%        ~ (p=0.303 n=10)
SqrtLatency              15.03n ± 0%   15.02n ± 0%        ~ (p=0.598 n=10)
SqrtIndirectLatency      15.02n ± 0%   15.02n ± 0%        ~ (p=0.907 n=10)
SqrtGoLatency            165.3n ± 0%   157.2n ± 0%   -4.90% (p=0.000 n=10)
SqrtPrime                3.801µ ± 0%   3.802µ ± 0%        ~ (p=1.000 n=10)
Tan                      125.2n ± 0%   125.2n ± 0%        ~ (p=0.458 n=10)
Tanh                     244.2n ± 0%   239.9n ± 0%   -1.76% (p=0.000 n=10)
Trunc                    25.67n ± 0%   24.42n ± 0%   -4.87% (p=0.000 n=10)
Y0                       550.2n ± 0%   538.1n ± 0%   -2.21% (p=0.000 n=10)
Y1                       552.8n ± 0%   540.6n ± 0%   -2.21% (p=0.000 n=10)
Yn                       1.168µ ± 0%   1.143µ ± 0%   -2.14% (p=0.000 n=10)
Float64bits              8.139n ± 0%   4.385n ± 0%  -46.13% (p=0.000 n=10)
Float64frombits          7.512n ± 0%   3.759n ± 0%  -49.96% (p=0.000 n=10)
Float32bits              8.138n ± 0%   9.393n ± 0%  +15.42% (p=0.000 n=10)
Float32frombits          7.513n ± 0%   3.757n ± 0%  -49.98% (p=0.000 n=10)
FMA                      3.756n ± 0%   3.756n ± 0%        ~ (p=0.246 n=10)
geomean                  77.43n        72.42n        -6.47%

Change-Id: I8dac69b1d17cb3d2af78d1c844d2b5d80000d667
Reviewed-on: https://go-review.googlesource.com/c/go/+/599235
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Michael Munday <mikemndy@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/riscv64/ssa.go
src/cmd/compile/internal/ssa/_gen/RISCV64.rules
src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteRISCV64.go
test/codegen/math.go

index f54ea47c88b0ca0a42cdba0c48df11f3eb661084..ed20782a29cd91b43a88493fcff69673fe8b0e9c 100644 (file)
@@ -417,7 +417,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
        case ssa.OpRISCV64FSQRTS, ssa.OpRISCV64FNEGS, ssa.OpRISCV64FABSD, ssa.OpRISCV64FSQRTD, ssa.OpRISCV64FNEGD,
-               ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX,
+               ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVXS, ssa.OpRISCV64FMVDX, ssa.OpRISCV64FMVXD,
                ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
                ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD,
                ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW,
index a99a16adff18031a709918c14cb812e41c5959d7..69bf1c7c9e4f0e00e7060cfa86f853fa3367d61c 100644 (file)
        (base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
        (MOV(B|BU|H|HU|W|WU|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
 
+(FMOV(W|D)load [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) &&
+       is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) &&
+       (base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+       (FMOV(W|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
+
 (MOV(B|H|W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) &&
        is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) &&
        (base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
        (base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
        (MOV(B|H|W|D)storezero [off1+off2] {mergeSym(sym1,sym2)} base mem)
 
+(FMOV(W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) &&
+       is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) &&
+       (base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+       (FMOV(W|D)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+
 (MOV(B|BU|H|HU|W|WU|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
        (MOV(B|BU|H|HU|W|WU|D)load [off1+int32(off2)] {sym} base mem)
 
+(FMOV(W|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
+       (FMOV(W|D)load [off1+int32(off2)] {sym} base mem)
+
 (MOV(B|H|W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
        (MOV(B|H|W|D)store [off1+int32(off2)] {sym} base val mem)
 
 (MOV(B|H|W|D)storezero [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
        (MOV(B|H|W|D)storezero [off1+int32(off2)] {sym} base mem)
 
+(FMOV(W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
+       (FMOV(W|D)store [off1+int32(off2)] {sym} base val mem)
+
 // Similarly, fold ADDI into MOVaddr to avoid confusing live variable analysis
 // with OffPtr -> ADDI.
 (ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x)
 (MOVHUreg <t> x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem)
 (MOVWUreg <t> x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem)
 
+// Replace load from same location as preceding store with copy.
+(MOVDload  [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXD x)
+(FMOVDload [off] {sym} ptr1 (MOVDstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVDX x)
+(MOVWload  [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXS x)
+(MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVWUreg (FMVXS x))
+(FMOVWload [off] {sym} ptr1 (MOVWstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVSX x)
+
 // If a register move has only 1 use, just use the same register without emitting instruction
 // MOVnop does not emit an instruction, only for ensuring the type.
 (MOVDreg x) && x.Uses == 1 => (MOVDnop x)
index c12bc4762162b56c03c09313358fcac0dda9459c..d468a00b0f74167feb0639074b8da39b4c85dc11 100644 (file)
@@ -453,7 +453,8 @@ func init() {
                {name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS", commutative: true, typ: "Float32"},                                       // -(arg0 * arg1) - arg2
                {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS", typ: "Float32"},                                                            // sqrt(arg0)
                {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS", typ: "Float32"},                                                              // -arg0
-               {name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"},                                                              // reinterpret arg0 as float
+               {name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"},                                                              // reinterpret arg0 as float32
+               {name: "FMVXS", argLength: 1, reg: fpgp, asm: "FMVXS", typ: "Int32"},                                                                // reinterpret arg0 as int32, sign extended to 64 bits
                {name: "FCVTSW", argLength: 1, reg: gpfp, asm: "FCVTSW", typ: "Float32"},                                                            // float32(low 32 bits of arg0)
                {name: "FCVTSL", argLength: 1, reg: gpfp, asm: "FCVTSL", typ: "Float32"},                                                            // float32(arg0)
                {name: "FCVTWS", argLength: 1, reg: fpgp, asm: "FCVTWS", typ: "Int32"},                                                              // int32(arg0)
@@ -480,7 +481,8 @@ func init() {
                {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD", typ: "Float64"},                                                              // -arg0
                {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD", typ: "Float64"},                                                              // abs(arg0)
                {name: "FSGNJD", argLength: 2, reg: fp21, asm: "FSGNJD", typ: "Float64"},                                                            // copy sign of arg1 to arg0
-               {name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"},                                                              // reinterpret arg0 as float
+               {name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"},                                                              // reinterpret arg0 as float64
+               {name: "FMVXD", argLength: 1, reg: fpgp, asm: "FMVXD", typ: "Int64"},                                                                // reinterpret arg0 as int64
                {name: "FCVTDW", argLength: 1, reg: gpfp, asm: "FCVTDW", typ: "Float64"},                                                            // float64(low 32 bits of arg0)
                {name: "FCVTDL", argLength: 1, reg: gpfp, asm: "FCVTDL", typ: "Float64"},                                                            // float64(arg0)
                {name: "FCVTWD", argLength: 1, reg: fpgp, asm: "FCVTWD", typ: "Int32"},                                                              // int32(arg0)
index b9c5b1f77cc858617d7047f2eacf3d64de5704e9..60f5278d7b8110f1c5b3c2e0d8c56a39ada6d38b 100644 (file)
@@ -2600,6 +2600,7 @@ const (
        OpRISCV64FSQRTS
        OpRISCV64FNEGS
        OpRISCV64FMVSX
+       OpRISCV64FMVXS
        OpRISCV64FCVTSW
        OpRISCV64FCVTSL
        OpRISCV64FCVTWS
@@ -2625,6 +2626,7 @@ const (
        OpRISCV64FABSD
        OpRISCV64FSGNJD
        OpRISCV64FMVDX
+       OpRISCV64FMVXD
        OpRISCV64FCVTDW
        OpRISCV64FCVTDL
        OpRISCV64FCVTWD
@@ -34985,6 +34987,19 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "FMVXS",
+               argLen: 1,
+               asm:    riscv.AFMVXS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                       },
+               },
+       },
        {
                name:   "FCVTSW",
                argLen: 1,
@@ -35345,6 +35360,19 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "FMVXD",
+               argLen: 1,
+               asm:    riscv.AFMVXD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                       },
+               },
+       },
        {
                name:   "FCVTDW",
                argLen: 1,
index bbdb8179007821524b14f2fc102999343f6d74eb..0dd952f5120b0d745f2df42f47279f147098781d 100644 (file)
@@ -517,6 +517,14 @@ func rewriteValueRISCV64(v *Value) bool {
                return rewriteValueRISCV64_OpRISCV64FMADDD(v)
        case OpRISCV64FMADDS:
                return rewriteValueRISCV64_OpRISCV64FMADDS(v)
+       case OpRISCV64FMOVDload:
+               return rewriteValueRISCV64_OpRISCV64FMOVDload(v)
+       case OpRISCV64FMOVDstore:
+               return rewriteValueRISCV64_OpRISCV64FMOVDstore(v)
+       case OpRISCV64FMOVWload:
+               return rewriteValueRISCV64_OpRISCV64FMOVWload(v)
+       case OpRISCV64FMOVWstore:
+               return rewriteValueRISCV64_OpRISCV64FMOVWstore(v)
        case OpRISCV64FMSUBD:
                return rewriteValueRISCV64_OpRISCV64FMSUBD(v)
        case OpRISCV64FMSUBS:
@@ -3844,6 +3852,250 @@ func rewriteValueRISCV64_OpRISCV64FMADDS(v *Value) bool {
        }
        return false
 }
+func rewriteValueRISCV64_OpRISCV64FMOVDload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (FMOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpRISCV64MOVaddr {
+                       break
+               }
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+                       break
+               }
+               v.reset(OpRISCV64FMOVDload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(base, mem)
+               return true
+       }
+       // match: (FMOVDload [off1] {sym} (ADDI [off2] base) mem)
+       // cond: is32Bit(int64(off1)+off2)
+       // result: (FMOVDload [off1+int32(off2)] {sym} base mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpRISCV64ADDI {
+                       break
+               }
+               off2 := auxIntToInt64(v_0.AuxInt)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1) + off2)) {
+                       break
+               }
+               v.reset(OpRISCV64FMOVDload)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg2(base, mem)
+               return true
+       }
+       // match: (FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _))
+       // cond: isSamePtr(ptr1, ptr2)
+       // result: (FMVDX x)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr1 := v_0
+               if v_1.Op != OpRISCV64MOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(isSamePtr(ptr1, ptr2)) {
+                       break
+               }
+               v.reset(OpRISCV64FMVDX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueRISCV64_OpRISCV64FMOVDstore(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (FMOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpRISCV64MOVaddr {
+                       break
+               }
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+                       break
+               }
+               v.reset(OpRISCV64FMOVDstore)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(base, val, mem)
+               return true
+       }
+       // match: (FMOVDstore [off1] {sym} (ADDI [off2] base) val mem)
+       // cond: is32Bit(int64(off1)+off2)
+       // result: (FMOVDstore [off1+int32(off2)] {sym} base val mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpRISCV64ADDI {
+                       break
+               }
+               off2 := auxIntToInt64(v_0.AuxInt)
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1) + off2)) {
+                       break
+               }
+               v.reset(OpRISCV64FMOVDstore)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg3(base, val, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueRISCV64_OpRISCV64FMOVWload(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (FMOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (FMOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpRISCV64MOVaddr {
+                       break
+               }
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+                       break
+               }
+               v.reset(OpRISCV64FMOVWload)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg2(base, mem)
+               return true
+       }
+       // match: (FMOVWload [off1] {sym} (ADDI [off2] base) mem)
+       // cond: is32Bit(int64(off1)+off2)
+       // result: (FMOVWload [off1+int32(off2)] {sym} base mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpRISCV64ADDI {
+                       break
+               }
+               off2 := auxIntToInt64(v_0.AuxInt)
+               base := v_0.Args[0]
+               mem := v_1
+               if !(is32Bit(int64(off1) + off2)) {
+                       break
+               }
+               v.reset(OpRISCV64FMOVWload)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg2(base, mem)
+               return true
+       }
+       // match: (FMOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _))
+       // cond: isSamePtr(ptr1, ptr2)
+       // result: (FMVSX x)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr1 := v_0
+               if v_1.Op != OpRISCV64MOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(isSamePtr(ptr1, ptr2)) {
+                       break
+               }
+               v.reset(OpRISCV64FMVSX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueRISCV64_OpRISCV64FMOVWstore(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       config := b.Func.Config
+       // match: (FMOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
+       // result: (FMOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               if v_0.Op != OpRISCV64MOVaddr {
+                       break
+               }
+               off2 := auxIntToInt32(v_0.AuxInt)
+               sym2 := auxToSym(v_0.Aux)
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+                       break
+               }
+               v.reset(OpRISCV64FMOVWstore)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(base, val, mem)
+               return true
+       }
+       // match: (FMOVWstore [off1] {sym} (ADDI [off2] base) val mem)
+       // cond: is32Bit(int64(off1)+off2)
+       // result: (FMOVWstore [off1+int32(off2)] {sym} base val mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpRISCV64ADDI {
+                       break
+               }
+               off2 := auxIntToInt64(v_0.AuxInt)
+               base := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(is32Bit(int64(off1) + off2)) {
+                       break
+               }
+               v.reset(OpRISCV64FMOVWstore)
+               v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+               v.Aux = symToAux(sym)
+               v.AddArg3(base, val, mem)
+               return true
+       }
+       return false
+}
 func rewriteValueRISCV64_OpRISCV64FMSUBD(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -4977,6 +5229,25 @@ func rewriteValueRISCV64_OpRISCV64MOVDload(v *Value) bool {
                v.AddArg2(base, mem)
                return true
        }
+       // match: (MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _))
+       // cond: isSamePtr(ptr1, ptr2)
+       // result: (FMVXD x)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr1 := v_0
+               if v_1.Op != OpRISCV64FMOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(isSamePtr(ptr1, ptr2)) {
+                       break
+               }
+               v.reset(OpRISCV64FMVXD)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValueRISCV64_OpRISCV64MOVDnop(v *Value) bool {
@@ -5658,6 +5929,7 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
+       typ := &b.Func.Config.Types
        // match: (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
        // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
@@ -5701,6 +5973,27 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool {
                v.AddArg2(base, mem)
                return true
        }
+       // match: (MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _))
+       // cond: isSamePtr(ptr1, ptr2)
+       // result: (MOVWUreg (FMVXS x))
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr1 := v_0
+               if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(isSamePtr(ptr1, ptr2)) {
+                       break
+               }
+               v.reset(OpRISCV64MOVWUreg)
+               v0 := b.NewValue0(v_1.Pos, OpRISCV64FMVXS, typ.Int32)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueRISCV64_OpRISCV64MOVWUreg(v *Value) bool {
@@ -5891,6 +6184,25 @@ func rewriteValueRISCV64_OpRISCV64MOVWload(v *Value) bool {
                v.AddArg2(base, mem)
                return true
        }
+       // match: (MOVWload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _))
+       // cond: isSamePtr(ptr1, ptr2)
+       // result: (FMVXS x)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr1 := v_0
+               if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               x := v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(isSamePtr(ptr1, ptr2)) {
+                       break
+               }
+               v.reset(OpRISCV64FMVXS)
+               v.AddArg(x)
+               return true
+       }
        return false
 }
 func rewriteValueRISCV64_OpRISCV64MOVWreg(v *Value) bool {
index 87d9cd7b2715ba32c2a8a05e82d33074148285aa..4272e4ef887bff94cc97ec7bb0b9d7f9e5412c15 100644 (file)
@@ -160,6 +160,7 @@ func fromFloat64(f64 float64) uint64 {
        // loong64:"MOVV\tF.*, R.*"
        // ppc64x:"MFVSRD"
        // mips64/hardfloat:"MOVV\tF.*, R.*"
+       // riscv64:"FMVXD"
        return math.Float64bits(f64+1) + 1
 }
 
@@ -168,6 +169,7 @@ func fromFloat32(f32 float32) uint32 {
        // arm64:"FMOVS\tF.*, R.*"
        // loong64:"MOVW\tF.*, R.*"
        // mips64/hardfloat:"MOVW\tF.*, R.*"
+       // riscv64:"FMVXW"
        return math.Float32bits(f32+1) + 1
 }
 
@@ -177,6 +179,7 @@ func toFloat64(u64 uint64) float64 {
        // loong64:"MOVV\tR.*, F.*"
        // ppc64x:"MTVSRD"
        // mips64/hardfloat:"MOVV\tR.*, F.*"
+       // riscv64:"FMVDX"
        return math.Float64frombits(u64+1) + 1
 }
 
@@ -185,6 +188,7 @@ func toFloat32(u32 uint32) float32 {
        // arm64:"FMOVS\tR.*, F.*"
        // loong64:"MOVW\tR.*, F.*"
        // mips64/hardfloat:"MOVW\tR.*, F.*"
+       // riscv64:"FMVWX"
        return math.Float32frombits(u32+1) + 1
 }