]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize math.Float64(32)bits and math.Float64(32)frombits on loong64
authorXiaolin Zhao <zhaoxiaolin@loongson.cn>
Mon, 12 Aug 2024 08:41:11 +0000 (16:41 +0800)
committerGopher Robot <gobot@golang.org>
Fri, 13 Sep 2024 19:29:23 +0000 (19:29 +0000)
Use float <-> int register moves without conversion instead of stores
and loads to move float <-> int values like arm64 and mips64.

goos: linux
goarch: loong64
pkg: math
cpu: Loongson-3A6000 @ 2500.00MHz
                    │  bench.old   │               bench.new                │
                    │    sec/op    │    sec/op     vs base                  │
Acos                   15.98n ± 0%    15.94n ± 0%   -0.25% (p=0.000 n=20)
Acosh                  27.75n ± 0%    25.56n ± 0%   -7.89% (p=0.000 n=20)
Asin                   15.85n ± 0%    15.76n ± 0%   -0.57% (p=0.000 n=20)
Asinh                  39.79n ± 0%    37.69n ± 0%   -5.28% (p=0.000 n=20)
Atan                   7.261n ± 0%    7.242n ± 0%   -0.27% (p=0.000 n=20)
Atanh                  28.30n ± 0%    27.62n ± 0%   -2.40% (p=0.000 n=20)
Atan2                  15.85n ± 0%    15.75n ± 0%   -0.63% (p=0.000 n=20)
Cbrt                   27.02n ± 0%    21.08n ± 0%  -21.98% (p=0.000 n=20)
Ceil                   2.830n ± 1%    2.896n ± 1%   +2.31% (p=0.000 n=20)
Copysign              0.8022n ± 0%   0.8004n ± 0%   -0.22% (p=0.000 n=20)
Cos                    11.64n ± 0%    11.61n ± 0%   -0.26% (p=0.000 n=20)
Cosh                   35.98n ± 0%    33.44n ± 0%   -7.05% (p=0.000 n=20)
Erf                    10.09n ± 0%    10.08n ± 0%   -0.10% (p=0.000 n=20)
Erfc                   11.40n ± 0%    11.35n ± 0%   -0.44% (p=0.000 n=20)
Erfinv                 12.31n ± 0%    12.29n ± 0%   -0.16% (p=0.000 n=20)
Erfcinv                12.16n ± 0%    12.17n ± 0%   +0.08% (p=0.000 n=20)
Exp                    28.41n ± 0%    26.44n ± 0%   -6.95% (p=0.000 n=20)
ExpGo                  28.68n ± 0%    27.07n ± 0%   -5.60% (p=0.000 n=20)
Expm1                  17.21n ± 0%    16.75n ± 0%   -2.67% (p=0.000 n=20)
Exp2                   24.71n ± 0%    23.01n ± 0%   -6.88% (p=0.000 n=20)
Exp2Go                 25.17n ± 0%    23.91n ± 0%   -4.99% (p=0.000 n=20)
Abs                   0.8004n ± 0%   0.8004n ± 0%        ~ (p=0.224 n=20)
Dim                    1.201n ± 0%    1.201n ± 0%        ~ (p=1.000 n=20) ¹
Floor                  2.848n ± 0%    2.859n ± 0%   +0.39% (p=0.000 n=20)
Max                    3.074n ± 0%    3.071n ± 0%        ~ (p=0.481 n=20)
Min                    3.179n ± 0%    3.176n ± 0%   -0.09% (p=0.003 n=20)
Mod                    49.62n ± 0%    44.82n ± 0%   -9.67% (p=0.000 n=20)
Frexp                  7.604n ± 0%    6.803n ± 0%  -10.53% (p=0.000 n=20)
Gamma                  18.01n ± 0%    17.61n ± 0%   -2.22% (p=0.000 n=20)
Hypot                  7.204n ± 0%    7.604n ± 0%   +5.55% (p=0.000 n=20)
HypotGo                7.204n ± 0%    7.604n ± 0%   +5.56% (p=0.000 n=20)
Ilogb                  6.003n ± 0%    6.003n ± 0%        ~ (p=0.407 n=20)
J0                     76.43n ± 0%    76.24n ± 0%   -0.25% (p=0.000 n=20)
J1                     76.44n ± 0%    76.44n ± 0%        ~ (p=1.000 n=20)
Jn                     168.2n ± 0%    168.5n ± 0%   +0.18% (p=0.000 n=20)
Ldexp                  8.804n ± 0%    7.604n ± 0%  -13.63% (p=0.000 n=20)
Lgamma                 19.01n ± 0%    19.01n ± 0%        ~ (p=0.695 n=20)
Log                    19.38n ± 0%    19.12n ± 0%   -1.34% (p=0.000 n=20)
Logb                   6.003n ± 0%    6.003n ± 0%        ~ (p=1.000 n=20)
Log1p                  18.57n ± 0%    16.72n ± 0%   -9.96% (p=0.000 n=20)
Log10                  20.67n ± 0%    20.45n ± 0%   -1.06% (p=0.000 n=20)
Log2                   9.605n ± 0%    8.804n ± 0%   -8.34% (p=0.000 n=20)
Modf                   4.402n ± 0%    4.402n ± 0%        ~ (p=1.000 n=20)
Nextafter32            7.204n ± 0%    5.603n ± 0%  -22.22% (p=0.000 n=20)
Nextafter64            6.803n ± 0%    6.003n ± 0%  -11.76% (p=0.000 n=20)
PowInt                 39.62n ± 0%    37.22n ± 0%   -6.06% (p=0.000 n=20)
PowFrac                120.9n ± 0%    108.9n ± 0%   -9.93% (p=0.000 n=20)
Pow10Pos               1.601n ± 0%    1.601n ± 0%        ~ (p=0.487 n=20)
Pow10Neg               2.675n ± 0%    2.675n ± 0%        ~ (p=1.000 n=20)
Round                  3.018n ± 0%    2.401n ± 0%  -20.46% (p=0.000 n=20)
RoundToEven            3.822n ± 0%    3.001n ± 0%  -21.48% (p=0.000 n=20)
Remainder              45.62n ± 0%    42.42n ± 0%   -7.01% (p=0.000 n=20)
Signbit               0.9075n ± 0%   0.8004n ± 0%  -11.81% (p=0.000 n=20)
Sin                    12.65n ± 0%    12.65n ± 0%        ~ (p=0.503 n=20)
Sincos                 14.81n ± 0%    14.60n ± 0%   -1.42% (p=0.000 n=20)
Sinh                   36.75n ± 0%    35.11n ± 0%   -4.46% (p=0.000 n=20)
SqrtIndirect           1.201n ± 0%    1.201n ± 0%        ~ (p=1.000 n=20) ¹
SqrtLatency            4.002n ± 0%    4.002n ± 0%        ~ (p=1.000 n=20)
SqrtIndirectLatency    4.002n ± 0%    4.002n ± 0%        ~ (p=1.000 n=20)
SqrtGoLatency          52.85n ± 0%    40.82n ± 0%  -22.76% (p=0.000 n=20)
SqrtPrime              887.4n ± 0%    887.4n ± 0%        ~ (p=0.751 n=20)
Tan                    13.95n ± 0%    13.97n ± 0%   +0.18% (p=0.000 n=20)
Tanh                   36.79n ± 0%    34.89n ± 0%   -5.16% (p=0.000 n=20)
Trunc                  2.849n ± 0%    2.861n ± 0%   +0.42% (p=0.000 n=20)
Y0                     77.44n ± 0%    77.64n ± 0%   +0.26% (p=0.000 n=20)
Y1                     74.41n ± 0%    74.33n ± 0%   -0.11% (p=0.000 n=20)
Yn                     158.7n ± 0%    159.0n ± 0%   +0.19% (p=0.000 n=20)
Float64bits           0.8774n ± 0%   0.4002n ± 0%  -54.39% (p=0.000 n=20)
Float64frombits       0.8042n ± 0%   0.4002n ± 0%  -50.24% (p=0.000 n=20)
Float32bits           1.1230n ± 0%   0.5336n ± 0%  -52.48% (p=0.000 n=20)
Float32frombits       1.0670n ± 0%   0.8004n ± 0%  -24.99% (p=0.000 n=20)
FMA                    2.001n ± 0%    2.001n ± 0%        ~ (p=0.605 n=20)
geomean                10.87n         10.10n        -7.15%
¹ all samples are equal

goos: linux
goarch: loong64
pkg: math
cpu: Loongson-3A5000 @ 2500.00MHz
                    │  bench.old   │              bench.new               │
                    │    sec/op    │    sec/op     vs base                │
Acos                   33.10n ± 0%    31.95n ± 2%   -3.46% (p=0.000 n=20)
Acosh                  58.38n ± 0%    50.44n ± 0%  -13.60% (p=0.000 n=20)
Asin                   32.70n ± 0%    31.94n ± 0%   -2.32% (p=0.000 n=20)
Asinh                  57.65n ± 0%    50.83n ± 0%  -11.82% (p=0.000 n=20)
Atan                   14.21n ± 0%    14.21n ± 0%        ~ (p=0.501 n=20)
Atanh                  60.86n ± 0%    54.44n ± 0%  -10.56% (p=0.000 n=20)
Atan2                  32.02n ± 0%    34.02n ± 0%   +6.25% (p=0.000 n=20)
Cbrt                   55.58n ± 0%    40.64n ± 0%  -26.88% (p=0.000 n=20)
Ceil                   9.566n ± 0%    9.566n ± 0%        ~ (p=0.463 n=20)
Copysign              0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.806 n=20)
Cos                    18.02n ± 0%    18.02n ± 0%        ~ (p=0.191 n=20)
Cosh                   64.44n ± 0%    65.64n ± 0%   +1.86% (p=0.000 n=20)
Erf                    16.15n ± 0%    16.16n ± 0%        ~ (p=0.770 n=20)
Erfc                   18.71n ± 0%    18.83n ± 0%   +0.61% (p=0.000 n=20)
Erfinv                 19.33n ± 0%    19.34n ± 0%        ~ (p=0.513 n=20)
Erfcinv                18.90n ± 0%    19.78n ± 0%   +4.63% (p=0.000 n=20)
Exp                    50.04n ± 0%    49.66n ± 0%   -0.75% (p=0.000 n=20)
ExpGo                  50.03n ± 0%    50.03n ± 0%        ~ (p=0.723 n=20)
Expm1                  28.41n ± 0%    28.27n ± 0%   -0.49% (p=0.000 n=20)
Exp2                   50.08n ± 0%    51.23n ± 0%   +2.31% (p=0.000 n=20)
Exp2Go                 49.77n ± 0%    49.89n ± 0%   +0.24% (p=0.000 n=20)
Abs                   0.8009n ± 0%   0.8006n ± 0%        ~ (p=0.317 n=20)
Dim                    1.987n ± 0%    1.993n ± 0%   +0.28% (p=0.001 n=20)
Floor                  8.543n ± 0%    8.548n ± 0%        ~ (p=0.509 n=20)
Max                    6.670n ± 0%    6.672n ± 0%        ~ (p=0.335 n=20)
Min                    6.694n ± 0%    6.694n ± 0%        ~ (p=0.459 n=20)
Mod                    56.44n ± 0%    53.23n ± 0%   -5.70% (p=0.000 n=20)
Frexp                  8.409n ± 0%    7.606n ± 0%   -9.55% (p=0.000 n=20)
Gamma                  35.64n ± 0%    35.23n ± 0%   -1.15% (p=0.000 n=20)
Hypot                  11.21n ± 0%    10.61n ± 0%   -5.31% (p=0.000 n=20)
HypotGo                11.50n ± 0%    11.01n ± 0%   -4.30% (p=0.000 n=20)
Ilogb                  7.606n ± 0%    6.804n ± 0%  -10.54% (p=0.000 n=20)
J0                     125.3n ± 0%    126.5n ± 0%   +0.96% (p=0.000 n=20)
J1                     124.9n ± 0%    125.3n ± 0%   +0.32% (p=0.000 n=20)
Jn                     264.3n ± 0%    265.9n ± 0%   +0.61% (p=0.000 n=20)
Ldexp                  9.606n ± 0%    9.204n ± 0%   -4.19% (p=0.000 n=20)
Lgamma                 38.82n ± 0%    38.85n ± 0%   +0.06% (p=0.019 n=20)
Log                    38.44n ± 0%    28.04n ± 0%  -27.06% (p=0.000 n=20)
Logb                   8.405n ± 0%    7.605n ± 0%   -9.52% (p=0.000 n=20)
Log1p                  31.62n ± 0%    27.11n ± 0%  -14.26% (p=0.000 n=20)
Log10                  38.83n ± 0%    28.42n ± 0%  -26.81% (p=0.000 n=20)
Log2                   11.21n ± 0%    10.41n ± 0%   -7.14% (p=0.000 n=20)
Modf                   5.204n ± 0%    5.205n ± 0%        ~ (p=0.983 n=20)
Nextafter32            8.809n ± 0%    7.208n ± 0%  -18.18% (p=0.000 n=20)
Nextafter64            8.405n ± 0%    8.406n ± 0%   +0.01% (p=0.007 n=20)
PowInt                 48.83n ± 0%    44.78n ± 0%   -8.28% (p=0.000 n=20)
PowFrac                146.9n ± 0%    142.1n ± 0%   -3.23% (p=0.000 n=20)
Pow10Pos               2.334n ± 0%    2.333n ± 0%        ~ (p=0.110 n=20)
Pow10Neg               4.803n ± 0%    4.803n ± 0%        ~ (p=0.130 n=20)
Round                  4.816n ± 0%    3.819n ± 0%  -20.70% (p=0.000 n=20)
RoundToEven            5.735n ± 0%    5.204n ± 0%   -9.26% (p=0.000 n=20)
Remainder              52.05n ± 0%    49.64n ± 0%   -4.63% (p=0.000 n=20)
Signbit                1.201n ± 0%    1.001n ± 0%  -16.65% (p=0.000 n=20)
Sin                    20.63n ± 0%    20.64n ± 0%   +0.05% (p=0.040 n=20)
Sincos                 23.82n ± 0%    24.62n ± 0%   +3.36% (p=0.000 n=20)
Sinh                   71.25n ± 0%    68.44n ± 0%   -3.94% (p=0.000 n=20)
SqrtIndirect           2.001n ± 0%    2.001n ± 0%        ~ (p=0.182 n=20)
SqrtLatency            4.003n ± 0%    4.003n ± 0%        ~ (p=0.754 n=20)
SqrtIndirectLatency    4.003n ± 0%    4.003n ± 0%        ~ (p=0.773 n=20)
SqrtGoLatency          60.84n ± 0%    81.26n ± 0%  +33.56% (p=0.000 n=20)
SqrtPrime              1.791µ ± 0%    1.791µ ± 0%        ~ (p=0.784 n=20)
Tan                    27.22n ± 0%    27.22n ± 0%        ~ (p=0.819 n=20)
Tanh                   70.88n ± 0%    69.04n ± 0%   -2.60% (p=0.000 n=20)
Trunc                  8.543n ± 0%    8.543n ± 0%        ~ (p=0.784 n=20)
Y0                     122.9n ± 0%    122.9n ± 0%        ~ (p=0.559 n=20)
Y1                     123.3n ± 0%    121.7n ± 0%   -1.30% (p=0.000 n=20)
Yn                     263.0n ± 0%    262.6n ± 0%   -0.15% (p=0.000 n=20)
Float64bits           1.2010n ± 0%   0.6004n ± 0%  -50.01% (p=0.000 n=20)
Float64frombits       1.2010n ± 0%   0.6004n ± 0%  -50.01% (p=0.000 n=20)
Float32bits           1.7010n ± 0%   0.8005n ± 0%  -52.94% (p=0.000 n=20)
Float32frombits       1.5010n ± 0%   0.8005n ± 0%  -46.67% (p=0.000 n=20)
FMA                    2.001n ± 0%    2.001n ± 0%        ~ (p=0.238 n=20)
geomean                17.41n         16.15n        -7.19%

Change-Id: I0a0c263af2f07203eab1782e69c706f20c689d8d
Reviewed-on: https://go-review.googlesource.com/c/go/+/604737
Auto-Submit: Tim King <taking@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Tim King <taking@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
src/cmd/compile/internal/loong64/ssa.go
src/cmd/compile/internal/ssa/_gen/LOONG64.rules
src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteLOONG64.go
test/codegen/math.go

index fd5ed5f928bba9bbe0d0083f42288b4ff87c4eb5..d9ad1a0a62324e5eca16fa82a713112a3397eb10 100644 (file)
@@ -418,6 +418,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpLOONG64TRUNCDV,
                ssa.OpLOONG64MOVFD,
                ssa.OpLOONG64MOVDF,
+               ssa.OpLOONG64MOVWfpgp,
+               ssa.OpLOONG64MOVWgpfp,
+               ssa.OpLOONG64MOVVfpgp,
+               ssa.OpLOONG64MOVVgpfp,
                ssa.OpLOONG64NEGF,
                ssa.OpLOONG64NEGD,
                ssa.OpLOONG64SQRTD,
index ab280f8f79d66c051066cd3570601498c8504bea..ae01b5eb24a6a0be6a1dfef4d63aa02e409c7c78 100644 (file)
                 mem)
 
 
+// float <=> int register moves, with no conversion.
+// These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}.
+(MOVVload  [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (MOVVfpgp val)
+(MOVDload  [off] {sym} ptr (MOVVstore [off] {sym} ptr val _)) => (MOVVgpfp val)
+(MOVWUload [off] {sym} ptr (MOVFstore [off] {sym} ptr val _)) => (ZeroExt32to64 (MOVWfpgp <typ.Float32> val))
+(MOVFload  [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (MOVWgpfp val)
+
+// Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set.
+(MOVVstore [off] {sym} ptr (MOVVfpgp val) mem) => (MOVDstore [off] {sym} ptr val mem)
+(MOVDstore [off] {sym} ptr (MOVVgpfp val) mem) => (MOVVstore [off] {sym} ptr val mem)
+(MOVWstore [off] {sym} ptr (MOVWfpgp val) mem) => (MOVFstore [off] {sym} ptr val mem)
+(MOVFstore [off] {sym} ptr (MOVWgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem)
+
 // calls
 (StaticCall ...) => (CALLstatic ...)
 (ClosureCall ...) => (CALLclosure ...)
index 8046ffead34670c02fe8e33342f00dfee5731135..140088b6bdefeb32765dede92737c7a5bb47f3fc 100644 (file)
@@ -152,6 +152,8 @@ func init() {
                fp2flags  = regInfo{inputs: []regMask{fp, fp}}
                fpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
                fpstore   = regInfo{inputs: []regMask{gpspsbg, fp}}
+               fpgp      = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
+               gpfp      = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
                readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
        )
        ops := []opData{
@@ -258,6 +260,12 @@ func init() {
                {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
                {name: "MOVVstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVV", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux.  ar12=mem.
 
+               // moves (no conversion)
+               {name: "MOVWfpgp", argLength: 1, reg: fpgp, asm: "MOVW"}, // move float32 to int32 (no conversion).
+               {name: "MOVWgpfp", argLength: 1, reg: gpfp, asm: "MOVW"}, // move int32 to float32 (no conversion).
+               {name: "MOVVfpgp", argLength: 1, reg: fpgp, asm: "MOVV"}, // move float64 to int64 (no conversion).
+               {name: "MOVVgpfp", argLength: 1, reg: gpfp, asm: "MOVV"}, // move int64 to float64 (no conversion).
+
                // conversions
                {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"},   // move from arg0, sign-extended from byte
                {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
index 7f5ab229e0b431ee2112d5bd46efbe4e31b5eba9..9e6f8b9a97b46b299f13605bb27f277a8f9c004c 100644 (file)
@@ -1824,6 +1824,10 @@ const (
        OpLOONG64MOVHstorezero
        OpLOONG64MOVWstorezero
        OpLOONG64MOVVstorezero
+       OpLOONG64MOVWfpgp
+       OpLOONG64MOVWgpfp
+       OpLOONG64MOVVfpgp
+       OpLOONG64MOVVgpfp
        OpLOONG64MOVBreg
        OpLOONG64MOVBUreg
        OpLOONG64MOVHreg
@@ -24608,6 +24612,58 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "MOVWfpgp",
+               argLen: 1,
+               asm:    loong64.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
+       {
+               name:   "MOVWgpfp",
+               argLen: 1,
+               asm:    loong64.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
+       {
+               name:   "MOVVfpgp",
+               argLen: 1,
+               asm:    loong64.AMOVV,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
+       {
+               name:   "MOVVgpfp",
+               argLen: 1,
+               asm:    loong64.AMOVV,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
        {
                name:   "MOVBreg",
                argLen: 1,
index e17c305f4feb5eb7908cf7a3bc31d4eeda3c5cba..99690d324cdf90811f3723d0a430c83624aa3669 100644 (file)
@@ -2143,6 +2143,23 @@ func rewriteValueLOONG64_OpLOONG64MOVDload(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
+       // match: (MOVDload [off] {sym} ptr (MOVVstore [off] {sym} ptr val _))
+       // result: (MOVVgpfp val)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpLOONG64MOVVstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpLOONG64MOVVgpfp)
+               v.AddArg(val)
+               return true
+       }
        // match: (MOVDload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVDload [off1+int32(off2)] {sym} ptr mem)
@@ -2194,6 +2211,23 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
+       // match: (MOVDstore [off] {sym} ptr (MOVVgpfp val) mem)
+       // result: (MOVVstore [off] {sym} ptr val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpLOONG64MOVVgpfp {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpLOONG64MOVVstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
        // match: (MOVDstore [off1] {sym} (ADDVconst [off2] ptr) val mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVDstore [off1+int32(off2)] {sym} ptr val mem)
@@ -2246,6 +2280,23 @@ func rewriteValueLOONG64_OpLOONG64MOVFload(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
+       // match: (MOVFload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _))
+       // result: (MOVWgpfp val)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpLOONG64MOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpLOONG64MOVWgpfp)
+               v.AddArg(val)
+               return true
+       }
        // match: (MOVFload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVFload [off1+int32(off2)] {sym} ptr mem)
@@ -2297,6 +2348,23 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
+       // match: (MOVFstore [off] {sym} ptr (MOVWgpfp val) mem)
+       // result: (MOVWstore [off] {sym} ptr val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpLOONG64MOVWgpfp {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpLOONG64MOVWstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
        // match: (MOVFstore [off1] {sym} (ADDVconst [off2] ptr) val mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVFstore [off1+int32(off2)] {sym} ptr val mem)
@@ -2760,6 +2828,23 @@ func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
+       // match: (MOVVload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _))
+       // result: (MOVVfpgp val)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpLOONG64MOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpLOONG64MOVVfpgp)
+               v.AddArg(val)
+               return true
+       }
        // match: (MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVVload [off1+int32(off2)] {sym} ptr mem)
@@ -2838,6 +2923,23 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
+       // match: (MOVVstore [off] {sym} ptr (MOVVfpgp val) mem)
+       // result: (MOVDstore [off] {sym} ptr val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpLOONG64MOVVfpgp {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpLOONG64MOVDstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
        // match: (MOVVstore [off1] {sym} (ADDVconst [off2] ptr) val mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVVstore [off1+int32(off2)] {sym} ptr val mem)
@@ -2940,6 +3042,26 @@ func rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
+       typ := &b.Func.Config.Types
+       // match: (MOVWUload [off] {sym} ptr (MOVFstore [off] {sym} ptr val _))
+       // result: (ZeroExt32to64 (MOVWfpgp <typ.Float32> val))
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpLOONG64MOVFstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
+                       break
+               }
+               val := v_1.Args[1]
+               if ptr != v_1.Args[0] {
+                       break
+               }
+               v.reset(OpZeroExt32to64)
+               v0 := b.NewValue0(v_1.Pos, OpLOONG64MOVWfpgp, typ.Float32)
+               v0.AddArg(val)
+               v.AddArg(v0)
+               return true
+       }
        // match: (MOVWUload [off1] {sym} (ADDVconst [off2] ptr) mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVWUload [off1+int32(off2)] {sym} ptr mem)
@@ -3236,6 +3358,23 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        config := b.Func.Config
+       // match: (MOVWstore [off] {sym} ptr (MOVWfpgp val) mem)
+       // result: (MOVFstore [off] {sym} ptr val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               ptr := v_0
+               if v_1.Op != OpLOONG64MOVWfpgp {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v_2
+               v.reset(OpLOONG64MOVFstore)
+               v.AuxInt = int32ToAuxInt(off)
+               v.Aux = symToAux(sym)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
        // match: (MOVWstore [off1] {sym} (ADDVconst [off2] ptr) val mem)
        // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
        // result: (MOVWstore [off1+int32(off2)] {sym} ptr val mem)
index eb6e927dec92fbf77ee2349f76c4762b9f067887..806f9096484bb0e46303e68fc2e86529e92f82f7 100644 (file)
@@ -156,6 +156,7 @@ func fnma(x, y, z float64) float64 {
 func fromFloat64(f64 float64) uint64 {
        // amd64:"MOVQ\tX.*, [^X].*"
        // arm64:"FMOVD\tF.*, R.*"
+       // loong64:"MOVV\tF.*, R.*"
        // ppc64x:"MFVSRD"
        // mips64/hardfloat:"MOVV\tF.*, R.*"
        return math.Float64bits(f64+1) + 1
@@ -164,6 +165,7 @@ func fromFloat64(f64 float64) uint64 {
 func fromFloat32(f32 float32) uint32 {
        // amd64:"MOVL\tX.*, [^X].*"
        // arm64:"FMOVS\tF.*, R.*"
+       // loong64:"MOVW\tF.*, R.*"
        // mips64/hardfloat:"MOVW\tF.*, R.*"
        return math.Float32bits(f32+1) + 1
 }
@@ -171,6 +173,7 @@ func fromFloat32(f32 float32) uint32 {
 func toFloat64(u64 uint64) float64 {
        // amd64:"MOVQ\t[^X].*, X.*"
        // arm64:"FMOVD\tR.*, F.*"
+       // loong64:"MOVV\tR.*, F.*"
        // ppc64x:"MTVSRD"
        // mips64/hardfloat:"MOVV\tR.*, F.*"
        return math.Float64frombits(u64+1) + 1
@@ -179,6 +182,7 @@ func toFloat64(u64 uint64) float64 {
 func toFloat32(u32 uint32) float32 {
        // amd64:"MOVL\t[^X].*, X.*"
        // arm64:"FMOVS\tR.*, F.*"
+       // loong64:"MOVW\tR.*, F.*"
        // mips64/hardfloat:"MOVW\tR.*, F.*"
        return math.Float32frombits(u32+1) + 1
 }