CL 514596 adds float min/max for amd64, this CL adds it for riscv64.
The behavior of the RISC-V FMIN/FMAX instructions almost match Go's
requirements.
However according to RISCV spec 8.3 "NaN Generation and Propagation"
>> if at least one input is a signaling NaN, or if both inputs are quiet
>> NaNs, the result is the canonical NaN. If one operand is a quiet NaN
>> and the other is not a NaN, the result is the non-NaN operand.
Go using quiet NaN as NaN and according to Go spec
>> if any argument is a NaN, the result is a NaN
This requires the float min/max implementation to check whether one
of operand is qNaN before float mix/max actually execute.
This CL also fix a typo in minmax test.
Benchmark on Visionfive2
goos: linux
goarch: riscv64
pkg: runtime
│ float_minmax.old.bench │ float_minmax.new.bench │
│ sec/op │ sec/op vs base │
MinFloat 158.20n ± 0% 28.13n ± 0% -82.22% (p=0.000 n=10)
MaxFloat 158.10n ± 0% 28.12n ± 0% -82.21% (p=0.000 n=10)
geomean 158.1n 28.12n -82.22%
Update #59488
Change-Id: Iab48be6d32b8882044fb8c821438ca8840e5493d
Reviewed-on: https://go-review.googlesource.com/c/go/+/514775
Reviewed-by: Mauri de Souza Meneguzzo <mauri870@gmail.com>
Run-TryBot: M Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
p.Reg = r1
p.To.Type = obj.TYPE_REG
p.To.Reg = r
+
+ case ssa.OpRISCV64LoweredFMAXD, ssa.OpRISCV64LoweredFMIND, ssa.OpRISCV64LoweredFMAXS, ssa.OpRISCV64LoweredFMINS:
+ // Most of FMIN/FMAX result match Go's required behaviour, unless one of the
+ // inputs is a NaN. As such, we need to explicitly test for NaN
+ // before using FMIN/FMAX.
+
+ // FADD Rarg0, Rarg1, Rout // FADD is used to propagate a NaN to the result in these cases.
+ // FEQ Rarg0, Rarg0, Rtmp
+ // BEQZ Rtmp, end
+ // FEQ Rarg1, Rarg1, Rtmp
+ // BEQZ Rtmp, end
+ // F(MIN | MAX)
+
+ r0 := v.Args[0].Reg()
+ r1 := v.Args[1].Reg()
+ out := v.Reg()
+ add, feq := riscv.AFADDD, riscv.AFEQD
+ if v.Op == ssa.OpRISCV64LoweredFMAXS || v.Op == ssa.OpRISCV64LoweredFMINS {
+ add = riscv.AFADDS
+ feq = riscv.AFEQS
+ }
+
+ p1 := s.Prog(add)
+ p1.From.Type = obj.TYPE_REG
+ p1.From.Reg = r0
+ p1.Reg = r1
+ p1.To.Type = obj.TYPE_REG
+ p1.To.Reg = out
+
+ p2 := s.Prog(feq)
+ p2.From.Type = obj.TYPE_REG
+ p2.From.Reg = r0
+ p2.Reg = r0
+ p2.To.Type = obj.TYPE_REG
+ p2.To.Reg = riscv.REG_TMP
+
+ p3 := s.Prog(riscv.ABEQ)
+ p3.From.Type = obj.TYPE_REG
+ p3.From.Reg = riscv.REG_ZERO
+ p3.Reg = riscv.REG_TMP
+ p3.To.Type = obj.TYPE_BRANCH
+
+ p4 := s.Prog(feq)
+ p4.From.Type = obj.TYPE_REG
+ p4.From.Reg = r1
+ p4.Reg = r1
+ p4.To.Type = obj.TYPE_REG
+ p4.To.Reg = riscv.REG_TMP
+
+ p5 := s.Prog(riscv.ABEQ)
+ p5.From.Type = obj.TYPE_REG
+ p5.From.Reg = riscv.REG_ZERO
+ p5.Reg = riscv.REG_TMP
+ p5.To.Type = obj.TYPE_BRANCH
+
+ p6 := s.Prog(v.Op.Asm())
+ p6.From.Type = obj.TYPE_REG
+ p6.From.Reg = r1
+ p6.Reg = r0
+ p6.To.Type = obj.TYPE_REG
+ p6.To.Reg = out
+
+ nop := s.Prog(obj.ANOP)
+ p3.To.SetTarget(nop)
+ p5.To.SetTarget(nop)
+
case ssa.OpRISCV64LoweredMuluhilo:
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
(FMA ...) => (FMADDD ...)
+(Min(64|32)F ...) => (LoweredFMIN(D|S) ...)
+(Max(64|32)F ...) => (LoweredFMAX(D|S) ...)
+
// Sign and zero extension.
(SignExt8to16 ...) => (MOVBreg ...)
{name: "FNES", argLength: 2, reg: fp2gp, asm: "FNES", commutative: true}, // arg0 != arg1
{name: "FLTS", argLength: 2, reg: fp2gp, asm: "FLTS"}, // arg0 < arg1
{name: "FLES", argLength: 2, reg: fp2gp, asm: "FLES"}, // arg0 <= arg1
+ {name: "LoweredFMAXS", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXS", commutative: true, typ: "Float32"}, // max(arg0, arg1)
+ {name: "LoweredFMINS", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMINS", commutative: true, typ: "Float32"}, // min(arg0, arg1)
// D extension.
{name: "FADDD", argLength: 2, reg: fp21, asm: "FADDD", commutative: true, typ: "Float64"}, // arg0 + arg1
{name: "FNED", argLength: 2, reg: fp2gp, asm: "FNED", commutative: true}, // arg0 != arg1
{name: "FLTD", argLength: 2, reg: fp2gp, asm: "FLTD"}, // arg0 < arg1
{name: "FLED", argLength: 2, reg: fp2gp, asm: "FLED"}, // arg0 <= arg1
+ {name: "LoweredFMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1)
+ {name: "LoweredFMAXD", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXD", commutative: true, typ: "Float64"}, // max(arg0, arg1)
}
RISCV64blocks := []blockData{
OpRISCV64FNES
OpRISCV64FLTS
OpRISCV64FLES
+ OpRISCV64LoweredFMAXS
+ OpRISCV64LoweredFMINS
OpRISCV64FADDD
OpRISCV64FSUBD
OpRISCV64FMULD
OpRISCV64FNED
OpRISCV64FLTD
OpRISCV64FLED
+ OpRISCV64LoweredFMIND
+ OpRISCV64LoweredFMAXD
OpS390XFADDS
OpS390XFADD
},
},
},
+ {
+ name: "LoweredFMAXS",
+ argLen: 2,
+ commutative: true,
+ resultNotInArgs: true,
+ asm: riscv.AFMAXS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "LoweredFMINS",
+ argLen: 2,
+ commutative: true,
+ resultNotInArgs: true,
+ asm: riscv.AFMINS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
{
name: "FADDD",
argLen: 2,
},
},
},
+ {
+ name: "LoweredFMIND",
+ argLen: 2,
+ commutative: true,
+ resultNotInArgs: true,
+ asm: riscv.AFMIND,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "LoweredFMAXD",
+ argLen: 2,
+ commutative: true,
+ resultNotInArgs: true,
+ asm: riscv.AFMAXD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
{
name: "FADDS",
return rewriteValueRISCV64_OpLsh8x64(v)
case OpLsh8x8:
return rewriteValueRISCV64_OpLsh8x8(v)
+ case OpMax32F:
+ v.Op = OpRISCV64LoweredFMAXS
+ return true
+ case OpMax64F:
+ v.Op = OpRISCV64LoweredFMAXD
+ return true
+ case OpMin32F:
+ v.Op = OpRISCV64LoweredFMINS
+ return true
+ case OpMin64F:
+ v.Op = OpRISCV64LoweredFMIND
+ return true
case OpMod16:
return rewriteValueRISCV64_OpMod16(v)
case OpMod16u:
if typ.IsFloat() {
switch Arch.LinkArch.Family {
- case sys.AMD64, sys.ARM64:
+ case sys.AMD64, sys.ARM64, sys.RISCV64:
var op ssa.Op
switch {
case typ.Kind() == types.TFLOAT64 && n.Op() == ir.OMIN:
}
for _, x := range all {
if z := max(nan, x); !math.IsNaN(z) {
- t.Errorf("min(%v, %v) = %v, want %v", nan, x, z, nan)
+ t.Errorf("max(%v, %v) = %v, want %v", nan, x, z, nan)
}
if z := max(x, nan); !math.IsNaN(z) {
- t.Errorf("min(%v, %v) = %v, want %v", nan, x, z, nan)
+ t.Errorf("max(%v, %v) = %v, want %v", nan, x, z, nan)
}
}
}
test(2, 0, 1)
test(2, 1, 0)
}
+
+func BenchmarkMinFloat(b *testing.B) {
+ var m float64 = 0
+ for i := 0; i < b.N; i++ {
+ for _, f := range all {
+ m = min(m, f)
+ }
+ }
+}
+
+func BenchmarkMaxFloat(b *testing.B) {
+ var m float64 = 0
+ for i := 0; i < b.N; i++ {
+ for _, f := range all {
+ m = max(m, f)
+ }
+ }
+}