ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
- ssa.OpAMD64PXOR,
+ ssa.OpAMD64MINSS, ssa.OpAMD64MINSD,
+ ssa.OpAMD64POR, ssa.OpAMD64PXOR,
ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
ssa.OpARM64FNMULD,
ssa.OpARM64FDIVS,
ssa.OpARM64FDIVD,
+ ssa.OpARM64FMINS,
+ ssa.OpARM64FMIND,
+ ssa.OpARM64FMAXS,
+ ssa.OpARM64FMAXD,
ssa.OpARM64ROR,
ssa.OpARM64RORW:
r := v.Reg()
(Round(32|64)F ...) => (Copy ...)
+// Floating-point min is tricky, as the hardware op isn't right for various special
+// cases (-0 and NaN). We use two hardware ops organized just right to make the
+// result come out how we want it. See https://github.com/golang/go/issues/59488#issuecomment-1553493207
+// (although that comment isn't exactly right, as the value overwritten is not simulated correctly).
+// t1 = MINSD x, y => incorrect if x==NaN or x==-0,y==+0
+// t2 = MINSD t1, x => fixes x==NaN case
+// res = POR t1, t2 => fixes x==-0,y==+0 case
+// Note that this trick depends on the special property that (NaN OR x) produces a NaN (although
+// it might not produce the same NaN as the input).
+(Min(64|32)F <t> x y) => (POR (MINS(D|S) <t> (MINS(D|S) <t> x y) x) (MINS(D|S) <t> x y))
+// Floating-point max is even trickier. Punt to using min instead.
+// max(x,y) == -min(-x,-y)
+(Max(64|32)F <t> x y) => (Neg(64|32)F <t> (Min(64|32)F <t> (Neg(64|32)F <t> x) (Neg(64|32)F <t> y)))
+
(CvtBoolToUint8 ...) => (Copy ...)
// Lowering shifts
// Any use must be preceded by a successful check of runtime.support_fma.
{name: "VFMADD231SD", argLength: 3, reg: fp31, resultInArg0: true, asm: "VFMADD231SD"},
+ // Note that these operations don't exactly match the semantics of Go's
+ // builtin min. In particular, these aren't commutative, because on various
+ // special cases the 2nd argument is preferred.
+ {name: "MINSD", argLength: 2, reg: fp21, resultInArg0: true, asm: "MINSD"}, // min(arg0,arg1)
+ {name: "MINSS", argLength: 2, reg: fp21, resultInArg0: true, asm: "MINSS"}, // min(arg0,arg1)
+
{name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear.
{name: "SBBLcarrymask", argLength: 1, reg: flagsgp, asm: "SBBL"}, // (int32)(-1) if carry is set, 0 if carry is clear.
// Note: SBBW and SBBB are subsumed by SBBL
{name: "MOVLi2f", argLength: 1, reg: gpfp, typ: "Float32"}, // move 32 bits from int to float reg
{name: "MOVLf2i", argLength: 1, reg: fpgp, typ: "UInt32"}, // move 32 bits from float to int reg, zero extend
- {name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation.
+ {name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs (for float negation).
+ {name: "POR", argLength: 2, reg: fp21, asm: "POR", commutative: true, resultInArg0: true}, // inclusive or, applied to X regs (for float min/max).
{name: "LEAQ", argLength: 1, reg: gp11sb, asm: "LEAQ", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
{name: "LEAL", argLength: 1, reg: gp11sb, asm: "LEAL", aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
(Sqrt32 ...) => (FSQRTS ...)
+(Min(64|32)F ...) => (FMIN(D|S) ...)
+(Max(64|32)F ...) => (FMAX(D|S) ...)
+
// lowering rotates
// we do rotate detection in generic rules, if the following rules need to be changed, check generic rules first.
(RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64
{name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0), float32
+ {name: "FMIND", argLength: 2, reg: fp21, asm: "FMIND"}, // min(arg0, arg1)
+ {name: "FMINS", argLength: 2, reg: fp21, asm: "FMINS"}, // min(arg0, arg1)
+ {name: "FMAXD", argLength: 2, reg: fp21, asm: "FMAXD"}, // max(arg0, arg1)
+ {name: "FMAXS", argLength: 2, reg: fp21, asm: "FMAXS"}, // max(arg0, arg1)
{name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // byte reverse, 64-bit
{name: "REVW", argLength: 1, reg: gp11, asm: "REVW"}, // byte reverse, 32-bit
{name: "REV16", argLength: 1, reg: gp11, asm: "REV16"}, // byte reverse in each 16-bit halfword, 64-bit
{name: "Abs", argLength: 1}, // absolute value arg0
{name: "Copysign", argLength: 2}, // copy sign from arg0 to arg1
+ // Float min/max implementation, if hardware is available.
+ {name: "Min64F", argLength: 2}, // min(arg0,arg1)
+ {name: "Min32F", argLength: 2}, // min(arg0,arg1)
+ {name: "Max64F", argLength: 2}, // max(arg0,arg1)
+ {name: "Max32F", argLength: 2}, // max(arg0,arg1)
+
// 3-input opcode.
// Fused-multiply-add, float64 only.
// When a*b+c is exactly zero (before rounding), then the result is +0 or -0.
OpAMD64SQRTSS
OpAMD64ROUNDSD
OpAMD64VFMADD231SD
+ OpAMD64MINSD
+ OpAMD64MINSS
OpAMD64SBBQcarrymask
OpAMD64SBBLcarrymask
OpAMD64SETEQ
OpAMD64MOVLi2f
OpAMD64MOVLf2i
OpAMD64PXOR
+ OpAMD64POR
OpAMD64LEAQ
OpAMD64LEAL
OpAMD64LEAW
OpARM64FNEGD
OpARM64FSQRTD
OpARM64FSQRTS
+ OpARM64FMIND
+ OpARM64FMINS
+ OpARM64FMAXD
+ OpARM64FMAXS
OpARM64REV
OpARM64REVW
OpARM64REV16
OpRoundToEven
OpAbs
OpCopysign
+ OpMin64F
+ OpMin32F
+ OpMax64F
+ OpMax32F
OpFMA
OpPhi
OpCopy
},
},
},
+ {
+ name: "MINSD",
+ argLen: 2,
+ resultInArg0: true,
+ asm: x86.AMINSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "MINSS",
+ argLen: 2,
+ resultInArg0: true,
+ asm: x86.AMINSS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "SBBQcarrymask",
argLen: 1,
},
},
},
+ {
+ name: "POR",
+ argLen: 2,
+ commutative: true,
+ resultInArg0: true,
+ asm: x86.APOR,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "LEAQ",
auxType: auxSymOff,
},
},
},
+ {
+ name: "FMIND",
+ argLen: 2,
+ asm: arm64.AFMIND,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FMINS",
+ argLen: 2,
+ asm: arm64.AFMINS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FMAXD",
+ argLen: 2,
+ asm: arm64.AFMAXD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FMAXS",
+ argLen: 2,
+ asm: arm64.AFMAXS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
{
name: "REV",
argLen: 1,
argLen: 2,
generic: true,
},
+ {
+ name: "Min64F",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Min32F",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Max64F",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "Max32F",
+ argLen: 2,
+ generic: true,
+ },
{
name: "FMA",
argLen: 3,
return rewriteValueAMD64_OpLsh8x64(v)
case OpLsh8x8:
return rewriteValueAMD64_OpLsh8x8(v)
+ case OpMax32F:
+ return rewriteValueAMD64_OpMax32F(v)
+ case OpMax64F:
+ return rewriteValueAMD64_OpMax64F(v)
+ case OpMin32F:
+ return rewriteValueAMD64_OpMin32F(v)
+ case OpMin64F:
+ return rewriteValueAMD64_OpMin64F(v)
case OpMod16:
return rewriteValueAMD64_OpMod16(v)
case OpMod16u:
}
return false
}
+func rewriteValueAMD64_OpMax32F(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Max32F <t> x y)
+ // result: (Neg32F <t> (Min32F <t> (Neg32F <t> x) (Neg32F <t> y)))
+ for {
+ t := v.Type
+ x := v_0
+ y := v_1
+ v.reset(OpNeg32F)
+ v.Type = t
+ v0 := b.NewValue0(v.Pos, OpMin32F, t)
+ v1 := b.NewValue0(v.Pos, OpNeg32F, t)
+ v1.AddArg(x)
+ v2 := b.NewValue0(v.Pos, OpNeg32F, t)
+ v2.AddArg(y)
+ v0.AddArg2(v1, v2)
+ v.AddArg(v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMax64F(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Max64F <t> x y)
+ // result: (Neg64F <t> (Min64F <t> (Neg64F <t> x) (Neg64F <t> y)))
+ for {
+ t := v.Type
+ x := v_0
+ y := v_1
+ v.reset(OpNeg64F)
+ v.Type = t
+ v0 := b.NewValue0(v.Pos, OpMin64F, t)
+ v1 := b.NewValue0(v.Pos, OpNeg64F, t)
+ v1.AddArg(x)
+ v2 := b.NewValue0(v.Pos, OpNeg64F, t)
+ v2.AddArg(y)
+ v0.AddArg2(v1, v2)
+ v.AddArg(v0)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMin32F(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Min32F <t> x y)
+ // result: (POR (MINSS <t> (MINSS <t> x y) x) (MINSS <t> x y))
+ for {
+ t := v.Type
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64POR)
+ v0 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
+ v1 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
+ v1.AddArg2(x, y)
+ v0.AddArg2(v1, x)
+ v.AddArg2(v0, v1)
+ return true
+ }
+}
+func rewriteValueAMD64_OpMin64F(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (Min64F <t> x y)
+ // result: (POR (MINSD <t> (MINSD <t> x y) x) (MINSD <t> x y))
+ for {
+ t := v.Type
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64POR)
+ v0 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
+ v1 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
+ v1.AddArg2(x, y)
+ v0.AddArg2(v1, x)
+ v.AddArg2(v0, v1)
+ return true
+ }
+}
func rewriteValueAMD64_OpMod16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return rewriteValueARM64_OpLsh8x64(v)
case OpLsh8x8:
return rewriteValueARM64_OpLsh8x8(v)
+ case OpMax32F:
+ v.Op = OpARM64FMAXS
+ return true
+ case OpMax64F:
+ v.Op = OpARM64FMAXD
+ return true
+ case OpMin32F:
+ v.Op = OpARM64FMINS
+ return true
+ case OpMin64F:
+ v.Op = OpARM64FMIND
+ return true
case OpMod16:
return rewriteValueARM64_OpMod16(v)
case OpMod16u:
if typ.IsFloat() || typ.IsString() {
// min/max semantics for floats are tricky because of NaNs and
- // negative zero, so we let the runtime handle this instead.
+ // negative zero. Some architectures have instructions which
+ // we can use to generate the right result. For others we must
+ // call into the runtime instead.
//
// Strings are conceptually simpler, but we currently desugar
// string comparisons during walk, not ssagen.
+ if typ.IsFloat() {
+ switch Arch.LinkArch.Family {
+ case sys.AMD64, sys.ARM64:
+ var op ssa.Op
+ switch {
+ case typ.Kind() == types.TFLOAT64 && n.Op() == ir.OMIN:
+ op = ssa.OpMin64F
+ case typ.Kind() == types.TFLOAT64 && n.Op() == ir.OMAX:
+ op = ssa.OpMax64F
+ case typ.Kind() == types.TFLOAT32 && n.Op() == ir.OMIN:
+ op = ssa.OpMin32F
+ case typ.Kind() == types.TFLOAT32 && n.Op() == ir.OMAX:
+ op = ssa.OpMax32F
+ }
+ return fold(func(x, a *ssa.Value) *ssa.Value {
+ return s.newValue2(op, typ, x, a)
+ })
+ }
+ }
var name string
switch typ.Kind() {
case types.TFLOAT32: