ARM64HasATOMICS *obj.LSym
ARMHasVFPv4 *obj.LSym
Loong64HasLAM_BH *obj.LSym
+ Loong64HasLSX *obj.LSym
X86HasFMA *obj.LSym
X86HasPOPCNT *obj.LSym
X86HasSSE41 *obj.LSym
}
}
fallthrough
+
case ssa.OpLOONG64MOVWF,
ssa.OpLOONG64MOVWD,
ssa.OpLOONG64TRUNCFW,
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
+
+ case ssa.OpLOONG64VPCNT64,
+ ssa.OpLOONG64VPCNT32,
+ ssa.OpLOONG64VPCNT16:
+ p := s.Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = ((v.Args[0].Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = ((v.Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
+
case ssa.OpLOONG64NEGV:
// SUB from REGZERO
p := s.Prog(loong64.ASUBVU)
p.Reg = loong64.REGZERO
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
+
case ssa.OpLOONG64DUFFZERO:
// runtime.duffzero expects start address in R20
p := s.Prog(obj.ADUFFZERO)
(BitRev32 ...) => (BITREVW ...)
(BitRev64 ...) => (BITREVV ...)
+(PopCount64 <t> x) => (MOVVfpgp <t> (VPCNT64 <typ.Float64> (MOVVgpfp <typ.Float64> x)))
+(PopCount32 <t> x) => (MOVWfpgp <t> (VPCNT32 <typ.Float32> (MOVWgpfp <typ.Float32> x)))
+(PopCount16 <t> x) => (MOVWfpgp <t> (VPCNT16 <typ.Float32> (MOVWgpfp <typ.Float32> (ZeroExt16to32 x))))
+
// math package intrinsics
(Sqrt ...) => (SQRTD ...)
(Sqrt32 ...) => (SQRTF ...)
readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
)
ops := []opData{
+ // unary ops
+ {name: "NEGV", argLength: 1, reg: gp11}, // -arg0
+ {name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
+ {name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
+
+ {name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
+ {name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
+
+ {name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64
+
+ {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // Count leading (high order) zeroes (returns 0-32)
+ {name: "CLZV", argLength: 1, reg: gp11, asm: "CLZV"}, // Count leading (high order) zeroes (returns 0-64)
+
+ {name: "REVB2H", argLength: 1, reg: gp11, asm: "REVB2H"}, // Swap bytes: 0x11223344 -> 0x22114433 (sign extends to 64 bits)
+ {name: "REVB2W", argLength: 1, reg: gp11, asm: "REVB2W"}, // Swap bytes: 0x1122334455667788 -> 0x4433221188776655
+ {name: "REVBV", argLength: 1, reg: gp11, asm: "REVBV"}, // Swap bytes: 0x1122334455667788 -> 0x8877665544332211
+
+ {name: "BITREV4B", argLength: 1, reg: gp11, asm: "BITREV4B"}, // Reverse the bits of each byte inside a 32-bit arg[0]
+ {name: "BITREVW", argLength: 1, reg: gp11, asm: "BITREVW"}, // Reverse the bits in a 32-bit arg[0]
+ {name: "BITREVV", argLength: 1, reg: gp11, asm: "BITREVV"}, // Reverse the bits in a 64-bit arg[0]
+
+ {name: "VPCNT64", argLength: 1, reg: fp11, asm: "VPCNTV"}, // count set bits for each 64-bit unit and store the result in each 64-bit unit
+ {name: "VPCNT32", argLength: 1, reg: fp11, asm: "VPCNTW"}, // count set bits for each 32-bit unit and store the result in each 32-bit unit
+ {name: "VPCNT16", argLength: 1, reg: fp11, asm: "VPCNTH"}, // count set bits for each 16-bit unit and store the result in each 16-bit unit
+
// binary ops
{name: "ADDV", argLength: 2, reg: gp21, asm: "ADDVU", commutative: true}, // arg0 + arg1
{name: "ADDVconst", argLength: 1, reg: gp11sp, asm: "ADDVU", aux: "Int64"}, // arg0 + auxInt. auxInt is 32-bit, also in other *const ops.
{name: "FNMSUBF", argLength: 3, reg: fp31, asm: "FNMSUBF", commutative: true, typ: "Float32"}, // -((arg0 * arg1) - arg2)
{name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD", commutative: true, typ: "Float64"}, // -((arg0 * arg1) - arg2)
- {name: "NEGV", argLength: 1, reg: gp11}, // -arg0
- {name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
- {name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
- {name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
- {name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
-
- {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // Count leading (high order) zeroes (returns 0-32)
- {name: "CLZV", argLength: 1, reg: gp11, asm: "CLZV"}, // Count leading (high order) zeroes (returns 0-64)
-
- {name: "REVB2H", argLength: 1, reg: gp11, asm: "REVB2H"}, // Swap bytes: 0x11223344 -> 0x22114433 (sign extends to 64 bits)
- {name: "REVB2W", argLength: 1, reg: gp11, asm: "REVB2W"}, // Swap bytes: 0x1122334455667788 -> 0x4433221188776655
- {name: "REVBV", argLength: 1, reg: gp11, asm: "REVBV"}, // Swap bytes: 0x1122334455667788 -> 0x8877665544332211
-
- {name: "BITREV4B", argLength: 1, reg: gp11, asm: "BITREV4B"}, // Reverse the bits of each byte inside a 32-bit arg[0]
- {name: "BITREVW", argLength: 1, reg: gp11, asm: "BITREVW"}, // Reverse the bits in a 32-bit arg[0]
- {name: "BITREVV", argLength: 1, reg: gp11, asm: "BITREVV"}, // Reverse the bits in a 64-bit arg[0]
-
{name: "FMINF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMINF", commutative: true, typ: "Float32"}, // min(arg0, arg1), float32
{name: "FMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1), float64
{name: "FMAXF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXF", commutative: true, typ: "Float32"}, // max(arg0, arg1), float32
{name: "FMAXD", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXD", commutative: true, typ: "Float64"}, // max(arg0, arg1), float64
- {name: "MASKEQZ", argLength: 2, reg: gp21, asm: "MASKEQZ"}, // returns 0 if arg1 == 0, otherwise returns arg0
- {name: "MASKNEZ", argLength: 2, reg: gp21, asm: "MASKNEZ"}, // returns 0 if arg1 != 0, otherwise returns arg0
-
- {name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64
+ {name: "MASKEQZ", argLength: 2, reg: gp21, asm: "MASKEQZ"}, // returns 0 if arg1 == 0, otherwise returns arg0
+ {name: "MASKNEZ", argLength: 2, reg: gp21, asm: "MASKNEZ"}, // returns 0 if arg1 != 0, otherwise returns arg0
{name: "FCOPYSGD", argLength: 2, reg: fp21, asm: "FCOPYSGD"}, // float64
// shifts
OpARM64PRFM
OpARM64DMB
+ OpLOONG64NEGV
+ OpLOONG64NEGF
+ OpLOONG64NEGD
+ OpLOONG64SQRTD
+ OpLOONG64SQRTF
+ OpLOONG64ABSD
+ OpLOONG64CLZW
+ OpLOONG64CLZV
+ OpLOONG64REVB2H
+ OpLOONG64REVB2W
+ OpLOONG64REVBV
+ OpLOONG64BITREV4B
+ OpLOONG64BITREVW
+ OpLOONG64BITREVV
+ OpLOONG64VPCNT64
+ OpLOONG64VPCNT32
+ OpLOONG64VPCNT16
OpLOONG64ADDV
OpLOONG64ADDVconst
OpLOONG64SUBV
OpLOONG64FNMADDD
OpLOONG64FNMSUBF
OpLOONG64FNMSUBD
- OpLOONG64NEGV
- OpLOONG64NEGF
- OpLOONG64NEGD
- OpLOONG64SQRTD
- OpLOONG64SQRTF
- OpLOONG64CLZW
- OpLOONG64CLZV
- OpLOONG64REVB2H
- OpLOONG64REVB2W
- OpLOONG64REVBV
- OpLOONG64BITREV4B
- OpLOONG64BITREVW
- OpLOONG64BITREVV
OpLOONG64FMINF
OpLOONG64FMIND
OpLOONG64FMAXF
OpLOONG64FMAXD
OpLOONG64MASKEQZ
OpLOONG64MASKNEZ
- OpLOONG64ABSD
OpLOONG64FCOPYSGD
OpLOONG64SLLV
OpLOONG64SLLVconst
reg: regInfo{},
},
+ {
+ name: "NEGV",
+ argLen: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
+ {
+ name: "NEGF",
+ argLen: 1,
+ asm: loong64.ANEGF,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "NEGD",
+ argLen: 1,
+ asm: loong64.ANEGD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "SQRTD",
+ argLen: 1,
+ asm: loong64.ASQRTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "SQRTF",
+ argLen: 1,
+ asm: loong64.ASQRTF,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "ABSD",
+ argLen: 1,
+ asm: loong64.AABSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "CLZW",
+ argLen: 1,
+ asm: loong64.ACLZW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
+ {
+ name: "CLZV",
+ argLen: 1,
+ asm: loong64.ACLZV,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
+ {
+ name: "REVB2H",
+ argLen: 1,
+ asm: loong64.AREVB2H,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
+ {
+ name: "REVB2W",
+ argLen: 1,
+ asm: loong64.AREVB2W,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
+ {
+ name: "REVBV",
+ argLen: 1,
+ asm: loong64.AREVBV,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
+ {
+ name: "BITREV4B",
+ argLen: 1,
+ asm: loong64.ABITREV4B,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
+ {
+ name: "BITREVW",
+ argLen: 1,
+ asm: loong64.ABITREVW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
+ {
+ name: "BITREVV",
+ argLen: 1,
+ asm: loong64.ABITREVV,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
+ {
+ name: "VPCNT64",
+ argLen: 1,
+ asm: loong64.AVPCNTV,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "VPCNT32",
+ argLen: 1,
+ asm: loong64.AVPCNTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "VPCNT16",
+ argLen: 1,
+ asm: loong64.AVPCNTH,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
{
name: "ADDV",
argLen: 2,
},
},
},
- {
- name: "NEGV",
- argLen: 1,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
- },
- outputs: []outputInfo{
- {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
- },
- },
- },
- {
- name: "NEGF",
- argLen: 1,
- asm: loong64.ANEGF,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- outputs: []outputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- },
- },
- {
- name: "NEGD",
- argLen: 1,
- asm: loong64.ANEGD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- outputs: []outputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- },
- },
- {
- name: "SQRTD",
- argLen: 1,
- asm: loong64.ASQRTD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- outputs: []outputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- },
- },
- {
- name: "SQRTF",
- argLen: 1,
- asm: loong64.ASQRTF,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- outputs: []outputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- },
- },
- {
- name: "CLZW",
- argLen: 1,
- asm: loong64.ACLZW,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
- },
- outputs: []outputInfo{
- {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
- },
- },
- },
- {
- name: "CLZV",
- argLen: 1,
- asm: loong64.ACLZV,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
- },
- outputs: []outputInfo{
- {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
- },
- },
- },
- {
- name: "REVB2H",
- argLen: 1,
- asm: loong64.AREVB2H,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
- },
- outputs: []outputInfo{
- {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
- },
- },
- },
- {
- name: "REVB2W",
- argLen: 1,
- asm: loong64.AREVB2W,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
- },
- outputs: []outputInfo{
- {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
- },
- },
- },
- {
- name: "REVBV",
- argLen: 1,
- asm: loong64.AREVBV,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
- },
- outputs: []outputInfo{
- {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
- },
- },
- },
- {
- name: "BITREV4B",
- argLen: 1,
- asm: loong64.ABITREV4B,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
- },
- outputs: []outputInfo{
- {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
- },
- },
- },
- {
- name: "BITREVW",
- argLen: 1,
- asm: loong64.ABITREVW,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
- },
- outputs: []outputInfo{
- {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
- },
- },
- },
- {
- name: "BITREVV",
- argLen: 1,
- asm: loong64.ABITREVV,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
- },
- outputs: []outputInfo{
- {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
- },
- },
- },
{
name: "FMINF",
argLen: 2,
},
},
},
- {
- name: "ABSD",
- argLen: 1,
- asm: loong64.AABSD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- outputs: []outputInfo{
- {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
- },
- },
- },
{
name: "FCOPYSGD",
argLen: 2,
return true
case OpPanicBounds:
return rewriteValueLOONG64_OpPanicBounds(v)
+ case OpPopCount16:
+ return rewriteValueLOONG64_OpPopCount16(v)
+ case OpPopCount32:
+ return rewriteValueLOONG64_OpPopCount32(v)
+ case OpPopCount64:
+ return rewriteValueLOONG64_OpPopCount64(v)
case OpPubBarrier:
v.Op = OpLOONG64LoweredPubBarrier
return true
}
return false
}
+func rewriteValueLOONG64_OpPopCount16(v *Value) bool {
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (PopCount16 <t> x)
+ // result: (MOVWfpgp <t> (VPCNT16 <typ.Float32> (MOVWgpfp <typ.Float32> (ZeroExt16to32 x))))
+ for {
+ t := v.Type
+ x := v_0
+ v.reset(OpLOONG64MOVWfpgp)
+ v.Type = t
+ v0 := b.NewValue0(v.Pos, OpLOONG64VPCNT16, typ.Float32)
+ v1 := b.NewValue0(v.Pos, OpLOONG64MOVWgpfp, typ.Float32)
+ v2 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
+ v2.AddArg(x)
+ v1.AddArg(v2)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ return true
+ }
+}
+func rewriteValueLOONG64_OpPopCount32(v *Value) bool {
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (PopCount32 <t> x)
+ // result: (MOVWfpgp <t> (VPCNT32 <typ.Float32> (MOVWgpfp <typ.Float32> x)))
+ for {
+ t := v.Type
+ x := v_0
+ v.reset(OpLOONG64MOVWfpgp)
+ v.Type = t
+ v0 := b.NewValue0(v.Pos, OpLOONG64VPCNT32, typ.Float32)
+ v1 := b.NewValue0(v.Pos, OpLOONG64MOVWgpfp, typ.Float32)
+ v1.AddArg(x)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ return true
+ }
+}
+func rewriteValueLOONG64_OpPopCount64(v *Value) bool {
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (PopCount64 <t> x)
+ // result: (MOVVfpgp <t> (VPCNT64 <typ.Float64> (MOVVgpfp <typ.Float64> x)))
+ for {
+ t := v.Type
+ x := v_0
+ v.reset(OpLOONG64MOVVfpgp)
+ v.Type = t
+ v0 := b.NewValue0(v.Pos, OpLOONG64VPCNT64, typ.Float64)
+ v1 := b.NewValue0(v.Pos, OpLOONG64MOVVgpfp, typ.Float64)
+ v1.AddArg(x)
+ v0.AddArg(v1)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueLOONG64_OpRotateLeft16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return s.variable(n, types.Types[types.TINT])
}
}
+
+ makeOnesCountLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLSX, s.sb)
+ v := s.load(types.Types[types.TBOOL], addr)
+ b := s.endBlock()
+ b.Kind = ssa.BlockIf
+ b.SetControl(v)
+ bTrue := s.f.NewBlock(ssa.BlockPlain)
+ bFalse := s.f.NewBlock(ssa.BlockPlain)
+ bEnd := s.f.NewBlock(ssa.BlockPlain)
+ b.AddEdgeTo(bTrue)
+ b.AddEdgeTo(bFalse)
+ b.Likely = ssa.BranchLikely // most loong64 machines support the LSX
+
+ // We have the intrinsic - use it directly.
+ s.startBlock(bTrue)
+ s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
+ s.endBlock().AddEdgeTo(bEnd)
+
+ // Call the pure Go version.
+ s.startBlock(bFalse)
+ s.vars[n] = s.callResult(n, callNormal) // types.Types[TINT]
+ s.endBlock().AddEdgeTo(bEnd)
+
+ // Merge results.
+ s.startBlock(bEnd)
+ return s.variable(n, types.Types[types.TINT])
+ }
+ }
+
addF("math/bits", "OnesCount64",
makeOnesCountAMD64(ssa.OpPopCount64),
sys.AMD64)
+ addF("math/bits", "OnesCount64",
+ makeOnesCountLoong64(ssa.OpPopCount64),
+ sys.Loong64)
addF("math/bits", "OnesCount64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0])
addF("math/bits", "OnesCount32",
makeOnesCountAMD64(ssa.OpPopCount32),
sys.AMD64)
+ addF("math/bits", "OnesCount32",
+ makeOnesCountLoong64(ssa.OpPopCount32),
+ sys.Loong64)
addF("math/bits", "OnesCount32",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0])
addF("math/bits", "OnesCount16",
makeOnesCountAMD64(ssa.OpPopCount16),
sys.AMD64)
+ addF("math/bits", "OnesCount16",
+ makeOnesCountLoong64(ssa.OpPopCount16),
+ sys.Loong64)
addF("math/bits", "OnesCount16",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0])
{"loong64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{},
{"loong64", "internal/runtime/sys", "Len64"}: struct{}{},
{"loong64", "internal/runtime/sys", "Len8"}: struct{}{},
+ {"loong64", "internal/runtime/sys", "OnesCount64"}: struct{}{},
{"loong64", "math", "Abs"}: struct{}{},
{"loong64", "math", "Copysign"}: struct{}{},
{"loong64", "math", "FMA"}: struct{}{},
{"loong64", "math/bits", "Len16"}: struct{}{},
{"loong64", "math/bits", "Len32"}: struct{}{},
{"loong64", "math/bits", "Len64"}: struct{}{},
+ {"loong64", "math/bits", "OnesCount16"}: struct{}{},
+ {"loong64", "math/bits", "OnesCount32"}: struct{}{},
+ {"loong64", "math/bits", "OnesCount64"}: struct{}{},
{"loong64", "math/bits", "Reverse"}: struct{}{},
{"loong64", "math/bits", "Reverse8"}: struct{}{},
{"loong64", "math/bits", "Reverse16"}: struct{}{},
ir.Syms.ARMHasVFPv4 = typecheck.LookupRuntimeVar("armHasVFPv4") // bool
ir.Syms.ARM64HasATOMICS = typecheck.LookupRuntimeVar("arm64HasATOMICS") // bool
ir.Syms.Loong64HasLAM_BH = typecheck.LookupRuntimeVar("loong64HasLAM_BH") // bool
+ ir.Syms.Loong64HasLSX = typecheck.LookupRuntimeVar("loong64HasLSX") // bool
ir.Syms.Staticuint64s = typecheck.LookupRuntimeVar("staticuint64s")
ir.Syms.Typedmemmove = typecheck.LookupRuntimeFunc("typedmemmove")
ir.Syms.Udiv = typecheck.LookupRuntimeVar("udiv") // asm func with special ABI
var armHasVFPv4 bool
var arm64HasATOMICS bool
var loong64HasLAM_BH bool
+var loong64HasLSX bool
func asanregisterglobals(unsafe.Pointer, uintptr)
{"armHasVFPv4", varTag, 6},
{"arm64HasATOMICS", varTag, 6},
{"loong64HasLAM_BH", varTag, 6},
+ {"loong64HasLSX", varTag, 6},
{"asanregisterglobals", funcTag, 130},
}
{"runtime.armHasVFPv4", 0},
{"runtime.arm64HasATOMICS", 0},
{"runtime.loong64HasLAM_BH", 0},
+ {"runtime.loong64HasLSX", 0},
{"runtime.asanregisterglobals", 1},
{"runtime.deferproc", 1},
{"runtime.deferprocStack", 1},
// The struct is padded to avoid false sharing.
var Loong64 struct {
_ CacheLinePad
+ HasLSX bool // support 128-bit vector extension
HasCRC32 bool // support CRC instruction
HasLAMCAS bool // support AMCAS[_DB].{B/H/W/D}
HasLAM_BH bool // support AM{SWAP/ADD}[_DB].{B/H} instruction
func doinit() {
options = []option{
+ {Name: "lsx", Feature: &Loong64.HasLSX},
{Name: "crc32", Feature: &Loong64.HasCRC32},
{Name: "lamcas", Feature: &Loong64.HasLAMCAS},
{Name: "lam_bh", Feature: &Loong64.HasLAM_BH},
cfg1 := get_cpucfg(1)
cfg2 := get_cpucfg(2)
- Loong64.HasCRC32 = isSet(cfg1, cpucfg1_CRC32)
- Loong64.HasLAMCAS = isSet(cfg2, cpucfg2_LAM_BH)
- Loong64.HasLAM_BH = isSet(cfg2, cpucfg2_LAMCAS)
+ Loong64.HasCRC32 = cfgIsSet(cfg1, cpucfg1_CRC32)
+ Loong64.HasLAMCAS = cfgIsSet(cfg2, cpucfg2_LAM_BH)
+ Loong64.HasLAM_BH = cfgIsSet(cfg2, cpucfg2_LAMCAS)
osInit()
}
-func isSet(cfg uint32, val uint32) bool {
+func cfgIsSet(cfg uint32, val uint32) bool {
return cfg&val != 0
}
// initialized.
var HWCap uint
+// HWCAP bits. These are exposed by the Linux kernel.
+const (
+ hwcap_LOONGARCH_LSX = 1 << 4
+)
+
func hwcapInit() {
// TODO: Features that require kernel support like LSX and LASX can
// be detected here once needed in std library or by the compiler.
+ Loong64.HasLSX = hwcIsSet(HWCap, hwcap_LOONGARCH_LSX)
+}
+
+func hwcIsSet(hwc uint, val uint) bool {
+ return hwc&val != 0
}
armHasVFPv4 bool
- arm64HasATOMICS bool
+ arm64HasATOMICS bool
+
loong64HasLAM_BH bool
+ loong64HasLSX bool
)
case "arm64":
arm64HasATOMICS = cpu.ARM64.HasATOMICS
+
case "loong64":
loong64HasLAM_BH = cpu.Loong64.HasLAM_BH
+ loong64HasLSX = cpu.Loong64.HasLSX
}
}
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
// amd64:"POPCNTQ"
// arm64:"VCNT","VUADDLV"
+ // loong64:"VPCNTV"
// s390x:"POPCNT"
// ppc64x:"POPCNTD"
// wasm:"I64Popcnt"
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
// amd64:"POPCNTQ"
// arm64:"VCNT","VUADDLV"
+ // loong64:"VPCNTV"
// s390x:"POPCNT"
// ppc64x:"POPCNTD"
// wasm:"I64Popcnt"
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
// amd64:"POPCNTL"
// arm64:"VCNT","VUADDLV"
+ // loong64:"VPCNTW"
// s390x:"POPCNT"
// ppc64x:"POPCNTW"
// wasm:"I64Popcnt"
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
// amd64:"POPCNTL"
// arm64:"VCNT","VUADDLV"
+ // loong64:"VPCNTH"
// s390x:"POPCNT"
// ppc64x:"POPCNTW"
// wasm:"I64Popcnt"