From: Wei Xiao Date: Tue, 25 Apr 2017 10:29:54 +0000 (+0800) Subject: cmd/asm: refine Go assembly for ARM64 X-Git-Tag: go1.10beta1~731 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=531e6c06c468badbb1ba180027a93b3f6c5b2615;p=gostls13.git cmd/asm: refine Go assembly for ARM64 Some ARM64-specific instructions (such as SIMD instructions) are not supported. This patch adds support for the following: 1. Extended register, e.g.: ADD Rm.<ext>[<<amount], Rn, Rd <ext> can have the following values: UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW and SXTX 2. Arrangement for SIMD instructions, e.g.: VADDP Vm.<T>, Vn.<T>, Vd.<T> <T> can have the following values: B8, B16, H4, H8, S2, S4 and D2 3. Width specifier and element index for SIMD instructions, e.g.: VMOV Vn.<T>[index], Rd // MOV(to general register) <T> can have the following values: S and D 4. Register List, e.g.: VLD1 (Rn), [Vt1.<T>, Vt2.<T>, Vt3.<T>] 5. Register offset variant, e.g.: VLD1.P (Rn)(Rm), [Vt1.<T>, Vt2.<T>] // Rm is the post-index register 6. Go assembly for ARM64 reference manual new added instructions are required to have according explanation items in the manual and items for existed instructions will be added incrementally For more information about the refinement background, please refer to the discussion (https://groups.google.com/forum/#!topic/golang-dev/rWgDxCrL4GU) This patch only adds syntax and doesn't break any assembly that already exists. 
Change-Id: I34e90b7faae032820593a0e417022c354a882008 Reviewed-on: https://go-review.googlesource.com/41654 Run-TryBot: Cherry Zhang Reviewed-by: Cherry Zhang --- diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go index 4320a299ff..2fd21b58b8 100644 --- a/src/cmd/asm/internal/arch/arm64.go +++ b/src/cmd/asm/internal/arch/arm64.go @@ -11,6 +11,7 @@ package arch import ( "cmd/internal/obj" "cmd/internal/obj/arm64" + "errors" ) var arm64LS = map[string]uint8{ @@ -118,3 +119,162 @@ func arm64RegisterNumber(name string, n int16) (int16, bool) { } return 0, false } + +// ARM64RegisterExtension parses an ARM64 register with extension or arrangment. +func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, isIndex bool) error { + rm := uint32(reg) + switch ext { + case "UXTB": + if !isAmount { + return errors.New("invalid register extension") + } + a.Reg = arm64.REG_UXTB + (reg & 31) + int16(num<<5) + a.Offset = int64(((rm & 31) << 16) | (uint32(num) << 10)) + case "UXTH": + if !isAmount { + return errors.New("invalid register extension") + } + a.Reg = arm64.REG_UXTH + (num & 31) + int16(num<<5) + a.Offset = int64(((rm & 31) << 16) | (1 << 13) | (uint32(num) << 10)) + case "UXTW": + if !isAmount { + return errors.New("invalid register extension") + } + a.Reg = arm64.REG_UXTW + (reg & 31) + int16(num<<5) + a.Offset = int64(((rm & 31) << 16) | (2 << 13) | (uint32(num) << 10)) + case "UXTX": + if !isAmount { + return errors.New("invalid register extension") + } + a.Reg = arm64.REG_UXTX + (reg & 31) + int16(num<<5) + a.Offset = int64(((rm & 31) << 16) | (3 << 13) | (uint32(num) << 10)) + case "SXTB": + if !isAmount { + return errors.New("invalid register extension") + } + a.Reg = arm64.REG_SXTB + (reg & 31) + int16(num<<5) + a.Offset = int64(((rm & 31) << 16) | (4 << 13) | (uint32(num) << 10)) + case "SXTH": + if !isAmount { + return errors.New("invalid register extension") + } + a.Reg = arm64.REG_SXTH + (reg & 31) + 
int16(num<<5) + a.Offset = int64(((rm & 31) << 16) | (5 << 13) | (uint32(num) << 10)) + case "SXTW": + if !isAmount { + return errors.New("invalid register extension") + } + a.Reg = arm64.REG_SXTW + (reg & 31) + int16(num<<5) + a.Offset = int64(((rm & 31) << 16) | (6 << 13) | (uint32(num) << 10)) + case "SXTX": + if !isAmount { + return errors.New("invalid register extension") + } + a.Reg = arm64.REG_SXTX + (reg & 31) + int16(num<<5) + a.Offset = int64(((rm & 31) << 16) | (7 << 13) | (uint32(num) << 10)) + case "B8": + a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_8B & 15) << 5) + case "B16": + a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_16B & 15) << 5) + case "H4": + a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_4H & 15) << 5) + case "H8": + a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_8H & 15) << 5) + case "S2": + a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_2S & 15) << 5) + case "S4": + a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_4S & 15) << 5) + case "D2": + a.Reg = arm64.REG_ARNG + (reg & 31) + ((arm64.ARNG_2D & 15) << 5) + case "B": + if !isIndex { + return nil + } + a.Reg = arm64.REG_ELEM + (reg & 31) + ((arm64.ARNG_B & 15) << 5) + a.Index = num + case "H": + if !isIndex { + return nil + } + a.Reg = arm64.REG_ELEM + (reg & 31) + ((arm64.ARNG_H & 15) << 5) + a.Index = num + case "S": + if !isIndex { + return nil + } + a.Reg = arm64.REG_ELEM + (reg & 31) + ((arm64.ARNG_S & 15) << 5) + a.Index = num + case "D": + if !isIndex { + return nil + } + a.Reg = arm64.REG_ELEM + (reg & 31) + ((arm64.ARNG_D & 15) << 5) + a.Index = num + default: + return errors.New("unsupported register extension type: " + ext) + } + a.Type = obj.TYPE_REG + return nil +} + +// ARM64RegisterArrangement parses an ARM64 vector register arrangment. 
+func ARM64RegisterArrangement(reg int16, name, arng string) (int64, error) { + var curQ, curSize uint16 + if name[0] != 'V' { + return 0, errors.New("expect V0 through V31; found: " + name) + } + if reg < 0 { + return 0, errors.New("invalid register number: " + name) + } + switch arng { + case "B8": + curSize = 0 + curQ = 0 + case "B16": + curSize = 0 + curQ = 1 + case "H4": + curSize = 1 + curQ = 0 + case "H8": + curSize = 1 + curQ = 1 + case "S2": + curSize = 1 + curQ = 0 + case "S4": + curSize = 2 + curQ = 1 + case "D1": + curSize = 3 + curQ = 0 + case "D2": + curSize = 3 + curQ = 1 + default: + return 0, errors.New("invalid arrangement in ARM64 register list") + } + return (int64(curQ) & 1 << 30) | (int64(curSize&3) << 10), nil +} + +// ARM64RegisterListOffset generates offset encoding according to AArch64 specification. +func ARM64RegisterListOffset(firstReg, regCnt int, arrangement int64) (int64, error) { + offset := int64(firstReg) + switch regCnt { + case 1: + offset |= 0x7 << 12 + case 2: + offset |= 0xa << 12 + case 3: + offset |= 0x6 << 12 + case 4: + offset |= 0x2 << 12 + default: + return 0, errors.New("invalid register numbers in ARM64 register list") + } + offset |= arrangement + // arm64 uses the 60th bit to differentiate from other archs + // For more details, refer to: obj/arm64/list7.go + offset |= 1 << 60 + return offset, nil +} diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go index c0dd2db341..1d5d07344d 100644 --- a/src/cmd/asm/internal/asm/parse.go +++ b/src/cmd/asm/internal/asm/parse.go @@ -321,6 +321,10 @@ func (p *Parser) operand(a *obj.Addr) { a.Reg, _ = p.registerReference(name) p.get(')') } + } else if p.atRegisterExtension() { + p.registerExtension(a, tok.String(), prefix) + p.expectOperandEnd() + return } else if r1, r2, scale, ok := p.register(tok.String(), prefix); ok { if scale != 0 { p.errorf("expected simple register reference") @@ -439,6 +443,20 @@ func (p *Parser) atRegisterShift() bool { 
return p.at('(', scanner.Int, ')') && lex.IsRegisterShift(p.input[p.inputPos+3].ScanToken) } +// atRegisterExtension reports whether we are at the start of an ARM64 extended register. +// We have consumed the register or R prefix. +func (p *Parser) atRegisterExtension() bool { + // ARM64 only. + if p.arch.Family != sys.ARM64 { + return false + } + // R1.xxx + if p.peek() == '.' { + return true + } + return false +} + // registerReference parses a register given either the name, R10, or a parenthesized form, SPR(10). func (p *Parser) registerReference(name string) (int16, bool) { r, present := p.arch.Register[name] @@ -573,6 +591,59 @@ func (p *Parser) registerShift(name string, prefix rune) int64 { } } +// registerExtension parses a register with extension or arrangment. +// There is known to be a register (current token) and an extension operator (peeked token). +func (p *Parser) registerExtension(a *obj.Addr, name string, prefix rune) { + if prefix != 0 { + p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name) + } + + reg, ok := p.registerReference(name) + if !ok { + p.errorf("unexpected %s in register extension", name) + return + } + + p.get('.') + tok := p.next() + ext := tok.String() + isIndex := false + num := int16(0) + isAmount := true // Amount is zero by default + if p.peek() == lex.LSH { + // parses left shift amount applied after extension: <33, R2, R3 AND R1@>33, R2, R3 + ADD R1.UXTB, R2, R3 // 4360218b + ADD R1.UXTB<<4, R2, R3 // 4370218b + VADDP V1.B16, V2.B16, V3.B16 // 43bc214e + VADDP V1.S4, V2.S4, V3.S4 // 43bca14e + VADDP V1.D2, V2.D2, V3.D2 // 43bce14e + VAND V21.B8, V12.B8, V3.B8 // 831d350e + VCMEQ V1.H4, V2.H4, V3.H4 // 438c612e + VORR V5.B16, V4.B16, V3.B16 // 831ca54e + VADD V16.S4, V5.S4, V9.S4 // a984b04e + VEOR V0.B16, V1.B16, V0.B16 // 201c206e + SHA256H V9.S4, V3, V2 // 6240095e + SHA256H2 V9.S4, V4, V3 // 8350095e + SHA256SU0 V8.S4, V7.S4 // 0729285e + SHA256SU1 V6.S4, V5.S4, V7.S4 // a760065e + SHA1SU0 V11.S4, 
V8.S4, V6.S4 // 06310b5e + SHA1SU1 V5.S4, V1.S4 // a118285e + SHA1C V1.S4, V2, V3 // 4300015e + SHA1H V5, V4 // a408285e + SHA1M V8.S4, V7, V6 // e620085e + SHA1P V11.S4, V10, V9 // 49110b5e + VADDV V0.S4, V0 // 00b8b14e + VMOVI $82, V0.B16 // 40e6024f // LTYPE1 imsr ',' spreg ',' // { @@ -84,6 +106,18 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 MOVD $1, ZR MOVD $1, R1 MOVD ZR, (R1) + VLD1 (R8), [V1.B16, V2.B16] // 01a1404c + VLD1.P (R3), [V31.H8, V0.H8] // 7fa4df4c + VLD1.P (R8)(R20), [V21.B16, V22.B16] // VLD1.P (R8)(R20*1), [V21.B16,V22.B16] // 15a1d44c + VLD1.P 64(R1), [V5.B16, V6.B16, V7.B16, V8.B16] // 2520df4c + VST1.P [V4.S4, V5.S4], 32(R1) // 24a89f4c + VST1 [V0.S4, V1.S4], (R0) // 00a8004c + VMOVS V20, (R0) // 140000bd + VMOVS.P V20, 4(R0) // 144400bc + VMOVS.W V20, 4(R0) // 144c00bc + VMOVS (R0), V20 // 140040bd + VMOVS.P 8(R0), V20 // 148440bc + VMOVS.W 8(R0), V20 // 148c40bc // small offset fits into instructions MOVB 1(R1), R2 // 22048039 @@ -147,7 +181,16 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 // outcode($1, &$2, NREG, &$4); // } MOVK $1, R1 - + VMOV V8.S[1], R1 // 013d0c0e + VMOV V0.D[0], R11 // 0b3c084e + VMOV V0.D[1], R11 // 0b3c184e + VMOV R20, V1.S[0] // 811e044e + VMOV R1, V9.H4 // 290c020e + VMOV R22, V11.D2 // cb0e084e + VMOV V2.B16, V4.B16 // 441ca24e + VMOV V20.S[0], V20 // 9406045e + VREV32 V5.B16, V5.B16 // a508206e + VDUP V19.S[0], V17.S4 // 7106044e // // B/BL // @@ -193,6 +236,7 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 CMP R1->11, R2 CMP R1>>22, R2 CMP R1<<33, R2 + CMP R22.SXTX, RSP // ffe336eb // // CBZ // diff --git a/src/cmd/internal/obj/arm/list5.go b/src/cmd/internal/obj/arm/list5.go index 169a7f5ad9..6522f9aff8 100644 --- a/src/cmd/internal/obj/arm/list5.go +++ b/src/cmd/internal/obj/arm/list5.go @@ -38,6 +38,7 @@ import ( func init() { obj.RegisterRegister(obj.RBaseARM, MAXREG, rconv) obj.RegisterOpcode(obj.ABaseARM, Anames) + obj.RegisterRegisterList(obj.RegListARMLo, obj.RegListARMHi, rlconv) } func rconv(r int) string { @@ -81,3 +82,25 @@ 
func DRconv(a int) string { fp += s return fp } + +func rlconv(list int64) string { + str := "" + for i := 0; i < 16; i++ { + if list&(1< +const ( + REG_ARNG = obj.RBaseARM64 + 1<<10 + iota<<9 // Vn. + REG_ELEM // Vn.[index] + REG_ELEM_END +) + // Not registers, but flags that can be combined with regular register // constants to indicate extended register conversion. When checking, // you should subtract obj.RBaseARM64 first. From this difference, bit 11 @@ -264,9 +272,12 @@ const ( C_VREG // V0..V31 C_PAIR // (Rn, Rm) C_SHIFT // Rn<<2 - C_EXTREG // Rn.UXTB<<3 + C_EXTREG // Rn.UXTB[<<3] C_SPR // REG_NZCV C_COND // EQ, NE, etc + C_ARNG // Vn. + C_ELEM // Vn.[index] + C_LIST // [V1, V2, V3] C_ZCON // $0 or ZR C_ABCON0 // could be C_ADDCON0 or C_BITCON @@ -720,6 +731,20 @@ const ( ASHA256H2 ASHA256SU0 ASHA256SU1 + AVADD + AVADDP + AVAND + AVCMEQ + AVEOR + AVMOV + AVLD1 + AVORR + AVREV32 + AVST1 + AVDUP + AVMOVS + AVADDV + AVMOVI ALAST AB = obj.AJMP ABL = obj.ACALL @@ -731,3 +756,20 @@ const ( SHIFT_LR = 1 << 22 SHIFT_AR = 2 << 22 ) + +// Arrangement for ARM64 SIMD instructions +const ( + // arrangement types + ARNG_8B = iota + ARNG_16B + ARNG_1D + ARNG_4H + ARNG_8H + ARNG_2S + ARNG_4S + ARNG_2D + ARNG_B + ARNG_H + ARNG_S + ARNG_D +) diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 4ee4043af7..3fe8025e80 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -366,5 +366,19 @@ var Anames = []string{ "SHA256H2", "SHA256SU0", "SHA256SU1", + "VADD", + "VADDP", + "VAND", + "VCMEQ", + "VEOR", + "VMOV", + "VLD1", + "VORR", + "VREV32", + "VST1", + "VDUP", + "VMOVS", + "VADDV", + "VMOVI", "LAST", } diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go index a9cccc19f6..cb4b13934d 100644 --- a/src/cmd/internal/obj/arm64/anames7.go +++ b/src/cmd/internal/obj/arm64/anames7.go @@ -16,6 +16,9 @@ var cnames7 = []string{ "EXTREG", "SPR", "COND", + "ARNG", + 
"ELEM", + "LIST", "ZCON", "ABCON0", "ADDCON0", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index a7f4b010ee..31cec14f00 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -320,6 +320,10 @@ var optab = []Optab{ {AMOVW, C_REG, C_NONE, C_ZOREG, 20, 4, 0, 0, 0}, {AMOVW, C_REG, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, + {AVMOVS, C_VREG, C_NONE, C_UAUTO16K, 20, 4, REGSP, 0, 0}, + {AVMOVS, C_VREG, C_NONE, C_ZOREG, 20, 4, 0, 0, 0}, + {AVMOVS, C_VREG, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, + /* unscaled 9-bit signed displacement store */ {AMOVB, C_REG, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, {AMOVB, C_REG, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, @@ -368,6 +372,10 @@ var optab = []Optab{ {AMOVD, C_UOREG32K, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, {AMOVD, C_NSOREG, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, + {AVMOVS, C_UAUTO16K, C_NONE, C_VREG, 21, 4, REGSP, 0, 0}, + {AVMOVS, C_ZOREG, C_NONE, C_VREG, 21, 4, 0, 0, 0}, + {AVMOVS, C_UOREG16K, C_NONE, C_VREG, 21, 4, 0, 0, 0}, + /* long displacement store */ {AMOVB, C_REG, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, {AMOVB, C_REG, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, @@ -403,6 +411,7 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, {AFMOVS, C_LOREG, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, {AFMOVD, C_LOREG, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, + {AVMOVS, C_LOREG, C_NONE, C_VREG, 22, 4, 0, 0, C_XPOST}, {AMOVD, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AMOVW, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AMOVH, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, @@ -410,6 +419,7 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AFMOVS, C_LOREG, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, {AFMOVD, C_LOREG, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + {AVMOVS, C_LOREG, C_NONE, C_VREG, 22, 4, 0, 0, C_XPRE}, /* pre/post-indexed store (unscaled, signed 9-bit offset) */ {AMOVD, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, @@ -419,6 
+429,7 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVS, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVD, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AVMOVS, C_VREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AMOVD, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AMOVW, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AMOVH, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, @@ -426,6 +437,7 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVS, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVD, C_FREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AVMOVS, C_VREG, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, /* pre/post-indexed/signed-offset load/store register pair (unscaled, signed 10-bit quad-aligned and long offset) */ @@ -557,8 +569,27 @@ var optab = []Optab{ // { ASTXP, C_REG, C_NONE, C_ZOREG, 59, 4, 0 , 0}, // TODO(aram): - {AAESD, C_VREG, C_NONE, C_VREG, 29, 4, 0, 0, 0}, + {AAESD, C_VREG, C_NONE, C_VREG, 29, 4, 0, 0, 0}, // for compatibility with old code + {AAESD, C_ARNG, C_NONE, C_ARNG, 29, 4, 0, 0, 0}, // recommend using the new one for better readability {ASHA1C, C_VREG, C_REG, C_VREG, 1, 4, 0, 0, 0}, + {ASHA1C, C_ARNG, C_VREG, C_VREG, 1, 4, 0, 0, 0}, + {ASHA1H, C_VREG, C_NONE, C_VREG, 29, 4, 0, 0, 0}, + {ASHA1SU0, C_ARNG, C_ARNG, C_ARNG, 1, 4, 0, 0, 0}, + {ASHA256H, C_ARNG, C_VREG, C_VREG, 1, 4, 0, 0, 0}, + {AVADDP, C_ARNG, C_ARNG, C_ARNG, 72, 4, 0, 0, 0}, + {AVLD1, C_ZOREG, C_NONE, C_LIST, 81, 4, 0, 0, 0}, + {AVLD1, C_LOREG, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVMOV, C_ELEM, C_NONE, C_REG, 73, 4, 0, 0, 0}, + {AVMOV, C_REG, C_NONE, C_ARNG, 82, 4, 0, 0, 0}, + {AVMOV, C_ARNG, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, + {AVMOV, C_REG, C_NONE, C_ELEM, 78, 4, 0, 0, 0}, + {AVMOV, C_ELEM, C_NONE, C_VREG, 80, 4, 0, 0, 0}, + {AVREV32, C_ARNG, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, + {AVST1, C_LIST, C_NONE, C_ZOREG, 84, 4, 0, 0, 0}, + {AVST1, C_LIST, C_NONE, C_LOREG, 84, 4, 0, 0, C_XPOST}, + {AVDUP, 
C_ELEM, C_NONE, C_ARNG, 79, 4, 0, 0, 0}, + {AVADDV, C_ARNG, C_NONE, C_VREG, 85, 4, 0, 0, 0}, + {AVMOVI, C_ADDCON, C_NONE, C_ARNG, 86, 4, 0, 0, 0}, {obj.AUNDEF, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, {obj.APCDATA, C_VCON, C_NONE, C_VCON, 0, 0, 0, 0, 0}, @@ -1154,7 +1185,11 @@ func rclass(r int16) int { return C_COND case r == REGSP: return C_RSP - case r®_EXT != 0: + case r >= REG_ARNG && r < REG_ELEM: + return C_ARNG + case r >= REG_ELEM && r < REG_ELEM_END: + return C_ELEM + case r >= REG_UXTB && r < REG_SPECIAL: return C_EXTREG case r >= REG_SPECIAL: return C_SPR @@ -1176,6 +1211,9 @@ func (c *ctxt7) aclass(a *obj.Addr) int { case obj.TYPE_SHIFT: return C_SHIFT + case obj.TYPE_REGLIST: + return C_LIST + case obj.TYPE_MEM: switch a.Name { case obj.NAME_EXTERN, obj.NAME_STATIC: @@ -2011,22 +2049,41 @@ func buildop(ctxt *obj.Link) { case ASTXP: oprangeset(ASTXPW, t) + case AVADDP: + oprangeset(AVAND, t) + oprangeset(AVCMEQ, t) + oprangeset(AVORR, t) + oprangeset(AVADD, t) + oprangeset(AVEOR, t) + case AAESD: oprangeset(AAESE, t) oprangeset(AAESMC, t) oprangeset(AAESIMC, t) - oprangeset(ASHA1H, t) oprangeset(ASHA1SU1, t) oprangeset(ASHA256SU0, t) case ASHA1C: oprangeset(ASHA1P, t) oprangeset(ASHA1M, t) - oprangeset(ASHA1SU0, t) - oprangeset(ASHA256H, t) + + case ASHA256H: oprangeset(ASHA256H2, t) + + case ASHA1SU0: oprangeset(ASHA256SU1, t) + case ASHA1H, + AVMOV, + AVLD1, + AVREV32, + AVST1, + AVDUP, + AVMOVS, + AVADDV, + AVMOVI: + break + case obj.ANOP, obj.AUNDEF, obj.AFUNCDATA, @@ -2512,8 +2569,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = c.opxrrr(p, p.As) if (p.From.Reg-obj.RBaseARM64)®_EXT != 0 { - c.ctxt.Diag("extended register not implemented\n%v", p) - // o1 |= uint32(p.From.Offset) /* includes reg, op, etc */ + o1 |= uint32(p.From.Offset) /* includes reg, op, etc */ } else { o1 |= uint32(p.From.Reg&31) << 16 } @@ -3148,6 +3204,81 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = 
objabi.R_ARM64_GOTPCREL + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor Vm., Vn., Vd. */ + af := int((p.From.Reg >> 5) & 15) + af3 := int((p.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + if af != af3 || af != at { + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + + Q := 0 + size := 0 + switch af { + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_2D: + Q = 1 + size = 3 + case ARNG_2S: + Q = 0 + size = 2 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_4S: + Q = 1 + size = 2 + case ARNG_8B: + Q = 0 + size = 0 + case ARNG_8H: + Q = 1 + size = 1 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + + if (p.As == AVORR || p.As == AVAND || p.As == AVEOR) && + (af != ARNG_16B && af != ARNG_8B) { + c.ctxt.Diag("invalid arrangement on op %v", p.As) + } else if p.As == AVORR { + size = 2 + } else if p.As == AVAND || p.As == AVEOR { + size = 0 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 73: /* vmov V.[index], R */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm5 := 0 + o1 = 7<<25 | 0xf<<10 + switch (p.From.Reg >> 5) & 15 { + case ARNG_B: + imm5 |= 1 + imm5 |= int(p.From.Index) << 1 + case ARNG_H: + imm5 |= 2 + imm5 |= int(p.From.Index) << 2 + case ARNG_S: + imm5 |= 4 + imm5 |= int(p.From.Index) << 3 + case ARNG_D: + imm5 |= 8 + imm5 |= int(p.From.Index) << 4 + o1 |= 1 << 30 + default: + c.ctxt.Diag("invalid arrangement on op V.[index], R: %v\n", p) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + case 74: // add $O, R, Rtmp // ldp (Rtmp), (R1, R2) @@ -3256,6 +3387,248 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o2 |= uint32(REGTMP & 31) o3 |= uint32(int64(2<<30|5<<27) | (p.From.Offset&31)<<10 | int64(uint32(REGTMP&31)<<5) | int64(p.From.Reg&31)) + case 78: /* vmov R, V.[index] */ + rf := int(p.From.Reg) + rt := 
int(p.To.Reg) + imm5 := 0 + o1 = 1<<30 | 7<<25 | 7<<10 + switch (p.To.Reg >> 5) & 15 { + case ARNG_B: + imm5 |= 1 + imm5 |= int(p.From.Index) << 1 + case ARNG_H: + imm5 |= 2 + imm5 |= int(p.From.Index) << 2 + case ARNG_S: + imm5 |= 4 + imm5 |= int(p.From.Index) << 3 + case ARNG_D: + imm5 |= 8 + imm5 |= int(p.From.Index) << 4 + default: + c.ctxt.Diag("invalid arrangement on op R, V.[index]: %v\n", p) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 79: /* vdup Vn.[index], Vd. */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + o1 = 7<<25 | 1<<10 + var imm5, Q uint32 + switch (p.To.Reg >> 5) & 15 { + case ARNG_16B: + Q = 1 + imm5 = 1 + imm5 |= uint32(p.From.Index) << 1 + case ARNG_2D: + Q = 1 + imm5 = 8 + imm5 |= uint32(p.From.Index) << 4 + case ARNG_2S: + Q = 0 + imm5 = 4 + imm5 |= uint32(p.From.Index) << 3 + case ARNG_4H: + Q = 0 + imm5 = 2 + imm5 |= uint32(p.From.Index) << 2 + case ARNG_4S: + Q = 1 + imm5 = 4 + imm5 |= uint32(p.From.Index) << 3 + case ARNG_8B: + Q = 0 + imm5 = 1 + imm5 |= uint32(p.From.Index) << 1 + case ARNG_8H: + Q = 1 + imm5 = 2 + imm5 |= uint32(p.From.Index) << 2 + default: + c.ctxt.Diag("invalid arrangement on VDUP Vn.[index], Vd.: %v\n", p) + } + o1 |= (uint32(Q&1) << 30) | (uint32(imm5&0x1f) << 16) + o1 |= (uint32(rf&31) << 5) | uint32(rt&31) + + case 80: /* vmov V.[index], Vn */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + imm5 := 0 + switch p.As { + case AVMOV: + o1 = 1<<30 | 15<<25 | 1<<10 + switch (p.From.Reg >> 5) & 15 { + case ARNG_B: + imm5 |= 1 + imm5 |= int(p.From.Index) << 1 + case ARNG_H: + imm5 |= 2 + imm5 |= int(p.From.Index) << 2 + case ARNG_S: + imm5 |= 4 + imm5 |= int(p.From.Index) << 3 + case ARNG_D: + imm5 |= 8 + imm5 |= int(p.From.Index) << 4 + default: + c.ctxt.Diag("invalid arrangement on op V.[index], Vn: %v\n", p) + } + default: + c.ctxt.Diag("unsupported op %v", p.As) + } + o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 81: /* vld1 (Rn), [Vt1., 
Vt2., ...] */ + r := int(p.From.Reg) + o1 = 3<<26 | 1<<22 + if o.scond == C_XPOST { + o1 |= 1 << 23 + if p.From.Index == 0 { + // immediate offset variant + o1 |= 0x1f << 16 + } else { + // register offset variant + o1 |= uint32(p.From.Index&31) << 16 + } + } + o1 |= uint32(p.To.Offset) + o1 |= uint32(r&31) << 5 + + case 82: /* vmov Rn, Vd. */ + rf := int(p.From.Reg) + rt := int(p.To.Reg) + o1 = 7<<25 | 3<<10 + var imm5, Q uint32 + switch (p.To.Reg >> 5) & 15 { + case ARNG_16B: + Q = 1 + imm5 = 1 + case ARNG_2D: + Q = 1 + imm5 = 8 + case ARNG_2S: + Q = 0 + imm5 = 4 + case ARNG_4H: + Q = 0 + imm5 = 2 + case ARNG_4S: + Q = 1 + imm5 = 4 + case ARNG_8B: + Q = 0 + imm5 = 1 + case ARNG_8H: + Q = 1 + imm5 = 2 + default: + c.ctxt.Diag("invalid arrangement on VMOV Rn, Vd.: %v\n", p) + } + o1 |= (uint32(Q&1) << 30) | (uint32(imm5&0x1f) << 16) + o1 |= (uint32(rf&31) << 5) | uint32(rt&31) + + case 83: /* vmov Vn., Vd. */ + af := int((p.From.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + if af != at { + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + + Q := 0 + size := 0 + switch af { + case ARNG_8B: + Q = 0 + size = 0 + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_8H: + Q = 1 + size = 1 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + + if (p.As == AVMOV) && (af != ARNG_16B && af != ARNG_8B) { + c.ctxt.Diag("invalid arrangement on op %v", p.As) + } + + if p.As == AVMOV { + o1 |= uint32(rf&31) << 16 + } + + o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 84: /* vst1 [Vt1., Vt2., ...], (Rn) */ + r := int(p.To.Reg) + o1 = 3 << 26 + if o.scond == C_XPOST { + o1 |= 1 << 23 + if p.To.Index == 0 { + // immediate offset variant + o1 |= 0x1f << 16 + } else { + // register offset variant + o1 |= uint32(p.To.Index&31) << 16 + } + } + o1 |= uint32(p.From.Offset) + o1 |= uint32(r&31) << 5 + + case 85: 
/* vaddv Vn., Vd*/ + af := int((p.From.Reg >> 5) & 15) + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + Q := 0 + size := 0 + switch af { + case ARNG_8B: + Q = 0 + size = 0 + case ARNG_16B: + Q = 1 + size = 0 + case ARNG_4H: + Q = 0 + size = 1 + case ARNG_8H: + Q = 1 + size = 1 + case ARNG_4S: + Q = 1 + size = 2 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 5) | uint32(rt&31) + + case 86: /* vmovi $imm8, Vd.*/ + at := int((p.To.Reg >> 5) & 15) + r := int(p.From.Offset) + if r > 255 || r < 0 { + c.ctxt.Diag("immediate constant out of range: %v\n", p) + } + rt := int((p.To.Reg) & 31) + Q := 0 + switch at { + case ARNG_8B: + Q = 0 + case ARNG_16B: + Q = 1 + default: + c.ctxt.Diag("invalid arrangement: %v\n", p) + } + o1 = 0xf<<24 | 0xe<<12 | 1<<10 + o1 |= (uint32(Q&1) << 30) | (uint32((r>>5)&7) << 16) | (uint32(r&0x1f) << 5) | uint32(rt&31) + // This is supposed to be something that stops execution. // It's not supposed to be reached, ever, but if it is, we'd // like to be able to tell how we got there. 
Assemble as @@ -3279,6 +3652,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { * basic Rm op Rn -> Rd (using shifted register with 0) * also op Rn -> Rt * also Rm*Rn op Ra -> Rd + * also Vm op Vn -> Vd */ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { switch a { @@ -3792,6 +4166,33 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AFCVTHD: return FPOP1S(0, 0, 3, 5) + + case AVADD: + return 7<<25 | 1<<21 | 1<<15 | 1<<10 + + case AVADDP: + return 7<<25 | 1<<21 | 1<<15 | 15<<10 + + case AVAND: + return 7<<25 | 1<<21 | 7<<10 + + case AVCMEQ: + return 1<<29 | 0x71<<21 | 0x23<<10 + + case AVEOR: + return 1<<29 | 0x71<<21 | 7<<10 + + case AVORR: + return 7<<25 | 5<<21 | 7<<10 + + case AVREV32: + return 11<<26 | 2<<24 | 1<<21 | 1<<11 + + case AVMOV: + return 7<<25 | 5<<21 | 7<<10 + + case AVADDV: + return 7<<25 | 3<<20 | 3<<15 | 7<<11 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -4396,6 +4797,9 @@ func (c *ctxt7) opldr12(p *obj.Prog, a obj.As) uint32 { case AFMOVD: return LDSTR12U(3, 1, 1) + + case AVMOVS: + return LDSTR12U(2, 1, 1) } c.ctxt.Diag("bad opldr12 %v\n%v", a, p) @@ -4479,6 +4883,9 @@ func (c *ctxt7) opldrpp(p *obj.Prog, a obj.As) uint32 { case AMOVBU: return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 + + case AVMOVS: + return 2<<30 | 7<<27 | 1<<26 | 0<<24 | 1<<22 } c.ctxt.Diag("bad opldr %v\n%v", a, p) @@ -4698,7 +5105,7 @@ func movesize(a obj.As) int { case AMOVD: return 3 - case AMOVW, AMOVWU: + case AMOVW, AMOVWU, AVMOVS: return 2 case AMOVH, AMOVHU: diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go new file mode 100644 index 0000000000..9f8606a5ec --- /dev/null +++ b/src/cmd/internal/obj/arm64/doc.go @@ -0,0 +1,143 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package arm64 + +/* + +Go Assembly for ARM64 Reference Manual + +1. 
Alphabetical list of basic instructions + // TODO + +2. Alphabetical list of float-point instructions + // TODO + +3. Alphabetical list of SIMD instructions + + VADD: Add (vector). + VADD <Vm>.<T>, <Vn>.<T>, <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + 8B, 16B, H4, H8, S2, S4, D2 + + VADDP: Add Pairwise (vector) + VADDP <Vm>.<T>, <Vn>.<T>, <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + B8, B16, H4, H8, S2, S4, D2 + + VADDV: Add across Vector. + VADDV <Vn>.<T>, Vd + <T> Is an arrangement specifier and can have the following values: + 8B, 16B, H4, H8, S4 + + VAND: Bitwise AND (vector) + VAND <Vm>.<T>, <Vn>.<T>, <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + B8, B16 + + VCMEQ: Compare bitwise Equal (vector) + VCMEQ <Vm>.<T>, <Vn>.<T>, <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + B8, B16, H4, H8, S2, S4, D2 + + VDUP: Duplicate vector element to vector or scalar. + VDUP <Vn>.<Ts>[index], <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + 8B, 16B, H4, H8, S2, S4, D2 + <Ts> Is an element size specifier and can have the following values: + B, H, S, D + + VEOR: Bitwise exclusive OR (vector, register) + VEOR <Vm>.<T>, <Vn>.<T>, <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + B8, B16 + + VLD1: Load multiple single-element structures + VLD1 (Rn), [<Vt>.<T>, <Vt2>.<T> ...] // no offset + VLD1.P imm(Rn), [<Vt>.<T>, <Vt2>.<T> ...] // immediate offset variant + VLD1.P (Rn)(Rm), [<Vt>.<T>, <Vt2>.<T> ...] // register offset variant + <T> Is an arrangement specifier and can have the following values: + B8, B16, H4, H8, S2, S4, D1, D2 + + VMOV: move + VMOV <Vn>.<T>[index], Rd // Move vector element to general-purpose register. + <T> Is a source width specifier and can have the following values: + B, H, S (Wd) + D (Xd) + + VMOV Rn, <Vd>.<T> // Duplicate general-purpose register to vector. + <T> Is an arrangement specifier and can have the following values: + B8, B16, H4, H8, S2, S4 (Wn) + D2 (Xn) + + VMOV <Vn>.<T>, <Vd>.<T> // Move vector. 
+ <T> Is an arrangement specifier and can have the following values: + B8, B16 + + VMOV Rn, <Vd>.<T>[index] // Move general-purpose register to a vector element. + <T> Is a source width specifier and can have the following values: + B, H, S (Wd) + D (Xd) + + VMOV <Vn>.<T>[index], Vn // Move vector element to scalar. + <T> Is an element size specifier and can have the following values: + B, H, S, D + + VMOVI: Move Immediate (vector). + VMOVI $imm8, <Vd>.<T> + <T> is an arrangement specifier and can have the following values: + 8B, 16B + + VMOVS: Load SIMD&FP Register (immediate offset). ARMv8: LDR (immediate, SIMD&FP) + Store SIMD&FP register (immediate offset). ARMv8: STR (immediate, SIMD&FP) + VMOVS (Rn), Vn + VMOVS.W imm(Rn), Vn + VMOVS.P imm(Rn), Vn + VMOVS Vn, (Rn) + VMOVS.W Vn, imm(Rn) + VMOVS.P Vn, imm(Rn) + + VORR: Bitwise inclusive OR (vector, register) + VORR <Vm>.<T>, <Vn>.<T>, <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + B8, B16 + + VREV32: Reverse elements in 32-bit words (vector). + VREV32 <Vn>.<T>, <Vd>.<T> + <T> Is an arrangement specifier and can have the following values: + B8, B16, H4, H8 + + VST1: Store multiple single-element structures + VST1 [<Vt>.<T>, <Vt2>.<T> ...], (Rn) // no offset + VST1.P [<Vt>.<T>, <Vt2>.<T> ...], imm(Rn) // immediate offset variant + VST1.P [<Vt>.<T>, <Vt2>.<T> ...], (Rn)(Rm) // register offset variant + <T> Is an arrangement specifier and can have the following values: + B8, B16, H4, H8, S2, S4, D1, D2 + +4. Alphabetical list of cryptographic extension instructions + + SHA1C, SHA1M, SHA1P: SHA1 hash update. + SHA1C <Vm>.S4, Vn, Vd + SHA1M <Vm>.S4, Vn, Vd + SHA1P <Vm>.S4, Vn, Vd + + SHA1H: SHA1 fixed rotate. + SHA1H Vn, Vd + + SHA1SU0: SHA1 schedule update 0. + SHA256SU1: SHA256 schedule update 1. + SHA1SU0 <Vm>.S4, <Vn>.S4, <Vd>.S4 + SHA256SU1 <Vm>.S4, <Vn>.S4, <Vd>.S4 + + SHA1SU1: SHA1 schedule update 1. + SHA256SU0: SHA256 schedule update 0. + SHA1SU1 <Vn>.S4, <Vd>.S4 + SHA256SU0 <Vn>.S4, <Vd>.S4 + + SHA256H, SHA256H2: SHA256 hash update. 
+ SHA256H .S4, Vn, Vd + SHA256H2 .S4, Vn, Vd + + +*/ diff --git a/src/cmd/internal/obj/arm64/list7.go b/src/cmd/internal/obj/arm64/list7.go index 65be486cee..9a9f4b45b7 100644 --- a/src/cmd/internal/obj/arm64/list7.go +++ b/src/cmd/internal/obj/arm64/list7.go @@ -57,6 +57,38 @@ var strcond = [16]string{ func init() { obj.RegisterRegister(obj.RBaseARM64, REG_SPECIAL+1024, rconv) obj.RegisterOpcode(obj.ABaseARM64, Anames) + obj.RegisterRegisterList(obj.RegListARM64Lo, obj.RegListARM64Hi, rlconv) +} + +func arrange(a int) string { + switch a { + case ARNG_8B: + return "B8" + case ARNG_16B: + return "B16" + case ARNG_4H: + return "H4" + case ARNG_8H: + return "H8" + case ARNG_2S: + return "S2" + case ARNG_4S: + return "S4" + case ARNG_1D: + return "D1" + case ARNG_2D: + return "D2" + case ARNG_B: + return "B" + case ARNG_H: + return "H" + case ARNG_S: + return "S" + case ARNG_D: + return "D" + default: + return "" + } } func rconv(r int) string { @@ -102,6 +134,58 @@ func rconv(r int) string { return "DAIFSet" case r == REG_DAIFClr: return "DAIFClr" + case REG_UXTB <= r && r < REG_UXTH: + if (r>>5)&7 != 0 { + return fmt.Sprintf("R%d.UXTB<<%d", r&31, (r>>5)&7) + } else { + return fmt.Sprintf("R%d.UXTB", r&31) + } + case REG_UXTH <= r && r < REG_UXTW: + if (r>>5)&7 != 0 { + return fmt.Sprintf("R%d.UXTH<<%d", r&31, (r>>5)&7) + } else { + return fmt.Sprintf("R%d.UXTH", r&31) + } + case REG_UXTW <= r && r < REG_UXTX: + if (r>>5)&7 != 0 { + return fmt.Sprintf("R%d.UXTW<<%d", r&31, (r>>5)&7) + } else { + return fmt.Sprintf("R%d.UXTW", r&31) + } + case REG_UXTX <= r && r < REG_SXTB: + if (r>>5)&7 != 0 { + return fmt.Sprintf("R%d.UXTX<<%d", r&31, (r>>5)&7) + } else { + return fmt.Sprintf("R%d.UXTX", r&31) + } + case REG_SXTB <= r && r < REG_SXTH: + if (r>>5)&7 != 0 { + return fmt.Sprintf("R%d.SXTB<<%d", r&31, (r>>5)&7) + } else { + return fmt.Sprintf("R%d.SXTB", r&31) + } + case REG_SXTH <= r && r < REG_SXTW: + if (r>>5)&7 != 0 { + return fmt.Sprintf("R%d.SXTH<<%d", r&31, 
(r>>5)&7) + } else { + return fmt.Sprintf("R%d.SXTH", r&31) + } + case REG_SXTW <= r && r < REG_SXTX: + if (r>>5)&7 != 0 { + return fmt.Sprintf("R%d.SXTW<<%d", r&31, (r>>5)&7) + } else { + return fmt.Sprintf("R%d.SXTW", r&31) + } + case REG_SXTX <= r && r < REG_SPECIAL: + if (r>>5)&7 != 0 { + return fmt.Sprintf("R%d.SXTX<<%d", r&31, (r>>5)&7) + } else { + return fmt.Sprintf("R%d.SXTX", r&31) + } + case REG_ARNG <= r && r < REG_ELEM: + return fmt.Sprintf("V%d.%s", r&31, arrange((r>>5)&15)) + case REG_ELEM <= r && r < REG_ELEM_END: + return fmt.Sprintf("V%d.%s", r&31, arrange((r>>5)&15)) } return fmt.Sprintf("badreg(%d)", r) } @@ -112,3 +196,60 @@ func DRconv(a int) string { } return "C_??" } + +func rlconv(list int64) string { + str := "" + + // ARM64 register list follows ARM64 instruction decode schema + // | 31 | 30 | ... | 15 - 12 | 11 - 10 | ... | + // +----+----+-----+---------+---------+-----+ + // | | Q | ... | opcode | size | ... | + + firstReg := int(list & 31) + opcode := (list >> 12) & 15 + var regCnt int + var t string + switch opcode { + case 0x7: + regCnt = 1 + case 0xa: + regCnt = 2 + case 0x6: + regCnt = 3 + case 0x2: + regCnt = 4 + default: + regCnt = -1 + } + // Q:size + arng := ((list>>30)&1)<<2 | (list>>10)&3 + switch arng { + case 0: + t = "B8" + case 4: + t = "B16" + case 1: + t = "H4" + case 5: + t = "H8" + case 2: + t = "S2" + case 6: + t = "S4" + case 3: + t = "D1" + case 7: + t = "D2" + } + for i := 0; i < regCnt; i++ { + if str == "" { + str += "[" + } else { + str += "," + } + str += fmt.Sprintf("V%d.", (firstReg+i)&31) + str += t + } + str += "]" + return str +} diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 00453f2d3a..5041a820df 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -138,10 +138,13 @@ import ( // offset = second register // // [reg, reg, reg-reg] -// Register list for ARM. +// Register list for ARM and ARM64. 
// Encoding: // type = TYPE_REGLIST +// On ARM: // offset = bit mask of registers in list; R0 is low bit. +// On ARM64: +// offset = register count (opcode) | arrangement (Q:size) | first register // // reg, reg // Register pair for ARM. @@ -155,6 +158,27 @@ import ( // index = second register // scale = 1 // +// reg.[US]XT[BHWX] +// Register extension for ARM64 +// Encoding: +// type = TYPE_REG +// reg = REG_[US]XT[BHWX] + register + shift amount +// offset = ((reg&31) << 16) | (exttype << 13) | (amount<<10) +// +// reg.<T> +// Register arrangement for ARM64 SIMD register +// e.g.: V1.S4, V2.S2, V7.D2, V2.H4, V6.B16 +// Encoding: +// type = TYPE_REG +// reg = REG_ARNG + register + arrangement +// +// reg.<T>[index] +// Register element for ARM64 +// Encoding: +// type = TYPE_REG +// reg = REG_ELEM + register + arrangement +// index = element index + type Addr struct { Reg int16 Index int16 diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go index 67c74c2f89..f1ac1a8808 100644 --- a/src/cmd/internal/obj/util.go +++ b/src/cmd/internal/obj/util.go @@ -186,7 +186,7 @@ func Dconv(p *Prog, a *Addr) string { // PINSRQ CX,$1,X6 // where the $1 is included in the p->to Addr. // Move into a new field. 
// regListSet bundles a pair of int64 bounds with a function that renders an
// int64 register-list encoding as text.
//
// NOTE(review): presumably lo and hi delimit the range of TYPE_REGLIST offset
// values that RLconv is responsible for; whether hi is inclusive is not
// visible here, so confirm against the code that consults this struct.
type regListSet struct {
	lo, hi int64
	RLconv func(int64) string
}