cmd/asm, cmd/internal/obj/ppc64: add ppc64 vector registers and instructions
author     Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
           Tue, 12 Apr 2016 21:38:00 +0000 (18:38 -0300)
committer  David Chase <drchase@google.com>
           Mon, 19 Sep 2016 18:39:36 +0000 (18:39 +0000)
The current implementation for the Power architecture does not include the vector
(Altivec) registers.  This adds the 32 VMX registers and the most commonly used
instructions: X-form loads/stores; VX-form logical operations, add/sub,
rotate/shift, count, splat, SHA Sigma and AES cipher; VC-form compare; and
VA-form permute, shift, add/sub and select.

Fixes #15619

Change-Id: I544b990631726e8fdfcce8ecca0aeeb72faae9aa
Reviewed-on: https://go-review.googlesource.com/25600
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: David Chase <drchase@google.com>
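
For context, a minimal Go assembly fragment (hypothetical, not part of this change) using the new V registers and mnemonics, following the operand order exercised in the testdata below:

	// XOR two 16-byte vectors: load from R3+R4 and R5+R6, store the result at R7+R8.
	LVX	(R3)(R4*1), V0   // V0 = 16 bytes at R3+R4
	LVX	(R5)(R6*1), V1   // V1 = 16 bytes at R5+R6
	VXOR	V0, V1, V2       // V2 = V0 ^ V1
	STVX	V2, (R7)(R8*1)   // store V2 at R7+R8
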
src/cmd/asm/internal/arch/arch.go
src/cmd/asm/internal/arch/ppc64.go
src/cmd/asm/internal/asm/asm.go
src/cmd/asm/internal/asm/operand_test.go
src/cmd/asm/internal/asm/testdata/ppc64.s
src/cmd/internal/obj/ppc64/a.out.go
src/cmd/internal/obj/ppc64/anames.go
src/cmd/internal/obj/ppc64/anames9.go
src/cmd/internal/obj/ppc64/asm9.go
src/cmd/internal/obj/ppc64/list9.go

index 4b5b46a78c97d6215e884779d48387c66c3fe6e4..97117714f6571add9d192fc788690f8b89988cf5 100644 (file)
@@ -319,6 +319,9 @@ func archPPC64() *Arch {
        for i := ppc64.REG_F0; i <= ppc64.REG_F31; i++ {
                register[obj.Rconv(i)] = int16(i)
        }
+       for i := ppc64.REG_V0; i <= ppc64.REG_V31; i++ {
+               register[obj.Rconv(i)] = int16(i)
+       }
        for i := ppc64.REG_CR0; i <= ppc64.REG_CR7; i++ {
                register[obj.Rconv(i)] = int16(i)
        }
index b47cd80c623d4f2ae189accfdf80579153b490c9..8621bb623b52d1a1edf274413736a1451a8034df 100644 (file)
@@ -77,6 +77,10 @@ func ppc64RegisterNumber(name string, n int16) (int16, bool) {
                if 0 <= n && n <= 7 {
                        return ppc64.REG_CR0 + n, true
                }
+       case "V":
+               if 0 <= n && n <= 31 {
+                       return ppc64.REG_V0 + n, true
+               }
        case "F":
                if 0 <= n && n <= 31 {
                        return ppc64.REG_F0 + n, true
index 3cb69c7997524c3e98bbc29831a9937a24190218..0dab80b6aae34789732a5622ccc7d65df65c6540 100644 (file)
@@ -665,9 +665,6 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
                }
                if p.arch.Family == sys.PPC64 {
                        if arch.IsPPC64RLD(op) {
-                               // 2nd operand must always be a register.
-                               // TODO: Do we need to guard this with the instruction type?
-                               // That is, are there 4-operand instructions without this property?
                                prog.From = a[0]
                                prog.Reg = p.getRegister(prog, op, &a[1])
                                prog.From3 = newAddr(a[2])
@@ -681,6 +678,27 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
                                prog.To = a[3]                            // rt
                                break
                        }
+                       // Else, it is a VA-form instruction
+                       // reg reg reg reg
+                       // imm reg reg reg
+                       // Or a VX-form instruction
+                       // imm imm reg reg
+                       if a[1].Type == obj.TYPE_REG {
+                               prog.From = a[0]
+                               prog.Reg = p.getRegister(prog, op, &a[1])
+                               prog.From3 = newAddr(a[2])
+                               prog.To = a[3]
+                               break
+                       } else if a[1].Type == obj.TYPE_CONST {
+                               prog.From = a[0]
+                               prog.Reg = p.getRegister(prog, op, &a[2])
+                               prog.From3 = newAddr(a[1])
+                               prog.To = a[3]
+                               break
+                       } else {
+                               p.errorf("invalid addressing modes for %s instruction", op)
+                               return
+                       }
                }
                if p.arch.Family == sys.S390X {
                        prog.From = a[1]
index 590fbc112e3a50dc0eaeb7e7f0a6eb912558a089..e62658937829084d583be4f7e71eb0d9a5159831 100644 (file)
@@ -340,6 +340,38 @@ var ppc64OperandTests = []operandTest{
        {"6(PC)", "6(PC)"},
        {"CR7", "CR7"},
        {"CTR", "CTR"},
+       {"V0", "V0"},
+       {"V1", "V1"},
+       {"V2", "V2"},
+       {"V3", "V3"},
+       {"V4", "V4"},
+       {"V5", "V5"},
+       {"V6", "V6"},
+       {"V7", "V7"},
+       {"V8", "V8"},
+       {"V9", "V9"},
+       {"V10", "V10"},
+       {"V11", "V11"},
+       {"V12", "V12"},
+       {"V13", "V13"},
+       {"V14", "V14"},
+       {"V15", "V15"},
+       {"V16", "V16"},
+       {"V17", "V17"},
+       {"V18", "V18"},
+       {"V19", "V19"},
+       {"V20", "V20"},
+       {"V21", "V21"},
+       {"V22", "V22"},
+       {"V23", "V23"},
+       {"V24", "V24"},
+       {"V25", "V25"},
+       {"V26", "V26"},
+       {"V27", "V27"},
+       {"V28", "V28"},
+       {"V29", "V29"},
+       {"V30", "V30"},
+       {"V31", "V31"},
        {"F14", "F14"},
        {"F15", "F15"},
        {"F16", "F16"},
index 2e3bf3b747ab5d17162608cbaeea398d7c48d4f9..f5fa0af9de8fde3314f96c77667dedcf1c7e0a0e 100644 (file)
@@ -664,6 +664,213 @@ label1:
        DCBF    (R1)
        DCBF    (R1+R2) // DCBF (R1)(R2*1)
 
+//     VMX instructions
+
+//     Described as:
+//     <instruction type>, <instruction format>
+//     <golang asm operand order> produces
+//     <Power ISA operand order>
+
+//     Vector load, X-form
+//     <MNEMONIC> (RB)(RA*1),VRT produces
+//     <mnemonic> VRT,RA,RB
+       LVEBX   (R1)(R2*1), V0
+       LVEHX   (R3)(R4*1), V1
+       LVEWX   (R5)(R6*1), V2
+       LVX     (R7)(R8*1), V3
+       LVXL    (R9)(R10*1), V4
+       LVSL    (R11)(R12*1), V5
+       LVSR    (R14)(R15*1), V6
+
+//     Vector store, X-form
+//     <MNEMONIC> VRT,(RB)(RA*1) produces
+//     <mnemonic> VRT,RA,RB
+       STVEBX  V31, (R1)(R2*1)
+       STVEHX  V30, (R2)(R3*1)
+       STVEWX  V29, (R4)(R5*1)
+       STVX    V28, (R6)(R7*1)
+       STVXL   V27, (R9)(R9*1)
+
+//     Vector AND, VX-form
+//     <MNEMONIC> VRA,VRB,VRT produces
+//     <mnemonic> VRT,VRA,VRB
+       VANDL   V10, V9, V8
+       VANDC   V15, V14, V13
+       VNAND   V19, V18, V17
+
+//     Vector OR, VX-form
+//     <MNEMONIC> VRA,VRB,VRT produces
+//     <mnemonic> VRT,VRA,VRB
+       VORL    V26, V25, V24
+       VORC    V23, V22, V21
+       VNOR    V20, V19, V18
+       VXOR    V17, V16, V15
+       VEQV    V14, V13, V12
+
+//     Vector ADD, VX-form
+//     <MNEMONIC> VRA,VRB,VRT produces
+//     <mnemonic> VRT,VRA,VRB
+       VADDUBM V3, V2, V1
+       VADDUHM V3, V2, V1
+       VADDUWM V3, V2, V1
+       VADDUDM V3, V2, V1
+       VADDUQM V3, V2, V1
+       VADDCUQ V3, V2, V1
+       VADDCUW V3, V2, V1
+       VADDUBS V3, V2, V1
+       VADDUHS V3, V2, V1
+       VADDUWS V3, V2, V1
+       VADDSBS V3, V2, V1
+       VADDSHS V3, V2, V1
+       VADDSWS V3, V2, V1
+
+//     Vector ADD extended, VA-form
+//     <MNEMONIC> VRA,VRB,VRC,VRT produces
+//     <mnemonic> VRT,VRA,VRB,VRC
+       VADDEUQM V4, V3, V2, V1
+       VADDECUQ V4, V3, V2, V1
+
+//     Vector SUB, VX-form
+//     <MNEMONIC> VRA,VRB,VRT produces
+//     <mnemonic> VRT,VRA,VRB
+       VSUBUBM V3, V2, V1
+       VSUBUHM V3, V2, V1
+       VSUBUWM V3, V2, V1
+       VSUBUDM V3, V2, V1
+       VSUBUQM V3, V2, V1
+       VSUBCUQ V3, V2, V1
+       VSUBCUW V3, V2, V1
+       VSUBUBS V3, V2, V1
+       VSUBUHS V3, V2, V1
+       VSUBUWS V3, V2, V1
+       VSUBSBS V3, V2, V1
+       VSUBSHS V3, V2, V1
+       VSUBSWS V3, V2, V1
+
+//     Vector SUB extended, VA-form
+//     <MNEMONIC> VRA,VRB,VRC,VRT produces
+//     <mnemonic> VRT,VRA,VRB,VRC
+       VSUBEUQM V4, V3, V2, V1
+       VSUBECUQ V4, V3, V2, V1
+
+//     Vector rotate, VX-form
+//     <MNEMONIC> VRA,VRB,VRT produces
+//     <mnemonic> VRT,VRA,VRB
+       VRLB    V2, V1, V0
+       VRLH    V2, V1, V0
+       VRLW    V2, V1, V0
+       VRLD    V2, V1, V0
+
+//     Vector shift, VX-form
+//     <MNEMONIC> VRA,VRB,VRT produces
+//     <mnemonic> VRT,VRA,VRB
+       VSLB    V2, V1, V0
+       VSLH    V2, V1, V0
+       VSLW    V2, V1, V0
+       VSL     V2, V1, V0
+       VSLO    V2, V1, V0
+       VSRB    V2, V1, V0
+       VSRH    V2, V1, V0
+       VSRW    V2, V1, V0
+       VSR     V2, V1, V0
+       VSRO    V2, V1, V0
+       VSLD    V2, V1, V0
+       VSRD    V2, V1, V0
+       VSRAB   V2, V1, V0
+       VSRAH   V2, V1, V0
+       VSRAW   V2, V1, V0
+       VSRAD   V2, V1, V0
+
+//     Vector shift by octet immediate, VA-form with 4-bit SHB field
+//     <MNEMONIC> SHB,VRA,VRB,VRT produces
+//     <mnemonic> VRT,VRA,VRB,SHB
+       VSLDOI  $4, V2, V1, V0
+
+//     Vector count, VX-form
+//     <MNEMONIC> VRB,VRT produces
+//     <mnemonic> VRT,VRB
+       VCLZB   V4, V5
+       VCLZH   V4, V5
+       VCLZW   V4, V5
+       VCLZD   V4, V5
+       VPOPCNTB V4, V5
+       VPOPCNTH V4, V5
+       VPOPCNTW V4, V5
+       VPOPCNTD V4, V5
+
+//     Vector compare, VC-form
+//     <MNEMONIC> VRA,VRB,VRT produces
+//     <mnemonic> VRT,VRA,VRB
+//     * Note: 'CC' suffix denotes Rc=1
+//       i.e. vcmpequb. v3,v1,v2 equals VCMPEQUBCC V1,V2,V3
+       VCMPEQUB    V3, V2, V1
+       VCMPEQUBCC  V3, V2, V1
+       VCMPEQUH    V3, V2, V1
+       VCMPEQUHCC  V3, V2, V1
+       VCMPEQUW    V3, V2, V1
+       VCMPEQUWCC  V3, V2, V1
+       VCMPEQUD    V3, V2, V1
+       VCMPEQUDCC  V3, V2, V1
+       VCMPGTUB    V3, V2, V1
+       VCMPGTUBCC  V3, V2, V1
+       VCMPGTUH    V3, V2, V1
+       VCMPGTUHCC  V3, V2, V1
+       VCMPGTUW    V3, V2, V1
+       VCMPGTUWCC  V3, V2, V1
+       VCMPGTUD    V3, V2, V1
+       VCMPGTUDCC  V3, V2, V1
+       VCMPGTSB    V3, V2, V1
+       VCMPGTSBCC  V3, V2, V1
+       VCMPGTSH    V3, V2, V1
+       VCMPGTSHCC  V3, V2, V1
+       VCMPGTSW    V3, V2, V1
+       VCMPGTSWCC  V3, V2, V1
+       VCMPGTSD    V3, V2, V1
+       VCMPGTSDCC  V3, V2, V1
+
+//     Vector permute, VA-form
+//     <MNEMONIC> VRA,VRB,VRC,VRT produces
+//     <mnemonic> VRT,VRA,VRB,VRC
+       VPERM V3, V2, V1, V0
+
+//     Vector select, VA-form
+//     <MNEMONIC> VRA,VRB,VRC,VRT produces
+//     <mnemonic> VRT,VRA,VRB,VRC
+       VSEL  V3, V2, V1, V0
+
+//     Vector splat, VX-form with 4-bit UIM field
+//     <MNEMONIC> UIM,VRB,VRT produces
+//     <mnemonic> VRT,VRB,UIM
+       VSPLTB    $15, V1, V0
+       VSPLTH    $7, V1, V0
+       VSPLTW    $3, V1, V0
+
+//     Vector splat immediate signed, VX-form with 5-bit SIM field
+//     <MNEMONIC> SIM,VRT produces
+//     <mnemonic> VRT,SIM
+       VSPLTISB  $31, V4
+       VSPLTISH  $31, V4
+       VSPLTISW  $31, V4
+
+//     Vector AES cipher, VX-form
+//     <MNEMONIC> VRA,VRB,VRT produces
+//     <mnemonic> VRT,VRA,VRB
+       VCIPHER       V3, V2, V1
+       VCIPHERLAST   V3, V2, V1
+       VNCIPHER      V3, V2, V1
+       VNCIPHERLAST  V3, V2, V1
+
+//     Vector AES subbytes, VX-form
+//     <MNEMONIC> VRA,VRT produces
+//     <mnemonic> VRT,VRA
+       VSBOX         V2, V1
+
+//     Vector SHA, VX-form with ST bit field and 4-bit SIX field
+//     <MNEMONIC> SIX,VRA,ST,VRT produces
+//     <mnemonic> VRT,VRA,ST,SIX
+       VSHASIGMAW    $15, V1, $1, V0
+       VSHASIGMAD    $15, V1, $1, V0
+
 //
 // NOP
 //
index 4c7b303bc838dd9660b7d7b67325f5601b70f656..468d8f8c760eed719af5ca4285692941bee514d2 100644 (file)
@@ -110,6 +110,39 @@ const (
        REG_F30
        REG_F31
 
+       REG_V0
+       REG_V1
+       REG_V2
+       REG_V3
+       REG_V4
+       REG_V5
+       REG_V6
+       REG_V7
+       REG_V8
+       REG_V9
+       REG_V10
+       REG_V11
+       REG_V12
+       REG_V13
+       REG_V14
+       REG_V15
+       REG_V16
+       REG_V17
+       REG_V18
+       REG_V19
+       REG_V20
+       REG_V21
+       REG_V22
+       REG_V23
+       REG_V24
+       REG_V25
+       REG_V26
+       REG_V27
+       REG_V28
+       REG_V29
+       REG_V30
+       REG_V31
+
        REG_CR0
        REG_CR1
        REG_CR2
@@ -193,6 +226,7 @@ const (
        C_NONE = iota
        C_REG
        C_FREG
+       C_VREG
        C_CREG
        C_SPR /* special processor register */
        C_ZCON
@@ -550,6 +584,153 @@ const (
        /* more 64-bit operations */
        AHRFID
 
+       /* Vector */
+       ALV
+       ALVEBX
+       ALVEHX
+       ALVEWX
+       ALVX
+       ALVXL
+       ALVSL
+       ALVSR
+       ASTV
+       ASTVEBX
+       ASTVEHX
+       ASTVEWX
+       ASTVX
+       ASTVXL
+       AVAND
+       AVANDL
+       AVANDC
+       AVNAND
+       AVOR
+       AVORL
+       AVORC
+       AVNOR
+       AVXOR
+       AVEQV
+       AVADDUM
+       AVADDUBM
+       AVADDUHM
+       AVADDUWM
+       AVADDUDM
+       AVADDUQM
+       AVADDCU
+       AVADDCUQ
+       AVADDCUW
+       AVADDUS
+       AVADDUBS
+       AVADDUHS
+       AVADDUWS
+       AVADDSS
+       AVADDSBS
+       AVADDSHS
+       AVADDSWS
+       AVADDE
+       AVADDEUQM
+       AVADDECUQ
+       AVSUBUM
+       AVSUBUBM
+       AVSUBUHM
+       AVSUBUWM
+       AVSUBUDM
+       AVSUBUQM
+       AVSUBCU
+       AVSUBCUQ
+       AVSUBCUW
+       AVSUBUS
+       AVSUBUBS
+       AVSUBUHS
+       AVSUBUWS
+       AVSUBSS
+       AVSUBSBS
+       AVSUBSHS
+       AVSUBSWS
+       AVSUBE
+       AVSUBEUQM
+       AVSUBECUQ
+       AVR
+       AVRLB
+       AVRLH
+       AVRLW
+       AVRLD
+       AVS
+       AVSLB
+       AVSLH
+       AVSLW
+       AVSL
+       AVSLO
+       AVSRB
+       AVSRH
+       AVSRW
+       AVSR
+       AVSRO
+       AVSLD
+       AVSRD
+       AVSA
+       AVSRAB
+       AVSRAH
+       AVSRAW
+       AVSRAD
+       AVSOI
+       AVSLDOI
+       AVCLZ
+       AVCLZB
+       AVCLZH
+       AVCLZW
+       AVCLZD
+       AVPOPCNT
+       AVPOPCNTB
+       AVPOPCNTH
+       AVPOPCNTW
+       AVPOPCNTD
+       AVCMPEQ
+       AVCMPEQUB
+       AVCMPEQUBCC
+       AVCMPEQUH
+       AVCMPEQUHCC
+       AVCMPEQUW
+       AVCMPEQUWCC
+       AVCMPEQUD
+       AVCMPEQUDCC
+       AVCMPGT
+       AVCMPGTUB
+       AVCMPGTUBCC
+       AVCMPGTUH
+       AVCMPGTUHCC
+       AVCMPGTUW
+       AVCMPGTUWCC
+       AVCMPGTUD
+       AVCMPGTUDCC
+       AVCMPGTSB
+       AVCMPGTSBCC
+       AVCMPGTSH
+       AVCMPGTSHCC
+       AVCMPGTSW
+       AVCMPGTSWCC
+       AVCMPGTSD
+       AVCMPGTSDCC
+       AVPERM
+       AVSEL
+       AVSPLT
+       AVSPLTB
+       AVSPLTH
+       AVSPLTW
+       AVSPLTI
+       AVSPLTISB
+       AVSPLTISH
+       AVSPLTISW
+       AVCIPH
+       AVCIPHER
+       AVCIPHERLAST
+       AVNCIPH
+       AVNCIPHER
+       AVNCIPHERLAST
+       AVSBOX
+       AVSHASIGMA
+       AVSHASIGMAW
+       AVSHASIGMAD
+
        ALAST
 
        // aliases
index b0e4f10ec6d36d38dccfcef8c92a5221bbd03228..e064d45c333acbdf279c9b6f44bd0d7075fe300a 100644 (file)
@@ -308,5 +308,150 @@ var Anames = []string{
        "REMDUV",
        "REMDUVCC",
        "HRFID",
+       "LV",
+       "LVEBX",
+       "LVEHX",
+       "LVEWX",
+       "LVX",
+       "LVXL",
+       "LVSL",
+       "LVSR",
+       "STV",
+       "STVEBX",
+       "STVEHX",
+       "STVEWX",
+       "STVX",
+       "STVXL",
+       "VAND",
+       "VANDL",
+       "VANDC",
+       "VNAND",
+       "VOR",
+       "VORL",
+       "VORC",
+       "VNOR",
+       "VXOR",
+       "VEQV",
+       "VADDUM",
+       "VADDUBM",
+       "VADDUHM",
+       "VADDUWM",
+       "VADDUDM",
+       "VADDUQM",
+       "VADDCU",
+       "VADDCUQ",
+       "VADDCUW",
+       "VADDUS",
+       "VADDUBS",
+       "VADDUHS",
+       "VADDUWS",
+       "VADDSS",
+       "VADDSBS",
+       "VADDSHS",
+       "VADDSWS",
+       "VADDE",
+       "VADDEUQM",
+       "VADDECUQ",
+       "VSUBUM",
+       "VSUBUBM",
+       "VSUBUHM",
+       "VSUBUWM",
+       "VSUBUDM",
+       "VSUBUQM",
+       "VSUBCU",
+       "VSUBCUQ",
+       "VSUBCUW",
+       "VSUBUS",
+       "VSUBUBS",
+       "VSUBUHS",
+       "VSUBUWS",
+       "VSUBSS",
+       "VSUBSBS",
+       "VSUBSHS",
+       "VSUBSWS",
+       "VSUBE",
+       "VSUBEUQM",
+       "VSUBECUQ",
+       "VR",
+       "VRLB",
+       "VRLH",
+       "VRLW",
+       "VRLD",
+       "VS",
+       "VSLB",
+       "VSLH",
+       "VSLW",
+       "VSL",
+       "VSLO",
+       "VSRB",
+       "VSRH",
+       "VSRW",
+       "VSR",
+       "VSRO",
+       "VSLD",
+       "VSRD",
+       "VSA",
+       "VSRAB",
+       "VSRAH",
+       "VSRAW",
+       "VSRAD",
+       "VSOI",
+       "VSLDOI",
+       "VCLZ",
+       "VCLZB",
+       "VCLZH",
+       "VCLZW",
+       "VCLZD",
+       "VPOPCNT",
+       "VPOPCNTB",
+       "VPOPCNTH",
+       "VPOPCNTW",
+       "VPOPCNTD",
+       "VCMPEQ",
+       "VCMPEQUB",
+       "VCMPEQUBCC",
+       "VCMPEQUH",
+       "VCMPEQUHCC",
+       "VCMPEQUW",
+       "VCMPEQUWCC",
+       "VCMPEQUD",
+       "VCMPEQUDCC",
+       "VCMPGT",
+       "VCMPGTUB",
+       "VCMPGTUBCC",
+       "VCMPGTUH",
+       "VCMPGTUHCC",
+       "VCMPGTUW",
+       "VCMPGTUWCC",
+       "VCMPGTUD",
+       "VCMPGTUDCC",
+       "VCMPGTSB",
+       "VCMPGTSBCC",
+       "VCMPGTSH",
+       "VCMPGTSHCC",
+       "VCMPGTSW",
+       "VCMPGTSWCC",
+       "VCMPGTSD",
+       "VCMPGTSDCC",
+       "VPERM",
+       "VSEL",
+       "VSPLT",
+       "VSPLTB",
+       "VSPLTH",
+       "VSPLTW",
+       "VSPLTI",
+       "VSPLTISB",
+       "VSPLTISH",
+       "VSPLTISW",
+       "VCIPH",
+       "VCIPHER",
+       "VCIPHERLAST",
+       "VNCIPH",
+       "VNCIPHER",
+       "VNCIPHERLAST",
+       "VSBOX",
+       "VSHASIGMA",
+       "VSHASIGMAW",
+       "VSHASIGMAD",
        "LAST",
 }
index f7d1d77c863352de68d96455c4120a3628411609..578e8bb52c563133eeceac3cacf60da8ade08ddd 100644 (file)
@@ -8,6 +8,7 @@ var cnames9 = []string{
        "NONE",
        "REG",
        "FREG",
+       "VREG",
        "CREG",
        "SPR",
        "ZCON",
index d16298b08ff52b6476462dd833406b4be2e2ba61..79282acd1c7a4862aff2b86998042018761fee88 100644 (file)
@@ -48,7 +48,7 @@ const (
 )
 
 type Optab struct {
-       as    obj.As
+       as    obj.As // Opcode
        a1    uint8
        a2    uint8
        a3    uint8
@@ -141,13 +141,13 @@ var optab = []Optab{
        {ARLDCL, C_REG, C_REG, C_LCON, C_REG, 14, 4, 0},
        {ARLDCL, C_REG, C_NONE, C_LCON, C_REG, 14, 4, 0},
        {AFADD, C_FREG, C_NONE, C_NONE, C_FREG, 2, 4, 0},
-       {AFADD, C_FREG, C_REG, C_NONE, C_FREG, 2, 4, 0},
+       {AFADD, C_FREG, C_FREG, C_NONE, C_FREG, 2, 4, 0},
        {AFABS, C_FREG, C_NONE, C_NONE, C_FREG, 33, 4, 0},
        {AFABS, C_NONE, C_NONE, C_NONE, C_FREG, 33, 4, 0},
        {AFMOVD, C_FREG, C_NONE, C_NONE, C_FREG, 33, 4, 0},
-       {AFMADD, C_FREG, C_REG, C_FREG, C_FREG, 34, 4, 0},
+       {AFMADD, C_FREG, C_FREG, C_FREG, C_FREG, 34, 4, 0},
        {AFMUL, C_FREG, C_NONE, C_NONE, C_FREG, 32, 4, 0},
-       {AFMUL, C_FREG, C_REG, C_NONE, C_FREG, 32, 4, 0},
+       {AFMUL, C_FREG, C_FREG, C_NONE, C_FREG, 32, 4, 0},
 
        /* store, short offset */
        {AMOVD, C_REG, C_REG, C_NONE, C_ZOREG, 7, 4, REGZERO},
@@ -344,6 +344,68 @@ var optab = []Optab{
        {AMOVD, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0},  /* mtmsrd */
        {AMOVWZ, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0}, /* mtmsr */
 
+       /* Vector instructions */
+
+       /* Vector load */
+       {ALV, C_SOREG, C_NONE, C_NONE, C_VREG, 45, 4, 0}, /* vector load, x-form */
+
+       /* Vector store */
+       {ASTV, C_VREG, C_NONE, C_NONE, C_SOREG, 44, 4, 0}, /* vector store, x-form */
+
+       /* Vector logical */
+       {AVAND, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector and, vx-form */
+       {AVOR, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0},  /* vector or, vx-form */
+
+       /* Vector add */
+       {AVADDUM, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector add unsigned modulo, vx-form */
+       {AVADDCU, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector add & write carry unsigned, vx-form */
+       {AVADDUS, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector add unsigned saturate, vx-form */
+       {AVADDSS, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector add signed saturate, vx-form */
+       {AVADDE, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0},  /* vector add extended, va-form */
+
+       /* Vector subtract */
+       {AVSUBUM, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector subtract unsigned modulo, vx-form */
+       {AVSUBCU, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector subtract & write carry unsigned, vx-form */
+       {AVSUBUS, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector subtract unsigned saturate, vx-form */
+       {AVSUBSS, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector subtract signed saturate, vx-form */
+       {AVSUBE, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0},  /* vector subtract extended, va-form */
+
+       /* Vector rotate */
+       {AVR, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector rotate, vx-form */
+
+       /* Vector shift */
+       {AVS, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0},     /* vector shift, vx-form */
+       {AVSA, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0},    /* vector shift algebraic, vx-form */
+       {AVSOI, C_ANDCON, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector shift by octet immediate, va-form */
+
+       /* Vector count */
+       {AVCLZ, C_VREG, C_NONE, C_NONE, C_VREG, 85, 4, 0},    /* vector count leading zeros, vx-form */
+       {AVPOPCNT, C_VREG, C_NONE, C_NONE, C_VREG, 85, 4, 0}, /* vector population count, vx-form */
+
+       /* Vector compare */
+       {AVCMPEQ, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector compare equal, vc-form */
+       {AVCMPGT, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector compare greater than, vc-form */
+
+       /* Vector permute */
+       {AVPERM, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector permute, va-form */
+
+       /* Vector select */
+       {AVSEL, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector select, va-form */
+
+       /* Vector splat */
+       {AVSPLT, C_SCON, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector splat, vx-form */
+       {AVSPLT, C_ADDCON, C_VREG, C_NONE, C_VREG, 82, 4, 0},
+       {AVSPLTI, C_SCON, C_NONE, C_NONE, C_VREG, 82, 4, 0}, /* vector splat immediate, vx-form */
+       {AVSPLTI, C_ADDCON, C_NONE, C_NONE, C_VREG, 82, 4, 0},
+
+       /* Vector AES */
+       {AVCIPH, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0},  /* vector AES cipher, vx-form */
+       {AVNCIPH, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector AES inverse cipher, vx-form */
+       {AVSBOX, C_VREG, C_NONE, C_NONE, C_VREG, 82, 4, 0},  /* vector AES subbytes, vx-form */
+
+       /* Vector SHA */
+       {AVSHASIGMA, C_ANDCON, C_VREG, C_ANDCON, C_VREG, 82, 4, 0}, /* vector SHA sigma, vx-form */
+
        /* 64-bit special registers */
        {AMOVD, C_REG, C_NONE, C_NONE, C_SPR, 66, 4, 0},
        {AMOVD, C_REG, C_NONE, C_NONE, C_LR, 66, 4, 0},
@@ -554,6 +616,9 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int {
                if REG_F0 <= a.Reg && a.Reg <= REG_F31 {
                        return C_FREG
                }
+               if REG_V0 <= a.Reg && a.Reg <= REG_V31 {
+                       return C_VREG
+               }
                if REG_CR0 <= a.Reg && a.Reg <= REG_CR7 || a.Reg == REG_CR {
                        return C_CREG
                }
@@ -762,7 +827,13 @@ func oplook(ctxt *obj.Link, p *obj.Prog) *Optab {
        a4--
        a2 := C_NONE
        if p.Reg != 0 {
-               a2 = C_REG
+               if REG_R0 <= p.Reg && p.Reg <= REG_R31 {
+                       a2 = C_REG
+               } else if REG_V0 <= p.Reg && p.Reg <= REG_V31 {
+                       a2 = C_VREG
+               } else if REG_F0 <= p.Reg && p.Reg <= REG_F31 {
+                       a2 = C_FREG
+               }
        }
 
        //print("oplook %v %d %d %d %d\n", p, a1, a2, a3, a4);
@@ -1036,6 +1107,184 @@ func buildop(ctxt *obj.Link) {
                        opset(AMOVDU, r0)
                        opset(AMOVMW, r0)
 
+               case ALV: /* lvebx, lvehx, lvewx, lvx, lvxl, lvsl, lvsr */
+                       opset(ALVEBX, r0)
+                       opset(ALVEHX, r0)
+                       opset(ALVEWX, r0)
+                       opset(ALVX, r0)
+                       opset(ALVXL, r0)
+                       opset(ALVSL, r0)
+                       opset(ALVSR, r0)
+
+               case ASTV: /* stvebx, stvehx, stvewx, stvx, stvxl */
+                       opset(ASTVEBX, r0)
+                       opset(ASTVEHX, r0)
+                       opset(ASTVEWX, r0)
+                       opset(ASTVX, r0)
+                       opset(ASTVXL, r0)
+
+               case AVAND: /* vand, vandc, vnand */
+                       opset(AVANDL, r0)
+                       opset(AVANDC, r0)
+                       opset(AVNAND, r0)
+
+               case AVOR: /* vor, vorc, vxor, vnor, veqv */
+                       opset(AVORL, r0)
+                       opset(AVORC, r0)
+                       opset(AVXOR, r0)
+                       opset(AVNOR, r0)
+                       opset(AVEQV, r0)
+
+               case AVADDUM: /* vaddubm, vadduhm, vadduwm, vaddudm, vadduqm */
+                       opset(AVADDUBM, r0)
+                       opset(AVADDUHM, r0)
+                       opset(AVADDUWM, r0)
+                       opset(AVADDUDM, r0)
+                       opset(AVADDUQM, r0)
+
+               case AVADDCU: /* vaddcuq, vaddcuw */
+                       opset(AVADDCUQ, r0)
+                       opset(AVADDCUW, r0)
+
+               case AVADDUS: /* vaddubs, vadduhs, vadduws */
+                       opset(AVADDUBS, r0)
+                       opset(AVADDUHS, r0)
+                       opset(AVADDUWS, r0)
+
+               case AVADDSS: /* vaddsbs, vaddshs, vaddsws */
+                       opset(AVADDSBS, r0)
+                       opset(AVADDSHS, r0)
+                       opset(AVADDSWS, r0)
+
+               case AVADDE: /* vaddeuqm, vaddecuq */
+                       opset(AVADDEUQM, r0)
+                       opset(AVADDECUQ, r0)
+
+               case AVSUBUM: /* vsububm, vsubuhm, vsubuwm, vsubudm, vsubuqm */
+                       opset(AVSUBUBM, r0)
+                       opset(AVSUBUHM, r0)
+                       opset(AVSUBUWM, r0)
+                       opset(AVSUBUDM, r0)
+                       opset(AVSUBUQM, r0)
+
+               case AVSUBCU: /* vsubcuq, vsubcuw */
+                       opset(AVSUBCUQ, r0)
+                       opset(AVSUBCUW, r0)
+
+               case AVSUBUS: /* vsububs, vsubuhs, vsubuws */
+                       opset(AVSUBUBS, r0)
+                       opset(AVSUBUHS, r0)
+                       opset(AVSUBUWS, r0)
+
+               case AVSUBSS: /* vsubsbs, vsubshs, vsubsws */
+                       opset(AVSUBSBS, r0)
+                       opset(AVSUBSHS, r0)
+                       opset(AVSUBSWS, r0)
+
+               case AVSUBE: /* vsubeuqm, vsubecuq */
+                       opset(AVSUBEUQM, r0)
+                       opset(AVSUBECUQ, r0)
+
+               case AVR: /* vrlb, vrlh, vrlw, vrld */
+                       opset(AVRLB, r0)
+                       opset(AVRLH, r0)
+                       opset(AVRLW, r0)
+                       opset(AVRLD, r0)
+
+               case AVS: /* vs[l,r], vs[l,r]o, vs[l,r]b, vs[l,r]h, vs[l,r]w, vs[l,r]d */
+                       opset(AVSLB, r0)
+                       opset(AVSLH, r0)
+                       opset(AVSLW, r0)
+                       opset(AVSL, r0)
+                       opset(AVSLO, r0)
+                       opset(AVSRB, r0)
+                       opset(AVSRH, r0)
+                       opset(AVSRW, r0)
+                       opset(AVSR, r0)
+                       opset(AVSRO, r0)
+                       opset(AVSLD, r0)
+                       opset(AVSRD, r0)
+
+               case AVSA: /* vsrab, vsrah, vsraw, vsrad */
+                       opset(AVSRAB, r0)
+                       opset(AVSRAH, r0)
+                       opset(AVSRAW, r0)
+                       opset(AVSRAD, r0)
+
+               case AVSOI: /* vsldoi */
+                       opset(AVSLDOI, r0)
+
+               case AVCLZ: /* vclzb, vclzh, vclzw, vclzd */
+                       opset(AVCLZB, r0)
+                       opset(AVCLZH, r0)
+                       opset(AVCLZW, r0)
+                       opset(AVCLZD, r0)
+
+               case AVPOPCNT: /* vpopcntb, vpopcnth, vpopcntw, vpopcntd */
+                       opset(AVPOPCNTB, r0)
+                       opset(AVPOPCNTH, r0)
+                       opset(AVPOPCNTW, r0)
+                       opset(AVPOPCNTD, r0)
+
+               case AVCMPEQ: /* vcmpequb[.], vcmpequh[.], vcmpequw[.], vcmpequd[.] */
+                       opset(AVCMPEQUB, r0)
+                       opset(AVCMPEQUBCC, r0)
+                       opset(AVCMPEQUH, r0)
+                       opset(AVCMPEQUHCC, r0)
+                       opset(AVCMPEQUW, r0)
+                       opset(AVCMPEQUWCC, r0)
+                       opset(AVCMPEQUD, r0)
+                       opset(AVCMPEQUDCC, r0)
+
+               case AVCMPGT: /* vcmpgt[u,s]b[.], vcmpgt[u,s]h[.], vcmpgt[u,s]w[.], vcmpgt[u,s]d[.] */
+                       opset(AVCMPGTUB, r0)
+                       opset(AVCMPGTUBCC, r0)
+                       opset(AVCMPGTUH, r0)
+                       opset(AVCMPGTUHCC, r0)
+                       opset(AVCMPGTUW, r0)
+                       opset(AVCMPGTUWCC, r0)
+                       opset(AVCMPGTUD, r0)
+                       opset(AVCMPGTUDCC, r0)
+                       opset(AVCMPGTSB, r0)
+                       opset(AVCMPGTSBCC, r0)
+                       opset(AVCMPGTSH, r0)
+                       opset(AVCMPGTSHCC, r0)
+                       opset(AVCMPGTSW, r0)
+                       opset(AVCMPGTSWCC, r0)
+                       opset(AVCMPGTSD, r0)
+                       opset(AVCMPGTSDCC, r0)
+
+               case AVPERM: /* vperm */
+                       opset(AVPERM, r0)
+
+               case AVSEL: /* vsel */
+                       opset(AVSEL, r0)
+
+               case AVSPLT: /* vspltb, vsplth, vspltw */
+                       opset(AVSPLTB, r0)
+                       opset(AVSPLTH, r0)
+                       opset(AVSPLTW, r0)
+
+               case AVSPLTI: /* vspltisb, vspltish, vspltisw */
+                       opset(AVSPLTISB, r0)
+                       opset(AVSPLTISH, r0)
+                       opset(AVSPLTISW, r0)
+
+               case AVCIPH: /* vcipher, vcipherlast */
+                       opset(AVCIPHER, r0)
+                       opset(AVCIPHERLAST, r0)
+
+               case AVNCIPH: /* vncipher, vncipherlast */
+                       opset(AVNCIPHER, r0)
+                       opset(AVNCIPHERLAST, r0)
+
+               case AVSBOX: /* vsbox */
+                       opset(AVSBOX, r0)
+
+               case AVSHASIGMA: /* vshasigmaw, vshasigmad */
+                       opset(AVSHASIGMAW, r0)
+                       opset(AVSHASIGMAD, r0)
+
                case AAND: /* logical op Rb,Rs,Ra; no literal */
                        opset(AANDN, r0)
 
@@ -1310,6 +1559,14 @@ func buildop(ctxt *obj.Link) {
        }
 }
 
+func OPVX(o uint32, xo uint32, oe uint32, rc uint32) uint32 {
+       return o<<26 | xo | oe<<11 | rc&1
+}
+
+func OPVC(o uint32, xo uint32, oe uint32, rc uint32) uint32 {
+       return o<<26 | xo | oe<<11 | (rc&1)<<10
+}
+
 func OPVCC(o uint32, xo uint32, oe uint32, rc uint32) uint32 {
        return o<<26 | xo<<1 | oe<<10 | rc&1
 }
@@ -1327,10 +1584,40 @@ func AOP_RRR(op uint32, d uint32, a uint32, b uint32) uint32 {
        return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11
 }
 
+/* VX-form 2-register operands, r/r/none */
+func AOP_RR(op uint32, d uint32, a uint32) uint32 {
+       return op | (d&31)<<21 | (a&31)<<11
+}
+
+/* VA-form 4-register operands */
+func AOP_RRRR(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
+       return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&31)<<6
+}
+
 func AOP_IRR(op uint32, d uint32, a uint32, simm uint32) uint32 {
        return op | (d&31)<<21 | (a&31)<<16 | simm&0xFFFF
 }
 
+/* VX-form 2-register + UIM operands */
+func AOP_VIRR(op uint32, d uint32, a uint32, simm uint32) uint32 {
+       return op | (d&31)<<21 | (simm&0xFFFF)<<16 | (a&31)<<11
+}
+
+/* VX-form 2-register + ST + SIX operands */
+func AOP_IIRR(op uint32, d uint32, a uint32, sbit uint32, simm uint32) uint32 {
+       return op | (d&31)<<21 | (a&31)<<16 | (sbit&1)<<15 | (simm&0xF)<<11
+}
+
+/* VA-form 3-register + SHB operands */
+func AOP_IRRR(op uint32, d uint32, a uint32, b uint32, simm uint32) uint32 {
+       return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (simm&0xF)<<6
+}
+
+/* VX-form 1-register + SIM operands */
+func AOP_IR(op uint32, d uint32, simm uint32) uint32 {
+       return op | (d&31)<<21 | (simm&31)<<16
+}
+
 func LOP_RRR(op uint32, a uint32, s uint32, b uint32) uint32 {
        return op | (s&31)<<21 | (a&31)<<16 | (b&31)<<11
 }
@@ -2532,11 +2819,52 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
                rel.Siz = 8
                rel.Sym = p.From.Sym
                rel.Type = obj.R_ADDRPOWER_GOT
+       case 82: /* vector instructions, VX-form and VC-form */
+               if p.From.Type == obj.TYPE_REG {
+                       /* reg reg none OR reg reg reg */
+                       /* 3-register operand order: VRA, VRB, VRT */
+                       /* 2-register operand order: VRA, VRT */
+                       o1 = AOP_RRR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
+               } else if p.From3Type() == obj.TYPE_CONST {
+                       /* imm imm reg reg */
+                       /* operand order: SIX, VRA, ST, VRT */
+                       six := int(regoff(ctxt, &p.From))
+                       st := int(regoff(ctxt, p.From3))
+                       o1 = AOP_IIRR(opiirr(ctxt, p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(st), uint32(six))
+               } else if p.From3Type() == obj.TYPE_NONE && p.Reg != 0 {
+                       /* imm reg reg */
+                       /* operand order: UIM, VRB, VRT */
+                       uim := int(regoff(ctxt, &p.From))
+                       o1 = AOP_VIRR(opirr(ctxt, p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(uim))
+               } else {
+                       /* imm reg */
+                       /* operand order: SIM, VRT */
+                       sim := int(regoff(ctxt, &p.From))
+                       o1 = AOP_IR(opirr(ctxt, p.As), uint32(p.To.Reg), uint32(sim))
+               }
+
+       case 83: /* vector instructions, VA-form */
+               if p.From.Type == obj.TYPE_REG {
+                       /* reg reg reg reg */
+                       /* 4-register operand order: VRA, VRB, VRC, VRT */
+                       o1 = AOP_RRRR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), uint32(p.From3.Reg))
+               } else if p.From.Type == obj.TYPE_CONST {
+                       /* imm reg reg reg */
+                       /* operand order: SHB, VRA, VRB, VRT */
+                       shb := int(regoff(ctxt, &p.From))
+                       o1 = AOP_IRRR(opirrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.Reg), uint32(p.From3.Reg), uint32(shb))
+               }
+
        case 84: // ISEL BC,RA,RB,RT -> isel rt,ra,rb,bc
                bc := vregoff(ctxt, &p.From)
 
                // rt = To.Reg, ra = p.Reg, rb = p.From3.Reg
                o1 = AOP_ISEL(OP_ISEL, uint32(p.To.Reg), uint32(p.Reg), uint32(p.From3.Reg), uint32(bc))
+
+       case 85: /* vector instructions, VX-form */
+               /* reg none reg */
+               /* 2-register operand order: VRB, VRT */
+               o1 = AOP_RR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg))
        }
 
        out[0] = o1
@@ -3071,13 +3399,260 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
        case ATD:
                return OPVCC(31, 68, 0, 0)
 
+       /* Vector (VMX/Altivec) instructions */
+       /* ISA 2.03 enables these for PPC970. For POWERx processors, these */
+       /* are enabled starting at POWER6 (ISA 2.05). */
+       case AVANDL:
+               return OPVX(4, 1028, 0, 0) /* vand - v2.03 */
+       case AVANDC:
+               return OPVX(4, 1092, 0, 0) /* vandc - v2.03 */
+       case AVNAND:
+               return OPVX(4, 1412, 0, 0) /* vnand - v2.07 */
+
+       case AVORL:
+               return OPVX(4, 1156, 0, 0) /* vor - v2.03 */
+       case AVORC:
+               return OPVX(4, 1348, 0, 0) /* vorc - v2.07 */
+       case AVNOR:
+               return OPVX(4, 1284, 0, 0) /* vnor - v2.03 */
+       case AVXOR:
+               return OPVX(4, 1220, 0, 0) /* vxor - v2.03 */
+       case AVEQV:
+               return OPVX(4, 1668, 0, 0) /* veqv - v2.07 */
+
+       case AVADDUBM:
+               return OPVX(4, 0, 0, 0) /* vaddubm - v2.03 */
+       case AVADDUHM:
+               return OPVX(4, 64, 0, 0) /* vadduhm - v2.03 */
+       case AVADDUWM:
+               return OPVX(4, 128, 0, 0) /* vadduwm - v2.03 */
+       case AVADDUDM:
+               return OPVX(4, 192, 0, 0) /* vaddudm - v2.07 */
+       case AVADDUQM:
+               return OPVX(4, 256, 0, 0) /* vadduqm - v2.07 */
+
+       case AVADDCUQ:
+               return OPVX(4, 320, 0, 0) /* vaddcuq - v2.07 */
+       case AVADDCUW:
+               return OPVX(4, 384, 0, 0) /* vaddcuw - v2.03 */
+
+       case AVADDUBS:
+               return OPVX(4, 512, 0, 0) /* vaddubs - v2.03 */
+       case AVADDUHS:
+               return OPVX(4, 576, 0, 0) /* vadduhs - v2.03 */
+       case AVADDUWS:
+               return OPVX(4, 640, 0, 0) /* vadduws - v2.03 */
+
+       case AVADDSBS:
+               return OPVX(4, 768, 0, 0) /* vaddsbs - v2.03 */
+       case AVADDSHS:
+               return OPVX(4, 832, 0, 0) /* vaddshs - v2.03 */
+       case AVADDSWS:
+               return OPVX(4, 896, 0, 0) /* vaddsws - v2.03 */
+
+       case AVADDEUQM:
+               return OPVX(4, 60, 0, 0) /* vaddeuqm - v2.07 */
+       case AVADDECUQ:
+               return OPVX(4, 61, 0, 0) /* vaddecuq - v2.07 */
+
+       case AVSUBUBM:
+               return OPVX(4, 1024, 0, 0) /* vsububm - v2.03 */
+       case AVSUBUHM:
+               return OPVX(4, 1088, 0, 0) /* vsubuhm - v2.03 */
+       case AVSUBUWM:
+               return OPVX(4, 1152, 0, 0) /* vsubuwm - v2.03 */
+       case AVSUBUDM:
+               return OPVX(4, 1216, 0, 0) /* vsubudm - v2.07 */
+       case AVSUBUQM:
+               return OPVX(4, 1280, 0, 0) /* vsubuqm - v2.07 */
+
+       case AVSUBCUQ:
+               return OPVX(4, 1344, 0, 0) /* vsubcuq - v2.07 */
+       case AVSUBCUW:
+               return OPVX(4, 1408, 0, 0) /* vsubcuw - v2.03 */
+
+       case AVSUBUBS:
+               return OPVX(4, 1536, 0, 0) /* vsububs - v2.03 */
+       case AVSUBUHS:
+               return OPVX(4, 1600, 0, 0) /* vsubuhs - v2.03 */
+       case AVSUBUWS:
+               return OPVX(4, 1664, 0, 0) /* vsubuws - v2.03 */
+
+       case AVSUBSBS:
+               return OPVX(4, 1792, 0, 0) /* vsubsbs - v2.03 */
+       case AVSUBSHS:
+               return OPVX(4, 1856, 0, 0) /* vsubshs - v2.03 */
+       case AVSUBSWS:
+               return OPVX(4, 1920, 0, 0) /* vsubsws - v2.03 */
+
+       case AVSUBEUQM:
+               return OPVX(4, 62, 0, 0) /* vsubeuqm - v2.07 */
+       case AVSUBECUQ:
+               return OPVX(4, 63, 0, 0) /* vsubecuq - v2.07 */
+
+       case AVRLB:
+               return OPVX(4, 4, 0, 0) /* vrlb - v2.03 */
+       case AVRLH:
+               return OPVX(4, 68, 0, 0) /* vrlh - v2.03 */
+       case AVRLW:
+               return OPVX(4, 132, 0, 0) /* vrlw - v2.03 */
+       case AVRLD:
+               return OPVX(4, 196, 0, 0) /* vrld - v2.07 */
+
+       case AVSLB:
+               return OPVX(4, 260, 0, 0) /* vslb - v2.03 */
+       case AVSLH:
+               return OPVX(4, 324, 0, 0) /* vslh - v2.03 */
+       case AVSLW:
+               return OPVX(4, 388, 0, 0) /* vslw - v2.03 */
+       case AVSL:
+               return OPVX(4, 452, 0, 0) /* vsl - v2.03 */
+       case AVSLO:
+               return OPVX(4, 1036, 0, 0) /* vslo - v2.03 */
+       case AVSRB:
+               return OPVX(4, 516, 0, 0) /* vsrb - v2.03 */
+       case AVSRH:
+               return OPVX(4, 580, 0, 0) /* vsrh - v2.03 */
+       case AVSRW:
+               return OPVX(4, 644, 0, 0) /* vsrw - v2.03 */
+       case AVSR:
+               return OPVX(4, 708, 0, 0) /* vsr - v2.03 */
+       case AVSRO:
+               return OPVX(4, 1100, 0, 0) /* vsro - v2.03 */
+       case AVSLD:
+               return OPVX(4, 1476, 0, 0) /* vsld - v2.07 */
+       case AVSRD:
+               return OPVX(4, 1732, 0, 0) /* vsrd - v2.07 */
+
+       case AVSRAB:
+               return OPVX(4, 772, 0, 0) /* vsrab - v2.03 */
+       case AVSRAH:
+               return OPVX(4, 836, 0, 0) /* vsrah - v2.03 */
+       case AVSRAW:
+               return OPVX(4, 900, 0, 0) /* vsraw - v2.03 */
+       case AVSRAD:
+               return OPVX(4, 964, 0, 0) /* vsrad - v2.07 */
+
+       case AVCLZB:
+               return OPVX(4, 1794, 0, 0) /* vclzb - v2.07 */
+       case AVCLZH:
+               return OPVX(4, 1858, 0, 0) /* vclzh - v2.07 */
+       case AVCLZW:
+               return OPVX(4, 1922, 0, 0) /* vclzw - v2.07 */
+       case AVCLZD:
+               return OPVX(4, 1986, 0, 0) /* vclzd - v2.07 */
+
+       case AVPOPCNTB:
+               return OPVX(4, 1795, 0, 0) /* vpopcntb - v2.07 */
+       case AVPOPCNTH:
+               return OPVX(4, 1859, 0, 0) /* vpopcnth - v2.07 */
+       case AVPOPCNTW:
+               return OPVX(4, 1923, 0, 0) /* vpopcntw - v2.07 */
+       case AVPOPCNTD:
+               return OPVX(4, 1987, 0, 0) /* vpopcntd - v2.07 */
+
+       case AVCMPEQUB:
+               return OPVC(4, 6, 0, 0) /* vcmpequb - v2.03 */
+       case AVCMPEQUBCC:
+               return OPVC(4, 6, 0, 1) /* vcmpequb. - v2.03 */
+       case AVCMPEQUH:
+               return OPVC(4, 70, 0, 0) /* vcmpequh - v2.03 */
+       case AVCMPEQUHCC:
+               return OPVC(4, 70, 0, 1) /* vcmpequh. - v2.03 */
+       case AVCMPEQUW:
+               return OPVC(4, 134, 0, 0) /* vcmpequw - v2.03 */
+       case AVCMPEQUWCC:
+               return OPVC(4, 134, 0, 1) /* vcmpequw. - v2.03 */
+       case AVCMPEQUD:
+               return OPVC(4, 199, 0, 0) /* vcmpequd - v2.07 */
+       case AVCMPEQUDCC:
+               return OPVC(4, 199, 0, 1) /* vcmpequd. - v2.07 */
+
+       case AVCMPGTUB:
+               return OPVC(4, 518, 0, 0) /* vcmpgtub - v2.03 */
+       case AVCMPGTUBCC:
+               return OPVC(4, 518, 0, 1) /* vcmpgtub. - v2.03 */
+       case AVCMPGTUH:
+               return OPVC(4, 582, 0, 0) /* vcmpgtuh - v2.03 */
+       case AVCMPGTUHCC:
+               return OPVC(4, 582, 0, 1) /* vcmpgtuh. - v2.03 */
+       case AVCMPGTUW:
+               return OPVC(4, 646, 0, 0) /* vcmpgtuw - v2.03 */
+       case AVCMPGTUWCC:
+               return OPVC(4, 646, 0, 1) /* vcmpgtuw. - v2.03 */
+       case AVCMPGTUD:
+               return OPVC(4, 711, 0, 0) /* vcmpgtud - v2.07 */
+       case AVCMPGTUDCC:
+               return OPVC(4, 711, 0, 1) /* vcmpgtud. - v2.07 */
+       case AVCMPGTSB:
+               return OPVC(4, 774, 0, 0) /* vcmpgtsb - v2.03 */
+       case AVCMPGTSBCC:
+               return OPVC(4, 774, 0, 1) /* vcmpgtsb. - v2.03 */
+       case AVCMPGTSH:
+               return OPVC(4, 838, 0, 0) /* vcmpgtsh - v2.03 */
+       case AVCMPGTSHCC:
+               return OPVC(4, 838, 0, 1) /* vcmpgtsh. - v2.03 */
+       case AVCMPGTSW:
+               return OPVC(4, 902, 0, 0) /* vcmpgtsw - v2.03 */
+       case AVCMPGTSWCC:
+               return OPVC(4, 902, 0, 1) /* vcmpgtsw. - v2.03 */
+       case AVCMPGTSD:
+               return OPVC(4, 967, 0, 0) /* vcmpgtsd - v2.07 */
+       case AVCMPGTSDCC:
+               return OPVC(4, 967, 0, 1) /* vcmpgtsd. - v2.07 */
+
+       case AVPERM:
+               return OPVX(4, 43, 0, 0) /* vperm - v2.03 */
+
+       case AVSEL:
+               return OPVX(4, 42, 0, 0) /* vsel - v2.03 */
+
+       case AVCIPHER:
+               return OPVX(4, 1288, 0, 0) /* vcipher - v2.07 */
+       case AVCIPHERLAST:
+               return OPVX(4, 1289, 0, 0) /* vcipherlast - v2.07 */
+       case AVNCIPHER:
+               return OPVX(4, 1352, 0, 0) /* vncipher - v2.07 */
+       case AVNCIPHERLAST:
+               return OPVX(4, 1353, 0, 0) /* vncipherlast - v2.07 */
+       case AVSBOX:
+               return OPVX(4, 1480, 0, 0) /* vsbox - v2.07 */
+       /* End of vector instructions */
+
        case AXOR:
                return OPVCC(31, 316, 0, 0)
        case AXORCC:
                return OPVCC(31, 316, 0, 1)
        }
 
-       ctxt.Diag("bad r/r opcode %v", a)
+       ctxt.Diag("bad r/r, r/r/r or r/r/r/r opcode %v", a)
+       return 0
+}
+
+func opirrr(ctxt *obj.Link, a obj.As) uint32 {
+       switch a {
+       /* Vector (VMX/Altivec) instructions */
+       /* ISA 2.03 enables these for PPC970. For POWERx processors, these */
+       /* are enabled starting at POWER6 (ISA 2.05). */
+       case AVSLDOI:
+               return OPVX(4, 44, 0, 0) /* vsldoi - v2.03 */
+       }
+
+       ctxt.Diag("bad i/r/r/r opcode %v", a)
+       return 0
+}
+
+func opiirr(ctxt *obj.Link, a obj.As) uint32 {
+       switch a {
+       /* Vector (VMX/Altivec) instructions */
+       /* ISA 2.07 enables these for POWER8 and beyond. */
+       case AVSHASIGMAW:
+               return OPVX(4, 1666, 0, 0) /* vshasigmaw - v2.07 */
+       case AVSHASIGMAD:
+               return OPVX(4, 1730, 0, 0) /* vshasigmad - v2.07 */
+       }
+
+       ctxt.Diag("bad i/i/r/r opcode %v", a)
        return 0
 }
 
@@ -3193,13 +3768,31 @@ func opirr(ctxt *obj.Link, a obj.As) uint32 {
        case ATD:
                return OPVCC(2, 0, 0, 0)
 
+       /* Vector (VMX/Altivec) instructions */
+       /* ISA 2.03 enables these for PPC970. For POWERx processors, these */
+       /* are enabled starting at POWER6 (ISA 2.05). */
+       case AVSPLTB:
+               return OPVX(4, 524, 0, 0) /* vspltb - v2.03 */
+       case AVSPLTH:
+               return OPVX(4, 588, 0, 0) /* vsplth - v2.03 */
+       case AVSPLTW:
+               return OPVX(4, 652, 0, 0) /* vspltw - v2.03 */
+
+       case AVSPLTISB:
+               return OPVX(4, 780, 0, 0) /* vspltisb - v2.03 */
+       case AVSPLTISH:
+               return OPVX(4, 844, 0, 0) /* vspltish - v2.03 */
+       case AVSPLTISW:
+               return OPVX(4, 908, 0, 0) /* vspltisw - v2.03 */
+       /* End of vector instructions */
+
        case AXOR:
                return OPVCC(26, 0, 0, 0) /* XORIL */
        case -AXOR:
                return OPVCC(27, 0, 0, 0) /* XORIU */
        }
 
-       ctxt.Diag("bad opcode i/r %v", a)
+       ctxt.Diag("bad opcode i/r or i/r/r %v", a)
        return 0
 }
 
@@ -3307,6 +3900,25 @@ func oploadx(ctxt *obj.Link, a obj.As) uint32 {
                return OPVCC(31, 21, 0, 0) /* ldx */
        case AMOVDU:
                return OPVCC(31, 53, 0, 0) /* ldux */
+
+       /* Vector (VMX/Altivec) instructions */
+       /* ISA 2.03 enables these for PPC970. For POWERx processors, these */
+       /* are enabled starting at POWER6 (ISA 2.05). */
+       case ALVEBX:
+               return OPVCC(31, 7, 0, 0) /* lvebx - v2.03 */
+       case ALVEHX:
+               return OPVCC(31, 39, 0, 0) /* lvehx - v2.03 */
+       case ALVEWX:
+               return OPVCC(31, 71, 0, 0) /* lvewx - v2.03 */
+       case ALVX:
+               return OPVCC(31, 103, 0, 0) /* lvx - v2.03 */
+       case ALVXL:
+               return OPVCC(31, 359, 0, 0) /* lvxl - v2.03 */
+       case ALVSL:
+               return OPVCC(31, 6, 0, 0) /* lvsl - v2.03 */
+       case ALVSR:
+               return OPVCC(31, 38, 0, 0) /* lvsr - v2.03 */
+               /* End of vector instructions */
        }
 
        ctxt.Diag("bad loadx opcode %v", a)
@@ -3407,6 +4019,21 @@ func opstorex(ctxt *obj.Link, a obj.As) uint32 {
                return OPVCC(31, 149, 0, 0) /* stdx */
        case AMOVDU:
                return OPVCC(31, 181, 0, 0) /* stdux */
+
+       /* Vector (VMX/Altivec) instructions */
+       /* ISA 2.03 enables these for PPC970. For POWERx processors, these */
+       /* are enabled starting at POWER6 (ISA 2.05). */
+       case ASTVEBX:
+               return OPVCC(31, 135, 0, 0) /* stvebx - v2.03 */
+       case ASTVEHX:
+               return OPVCC(31, 167, 0, 0) /* stvehx - v2.03 */
+       case ASTVEWX:
+               return OPVCC(31, 199, 0, 0) /* stvewx - v2.03 */
+       case ASTVX:
+               return OPVCC(31, 231, 0, 0) /* stvx - v2.03 */
+       case ASTVXL:
+               return OPVCC(31, 487, 0, 0) /* stvxl - v2.03 */
+               /* End of vector instructions */
        }
 
        ctxt.Diag("unknown storex opcode %v", a)
index d46297a3eb3f35857164e7c031638b50e4a92463..147473458116d5803b8726fd772d658c13376e3f 100644 (file)
@@ -53,6 +53,9 @@ func Rconv(r int) string {
        if REG_F0 <= r && r <= REG_F31 {
                return fmt.Sprintf("F%d", r-REG_F0)
        }
+       if REG_V0 <= r && r <= REG_V31 {
+               return fmt.Sprintf("V%d", r-REG_V0)
+       }
        if REG_CR0 <= r && r <= REG_CR7 {
                return fmt.Sprintf("CR%d", r-REG_CR0)
        }