]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/asm: add more SIMD instructions on arm64
authorJunchen Li <junchen.li@arm.com>
Mon, 31 Aug 2020 05:32:33 +0000 (13:32 +0800)
committerCherry Zhang <cherryyz@google.com>
Thu, 10 Sep 2020 15:48:36 +0000 (15:48 +0000)
This CL adds USHLL, USHLL2, UZP1, UZP2, and BIF instructions requested
by #40725. And since UXTL* are aliases of USHLL*, this CL also merges
them into one case.

Updates #40725

Change-Id: I404a4fdaf953319f72eea548175bec1097a2a816
Reviewed-on: https://go-review.googlesource.com/c/go/+/253659
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/cmd/asm/internal/asm/testdata/arm64.s
src/cmd/asm/internal/asm/testdata/arm64error.s
src/cmd/internal/obj/arm64/a.out.go
src/cmd/internal/obj/arm64/anames.go
src/cmd/internal/obj/arm64/asm7.go

index 451ca749ba0f4cc41c7ebb778d6eae56db7e5b8b..e106ff2ae114bb23adbcfbd77846690e889f53f9 100644 (file)
@@ -156,6 +156,26 @@ TEXT       foo(SB), DUPOK|NOSPLIT, $-8
        VCMTST  V2.B8, V29.B8, V2.B8            // a28f220e
        VCMTST  V2.D2, V23.D2, V3.D2            // e38ee24e
        VSUB    V2.B8, V30.B8, V30.B8           // de87222e
+       VUZP1   V0.B8, V30.B8, V1.B8            // c11b000e
+       VUZP1   V1.B16, V29.B16, V2.B16         // a21b014e
+       VUZP1   V2.H4, V28.H4, V3.H4            // 831b420e
+       VUZP1   V3.H8, V27.H8, V4.H8            // 641b434e
+       VUZP1   V28.S2, V2.S2, V5.S2            // 45189c0e
+       VUZP1   V29.S4, V1.S4, V6.S4            // 26189d4e
+       VUZP1   V30.D2, V0.D2, V7.D2            // 0718de4e
+       VUZP2   V0.D2, V30.D2, V1.D2            // c15bc04e
+       VUZP2   V30.D2, V0.D2, V29.D2           // 1d58de4e
+       VUSHLL  $0, V30.B8, V30.H8              // dea7082f
+       VUSHLL  $0, V30.H4, V29.S4              // dda7102f
+       VUSHLL  $0, V29.S2, V2.D2               // a2a7202f
+       VUSHLL2 $0, V30.B16, V2.H8              // c2a7086f
+       VUSHLL2 $0, V30.H8, V30.S4              // dea7106f
+       VUSHLL2 $0, V29.S4, V2.D2               // a2a7206f
+       VUSHLL  $7, V30.B8, V30.H8              // dea70f2f
+       VUSHLL  $15, V30.H4, V29.S4             // dda71f2f
+       VUSHLL2 $31, V30.S4, V2.D2              // c2a73f6f
+       VBIF    V0.B8, V30.B8, V1.B8            // c11fe02e
+       VBIF    V30.B16, V0.B16, V2.B16         // 021cfe6e
        MOVD    (R2)(R6.SXTW), R4               // 44c866f8
        MOVD    (R3)(R6), R5                    // MOVD (R3)(R6*1), R5                  // 656866f8
        MOVD    (R2)(R6), R4                    // MOVD (R2)(R6*1), R4                  // 446866f8
index 2a911b4cced41437fd37f0cf3be0e69ee4ba44d7..20b1f3e9f06cf4ac7a32ec8e8ac5621ddac0f496 100644 (file)
@@ -345,4 +345,12 @@ TEXT errors(SB),$0
        VUXTL   V30.D2, V30.H8                                   // ERROR "operand mismatch"
        VUXTL2  V20.B8, V21.H8                                   // ERROR "operand mismatch"
        VUXTL   V3.D2, V4.B8                                     // ERROR "operand mismatch"
+       VUZP1   V0.B8, V30.B8, V1.B16                            // ERROR "operand mismatch"
+       VUZP2   V0.Q1, V30.Q1, V1.Q1                             // ERROR "invalid arrangement"
+       VUSHLL  $0, V30.D2, V30.H8                               // ERROR "operand mismatch"
+       VUSHLL2 $0, V20.B8, V21.H8                               // ERROR "operand mismatch"
+       VUSHLL  $8, V30.B8, V30.H8                               // ERROR "shift amount out of range"
+       VUSHLL2 $32, V30.S4, V2.D2                               // ERROR "shift amount out of range"
+       VBIF    V0.B8, V1.B8, V2.B16                             // ERROR "operand mismatch"
+       VBIF    V0.D2, V1.D2, V2.D2                              // ERROR "invalid arrangement"
        RET
index ab065e07e5db2c8a01a1493e0bbddccc662cd801..2839da1437b6b33c7fff75126f1015292ffc33bb 100644 (file)
@@ -954,6 +954,7 @@ const (
        AVADD
        AVADDP
        AVAND
+       AVBIF
        AVCMEQ
        AVCNT
        AVEOR
@@ -986,6 +987,12 @@ const (
        AVEXT
        AVRBIT
        AVUSHR
+       AVUSHLL
+       AVUSHLL2
+       AVUXTL
+       AVUXTL2
+       AVUZP1
+       AVUZP2
        AVSHL
        AVSRI
        AVBSL
@@ -994,8 +1001,6 @@ const (
        AVZIP1
        AVZIP2
        AVCMTST
-       AVUXTL
-       AVUXTL2
        ALAST
        AB  = obj.AJMP
        ABL = obj.ACALL
index 8961f04b0cb126bfec694dfbd03b48196d8730f9..48c066abfdad268e9b039b6fe361e302272b4050 100644 (file)
@@ -461,6 +461,7 @@ var Anames = []string{
        "VADD",
        "VADDP",
        "VAND",
+       "VBIF",
        "VCMEQ",
        "VCNT",
        "VEOR",
@@ -493,6 +494,12 @@ var Anames = []string{
        "VEXT",
        "VRBIT",
        "VUSHR",
+       "VUSHLL",
+       "VUSHLL2",
+       "VUXTL",
+       "VUXTL2",
+       "VUZP1",
+       "VUZP2",
        "VSHL",
        "VSRI",
        "VBSL",
@@ -501,7 +508,5 @@ var Anames = []string{
        "VZIP1",
        "VZIP2",
        "VCMTST",
-       "VUXTL",
-       "VUXTL2",
        "LAST",
 }
index 7ce18d0f137d3f972347147256f1ae86a63b44d1..df4bbbbd352bf6679d9df523ca7c1caf3da2ee49 100644 (file)
@@ -480,6 +480,7 @@ var optab = []Optab{
        {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0},
        {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0},
        {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0},
+       {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0},
        {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0},
 
        /* conditional operations */
@@ -2751,6 +2752,9 @@ func buildop(ctxt *obj.Link) {
                        oprangeset(AVBSL, t)
                        oprangeset(AVBIT, t)
                        oprangeset(AVCMTST, t)
+                       oprangeset(AVUZP1, t)
+                       oprangeset(AVUZP2, t)
+                       oprangeset(AVBIF, t)
 
                case AVADD:
                        oprangeset(AVSUB, t)
@@ -2801,6 +2805,9 @@ func buildop(ctxt *obj.Link) {
                case AVUXTL:
                        oprangeset(AVUXTL2, t)
 
+               case AVUSHLL:
+                       oprangeset(AVUSHLL2, t)
+
                case AVLD1R:
                        oprangeset(AVLD2, t)
                        oprangeset(AVLD2R, t)
@@ -4177,7 +4184,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                rel.Add = 0
                rel.Type = objabi.R_ARM64_GOTPCREL
 
-       case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm.<T>, Vn.<T>, Vd.<T> */
+       case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm.<T>, Vn.<T>, Vd.<T> */
                af := int((p.From.Reg >> 5) & 15)
                af3 := int((p.Reg >> 5) & 15)
                at := int((p.To.Reg >> 5) & 15)
@@ -4219,7 +4226,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                }
 
                switch p.As {
-               case AVORR, AVAND, AVEOR, AVBIT, AVBSL:
+               case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF:
                        if af != ARNG_16B && af != ARNG_8B {
                                c.ctxt.Diag("invalid arrangement: %v", p)
                        }
@@ -4233,7 +4240,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                        size = 0
                case AVBSL:
                        size = 1
-               case AVORR, AVBIT:
+               case AVORR, AVBIT, AVBIF:
                        size = 2
                case AVFMLA, AVFMLS:
                        if af == ARNG_2D {
@@ -5120,56 +5127,44 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
        case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool.
                o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
 
-       case 102: // VUXTL{2} Vn.<Tb>, Vd.<Ta>
-               af := int((p.From.Reg >> 5) & 15)
-               at := int((p.To.Reg >> 5) & 15)
-               var Q, immh uint32
-               switch at {
-               case ARNG_8H:
-                       if af == ARNG_8B {
-                               immh = 1
-                               Q = 0
-                       } else if af == ARNG_16B {
-                               immh = 1
-                               Q = 1
-                       } else {
-                               c.ctxt.Diag("operand mismatch: %v\n", p)
-                       }
-               case ARNG_4S:
-                       if af == ARNG_4H {
-                               immh = 2
-                               Q = 0
-                       } else if af == ARNG_8H {
-                               immh = 2
-                               Q = 1
-                       } else {
-                               c.ctxt.Diag("operand mismatch: %v\n", p)
-                       }
-               case ARNG_2D:
-                       if af == ARNG_2S {
-                               immh = 4
-                               Q = 0
-                       } else if af == ARNG_4S {
-                               immh = 4
-                               Q = 1
-                       } else {
-                               c.ctxt.Diag("operand mismatch: %v\n", p)
-                       }
+       case 102: /* vushll, vushll2, vuxtl, vuxtl2 */
+               o1 = c.opirr(p, p.As)
+               rf := p.Reg
+               af := uint8((p.Reg >> 5) & 15)
+               at := uint8((p.To.Reg >> 5) & 15)
+               shift := int(p.From.Offset)
+               if p.As == AVUXTL || p.As == AVUXTL2 {
+                       rf = p.From.Reg
+                       af = uint8((p.From.Reg >> 5) & 15)
+                       shift = 0
+               }
+
+               pack := func(q, x, y uint8) uint32 {
+                       return uint32(q)<<16 | uint32(x)<<8 | uint32(y)
+               }
+
+               var Q uint8 = uint8(o1>>30) & 1
+               var immh, width uint8
+               switch pack(Q, af, at) {
+               case pack(0, ARNG_8B, ARNG_8H):
+                       immh, width = 1, 8
+               case pack(1, ARNG_16B, ARNG_8H):
+                       immh, width = 1, 8
+               case pack(0, ARNG_4H, ARNG_4S):
+                       immh, width = 2, 16
+               case pack(1, ARNG_8H, ARNG_4S):
+                       immh, width = 2, 16
+               case pack(0, ARNG_2S, ARNG_2D):
+                       immh, width = 4, 32
+               case pack(1, ARNG_4S, ARNG_2D):
+                       immh, width = 4, 32
                default:
                        c.ctxt.Diag("operand mismatch: %v\n", p)
                }
-
-               if p.As == AVUXTL && Q == 1 {
-                       c.ctxt.Diag("operand mismatch: %v\n", p)
+               if !(0 <= shift && shift <= int(width-1)) {
+                       c.ctxt.Diag("shift amount out of range: %v\n", p)
                }
-               if p.As == AVUXTL2 && Q == 0 {
-                       c.ctxt.Diag("operand mismatch: %v\n", p)
-               }
-
-               o1 = c.oprrr(p, p.As)
-               rf := int((p.From.Reg) & 31)
-               rt := int((p.To.Reg) & 31)
-               o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31)
+               o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31)
        }
        out[0] = o1
        out[1] = o2
@@ -5802,6 +5797,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
        case AVLD2R, AVLD4R:
                return 0xD<<24 | 3<<21
 
+       case AVBIF:
+               return 1<<29 | 7<<25 | 7<<21 | 7<<10
+
        case AVBIT:
                return 1<<29 | 0x75<<21 | 7<<10
 
@@ -5811,8 +5809,11 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
        case AVCMTST:
                return 0xE<<24 | 1<<21 | 0x23<<10
 
-       case AVUXTL, AVUXTL2:
-               return 0x5e<<23 | 0x29<<10
+       case AVUZP1:
+               return 7<<25 | 3<<11
+
+       case AVUZP2:
+               return 7<<25 | 1<<14 | 3<<11
        }
 
        c.ctxt.Diag("%v: bad rrr %d %v", p, a, a)
@@ -6011,6 +6012,12 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
 
        case AVSRI:
                return 0x5E<<23 | 17<<10
+
+       case AVUSHLL, AVUXTL:
+               return 1<<29 | 15<<24 | 0x29<<10
+
+       case AVUSHLL2, AVUXTL2:
+               return 3<<29 | 15<<24 | 0x29<<10
        }
 
        c.ctxt.Diag("%v: bad irr %v", p, a)