Cypherpunks repositories - gostls13.git/commitdiff
internal/runtime/gc/scan: include package in expandAVX512 symbol names
author Michael Pratt <mpratt@google.com>
Thu, 22 Jan 2026 16:56:00 +0000 (11:56 -0500)
committer Michael Pratt <mpratt@google.com>
Tue, 27 Jan 2026 16:25:05 +0000 (08:25 -0800)
These symbols are part of the scan package, so they should include the
package name in the symbol name for consistency.

Change-Id: I6a6a636ca63b34a8da7fb2f0bfe1fa5f8672fff5
Reviewed-on: https://go-review.googlesource.com/c/go/+/738522
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/internal/runtime/gc/scan/expand_amd64.s
src/internal/runtime/gc/scan/mkasm.go

index 6b0be44cc10450a35464bff7b6348034d839ee28..6aed13923b261f833a03c92306b559ab09cb58e6 100644 (file)
@@ -5,32 +5,32 @@
 
 GLOBL ·gcExpandersAVX512(SB), RODATA, $0x220
 DATA  ·gcExpandersAVX512+0x00(SB)/8, $0
-DATA  ·gcExpandersAVX512+0x08(SB)/8, $expandAVX512_1<>(SB)
-DATA  ·gcExpandersAVX512+0x10(SB)/8, $expandAVX512_2<>(SB)
-DATA  ·gcExpandersAVX512+0x18(SB)/8, $expandAVX512_3<>(SB)
-DATA  ·gcExpandersAVX512+0x20(SB)/8, $expandAVX512_4<>(SB)
-DATA  ·gcExpandersAVX512+0x28(SB)/8, $expandAVX512_6<>(SB)
-DATA  ·gcExpandersAVX512+0x30(SB)/8, $expandAVX512_8<>(SB)
-DATA  ·gcExpandersAVX512+0x38(SB)/8, $expandAVX512_10<>(SB)
-DATA  ·gcExpandersAVX512+0x40(SB)/8, $expandAVX512_12<>(SB)
-DATA  ·gcExpandersAVX512+0x48(SB)/8, $expandAVX512_14<>(SB)
-DATA  ·gcExpandersAVX512+0x50(SB)/8, $expandAVX512_16<>(SB)
-DATA  ·gcExpandersAVX512+0x58(SB)/8, $expandAVX512_18<>(SB)
-DATA  ·gcExpandersAVX512+0x60(SB)/8, $expandAVX512_20<>(SB)
-DATA  ·gcExpandersAVX512+0x68(SB)/8, $expandAVX512_22<>(SB)
-DATA  ·gcExpandersAVX512+0x70(SB)/8, $expandAVX512_24<>(SB)
-DATA  ·gcExpandersAVX512+0x78(SB)/8, $expandAVX512_26<>(SB)
-DATA  ·gcExpandersAVX512+0x80(SB)/8, $expandAVX512_28<>(SB)
-DATA  ·gcExpandersAVX512+0x88(SB)/8, $expandAVX512_30<>(SB)
-DATA  ·gcExpandersAVX512+0x90(SB)/8, $expandAVX512_32<>(SB)
-DATA  ·gcExpandersAVX512+0x98(SB)/8, $expandAVX512_36<>(SB)
-DATA  ·gcExpandersAVX512+0xa0(SB)/8, $expandAVX512_40<>(SB)
-DATA  ·gcExpandersAVX512+0xa8(SB)/8, $expandAVX512_44<>(SB)
-DATA  ·gcExpandersAVX512+0xb0(SB)/8, $expandAVX512_48<>(SB)
-DATA  ·gcExpandersAVX512+0xb8(SB)/8, $expandAVX512_52<>(SB)
-DATA  ·gcExpandersAVX512+0xc0(SB)/8, $expandAVX512_56<>(SB)
-DATA  ·gcExpandersAVX512+0xc8(SB)/8, $expandAVX512_60<>(SB)
-DATA  ·gcExpandersAVX512+0xd0(SB)/8, $expandAVX512_64<>(SB)
+DATA  ·gcExpandersAVX512+0x08(SB)/8, $·expandAVX512_1<>(SB)
+DATA  ·gcExpandersAVX512+0x10(SB)/8, $·expandAVX512_2<>(SB)
+DATA  ·gcExpandersAVX512+0x18(SB)/8, $·expandAVX512_3<>(SB)
+DATA  ·gcExpandersAVX512+0x20(SB)/8, $·expandAVX512_4<>(SB)
+DATA  ·gcExpandersAVX512+0x28(SB)/8, $·expandAVX512_6<>(SB)
+DATA  ·gcExpandersAVX512+0x30(SB)/8, $·expandAVX512_8<>(SB)
+DATA  ·gcExpandersAVX512+0x38(SB)/8, $·expandAVX512_10<>(SB)
+DATA  ·gcExpandersAVX512+0x40(SB)/8, $·expandAVX512_12<>(SB)
+DATA  ·gcExpandersAVX512+0x48(SB)/8, $·expandAVX512_14<>(SB)
+DATA  ·gcExpandersAVX512+0x50(SB)/8, $·expandAVX512_16<>(SB)
+DATA  ·gcExpandersAVX512+0x58(SB)/8, $·expandAVX512_18<>(SB)
+DATA  ·gcExpandersAVX512+0x60(SB)/8, $·expandAVX512_20<>(SB)
+DATA  ·gcExpandersAVX512+0x68(SB)/8, $·expandAVX512_22<>(SB)
+DATA  ·gcExpandersAVX512+0x70(SB)/8, $·expandAVX512_24<>(SB)
+DATA  ·gcExpandersAVX512+0x78(SB)/8, $·expandAVX512_26<>(SB)
+DATA  ·gcExpandersAVX512+0x80(SB)/8, $·expandAVX512_28<>(SB)
+DATA  ·gcExpandersAVX512+0x88(SB)/8, $·expandAVX512_30<>(SB)
+DATA  ·gcExpandersAVX512+0x90(SB)/8, $·expandAVX512_32<>(SB)
+DATA  ·gcExpandersAVX512+0x98(SB)/8, $·expandAVX512_36<>(SB)
+DATA  ·gcExpandersAVX512+0xa0(SB)/8, $·expandAVX512_40<>(SB)
+DATA  ·gcExpandersAVX512+0xa8(SB)/8, $·expandAVX512_44<>(SB)
+DATA  ·gcExpandersAVX512+0xb0(SB)/8, $·expandAVX512_48<>(SB)
+DATA  ·gcExpandersAVX512+0xb8(SB)/8, $·expandAVX512_52<>(SB)
+DATA  ·gcExpandersAVX512+0xc0(SB)/8, $·expandAVX512_56<>(SB)
+DATA  ·gcExpandersAVX512+0xc8(SB)/8, $·expandAVX512_60<>(SB)
+DATA  ·gcExpandersAVX512+0xd0(SB)/8, $·expandAVX512_64<>(SB)
 DATA  ·gcExpandersAVX512+0xd8(SB)/8, $0
 DATA  ·gcExpandersAVX512+0xe0(SB)/8, $0
 DATA  ·gcExpandersAVX512+0xe8(SB)/8, $0
@@ -73,56 +73,56 @@ DATA  ·gcExpandersAVX512+0x208(SB)/8, $0
 DATA  ·gcExpandersAVX512+0x210(SB)/8, $0
 DATA  ·gcExpandersAVX512+0x218(SB)/8, $0
 
-TEXT expandAVX512_1<>(SB), NOSPLIT, $0-0
+TEXT ·expandAVX512_1<>(SB), NOSPLIT, $0-0
        VMOVDQU64 (AX), Z1
        VMOVDQU64 64(AX), Z2
        RET
 
-GLOBL expandAVX512_2_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_2_inShuf0<>+0x00(SB)/8, $0x0706050403020100
-DATA  expandAVX512_2_inShuf0<>+0x08(SB)/8, $0x0706050403020100
-DATA  expandAVX512_2_inShuf0<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_2_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_2_inShuf0<>+0x20(SB)/8, $0x1716151413121110
-DATA  expandAVX512_2_inShuf0<>+0x28(SB)/8, $0x1716151413121110
-DATA  expandAVX512_2_inShuf0<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
-DATA  expandAVX512_2_inShuf0<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
-
-GLOBL expandAVX512_2_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_2_mat0<>+0x00(SB)/8, $0x0101020204040808
-DATA  expandAVX512_2_mat0<>+0x08(SB)/8, $0x1010202040408080
-DATA  expandAVX512_2_mat0<>+0x10(SB)/8, $0x0101020204040808
-DATA  expandAVX512_2_mat0<>+0x18(SB)/8, $0x1010202040408080
-DATA  expandAVX512_2_mat0<>+0x20(SB)/8, $0x0101020204040808
-DATA  expandAVX512_2_mat0<>+0x28(SB)/8, $0x1010202040408080
-DATA  expandAVX512_2_mat0<>+0x30(SB)/8, $0x0101020204040808
-DATA  expandAVX512_2_mat0<>+0x38(SB)/8, $0x1010202040408080
-
-GLOBL expandAVX512_2_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_2_inShuf1<>+0x00(SB)/8, $0x2726252423222120
-DATA  expandAVX512_2_inShuf1<>+0x08(SB)/8, $0x2726252423222120
-DATA  expandAVX512_2_inShuf1<>+0x10(SB)/8, $0x2f2e2d2c2b2a2928
-DATA  expandAVX512_2_inShuf1<>+0x18(SB)/8, $0x2f2e2d2c2b2a2928
-DATA  expandAVX512_2_inShuf1<>+0x20(SB)/8, $0x3736353433323130
-DATA  expandAVX512_2_inShuf1<>+0x28(SB)/8, $0x3736353433323130
-DATA  expandAVX512_2_inShuf1<>+0x30(SB)/8, $0x3f3e3d3c3b3a3938
-DATA  expandAVX512_2_inShuf1<>+0x38(SB)/8, $0x3f3e3d3c3b3a3938
-
-GLOBL expandAVX512_2_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_2_outShufLo+0x00(SB)/8, $0x0b030a0209010800
-DATA  expandAVX512_2_outShufLo+0x08(SB)/8, $0x0f070e060d050c04
-DATA  expandAVX512_2_outShufLo+0x10(SB)/8, $0x1b131a1219111810
-DATA  expandAVX512_2_outShufLo+0x18(SB)/8, $0x1f171e161d151c14
-DATA  expandAVX512_2_outShufLo+0x20(SB)/8, $0x2b232a2229212820
-DATA  expandAVX512_2_outShufLo+0x28(SB)/8, $0x2f272e262d252c24
-DATA  expandAVX512_2_outShufLo+0x30(SB)/8, $0x3b333a3239313830
-DATA  expandAVX512_2_outShufLo+0x38(SB)/8, $0x3f373e363d353c34
-
-TEXT expandAVX512_2<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_2_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_2_mat0<>(SB), Z1
-       VMOVDQU64 expandAVX512_2_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_2_outShufLo(SB), Z3
+GLOBL ·expandAVX512_2_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_2_inShuf0<>+0x00(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_2_inShuf0<>+0x08(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_2_inShuf0<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_2_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_2_inShuf0<>+0x20(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_2_inShuf0<>+0x28(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_2_inShuf0<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
+DATA  ·expandAVX512_2_inShuf0<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
+
+GLOBL ·expandAVX512_2_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_2_mat0<>+0x00(SB)/8, $0x0101020204040808
+DATA  ·expandAVX512_2_mat0<>+0x08(SB)/8, $0x1010202040408080
+DATA  ·expandAVX512_2_mat0<>+0x10(SB)/8, $0x0101020204040808
+DATA  ·expandAVX512_2_mat0<>+0x18(SB)/8, $0x1010202040408080
+DATA  ·expandAVX512_2_mat0<>+0x20(SB)/8, $0x0101020204040808
+DATA  ·expandAVX512_2_mat0<>+0x28(SB)/8, $0x1010202040408080
+DATA  ·expandAVX512_2_mat0<>+0x30(SB)/8, $0x0101020204040808
+DATA  ·expandAVX512_2_mat0<>+0x38(SB)/8, $0x1010202040408080
+
+GLOBL ·expandAVX512_2_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_2_inShuf1<>+0x00(SB)/8, $0x2726252423222120
+DATA  ·expandAVX512_2_inShuf1<>+0x08(SB)/8, $0x2726252423222120
+DATA  ·expandAVX512_2_inShuf1<>+0x10(SB)/8, $0x2f2e2d2c2b2a2928
+DATA  ·expandAVX512_2_inShuf1<>+0x18(SB)/8, $0x2f2e2d2c2b2a2928
+DATA  ·expandAVX512_2_inShuf1<>+0x20(SB)/8, $0x3736353433323130
+DATA  ·expandAVX512_2_inShuf1<>+0x28(SB)/8, $0x3736353433323130
+DATA  ·expandAVX512_2_inShuf1<>+0x30(SB)/8, $0x3f3e3d3c3b3a3938
+DATA  ·expandAVX512_2_inShuf1<>+0x38(SB)/8, $0x3f3e3d3c3b3a3938
+
+GLOBL ·expandAVX512_2_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_2_outShufLo+0x00(SB)/8, $0x0b030a0209010800
+DATA  ·expandAVX512_2_outShufLo+0x08(SB)/8, $0x0f070e060d050c04
+DATA  ·expandAVX512_2_outShufLo+0x10(SB)/8, $0x1b131a1219111810
+DATA  ·expandAVX512_2_outShufLo+0x18(SB)/8, $0x1f171e161d151c14
+DATA  ·expandAVX512_2_outShufLo+0x20(SB)/8, $0x2b232a2229212820
+DATA  ·expandAVX512_2_outShufLo+0x28(SB)/8, $0x2f272e262d252c24
+DATA  ·expandAVX512_2_outShufLo+0x30(SB)/8, $0x3b333a3239313830
+DATA  ·expandAVX512_2_outShufLo+0x38(SB)/8, $0x3f373e363d353c34
+
+TEXT ·expandAVX512_2<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_2_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_2_mat0<>(SB), Z1
+       VMOVDQU64 ·expandAVX512_2_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_2_outShufLo(SB), Z3
        VMOVDQU64 (AX), Z4
        VPERMB Z4, Z0, Z0
        VGF2P8AFFINEQB $0, Z1, Z0, Z0
@@ -132,73 +132,73 @@ TEXT expandAVX512_2<>(SB), NOSPLIT, $0-0
        VPERMB Z2, Z3, Z2
        RET
 
-GLOBL expandAVX512_3_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_3_inShuf0<>+0x00(SB)/8, $0x0706050403020100
-DATA  expandAVX512_3_inShuf0<>+0x08(SB)/8, $0x0706050403020100
-DATA  expandAVX512_3_inShuf0<>+0x10(SB)/8, $0x0706050403020100
-DATA  expandAVX512_3_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_3_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_3_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_3_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_3_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_3_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_3_mat0<>+0x00(SB)/8, $0x0101010202020404
-DATA  expandAVX512_3_mat0<>+0x08(SB)/8, $0x0408080810101020
-DATA  expandAVX512_3_mat0<>+0x10(SB)/8, $0x2020404040808080
-DATA  expandAVX512_3_mat0<>+0x18(SB)/8, $0x0101010202020404
-DATA  expandAVX512_3_mat0<>+0x20(SB)/8, $0x0408080810101020
-DATA  expandAVX512_3_mat0<>+0x28(SB)/8, $0x2020404040808080
-DATA  expandAVX512_3_mat0<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_3_mat0<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_3_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_3_inShuf1<>+0x00(SB)/8, $0x1716151413121110
-DATA  expandAVX512_3_inShuf1<>+0x08(SB)/8, $0x1716151413121110
-DATA  expandAVX512_3_inShuf1<>+0x10(SB)/8, $0x1716151413121110
-DATA  expandAVX512_3_inShuf1<>+0x18(SB)/8, $0x1f1e1d1c1b1a1918
-DATA  expandAVX512_3_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
-DATA  expandAVX512_3_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
-DATA  expandAVX512_3_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_3_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_3_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_3_inShuf2<>+0x00(SB)/8, $0x2726252423222120
-DATA  expandAVX512_3_inShuf2<>+0x08(SB)/8, $0x2726252423222120
-DATA  expandAVX512_3_inShuf2<>+0x10(SB)/8, $0x2726252423222120
-DATA  expandAVX512_3_inShuf2<>+0x18(SB)/8, $0xffffffffff2a2928
-DATA  expandAVX512_3_inShuf2<>+0x20(SB)/8, $0xffffffffff2a2928
-DATA  expandAVX512_3_inShuf2<>+0x28(SB)/8, $0xffffffffffff2928
-DATA  expandAVX512_3_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_3_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_3_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_3_outShufLo+0x00(SB)/8, $0x0a02110901100800
-DATA  expandAVX512_3_outShufLo+0x08(SB)/8, $0x05140c04130b0312
-DATA  expandAVX512_3_outShufLo+0x10(SB)/8, $0x170f07160e06150d
-DATA  expandAVX512_3_outShufLo+0x18(SB)/8, $0x221a292119282018
-DATA  expandAVX512_3_outShufLo+0x20(SB)/8, $0x1d2c241c2b231b2a
-DATA  expandAVX512_3_outShufLo+0x28(SB)/8, $0x2f271f2e261e2d25
-DATA  expandAVX512_3_outShufLo+0x30(SB)/8, $0x4a42514941504840
-DATA  expandAVX512_3_outShufLo+0x38(SB)/8, $0x45544c44534b4352
-
-GLOBL expandAVX512_3_outShufHi(SB), RODATA, $0x40
-DATA  expandAVX512_3_outShufHi+0x00(SB)/8, $0x170f07160e06150d
-DATA  expandAVX512_3_outShufHi+0x08(SB)/8, $0x221a292119282018
-DATA  expandAVX512_3_outShufHi+0x10(SB)/8, $0x1d2c241c2b231b2a
-DATA  expandAVX512_3_outShufHi+0x18(SB)/8, $0x2f271f2e261e2d25
-DATA  expandAVX512_3_outShufHi+0x20(SB)/8, $0x4a42514941504840
-DATA  expandAVX512_3_outShufHi+0x28(SB)/8, $0x45544c44534b4352
-DATA  expandAVX512_3_outShufHi+0x30(SB)/8, $0x574f47564e46554d
-DATA  expandAVX512_3_outShufHi+0x38(SB)/8, $0x625a696159686058
-
-TEXT expandAVX512_3<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_3_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_3_mat0<>(SB), Z3
-       VMOVDQU64 expandAVX512_3_inShuf1<>(SB), Z4
-       VMOVDQU64 expandAVX512_3_inShuf2<>(SB), Z5
-       VMOVDQU64 expandAVX512_3_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_3_outShufHi(SB), Z2
+GLOBL ·expandAVX512_3_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_3_inShuf0<>+0x00(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_3_inShuf0<>+0x08(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_3_inShuf0<>+0x10(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_3_inShuf0<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_3_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_3_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_3_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_3_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_3_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_3_mat0<>+0x00(SB)/8, $0x0101010202020404
+DATA  ·expandAVX512_3_mat0<>+0x08(SB)/8, $0x0408080810101020
+DATA  ·expandAVX512_3_mat0<>+0x10(SB)/8, $0x2020404040808080
+DATA  ·expandAVX512_3_mat0<>+0x18(SB)/8, $0x0101010202020404
+DATA  ·expandAVX512_3_mat0<>+0x20(SB)/8, $0x0408080810101020
+DATA  ·expandAVX512_3_mat0<>+0x28(SB)/8, $0x2020404040808080
+DATA  ·expandAVX512_3_mat0<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_3_mat0<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_3_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_3_inShuf1<>+0x00(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_3_inShuf1<>+0x08(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_3_inShuf1<>+0x10(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_3_inShuf1<>+0x18(SB)/8, $0x1f1e1d1c1b1a1918
+DATA  ·expandAVX512_3_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
+DATA  ·expandAVX512_3_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
+DATA  ·expandAVX512_3_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_3_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_3_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_3_inShuf2<>+0x00(SB)/8, $0x2726252423222120
+DATA  ·expandAVX512_3_inShuf2<>+0x08(SB)/8, $0x2726252423222120
+DATA  ·expandAVX512_3_inShuf2<>+0x10(SB)/8, $0x2726252423222120
+DATA  ·expandAVX512_3_inShuf2<>+0x18(SB)/8, $0xffffffffff2a2928
+DATA  ·expandAVX512_3_inShuf2<>+0x20(SB)/8, $0xffffffffff2a2928
+DATA  ·expandAVX512_3_inShuf2<>+0x28(SB)/8, $0xffffffffffff2928
+DATA  ·expandAVX512_3_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_3_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_3_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_3_outShufLo+0x00(SB)/8, $0x0a02110901100800
+DATA  ·expandAVX512_3_outShufLo+0x08(SB)/8, $0x05140c04130b0312
+DATA  ·expandAVX512_3_outShufLo+0x10(SB)/8, $0x170f07160e06150d
+DATA  ·expandAVX512_3_outShufLo+0x18(SB)/8, $0x221a292119282018
+DATA  ·expandAVX512_3_outShufLo+0x20(SB)/8, $0x1d2c241c2b231b2a
+DATA  ·expandAVX512_3_outShufLo+0x28(SB)/8, $0x2f271f2e261e2d25
+DATA  ·expandAVX512_3_outShufLo+0x30(SB)/8, $0x4a42514941504840
+DATA  ·expandAVX512_3_outShufLo+0x38(SB)/8, $0x45544c44534b4352
+
+GLOBL ·expandAVX512_3_outShufHi(SB), RODATA, $0x40
+DATA  ·expandAVX512_3_outShufHi+0x00(SB)/8, $0x170f07160e06150d
+DATA  ·expandAVX512_3_outShufHi+0x08(SB)/8, $0x221a292119282018
+DATA  ·expandAVX512_3_outShufHi+0x10(SB)/8, $0x1d2c241c2b231b2a
+DATA  ·expandAVX512_3_outShufHi+0x18(SB)/8, $0x2f271f2e261e2d25
+DATA  ·expandAVX512_3_outShufHi+0x20(SB)/8, $0x4a42514941504840
+DATA  ·expandAVX512_3_outShufHi+0x28(SB)/8, $0x45544c44534b4352
+DATA  ·expandAVX512_3_outShufHi+0x30(SB)/8, $0x574f47564e46554d
+DATA  ·expandAVX512_3_outShufHi+0x38(SB)/8, $0x625a696159686058
+
+TEXT ·expandAVX512_3<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_3_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_3_mat0<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_3_inShuf1<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_3_inShuf2<>(SB), Z5
+       VMOVDQU64 ·expandAVX512_3_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_3_outShufHi(SB), Z2
        VMOVDQU64 (AX), Z6
        VPERMB Z6, Z0, Z0
        VGF2P8AFFINEQB $0, Z3, Z0, Z0
@@ -210,51 +210,51 @@ TEXT expandAVX512_3<>(SB), NOSPLIT, $0-0
        VPERMI2B Z3, Z4, Z2
        RET
 
-GLOBL expandAVX512_4_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_4_inShuf0<>+0x00(SB)/8, $0x0706050403020100
-DATA  expandAVX512_4_inShuf0<>+0x08(SB)/8, $0x0706050403020100
-DATA  expandAVX512_4_inShuf0<>+0x10(SB)/8, $0x0706050403020100
-DATA  expandAVX512_4_inShuf0<>+0x18(SB)/8, $0x0706050403020100
-DATA  expandAVX512_4_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_4_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_4_inShuf0<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_4_inShuf0<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
-
-GLOBL expandAVX512_4_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_4_mat0<>+0x00(SB)/8, $0x0101010102020202
-DATA  expandAVX512_4_mat0<>+0x08(SB)/8, $0x0404040408080808
-DATA  expandAVX512_4_mat0<>+0x10(SB)/8, $0x1010101020202020
-DATA  expandAVX512_4_mat0<>+0x18(SB)/8, $0x4040404080808080
-DATA  expandAVX512_4_mat0<>+0x20(SB)/8, $0x0101010102020202
-DATA  expandAVX512_4_mat0<>+0x28(SB)/8, $0x0404040408080808
-DATA  expandAVX512_4_mat0<>+0x30(SB)/8, $0x1010101020202020
-DATA  expandAVX512_4_mat0<>+0x38(SB)/8, $0x4040404080808080
-
-GLOBL expandAVX512_4_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_4_inShuf1<>+0x00(SB)/8, $0x1716151413121110
-DATA  expandAVX512_4_inShuf1<>+0x08(SB)/8, $0x1716151413121110
-DATA  expandAVX512_4_inShuf1<>+0x10(SB)/8, $0x1716151413121110
-DATA  expandAVX512_4_inShuf1<>+0x18(SB)/8, $0x1716151413121110
-DATA  expandAVX512_4_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
-DATA  expandAVX512_4_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
-DATA  expandAVX512_4_inShuf1<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
-DATA  expandAVX512_4_inShuf1<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
-
-GLOBL expandAVX512_4_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_4_outShufLo+0x00(SB)/8, $0x1911090118100800
-DATA  expandAVX512_4_outShufLo+0x08(SB)/8, $0x1b130b031a120a02
-DATA  expandAVX512_4_outShufLo+0x10(SB)/8, $0x1d150d051c140c04
-DATA  expandAVX512_4_outShufLo+0x18(SB)/8, $0x1f170f071e160e06
-DATA  expandAVX512_4_outShufLo+0x20(SB)/8, $0x3931292138302820
-DATA  expandAVX512_4_outShufLo+0x28(SB)/8, $0x3b332b233a322a22
-DATA  expandAVX512_4_outShufLo+0x30(SB)/8, $0x3d352d253c342c24
-DATA  expandAVX512_4_outShufLo+0x38(SB)/8, $0x3f372f273e362e26
-
-TEXT expandAVX512_4<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_4_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_4_mat0<>(SB), Z1
-       VMOVDQU64 expandAVX512_4_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_4_outShufLo(SB), Z3
+GLOBL ·expandAVX512_4_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_4_inShuf0<>+0x00(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_4_inShuf0<>+0x08(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_4_inShuf0<>+0x10(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_4_inShuf0<>+0x18(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_4_inShuf0<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_4_inShuf0<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_4_inShuf0<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_4_inShuf0<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
+
+GLOBL ·expandAVX512_4_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_4_mat0<>+0x00(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_4_mat0<>+0x08(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_4_mat0<>+0x10(SB)/8, $0x1010101020202020
+DATA  ·expandAVX512_4_mat0<>+0x18(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_4_mat0<>+0x20(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_4_mat0<>+0x28(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_4_mat0<>+0x30(SB)/8, $0x1010101020202020
+DATA  ·expandAVX512_4_mat0<>+0x38(SB)/8, $0x4040404080808080
+
+GLOBL ·expandAVX512_4_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_4_inShuf1<>+0x00(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_4_inShuf1<>+0x08(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_4_inShuf1<>+0x10(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_4_inShuf1<>+0x18(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_4_inShuf1<>+0x20(SB)/8, $0x1f1e1d1c1b1a1918
+DATA  ·expandAVX512_4_inShuf1<>+0x28(SB)/8, $0x1f1e1d1c1b1a1918
+DATA  ·expandAVX512_4_inShuf1<>+0x30(SB)/8, $0x1f1e1d1c1b1a1918
+DATA  ·expandAVX512_4_inShuf1<>+0x38(SB)/8, $0x1f1e1d1c1b1a1918
+
+GLOBL ·expandAVX512_4_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_4_outShufLo+0x00(SB)/8, $0x1911090118100800
+DATA  ·expandAVX512_4_outShufLo+0x08(SB)/8, $0x1b130b031a120a02
+DATA  ·expandAVX512_4_outShufLo+0x10(SB)/8, $0x1d150d051c140c04
+DATA  ·expandAVX512_4_outShufLo+0x18(SB)/8, $0x1f170f071e160e06
+DATA  ·expandAVX512_4_outShufLo+0x20(SB)/8, $0x3931292138302820
+DATA  ·expandAVX512_4_outShufLo+0x28(SB)/8, $0x3b332b233a322a22
+DATA  ·expandAVX512_4_outShufLo+0x30(SB)/8, $0x3d352d253c342c24
+DATA  ·expandAVX512_4_outShufLo+0x38(SB)/8, $0x3f372f273e362e26
+
+TEXT ·expandAVX512_4<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_4_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_4_mat0<>(SB), Z1
+       VMOVDQU64 ·expandAVX512_4_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_4_outShufLo(SB), Z3
        VMOVDQU64 (AX), Z4
        VPERMB Z4, Z0, Z0
        VGF2P8AFFINEQB $0, Z1, Z0, Z0
@@ -264,73 +264,73 @@ TEXT expandAVX512_4<>(SB), NOSPLIT, $0-0
        VPERMB Z2, Z3, Z2
        RET
 
-GLOBL expandAVX512_6_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_6_inShuf0<>+0x00(SB)/8, $0x0706050403020100
-DATA  expandAVX512_6_inShuf0<>+0x08(SB)/8, $0x0706050403020100
-DATA  expandAVX512_6_inShuf0<>+0x10(SB)/8, $0x0706050403020100
-DATA  expandAVX512_6_inShuf0<>+0x18(SB)/8, $0x0706050403020100
-DATA  expandAVX512_6_inShuf0<>+0x20(SB)/8, $0x0706050403020100
-DATA  expandAVX512_6_inShuf0<>+0x28(SB)/8, $0x0706050403020100
-DATA  expandAVX512_6_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_6_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_6_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_6_mat0<>+0x00(SB)/8, $0x0101010101010202
-DATA  expandAVX512_6_mat0<>+0x08(SB)/8, $0x0202020204040404
-DATA  expandAVX512_6_mat0<>+0x10(SB)/8, $0x0404080808080808
-DATA  expandAVX512_6_mat0<>+0x18(SB)/8, $0x1010101010102020
-DATA  expandAVX512_6_mat0<>+0x20(SB)/8, $0x2020202040404040
-DATA  expandAVX512_6_mat0<>+0x28(SB)/8, $0x4040808080808080
-DATA  expandAVX512_6_mat0<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_6_mat0<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_6_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_6_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_6_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_6_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_6_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_6_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_6_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_6_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_6_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_6_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_6_inShuf2<>+0x00(SB)/8, $0xffff151413121110
-DATA  expandAVX512_6_inShuf2<>+0x08(SB)/8, $0xffff151413121110
-DATA  expandAVX512_6_inShuf2<>+0x10(SB)/8, $0xffffff1413121110
-DATA  expandAVX512_6_inShuf2<>+0x18(SB)/8, $0xffffff1413121110
-DATA  expandAVX512_6_inShuf2<>+0x20(SB)/8, $0xffffff1413121110
-DATA  expandAVX512_6_inShuf2<>+0x28(SB)/8, $0xffffff1413121110
-DATA  expandAVX512_6_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_6_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_6_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_6_outShufLo+0x00(SB)/8, $0x0901282018100800
-DATA  expandAVX512_6_outShufLo+0x08(SB)/8, $0x1a120a0229211911
-DATA  expandAVX512_6_outShufLo+0x10(SB)/8, $0x2b231b130b032a22
-DATA  expandAVX512_6_outShufLo+0x18(SB)/8, $0x0d052c241c140c04
-DATA  expandAVX512_6_outShufLo+0x20(SB)/8, $0x1e160e062d251d15
-DATA  expandAVX512_6_outShufLo+0x28(SB)/8, $0x2f271f170f072e26
-DATA  expandAVX512_6_outShufLo+0x30(SB)/8, $0x4941686058504840
-DATA  expandAVX512_6_outShufLo+0x38(SB)/8, $0x5a524a4269615951
-
-GLOBL expandAVX512_6_outShufHi(SB), RODATA, $0x40
-DATA  expandAVX512_6_outShufHi+0x00(SB)/8, $0x2b231b130b032a22
-DATA  expandAVX512_6_outShufHi+0x08(SB)/8, $0x0d052c241c140c04
-DATA  expandAVX512_6_outShufHi+0x10(SB)/8, $0x1e160e062d251d15
-DATA  expandAVX512_6_outShufHi+0x18(SB)/8, $0x2f271f170f072e26
-DATA  expandAVX512_6_outShufHi+0x20(SB)/8, $0x4941686058504840
-DATA  expandAVX512_6_outShufHi+0x28(SB)/8, $0x5a524a4269615951
-DATA  expandAVX512_6_outShufHi+0x30(SB)/8, $0x6b635b534b436a62
-DATA  expandAVX512_6_outShufHi+0x38(SB)/8, $0x4d456c645c544c44
-
-TEXT expandAVX512_6<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_6_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_6_mat0<>(SB), Z3
-       VMOVDQU64 expandAVX512_6_inShuf1<>(SB), Z4
-       VMOVDQU64 expandAVX512_6_inShuf2<>(SB), Z5
-       VMOVDQU64 expandAVX512_6_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_6_outShufHi(SB), Z2
+GLOBL ·expandAVX512_6_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_6_inShuf0<>+0x00(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_6_inShuf0<>+0x08(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_6_inShuf0<>+0x10(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_6_inShuf0<>+0x18(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_6_inShuf0<>+0x20(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_6_inShuf0<>+0x28(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_6_inShuf0<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_6_inShuf0<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_6_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_6_mat0<>+0x00(SB)/8, $0x0101010101010202
+DATA  ·expandAVX512_6_mat0<>+0x08(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_6_mat0<>+0x10(SB)/8, $0x0404080808080808
+DATA  ·expandAVX512_6_mat0<>+0x18(SB)/8, $0x1010101010102020
+DATA  ·expandAVX512_6_mat0<>+0x20(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_6_mat0<>+0x28(SB)/8, $0x4040808080808080
+DATA  ·expandAVX512_6_mat0<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_6_mat0<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_6_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_6_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_6_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_6_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_6_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_6_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_6_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_6_inShuf1<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_6_inShuf1<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_6_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_6_inShuf2<>+0x00(SB)/8, $0xffff151413121110
+DATA  ·expandAVX512_6_inShuf2<>+0x08(SB)/8, $0xffff151413121110
+DATA  ·expandAVX512_6_inShuf2<>+0x10(SB)/8, $0xffffff1413121110
+DATA  ·expandAVX512_6_inShuf2<>+0x18(SB)/8, $0xffffff1413121110
+DATA  ·expandAVX512_6_inShuf2<>+0x20(SB)/8, $0xffffff1413121110
+DATA  ·expandAVX512_6_inShuf2<>+0x28(SB)/8, $0xffffff1413121110
+DATA  ·expandAVX512_6_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_6_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_6_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_6_outShufLo+0x00(SB)/8, $0x0901282018100800
+DATA  ·expandAVX512_6_outShufLo+0x08(SB)/8, $0x1a120a0229211911
+DATA  ·expandAVX512_6_outShufLo+0x10(SB)/8, $0x2b231b130b032a22
+DATA  ·expandAVX512_6_outShufLo+0x18(SB)/8, $0x0d052c241c140c04
+DATA  ·expandAVX512_6_outShufLo+0x20(SB)/8, $0x1e160e062d251d15
+DATA  ·expandAVX512_6_outShufLo+0x28(SB)/8, $0x2f271f170f072e26
+DATA  ·expandAVX512_6_outShufLo+0x30(SB)/8, $0x4941686058504840
+DATA  ·expandAVX512_6_outShufLo+0x38(SB)/8, $0x5a524a4269615951
+
+GLOBL ·expandAVX512_6_outShufHi(SB), RODATA, $0x40
+DATA  ·expandAVX512_6_outShufHi+0x00(SB)/8, $0x2b231b130b032a22
+DATA  ·expandAVX512_6_outShufHi+0x08(SB)/8, $0x0d052c241c140c04
+DATA  ·expandAVX512_6_outShufHi+0x10(SB)/8, $0x1e160e062d251d15
+DATA  ·expandAVX512_6_outShufHi+0x18(SB)/8, $0x2f271f170f072e26
+DATA  ·expandAVX512_6_outShufHi+0x20(SB)/8, $0x4941686058504840
+DATA  ·expandAVX512_6_outShufHi+0x28(SB)/8, $0x5a524a4269615951
+DATA  ·expandAVX512_6_outShufHi+0x30(SB)/8, $0x6b635b534b436a62
+DATA  ·expandAVX512_6_outShufHi+0x38(SB)/8, $0x4d456c645c544c44
+
+TEXT ·expandAVX512_6<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_6_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_6_mat0<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_6_inShuf1<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_6_inShuf2<>(SB), Z5
+       VMOVDQU64 ·expandAVX512_6_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_6_outShufHi(SB), Z2
        VMOVDQU64 (AX), Z6
        VPERMB Z6, Z0, Z0
        VGF2P8AFFINEQB $0, Z3, Z0, Z0
@@ -342,51 +342,51 @@ TEXT expandAVX512_6<>(SB), NOSPLIT, $0-0
        VPERMI2B Z3, Z4, Z2
        RET
 
-GLOBL expandAVX512_8_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_8_inShuf0<>+0x00(SB)/8, $0x0706050403020100
-DATA  expandAVX512_8_inShuf0<>+0x08(SB)/8, $0x0706050403020100
-DATA  expandAVX512_8_inShuf0<>+0x10(SB)/8, $0x0706050403020100
-DATA  expandAVX512_8_inShuf0<>+0x18(SB)/8, $0x0706050403020100
-DATA  expandAVX512_8_inShuf0<>+0x20(SB)/8, $0x0706050403020100
-DATA  expandAVX512_8_inShuf0<>+0x28(SB)/8, $0x0706050403020100
-DATA  expandAVX512_8_inShuf0<>+0x30(SB)/8, $0x0706050403020100
-DATA  expandAVX512_8_inShuf0<>+0x38(SB)/8, $0x0706050403020100
-
-GLOBL expandAVX512_8_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_8_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_8_mat0<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_8_mat0<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_8_mat0<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_8_mat0<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_8_mat0<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_8_mat0<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_8_mat0<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_8_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_8_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_8_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_8_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_8_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_8_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_8_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_8_inShuf1<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_8_inShuf1<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
-
-GLOBL expandAVX512_8_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_8_outShufLo+0x00(SB)/8, $0x3830282018100800
-DATA  expandAVX512_8_outShufLo+0x08(SB)/8, $0x3931292119110901
-DATA  expandAVX512_8_outShufLo+0x10(SB)/8, $0x3a322a221a120a02
-DATA  expandAVX512_8_outShufLo+0x18(SB)/8, $0x3b332b231b130b03
-DATA  expandAVX512_8_outShufLo+0x20(SB)/8, $0x3c342c241c140c04
-DATA  expandAVX512_8_outShufLo+0x28(SB)/8, $0x3d352d251d150d05
-DATA  expandAVX512_8_outShufLo+0x30(SB)/8, $0x3e362e261e160e06
-DATA  expandAVX512_8_outShufLo+0x38(SB)/8, $0x3f372f271f170f07
-
-TEXT expandAVX512_8<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_8_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_8_mat0<>(SB), Z1
-       VMOVDQU64 expandAVX512_8_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_8_outShufLo(SB), Z3
+GLOBL ·expandAVX512_8_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_8_inShuf0<>+0x00(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_8_inShuf0<>+0x08(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_8_inShuf0<>+0x10(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_8_inShuf0<>+0x18(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_8_inShuf0<>+0x20(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_8_inShuf0<>+0x28(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_8_inShuf0<>+0x30(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_8_inShuf0<>+0x38(SB)/8, $0x0706050403020100
+
+GLOBL ·expandAVX512_8_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_8_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_8_mat0<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_8_mat0<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_8_mat0<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_8_mat0<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_8_mat0<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_8_mat0<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_8_mat0<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_8_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_8_inShuf1<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_8_inShuf1<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_8_inShuf1<>+0x10(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_8_inShuf1<>+0x18(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_8_inShuf1<>+0x20(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_8_inShuf1<>+0x28(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_8_inShuf1<>+0x30(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_8_inShuf1<>+0x38(SB)/8, $0x0f0e0d0c0b0a0908
+
+GLOBL ·expandAVX512_8_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_8_outShufLo+0x00(SB)/8, $0x3830282018100800
+DATA  ·expandAVX512_8_outShufLo+0x08(SB)/8, $0x3931292119110901
+DATA  ·expandAVX512_8_outShufLo+0x10(SB)/8, $0x3a322a221a120a02
+DATA  ·expandAVX512_8_outShufLo+0x18(SB)/8, $0x3b332b231b130b03
+DATA  ·expandAVX512_8_outShufLo+0x20(SB)/8, $0x3c342c241c140c04
+DATA  ·expandAVX512_8_outShufLo+0x28(SB)/8, $0x3d352d251d150d05
+DATA  ·expandAVX512_8_outShufLo+0x30(SB)/8, $0x3e362e261e160e06
+DATA  ·expandAVX512_8_outShufLo+0x38(SB)/8, $0x3f372f271f170f07
+
+TEXT ·expandAVX512_8<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_8_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_8_mat0<>(SB), Z1
+       VMOVDQU64 ·expandAVX512_8_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_8_outShufLo(SB), Z3
        VMOVDQU64 (AX), Z4
        VPERMB Z4, Z0, Z0
        VGF2P8AFFINEQB $0, Z1, Z0, Z0
@@ -396,327 +396,327 @@ TEXT expandAVX512_8<>(SB), NOSPLIT, $0-0
        VPERMB Z2, Z3, Z2
        RET
 
-GLOBL expandAVX512_10_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_10_inShuf0<>+0x00(SB)/8, $0xff06050403020100
-DATA  expandAVX512_10_inShuf0<>+0x08(SB)/8, $0xff06050403020100
-DATA  expandAVX512_10_inShuf0<>+0x10(SB)/8, $0xff06050403020100
-DATA  expandAVX512_10_inShuf0<>+0x18(SB)/8, $0xff06050403020100
-DATA  expandAVX512_10_inShuf0<>+0x20(SB)/8, $0xffff050403020100
-DATA  expandAVX512_10_inShuf0<>+0x28(SB)/8, $0xffff050403020100
-DATA  expandAVX512_10_inShuf0<>+0x30(SB)/8, $0xffff050403020100
-DATA  expandAVX512_10_inShuf0<>+0x38(SB)/8, $0xffff050403020100
-
-GLOBL expandAVX512_10_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_10_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_10_mat0<>+0x08(SB)/8, $0x0101020202020202
-DATA  expandAVX512_10_mat0<>+0x10(SB)/8, $0x0202020204040404
-DATA  expandAVX512_10_mat0<>+0x18(SB)/8, $0x0404040404040808
-DATA  expandAVX512_10_mat0<>+0x20(SB)/8, $0x0808080808080808
-DATA  expandAVX512_10_mat0<>+0x28(SB)/8, $0x1010101010101010
-DATA  expandAVX512_10_mat0<>+0x30(SB)/8, $0x1010202020202020
-DATA  expandAVX512_10_mat0<>+0x38(SB)/8, $0x2020202040404040
-
-GLOBL expandAVX512_10_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_10_inShuf1<>+0x00(SB)/8, $0xffff050403020100
-DATA  expandAVX512_10_inShuf1<>+0x08(SB)/8, $0xffff050403020100
-DATA  expandAVX512_10_inShuf1<>+0x10(SB)/8, $0xff0c0b0a09080706
-DATA  expandAVX512_10_inShuf1<>+0x18(SB)/8, $0xff0c0b0a09080706
-DATA  expandAVX512_10_inShuf1<>+0x20(SB)/8, $0xff0c0b0a09080706
-DATA  expandAVX512_10_inShuf1<>+0x28(SB)/8, $0xff0c0b0a09080706
-DATA  expandAVX512_10_inShuf1<>+0x30(SB)/8, $0xffff0b0a09080706
-DATA  expandAVX512_10_inShuf1<>+0x38(SB)/8, $0xffff0b0a09080706
-
-GLOBL expandAVX512_10_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_10_mat1<>+0x00(SB)/8, $0x4040404040408080
-DATA  expandAVX512_10_mat1<>+0x08(SB)/8, $0x8080808080808080
-DATA  expandAVX512_10_mat1<>+0x10(SB)/8, $0x0808080808080808
-DATA  expandAVX512_10_mat1<>+0x18(SB)/8, $0x1010101010101010
-DATA  expandAVX512_10_mat1<>+0x20(SB)/8, $0x1010202020202020
-DATA  expandAVX512_10_mat1<>+0x28(SB)/8, $0x2020202040404040
-DATA  expandAVX512_10_mat1<>+0x30(SB)/8, $0x4040404040408080
-DATA  expandAVX512_10_mat1<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_10_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_10_inShuf2<>+0x00(SB)/8, $0xffff0c0b0a090807
-DATA  expandAVX512_10_inShuf2<>+0x08(SB)/8, $0xffff0c0b0a090807
-DATA  expandAVX512_10_inShuf2<>+0x10(SB)/8, $0xffff0c0b0a090807
-DATA  expandAVX512_10_inShuf2<>+0x18(SB)/8, $0xffff0c0b0a090807
-DATA  expandAVX512_10_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_10_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_10_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_10_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_10_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_10_mat2<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_10_mat2<>+0x08(SB)/8, $0x0101020202020202
-DATA  expandAVX512_10_mat2<>+0x10(SB)/8, $0x0202020204040404
-DATA  expandAVX512_10_mat2<>+0x18(SB)/8, $0x0404040404040808
-DATA  expandAVX512_10_mat2<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_10_mat2<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_10_mat2<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_10_mat2<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_10_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_10_outShufLo+0x00(SB)/8, $0x3830282018100800
-DATA  expandAVX512_10_outShufLo+0x08(SB)/8, $0x2921191109014840
-DATA  expandAVX512_10_outShufLo+0x10(SB)/8, $0x1a120a0249413931
-DATA  expandAVX512_10_outShufLo+0x18(SB)/8, $0x0b034a423a322a22
-DATA  expandAVX512_10_outShufLo+0x20(SB)/8, $0x4b433b332b231b13
-DATA  expandAVX512_10_outShufLo+0x28(SB)/8, $0x3c342c241c140c04
-DATA  expandAVX512_10_outShufLo+0x30(SB)/8, $0x2d251d150d054c44
-DATA  expandAVX512_10_outShufLo+0x38(SB)/8, $0x1e160e064d453d35
-
-GLOBL expandAVX512_10_outShufHi(SB), RODATA, $0x40
-DATA  expandAVX512_10_outShufHi+0x00(SB)/8, $0x4840383028201810
-DATA  expandAVX512_10_outShufHi+0x08(SB)/8, $0x3931292119115850
-DATA  expandAVX512_10_outShufHi+0x10(SB)/8, $0x2a221a1259514941
-DATA  expandAVX512_10_outShufHi+0x18(SB)/8, $0x1b135a524a423a32
-DATA  expandAVX512_10_outShufHi+0x20(SB)/8, $0x5b534b433b332b23
-DATA  expandAVX512_10_outShufHi+0x28(SB)/8, $0x4c443c342c241c14
-DATA  expandAVX512_10_outShufHi+0x30(SB)/8, $0x3d352d251d155c54
-DATA  expandAVX512_10_outShufHi+0x38(SB)/8, $0x2e261e165d554d45
-
-TEXT expandAVX512_10<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_10_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_10_inShuf1<>(SB), Z3
-       VMOVDQU64 expandAVX512_10_inShuf2<>(SB), Z4
-       VMOVDQU64 expandAVX512_10_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_10_outShufHi(SB), Z2
+GLOBL ·expandAVX512_10_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_10_inShuf0<>+0x00(SB)/8, $0xff06050403020100
+DATA  ·expandAVX512_10_inShuf0<>+0x08(SB)/8, $0xff06050403020100
+DATA  ·expandAVX512_10_inShuf0<>+0x10(SB)/8, $0xff06050403020100
+DATA  ·expandAVX512_10_inShuf0<>+0x18(SB)/8, $0xff06050403020100
+DATA  ·expandAVX512_10_inShuf0<>+0x20(SB)/8, $0xffff050403020100
+DATA  ·expandAVX512_10_inShuf0<>+0x28(SB)/8, $0xffff050403020100
+DATA  ·expandAVX512_10_inShuf0<>+0x30(SB)/8, $0xffff050403020100
+DATA  ·expandAVX512_10_inShuf0<>+0x38(SB)/8, $0xffff050403020100
+
+GLOBL ·expandAVX512_10_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_10_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_10_mat0<>+0x08(SB)/8, $0x0101020202020202
+DATA  ·expandAVX512_10_mat0<>+0x10(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_10_mat0<>+0x18(SB)/8, $0x0404040404040808
+DATA  ·expandAVX512_10_mat0<>+0x20(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_10_mat0<>+0x28(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_10_mat0<>+0x30(SB)/8, $0x1010202020202020
+DATA  ·expandAVX512_10_mat0<>+0x38(SB)/8, $0x2020202040404040
+
+GLOBL ·expandAVX512_10_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_10_inShuf1<>+0x00(SB)/8, $0xffff050403020100
+DATA  ·expandAVX512_10_inShuf1<>+0x08(SB)/8, $0xffff050403020100
+DATA  ·expandAVX512_10_inShuf1<>+0x10(SB)/8, $0xff0c0b0a09080706
+DATA  ·expandAVX512_10_inShuf1<>+0x18(SB)/8, $0xff0c0b0a09080706
+DATA  ·expandAVX512_10_inShuf1<>+0x20(SB)/8, $0xff0c0b0a09080706
+DATA  ·expandAVX512_10_inShuf1<>+0x28(SB)/8, $0xff0c0b0a09080706
+DATA  ·expandAVX512_10_inShuf1<>+0x30(SB)/8, $0xffff0b0a09080706
+DATA  ·expandAVX512_10_inShuf1<>+0x38(SB)/8, $0xffff0b0a09080706
+
+GLOBL ·expandAVX512_10_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_10_mat1<>+0x00(SB)/8, $0x4040404040408080
+DATA  ·expandAVX512_10_mat1<>+0x08(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_10_mat1<>+0x10(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_10_mat1<>+0x18(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_10_mat1<>+0x20(SB)/8, $0x1010202020202020
+DATA  ·expandAVX512_10_mat1<>+0x28(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_10_mat1<>+0x30(SB)/8, $0x4040404040408080
+DATA  ·expandAVX512_10_mat1<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_10_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_10_inShuf2<>+0x00(SB)/8, $0xffff0c0b0a090807
+DATA  ·expandAVX512_10_inShuf2<>+0x08(SB)/8, $0xffff0c0b0a090807
+DATA  ·expandAVX512_10_inShuf2<>+0x10(SB)/8, $0xffff0c0b0a090807
+DATA  ·expandAVX512_10_inShuf2<>+0x18(SB)/8, $0xffff0c0b0a090807
+DATA  ·expandAVX512_10_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_10_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_10_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_10_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_10_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_10_mat2<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_10_mat2<>+0x08(SB)/8, $0x0101020202020202
+DATA  ·expandAVX512_10_mat2<>+0x10(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_10_mat2<>+0x18(SB)/8, $0x0404040404040808
+DATA  ·expandAVX512_10_mat2<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_10_mat2<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_10_mat2<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_10_mat2<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_10_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_10_outShufLo+0x00(SB)/8, $0x3830282018100800
+DATA  ·expandAVX512_10_outShufLo+0x08(SB)/8, $0x2921191109014840
+DATA  ·expandAVX512_10_outShufLo+0x10(SB)/8, $0x1a120a0249413931
+DATA  ·expandAVX512_10_outShufLo+0x18(SB)/8, $0x0b034a423a322a22
+DATA  ·expandAVX512_10_outShufLo+0x20(SB)/8, $0x4b433b332b231b13
+DATA  ·expandAVX512_10_outShufLo+0x28(SB)/8, $0x3c342c241c140c04
+DATA  ·expandAVX512_10_outShufLo+0x30(SB)/8, $0x2d251d150d054c44
+DATA  ·expandAVX512_10_outShufLo+0x38(SB)/8, $0x1e160e064d453d35
+
+GLOBL ·expandAVX512_10_outShufHi(SB), RODATA, $0x40
+DATA  ·expandAVX512_10_outShufHi+0x00(SB)/8, $0x4840383028201810
+DATA  ·expandAVX512_10_outShufHi+0x08(SB)/8, $0x3931292119115850
+DATA  ·expandAVX512_10_outShufHi+0x10(SB)/8, $0x2a221a1259514941
+DATA  ·expandAVX512_10_outShufHi+0x18(SB)/8, $0x1b135a524a423a32
+DATA  ·expandAVX512_10_outShufHi+0x20(SB)/8, $0x5b534b433b332b23
+DATA  ·expandAVX512_10_outShufHi+0x28(SB)/8, $0x4c443c342c241c14
+DATA  ·expandAVX512_10_outShufHi+0x30(SB)/8, $0x3d352d251d155c54
+DATA  ·expandAVX512_10_outShufHi+0x38(SB)/8, $0x2e261e165d554d45
+
+TEXT ·expandAVX512_10<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_10_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_10_inShuf1<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_10_inShuf2<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_10_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_10_outShufHi(SB), Z2
        VMOVDQU64 (AX), Z5
        VPERMB Z5, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_10_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_10_mat0<>(SB), Z0, Z0
        VPERMB Z5, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_10_mat1<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_10_mat1<>(SB), Z3, Z3
        VPERMB Z5, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_10_mat2<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_10_mat2<>(SB), Z4, Z4
        VPERMI2B Z3, Z0, Z1
        VPERMI2B Z4, Z3, Z2
        RET
 
-GLOBL expandAVX512_12_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_12_inShuf0<>+0x00(SB)/8, $0xffff050403020100
-DATA  expandAVX512_12_inShuf0<>+0x08(SB)/8, $0xffff050403020100
-DATA  expandAVX512_12_inShuf0<>+0x10(SB)/8, $0xffff050403020100
-DATA  expandAVX512_12_inShuf0<>+0x18(SB)/8, $0xffff050403020100
-DATA  expandAVX512_12_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_12_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_12_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_12_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
-
-GLOBL expandAVX512_12_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_12_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_12_mat0<>+0x08(SB)/8, $0x0101010102020202
-DATA  expandAVX512_12_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_12_mat0<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_12_mat0<>+0x20(SB)/8, $0x0404040408080808
-DATA  expandAVX512_12_mat0<>+0x28(SB)/8, $0x0808080808080808
-DATA  expandAVX512_12_mat0<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_12_mat0<>+0x38(SB)/8, $0x1010101020202020
-
-GLOBL expandAVX512_12_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_12_inShuf1<>+0x00(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_12_inShuf1<>+0x08(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_12_inShuf1<>+0x10(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_12_inShuf1<>+0x18(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_12_inShuf1<>+0x20(SB)/8, $0xffff0a0908070605
-DATA  expandAVX512_12_inShuf1<>+0x28(SB)/8, $0xffff0a0908070605
-DATA  expandAVX512_12_inShuf1<>+0x30(SB)/8, $0xffff0a0908070605
-DATA  expandAVX512_12_inShuf1<>+0x38(SB)/8, $0xffff0a0908070605
-
-GLOBL expandAVX512_12_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_12_mat1<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_12_mat1<>+0x08(SB)/8, $0x4040404040404040
-DATA  expandAVX512_12_mat1<>+0x10(SB)/8, $0x4040404080808080
-DATA  expandAVX512_12_mat1<>+0x18(SB)/8, $0x8080808080808080
-DATA  expandAVX512_12_mat1<>+0x20(SB)/8, $0x0404040408080808
-DATA  expandAVX512_12_mat1<>+0x28(SB)/8, $0x0808080808080808
-DATA  expandAVX512_12_mat1<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_12_mat1<>+0x38(SB)/8, $0x1010101020202020
-
-GLOBL expandAVX512_12_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_12_inShuf2<>+0x00(SB)/8, $0xffffff0908070605
-DATA  expandAVX512_12_inShuf2<>+0x08(SB)/8, $0xffffff0908070605
-DATA  expandAVX512_12_inShuf2<>+0x10(SB)/8, $0xffffff0908070605
-DATA  expandAVX512_12_inShuf2<>+0x18(SB)/8, $0xffffff0908070605
-DATA  expandAVX512_12_inShuf2<>+0x20(SB)/8, $0xffffff0a09080706
-DATA  expandAVX512_12_inShuf2<>+0x28(SB)/8, $0xffffff0a09080706
-DATA  expandAVX512_12_inShuf2<>+0x30(SB)/8, $0xffffff0a09080706
-DATA  expandAVX512_12_inShuf2<>+0x38(SB)/8, $0xffffff0a09080706
-
-GLOBL expandAVX512_12_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_12_mat2<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_12_mat2<>+0x08(SB)/8, $0x4040404040404040
-DATA  expandAVX512_12_mat2<>+0x10(SB)/8, $0x4040404080808080
-DATA  expandAVX512_12_mat2<>+0x18(SB)/8, $0x8080808080808080
-DATA  expandAVX512_12_mat2<>+0x20(SB)/8, $0x0101010101010101
-DATA  expandAVX512_12_mat2<>+0x28(SB)/8, $0x0101010102020202
-DATA  expandAVX512_12_mat2<>+0x30(SB)/8, $0x0202020202020202
-DATA  expandAVX512_12_mat2<>+0x38(SB)/8, $0x0404040404040404
-
-GLOBL expandAVX512_12_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_12_outShufLo+0x00(SB)/8, $0x3830282018100800
-DATA  expandAVX512_12_outShufLo+0x08(SB)/8, $0x1911090158504840
-DATA  expandAVX512_12_outShufLo+0x10(SB)/8, $0x5951494139312921
-DATA  expandAVX512_12_outShufLo+0x18(SB)/8, $0x3a322a221a120a02
-DATA  expandAVX512_12_outShufLo+0x20(SB)/8, $0x1b130b035a524a42
-DATA  expandAVX512_12_outShufLo+0x28(SB)/8, $0x5b534b433b332b23
-DATA  expandAVX512_12_outShufLo+0x30(SB)/8, $0x3c342c241c140c04
-DATA  expandAVX512_12_outShufLo+0x38(SB)/8, $0x1d150d055c544c44
-
-GLOBL expandAVX512_12_outShufHi(SB), RODATA, $0x40
-DATA  expandAVX512_12_outShufHi+0x00(SB)/8, $0x5850484038302820
-DATA  expandAVX512_12_outShufHi+0x08(SB)/8, $0x3931292178706860
-DATA  expandAVX512_12_outShufHi+0x10(SB)/8, $0x7971696159514941
-DATA  expandAVX512_12_outShufHi+0x18(SB)/8, $0x5a524a423a322a22
-DATA  expandAVX512_12_outShufHi+0x20(SB)/8, $0x3b332b237a726a62
-DATA  expandAVX512_12_outShufHi+0x28(SB)/8, $0x7b736b635b534b43
-DATA  expandAVX512_12_outShufHi+0x30(SB)/8, $0x5c544c443c342c24
-DATA  expandAVX512_12_outShufHi+0x38(SB)/8, $0x3d352d257c746c64
-
-TEXT expandAVX512_12<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_12_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_12_inShuf1<>(SB), Z3
-       VMOVDQU64 expandAVX512_12_inShuf2<>(SB), Z4
-       VMOVDQU64 expandAVX512_12_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_12_outShufHi(SB), Z2
+GLOBL ·expandAVX512_12_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_12_inShuf0<>+0x00(SB)/8, $0xffff050403020100
+DATA  ·expandAVX512_12_inShuf0<>+0x08(SB)/8, $0xffff050403020100
+DATA  ·expandAVX512_12_inShuf0<>+0x10(SB)/8, $0xffff050403020100
+DATA  ·expandAVX512_12_inShuf0<>+0x18(SB)/8, $0xffff050403020100
+DATA  ·expandAVX512_12_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_12_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_12_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_12_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
+
+GLOBL ·expandAVX512_12_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_12_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_12_mat0<>+0x08(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_12_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_12_mat0<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_12_mat0<>+0x20(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_12_mat0<>+0x28(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_12_mat0<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_12_mat0<>+0x38(SB)/8, $0x1010101020202020
+
+GLOBL ·expandAVX512_12_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_12_inShuf1<>+0x00(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_12_inShuf1<>+0x08(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_12_inShuf1<>+0x10(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_12_inShuf1<>+0x18(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_12_inShuf1<>+0x20(SB)/8, $0xffff0a0908070605
+DATA  ·expandAVX512_12_inShuf1<>+0x28(SB)/8, $0xffff0a0908070605
+DATA  ·expandAVX512_12_inShuf1<>+0x30(SB)/8, $0xffff0a0908070605
+DATA  ·expandAVX512_12_inShuf1<>+0x38(SB)/8, $0xffff0a0908070605
+
+GLOBL ·expandAVX512_12_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_12_mat1<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_12_mat1<>+0x08(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_12_mat1<>+0x10(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_12_mat1<>+0x18(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_12_mat1<>+0x20(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_12_mat1<>+0x28(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_12_mat1<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_12_mat1<>+0x38(SB)/8, $0x1010101020202020
+
+GLOBL ·expandAVX512_12_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_12_inShuf2<>+0x00(SB)/8, $0xffffff0908070605
+DATA  ·expandAVX512_12_inShuf2<>+0x08(SB)/8, $0xffffff0908070605
+DATA  ·expandAVX512_12_inShuf2<>+0x10(SB)/8, $0xffffff0908070605
+DATA  ·expandAVX512_12_inShuf2<>+0x18(SB)/8, $0xffffff0908070605
+DATA  ·expandAVX512_12_inShuf2<>+0x20(SB)/8, $0xffffff0a09080706
+DATA  ·expandAVX512_12_inShuf2<>+0x28(SB)/8, $0xffffff0a09080706
+DATA  ·expandAVX512_12_inShuf2<>+0x30(SB)/8, $0xffffff0a09080706
+DATA  ·expandAVX512_12_inShuf2<>+0x38(SB)/8, $0xffffff0a09080706
+
+GLOBL ·expandAVX512_12_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_12_mat2<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_12_mat2<>+0x08(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_12_mat2<>+0x10(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_12_mat2<>+0x18(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_12_mat2<>+0x20(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_12_mat2<>+0x28(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_12_mat2<>+0x30(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_12_mat2<>+0x38(SB)/8, $0x0404040404040404
+
+GLOBL ·expandAVX512_12_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_12_outShufLo+0x00(SB)/8, $0x3830282018100800
+DATA  ·expandAVX512_12_outShufLo+0x08(SB)/8, $0x1911090158504840
+DATA  ·expandAVX512_12_outShufLo+0x10(SB)/8, $0x5951494139312921
+DATA  ·expandAVX512_12_outShufLo+0x18(SB)/8, $0x3a322a221a120a02
+DATA  ·expandAVX512_12_outShufLo+0x20(SB)/8, $0x1b130b035a524a42
+DATA  ·expandAVX512_12_outShufLo+0x28(SB)/8, $0x5b534b433b332b23
+DATA  ·expandAVX512_12_outShufLo+0x30(SB)/8, $0x3c342c241c140c04
+DATA  ·expandAVX512_12_outShufLo+0x38(SB)/8, $0x1d150d055c544c44
+
+GLOBL ·expandAVX512_12_outShufHi(SB), RODATA, $0x40
+DATA  ·expandAVX512_12_outShufHi+0x00(SB)/8, $0x5850484038302820
+DATA  ·expandAVX512_12_outShufHi+0x08(SB)/8, $0x3931292178706860
+DATA  ·expandAVX512_12_outShufHi+0x10(SB)/8, $0x7971696159514941
+DATA  ·expandAVX512_12_outShufHi+0x18(SB)/8, $0x5a524a423a322a22
+DATA  ·expandAVX512_12_outShufHi+0x20(SB)/8, $0x3b332b237a726a62
+DATA  ·expandAVX512_12_outShufHi+0x28(SB)/8, $0x7b736b635b534b43
+DATA  ·expandAVX512_12_outShufHi+0x30(SB)/8, $0x5c544c443c342c24
+DATA  ·expandAVX512_12_outShufHi+0x38(SB)/8, $0x3d352d257c746c64
+
+TEXT ·expandAVX512_12<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_12_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_12_inShuf1<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_12_inShuf2<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_12_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_12_outShufHi(SB), Z2
        VMOVDQU64 (AX), Z5
        VPERMB Z5, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_12_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_12_mat0<>(SB), Z0, Z0
        VPERMB Z5, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_12_mat1<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_12_mat1<>(SB), Z3, Z3
        VPERMB Z5, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_12_mat2<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_12_mat2<>(SB), Z4, Z4
        VPERMI2B Z3, Z0, Z1
        VPERMI2B Z4, Z3, Z2
        RET
 
-GLOBL expandAVX512_14_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_14_inShuf0<>+0x00(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_14_inShuf0<>+0x08(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_14_inShuf0<>+0x10(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_14_inShuf0<>+0x18(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_14_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_14_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_14_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
-DATA  expandAVX512_14_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
-
-GLOBL expandAVX512_14_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_14_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_14_mat0<>+0x08(SB)/8, $0x0101010101010202
-DATA  expandAVX512_14_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_14_mat0<>+0x18(SB)/8, $0x0202020204040404
-DATA  expandAVX512_14_mat0<>+0x20(SB)/8, $0x0404040404040404
-DATA  expandAVX512_14_mat0<>+0x28(SB)/8, $0x0404080808080808
-DATA  expandAVX512_14_mat0<>+0x30(SB)/8, $0x0808080808080808
-DATA  expandAVX512_14_mat0<>+0x38(SB)/8, $0x1010101010101010
-
-GLOBL expandAVX512_14_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_14_inShuf1<>+0x00(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_14_inShuf1<>+0x08(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_14_inShuf1<>+0x10(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_14_inShuf1<>+0x18(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_14_inShuf1<>+0x20(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_14_inShuf1<>+0x28(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_14_inShuf1<>+0x30(SB)/8, $0xffffff0807060504
-DATA  expandAVX512_14_inShuf1<>+0x38(SB)/8, $0xffffff0807060504
-
-GLOBL expandAVX512_14_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_14_mat1<>+0x00(SB)/8, $0x1010101010102020
-DATA  expandAVX512_14_mat1<>+0x08(SB)/8, $0x2020202020202020
-DATA  expandAVX512_14_mat1<>+0x10(SB)/8, $0x2020202040404040
-DATA  expandAVX512_14_mat1<>+0x18(SB)/8, $0x4040404040404040
-DATA  expandAVX512_14_mat1<>+0x20(SB)/8, $0x4040808080808080
-DATA  expandAVX512_14_mat1<>+0x28(SB)/8, $0x8080808080808080
-DATA  expandAVX512_14_mat1<>+0x30(SB)/8, $0x1010101010102020
-DATA  expandAVX512_14_mat1<>+0x38(SB)/8, $0x2020202020202020
-
-GLOBL expandAVX512_14_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_14_inShuf2<>+0x00(SB)/8, $0xffffff0807060504
-DATA  expandAVX512_14_inShuf2<>+0x08(SB)/8, $0xffffff0807060504
-DATA  expandAVX512_14_inShuf2<>+0x10(SB)/8, $0xffffff0807060504
-DATA  expandAVX512_14_inShuf2<>+0x18(SB)/8, $0xffffff0807060504
-DATA  expandAVX512_14_inShuf2<>+0x20(SB)/8, $0xffffff0908070605
-DATA  expandAVX512_14_inShuf2<>+0x28(SB)/8, $0xffffff0908070605
-DATA  expandAVX512_14_inShuf2<>+0x30(SB)/8, $0xffffffff08070605
-DATA  expandAVX512_14_inShuf2<>+0x38(SB)/8, $0xffffffff08070605
-
-GLOBL expandAVX512_14_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_14_mat2<>+0x00(SB)/8, $0x2020202040404040
-DATA  expandAVX512_14_mat2<>+0x08(SB)/8, $0x4040404040404040
-DATA  expandAVX512_14_mat2<>+0x10(SB)/8, $0x4040808080808080
-DATA  expandAVX512_14_mat2<>+0x18(SB)/8, $0x8080808080808080
-DATA  expandAVX512_14_mat2<>+0x20(SB)/8, $0x0101010101010101
-DATA  expandAVX512_14_mat2<>+0x28(SB)/8, $0x0101010101010202
-DATA  expandAVX512_14_mat2<>+0x30(SB)/8, $0x0202020202020202
-DATA  expandAVX512_14_mat2<>+0x38(SB)/8, $0x0202020204040404
-
-GLOBL expandAVX512_14_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_14_inShuf3<>+0x00(SB)/8, $0xffffffff08070605
-DATA  expandAVX512_14_inShuf3<>+0x08(SB)/8, $0xffffffff08070605
-DATA  expandAVX512_14_inShuf3<>+0x10(SB)/8, $0xffffffff08070605
-DATA  expandAVX512_14_inShuf3<>+0x18(SB)/8, $0xffffffff08070605
-DATA  expandAVX512_14_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_14_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_14_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_14_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_14_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_14_mat3<>+0x00(SB)/8, $0x0404040404040404
-DATA  expandAVX512_14_mat3<>+0x08(SB)/8, $0x0404080808080808
-DATA  expandAVX512_14_mat3<>+0x10(SB)/8, $0x0808080808080808
-DATA  expandAVX512_14_mat3<>+0x18(SB)/8, $0x1010101010101010
-DATA  expandAVX512_14_mat3<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_14_mat3<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_14_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_14_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_14_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_14_outShufLo+0x00(SB)/8, $0x3830282018100800
-DATA  expandAVX512_14_outShufLo+0x08(SB)/8, $0x0901686058504840
-DATA  expandAVX512_14_outShufLo+0x10(SB)/8, $0x4941393129211911
-DATA  expandAVX512_14_outShufLo+0x18(SB)/8, $0x1a120a0269615951
-DATA  expandAVX512_14_outShufLo+0x20(SB)/8, $0x5a524a423a322a22
-DATA  expandAVX512_14_outShufLo+0x28(SB)/8, $0x2b231b130b036a62
-DATA  expandAVX512_14_outShufLo+0x30(SB)/8, $0x6b635b534b433b33
-DATA  expandAVX512_14_outShufLo+0x38(SB)/8, $0x3c342c241c140c04
-
-GLOBL expandAVX512_14_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_14_outShufHi0+0x00(SB)/8, $0x6860585048403830
-DATA  expandAVX512_14_outShufHi0+0x08(SB)/8, $0x3931ffffffff7870
-DATA  expandAVX512_14_outShufHi0+0x10(SB)/8, $0x7971696159514941
-DATA  expandAVX512_14_outShufHi0+0x18(SB)/8, $0x4a423a32ffffffff
-DATA  expandAVX512_14_outShufHi0+0x20(SB)/8, $0xffff7a726a625a52
-DATA  expandAVX512_14_outShufHi0+0x28(SB)/8, $0x5b534b433b33ffff
-DATA  expandAVX512_14_outShufHi0+0x30(SB)/8, $0xffffffff7b736b63
-DATA  expandAVX512_14_outShufHi0+0x38(SB)/8, $0x6c645c544c443c34
-
-GLOBL expandAVX512_14_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_14_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_14_outShufHi1+0x08(SB)/8, $0xffff18100800ffff
-DATA  expandAVX512_14_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_14_outShufHi1+0x18(SB)/8, $0xffffffff19110901
-DATA  expandAVX512_14_outShufHi1+0x20(SB)/8, $0x0a02ffffffffffff
-DATA  expandAVX512_14_outShufHi1+0x28(SB)/8, $0xffffffffffff1a12
-DATA  expandAVX512_14_outShufHi1+0x30(SB)/8, $0x1b130b03ffffffff
-DATA  expandAVX512_14_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
-
-TEXT expandAVX512_14<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_14_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_14_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_14_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_14_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_14_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_14_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_14_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_14_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_inShuf0<>+0x00(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_14_inShuf0<>+0x08(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_14_inShuf0<>+0x10(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_14_inShuf0<>+0x18(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_14_inShuf0<>+0x20(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_14_inShuf0<>+0x28(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_14_inShuf0<>+0x30(SB)/8, $0xffffff0403020100
+DATA  ·expandAVX512_14_inShuf0<>+0x38(SB)/8, $0xffffff0403020100
+
+GLOBL ·expandAVX512_14_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_14_mat0<>+0x08(SB)/8, $0x0101010101010202
+DATA  ·expandAVX512_14_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_14_mat0<>+0x18(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_14_mat0<>+0x20(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_14_mat0<>+0x28(SB)/8, $0x0404080808080808
+DATA  ·expandAVX512_14_mat0<>+0x30(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_14_mat0<>+0x38(SB)/8, $0x1010101010101010
+
+GLOBL ·expandAVX512_14_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_inShuf1<>+0x00(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_14_inShuf1<>+0x08(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_14_inShuf1<>+0x10(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_14_inShuf1<>+0x18(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_14_inShuf1<>+0x20(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_14_inShuf1<>+0x28(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_14_inShuf1<>+0x30(SB)/8, $0xffffff0807060504
+DATA  ·expandAVX512_14_inShuf1<>+0x38(SB)/8, $0xffffff0807060504
+
+GLOBL ·expandAVX512_14_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_mat1<>+0x00(SB)/8, $0x1010101010102020
+DATA  ·expandAVX512_14_mat1<>+0x08(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_14_mat1<>+0x10(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_14_mat1<>+0x18(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_14_mat1<>+0x20(SB)/8, $0x4040808080808080
+DATA  ·expandAVX512_14_mat1<>+0x28(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_14_mat1<>+0x30(SB)/8, $0x1010101010102020
+DATA  ·expandAVX512_14_mat1<>+0x38(SB)/8, $0x2020202020202020
+
+GLOBL ·expandAVX512_14_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_inShuf2<>+0x00(SB)/8, $0xffffff0807060504
+DATA  ·expandAVX512_14_inShuf2<>+0x08(SB)/8, $0xffffff0807060504
+DATA  ·expandAVX512_14_inShuf2<>+0x10(SB)/8, $0xffffff0807060504
+DATA  ·expandAVX512_14_inShuf2<>+0x18(SB)/8, $0xffffff0807060504
+DATA  ·expandAVX512_14_inShuf2<>+0x20(SB)/8, $0xffffff0908070605
+DATA  ·expandAVX512_14_inShuf2<>+0x28(SB)/8, $0xffffff0908070605
+DATA  ·expandAVX512_14_inShuf2<>+0x30(SB)/8, $0xffffffff08070605
+DATA  ·expandAVX512_14_inShuf2<>+0x38(SB)/8, $0xffffffff08070605
+
+GLOBL ·expandAVX512_14_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_mat2<>+0x00(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_14_mat2<>+0x08(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_14_mat2<>+0x10(SB)/8, $0x4040808080808080
+DATA  ·expandAVX512_14_mat2<>+0x18(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_14_mat2<>+0x20(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_14_mat2<>+0x28(SB)/8, $0x0101010101010202
+DATA  ·expandAVX512_14_mat2<>+0x30(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_14_mat2<>+0x38(SB)/8, $0x0202020204040404
+
+GLOBL ·expandAVX512_14_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_inShuf3<>+0x00(SB)/8, $0xffffffff08070605
+DATA  ·expandAVX512_14_inShuf3<>+0x08(SB)/8, $0xffffffff08070605
+DATA  ·expandAVX512_14_inShuf3<>+0x10(SB)/8, $0xffffffff08070605
+DATA  ·expandAVX512_14_inShuf3<>+0x18(SB)/8, $0xffffffff08070605
+DATA  ·expandAVX512_14_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_14_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_14_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_14_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_14_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_mat3<>+0x00(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_14_mat3<>+0x08(SB)/8, $0x0404080808080808
+DATA  ·expandAVX512_14_mat3<>+0x10(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_14_mat3<>+0x18(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_14_mat3<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_14_mat3<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_14_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_14_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_14_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_outShufLo+0x00(SB)/8, $0x3830282018100800
+DATA  ·expandAVX512_14_outShufLo+0x08(SB)/8, $0x0901686058504840
+DATA  ·expandAVX512_14_outShufLo+0x10(SB)/8, $0x4941393129211911
+DATA  ·expandAVX512_14_outShufLo+0x18(SB)/8, $0x1a120a0269615951
+DATA  ·expandAVX512_14_outShufLo+0x20(SB)/8, $0x5a524a423a322a22
+DATA  ·expandAVX512_14_outShufLo+0x28(SB)/8, $0x2b231b130b036a62
+DATA  ·expandAVX512_14_outShufLo+0x30(SB)/8, $0x6b635b534b433b33
+DATA  ·expandAVX512_14_outShufLo+0x38(SB)/8, $0x3c342c241c140c04
+
+GLOBL ·expandAVX512_14_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_outShufHi0+0x00(SB)/8, $0x6860585048403830
+DATA  ·expandAVX512_14_outShufHi0+0x08(SB)/8, $0x3931ffffffff7870
+DATA  ·expandAVX512_14_outShufHi0+0x10(SB)/8, $0x7971696159514941
+DATA  ·expandAVX512_14_outShufHi0+0x18(SB)/8, $0x4a423a32ffffffff
+DATA  ·expandAVX512_14_outShufHi0+0x20(SB)/8, $0xffff7a726a625a52
+DATA  ·expandAVX512_14_outShufHi0+0x28(SB)/8, $0x5b534b433b33ffff
+DATA  ·expandAVX512_14_outShufHi0+0x30(SB)/8, $0xffffffff7b736b63
+DATA  ·expandAVX512_14_outShufHi0+0x38(SB)/8, $0x6c645c544c443c34
+
+GLOBL ·expandAVX512_14_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_14_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_14_outShufHi1+0x08(SB)/8, $0xffff18100800ffff
+DATA  ·expandAVX512_14_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_14_outShufHi1+0x18(SB)/8, $0xffffffff19110901
+DATA  ·expandAVX512_14_outShufHi1+0x20(SB)/8, $0x0a02ffffffffffff
+DATA  ·expandAVX512_14_outShufHi1+0x28(SB)/8, $0xffffffffffff1a12
+DATA  ·expandAVX512_14_outShufHi1+0x30(SB)/8, $0x1b130b03ffffffff
+DATA  ·expandAVX512_14_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
+
+TEXT ·expandAVX512_14<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_14_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_14_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_14_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_14_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_14_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_14_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_14_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_14_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_14_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_14_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_14_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_14_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_14_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_14_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_14_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0xff0ffc3ff0ffc3ff, AX
        KMOVQ AX, K1
@@ -727,51 +727,51 @@ TEXT expandAVX512_14<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_16_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_16_inShuf0<>+0x00(SB)/8, $0x0303020201010000
-DATA  expandAVX512_16_inShuf0<>+0x08(SB)/8, $0x0303020201010000
-DATA  expandAVX512_16_inShuf0<>+0x10(SB)/8, $0x0303020201010000
-DATA  expandAVX512_16_inShuf0<>+0x18(SB)/8, $0x0303020201010000
-DATA  expandAVX512_16_inShuf0<>+0x20(SB)/8, $0x0303020201010000
-DATA  expandAVX512_16_inShuf0<>+0x28(SB)/8, $0x0303020201010000
-DATA  expandAVX512_16_inShuf0<>+0x30(SB)/8, $0x0303020201010000
-DATA  expandAVX512_16_inShuf0<>+0x38(SB)/8, $0x0303020201010000
-
-GLOBL expandAVX512_16_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_16_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_16_mat0<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_16_mat0<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_16_mat0<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_16_mat0<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_16_mat0<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_16_mat0<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_16_mat0<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_16_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_16_inShuf1<>+0x00(SB)/8, $0x0707060605050404
-DATA  expandAVX512_16_inShuf1<>+0x08(SB)/8, $0x0707060605050404
-DATA  expandAVX512_16_inShuf1<>+0x10(SB)/8, $0x0707060605050404
-DATA  expandAVX512_16_inShuf1<>+0x18(SB)/8, $0x0707060605050404
-DATA  expandAVX512_16_inShuf1<>+0x20(SB)/8, $0x0707060605050404
-DATA  expandAVX512_16_inShuf1<>+0x28(SB)/8, $0x0707060605050404
-DATA  expandAVX512_16_inShuf1<>+0x30(SB)/8, $0x0707060605050404
-DATA  expandAVX512_16_inShuf1<>+0x38(SB)/8, $0x0707060605050404
-
-GLOBL expandAVX512_16_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_16_outShufLo+0x00(SB)/8, $0x1918111009080100
-DATA  expandAVX512_16_outShufLo+0x08(SB)/8, $0x3938313029282120
-DATA  expandAVX512_16_outShufLo+0x10(SB)/8, $0x1b1a13120b0a0302
-DATA  expandAVX512_16_outShufLo+0x18(SB)/8, $0x3b3a33322b2a2322
-DATA  expandAVX512_16_outShufLo+0x20(SB)/8, $0x1d1c15140d0c0504
-DATA  expandAVX512_16_outShufLo+0x28(SB)/8, $0x3d3c35342d2c2524
-DATA  expandAVX512_16_outShufLo+0x30(SB)/8, $0x1f1e17160f0e0706
-DATA  expandAVX512_16_outShufLo+0x38(SB)/8, $0x3f3e37362f2e2726
-
-TEXT expandAVX512_16<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_16_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_16_mat0<>(SB), Z1
-       VMOVDQU64 expandAVX512_16_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_16_outShufLo(SB), Z3
+GLOBL ·expandAVX512_16_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_16_inShuf0<>+0x00(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_16_inShuf0<>+0x08(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_16_inShuf0<>+0x10(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_16_inShuf0<>+0x18(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_16_inShuf0<>+0x20(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_16_inShuf0<>+0x28(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_16_inShuf0<>+0x30(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_16_inShuf0<>+0x38(SB)/8, $0x0303020201010000
+
+GLOBL ·expandAVX512_16_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_16_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_16_mat0<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_16_mat0<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_16_mat0<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_16_mat0<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_16_mat0<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_16_mat0<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_16_mat0<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_16_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_16_inShuf1<>+0x00(SB)/8, $0x0707060605050404
+DATA  ·expandAVX512_16_inShuf1<>+0x08(SB)/8, $0x0707060605050404
+DATA  ·expandAVX512_16_inShuf1<>+0x10(SB)/8, $0x0707060605050404
+DATA  ·expandAVX512_16_inShuf1<>+0x18(SB)/8, $0x0707060605050404
+DATA  ·expandAVX512_16_inShuf1<>+0x20(SB)/8, $0x0707060605050404
+DATA  ·expandAVX512_16_inShuf1<>+0x28(SB)/8, $0x0707060605050404
+DATA  ·expandAVX512_16_inShuf1<>+0x30(SB)/8, $0x0707060605050404
+DATA  ·expandAVX512_16_inShuf1<>+0x38(SB)/8, $0x0707060605050404
+
+GLOBL ·expandAVX512_16_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_16_outShufLo+0x00(SB)/8, $0x1918111009080100
+DATA  ·expandAVX512_16_outShufLo+0x08(SB)/8, $0x3938313029282120
+DATA  ·expandAVX512_16_outShufLo+0x10(SB)/8, $0x1b1a13120b0a0302
+DATA  ·expandAVX512_16_outShufLo+0x18(SB)/8, $0x3b3a33322b2a2322
+DATA  ·expandAVX512_16_outShufLo+0x20(SB)/8, $0x1d1c15140d0c0504
+DATA  ·expandAVX512_16_outShufLo+0x28(SB)/8, $0x3d3c35342d2c2524
+DATA  ·expandAVX512_16_outShufLo+0x30(SB)/8, $0x1f1e17160f0e0706
+DATA  ·expandAVX512_16_outShufLo+0x38(SB)/8, $0x3f3e37362f2e2726
+
+TEXT ·expandAVX512_16<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_16_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_16_mat0<>(SB), Z1
+       VMOVDQU64 ·expandAVX512_16_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_16_outShufLo(SB), Z3
        VMOVDQU64 (AX), Z4
        VPERMB Z4, Z0, Z0
        VGF2P8AFFINEQB $0, Z1, Z0, Z0
@@ -781,133 +781,133 @@ TEXT expandAVX512_16<>(SB), NOSPLIT, $0-0
        VPERMB Z2, Z3, Z2
        RET
 
-GLOBL expandAVX512_18_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_18_inShuf0<>+0x00(SB)/8, $0x0303020201010000
-DATA  expandAVX512_18_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_18_inShuf0<>+0x10(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_18_inShuf0<>+0x18(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_18_inShuf0<>+0x20(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_18_inShuf0<>+0x28(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_18_inShuf0<>+0x30(SB)/8, $0x0303020201010000
-DATA  expandAVX512_18_inShuf0<>+0x38(SB)/8, $0xff03020201010000
-
-GLOBL expandAVX512_18_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_18_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_18_mat0<>+0x08(SB)/8, $0x0101020202020202
-DATA  expandAVX512_18_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_18_mat0<>+0x18(SB)/8, $0x0202020204040404
-DATA  expandAVX512_18_mat0<>+0x20(SB)/8, $0x0404040404040404
-DATA  expandAVX512_18_mat0<>+0x28(SB)/8, $0x0404040404040808
-DATA  expandAVX512_18_mat0<>+0x30(SB)/8, $0x0808080808080808
-DATA  expandAVX512_18_mat0<>+0x38(SB)/8, $0x1010101010101010
-
-GLOBL expandAVX512_18_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_18_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_18_inShuf1<>+0x08(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_18_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_18_inShuf1<>+0x18(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_18_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_18_inShuf1<>+0x28(SB)/8, $0xffff020201010000
-DATA  expandAVX512_18_inShuf1<>+0x30(SB)/8, $0xff06060505040403
-DATA  expandAVX512_18_inShuf1<>+0x38(SB)/8, $0xffffffff06050403
-
-GLOBL expandAVX512_18_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_18_mat1<>+0x00(SB)/8, $0x1010202020202020
-DATA  expandAVX512_18_mat1<>+0x08(SB)/8, $0x2020202020202020
-DATA  expandAVX512_18_mat1<>+0x10(SB)/8, $0x2020202040404040
-DATA  expandAVX512_18_mat1<>+0x18(SB)/8, $0x4040404040404040
-DATA  expandAVX512_18_mat1<>+0x20(SB)/8, $0x4040404040408080
-DATA  expandAVX512_18_mat1<>+0x28(SB)/8, $0x8080808080808080
-DATA  expandAVX512_18_mat1<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_18_mat1<>+0x38(SB)/8, $0x1010202020202020
-
-GLOBL expandAVX512_18_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_18_inShuf2<>+0x00(SB)/8, $0xffffffff06050403
-DATA  expandAVX512_18_inShuf2<>+0x08(SB)/8, $0xffffffff06050403
-DATA  expandAVX512_18_inShuf2<>+0x10(SB)/8, $0xffffffff06050403
-DATA  expandAVX512_18_inShuf2<>+0x18(SB)/8, $0xffffffff06050403
-DATA  expandAVX512_18_inShuf2<>+0x20(SB)/8, $0x0606050504040303
-DATA  expandAVX512_18_inShuf2<>+0x28(SB)/8, $0x0707060605050404
-DATA  expandAVX512_18_inShuf2<>+0x30(SB)/8, $0xffffffffff060504
-DATA  expandAVX512_18_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
-
-GLOBL expandAVX512_18_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_18_mat2<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_18_mat2<>+0x08(SB)/8, $0x2020202040404040
-DATA  expandAVX512_18_mat2<>+0x10(SB)/8, $0x4040404040404040
-DATA  expandAVX512_18_mat2<>+0x18(SB)/8, $0x4040404040408080
-DATA  expandAVX512_18_mat2<>+0x20(SB)/8, $0x8080808080808080
-DATA  expandAVX512_18_mat2<>+0x28(SB)/8, $0x0101010101010101
-DATA  expandAVX512_18_mat2<>+0x30(SB)/8, $0x0101020202020202
-DATA  expandAVX512_18_mat2<>+0x38(SB)/8, $0x0202020202020202
-
-GLOBL expandAVX512_18_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_18_inShuf3<>+0x00(SB)/8, $0xffffffffff060504
-DATA  expandAVX512_18_inShuf3<>+0x08(SB)/8, $0xffffffffff060504
-DATA  expandAVX512_18_inShuf3<>+0x10(SB)/8, $0xffffffffff060504
-DATA  expandAVX512_18_inShuf3<>+0x18(SB)/8, $0xffff060605050404
-DATA  expandAVX512_18_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_18_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_18_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_18_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_18_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_18_mat3<>+0x00(SB)/8, $0x0202020204040404
-DATA  expandAVX512_18_mat3<>+0x08(SB)/8, $0x0404040404040404
-DATA  expandAVX512_18_mat3<>+0x10(SB)/8, $0x0404040404040808
-DATA  expandAVX512_18_mat3<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_18_mat3<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_18_mat3<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_18_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_18_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_18_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_18_outShufLo+0x00(SB)/8, $0x3028201810080100
-DATA  expandAVX512_18_outShufLo+0x08(SB)/8, $0x6058504840393831
-DATA  expandAVX512_18_outShufLo+0x10(SB)/8, $0x2119110903026968
-DATA  expandAVX512_18_outShufLo+0x18(SB)/8, $0x5149413b3a333229
-DATA  expandAVX512_18_outShufLo+0x20(SB)/8, $0x120a05046b6a6159
-DATA  expandAVX512_18_outShufLo+0x28(SB)/8, $0x423d3c35342a221a
-DATA  expandAVX512_18_outShufLo+0x30(SB)/8, $0x07066d6c625a524a
-DATA  expandAVX512_18_outShufLo+0x38(SB)/8, $0x3e37362b231b130b
-
-GLOBL expandAVX512_18_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_18_outShufHi0+0x00(SB)/8, $0x6160585048403830
-DATA  expandAVX512_18_outShufHi0+0x08(SB)/8, $0xffffffff78706968
-DATA  expandAVX512_18_outShufHi0+0x10(SB)/8, $0x59514941393231ff
-DATA  expandAVX512_18_outShufHi0+0x18(SB)/8, $0xffff79716b6a6362
-DATA  expandAVX512_18_outShufHi0+0x20(SB)/8, $0x4a423a3433ffffff
-DATA  expandAVX512_18_outShufHi0+0x28(SB)/8, $0x7a726d6c65645a52
-DATA  expandAVX512_18_outShufHi0+0x30(SB)/8, $0x3b3635ffffffffff
-DATA  expandAVX512_18_outShufHi0+0x38(SB)/8, $0x6f6e67665b534b43
-
-GLOBL expandAVX512_18_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_18_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_18_outShufHi1+0x08(SB)/8, $0x18100800ffffffff
-DATA  expandAVX512_18_outShufHi1+0x10(SB)/8, $0xffffffffffffff19
-DATA  expandAVX512_18_outShufHi1+0x18(SB)/8, $0x0901ffffffffffff
-DATA  expandAVX512_18_outShufHi1+0x20(SB)/8, $0xffffffffff1b1a11
-DATA  expandAVX512_18_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_18_outShufHi1+0x30(SB)/8, $0xffffff1d1c120a02
-DATA  expandAVX512_18_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
-
-TEXT expandAVX512_18<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_18_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_18_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_18_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_18_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_18_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_18_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_18_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_18_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_inShuf0<>+0x00(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_18_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_18_inShuf0<>+0x10(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_18_inShuf0<>+0x18(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_18_inShuf0<>+0x20(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_18_inShuf0<>+0x28(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_18_inShuf0<>+0x30(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_18_inShuf0<>+0x38(SB)/8, $0xff03020201010000
+
+GLOBL ·expandAVX512_18_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_18_mat0<>+0x08(SB)/8, $0x0101020202020202
+DATA  ·expandAVX512_18_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_18_mat0<>+0x18(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_18_mat0<>+0x20(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_18_mat0<>+0x28(SB)/8, $0x0404040404040808
+DATA  ·expandAVX512_18_mat0<>+0x30(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_18_mat0<>+0x38(SB)/8, $0x1010101010101010
+
+GLOBL ·expandAVX512_18_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_18_inShuf1<>+0x08(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_18_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_18_inShuf1<>+0x18(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_18_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_18_inShuf1<>+0x28(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_18_inShuf1<>+0x30(SB)/8, $0xff06060505040403
+DATA  ·expandAVX512_18_inShuf1<>+0x38(SB)/8, $0xffffffff06050403
+
+GLOBL ·expandAVX512_18_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_mat1<>+0x00(SB)/8, $0x1010202020202020
+DATA  ·expandAVX512_18_mat1<>+0x08(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_18_mat1<>+0x10(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_18_mat1<>+0x18(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_18_mat1<>+0x20(SB)/8, $0x4040404040408080
+DATA  ·expandAVX512_18_mat1<>+0x28(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_18_mat1<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_18_mat1<>+0x38(SB)/8, $0x1010202020202020
+
+GLOBL ·expandAVX512_18_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_inShuf2<>+0x00(SB)/8, $0xffffffff06050403
+DATA  ·expandAVX512_18_inShuf2<>+0x08(SB)/8, $0xffffffff06050403
+DATA  ·expandAVX512_18_inShuf2<>+0x10(SB)/8, $0xffffffff06050403
+DATA  ·expandAVX512_18_inShuf2<>+0x18(SB)/8, $0xffffffff06050403
+DATA  ·expandAVX512_18_inShuf2<>+0x20(SB)/8, $0x0606050504040303
+DATA  ·expandAVX512_18_inShuf2<>+0x28(SB)/8, $0x0707060605050404
+DATA  ·expandAVX512_18_inShuf2<>+0x30(SB)/8, $0xffffffffff060504
+DATA  ·expandAVX512_18_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
+
+GLOBL ·expandAVX512_18_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_mat2<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_18_mat2<>+0x08(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_18_mat2<>+0x10(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_18_mat2<>+0x18(SB)/8, $0x4040404040408080
+DATA  ·expandAVX512_18_mat2<>+0x20(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_18_mat2<>+0x28(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_18_mat2<>+0x30(SB)/8, $0x0101020202020202
+DATA  ·expandAVX512_18_mat2<>+0x38(SB)/8, $0x0202020202020202
+
+GLOBL ·expandAVX512_18_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_inShuf3<>+0x00(SB)/8, $0xffffffffff060504
+DATA  ·expandAVX512_18_inShuf3<>+0x08(SB)/8, $0xffffffffff060504
+DATA  ·expandAVX512_18_inShuf3<>+0x10(SB)/8, $0xffffffffff060504
+DATA  ·expandAVX512_18_inShuf3<>+0x18(SB)/8, $0xffff060605050404
+DATA  ·expandAVX512_18_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_18_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_18_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_18_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_18_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_mat3<>+0x00(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_18_mat3<>+0x08(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_18_mat3<>+0x10(SB)/8, $0x0404040404040808
+DATA  ·expandAVX512_18_mat3<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_18_mat3<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_18_mat3<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_18_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_18_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_18_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_outShufLo+0x00(SB)/8, $0x3028201810080100
+DATA  ·expandAVX512_18_outShufLo+0x08(SB)/8, $0x6058504840393831
+DATA  ·expandAVX512_18_outShufLo+0x10(SB)/8, $0x2119110903026968
+DATA  ·expandAVX512_18_outShufLo+0x18(SB)/8, $0x5149413b3a333229
+DATA  ·expandAVX512_18_outShufLo+0x20(SB)/8, $0x120a05046b6a6159
+DATA  ·expandAVX512_18_outShufLo+0x28(SB)/8, $0x423d3c35342a221a
+DATA  ·expandAVX512_18_outShufLo+0x30(SB)/8, $0x07066d6c625a524a
+DATA  ·expandAVX512_18_outShufLo+0x38(SB)/8, $0x3e37362b231b130b
+
+GLOBL ·expandAVX512_18_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_outShufHi0+0x00(SB)/8, $0x6160585048403830
+DATA  ·expandAVX512_18_outShufHi0+0x08(SB)/8, $0xffffffff78706968
+DATA  ·expandAVX512_18_outShufHi0+0x10(SB)/8, $0x59514941393231ff
+DATA  ·expandAVX512_18_outShufHi0+0x18(SB)/8, $0xffff79716b6a6362
+DATA  ·expandAVX512_18_outShufHi0+0x20(SB)/8, $0x4a423a3433ffffff
+DATA  ·expandAVX512_18_outShufHi0+0x28(SB)/8, $0x7a726d6c65645a52
+DATA  ·expandAVX512_18_outShufHi0+0x30(SB)/8, $0x3b3635ffffffffff
+DATA  ·expandAVX512_18_outShufHi0+0x38(SB)/8, $0x6f6e67665b534b43
+
+GLOBL ·expandAVX512_18_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_18_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_18_outShufHi1+0x08(SB)/8, $0x18100800ffffffff
+DATA  ·expandAVX512_18_outShufHi1+0x10(SB)/8, $0xffffffffffffff19
+DATA  ·expandAVX512_18_outShufHi1+0x18(SB)/8, $0x0901ffffffffffff
+DATA  ·expandAVX512_18_outShufHi1+0x20(SB)/8, $0xffffffffff1b1a11
+DATA  ·expandAVX512_18_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_18_outShufHi1+0x30(SB)/8, $0xffffff1d1c120a02
+DATA  ·expandAVX512_18_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
+
+TEXT ·expandAVX512_18<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_18_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_18_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_18_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_18_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_18_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_18_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_18_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_18_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_18_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_18_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_18_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_18_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_18_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_18_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_18_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0xffe0fff83ffe0fff, AX
        KMOVQ AX, K1
@@ -918,230 +918,230 @@ TEXT expandAVX512_18<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_20_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_20_inShuf0<>+0x00(SB)/8, $0x0303020201010000
-DATA  expandAVX512_20_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
-DATA  expandAVX512_20_inShuf0<>+0x10(SB)/8, $0xff03020201010000
-DATA  expandAVX512_20_inShuf0<>+0x18(SB)/8, $0xffff020201010000
-DATA  expandAVX512_20_inShuf0<>+0x20(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_20_inShuf0<>+0x28(SB)/8, $0xffff020201010000
-DATA  expandAVX512_20_inShuf0<>+0x30(SB)/8, $0xffff020201010000
-DATA  expandAVX512_20_inShuf0<>+0x38(SB)/8, $0xffffffffff020100
-
-GLOBL expandAVX512_20_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_20_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_20_mat0<>+0x08(SB)/8, $0x0101010102020202
-DATA  expandAVX512_20_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_20_mat0<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_20_mat0<>+0x20(SB)/8, $0x0404040408080808
-DATA  expandAVX512_20_mat0<>+0x28(SB)/8, $0x0808080808080808
-DATA  expandAVX512_20_mat0<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_20_mat0<>+0x38(SB)/8, $0x1010101020202020
-
-GLOBL expandAVX512_20_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_20_inShuf1<>+0x00(SB)/8, $0xffff020201010000
-DATA  expandAVX512_20_inShuf1<>+0x08(SB)/8, $0xffff020201010000
-DATA  expandAVX512_20_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_20_inShuf1<>+0x18(SB)/8, $0xffff020201010000
-DATA  expandAVX512_20_inShuf1<>+0x20(SB)/8, $0xff06060505040403
-DATA  expandAVX512_20_inShuf1<>+0x28(SB)/8, $0x0606050504040303
-DATA  expandAVX512_20_inShuf1<>+0x30(SB)/8, $0xffffffff06050403
-DATA  expandAVX512_20_inShuf1<>+0x38(SB)/8, $0xffff050504040303
-
-GLOBL expandAVX512_20_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_20_mat1<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_20_mat1<>+0x08(SB)/8, $0x4040404040404040
-DATA  expandAVX512_20_mat1<>+0x10(SB)/8, $0x4040404080808080
-DATA  expandAVX512_20_mat1<>+0x18(SB)/8, $0x8080808080808080
-DATA  expandAVX512_20_mat1<>+0x20(SB)/8, $0x0202020202020202
-DATA  expandAVX512_20_mat1<>+0x28(SB)/8, $0x0404040404040404
-DATA  expandAVX512_20_mat1<>+0x30(SB)/8, $0x0404040408080808
-DATA  expandAVX512_20_mat1<>+0x38(SB)/8, $0x0808080808080808
-
-GLOBL expandAVX512_20_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_20_inShuf2<>+0x00(SB)/8, $0xffff050504040303
-DATA  expandAVX512_20_inShuf2<>+0x08(SB)/8, $0xffffffffff050403
-DATA  expandAVX512_20_inShuf2<>+0x10(SB)/8, $0xffff050504040303
-DATA  expandAVX512_20_inShuf2<>+0x18(SB)/8, $0xffff050504040303
-DATA  expandAVX512_20_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
-DATA  expandAVX512_20_inShuf2<>+0x28(SB)/8, $0xffff050504040303
-DATA  expandAVX512_20_inShuf2<>+0x30(SB)/8, $0xffff060605050404
-DATA  expandAVX512_20_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
-
-GLOBL expandAVX512_20_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_20_mat2<>+0x00(SB)/8, $0x1010101010101010
-DATA  expandAVX512_20_mat2<>+0x08(SB)/8, $0x1010101020202020
-DATA  expandAVX512_20_mat2<>+0x10(SB)/8, $0x2020202020202020
-DATA  expandAVX512_20_mat2<>+0x18(SB)/8, $0x4040404040404040
-DATA  expandAVX512_20_mat2<>+0x20(SB)/8, $0x4040404080808080
-DATA  expandAVX512_20_mat2<>+0x28(SB)/8, $0x8080808080808080
-DATA  expandAVX512_20_mat2<>+0x30(SB)/8, $0x0101010101010101
-DATA  expandAVX512_20_mat2<>+0x38(SB)/8, $0x0101010102020202
-
-GLOBL expandAVX512_20_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_20_outShufLo+0x00(SB)/8, $0x2019181110080100
-DATA  expandAVX512_20_outShufLo+0x08(SB)/8, $0x4841403831302928
-DATA  expandAVX512_20_outShufLo+0x10(SB)/8, $0x1209030259585049
-DATA  expandAVX512_20_outShufLo+0x18(SB)/8, $0x33322b2a211b1a13
-DATA  expandAVX512_20_outShufLo+0x20(SB)/8, $0x5b5a514b4a434239
-DATA  expandAVX512_20_outShufLo+0x28(SB)/8, $0x221d1c15140a0504
-DATA  expandAVX512_20_outShufLo+0x30(SB)/8, $0x4c45443a35342d2c
-DATA  expandAVX512_20_outShufLo+0x38(SB)/8, $0x160b07065d5c524d
-
-GLOBL expandAVX512_20_outShufHi(SB), RODATA, $0x40
-DATA  expandAVX512_20_outShufHi+0x00(SB)/8, $0x4140393830292820
-DATA  expandAVX512_20_outShufHi+0x08(SB)/8, $0x6968605958515048
-DATA  expandAVX512_20_outShufHi+0x10(SB)/8, $0x312b2a2221787170
-DATA  expandAVX512_20_outShufHi+0x18(SB)/8, $0x5a53524943423b3a
-DATA  expandAVX512_20_outShufHi+0x20(SB)/8, $0x237973726b6a615b
-DATA  expandAVX512_20_outShufHi+0x28(SB)/8, $0x45443d3c322d2c24
-DATA  expandAVX512_20_outShufHi+0x30(SB)/8, $0x6d6c625d5c55544a
-DATA  expandAVX512_20_outShufHi+0x38(SB)/8, $0x332f2e26257a7574
-
-TEXT expandAVX512_20<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_20_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_20_inShuf1<>(SB), Z3
-       VMOVDQU64 expandAVX512_20_inShuf2<>(SB), Z4
-       VMOVDQU64 expandAVX512_20_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_20_outShufHi(SB), Z2
+GLOBL ·expandAVX512_20_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_20_inShuf0<>+0x00(SB)/8, $0x0303020201010000
+DATA  ·expandAVX512_20_inShuf0<>+0x08(SB)/8, $0xffffffff03020100
+DATA  ·expandAVX512_20_inShuf0<>+0x10(SB)/8, $0xff03020201010000
+DATA  ·expandAVX512_20_inShuf0<>+0x18(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_20_inShuf0<>+0x20(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_20_inShuf0<>+0x28(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_20_inShuf0<>+0x30(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_20_inShuf0<>+0x38(SB)/8, $0xffffffffff020100
+
+GLOBL ·expandAVX512_20_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_20_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_20_mat0<>+0x08(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_20_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_20_mat0<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_20_mat0<>+0x20(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_20_mat0<>+0x28(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_20_mat0<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_20_mat0<>+0x38(SB)/8, $0x1010101020202020
+
+GLOBL ·expandAVX512_20_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_20_inShuf1<>+0x00(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_20_inShuf1<>+0x08(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_20_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_20_inShuf1<>+0x18(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_20_inShuf1<>+0x20(SB)/8, $0xff06060505040403
+DATA  ·expandAVX512_20_inShuf1<>+0x28(SB)/8, $0x0606050504040303
+DATA  ·expandAVX512_20_inShuf1<>+0x30(SB)/8, $0xffffffff06050403
+DATA  ·expandAVX512_20_inShuf1<>+0x38(SB)/8, $0xffff050504040303
+
+GLOBL ·expandAVX512_20_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_20_mat1<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_20_mat1<>+0x08(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_20_mat1<>+0x10(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_20_mat1<>+0x18(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_20_mat1<>+0x20(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_20_mat1<>+0x28(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_20_mat1<>+0x30(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_20_mat1<>+0x38(SB)/8, $0x0808080808080808
+
+GLOBL ·expandAVX512_20_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_20_inShuf2<>+0x00(SB)/8, $0xffff050504040303
+DATA  ·expandAVX512_20_inShuf2<>+0x08(SB)/8, $0xffffffffff050403
+DATA  ·expandAVX512_20_inShuf2<>+0x10(SB)/8, $0xffff050504040303
+DATA  ·expandAVX512_20_inShuf2<>+0x18(SB)/8, $0xffff050504040303
+DATA  ·expandAVX512_20_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
+DATA  ·expandAVX512_20_inShuf2<>+0x28(SB)/8, $0xffff050504040303
+DATA  ·expandAVX512_20_inShuf2<>+0x30(SB)/8, $0xffff060605050404
+DATA  ·expandAVX512_20_inShuf2<>+0x38(SB)/8, $0xffffffffff060504
+
+GLOBL ·expandAVX512_20_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_20_mat2<>+0x00(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_20_mat2<>+0x08(SB)/8, $0x1010101020202020
+DATA  ·expandAVX512_20_mat2<>+0x10(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_20_mat2<>+0x18(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_20_mat2<>+0x20(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_20_mat2<>+0x28(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_20_mat2<>+0x30(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_20_mat2<>+0x38(SB)/8, $0x0101010102020202
+
+GLOBL ·expandAVX512_20_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_20_outShufLo+0x00(SB)/8, $0x2019181110080100
+DATA  ·expandAVX512_20_outShufLo+0x08(SB)/8, $0x4841403831302928
+DATA  ·expandAVX512_20_outShufLo+0x10(SB)/8, $0x1209030259585049
+DATA  ·expandAVX512_20_outShufLo+0x18(SB)/8, $0x33322b2a211b1a13
+DATA  ·expandAVX512_20_outShufLo+0x20(SB)/8, $0x5b5a514b4a434239
+DATA  ·expandAVX512_20_outShufLo+0x28(SB)/8, $0x221d1c15140a0504
+DATA  ·expandAVX512_20_outShufLo+0x30(SB)/8, $0x4c45443a35342d2c
+DATA  ·expandAVX512_20_outShufLo+0x38(SB)/8, $0x160b07065d5c524d
+
+GLOBL ·expandAVX512_20_outShufHi(SB), RODATA, $0x40
+DATA  ·expandAVX512_20_outShufHi+0x00(SB)/8, $0x4140393830292820
+DATA  ·expandAVX512_20_outShufHi+0x08(SB)/8, $0x6968605958515048
+DATA  ·expandAVX512_20_outShufHi+0x10(SB)/8, $0x312b2a2221787170
+DATA  ·expandAVX512_20_outShufHi+0x18(SB)/8, $0x5a53524943423b3a
+DATA  ·expandAVX512_20_outShufHi+0x20(SB)/8, $0x237973726b6a615b
+DATA  ·expandAVX512_20_outShufHi+0x28(SB)/8, $0x45443d3c322d2c24
+DATA  ·expandAVX512_20_outShufHi+0x30(SB)/8, $0x6d6c625d5c55544a
+DATA  ·expandAVX512_20_outShufHi+0x38(SB)/8, $0x332f2e26257a7574
+
+TEXT ·expandAVX512_20<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_20_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_20_inShuf1<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_20_inShuf2<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_20_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_20_outShufHi(SB), Z2
        VMOVDQU64 (AX), Z5
        VPERMB Z5, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_20_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_20_mat0<>(SB), Z0, Z0
        VPERMB Z5, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_20_mat1<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_20_mat1<>(SB), Z3, Z3
        VPERMB Z5, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_20_mat2<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_20_mat2<>(SB), Z4, Z4
        VPERMI2B Z3, Z0, Z1
        VPERMI2B Z4, Z3, Z2
        RET
 
-GLOBL expandAVX512_22_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_22_inShuf0<>+0x00(SB)/8, $0xffff020201010000
-DATA  expandAVX512_22_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_22_inShuf0<>+0x10(SB)/8, $0xffff020201010000
-DATA  expandAVX512_22_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_22_inShuf0<>+0x20(SB)/8, $0xffff020201010000
-DATA  expandAVX512_22_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_22_inShuf0<>+0x30(SB)/8, $0xffff020201010000
-DATA  expandAVX512_22_inShuf0<>+0x38(SB)/8, $0xffff020201010000
-
-GLOBL expandAVX512_22_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_22_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_22_mat0<>+0x08(SB)/8, $0x0101010101010202
-DATA  expandAVX512_22_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_22_mat0<>+0x18(SB)/8, $0x0202020204040404
-DATA  expandAVX512_22_mat0<>+0x20(SB)/8, $0x0404040404040404
-DATA  expandAVX512_22_mat0<>+0x28(SB)/8, $0x0404080808080808
-DATA  expandAVX512_22_mat0<>+0x30(SB)/8, $0x0808080808080808
-DATA  expandAVX512_22_mat0<>+0x38(SB)/8, $0x1010101010101010
-
-GLOBL expandAVX512_22_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_22_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_22_inShuf1<>+0x08(SB)/8, $0xffff020201010000
-DATA  expandAVX512_22_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_22_inShuf1<>+0x18(SB)/8, $0xffff020201010000
-DATA  expandAVX512_22_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_22_inShuf1<>+0x28(SB)/8, $0xffffffff01010000
-DATA  expandAVX512_22_inShuf1<>+0x30(SB)/8, $0xffff040403030202
-DATA  expandAVX512_22_inShuf1<>+0x38(SB)/8, $0xffff050504040303
-
-GLOBL expandAVX512_22_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_22_mat1<>+0x00(SB)/8, $0x1010101010102020
-DATA  expandAVX512_22_mat1<>+0x08(SB)/8, $0x2020202020202020
-DATA  expandAVX512_22_mat1<>+0x10(SB)/8, $0x2020202040404040
-DATA  expandAVX512_22_mat1<>+0x18(SB)/8, $0x4040404040404040
-DATA  expandAVX512_22_mat1<>+0x20(SB)/8, $0x4040808080808080
-DATA  expandAVX512_22_mat1<>+0x28(SB)/8, $0x8080808080808080
-DATA  expandAVX512_22_mat1<>+0x30(SB)/8, $0x8080808080808080
-DATA  expandAVX512_22_mat1<>+0x38(SB)/8, $0x0101010101010101
-
-GLOBL expandAVX512_22_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_22_inShuf2<>+0x00(SB)/8, $0xffffffffff050403
-DATA  expandAVX512_22_inShuf2<>+0x08(SB)/8, $0xffff050504040303
-DATA  expandAVX512_22_inShuf2<>+0x10(SB)/8, $0xffffffffff050403
-DATA  expandAVX512_22_inShuf2<>+0x18(SB)/8, $0xffff050504040303
-DATA  expandAVX512_22_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
-DATA  expandAVX512_22_inShuf2<>+0x28(SB)/8, $0xffff050504040303
-DATA  expandAVX512_22_inShuf2<>+0x30(SB)/8, $0xffff050504040303
-DATA  expandAVX512_22_inShuf2<>+0x38(SB)/8, $0xffffffffff050403
-
-GLOBL expandAVX512_22_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_22_mat2<>+0x00(SB)/8, $0x0101010101010202
-DATA  expandAVX512_22_mat2<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_22_mat2<>+0x10(SB)/8, $0x0202020204040404
-DATA  expandAVX512_22_mat2<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_22_mat2<>+0x20(SB)/8, $0x0404080808080808
-DATA  expandAVX512_22_mat2<>+0x28(SB)/8, $0x0808080808080808
-DATA  expandAVX512_22_mat2<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_22_mat2<>+0x38(SB)/8, $0x1010101010102020
-
-GLOBL expandAVX512_22_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_22_inShuf3<>+0x00(SB)/8, $0xffff050504040303
-DATA  expandAVX512_22_inShuf3<>+0x08(SB)/8, $0xffffffffff050403
-DATA  expandAVX512_22_inShuf3<>+0x10(SB)/8, $0xffffff0504040303
-DATA  expandAVX512_22_inShuf3<>+0x18(SB)/8, $0xffffffffffff0403
-DATA  expandAVX512_22_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_22_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_22_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_22_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_22_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_22_mat3<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_22_mat3<>+0x08(SB)/8, $0x2020202040404040
-DATA  expandAVX512_22_mat3<>+0x10(SB)/8, $0x4040404040404040
-DATA  expandAVX512_22_mat3<>+0x18(SB)/8, $0x4040808080808080
-DATA  expandAVX512_22_mat3<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_22_mat3<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_22_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_22_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_22_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_22_outShufLo+0x00(SB)/8, $0x2120181110080100
-DATA  expandAVX512_22_outShufLo+0x08(SB)/8, $0x4948403938313028
-DATA  expandAVX512_22_outShufLo+0x10(SB)/8, $0x0302696860595850
-DATA  expandAVX512_22_outShufLo+0x18(SB)/8, $0x3229232219131209
-DATA  expandAVX512_22_outShufLo+0x20(SB)/8, $0x5a514b4a413b3a33
-DATA  expandAVX512_22_outShufLo+0x28(SB)/8, $0x140a05046b6a615b
-DATA  expandAVX512_22_outShufLo+0x30(SB)/8, $0x3c35342a25241a15
-DATA  expandAVX512_22_outShufLo+0x38(SB)/8, $0x625d5c524d4c423d
-
-GLOBL expandAVX512_22_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_22_outShufHi0+0x00(SB)/8, $0x5049484039383130
-DATA  expandAVX512_22_outShufHi0+0x08(SB)/8, $0x7871706968605958
-DATA  expandAVX512_22_outShufHi0+0x10(SB)/8, $0x3332ffffffffffff
-DATA  expandAVX512_22_outShufHi0+0x18(SB)/8, $0x5b5a514b4a413b3a
-DATA  expandAVX512_22_outShufHi0+0x20(SB)/8, $0xffff7973726b6a61
-DATA  expandAVX512_22_outShufHi0+0x28(SB)/8, $0x3d3c3534ffffffff
-DATA  expandAVX512_22_outShufHi0+0x30(SB)/8, $0x6c625d5c524d4c42
-DATA  expandAVX512_22_outShufHi0+0x38(SB)/8, $0xffffffff7a75746d
-
-GLOBL expandAVX512_22_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_22_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_22_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_22_outShufHi1+0x10(SB)/8, $0xffff181110080100
-DATA  expandAVX512_22_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_22_outShufHi1+0x20(SB)/8, $0x0302ffffffffffff
-DATA  expandAVX512_22_outShufHi1+0x28(SB)/8, $0xffffffff19131209
-DATA  expandAVX512_22_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_22_outShufHi1+0x38(SB)/8, $0x140a0504ffffffff
-
-TEXT expandAVX512_22<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_22_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_22_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_22_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_22_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_22_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_22_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_22_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_22_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_inShuf0<>+0x00(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_22_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_22_inShuf0<>+0x10(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_22_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_22_inShuf0<>+0x20(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_22_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_22_inShuf0<>+0x30(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_22_inShuf0<>+0x38(SB)/8, $0xffff020201010000
+
+GLOBL ·expandAVX512_22_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_22_mat0<>+0x08(SB)/8, $0x0101010101010202
+DATA  ·expandAVX512_22_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_22_mat0<>+0x18(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_22_mat0<>+0x20(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_22_mat0<>+0x28(SB)/8, $0x0404080808080808
+DATA  ·expandAVX512_22_mat0<>+0x30(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_22_mat0<>+0x38(SB)/8, $0x1010101010101010
+
+GLOBL ·expandAVX512_22_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_inShuf1<>+0x00(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_22_inShuf1<>+0x08(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_22_inShuf1<>+0x10(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_22_inShuf1<>+0x18(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_22_inShuf1<>+0x20(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_22_inShuf1<>+0x28(SB)/8, $0xffffffff01010000
+DATA  ·expandAVX512_22_inShuf1<>+0x30(SB)/8, $0xffff040403030202
+DATA  ·expandAVX512_22_inShuf1<>+0x38(SB)/8, $0xffff050504040303
+
+GLOBL ·expandAVX512_22_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_mat1<>+0x00(SB)/8, $0x1010101010102020
+DATA  ·expandAVX512_22_mat1<>+0x08(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_22_mat1<>+0x10(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_22_mat1<>+0x18(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_22_mat1<>+0x20(SB)/8, $0x4040808080808080
+DATA  ·expandAVX512_22_mat1<>+0x28(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_22_mat1<>+0x30(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_22_mat1<>+0x38(SB)/8, $0x0101010101010101
+
+GLOBL ·expandAVX512_22_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_inShuf2<>+0x00(SB)/8, $0xffffffffff050403
+DATA  ·expandAVX512_22_inShuf2<>+0x08(SB)/8, $0xffff050504040303
+DATA  ·expandAVX512_22_inShuf2<>+0x10(SB)/8, $0xffffffffff050403
+DATA  ·expandAVX512_22_inShuf2<>+0x18(SB)/8, $0xffff050504040303
+DATA  ·expandAVX512_22_inShuf2<>+0x20(SB)/8, $0xffffffffff050403
+DATA  ·expandAVX512_22_inShuf2<>+0x28(SB)/8, $0xffff050504040303
+DATA  ·expandAVX512_22_inShuf2<>+0x30(SB)/8, $0xffff050504040303
+DATA  ·expandAVX512_22_inShuf2<>+0x38(SB)/8, $0xffffffffff050403
+
+GLOBL ·expandAVX512_22_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_mat2<>+0x00(SB)/8, $0x0101010101010202
+DATA  ·expandAVX512_22_mat2<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_22_mat2<>+0x10(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_22_mat2<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_22_mat2<>+0x20(SB)/8, $0x0404080808080808
+DATA  ·expandAVX512_22_mat2<>+0x28(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_22_mat2<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_22_mat2<>+0x38(SB)/8, $0x1010101010102020
+
+GLOBL ·expandAVX512_22_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_inShuf3<>+0x00(SB)/8, $0xffff050504040303
+DATA  ·expandAVX512_22_inShuf3<>+0x08(SB)/8, $0xffffffffff050403
+DATA  ·expandAVX512_22_inShuf3<>+0x10(SB)/8, $0xffffff0504040303
+DATA  ·expandAVX512_22_inShuf3<>+0x18(SB)/8, $0xffffffffffff0403
+DATA  ·expandAVX512_22_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_22_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_22_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_22_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_22_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_mat3<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_22_mat3<>+0x08(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_22_mat3<>+0x10(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_22_mat3<>+0x18(SB)/8, $0x4040808080808080
+DATA  ·expandAVX512_22_mat3<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_22_mat3<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_22_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_22_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_22_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_outShufLo+0x00(SB)/8, $0x2120181110080100
+DATA  ·expandAVX512_22_outShufLo+0x08(SB)/8, $0x4948403938313028
+DATA  ·expandAVX512_22_outShufLo+0x10(SB)/8, $0x0302696860595850
+DATA  ·expandAVX512_22_outShufLo+0x18(SB)/8, $0x3229232219131209
+DATA  ·expandAVX512_22_outShufLo+0x20(SB)/8, $0x5a514b4a413b3a33
+DATA  ·expandAVX512_22_outShufLo+0x28(SB)/8, $0x140a05046b6a615b
+DATA  ·expandAVX512_22_outShufLo+0x30(SB)/8, $0x3c35342a25241a15
+DATA  ·expandAVX512_22_outShufLo+0x38(SB)/8, $0x625d5c524d4c423d
+
+GLOBL ·expandAVX512_22_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_outShufHi0+0x00(SB)/8, $0x5049484039383130
+DATA  ·expandAVX512_22_outShufHi0+0x08(SB)/8, $0x7871706968605958
+DATA  ·expandAVX512_22_outShufHi0+0x10(SB)/8, $0x3332ffffffffffff
+DATA  ·expandAVX512_22_outShufHi0+0x18(SB)/8, $0x5b5a514b4a413b3a
+DATA  ·expandAVX512_22_outShufHi0+0x20(SB)/8, $0xffff7973726b6a61
+DATA  ·expandAVX512_22_outShufHi0+0x28(SB)/8, $0x3d3c3534ffffffff
+DATA  ·expandAVX512_22_outShufHi0+0x30(SB)/8, $0x6c625d5c524d4c42
+DATA  ·expandAVX512_22_outShufHi0+0x38(SB)/8, $0xffffffff7a75746d
+
+GLOBL ·expandAVX512_22_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_22_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_22_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_22_outShufHi1+0x10(SB)/8, $0xffff181110080100
+DATA  ·expandAVX512_22_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_22_outShufHi1+0x20(SB)/8, $0x0302ffffffffffff
+DATA  ·expandAVX512_22_outShufHi1+0x28(SB)/8, $0xffffffff19131209
+DATA  ·expandAVX512_22_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_22_outShufHi1+0x38(SB)/8, $0x140a0504ffffffff
+
+TEXT ·expandAVX512_22<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_22_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_22_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_22_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_22_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_22_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_22_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_22_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_22_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_22_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_22_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_22_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_22_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_22_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_22_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_22_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0xffff03fffc0ffff, AX
        KMOVQ AX, K1
@@ -1152,124 +1152,124 @@ TEXT expandAVX512_22<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_24_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_24_inShuf0<>+0x00(SB)/8, $0x0202010101000000
-DATA  expandAVX512_24_inShuf0<>+0x08(SB)/8, $0x0202010101000000
-DATA  expandAVX512_24_inShuf0<>+0x10(SB)/8, $0x0202010101000000
-DATA  expandAVX512_24_inShuf0<>+0x18(SB)/8, $0x0202010101000000
-DATA  expandAVX512_24_inShuf0<>+0x20(SB)/8, $0x0202010101000000
-DATA  expandAVX512_24_inShuf0<>+0x28(SB)/8, $0xff02010101000000
-DATA  expandAVX512_24_inShuf0<>+0x30(SB)/8, $0xffff010101000000
-DATA  expandAVX512_24_inShuf0<>+0x38(SB)/8, $0xffff010101000000
-
-GLOBL expandAVX512_24_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_24_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_24_mat0<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_24_mat0<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_24_mat0<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_24_mat0<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_24_mat0<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_24_mat0<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_24_mat0<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_24_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_24_inShuf1<>+0x00(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_24_inShuf1<>+0x08(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_24_inShuf1<>+0x10(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_24_inShuf1<>+0x18(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_24_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_24_inShuf1<>+0x28(SB)/8, $0x0404040303030202
-DATA  expandAVX512_24_inShuf1<>+0x30(SB)/8, $0x0404030303020202
-DATA  expandAVX512_24_inShuf1<>+0x38(SB)/8, $0x0404030303020202
-
-GLOBL expandAVX512_24_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_24_inShuf2<>+0x00(SB)/8, $0x0505040404030303
-DATA  expandAVX512_24_inShuf2<>+0x08(SB)/8, $0x0505040404030303
-DATA  expandAVX512_24_inShuf2<>+0x10(SB)/8, $0x0505040404030303
-DATA  expandAVX512_24_inShuf2<>+0x18(SB)/8, $0xffff040404030303
-DATA  expandAVX512_24_inShuf2<>+0x20(SB)/8, $0xffff040404030303
-DATA  expandAVX512_24_inShuf2<>+0x28(SB)/8, $0xffffffffffffff04
-DATA  expandAVX512_24_inShuf2<>+0x30(SB)/8, $0xffffffffffffff04
-DATA  expandAVX512_24_inShuf2<>+0x38(SB)/8, $0xffffffffffffff05
-
-GLOBL expandAVX512_24_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_24_mat2<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_24_mat2<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_24_mat2<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_24_mat2<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_24_mat2<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_24_mat2<>+0x28(SB)/8, $0x4040404040404040
-DATA  expandAVX512_24_mat2<>+0x30(SB)/8, $0x8080808080808080
-DATA  expandAVX512_24_mat2<>+0x38(SB)/8, $0x0101010101010101
-
-GLOBL expandAVX512_24_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_24_inShuf3<>+0x00(SB)/8, $0xffffffffffffff05
-DATA  expandAVX512_24_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_24_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_24_mat3<>+0x00(SB)/8, $0x0202020202020202
-DATA  expandAVX512_24_mat3<>+0x08(SB)/8, $0x0000000000000000
-DATA  expandAVX512_24_mat3<>+0x10(SB)/8, $0x0000000000000000
-DATA  expandAVX512_24_mat3<>+0x18(SB)/8, $0x0000000000000000
-DATA  expandAVX512_24_mat3<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_24_mat3<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_24_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_24_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_24_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_24_outShufLo+0x00(SB)/8, $0x11100a0908020100
-DATA  expandAVX512_24_outShufLo+0x08(SB)/8, $0x282221201a191812
-DATA  expandAVX512_24_outShufLo+0x10(SB)/8, $0x3a39383231302a29
-DATA  expandAVX512_24_outShufLo+0x18(SB)/8, $0x14130d0c0b050403
-DATA  expandAVX512_24_outShufLo+0x20(SB)/8, $0x2b2524231d1c1b15
-DATA  expandAVX512_24_outShufLo+0x28(SB)/8, $0x3d3c3b3534332d2c
-DATA  expandAVX512_24_outShufLo+0x30(SB)/8, $0x1716480f0e400706
-DATA  expandAVX512_24_outShufLo+0x38(SB)/8, $0x2e602726581f1e50
-
-GLOBL expandAVX512_24_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_24_outShufHi0+0x00(SB)/8, $0x3a39383231302928
-DATA  expandAVX512_24_outShufHi0+0x08(SB)/8, $0x51504a4948424140
-DATA  expandAVX512_24_outShufHi0+0x10(SB)/8, $0x2a6261605a595852
-DATA  expandAVX512_24_outShufHi0+0x18(SB)/8, $0x3d3c3b3534332c2b
-DATA  expandAVX512_24_outShufHi0+0x20(SB)/8, $0x54534d4c4b454443
-DATA  expandAVX512_24_outShufHi0+0x28(SB)/8, $0x2d6564635d5c5b55
-DATA  expandAVX512_24_outShufHi0+0x30(SB)/8, $0x703f3e6837362f2e
-DATA  expandAVX512_24_outShufHi0+0x38(SB)/8, $0x5756ff4f4e784746
-
-GLOBL expandAVX512_24_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_24_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_24_outShufHi1+0x38(SB)/8, $0xffff00ffffffffff
-
-TEXT expandAVX512_24<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_24_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_24_mat0<>(SB), Z2
-       VMOVDQU64 expandAVX512_24_inShuf1<>(SB), Z3
-       VMOVDQU64 expandAVX512_24_inShuf2<>(SB), Z4
-       VMOVDQU64 expandAVX512_24_inShuf3<>(SB), Z5
-       VMOVDQU64 expandAVX512_24_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_24_outShufHi0(SB), Z6
-       VMOVDQU64 expandAVX512_24_outShufHi1(SB), Z7
+GLOBL ·expandAVX512_24_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_inShuf0<>+0x00(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_24_inShuf0<>+0x08(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_24_inShuf0<>+0x10(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_24_inShuf0<>+0x18(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_24_inShuf0<>+0x20(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_24_inShuf0<>+0x28(SB)/8, $0xff02010101000000
+DATA  ·expandAVX512_24_inShuf0<>+0x30(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_24_inShuf0<>+0x38(SB)/8, $0xffff010101000000
+
+GLOBL ·expandAVX512_24_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_24_mat0<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_24_mat0<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_24_mat0<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_24_mat0<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_24_mat0<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_24_mat0<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_24_mat0<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_24_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_inShuf1<>+0x00(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_24_inShuf1<>+0x08(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_24_inShuf1<>+0x10(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_24_inShuf1<>+0x18(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_24_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_24_inShuf1<>+0x28(SB)/8, $0x0404040303030202
+DATA  ·expandAVX512_24_inShuf1<>+0x30(SB)/8, $0x0404030303020202
+DATA  ·expandAVX512_24_inShuf1<>+0x38(SB)/8, $0x0404030303020202
+
+GLOBL ·expandAVX512_24_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_inShuf2<>+0x00(SB)/8, $0x0505040404030303
+DATA  ·expandAVX512_24_inShuf2<>+0x08(SB)/8, $0x0505040404030303
+DATA  ·expandAVX512_24_inShuf2<>+0x10(SB)/8, $0x0505040404030303
+DATA  ·expandAVX512_24_inShuf2<>+0x18(SB)/8, $0xffff040404030303
+DATA  ·expandAVX512_24_inShuf2<>+0x20(SB)/8, $0xffff040404030303
+DATA  ·expandAVX512_24_inShuf2<>+0x28(SB)/8, $0xffffffffffffff04
+DATA  ·expandAVX512_24_inShuf2<>+0x30(SB)/8, $0xffffffffffffff04
+DATA  ·expandAVX512_24_inShuf2<>+0x38(SB)/8, $0xffffffffffffff05
+
+GLOBL ·expandAVX512_24_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_mat2<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_24_mat2<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_24_mat2<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_24_mat2<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_24_mat2<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_24_mat2<>+0x28(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_24_mat2<>+0x30(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_24_mat2<>+0x38(SB)/8, $0x0101010101010101
+
+GLOBL ·expandAVX512_24_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_inShuf3<>+0x00(SB)/8, $0xffffffffffffff05
+DATA  ·expandAVX512_24_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_24_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_mat3<>+0x00(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_24_mat3<>+0x08(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_24_mat3<>+0x10(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_24_mat3<>+0x18(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_24_mat3<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_24_mat3<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_24_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_24_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_24_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_outShufLo+0x00(SB)/8, $0x11100a0908020100
+DATA  ·expandAVX512_24_outShufLo+0x08(SB)/8, $0x282221201a191812
+DATA  ·expandAVX512_24_outShufLo+0x10(SB)/8, $0x3a39383231302a29
+DATA  ·expandAVX512_24_outShufLo+0x18(SB)/8, $0x14130d0c0b050403
+DATA  ·expandAVX512_24_outShufLo+0x20(SB)/8, $0x2b2524231d1c1b15
+DATA  ·expandAVX512_24_outShufLo+0x28(SB)/8, $0x3d3c3b3534332d2c
+DATA  ·expandAVX512_24_outShufLo+0x30(SB)/8, $0x1716480f0e400706
+DATA  ·expandAVX512_24_outShufLo+0x38(SB)/8, $0x2e602726581f1e50
+
+GLOBL ·expandAVX512_24_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_outShufHi0+0x00(SB)/8, $0x3a39383231302928
+DATA  ·expandAVX512_24_outShufHi0+0x08(SB)/8, $0x51504a4948424140
+DATA  ·expandAVX512_24_outShufHi0+0x10(SB)/8, $0x2a6261605a595852
+DATA  ·expandAVX512_24_outShufHi0+0x18(SB)/8, $0x3d3c3b3534332c2b
+DATA  ·expandAVX512_24_outShufHi0+0x20(SB)/8, $0x54534d4c4b454443
+DATA  ·expandAVX512_24_outShufHi0+0x28(SB)/8, $0x2d6564635d5c5b55
+DATA  ·expandAVX512_24_outShufHi0+0x30(SB)/8, $0x703f3e6837362f2e
+DATA  ·expandAVX512_24_outShufHi0+0x38(SB)/8, $0x5756ff4f4e784746
+
+GLOBL ·expandAVX512_24_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_24_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_24_outShufHi1+0x38(SB)/8, $0xffff00ffffffffff
+
+TEXT ·expandAVX512_24<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_24_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_24_mat0<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_24_inShuf1<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_24_inShuf2<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_24_inShuf3<>(SB), Z5
+       VMOVDQU64 ·expandAVX512_24_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_24_outShufHi0(SB), Z6
+       VMOVDQU64 ·expandAVX512_24_outShufHi1(SB), Z7
        VMOVDQU64 (AX), Z8
        VPERMB Z8, Z0, Z0
        VGF2P8AFFINEQB $0, Z2, Z0, Z0
        VPERMB Z8, Z3, Z3
        VGF2P8AFFINEQB $0, Z2, Z3, Z2
        VPERMB Z8, Z4, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_24_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_24_mat2<>(SB), Z3, Z3
        VPERMB Z8, Z5, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_24_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_24_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0xdfffffffffffffff, AX
        KMOVQ AX, K1
@@ -1280,133 +1280,133 @@ TEXT expandAVX512_24<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z6, Z2
        RET
 
-GLOBL expandAVX512_26_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_26_inShuf0<>+0x00(SB)/8, $0x0202010101000000
-DATA  expandAVX512_26_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_26_inShuf0<>+0x10(SB)/8, $0xffff020201010000
-DATA  expandAVX512_26_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_26_inShuf0<>+0x20(SB)/8, $0xffff020201010000
-DATA  expandAVX512_26_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_26_inShuf0<>+0x30(SB)/8, $0x0202010101000000
-DATA  expandAVX512_26_inShuf0<>+0x38(SB)/8, $0xffff010101000000
-
-GLOBL expandAVX512_26_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_26_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_26_mat0<>+0x08(SB)/8, $0x0101020202020202
-DATA  expandAVX512_26_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_26_mat0<>+0x18(SB)/8, $0x0202020204040404
-DATA  expandAVX512_26_mat0<>+0x20(SB)/8, $0x0404040404040404
-DATA  expandAVX512_26_mat0<>+0x28(SB)/8, $0x0404040404040808
-DATA  expandAVX512_26_mat0<>+0x30(SB)/8, $0x0808080808080808
-DATA  expandAVX512_26_mat0<>+0x38(SB)/8, $0x1010101010101010
-
-GLOBL expandAVX512_26_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_26_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_26_inShuf1<>+0x08(SB)/8, $0xffffffff01010000
-DATA  expandAVX512_26_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_26_inShuf1<>+0x18(SB)/8, $0xffffffff01010000
-DATA  expandAVX512_26_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_26_inShuf1<>+0x28(SB)/8, $0xffff010101000000
-DATA  expandAVX512_26_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_26_inShuf1<>+0x38(SB)/8, $0xff04040403030302
-
-GLOBL expandAVX512_26_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_26_mat1<>+0x00(SB)/8, $0x1010202020202020
-DATA  expandAVX512_26_mat1<>+0x08(SB)/8, $0x2020202020202020
-DATA  expandAVX512_26_mat1<>+0x10(SB)/8, $0x2020202040404040
-DATA  expandAVX512_26_mat1<>+0x18(SB)/8, $0x4040404040404040
-DATA  expandAVX512_26_mat1<>+0x20(SB)/8, $0x4040404040408080
-DATA  expandAVX512_26_mat1<>+0x28(SB)/8, $0x8080808080808080
-DATA  expandAVX512_26_mat1<>+0x30(SB)/8, $0x0101010101010101
-DATA  expandAVX512_26_mat1<>+0x38(SB)/8, $0x0808080808080808
-
-GLOBL expandAVX512_26_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_26_inShuf2<>+0x00(SB)/8, $0x0404030303020202
-DATA  expandAVX512_26_inShuf2<>+0x08(SB)/8, $0xffffffffff040302
-DATA  expandAVX512_26_inShuf2<>+0x10(SB)/8, $0xffff040403030202
-DATA  expandAVX512_26_inShuf2<>+0x18(SB)/8, $0xffffffffff040302
-DATA  expandAVX512_26_inShuf2<>+0x20(SB)/8, $0xffff040403030202
-DATA  expandAVX512_26_inShuf2<>+0x28(SB)/8, $0xffffffffff040302
-DATA  expandAVX512_26_inShuf2<>+0x30(SB)/8, $0xff04030303020202
-DATA  expandAVX512_26_inShuf2<>+0x38(SB)/8, $0xffff040404030303
-
-GLOBL expandAVX512_26_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_26_mat2<>+0x00(SB)/8, $0x1010101010101010
-DATA  expandAVX512_26_mat2<>+0x08(SB)/8, $0x1010202020202020
-DATA  expandAVX512_26_mat2<>+0x10(SB)/8, $0x2020202020202020
-DATA  expandAVX512_26_mat2<>+0x18(SB)/8, $0x2020202040404040
-DATA  expandAVX512_26_mat2<>+0x20(SB)/8, $0x4040404040404040
-DATA  expandAVX512_26_mat2<>+0x28(SB)/8, $0x4040404040408080
-DATA  expandAVX512_26_mat2<>+0x30(SB)/8, $0x8080808080808080
-DATA  expandAVX512_26_mat2<>+0x38(SB)/8, $0x0101010101010101
-
-GLOBL expandAVX512_26_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_26_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
-DATA  expandAVX512_26_inShuf3<>+0x08(SB)/8, $0xffffffff04040303
-DATA  expandAVX512_26_inShuf3<>+0x10(SB)/8, $0xffffffffffff0403
-DATA  expandAVX512_26_inShuf3<>+0x18(SB)/8, $0xffffffff04040303
-DATA  expandAVX512_26_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
-DATA  expandAVX512_26_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
-DATA  expandAVX512_26_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_26_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_26_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_26_mat3<>+0x00(SB)/8, $0x0101020202020202
-DATA  expandAVX512_26_mat3<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_26_mat3<>+0x10(SB)/8, $0x0202020204040404
-DATA  expandAVX512_26_mat3<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_26_mat3<>+0x20(SB)/8, $0x0404040404040808
-DATA  expandAVX512_26_mat3<>+0x28(SB)/8, $0x1010101010101010
-DATA  expandAVX512_26_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_26_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_26_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_26_outShufLo+0x00(SB)/8, $0x2018111008020100
-DATA  expandAVX512_26_outShufLo+0x08(SB)/8, $0x3a39383231302821
-DATA  expandAVX512_26_outShufLo+0x10(SB)/8, $0x6860595850494840
-DATA  expandAVX512_26_outShufLo+0x18(SB)/8, $0x1312090504036a69
-DATA  expandAVX512_26_outShufLo+0x20(SB)/8, $0x3b35343329232219
-DATA  expandAVX512_26_outShufLo+0x28(SB)/8, $0x5b5a514b4a413d3c
-DATA  expandAVX512_26_outShufLo+0x30(SB)/8, $0x0a7007066d6c6b61
-DATA  expandAVX512_26_outShufLo+0x38(SB)/8, $0x37362a25241a1514
-
-GLOBL expandAVX512_26_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_26_outShufHi0+0x00(SB)/8, $0x5851504842414038
-DATA  expandAVX512_26_outShufHi0+0x08(SB)/8, $0x7978727170686160
-DATA  expandAVX512_26_outShufHi0+0x10(SB)/8, $0xffffffffffffff7a
-DATA  expandAVX512_26_outShufHi0+0x18(SB)/8, $0x52494544433b3a39
-DATA  expandAVX512_26_outShufHi0+0x20(SB)/8, $0x7574736963625953
-DATA  expandAVX512_26_outShufHi0+0x28(SB)/8, $0xffffffffff7d7c7b
-DATA  expandAVX512_26_outShufHi0+0x30(SB)/8, $0xff47463e3d3cffff
-DATA  expandAVX512_26_outShufHi0+0x38(SB)/8, $0x766a65645a55544a
-
-GLOBL expandAVX512_26_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_26_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_26_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_26_outShufHi1+0x10(SB)/8, $0x20191810090800ff
-DATA  expandAVX512_26_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_26_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_26_outShufHi1+0x28(SB)/8, $0x1a110b0a01ffffff
-DATA  expandAVX512_26_outShufHi1+0x30(SB)/8, $0x28ffffffffff211b
-DATA  expandAVX512_26_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
-
-TEXT expandAVX512_26<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_26_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_26_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_26_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_26_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_26_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_26_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_26_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_26_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_inShuf0<>+0x00(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_26_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_26_inShuf0<>+0x10(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_26_inShuf0<>+0x18(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_26_inShuf0<>+0x20(SB)/8, $0xffff020201010000
+DATA  ·expandAVX512_26_inShuf0<>+0x28(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_26_inShuf0<>+0x30(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_26_inShuf0<>+0x38(SB)/8, $0xffff010101000000
+
+GLOBL ·expandAVX512_26_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_26_mat0<>+0x08(SB)/8, $0x0101020202020202
+DATA  ·expandAVX512_26_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_26_mat0<>+0x18(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_26_mat0<>+0x20(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_26_mat0<>+0x28(SB)/8, $0x0404040404040808
+DATA  ·expandAVX512_26_mat0<>+0x30(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_26_mat0<>+0x38(SB)/8, $0x1010101010101010
+
+GLOBL ·expandAVX512_26_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_26_inShuf1<>+0x08(SB)/8, $0xffffffff01010000
+DATA  ·expandAVX512_26_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_26_inShuf1<>+0x18(SB)/8, $0xffffffff01010000
+DATA  ·expandAVX512_26_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_26_inShuf1<>+0x28(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_26_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_26_inShuf1<>+0x38(SB)/8, $0xff04040403030302
+
+GLOBL ·expandAVX512_26_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_mat1<>+0x00(SB)/8, $0x1010202020202020
+DATA  ·expandAVX512_26_mat1<>+0x08(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_26_mat1<>+0x10(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_26_mat1<>+0x18(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_26_mat1<>+0x20(SB)/8, $0x4040404040408080
+DATA  ·expandAVX512_26_mat1<>+0x28(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_26_mat1<>+0x30(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_26_mat1<>+0x38(SB)/8, $0x0808080808080808
+
+GLOBL ·expandAVX512_26_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_inShuf2<>+0x00(SB)/8, $0x0404030303020202
+DATA  ·expandAVX512_26_inShuf2<>+0x08(SB)/8, $0xffffffffff040302
+DATA  ·expandAVX512_26_inShuf2<>+0x10(SB)/8, $0xffff040403030202
+DATA  ·expandAVX512_26_inShuf2<>+0x18(SB)/8, $0xffffffffff040302
+DATA  ·expandAVX512_26_inShuf2<>+0x20(SB)/8, $0xffff040403030202
+DATA  ·expandAVX512_26_inShuf2<>+0x28(SB)/8, $0xffffffffff040302
+DATA  ·expandAVX512_26_inShuf2<>+0x30(SB)/8, $0xff04030303020202
+DATA  ·expandAVX512_26_inShuf2<>+0x38(SB)/8, $0xffff040404030303
+
+GLOBL ·expandAVX512_26_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_mat2<>+0x00(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_26_mat2<>+0x08(SB)/8, $0x1010202020202020
+DATA  ·expandAVX512_26_mat2<>+0x10(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_26_mat2<>+0x18(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_26_mat2<>+0x20(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_26_mat2<>+0x28(SB)/8, $0x4040404040408080
+DATA  ·expandAVX512_26_mat2<>+0x30(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_26_mat2<>+0x38(SB)/8, $0x0101010101010101
+
+GLOBL ·expandAVX512_26_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
+DATA  ·expandAVX512_26_inShuf3<>+0x08(SB)/8, $0xffffffff04040303
+DATA  ·expandAVX512_26_inShuf3<>+0x10(SB)/8, $0xffffffffffff0403
+DATA  ·expandAVX512_26_inShuf3<>+0x18(SB)/8, $0xffffffff04040303
+DATA  ·expandAVX512_26_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
+DATA  ·expandAVX512_26_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
+DATA  ·expandAVX512_26_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_26_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_26_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_mat3<>+0x00(SB)/8, $0x0101020202020202
+DATA  ·expandAVX512_26_mat3<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_26_mat3<>+0x10(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_26_mat3<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_26_mat3<>+0x20(SB)/8, $0x0404040404040808
+DATA  ·expandAVX512_26_mat3<>+0x28(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_26_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_26_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_26_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_outShufLo+0x00(SB)/8, $0x2018111008020100
+DATA  ·expandAVX512_26_outShufLo+0x08(SB)/8, $0x3a39383231302821
+DATA  ·expandAVX512_26_outShufLo+0x10(SB)/8, $0x6860595850494840
+DATA  ·expandAVX512_26_outShufLo+0x18(SB)/8, $0x1312090504036a69
+DATA  ·expandAVX512_26_outShufLo+0x20(SB)/8, $0x3b35343329232219
+DATA  ·expandAVX512_26_outShufLo+0x28(SB)/8, $0x5b5a514b4a413d3c
+DATA  ·expandAVX512_26_outShufLo+0x30(SB)/8, $0x0a7007066d6c6b61
+DATA  ·expandAVX512_26_outShufLo+0x38(SB)/8, $0x37362a25241a1514
+
+GLOBL ·expandAVX512_26_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_outShufHi0+0x00(SB)/8, $0x5851504842414038
+DATA  ·expandAVX512_26_outShufHi0+0x08(SB)/8, $0x7978727170686160
+DATA  ·expandAVX512_26_outShufHi0+0x10(SB)/8, $0xffffffffffffff7a
+DATA  ·expandAVX512_26_outShufHi0+0x18(SB)/8, $0x52494544433b3a39
+DATA  ·expandAVX512_26_outShufHi0+0x20(SB)/8, $0x7574736963625953
+DATA  ·expandAVX512_26_outShufHi0+0x28(SB)/8, $0xffffffffff7d7c7b
+DATA  ·expandAVX512_26_outShufHi0+0x30(SB)/8, $0xff47463e3d3cffff
+DATA  ·expandAVX512_26_outShufHi0+0x38(SB)/8, $0x766a65645a55544a
+
+GLOBL ·expandAVX512_26_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_26_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_26_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_26_outShufHi1+0x10(SB)/8, $0x20191810090800ff
+DATA  ·expandAVX512_26_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_26_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_26_outShufHi1+0x28(SB)/8, $0x1a110b0a01ffffff
+DATA  ·expandAVX512_26_outShufHi1+0x30(SB)/8, $0x28ffffffffff211b
+DATA  ·expandAVX512_26_outShufHi1+0x38(SB)/8, $0xffffffffffffffff
+
+TEXT ·expandAVX512_26<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_26_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_26_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_26_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_26_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_26_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_26_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_26_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_26_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_26_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_26_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_26_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_26_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_26_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_26_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_26_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0xff7c07ffff01ffff, AX
        KMOVQ AX, K1
@@ -1417,133 +1417,133 @@ TEXT expandAVX512_26<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_28_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_28_inShuf0<>+0x00(SB)/8, $0x0202010101000000
-DATA  expandAVX512_28_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_28_inShuf0<>+0x10(SB)/8, $0x0202010101000000
-DATA  expandAVX512_28_inShuf0<>+0x18(SB)/8, $0xff02010101000000
-DATA  expandAVX512_28_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_28_inShuf0<>+0x28(SB)/8, $0xffff010101000000
-DATA  expandAVX512_28_inShuf0<>+0x30(SB)/8, $0xffff010101000000
-DATA  expandAVX512_28_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
-
-GLOBL expandAVX512_28_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_28_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_28_mat0<>+0x08(SB)/8, $0x0101010102020202
-DATA  expandAVX512_28_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_28_mat0<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_28_mat0<>+0x20(SB)/8, $0x0404040408080808
-DATA  expandAVX512_28_mat0<>+0x28(SB)/8, $0x0808080808080808
-DATA  expandAVX512_28_mat0<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_28_mat0<>+0x38(SB)/8, $0x1010101020202020
-
-GLOBL expandAVX512_28_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_28_inShuf1<>+0x00(SB)/8, $0xffff010101000000
-DATA  expandAVX512_28_inShuf1<>+0x08(SB)/8, $0xffff010101000000
-DATA  expandAVX512_28_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_28_inShuf1<>+0x18(SB)/8, $0xffff010101000000
-DATA  expandAVX512_28_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_28_inShuf1<>+0x28(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_28_inShuf1<>+0x30(SB)/8, $0x0404040303030202
-DATA  expandAVX512_28_inShuf1<>+0x38(SB)/8, $0xffffffffff040302
-
-GLOBL expandAVX512_28_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_28_mat1<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_28_mat1<>+0x08(SB)/8, $0x4040404040404040
-DATA  expandAVX512_28_mat1<>+0x10(SB)/8, $0x4040404080808080
-DATA  expandAVX512_28_mat1<>+0x18(SB)/8, $0x8080808080808080
-DATA  expandAVX512_28_mat1<>+0x20(SB)/8, $0x0101010101010101
-DATA  expandAVX512_28_mat1<>+0x28(SB)/8, $0x0202020202020202
-DATA  expandAVX512_28_mat1<>+0x30(SB)/8, $0x0404040404040404
-DATA  expandAVX512_28_mat1<>+0x38(SB)/8, $0x0404040408080808
-
-GLOBL expandAVX512_28_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_28_inShuf2<>+0x00(SB)/8, $0x0404030303020202
-DATA  expandAVX512_28_inShuf2<>+0x08(SB)/8, $0x0404030303020202
-DATA  expandAVX512_28_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
-DATA  expandAVX512_28_inShuf2<>+0x18(SB)/8, $0xffff030303020202
-DATA  expandAVX512_28_inShuf2<>+0x20(SB)/8, $0xffff030303020202
-DATA  expandAVX512_28_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
-DATA  expandAVX512_28_inShuf2<>+0x30(SB)/8, $0xffff030303020202
-DATA  expandAVX512_28_inShuf2<>+0x38(SB)/8, $0xffff040404030303
-
-GLOBL expandAVX512_28_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_28_mat2<>+0x00(SB)/8, $0x0808080808080808
-DATA  expandAVX512_28_mat2<>+0x08(SB)/8, $0x1010101010101010
-DATA  expandAVX512_28_mat2<>+0x10(SB)/8, $0x1010101020202020
-DATA  expandAVX512_28_mat2<>+0x18(SB)/8, $0x2020202020202020
-DATA  expandAVX512_28_mat2<>+0x20(SB)/8, $0x4040404040404040
-DATA  expandAVX512_28_mat2<>+0x28(SB)/8, $0x4040404080808080
-DATA  expandAVX512_28_mat2<>+0x30(SB)/8, $0x8080808080808080
-DATA  expandAVX512_28_mat2<>+0x38(SB)/8, $0x0101010101010101
-
-GLOBL expandAVX512_28_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_28_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
-DATA  expandAVX512_28_inShuf3<>+0x08(SB)/8, $0xffff040404030303
-DATA  expandAVX512_28_inShuf3<>+0x10(SB)/8, $0xffffffffffffff04
-DATA  expandAVX512_28_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_28_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_28_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_28_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_28_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_28_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_28_mat3<>+0x00(SB)/8, $0x0101010102020202
-DATA  expandAVX512_28_mat3<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_28_mat3<>+0x10(SB)/8, $0x0808080808080808
-DATA  expandAVX512_28_mat3<>+0x18(SB)/8, $0x0000000000000000
-DATA  expandAVX512_28_mat3<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_28_mat3<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_28_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_28_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_28_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_28_outShufLo+0x00(SB)/8, $0x1812111008020100
-DATA  expandAVX512_28_outShufLo+0x08(SB)/8, $0x31302a2928201a19
-DATA  expandAVX512_28_outShufLo+0x10(SB)/8, $0x4a49484241403832
-DATA  expandAVX512_28_outShufLo+0x18(SB)/8, $0x090504035a595850
-DATA  expandAVX512_28_outShufLo+0x20(SB)/8, $0x2b211d1c1b151413
-DATA  expandAVX512_28_outShufLo+0x28(SB)/8, $0x4443393534332d2c
-DATA  expandAVX512_28_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b45
-DATA  expandAVX512_28_outShufLo+0x38(SB)/8, $0x1e6817160a600706
-
-GLOBL expandAVX512_28_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_28_outShufHi0+0x00(SB)/8, $0x4948424140383130
-DATA  expandAVX512_28_outShufHi0+0x08(SB)/8, $0x6261605a5958504a
-DATA  expandAVX512_28_outShufHi0+0x10(SB)/8, $0xff7a797872717068
-DATA  expandAVX512_28_outShufHi0+0x18(SB)/8, $0x4339343332ffffff
-DATA  expandAVX512_28_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b4544
-DATA  expandAVX512_28_outShufHi0+0x28(SB)/8, $0x757473696564635d
-DATA  expandAVX512_28_outShufHi0+0x30(SB)/8, $0x35ffffffff7d7c7b
-DATA  expandAVX512_28_outShufHi0+0x38(SB)/8, $0x4f4eff47463a3736
-
-GLOBL expandAVX512_28_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_28_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_28_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_28_outShufHi1+0x10(SB)/8, $0x00ffffffffffffff
-DATA  expandAVX512_28_outShufHi1+0x18(SB)/8, $0xffffffffff0a0908
-DATA  expandAVX512_28_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_28_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_28_outShufHi1+0x30(SB)/8, $0xff0d0c0b01ffffff
-DATA  expandAVX512_28_outShufHi1+0x38(SB)/8, $0xffff10ffffffffff
-
-TEXT expandAVX512_28<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_28_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_28_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_28_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_28_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_28_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_28_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_28_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_28_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_inShuf0<>+0x00(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_28_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_28_inShuf0<>+0x10(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_28_inShuf0<>+0x18(SB)/8, $0xff02010101000000
+DATA  ·expandAVX512_28_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_28_inShuf0<>+0x28(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_28_inShuf0<>+0x30(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_28_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
+
+GLOBL ·expandAVX512_28_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_28_mat0<>+0x08(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_28_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_28_mat0<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_28_mat0<>+0x20(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_28_mat0<>+0x28(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_28_mat0<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_28_mat0<>+0x38(SB)/8, $0x1010101020202020
+
+GLOBL ·expandAVX512_28_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_inShuf1<>+0x00(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_28_inShuf1<>+0x08(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_28_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_28_inShuf1<>+0x18(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_28_inShuf1<>+0x20(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_28_inShuf1<>+0x28(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_28_inShuf1<>+0x30(SB)/8, $0x0404040303030202
+DATA  ·expandAVX512_28_inShuf1<>+0x38(SB)/8, $0xffffffffff040302
+
+GLOBL ·expandAVX512_28_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_mat1<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_28_mat1<>+0x08(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_28_mat1<>+0x10(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_28_mat1<>+0x18(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_28_mat1<>+0x20(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_28_mat1<>+0x28(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_28_mat1<>+0x30(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_28_mat1<>+0x38(SB)/8, $0x0404040408080808
+
+GLOBL ·expandAVX512_28_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_inShuf2<>+0x00(SB)/8, $0x0404030303020202
+DATA  ·expandAVX512_28_inShuf2<>+0x08(SB)/8, $0x0404030303020202
+DATA  ·expandAVX512_28_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
+DATA  ·expandAVX512_28_inShuf2<>+0x18(SB)/8, $0xffff030303020202
+DATA  ·expandAVX512_28_inShuf2<>+0x20(SB)/8, $0xffff030303020202
+DATA  ·expandAVX512_28_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
+DATA  ·expandAVX512_28_inShuf2<>+0x30(SB)/8, $0xffff030303020202
+DATA  ·expandAVX512_28_inShuf2<>+0x38(SB)/8, $0xffff040404030303
+
+GLOBL ·expandAVX512_28_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_mat2<>+0x00(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_28_mat2<>+0x08(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_28_mat2<>+0x10(SB)/8, $0x1010101020202020
+DATA  ·expandAVX512_28_mat2<>+0x18(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_28_mat2<>+0x20(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_28_mat2<>+0x28(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_28_mat2<>+0x30(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_28_mat2<>+0x38(SB)/8, $0x0101010101010101
+
+GLOBL ·expandAVX512_28_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_inShuf3<>+0x00(SB)/8, $0xffffffffffff0403
+DATA  ·expandAVX512_28_inShuf3<>+0x08(SB)/8, $0xffff040404030303
+DATA  ·expandAVX512_28_inShuf3<>+0x10(SB)/8, $0xffffffffffffff04
+DATA  ·expandAVX512_28_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_28_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_28_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_28_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_28_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_28_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_mat3<>+0x00(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_28_mat3<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_28_mat3<>+0x10(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_28_mat3<>+0x18(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_28_mat3<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_28_mat3<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_28_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_28_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_28_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_outShufLo+0x00(SB)/8, $0x1812111008020100
+DATA  ·expandAVX512_28_outShufLo+0x08(SB)/8, $0x31302a2928201a19
+DATA  ·expandAVX512_28_outShufLo+0x10(SB)/8, $0x4a49484241403832
+DATA  ·expandAVX512_28_outShufLo+0x18(SB)/8, $0x090504035a595850
+DATA  ·expandAVX512_28_outShufLo+0x20(SB)/8, $0x2b211d1c1b151413
+DATA  ·expandAVX512_28_outShufLo+0x28(SB)/8, $0x4443393534332d2c
+DATA  ·expandAVX512_28_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b45
+DATA  ·expandAVX512_28_outShufLo+0x38(SB)/8, $0x1e6817160a600706
+
+GLOBL ·expandAVX512_28_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_outShufHi0+0x00(SB)/8, $0x4948424140383130
+DATA  ·expandAVX512_28_outShufHi0+0x08(SB)/8, $0x6261605a5958504a
+DATA  ·expandAVX512_28_outShufHi0+0x10(SB)/8, $0xff7a797872717068
+DATA  ·expandAVX512_28_outShufHi0+0x18(SB)/8, $0x4339343332ffffff
+DATA  ·expandAVX512_28_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b4544
+DATA  ·expandAVX512_28_outShufHi0+0x28(SB)/8, $0x757473696564635d
+DATA  ·expandAVX512_28_outShufHi0+0x30(SB)/8, $0x35ffffffff7d7c7b
+DATA  ·expandAVX512_28_outShufHi0+0x38(SB)/8, $0x4f4eff47463a3736
+
+GLOBL ·expandAVX512_28_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_28_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_28_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_28_outShufHi1+0x10(SB)/8, $0x00ffffffffffffff
+DATA  ·expandAVX512_28_outShufHi1+0x18(SB)/8, $0xffffffffff0a0908
+DATA  ·expandAVX512_28_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_28_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_28_outShufHi1+0x30(SB)/8, $0xff0d0c0b01ffffff
+DATA  ·expandAVX512_28_outShufHi1+0x38(SB)/8, $0xffff10ffffffffff
+
+TEXT ·expandAVX512_28<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_28_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_28_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_28_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_28_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_28_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_28_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_28_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_28_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_28_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_28_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_28_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_28_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_28_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_28_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_28_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0xdf87fffff87fffff, AX
        KMOVQ AX, K1
@@ -1554,133 +1554,133 @@ TEXT expandAVX512_28<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_30_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_30_inShuf0<>+0x00(SB)/8, $0x0202010101000000
-DATA  expandAVX512_30_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
-DATA  expandAVX512_30_inShuf0<>+0x10(SB)/8, $0xffff010101000000
-DATA  expandAVX512_30_inShuf0<>+0x18(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_30_inShuf0<>+0x20(SB)/8, $0xffff010101000000
-DATA  expandAVX512_30_inShuf0<>+0x28(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_30_inShuf0<>+0x30(SB)/8, $0xffff010101000000
-DATA  expandAVX512_30_inShuf0<>+0x38(SB)/8, $0xffff010101000000
-
-GLOBL expandAVX512_30_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_30_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_30_mat0<>+0x08(SB)/8, $0x0101010101010202
-DATA  expandAVX512_30_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_30_mat0<>+0x18(SB)/8, $0x0202020204040404
-DATA  expandAVX512_30_mat0<>+0x20(SB)/8, $0x0404040404040404
-DATA  expandAVX512_30_mat0<>+0x28(SB)/8, $0x0404080808080808
-DATA  expandAVX512_30_mat0<>+0x30(SB)/8, $0x0808080808080808
-DATA  expandAVX512_30_mat0<>+0x38(SB)/8, $0x1010101010101010
-
-GLOBL expandAVX512_30_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_30_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_30_inShuf1<>+0x08(SB)/8, $0xffff010101000000
-DATA  expandAVX512_30_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_30_inShuf1<>+0x18(SB)/8, $0xffff010101000000
-DATA  expandAVX512_30_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_30_inShuf1<>+0x28(SB)/8, $0xffff010101000000
-DATA  expandAVX512_30_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_30_inShuf1<>+0x38(SB)/8, $0x0404030303020202
-
-GLOBL expandAVX512_30_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_30_mat1<>+0x00(SB)/8, $0x1010101010102020
-DATA  expandAVX512_30_mat1<>+0x08(SB)/8, $0x2020202020202020
-DATA  expandAVX512_30_mat1<>+0x10(SB)/8, $0x2020202040404040
-DATA  expandAVX512_30_mat1<>+0x18(SB)/8, $0x4040404040404040
-DATA  expandAVX512_30_mat1<>+0x20(SB)/8, $0x4040808080808080
-DATA  expandAVX512_30_mat1<>+0x28(SB)/8, $0x8080808080808080
-DATA  expandAVX512_30_mat1<>+0x30(SB)/8, $0x0101010101010101
-DATA  expandAVX512_30_mat1<>+0x38(SB)/8, $0x0202020202020202
-
-GLOBL expandAVX512_30_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_30_inShuf2<>+0x00(SB)/8, $0xffffffffff040302
-DATA  expandAVX512_30_inShuf2<>+0x08(SB)/8, $0xffff030303020202
-DATA  expandAVX512_30_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
-DATA  expandAVX512_30_inShuf2<>+0x18(SB)/8, $0xffff030303020202
-DATA  expandAVX512_30_inShuf2<>+0x20(SB)/8, $0xffff030303020202
-DATA  expandAVX512_30_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
-DATA  expandAVX512_30_inShuf2<>+0x30(SB)/8, $0xffff030303020202
-DATA  expandAVX512_30_inShuf2<>+0x38(SB)/8, $0xffffffffffff0302
-
-GLOBL expandAVX512_30_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_30_mat2<>+0x00(SB)/8, $0x0202020204040404
-DATA  expandAVX512_30_mat2<>+0x08(SB)/8, $0x0404040404040404
-DATA  expandAVX512_30_mat2<>+0x10(SB)/8, $0x0404080808080808
-DATA  expandAVX512_30_mat2<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_30_mat2<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_30_mat2<>+0x28(SB)/8, $0x1010101010102020
-DATA  expandAVX512_30_mat2<>+0x30(SB)/8, $0x2020202020202020
-DATA  expandAVX512_30_mat2<>+0x38(SB)/8, $0x2020202040404040
-
-GLOBL expandAVX512_30_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_30_inShuf3<>+0x00(SB)/8, $0xffff030303020202
-DATA  expandAVX512_30_inShuf3<>+0x08(SB)/8, $0xffffffffffff0302
-DATA  expandAVX512_30_inShuf3<>+0x10(SB)/8, $0xffff030303020202
-DATA  expandAVX512_30_inShuf3<>+0x18(SB)/8, $0xffff040404030303
-DATA  expandAVX512_30_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
-DATA  expandAVX512_30_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
-DATA  expandAVX512_30_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_30_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_30_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_30_mat3<>+0x00(SB)/8, $0x4040404040404040
-DATA  expandAVX512_30_mat3<>+0x08(SB)/8, $0x4040808080808080
-DATA  expandAVX512_30_mat3<>+0x10(SB)/8, $0x8080808080808080
-DATA  expandAVX512_30_mat3<>+0x18(SB)/8, $0x0101010101010101
-DATA  expandAVX512_30_mat3<>+0x20(SB)/8, $0x0101010101010202
-DATA  expandAVX512_30_mat3<>+0x28(SB)/8, $0x0202020202020202
-DATA  expandAVX512_30_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_30_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_30_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_30_outShufLo+0x00(SB)/8, $0x1812111008020100
-DATA  expandAVX512_30_outShufLo+0x08(SB)/8, $0x3832313028222120
-DATA  expandAVX512_30_outShufLo+0x10(SB)/8, $0x58504a4948403a39
-DATA  expandAVX512_30_outShufLo+0x18(SB)/8, $0x04036a6968605a59
-DATA  expandAVX512_30_outShufLo+0x20(SB)/8, $0x2423191514130905
-DATA  expandAVX512_30_outShufLo+0x28(SB)/8, $0x3d3c3b3534332925
-DATA  expandAVX512_30_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b41
-DATA  expandAVX512_30_outShufLo+0x38(SB)/8, $0x0a7007066d6c6b61
-
-GLOBL expandAVX512_30_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_30_outShufHi0+0x00(SB)/8, $0x504a4948403a3938
-DATA  expandAVX512_30_outShufHi0+0x08(SB)/8, $0x70686261605a5958
-DATA  expandAVX512_30_outShufHi0+0x10(SB)/8, $0xffffffffff787271
-DATA  expandAVX512_30_outShufHi0+0x18(SB)/8, $0x3c3bffffffffffff
-DATA  expandAVX512_30_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b413d
-DATA  expandAVX512_30_outShufHi0+0x28(SB)/8, $0x757473696564635d
-DATA  expandAVX512_30_outShufHi0+0x30(SB)/8, $0xffffffffffffff79
-DATA  expandAVX512_30_outShufHi0+0x38(SB)/8, $0x42ff3f3effffffff
-
-GLOBL expandAVX512_30_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_30_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_30_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_30_outShufHi1+0x10(SB)/8, $0x1008020100ffffff
-DATA  expandAVX512_30_outShufHi1+0x18(SB)/8, $0xffff201a19181211
-DATA  expandAVX512_30_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_30_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_30_outShufHi1+0x30(SB)/8, $0x15141309050403ff
-DATA  expandAVX512_30_outShufHi1+0x38(SB)/8, $0xff28ffff211d1c1b
-
-TEXT expandAVX512_30<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_30_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_30_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_30_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_30_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_30_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_30_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_30_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_30_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_inShuf0<>+0x00(SB)/8, $0x0202010101000000
+DATA  ·expandAVX512_30_inShuf0<>+0x08(SB)/8, $0xffffffffff020100
+DATA  ·expandAVX512_30_inShuf0<>+0x10(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_30_inShuf0<>+0x18(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_30_inShuf0<>+0x20(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_30_inShuf0<>+0x28(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_30_inShuf0<>+0x30(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_30_inShuf0<>+0x38(SB)/8, $0xffff010101000000
+
+GLOBL ·expandAVX512_30_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_30_mat0<>+0x08(SB)/8, $0x0101010101010202
+DATA  ·expandAVX512_30_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_30_mat0<>+0x18(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_30_mat0<>+0x20(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_30_mat0<>+0x28(SB)/8, $0x0404080808080808
+DATA  ·expandAVX512_30_mat0<>+0x30(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_30_mat0<>+0x38(SB)/8, $0x1010101010101010
+
+GLOBL ·expandAVX512_30_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_inShuf1<>+0x00(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_30_inShuf1<>+0x08(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_30_inShuf1<>+0x10(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_30_inShuf1<>+0x18(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_30_inShuf1<>+0x20(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_30_inShuf1<>+0x28(SB)/8, $0xffff010101000000
+DATA  ·expandAVX512_30_inShuf1<>+0x30(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_30_inShuf1<>+0x38(SB)/8, $0x0404030303020202
+
+GLOBL ·expandAVX512_30_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_mat1<>+0x00(SB)/8, $0x1010101010102020
+DATA  ·expandAVX512_30_mat1<>+0x08(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_30_mat1<>+0x10(SB)/8, $0x2020202040404040
+DATA  ·expandAVX512_30_mat1<>+0x18(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_30_mat1<>+0x20(SB)/8, $0x4040808080808080
+DATA  ·expandAVX512_30_mat1<>+0x28(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_30_mat1<>+0x30(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_30_mat1<>+0x38(SB)/8, $0x0202020202020202
+
+GLOBL ·expandAVX512_30_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_inShuf2<>+0x00(SB)/8, $0xffffffffff040302
+DATA  ·expandAVX512_30_inShuf2<>+0x08(SB)/8, $0xffff030303020202
+DATA  ·expandAVX512_30_inShuf2<>+0x10(SB)/8, $0xffffffffffff0302
+DATA  ·expandAVX512_30_inShuf2<>+0x18(SB)/8, $0xffff030303020202
+DATA  ·expandAVX512_30_inShuf2<>+0x20(SB)/8, $0xffff030303020202
+DATA  ·expandAVX512_30_inShuf2<>+0x28(SB)/8, $0xffffffffffff0302
+DATA  ·expandAVX512_30_inShuf2<>+0x30(SB)/8, $0xffff030303020202
+DATA  ·expandAVX512_30_inShuf2<>+0x38(SB)/8, $0xffffffffffff0302
+
+GLOBL ·expandAVX512_30_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_mat2<>+0x00(SB)/8, $0x0202020204040404
+DATA  ·expandAVX512_30_mat2<>+0x08(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_30_mat2<>+0x10(SB)/8, $0x0404080808080808
+DATA  ·expandAVX512_30_mat2<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_30_mat2<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_30_mat2<>+0x28(SB)/8, $0x1010101010102020
+DATA  ·expandAVX512_30_mat2<>+0x30(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_30_mat2<>+0x38(SB)/8, $0x2020202040404040
+
+GLOBL ·expandAVX512_30_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_inShuf3<>+0x00(SB)/8, $0xffff030303020202
+DATA  ·expandAVX512_30_inShuf3<>+0x08(SB)/8, $0xffffffffffff0302
+DATA  ·expandAVX512_30_inShuf3<>+0x10(SB)/8, $0xffff030303020202
+DATA  ·expandAVX512_30_inShuf3<>+0x18(SB)/8, $0xffff040404030303
+DATA  ·expandAVX512_30_inShuf3<>+0x20(SB)/8, $0xffffffffffff0403
+DATA  ·expandAVX512_30_inShuf3<>+0x28(SB)/8, $0xffffffffffffff04
+DATA  ·expandAVX512_30_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_30_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_30_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_mat3<>+0x00(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_30_mat3<>+0x08(SB)/8, $0x4040808080808080
+DATA  ·expandAVX512_30_mat3<>+0x10(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_30_mat3<>+0x18(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_30_mat3<>+0x20(SB)/8, $0x0101010101010202
+DATA  ·expandAVX512_30_mat3<>+0x28(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_30_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_30_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_30_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_outShufLo+0x00(SB)/8, $0x1812111008020100
+DATA  ·expandAVX512_30_outShufLo+0x08(SB)/8, $0x3832313028222120
+DATA  ·expandAVX512_30_outShufLo+0x10(SB)/8, $0x58504a4948403a39
+DATA  ·expandAVX512_30_outShufLo+0x18(SB)/8, $0x04036a6968605a59
+DATA  ·expandAVX512_30_outShufLo+0x20(SB)/8, $0x2423191514130905
+DATA  ·expandAVX512_30_outShufLo+0x28(SB)/8, $0x3d3c3b3534332925
+DATA  ·expandAVX512_30_outShufLo+0x30(SB)/8, $0x5d5c5b514d4c4b41
+DATA  ·expandAVX512_30_outShufLo+0x38(SB)/8, $0x0a7007066d6c6b61
+
+GLOBL ·expandAVX512_30_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_outShufHi0+0x00(SB)/8, $0x504a4948403a3938
+DATA  ·expandAVX512_30_outShufHi0+0x08(SB)/8, $0x70686261605a5958
+DATA  ·expandAVX512_30_outShufHi0+0x10(SB)/8, $0xffffffffff787271
+DATA  ·expandAVX512_30_outShufHi0+0x18(SB)/8, $0x3c3bffffffffffff
+DATA  ·expandAVX512_30_outShufHi0+0x20(SB)/8, $0x5c5b514d4c4b413d
+DATA  ·expandAVX512_30_outShufHi0+0x28(SB)/8, $0x757473696564635d
+DATA  ·expandAVX512_30_outShufHi0+0x30(SB)/8, $0xffffffffffffff79
+DATA  ·expandAVX512_30_outShufHi0+0x38(SB)/8, $0x42ff3f3effffffff
+
+GLOBL ·expandAVX512_30_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_30_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_30_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_30_outShufHi1+0x10(SB)/8, $0x1008020100ffffff
+DATA  ·expandAVX512_30_outShufHi1+0x18(SB)/8, $0xffff201a19181211
+DATA  ·expandAVX512_30_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_30_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_30_outShufHi1+0x30(SB)/8, $0x15141309050403ff
+DATA  ·expandAVX512_30_outShufHi1+0x38(SB)/8, $0xff28ffff211d1c1b
+
+TEXT ·expandAVX512_30<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_30_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_30_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_30_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_30_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_30_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_30_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_30_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_30_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_30_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_30_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_30_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_30_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_30_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_30_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_30_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0xb001ffffc007ffff, AX
        KMOVQ AX, K1
@@ -1691,51 +1691,51 @@ TEXT expandAVX512_30<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_32_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_32_inShuf0<>+0x00(SB)/8, $0x0101010100000000
-DATA  expandAVX512_32_inShuf0<>+0x08(SB)/8, $0x0101010100000000
-DATA  expandAVX512_32_inShuf0<>+0x10(SB)/8, $0x0101010100000000
-DATA  expandAVX512_32_inShuf0<>+0x18(SB)/8, $0x0101010100000000
-DATA  expandAVX512_32_inShuf0<>+0x20(SB)/8, $0x0101010100000000
-DATA  expandAVX512_32_inShuf0<>+0x28(SB)/8, $0x0101010100000000
-DATA  expandAVX512_32_inShuf0<>+0x30(SB)/8, $0x0101010100000000
-DATA  expandAVX512_32_inShuf0<>+0x38(SB)/8, $0x0101010100000000
-
-GLOBL expandAVX512_32_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_32_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_32_mat0<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_32_mat0<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_32_mat0<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_32_mat0<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_32_mat0<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_32_mat0<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_32_mat0<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_32_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_32_inShuf1<>+0x00(SB)/8, $0x0303030302020202
-DATA  expandAVX512_32_inShuf1<>+0x08(SB)/8, $0x0303030302020202
-DATA  expandAVX512_32_inShuf1<>+0x10(SB)/8, $0x0303030302020202
-DATA  expandAVX512_32_inShuf1<>+0x18(SB)/8, $0x0303030302020202
-DATA  expandAVX512_32_inShuf1<>+0x20(SB)/8, $0x0303030302020202
-DATA  expandAVX512_32_inShuf1<>+0x28(SB)/8, $0x0303030302020202
-DATA  expandAVX512_32_inShuf1<>+0x30(SB)/8, $0x0303030302020202
-DATA  expandAVX512_32_inShuf1<>+0x38(SB)/8, $0x0303030302020202
-
-GLOBL expandAVX512_32_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_32_outShufLo+0x00(SB)/8, $0x0b0a090803020100
-DATA  expandAVX512_32_outShufLo+0x08(SB)/8, $0x1b1a191813121110
-DATA  expandAVX512_32_outShufLo+0x10(SB)/8, $0x2b2a292823222120
-DATA  expandAVX512_32_outShufLo+0x18(SB)/8, $0x3b3a393833323130
-DATA  expandAVX512_32_outShufLo+0x20(SB)/8, $0x0f0e0d0c07060504
-DATA  expandAVX512_32_outShufLo+0x28(SB)/8, $0x1f1e1d1c17161514
-DATA  expandAVX512_32_outShufLo+0x30(SB)/8, $0x2f2e2d2c27262524
-DATA  expandAVX512_32_outShufLo+0x38(SB)/8, $0x3f3e3d3c37363534
-
-TEXT expandAVX512_32<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_32_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_32_mat0<>(SB), Z1
-       VMOVDQU64 expandAVX512_32_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_32_outShufLo(SB), Z3
+GLOBL ·expandAVX512_32_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_32_inShuf0<>+0x00(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_32_inShuf0<>+0x08(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_32_inShuf0<>+0x10(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_32_inShuf0<>+0x18(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_32_inShuf0<>+0x20(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_32_inShuf0<>+0x28(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_32_inShuf0<>+0x30(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_32_inShuf0<>+0x38(SB)/8, $0x0101010100000000
+
+GLOBL ·expandAVX512_32_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_32_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_32_mat0<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_32_mat0<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_32_mat0<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_32_mat0<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_32_mat0<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_32_mat0<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_32_mat0<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_32_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_32_inShuf1<>+0x00(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_32_inShuf1<>+0x08(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_32_inShuf1<>+0x10(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_32_inShuf1<>+0x18(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_32_inShuf1<>+0x20(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_32_inShuf1<>+0x28(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_32_inShuf1<>+0x30(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_32_inShuf1<>+0x38(SB)/8, $0x0303030302020202
+
+GLOBL ·expandAVX512_32_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_32_outShufLo+0x00(SB)/8, $0x0b0a090803020100
+DATA  ·expandAVX512_32_outShufLo+0x08(SB)/8, $0x1b1a191813121110
+DATA  ·expandAVX512_32_outShufLo+0x10(SB)/8, $0x2b2a292823222120
+DATA  ·expandAVX512_32_outShufLo+0x18(SB)/8, $0x3b3a393833323130
+DATA  ·expandAVX512_32_outShufLo+0x20(SB)/8, $0x0f0e0d0c07060504
+DATA  ·expandAVX512_32_outShufLo+0x28(SB)/8, $0x1f1e1d1c17161514
+DATA  ·expandAVX512_32_outShufLo+0x30(SB)/8, $0x2f2e2d2c27262524
+DATA  ·expandAVX512_32_outShufLo+0x38(SB)/8, $0x3f3e3d3c37363534
+
+TEXT ·expandAVX512_32<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_32_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_32_mat0<>(SB), Z1
+       VMOVDQU64 ·expandAVX512_32_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_32_outShufLo(SB), Z3
        VMOVDQU64 (AX), Z4
        VPERMB Z4, Z0, Z0
        VGF2P8AFFINEQB $0, Z1, Z0, Z0
@@ -1745,230 +1745,230 @@ TEXT expandAVX512_32<>(SB), NOSPLIT, $0-0
        VPERMB Z2, Z3, Z2
        RET
 
-GLOBL expandAVX512_36_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_36_inShuf0<>+0x00(SB)/8, $0x0101010100000000
-DATA  expandAVX512_36_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_36_inShuf0<>+0x10(SB)/8, $0x0101010100000000
-DATA  expandAVX512_36_inShuf0<>+0x18(SB)/8, $0x0101010100000000
-DATA  expandAVX512_36_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_36_inShuf0<>+0x28(SB)/8, $0x0101010100000000
-DATA  expandAVX512_36_inShuf0<>+0x30(SB)/8, $0x0101010100000000
-DATA  expandAVX512_36_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
-
-GLOBL expandAVX512_36_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_36_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_36_mat0<>+0x08(SB)/8, $0x0101010102020202
-DATA  expandAVX512_36_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_36_mat0<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_36_mat0<>+0x20(SB)/8, $0x0404040408080808
-DATA  expandAVX512_36_mat0<>+0x28(SB)/8, $0x0808080808080808
-DATA  expandAVX512_36_mat0<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_36_mat0<>+0x38(SB)/8, $0x1010101020202020
-
-GLOBL expandAVX512_36_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_36_inShuf1<>+0x00(SB)/8, $0x0101010100000000
-DATA  expandAVX512_36_inShuf1<>+0x08(SB)/8, $0xffffff0100000000
-DATA  expandAVX512_36_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
-DATA  expandAVX512_36_inShuf1<>+0x18(SB)/8, $0xffffffff00000000
-DATA  expandAVX512_36_inShuf1<>+0x20(SB)/8, $0xff02020202010101
-DATA  expandAVX512_36_inShuf1<>+0x28(SB)/8, $0xffffffffffff0201
-DATA  expandAVX512_36_inShuf1<>+0x30(SB)/8, $0x0202020201010101
-DATA  expandAVX512_36_inShuf1<>+0x38(SB)/8, $0x0303030302020202
-
-GLOBL expandAVX512_36_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_36_mat1<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_36_mat1<>+0x08(SB)/8, $0x4040404040404040
-DATA  expandAVX512_36_mat1<>+0x10(SB)/8, $0x4040404080808080
-DATA  expandAVX512_36_mat1<>+0x18(SB)/8, $0x8080808080808080
-DATA  expandAVX512_36_mat1<>+0x20(SB)/8, $0x4040404040404040
-DATA  expandAVX512_36_mat1<>+0x28(SB)/8, $0x4040404080808080
-DATA  expandAVX512_36_mat1<>+0x30(SB)/8, $0x8080808080808080
-DATA  expandAVX512_36_mat1<>+0x38(SB)/8, $0x0101010101010101
-
-GLOBL expandAVX512_36_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_36_inShuf2<>+0x00(SB)/8, $0xffffffffffff0302
-DATA  expandAVX512_36_inShuf2<>+0x08(SB)/8, $0x0303030302020202
-DATA  expandAVX512_36_inShuf2<>+0x10(SB)/8, $0x0303030302020202
-DATA  expandAVX512_36_inShuf2<>+0x18(SB)/8, $0xffffffffffff0302
-DATA  expandAVX512_36_inShuf2<>+0x20(SB)/8, $0x0303030302020202
-DATA  expandAVX512_36_inShuf2<>+0x28(SB)/8, $0xffff030302020202
-DATA  expandAVX512_36_inShuf2<>+0x30(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_36_inShuf2<>+0x38(SB)/8, $0xffffffff02020202
-
-GLOBL expandAVX512_36_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_36_mat2<>+0x00(SB)/8, $0x0101010102020202
-DATA  expandAVX512_36_mat2<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_36_mat2<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_36_mat2<>+0x18(SB)/8, $0x0404040408080808
-DATA  expandAVX512_36_mat2<>+0x20(SB)/8, $0x0808080808080808
-DATA  expandAVX512_36_mat2<>+0x28(SB)/8, $0x1010101010101010
-DATA  expandAVX512_36_mat2<>+0x30(SB)/8, $0x1010101020202020
-DATA  expandAVX512_36_mat2<>+0x38(SB)/8, $0x2020202020202020
-
-GLOBL expandAVX512_36_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_36_outShufLo+0x00(SB)/8, $0x1211100803020100
-DATA  expandAVX512_36_outShufLo+0x08(SB)/8, $0x2928201b1a191813
-DATA  expandAVX512_36_outShufLo+0x10(SB)/8, $0x4038333231302b2a
-DATA  expandAVX512_36_outShufLo+0x18(SB)/8, $0x504b4a4948434241
-DATA  expandAVX512_36_outShufLo+0x20(SB)/8, $0x070605045b5a5958
-DATA  expandAVX512_36_outShufLo+0x28(SB)/8, $0x1e1d1c1716151409
-DATA  expandAVX512_36_outShufLo+0x30(SB)/8, $0x35342f2e2d2c211f
-DATA  expandAVX512_36_outShufLo+0x38(SB)/8, $0x4c47464544393736
-
-GLOBL expandAVX512_36_outShufHi(SB), RODATA, $0x40
-DATA  expandAVX512_36_outShufHi+0x00(SB)/8, $0x3332313028222120
-DATA  expandAVX512_36_outShufHi+0x08(SB)/8, $0x4a4948403b3a3938
-DATA  expandAVX512_36_outShufHi+0x10(SB)/8, $0x616058535251504b
-DATA  expandAVX512_36_outShufHi+0x18(SB)/8, $0x78706b6a69686362
-DATA  expandAVX512_36_outShufHi+0x20(SB)/8, $0x29262524237b7a79
-DATA  expandAVX512_36_outShufHi+0x28(SB)/8, $0x3f3e3d3c37363534
-DATA  expandAVX512_36_outShufHi+0x30(SB)/8, $0x5655544f4e4d4c41
-DATA  expandAVX512_36_outShufHi+0x38(SB)/8, $0x6d6c676665645957
-
-TEXT expandAVX512_36<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_36_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_36_inShuf1<>(SB), Z3
-       VMOVDQU64 expandAVX512_36_inShuf2<>(SB), Z4
-       VMOVDQU64 expandAVX512_36_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_36_outShufHi(SB), Z2
+GLOBL ·expandAVX512_36_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_36_inShuf0<>+0x00(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_36_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_36_inShuf0<>+0x10(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_36_inShuf0<>+0x18(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_36_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_36_inShuf0<>+0x28(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_36_inShuf0<>+0x30(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_36_inShuf0<>+0x38(SB)/8, $0xffffffffffff0100
+
+GLOBL ·expandAVX512_36_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_36_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_36_mat0<>+0x08(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_36_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_36_mat0<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_36_mat0<>+0x20(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_36_mat0<>+0x28(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_36_mat0<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_36_mat0<>+0x38(SB)/8, $0x1010101020202020
+
+GLOBL ·expandAVX512_36_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_36_inShuf1<>+0x00(SB)/8, $0x0101010100000000
+DATA  ·expandAVX512_36_inShuf1<>+0x08(SB)/8, $0xffffff0100000000
+DATA  ·expandAVX512_36_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
+DATA  ·expandAVX512_36_inShuf1<>+0x18(SB)/8, $0xffffffff00000000
+DATA  ·expandAVX512_36_inShuf1<>+0x20(SB)/8, $0xff02020202010101
+DATA  ·expandAVX512_36_inShuf1<>+0x28(SB)/8, $0xffffffffffff0201
+DATA  ·expandAVX512_36_inShuf1<>+0x30(SB)/8, $0x0202020201010101
+DATA  ·expandAVX512_36_inShuf1<>+0x38(SB)/8, $0x0303030302020202
+
+GLOBL ·expandAVX512_36_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_36_mat1<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_36_mat1<>+0x08(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_36_mat1<>+0x10(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_36_mat1<>+0x18(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_36_mat1<>+0x20(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_36_mat1<>+0x28(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_36_mat1<>+0x30(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_36_mat1<>+0x38(SB)/8, $0x0101010101010101
+
+GLOBL ·expandAVX512_36_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_36_inShuf2<>+0x00(SB)/8, $0xffffffffffff0302
+DATA  ·expandAVX512_36_inShuf2<>+0x08(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_36_inShuf2<>+0x10(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_36_inShuf2<>+0x18(SB)/8, $0xffffffffffff0302
+DATA  ·expandAVX512_36_inShuf2<>+0x20(SB)/8, $0x0303030302020202
+DATA  ·expandAVX512_36_inShuf2<>+0x28(SB)/8, $0xffff030302020202
+DATA  ·expandAVX512_36_inShuf2<>+0x30(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_36_inShuf2<>+0x38(SB)/8, $0xffffffff02020202
+
+GLOBL ·expandAVX512_36_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_36_mat2<>+0x00(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_36_mat2<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_36_mat2<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_36_mat2<>+0x18(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_36_mat2<>+0x20(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_36_mat2<>+0x28(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_36_mat2<>+0x30(SB)/8, $0x1010101020202020
+DATA  ·expandAVX512_36_mat2<>+0x38(SB)/8, $0x2020202020202020
+
+GLOBL ·expandAVX512_36_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_36_outShufLo+0x00(SB)/8, $0x1211100803020100
+DATA  ·expandAVX512_36_outShufLo+0x08(SB)/8, $0x2928201b1a191813
+DATA  ·expandAVX512_36_outShufLo+0x10(SB)/8, $0x4038333231302b2a
+DATA  ·expandAVX512_36_outShufLo+0x18(SB)/8, $0x504b4a4948434241
+DATA  ·expandAVX512_36_outShufLo+0x20(SB)/8, $0x070605045b5a5958
+DATA  ·expandAVX512_36_outShufLo+0x28(SB)/8, $0x1e1d1c1716151409
+DATA  ·expandAVX512_36_outShufLo+0x30(SB)/8, $0x35342f2e2d2c211f
+DATA  ·expandAVX512_36_outShufLo+0x38(SB)/8, $0x4c47464544393736
+
+GLOBL ·expandAVX512_36_outShufHi(SB), RODATA, $0x40
+DATA  ·expandAVX512_36_outShufHi+0x00(SB)/8, $0x3332313028222120
+DATA  ·expandAVX512_36_outShufHi+0x08(SB)/8, $0x4a4948403b3a3938
+DATA  ·expandAVX512_36_outShufHi+0x10(SB)/8, $0x616058535251504b
+DATA  ·expandAVX512_36_outShufHi+0x18(SB)/8, $0x78706b6a69686362
+DATA  ·expandAVX512_36_outShufHi+0x20(SB)/8, $0x29262524237b7a79
+DATA  ·expandAVX512_36_outShufHi+0x28(SB)/8, $0x3f3e3d3c37363534
+DATA  ·expandAVX512_36_outShufHi+0x30(SB)/8, $0x5655544f4e4d4c41
+DATA  ·expandAVX512_36_outShufHi+0x38(SB)/8, $0x6d6c676665645957
+
+TEXT ·expandAVX512_36<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_36_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_36_inShuf1<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_36_inShuf2<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_36_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_36_outShufHi(SB), Z2
        VMOVDQU64 (AX), Z5
        VPERMB Z5, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_36_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_36_mat0<>(SB), Z0, Z0
        VPERMB Z5, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_36_mat1<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_36_mat1<>(SB), Z3, Z3
        VPERMB Z5, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_36_mat2<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_36_mat2<>(SB), Z4, Z4
        VPERMI2B Z3, Z0, Z1
        VPERMI2B Z4, Z3, Z2
        RET
 
-GLOBL expandAVX512_40_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_40_inShuf0<>+0x00(SB)/8, $0x0101010000000000
-DATA  expandAVX512_40_inShuf0<>+0x08(SB)/8, $0x0101010000000000
-DATA  expandAVX512_40_inShuf0<>+0x10(SB)/8, $0x0101010000000000
-DATA  expandAVX512_40_inShuf0<>+0x18(SB)/8, $0x0101010000000000
-DATA  expandAVX512_40_inShuf0<>+0x20(SB)/8, $0x0101010000000000
-DATA  expandAVX512_40_inShuf0<>+0x28(SB)/8, $0xffffff0000000000
-DATA  expandAVX512_40_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
-DATA  expandAVX512_40_inShuf0<>+0x38(SB)/8, $0xffffff0000000000
-
-GLOBL expandAVX512_40_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_40_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_40_mat0<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_40_mat0<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_40_mat0<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_40_mat0<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_40_mat0<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_40_mat0<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_40_mat0<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_40_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_40_inShuf1<>+0x00(SB)/8, $0xffffffffffff0101
-DATA  expandAVX512_40_inShuf1<>+0x08(SB)/8, $0xffffffffffff0101
-DATA  expandAVX512_40_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
-DATA  expandAVX512_40_inShuf1<>+0x18(SB)/8, $0xffffffffffff0101
-DATA  expandAVX512_40_inShuf1<>+0x20(SB)/8, $0xffffffffffffff01
-DATA  expandAVX512_40_inShuf1<>+0x28(SB)/8, $0xffff020202020201
-DATA  expandAVX512_40_inShuf1<>+0x30(SB)/8, $0x0202020101010101
-DATA  expandAVX512_40_inShuf1<>+0x38(SB)/8, $0x0202020101010101
-
-GLOBL expandAVX512_40_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_40_mat1<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_40_mat1<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_40_mat1<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_40_mat1<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_40_mat1<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_40_mat1<>+0x28(SB)/8, $0x1010101010101010
-DATA  expandAVX512_40_mat1<>+0x30(SB)/8, $0x2020202020202020
-DATA  expandAVX512_40_mat1<>+0x38(SB)/8, $0x4040404040404040
-
-GLOBL expandAVX512_40_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_40_inShuf2<>+0x00(SB)/8, $0x0202020101010101
-DATA  expandAVX512_40_inShuf2<>+0x08(SB)/8, $0x0303030202020202
-DATA  expandAVX512_40_inShuf2<>+0x10(SB)/8, $0x0303030202020202
-DATA  expandAVX512_40_inShuf2<>+0x18(SB)/8, $0xffffff0202020202
-DATA  expandAVX512_40_inShuf2<>+0x20(SB)/8, $0xffffff0202020202
-DATA  expandAVX512_40_inShuf2<>+0x28(SB)/8, $0xffffffffffff0202
-DATA  expandAVX512_40_inShuf2<>+0x30(SB)/8, $0xffffffffffff0202
-DATA  expandAVX512_40_inShuf2<>+0x38(SB)/8, $0xffffffffffff0202
-
-GLOBL expandAVX512_40_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_40_mat2<>+0x00(SB)/8, $0x8080808080808080
-DATA  expandAVX512_40_mat2<>+0x08(SB)/8, $0x0101010101010101
-DATA  expandAVX512_40_mat2<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_40_mat2<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_40_mat2<>+0x20(SB)/8, $0x0808080808080808
-DATA  expandAVX512_40_mat2<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_40_mat2<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_40_mat2<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_40_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_40_inShuf3<>+0x00(SB)/8, $0xffffffffffff0303
-DATA  expandAVX512_40_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_40_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_40_mat3<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_40_mat3<>+0x08(SB)/8, $0x0000000000000000
-DATA  expandAVX512_40_mat3<>+0x10(SB)/8, $0x0000000000000000
-DATA  expandAVX512_40_mat3<>+0x18(SB)/8, $0x0000000000000000
-DATA  expandAVX512_40_mat3<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_40_mat3<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_40_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_40_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_40_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_40_outShufLo+0x00(SB)/8, $0x0a09080403020100
-DATA  expandAVX512_40_outShufLo+0x08(SB)/8, $0x1814131211100c0b
-DATA  expandAVX512_40_outShufLo+0x10(SB)/8, $0x232221201c1b1a19
-DATA  expandAVX512_40_outShufLo+0x18(SB)/8, $0x31302c2b2a292824
-DATA  expandAVX512_40_outShufLo+0x20(SB)/8, $0x3c3b3a3938343332
-DATA  expandAVX512_40_outShufLo+0x28(SB)/8, $0x0f0e0d4140070605
-DATA  expandAVX512_40_outShufLo+0x30(SB)/8, $0x1d51501716154948
-DATA  expandAVX512_40_outShufLo+0x38(SB)/8, $0x6027262559581f1e
-
-GLOBL expandAVX512_40_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_40_outShufHi0+0x00(SB)/8, $0x3938343332313028
-DATA  expandAVX512_40_outShufHi0+0x08(SB)/8, $0x44434241403c3b3a
-DATA  expandAVX512_40_outShufHi0+0x10(SB)/8, $0x5251504c4b4a4948
-DATA  expandAVX512_40_outShufHi0+0x18(SB)/8, $0x605c5b5a59585453
-DATA  expandAVX512_40_outShufHi0+0x20(SB)/8, $0x2c2b2a2964636261
-DATA  expandAVX512_40_outShufHi0+0x28(SB)/8, $0x3e3d69683736352d
-DATA  expandAVX512_40_outShufHi0+0x30(SB)/8, $0x797847464571703f
-DATA  expandAVX512_40_outShufHi0+0x38(SB)/8, $0x575655ffff4f4e4d
-
-GLOBL expandAVX512_40_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_40_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_40_outShufHi1+0x38(SB)/8, $0xffffff0100ffffff
-
-TEXT expandAVX512_40<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_40_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_40_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_40_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_40_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_40_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_40_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_40_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_40_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_inShuf0<>+0x00(SB)/8, $0x0101010000000000
+DATA  ·expandAVX512_40_inShuf0<>+0x08(SB)/8, $0x0101010000000000
+DATA  ·expandAVX512_40_inShuf0<>+0x10(SB)/8, $0x0101010000000000
+DATA  ·expandAVX512_40_inShuf0<>+0x18(SB)/8, $0x0101010000000000
+DATA  ·expandAVX512_40_inShuf0<>+0x20(SB)/8, $0x0101010000000000
+DATA  ·expandAVX512_40_inShuf0<>+0x28(SB)/8, $0xffffff0000000000
+DATA  ·expandAVX512_40_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
+DATA  ·expandAVX512_40_inShuf0<>+0x38(SB)/8, $0xffffff0000000000
+
+GLOBL ·expandAVX512_40_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_40_mat0<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_40_mat0<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_40_mat0<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_40_mat0<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_40_mat0<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_40_mat0<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_40_mat0<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_40_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_inShuf1<>+0x00(SB)/8, $0xffffffffffff0101
+DATA  ·expandAVX512_40_inShuf1<>+0x08(SB)/8, $0xffffffffffff0101
+DATA  ·expandAVX512_40_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
+DATA  ·expandAVX512_40_inShuf1<>+0x18(SB)/8, $0xffffffffffff0101
+DATA  ·expandAVX512_40_inShuf1<>+0x20(SB)/8, $0xffffffffffffff01
+DATA  ·expandAVX512_40_inShuf1<>+0x28(SB)/8, $0xffff020202020201
+DATA  ·expandAVX512_40_inShuf1<>+0x30(SB)/8, $0x0202020101010101
+DATA  ·expandAVX512_40_inShuf1<>+0x38(SB)/8, $0x0202020101010101
+
+GLOBL ·expandAVX512_40_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_mat1<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_40_mat1<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_40_mat1<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_40_mat1<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_40_mat1<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_40_mat1<>+0x28(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_40_mat1<>+0x30(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_40_mat1<>+0x38(SB)/8, $0x4040404040404040
+
+GLOBL ·expandAVX512_40_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_inShuf2<>+0x00(SB)/8, $0x0202020101010101
+DATA  ·expandAVX512_40_inShuf2<>+0x08(SB)/8, $0x0303030202020202
+DATA  ·expandAVX512_40_inShuf2<>+0x10(SB)/8, $0x0303030202020202
+DATA  ·expandAVX512_40_inShuf2<>+0x18(SB)/8, $0xffffff0202020202
+DATA  ·expandAVX512_40_inShuf2<>+0x20(SB)/8, $0xffffff0202020202
+DATA  ·expandAVX512_40_inShuf2<>+0x28(SB)/8, $0xffffffffffff0202
+DATA  ·expandAVX512_40_inShuf2<>+0x30(SB)/8, $0xffffffffffff0202
+DATA  ·expandAVX512_40_inShuf2<>+0x38(SB)/8, $0xffffffffffff0202
+
+GLOBL ·expandAVX512_40_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_mat2<>+0x00(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_40_mat2<>+0x08(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_40_mat2<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_40_mat2<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_40_mat2<>+0x20(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_40_mat2<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_40_mat2<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_40_mat2<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_40_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_inShuf3<>+0x00(SB)/8, $0xffffffffffff0303
+DATA  ·expandAVX512_40_inShuf3<>+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_40_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_mat3<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_40_mat3<>+0x08(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_40_mat3<>+0x10(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_40_mat3<>+0x18(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_40_mat3<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_40_mat3<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_40_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_40_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_40_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_outShufLo+0x00(SB)/8, $0x0a09080403020100
+DATA  ·expandAVX512_40_outShufLo+0x08(SB)/8, $0x1814131211100c0b
+DATA  ·expandAVX512_40_outShufLo+0x10(SB)/8, $0x232221201c1b1a19
+DATA  ·expandAVX512_40_outShufLo+0x18(SB)/8, $0x31302c2b2a292824
+DATA  ·expandAVX512_40_outShufLo+0x20(SB)/8, $0x3c3b3a3938343332
+DATA  ·expandAVX512_40_outShufLo+0x28(SB)/8, $0x0f0e0d4140070605
+DATA  ·expandAVX512_40_outShufLo+0x30(SB)/8, $0x1d51501716154948
+DATA  ·expandAVX512_40_outShufLo+0x38(SB)/8, $0x6027262559581f1e
+
+GLOBL ·expandAVX512_40_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_outShufHi0+0x00(SB)/8, $0x3938343332313028
+DATA  ·expandAVX512_40_outShufHi0+0x08(SB)/8, $0x44434241403c3b3a
+DATA  ·expandAVX512_40_outShufHi0+0x10(SB)/8, $0x5251504c4b4a4948
+DATA  ·expandAVX512_40_outShufHi0+0x18(SB)/8, $0x605c5b5a59585453
+DATA  ·expandAVX512_40_outShufHi0+0x20(SB)/8, $0x2c2b2a2964636261
+DATA  ·expandAVX512_40_outShufHi0+0x28(SB)/8, $0x3e3d69683736352d
+DATA  ·expandAVX512_40_outShufHi0+0x30(SB)/8, $0x797847464571703f
+DATA  ·expandAVX512_40_outShufHi0+0x38(SB)/8, $0x575655ffff4f4e4d
+
+GLOBL ·expandAVX512_40_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_40_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_outShufHi1+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_40_outShufHi1+0x38(SB)/8, $0xffffff0100ffffff
+
+TEXT ·expandAVX512_40<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_40_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_40_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_40_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_40_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_40_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_40_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_40_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_40_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_40_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_40_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_40_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_40_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_40_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_40_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_40_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0xe7ffffffffffffff, AX
        KMOVQ AX, K1
@@ -1979,133 +1979,133 @@ TEXT expandAVX512_40<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_44_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_44_inShuf0<>+0x00(SB)/8, $0x0101010000000000
-DATA  expandAVX512_44_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_44_inShuf0<>+0x10(SB)/8, $0x0101010000000000
-DATA  expandAVX512_44_inShuf0<>+0x18(SB)/8, $0x0101010000000000
-DATA  expandAVX512_44_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_44_inShuf0<>+0x28(SB)/8, $0x0101010000000000
-DATA  expandAVX512_44_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
-DATA  expandAVX512_44_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
-
-GLOBL expandAVX512_44_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_44_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_44_mat0<>+0x08(SB)/8, $0x0101010102020202
-DATA  expandAVX512_44_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_44_mat0<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_44_mat0<>+0x20(SB)/8, $0x0404040408080808
-DATA  expandAVX512_44_mat0<>+0x28(SB)/8, $0x0808080808080808
-DATA  expandAVX512_44_mat0<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_44_mat0<>+0x38(SB)/8, $0x1010101020202020
-
-GLOBL expandAVX512_44_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_44_inShuf1<>+0x00(SB)/8, $0xffffff0000000000
-DATA  expandAVX512_44_inShuf1<>+0x08(SB)/8, $0xffffff0000000000
-DATA  expandAVX512_44_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
-DATA  expandAVX512_44_inShuf1<>+0x18(SB)/8, $0xffffff0000000000
-DATA  expandAVX512_44_inShuf1<>+0x20(SB)/8, $0xffffffffffff0101
-DATA  expandAVX512_44_inShuf1<>+0x28(SB)/8, $0xffffffffffff0101
-DATA  expandAVX512_44_inShuf1<>+0x30(SB)/8, $0xffffffffffff0101
-DATA  expandAVX512_44_inShuf1<>+0x38(SB)/8, $0xff02020202020101
-
-GLOBL expandAVX512_44_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_44_mat1<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_44_mat1<>+0x08(SB)/8, $0x4040404040404040
-DATA  expandAVX512_44_mat1<>+0x10(SB)/8, $0x4040404080808080
-DATA  expandAVX512_44_mat1<>+0x18(SB)/8, $0x8080808080808080
-DATA  expandAVX512_44_mat1<>+0x20(SB)/8, $0x0101010101010101
-DATA  expandAVX512_44_mat1<>+0x28(SB)/8, $0x0202020202020202
-DATA  expandAVX512_44_mat1<>+0x30(SB)/8, $0x0404040404040404
-DATA  expandAVX512_44_mat1<>+0x38(SB)/8, $0x0808080808080808
-
-GLOBL expandAVX512_44_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_44_inShuf2<>+0x00(SB)/8, $0x0202020101010101
-DATA  expandAVX512_44_inShuf2<>+0x08(SB)/8, $0xffffffffffff0201
-DATA  expandAVX512_44_inShuf2<>+0x10(SB)/8, $0x0202020101010101
-DATA  expandAVX512_44_inShuf2<>+0x18(SB)/8, $0x0202020101010101
-DATA  expandAVX512_44_inShuf2<>+0x20(SB)/8, $0xffffffffffff0201
-DATA  expandAVX512_44_inShuf2<>+0x28(SB)/8, $0xffff020101010101
-DATA  expandAVX512_44_inShuf2<>+0x30(SB)/8, $0xffffff0202020202
-DATA  expandAVX512_44_inShuf2<>+0x38(SB)/8, $0xffffffffffffff02
-
-GLOBL expandAVX512_44_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_44_mat2<>+0x00(SB)/8, $0x1010101010101010
-DATA  expandAVX512_44_mat2<>+0x08(SB)/8, $0x1010101020202020
-DATA  expandAVX512_44_mat2<>+0x10(SB)/8, $0x2020202020202020
-DATA  expandAVX512_44_mat2<>+0x18(SB)/8, $0x4040404040404040
-DATA  expandAVX512_44_mat2<>+0x20(SB)/8, $0x4040404080808080
-DATA  expandAVX512_44_mat2<>+0x28(SB)/8, $0x8080808080808080
-DATA  expandAVX512_44_mat2<>+0x30(SB)/8, $0x0101010101010101
-DATA  expandAVX512_44_mat2<>+0x38(SB)/8, $0x0101010102020202
-
-GLOBL expandAVX512_44_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_44_inShuf3<>+0x00(SB)/8, $0xffffff0202020202
-DATA  expandAVX512_44_inShuf3<>+0x08(SB)/8, $0xffffff0202020202
-DATA  expandAVX512_44_inShuf3<>+0x10(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_44_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
-DATA  expandAVX512_44_inShuf3<>+0x20(SB)/8, $0xffffffffffff0202
-DATA  expandAVX512_44_inShuf3<>+0x28(SB)/8, $0xffffffffffff0202
-DATA  expandAVX512_44_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_44_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_44_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_44_mat3<>+0x00(SB)/8, $0x0202020202020202
-DATA  expandAVX512_44_mat3<>+0x08(SB)/8, $0x0404040404040404
-DATA  expandAVX512_44_mat3<>+0x10(SB)/8, $0x0404040408080808
-DATA  expandAVX512_44_mat3<>+0x18(SB)/8, $0x1010101010101010
-DATA  expandAVX512_44_mat3<>+0x20(SB)/8, $0x2020202020202020
-DATA  expandAVX512_44_mat3<>+0x28(SB)/8, $0x4040404040404040
-DATA  expandAVX512_44_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_44_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_44_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_44_outShufLo+0x00(SB)/8, $0x1110080403020100
-DATA  expandAVX512_44_outShufLo+0x08(SB)/8, $0x1c1b1a1918141312
-DATA  expandAVX512_44_outShufLo+0x10(SB)/8, $0x31302c2b2a292820
-DATA  expandAVX512_44_outShufLo+0x18(SB)/8, $0x4342414038343332
-DATA  expandAVX512_44_outShufLo+0x20(SB)/8, $0x58504c4b4a494844
-DATA  expandAVX512_44_outShufLo+0x28(SB)/8, $0x600706055c5b5a59
-DATA  expandAVX512_44_outShufLo+0x30(SB)/8, $0x1d69681716150961
-DATA  expandAVX512_44_outShufLo+0x38(SB)/8, $0x2f2e2d2171701f1e
-
-GLOBL expandAVX512_44_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_44_outShufHi0+0x00(SB)/8, $0x4844434241403938
-DATA  expandAVX512_44_outShufHi0+0x08(SB)/8, $0x5a59585453525150
-DATA  expandAVX512_44_outShufHi0+0x10(SB)/8, $0x6c6b6a6968605c5b
-DATA  expandAVX512_44_outShufHi0+0x18(SB)/8, $0xffff787473727170
-DATA  expandAVX512_44_outShufHi0+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_44_outShufHi0+0x28(SB)/8, $0x46453e3d3c3b3aff
-DATA  expandAVX512_44_outShufHi0+0x30(SB)/8, $0xff57565549ffff47
-DATA  expandAVX512_44_outShufHi0+0x38(SB)/8, $0x6d61ffff5f5e5dff
-
-GLOBL expandAVX512_44_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_44_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_44_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_44_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_44_outShufHi1+0x18(SB)/8, $0x0100ffffffffffff
-DATA  expandAVX512_44_outShufHi1+0x20(SB)/8, $0x0c0b0a0908040302
-DATA  expandAVX512_44_outShufHi1+0x28(SB)/8, $0xffffffffffffff10
-DATA  expandAVX512_44_outShufHi1+0x30(SB)/8, $0x20ffffffff1918ff
-DATA  expandAVX512_44_outShufHi1+0x38(SB)/8, $0xffff2928ffffff21
-
-TEXT expandAVX512_44<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_44_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_44_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_44_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_44_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_44_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_44_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_44_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_44_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_inShuf0<>+0x00(SB)/8, $0x0101010000000000
+DATA  ·expandAVX512_44_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_44_inShuf0<>+0x10(SB)/8, $0x0101010000000000
+DATA  ·expandAVX512_44_inShuf0<>+0x18(SB)/8, $0x0101010000000000
+DATA  ·expandAVX512_44_inShuf0<>+0x20(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_44_inShuf0<>+0x28(SB)/8, $0x0101010000000000
+DATA  ·expandAVX512_44_inShuf0<>+0x30(SB)/8, $0xffffff0000000000
+DATA  ·expandAVX512_44_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
+
+GLOBL ·expandAVX512_44_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_44_mat0<>+0x08(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_44_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_44_mat0<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_44_mat0<>+0x20(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_44_mat0<>+0x28(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_44_mat0<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_44_mat0<>+0x38(SB)/8, $0x1010101020202020
+
+GLOBL ·expandAVX512_44_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_inShuf1<>+0x00(SB)/8, $0xffffff0000000000
+DATA  ·expandAVX512_44_inShuf1<>+0x08(SB)/8, $0xffffff0000000000
+DATA  ·expandAVX512_44_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
+DATA  ·expandAVX512_44_inShuf1<>+0x18(SB)/8, $0xffffff0000000000
+DATA  ·expandAVX512_44_inShuf1<>+0x20(SB)/8, $0xffffffffffff0101
+DATA  ·expandAVX512_44_inShuf1<>+0x28(SB)/8, $0xffffffffffff0101
+DATA  ·expandAVX512_44_inShuf1<>+0x30(SB)/8, $0xffffffffffff0101
+DATA  ·expandAVX512_44_inShuf1<>+0x38(SB)/8, $0xff02020202020101
+
+GLOBL ·expandAVX512_44_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_mat1<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_44_mat1<>+0x08(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_44_mat1<>+0x10(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_44_mat1<>+0x18(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_44_mat1<>+0x20(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_44_mat1<>+0x28(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_44_mat1<>+0x30(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_44_mat1<>+0x38(SB)/8, $0x0808080808080808
+
+GLOBL ·expandAVX512_44_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_inShuf2<>+0x00(SB)/8, $0x0202020101010101
+DATA  ·expandAVX512_44_inShuf2<>+0x08(SB)/8, $0xffffffffffff0201
+DATA  ·expandAVX512_44_inShuf2<>+0x10(SB)/8, $0x0202020101010101
+DATA  ·expandAVX512_44_inShuf2<>+0x18(SB)/8, $0x0202020101010101
+DATA  ·expandAVX512_44_inShuf2<>+0x20(SB)/8, $0xffffffffffff0201
+DATA  ·expandAVX512_44_inShuf2<>+0x28(SB)/8, $0xffff020101010101
+DATA  ·expandAVX512_44_inShuf2<>+0x30(SB)/8, $0xffffff0202020202
+DATA  ·expandAVX512_44_inShuf2<>+0x38(SB)/8, $0xffffffffffffff02
+
+GLOBL ·expandAVX512_44_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_mat2<>+0x00(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_44_mat2<>+0x08(SB)/8, $0x1010101020202020
+DATA  ·expandAVX512_44_mat2<>+0x10(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_44_mat2<>+0x18(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_44_mat2<>+0x20(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_44_mat2<>+0x28(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_44_mat2<>+0x30(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_44_mat2<>+0x38(SB)/8, $0x0101010102020202
+
+GLOBL ·expandAVX512_44_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_inShuf3<>+0x00(SB)/8, $0xffffff0202020202
+DATA  ·expandAVX512_44_inShuf3<>+0x08(SB)/8, $0xffffff0202020202
+DATA  ·expandAVX512_44_inShuf3<>+0x10(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_44_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
+DATA  ·expandAVX512_44_inShuf3<>+0x20(SB)/8, $0xffffffffffff0202
+DATA  ·expandAVX512_44_inShuf3<>+0x28(SB)/8, $0xffffffffffff0202
+DATA  ·expandAVX512_44_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_44_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_44_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_mat3<>+0x00(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_44_mat3<>+0x08(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_44_mat3<>+0x10(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_44_mat3<>+0x18(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_44_mat3<>+0x20(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_44_mat3<>+0x28(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_44_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_44_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_44_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_outShufLo+0x00(SB)/8, $0x1110080403020100
+DATA  ·expandAVX512_44_outShufLo+0x08(SB)/8, $0x1c1b1a1918141312
+DATA  ·expandAVX512_44_outShufLo+0x10(SB)/8, $0x31302c2b2a292820
+DATA  ·expandAVX512_44_outShufLo+0x18(SB)/8, $0x4342414038343332
+DATA  ·expandAVX512_44_outShufLo+0x20(SB)/8, $0x58504c4b4a494844
+DATA  ·expandAVX512_44_outShufLo+0x28(SB)/8, $0x600706055c5b5a59
+DATA  ·expandAVX512_44_outShufLo+0x30(SB)/8, $0x1d69681716150961
+DATA  ·expandAVX512_44_outShufLo+0x38(SB)/8, $0x2f2e2d2171701f1e
+
+GLOBL ·expandAVX512_44_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_outShufHi0+0x00(SB)/8, $0x4844434241403938
+DATA  ·expandAVX512_44_outShufHi0+0x08(SB)/8, $0x5a59585453525150
+DATA  ·expandAVX512_44_outShufHi0+0x10(SB)/8, $0x6c6b6a6968605c5b
+DATA  ·expandAVX512_44_outShufHi0+0x18(SB)/8, $0xffff787473727170
+DATA  ·expandAVX512_44_outShufHi0+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_44_outShufHi0+0x28(SB)/8, $0x46453e3d3c3b3aff
+DATA  ·expandAVX512_44_outShufHi0+0x30(SB)/8, $0xff57565549ffff47
+DATA  ·expandAVX512_44_outShufHi0+0x38(SB)/8, $0x6d61ffff5f5e5dff
+
+GLOBL ·expandAVX512_44_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_44_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_44_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_44_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_44_outShufHi1+0x18(SB)/8, $0x0100ffffffffffff
+DATA  ·expandAVX512_44_outShufHi1+0x20(SB)/8, $0x0c0b0a0908040302
+DATA  ·expandAVX512_44_outShufHi1+0x28(SB)/8, $0xffffffffffffff10
+DATA  ·expandAVX512_44_outShufHi1+0x30(SB)/8, $0x20ffffffff1918ff
+DATA  ·expandAVX512_44_outShufHi1+0x38(SB)/8, $0xffff2928ffffff21
+
+TEXT ·expandAVX512_44<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_44_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_44_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_44_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_44_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_44_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_44_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_44_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_44_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_44_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_44_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_44_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_44_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_44_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_44_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_44_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0xce79fe003fffffff, AX
        KMOVQ AX, K1
@@ -2116,230 +2116,230 @@ TEXT expandAVX512_44<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_48_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_48_inShuf0<>+0x00(SB)/8, $0x0101000000000000
-DATA  expandAVX512_48_inShuf0<>+0x08(SB)/8, $0x0101000000000000
-DATA  expandAVX512_48_inShuf0<>+0x10(SB)/8, $0x0101000000000000
-DATA  expandAVX512_48_inShuf0<>+0x18(SB)/8, $0xffff000000000000
-DATA  expandAVX512_48_inShuf0<>+0x20(SB)/8, $0xffff000000000000
-DATA  expandAVX512_48_inShuf0<>+0x28(SB)/8, $0xffff000000000000
-DATA  expandAVX512_48_inShuf0<>+0x30(SB)/8, $0xffff000000000000
-DATA  expandAVX512_48_inShuf0<>+0x38(SB)/8, $0xffff000000000000
-
-GLOBL expandAVX512_48_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_48_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_48_mat0<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_48_mat0<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_48_mat0<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_48_mat0<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_48_mat0<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_48_mat0<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_48_mat0<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_48_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_48_inShuf1<>+0x00(SB)/8, $0xffffffff01010101
-DATA  expandAVX512_48_inShuf1<>+0x08(SB)/8, $0xffffffff01010101
-DATA  expandAVX512_48_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
-DATA  expandAVX512_48_inShuf1<>+0x18(SB)/8, $0x0202020202020101
-DATA  expandAVX512_48_inShuf1<>+0x20(SB)/8, $0x0202010101010101
-DATA  expandAVX512_48_inShuf1<>+0x28(SB)/8, $0x0202010101010101
-DATA  expandAVX512_48_inShuf1<>+0x30(SB)/8, $0x0202010101010101
-DATA  expandAVX512_48_inShuf1<>+0x38(SB)/8, $0xffff010101010101
-
-GLOBL expandAVX512_48_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_48_mat1<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_48_mat1<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_48_mat1<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_48_mat1<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_48_mat1<>+0x20(SB)/8, $0x0808080808080808
-DATA  expandAVX512_48_mat1<>+0x28(SB)/8, $0x1010101010101010
-DATA  expandAVX512_48_mat1<>+0x30(SB)/8, $0x2020202020202020
-DATA  expandAVX512_48_mat1<>+0x38(SB)/8, $0x4040404040404040
-
-GLOBL expandAVX512_48_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_48_inShuf2<>+0x00(SB)/8, $0xffff010101010101
-DATA  expandAVX512_48_inShuf2<>+0x08(SB)/8, $0xffff020202020202
-DATA  expandAVX512_48_inShuf2<>+0x10(SB)/8, $0xffff020202020202
-DATA  expandAVX512_48_inShuf2<>+0x18(SB)/8, $0xffffffff02020202
-DATA  expandAVX512_48_inShuf2<>+0x20(SB)/8, $0xffffffff02020202
-DATA  expandAVX512_48_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_48_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_48_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_48_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_48_mat2<>+0x00(SB)/8, $0x8080808080808080
-DATA  expandAVX512_48_mat2<>+0x08(SB)/8, $0x0101010101010101
-DATA  expandAVX512_48_mat2<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_48_mat2<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_48_mat2<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_48_mat2<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_48_mat2<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_48_mat2<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_48_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_48_outShufLo+0x00(SB)/8, $0x0908050403020100
-DATA  expandAVX512_48_outShufLo+0x08(SB)/8, $0x131211100d0c0b0a
-DATA  expandAVX512_48_outShufLo+0x10(SB)/8, $0x1d1c1b1a19181514
-DATA  expandAVX512_48_outShufLo+0x18(SB)/8, $0x2928252423222120
-DATA  expandAVX512_48_outShufLo+0x20(SB)/8, $0x333231302d2c2b2a
-DATA  expandAVX512_48_outShufLo+0x28(SB)/8, $0x3d3c3b3a39383534
-DATA  expandAVX512_48_outShufLo+0x30(SB)/8, $0x0f0e434241400706
-DATA  expandAVX512_48_outShufLo+0x38(SB)/8, $0x515017164b4a4948
-
-GLOBL expandAVX512_48_outShufHi(SB), RODATA, $0x40
-DATA  expandAVX512_48_outShufHi+0x00(SB)/8, $0x2524232221201918
-DATA  expandAVX512_48_outShufHi+0x08(SB)/8, $0x31302d2c2b2a2928
-DATA  expandAVX512_48_outShufHi+0x10(SB)/8, $0x3b3a393835343332
-DATA  expandAVX512_48_outShufHi+0x18(SB)/8, $0x4544434241403d3c
-DATA  expandAVX512_48_outShufHi+0x20(SB)/8, $0x51504d4c4b4a4948
-DATA  expandAVX512_48_outShufHi+0x28(SB)/8, $0x1d1c1b1a55545352
-DATA  expandAVX512_48_outShufHi+0x30(SB)/8, $0x5b5a595827261f1e
-DATA  expandAVX512_48_outShufHi+0x38(SB)/8, $0x3736636261602f2e
-
-TEXT expandAVX512_48<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_48_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_48_inShuf1<>(SB), Z3
-       VMOVDQU64 expandAVX512_48_inShuf2<>(SB), Z4
-       VMOVDQU64 expandAVX512_48_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_48_outShufHi(SB), Z2
+GLOBL ·expandAVX512_48_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_48_inShuf0<>+0x00(SB)/8, $0x0101000000000000
+DATA  ·expandAVX512_48_inShuf0<>+0x08(SB)/8, $0x0101000000000000
+DATA  ·expandAVX512_48_inShuf0<>+0x10(SB)/8, $0x0101000000000000
+DATA  ·expandAVX512_48_inShuf0<>+0x18(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_48_inShuf0<>+0x20(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_48_inShuf0<>+0x28(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_48_inShuf0<>+0x30(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_48_inShuf0<>+0x38(SB)/8, $0xffff000000000000
+
+GLOBL ·expandAVX512_48_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_48_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_48_mat0<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_48_mat0<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_48_mat0<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_48_mat0<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_48_mat0<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_48_mat0<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_48_mat0<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_48_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_48_inShuf1<>+0x00(SB)/8, $0xffffffff01010101
+DATA  ·expandAVX512_48_inShuf1<>+0x08(SB)/8, $0xffffffff01010101
+DATA  ·expandAVX512_48_inShuf1<>+0x10(SB)/8, $0xffffffffffff0101
+DATA  ·expandAVX512_48_inShuf1<>+0x18(SB)/8, $0x0202020202020101
+DATA  ·expandAVX512_48_inShuf1<>+0x20(SB)/8, $0x0202010101010101
+DATA  ·expandAVX512_48_inShuf1<>+0x28(SB)/8, $0x0202010101010101
+DATA  ·expandAVX512_48_inShuf1<>+0x30(SB)/8, $0x0202010101010101
+DATA  ·expandAVX512_48_inShuf1<>+0x38(SB)/8, $0xffff010101010101
+
+GLOBL ·expandAVX512_48_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_48_mat1<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_48_mat1<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_48_mat1<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_48_mat1<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_48_mat1<>+0x20(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_48_mat1<>+0x28(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_48_mat1<>+0x30(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_48_mat1<>+0x38(SB)/8, $0x4040404040404040
+
+GLOBL ·expandAVX512_48_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_48_inShuf2<>+0x00(SB)/8, $0xffff010101010101
+DATA  ·expandAVX512_48_inShuf2<>+0x08(SB)/8, $0xffff020202020202
+DATA  ·expandAVX512_48_inShuf2<>+0x10(SB)/8, $0xffff020202020202
+DATA  ·expandAVX512_48_inShuf2<>+0x18(SB)/8, $0xffffffff02020202
+DATA  ·expandAVX512_48_inShuf2<>+0x20(SB)/8, $0xffffffff02020202
+DATA  ·expandAVX512_48_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_48_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_48_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_48_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_48_mat2<>+0x00(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_48_mat2<>+0x08(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_48_mat2<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_48_mat2<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_48_mat2<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_48_mat2<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_48_mat2<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_48_mat2<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_48_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_48_outShufLo+0x00(SB)/8, $0x0908050403020100
+DATA  ·expandAVX512_48_outShufLo+0x08(SB)/8, $0x131211100d0c0b0a
+DATA  ·expandAVX512_48_outShufLo+0x10(SB)/8, $0x1d1c1b1a19181514
+DATA  ·expandAVX512_48_outShufLo+0x18(SB)/8, $0x2928252423222120
+DATA  ·expandAVX512_48_outShufLo+0x20(SB)/8, $0x333231302d2c2b2a
+DATA  ·expandAVX512_48_outShufLo+0x28(SB)/8, $0x3d3c3b3a39383534
+DATA  ·expandAVX512_48_outShufLo+0x30(SB)/8, $0x0f0e434241400706
+DATA  ·expandAVX512_48_outShufLo+0x38(SB)/8, $0x515017164b4a4948
+
+GLOBL ·expandAVX512_48_outShufHi(SB), RODATA, $0x40
+DATA  ·expandAVX512_48_outShufHi+0x00(SB)/8, $0x2524232221201918
+DATA  ·expandAVX512_48_outShufHi+0x08(SB)/8, $0x31302d2c2b2a2928
+DATA  ·expandAVX512_48_outShufHi+0x10(SB)/8, $0x3b3a393835343332
+DATA  ·expandAVX512_48_outShufHi+0x18(SB)/8, $0x4544434241403d3c
+DATA  ·expandAVX512_48_outShufHi+0x20(SB)/8, $0x51504d4c4b4a4948
+DATA  ·expandAVX512_48_outShufHi+0x28(SB)/8, $0x1d1c1b1a55545352
+DATA  ·expandAVX512_48_outShufHi+0x30(SB)/8, $0x5b5a595827261f1e
+DATA  ·expandAVX512_48_outShufHi+0x38(SB)/8, $0x3736636261602f2e
+
+TEXT ·expandAVX512_48<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_48_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_48_inShuf1<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_48_inShuf2<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_48_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_48_outShufHi(SB), Z2
        VMOVDQU64 (AX), Z5
        VPERMB Z5, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_48_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_48_mat0<>(SB), Z0, Z0
        VPERMB Z5, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_48_mat1<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_48_mat1<>(SB), Z3, Z3
        VPERMB Z5, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_48_mat2<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_48_mat2<>(SB), Z4, Z4
        VPERMI2B Z3, Z0, Z1
        VPERMI2B Z4, Z3, Z2
        RET
 
-GLOBL expandAVX512_52_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_52_inShuf0<>+0x00(SB)/8, $0x0101000000000000
-DATA  expandAVX512_52_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
-DATA  expandAVX512_52_inShuf0<>+0x10(SB)/8, $0x0101000000000000
-DATA  expandAVX512_52_inShuf0<>+0x18(SB)/8, $0xffff000000000000
-DATA  expandAVX512_52_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
-DATA  expandAVX512_52_inShuf0<>+0x28(SB)/8, $0xffff000000000000
-DATA  expandAVX512_52_inShuf0<>+0x30(SB)/8, $0xffff000000000000
-DATA  expandAVX512_52_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
-
-GLOBL expandAVX512_52_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_52_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_52_mat0<>+0x08(SB)/8, $0x0101010102020202
-DATA  expandAVX512_52_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_52_mat0<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_52_mat0<>+0x20(SB)/8, $0x0404040408080808
-DATA  expandAVX512_52_mat0<>+0x28(SB)/8, $0x0808080808080808
-DATA  expandAVX512_52_mat0<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_52_mat0<>+0x38(SB)/8, $0x1010101020202020
-
-GLOBL expandAVX512_52_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_52_inShuf1<>+0x00(SB)/8, $0xffff000000000000
-DATA  expandAVX512_52_inShuf1<>+0x08(SB)/8, $0xffff000000000000
-DATA  expandAVX512_52_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
-DATA  expandAVX512_52_inShuf1<>+0x18(SB)/8, $0xffff000000000000
-DATA  expandAVX512_52_inShuf1<>+0x20(SB)/8, $0xffffffff01010101
-DATA  expandAVX512_52_inShuf1<>+0x28(SB)/8, $0xffffffffff010101
-DATA  expandAVX512_52_inShuf1<>+0x30(SB)/8, $0xff02020202020201
-DATA  expandAVX512_52_inShuf1<>+0x38(SB)/8, $0x0202010101010101
-
-GLOBL expandAVX512_52_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_52_mat1<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_52_mat1<>+0x08(SB)/8, $0x4040404040404040
-DATA  expandAVX512_52_mat1<>+0x10(SB)/8, $0x4040404080808080
-DATA  expandAVX512_52_mat1<>+0x18(SB)/8, $0x8080808080808080
-DATA  expandAVX512_52_mat1<>+0x20(SB)/8, $0x0101010101010101
-DATA  expandAVX512_52_mat1<>+0x28(SB)/8, $0x0202020202020202
-DATA  expandAVX512_52_mat1<>+0x30(SB)/8, $0x0202020202020202
-DATA  expandAVX512_52_mat1<>+0x38(SB)/8, $0x0404040404040404
-
-GLOBL expandAVX512_52_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_52_inShuf2<>+0x00(SB)/8, $0xffffffffffff0201
-DATA  expandAVX512_52_inShuf2<>+0x08(SB)/8, $0x0202010101010101
-DATA  expandAVX512_52_inShuf2<>+0x10(SB)/8, $0xffff010101010101
-DATA  expandAVX512_52_inShuf2<>+0x18(SB)/8, $0xffffffffffffff01
-DATA  expandAVX512_52_inShuf2<>+0x20(SB)/8, $0xffff010101010101
-DATA  expandAVX512_52_inShuf2<>+0x28(SB)/8, $0xffff010101010101
-DATA  expandAVX512_52_inShuf2<>+0x30(SB)/8, $0xffffffffffffff01
-DATA  expandAVX512_52_inShuf2<>+0x38(SB)/8, $0xffff010101010101
-
-GLOBL expandAVX512_52_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_52_mat2<>+0x00(SB)/8, $0x0404040408080808
-DATA  expandAVX512_52_mat2<>+0x08(SB)/8, $0x0808080808080808
-DATA  expandAVX512_52_mat2<>+0x10(SB)/8, $0x1010101010101010
-DATA  expandAVX512_52_mat2<>+0x18(SB)/8, $0x1010101020202020
-DATA  expandAVX512_52_mat2<>+0x20(SB)/8, $0x2020202020202020
-DATA  expandAVX512_52_mat2<>+0x28(SB)/8, $0x4040404040404040
-DATA  expandAVX512_52_mat2<>+0x30(SB)/8, $0x4040404080808080
-DATA  expandAVX512_52_mat2<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_52_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_52_inShuf3<>+0x00(SB)/8, $0xffff020202020202
-DATA  expandAVX512_52_inShuf3<>+0x08(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_52_inShuf3<>+0x10(SB)/8, $0xffffffff02020202
-DATA  expandAVX512_52_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
-DATA  expandAVX512_52_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_52_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_52_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_52_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_52_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_52_mat3<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_52_mat3<>+0x08(SB)/8, $0x0101010102020202
-DATA  expandAVX512_52_mat3<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_52_mat3<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_52_mat3<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_52_mat3<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_52_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_52_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_52_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_52_outShufLo+0x00(SB)/8, $0x1008050403020100
-DATA  expandAVX512_52_outShufLo+0x08(SB)/8, $0x1a19181514131211
-DATA  expandAVX512_52_outShufLo+0x10(SB)/8, $0x2b2a2928201d1c1b
-DATA  expandAVX512_52_outShufLo+0x18(SB)/8, $0x3534333231302d2c
-DATA  expandAVX512_52_outShufLo+0x20(SB)/8, $0x4845444342414038
-DATA  expandAVX512_52_outShufLo+0x28(SB)/8, $0x5958504d4c4b4a49
-DATA  expandAVX512_52_outShufLo+0x30(SB)/8, $0x616007065d5c5b5a
-DATA  expandAVX512_52_outShufLo+0x38(SB)/8, $0x6a69681716096362
-
-GLOBL expandAVX512_52_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_52_outShufHi0+0x00(SB)/8, $0x403d3c3b3a393830
-DATA  expandAVX512_52_outShufHi0+0x08(SB)/8, $0x51504d4c4b4a4948
-DATA  expandAVX512_52_outShufHi0+0x10(SB)/8, $0x6261605855545352
-DATA  expandAVX512_52_outShufHi0+0x18(SB)/8, $0x6c6b6a6968656463
-DATA  expandAVX512_52_outShufHi0+0x20(SB)/8, $0x7d7c7b7a7978706d
-DATA  expandAVX512_52_outShufHi0+0x28(SB)/8, $0x31ffffffffffffff
-DATA  expandAVX512_52_outShufHi0+0x30(SB)/8, $0xff3f3e3635343332
-DATA  expandAVX512_52_outShufHi0+0x38(SB)/8, $0xffff4f4e41ffffff
-
-GLOBL expandAVX512_52_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_52_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_52_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_52_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_52_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_52_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_52_outShufHi1+0x28(SB)/8, $0xff08050403020100
-DATA  expandAVX512_52_outShufHi1+0x30(SB)/8, $0x10ffffffffffffff
-DATA  expandAVX512_52_outShufHi1+0x38(SB)/8, $0x1918ffffff131211
-
-TEXT expandAVX512_52<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_52_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_52_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_52_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_52_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_52_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_52_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_52_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_52_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_inShuf0<>+0x00(SB)/8, $0x0101000000000000
+DATA  ·expandAVX512_52_inShuf0<>+0x08(SB)/8, $0xffffffffffff0100
+DATA  ·expandAVX512_52_inShuf0<>+0x10(SB)/8, $0x0101000000000000
+DATA  ·expandAVX512_52_inShuf0<>+0x18(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_52_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
+DATA  ·expandAVX512_52_inShuf0<>+0x28(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_52_inShuf0<>+0x30(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_52_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
+
+GLOBL ·expandAVX512_52_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_52_mat0<>+0x08(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_52_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_52_mat0<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_52_mat0<>+0x20(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_52_mat0<>+0x28(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_52_mat0<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_52_mat0<>+0x38(SB)/8, $0x1010101020202020
+
+GLOBL ·expandAVX512_52_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_inShuf1<>+0x00(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_52_inShuf1<>+0x08(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_52_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
+DATA  ·expandAVX512_52_inShuf1<>+0x18(SB)/8, $0xffff000000000000
+DATA  ·expandAVX512_52_inShuf1<>+0x20(SB)/8, $0xffffffff01010101
+DATA  ·expandAVX512_52_inShuf1<>+0x28(SB)/8, $0xffffffffff010101
+DATA  ·expandAVX512_52_inShuf1<>+0x30(SB)/8, $0xff02020202020201
+DATA  ·expandAVX512_52_inShuf1<>+0x38(SB)/8, $0x0202010101010101
+
+GLOBL ·expandAVX512_52_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_mat1<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_52_mat1<>+0x08(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_52_mat1<>+0x10(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_52_mat1<>+0x18(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_52_mat1<>+0x20(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_52_mat1<>+0x28(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_52_mat1<>+0x30(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_52_mat1<>+0x38(SB)/8, $0x0404040404040404
+
+GLOBL ·expandAVX512_52_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_inShuf2<>+0x00(SB)/8, $0xffffffffffff0201
+DATA  ·expandAVX512_52_inShuf2<>+0x08(SB)/8, $0x0202010101010101
+DATA  ·expandAVX512_52_inShuf2<>+0x10(SB)/8, $0xffff010101010101
+DATA  ·expandAVX512_52_inShuf2<>+0x18(SB)/8, $0xffffffffffffff01
+DATA  ·expandAVX512_52_inShuf2<>+0x20(SB)/8, $0xffff010101010101
+DATA  ·expandAVX512_52_inShuf2<>+0x28(SB)/8, $0xffff010101010101
+DATA  ·expandAVX512_52_inShuf2<>+0x30(SB)/8, $0xffffffffffffff01
+DATA  ·expandAVX512_52_inShuf2<>+0x38(SB)/8, $0xffff010101010101
+
+GLOBL ·expandAVX512_52_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_mat2<>+0x00(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_52_mat2<>+0x08(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_52_mat2<>+0x10(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_52_mat2<>+0x18(SB)/8, $0x1010101020202020
+DATA  ·expandAVX512_52_mat2<>+0x20(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_52_mat2<>+0x28(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_52_mat2<>+0x30(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_52_mat2<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_52_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_inShuf3<>+0x00(SB)/8, $0xffff020202020202
+DATA  ·expandAVX512_52_inShuf3<>+0x08(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_52_inShuf3<>+0x10(SB)/8, $0xffffffff02020202
+DATA  ·expandAVX512_52_inShuf3<>+0x18(SB)/8, $0xffffffffffff0202
+DATA  ·expandAVX512_52_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_52_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_52_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_52_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_52_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_mat3<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_52_mat3<>+0x08(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_52_mat3<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_52_mat3<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_52_mat3<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_52_mat3<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_52_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_52_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_52_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_outShufLo+0x00(SB)/8, $0x1008050403020100
+DATA  ·expandAVX512_52_outShufLo+0x08(SB)/8, $0x1a19181514131211
+DATA  ·expandAVX512_52_outShufLo+0x10(SB)/8, $0x2b2a2928201d1c1b
+DATA  ·expandAVX512_52_outShufLo+0x18(SB)/8, $0x3534333231302d2c
+DATA  ·expandAVX512_52_outShufLo+0x20(SB)/8, $0x4845444342414038
+DATA  ·expandAVX512_52_outShufLo+0x28(SB)/8, $0x5958504d4c4b4a49
+DATA  ·expandAVX512_52_outShufLo+0x30(SB)/8, $0x616007065d5c5b5a
+DATA  ·expandAVX512_52_outShufLo+0x38(SB)/8, $0x6a69681716096362
+
+GLOBL ·expandAVX512_52_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_outShufHi0+0x00(SB)/8, $0x403d3c3b3a393830
+DATA  ·expandAVX512_52_outShufHi0+0x08(SB)/8, $0x51504d4c4b4a4948
+DATA  ·expandAVX512_52_outShufHi0+0x10(SB)/8, $0x6261605855545352
+DATA  ·expandAVX512_52_outShufHi0+0x18(SB)/8, $0x6c6b6a6968656463
+DATA  ·expandAVX512_52_outShufHi0+0x20(SB)/8, $0x7d7c7b7a7978706d
+DATA  ·expandAVX512_52_outShufHi0+0x28(SB)/8, $0x31ffffffffffffff
+DATA  ·expandAVX512_52_outShufHi0+0x30(SB)/8, $0xff3f3e3635343332
+DATA  ·expandAVX512_52_outShufHi0+0x38(SB)/8, $0xffff4f4e41ffffff
+
+GLOBL ·expandAVX512_52_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_52_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_52_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_52_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_52_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_52_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_52_outShufHi1+0x28(SB)/8, $0xff08050403020100
+DATA  ·expandAVX512_52_outShufHi1+0x30(SB)/8, $0x10ffffffffffffff
+DATA  ·expandAVX512_52_outShufHi1+0x38(SB)/8, $0x1918ffffff131211
+
+TEXT ·expandAVX512_52<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_52_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_52_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_52_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_52_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_52_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_52_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_52_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_52_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_52_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_52_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_52_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_52_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_52_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_52_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_52_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0x387f80ffffffffff, AX
        KMOVQ AX, K1
@@ -2350,221 +2350,221 @@ TEXT expandAVX512_52<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_56_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_56_inShuf0<>+0x00(SB)/8, $0x0100000000000000
-DATA  expandAVX512_56_inShuf0<>+0x08(SB)/8, $0x0100000000000000
-DATA  expandAVX512_56_inShuf0<>+0x10(SB)/8, $0xff00000000000000
-DATA  expandAVX512_56_inShuf0<>+0x18(SB)/8, $0xff00000000000000
-DATA  expandAVX512_56_inShuf0<>+0x20(SB)/8, $0xff00000000000000
-DATA  expandAVX512_56_inShuf0<>+0x28(SB)/8, $0xff00000000000000
-DATA  expandAVX512_56_inShuf0<>+0x30(SB)/8, $0xff00000000000000
-DATA  expandAVX512_56_inShuf0<>+0x38(SB)/8, $0xff00000000000000
-
-GLOBL expandAVX512_56_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_56_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_56_mat0<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_56_mat0<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_56_mat0<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_56_mat0<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_56_mat0<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_56_mat0<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_56_mat0<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_56_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_56_inShuf1<>+0x00(SB)/8, $0xffff010101010101
-DATA  expandAVX512_56_inShuf1<>+0x08(SB)/8, $0x0202010101010101
-DATA  expandAVX512_56_inShuf1<>+0x10(SB)/8, $0x0201010101010101
-DATA  expandAVX512_56_inShuf1<>+0x18(SB)/8, $0xff01010101010101
-DATA  expandAVX512_56_inShuf1<>+0x20(SB)/8, $0xff01010101010101
-DATA  expandAVX512_56_inShuf1<>+0x28(SB)/8, $0xff01010101010101
-DATA  expandAVX512_56_inShuf1<>+0x30(SB)/8, $0xff01010101010101
-DATA  expandAVX512_56_inShuf1<>+0x38(SB)/8, $0xff01010101010101
-
-GLOBL expandAVX512_56_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_56_inShuf2<>+0x00(SB)/8, $0xff02020202020202
-DATA  expandAVX512_56_inShuf2<>+0x08(SB)/8, $0xffffff0202020202
-DATA  expandAVX512_56_inShuf2<>+0x10(SB)/8, $0xffffffffffffff02
-DATA  expandAVX512_56_inShuf2<>+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_56_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_56_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_56_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_56_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_56_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_56_mat2<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_56_mat2<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_56_mat2<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_56_mat2<>+0x18(SB)/8, $0x0000000000000000
-DATA  expandAVX512_56_mat2<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_56_mat2<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_56_mat2<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_56_mat2<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_56_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_56_outShufLo+0x00(SB)/8, $0x0806050403020100
-DATA  expandAVX512_56_outShufLo+0x08(SB)/8, $0x11100e0d0c0b0a09
-DATA  expandAVX512_56_outShufLo+0x10(SB)/8, $0x1a19181615141312
-DATA  expandAVX512_56_outShufLo+0x18(SB)/8, $0x232221201e1d1c1b
-DATA  expandAVX512_56_outShufLo+0x20(SB)/8, $0x2c2b2a2928262524
-DATA  expandAVX512_56_outShufLo+0x28(SB)/8, $0x3534333231302e2d
-DATA  expandAVX512_56_outShufLo+0x30(SB)/8, $0x3e3d3c3b3a393836
-DATA  expandAVX512_56_outShufLo+0x38(SB)/8, $0x0f45444342414007
-
-GLOBL expandAVX512_56_outShufHi(SB), RODATA, $0x40
-DATA  expandAVX512_56_outShufHi+0x00(SB)/8, $0x11100d0c0b0a0908
-DATA  expandAVX512_56_outShufHi+0x08(SB)/8, $0x1a19181615141312
-DATA  expandAVX512_56_outShufHi+0x10(SB)/8, $0x232221201e1d1c1b
-DATA  expandAVX512_56_outShufHi+0x18(SB)/8, $0x2c2b2a2928262524
-DATA  expandAVX512_56_outShufHi+0x20(SB)/8, $0x3534333231302e2d
-DATA  expandAVX512_56_outShufHi+0x28(SB)/8, $0x3e3d3c3b3a393836
-DATA  expandAVX512_56_outShufHi+0x30(SB)/8, $0x0e46454443424140
-DATA  expandAVX512_56_outShufHi+0x38(SB)/8, $0x50174c4b4a49480f
-
-TEXT expandAVX512_56<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_56_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_56_mat0<>(SB), Z3
-       VMOVDQU64 expandAVX512_56_inShuf1<>(SB), Z4
-       VMOVDQU64 expandAVX512_56_inShuf2<>(SB), Z5
-       VMOVDQU64 expandAVX512_56_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_56_outShufHi(SB), Z2
+GLOBL ·expandAVX512_56_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_56_inShuf0<>+0x00(SB)/8, $0x0100000000000000
+DATA  ·expandAVX512_56_inShuf0<>+0x08(SB)/8, $0x0100000000000000
+DATA  ·expandAVX512_56_inShuf0<>+0x10(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_56_inShuf0<>+0x18(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_56_inShuf0<>+0x20(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_56_inShuf0<>+0x28(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_56_inShuf0<>+0x30(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_56_inShuf0<>+0x38(SB)/8, $0xff00000000000000
+
+GLOBL ·expandAVX512_56_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_56_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_56_mat0<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_56_mat0<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_56_mat0<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_56_mat0<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_56_mat0<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_56_mat0<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_56_mat0<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_56_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_56_inShuf1<>+0x00(SB)/8, $0xffff010101010101
+DATA  ·expandAVX512_56_inShuf1<>+0x08(SB)/8, $0x0202010101010101
+DATA  ·expandAVX512_56_inShuf1<>+0x10(SB)/8, $0x0201010101010101
+DATA  ·expandAVX512_56_inShuf1<>+0x18(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_56_inShuf1<>+0x20(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_56_inShuf1<>+0x28(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_56_inShuf1<>+0x30(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_56_inShuf1<>+0x38(SB)/8, $0xff01010101010101
+
+GLOBL ·expandAVX512_56_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_56_inShuf2<>+0x00(SB)/8, $0xff02020202020202
+DATA  ·expandAVX512_56_inShuf2<>+0x08(SB)/8, $0xffffff0202020202
+DATA  ·expandAVX512_56_inShuf2<>+0x10(SB)/8, $0xffffffffffffff02
+DATA  ·expandAVX512_56_inShuf2<>+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_56_inShuf2<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_56_inShuf2<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_56_inShuf2<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_56_inShuf2<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_56_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_56_mat2<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_56_mat2<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_56_mat2<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_56_mat2<>+0x18(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_56_mat2<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_56_mat2<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_56_mat2<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_56_mat2<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_56_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_56_outShufLo+0x00(SB)/8, $0x0806050403020100
+DATA  ·expandAVX512_56_outShufLo+0x08(SB)/8, $0x11100e0d0c0b0a09
+DATA  ·expandAVX512_56_outShufLo+0x10(SB)/8, $0x1a19181615141312
+DATA  ·expandAVX512_56_outShufLo+0x18(SB)/8, $0x232221201e1d1c1b
+DATA  ·expandAVX512_56_outShufLo+0x20(SB)/8, $0x2c2b2a2928262524
+DATA  ·expandAVX512_56_outShufLo+0x28(SB)/8, $0x3534333231302e2d
+DATA  ·expandAVX512_56_outShufLo+0x30(SB)/8, $0x3e3d3c3b3a393836
+DATA  ·expandAVX512_56_outShufLo+0x38(SB)/8, $0x0f45444342414007
+
+GLOBL ·expandAVX512_56_outShufHi(SB), RODATA, $0x40
+DATA  ·expandAVX512_56_outShufHi+0x00(SB)/8, $0x11100d0c0b0a0908
+DATA  ·expandAVX512_56_outShufHi+0x08(SB)/8, $0x1a19181615141312
+DATA  ·expandAVX512_56_outShufHi+0x10(SB)/8, $0x232221201e1d1c1b
+DATA  ·expandAVX512_56_outShufHi+0x18(SB)/8, $0x2c2b2a2928262524
+DATA  ·expandAVX512_56_outShufHi+0x20(SB)/8, $0x3534333231302e2d
+DATA  ·expandAVX512_56_outShufHi+0x28(SB)/8, $0x3e3d3c3b3a393836
+DATA  ·expandAVX512_56_outShufHi+0x30(SB)/8, $0x0e46454443424140
+DATA  ·expandAVX512_56_outShufHi+0x38(SB)/8, $0x50174c4b4a49480f
+
+TEXT ·expandAVX512_56<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_56_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_56_mat0<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_56_inShuf1<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_56_inShuf2<>(SB), Z5
+       VMOVDQU64 ·expandAVX512_56_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_56_outShufHi(SB), Z2
        VMOVDQU64 (AX), Z6
        VPERMB Z6, Z0, Z0
        VGF2P8AFFINEQB $0, Z3, Z0, Z0
        VPERMB Z6, Z4, Z4
        VGF2P8AFFINEQB $0, Z3, Z4, Z3
        VPERMB Z6, Z5, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_56_mat2<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_56_mat2<>(SB), Z4, Z4
        VPERMI2B Z3, Z0, Z1
        VPERMI2B Z4, Z3, Z2
        RET
 
-GLOBL expandAVX512_60_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_60_inShuf0<>+0x00(SB)/8, $0x0100000000000000
-DATA  expandAVX512_60_inShuf0<>+0x08(SB)/8, $0xffffffffffffff00
-DATA  expandAVX512_60_inShuf0<>+0x10(SB)/8, $0xff00000000000000
-DATA  expandAVX512_60_inShuf0<>+0x18(SB)/8, $0xff00000000000000
-DATA  expandAVX512_60_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
-DATA  expandAVX512_60_inShuf0<>+0x28(SB)/8, $0xff00000000000000
-DATA  expandAVX512_60_inShuf0<>+0x30(SB)/8, $0xff00000000000000
-DATA  expandAVX512_60_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
-
-GLOBL expandAVX512_60_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_60_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_60_mat0<>+0x08(SB)/8, $0x0101010102020202
-DATA  expandAVX512_60_mat0<>+0x10(SB)/8, $0x0202020202020202
-DATA  expandAVX512_60_mat0<>+0x18(SB)/8, $0x0404040404040404
-DATA  expandAVX512_60_mat0<>+0x20(SB)/8, $0x0404040408080808
-DATA  expandAVX512_60_mat0<>+0x28(SB)/8, $0x0808080808080808
-DATA  expandAVX512_60_mat0<>+0x30(SB)/8, $0x1010101010101010
-DATA  expandAVX512_60_mat0<>+0x38(SB)/8, $0x1010101020202020
-
-GLOBL expandAVX512_60_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_60_inShuf1<>+0x00(SB)/8, $0xff00000000000000
-DATA  expandAVX512_60_inShuf1<>+0x08(SB)/8, $0xff00000000000000
-DATA  expandAVX512_60_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
-DATA  expandAVX512_60_inShuf1<>+0x18(SB)/8, $0xff00000000000000
-DATA  expandAVX512_60_inShuf1<>+0x20(SB)/8, $0xffffffffff010101
-DATA  expandAVX512_60_inShuf1<>+0x28(SB)/8, $0x0202020202010101
-DATA  expandAVX512_60_inShuf1<>+0x30(SB)/8, $0xffffffffffff0201
-DATA  expandAVX512_60_inShuf1<>+0x38(SB)/8, $0xff01010101010101
-
-GLOBL expandAVX512_60_mat1<>(SB), RODATA, $0x40
-DATA  expandAVX512_60_mat1<>+0x00(SB)/8, $0x2020202020202020
-DATA  expandAVX512_60_mat1<>+0x08(SB)/8, $0x4040404040404040
-DATA  expandAVX512_60_mat1<>+0x10(SB)/8, $0x4040404080808080
-DATA  expandAVX512_60_mat1<>+0x18(SB)/8, $0x8080808080808080
-DATA  expandAVX512_60_mat1<>+0x20(SB)/8, $0x0101010101010101
-DATA  expandAVX512_60_mat1<>+0x28(SB)/8, $0x0101010101010101
-DATA  expandAVX512_60_mat1<>+0x30(SB)/8, $0x0101010102020202
-DATA  expandAVX512_60_mat1<>+0x38(SB)/8, $0x0202020202020202
-
-GLOBL expandAVX512_60_inShuf2<>(SB), RODATA, $0x40
-DATA  expandAVX512_60_inShuf2<>+0x00(SB)/8, $0xff01010101010101
-DATA  expandAVX512_60_inShuf2<>+0x08(SB)/8, $0xffffffffffffff01
-DATA  expandAVX512_60_inShuf2<>+0x10(SB)/8, $0xff01010101010101
-DATA  expandAVX512_60_inShuf2<>+0x18(SB)/8, $0xff01010101010101
-DATA  expandAVX512_60_inShuf2<>+0x20(SB)/8, $0xffffffffffffff01
-DATA  expandAVX512_60_inShuf2<>+0x28(SB)/8, $0xff01010101010101
-DATA  expandAVX512_60_inShuf2<>+0x30(SB)/8, $0xff01010101010101
-DATA  expandAVX512_60_inShuf2<>+0x38(SB)/8, $0xffffffffffffff01
-
-GLOBL expandAVX512_60_mat2<>(SB), RODATA, $0x40
-DATA  expandAVX512_60_mat2<>+0x00(SB)/8, $0x0404040404040404
-DATA  expandAVX512_60_mat2<>+0x08(SB)/8, $0x0404040408080808
-DATA  expandAVX512_60_mat2<>+0x10(SB)/8, $0x0808080808080808
-DATA  expandAVX512_60_mat2<>+0x18(SB)/8, $0x1010101010101010
-DATA  expandAVX512_60_mat2<>+0x20(SB)/8, $0x1010101020202020
-DATA  expandAVX512_60_mat2<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_60_mat2<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_60_mat2<>+0x38(SB)/8, $0x4040404080808080
-
-GLOBL expandAVX512_60_inShuf3<>(SB), RODATA, $0x40
-DATA  expandAVX512_60_inShuf3<>+0x00(SB)/8, $0xff01010101010101
-DATA  expandAVX512_60_inShuf3<>+0x08(SB)/8, $0xffffffffffff0202
-DATA  expandAVX512_60_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
-
-GLOBL expandAVX512_60_mat3<>(SB), RODATA, $0x40
-DATA  expandAVX512_60_mat3<>+0x00(SB)/8, $0x8080808080808080
-DATA  expandAVX512_60_mat3<>+0x08(SB)/8, $0x0101010101010101
-DATA  expandAVX512_60_mat3<>+0x10(SB)/8, $0x0000000000000000
-DATA  expandAVX512_60_mat3<>+0x18(SB)/8, $0x0000000000000000
-DATA  expandAVX512_60_mat3<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_60_mat3<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_60_mat3<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_60_mat3<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_60_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_60_outShufLo+0x00(SB)/8, $0x0806050403020100
-DATA  expandAVX512_60_outShufLo+0x08(SB)/8, $0x1816151413121110
-DATA  expandAVX512_60_outShufLo+0x10(SB)/8, $0x28201e1d1c1b1a19
-DATA  expandAVX512_60_outShufLo+0x18(SB)/8, $0x31302e2d2c2b2a29
-DATA  expandAVX512_60_outShufLo+0x20(SB)/8, $0x4140383635343332
-DATA  expandAVX512_60_outShufLo+0x28(SB)/8, $0x4a49484645444342
-DATA  expandAVX512_60_outShufLo+0x30(SB)/8, $0x5a5958504e4d4c4b
-DATA  expandAVX512_60_outShufLo+0x38(SB)/8, $0x626160075e5d5c5b
-
-GLOBL expandAVX512_60_outShufHi0(SB), RODATA, $0x40
-DATA  expandAVX512_60_outShufHi0+0x00(SB)/8, $0x3b3a3938302a2928
-DATA  expandAVX512_60_outShufHi0+0x08(SB)/8, $0x44434241403e3d3c
-DATA  expandAVX512_60_outShufHi0+0x10(SB)/8, $0x5453525150484645
-DATA  expandAVX512_60_outShufHi0+0x18(SB)/8, $0x5d5c5b5a59585655
-DATA  expandAVX512_60_outShufHi0+0x20(SB)/8, $0x6d6c6b6a6968605e
-DATA  expandAVX512_60_outShufHi0+0x28(SB)/8, $0x767574737271706e
-DATA  expandAVX512_60_outShufHi0+0x30(SB)/8, $0xffffffffffffff78
-DATA  expandAVX512_60_outShufHi0+0x38(SB)/8, $0x31ffff2f2e2d2c2b
-
-GLOBL expandAVX512_60_outShufHi1(SB), RODATA, $0x40
-DATA  expandAVX512_60_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
-DATA  expandAVX512_60_outShufHi1+0x30(SB)/8, $0x06050403020100ff
-DATA  expandAVX512_60_outShufHi1+0x38(SB)/8, $0xff0908ffffffffff
-
-TEXT expandAVX512_60<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_60_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_60_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_60_inShuf2<>(SB), Z3
-       VMOVDQU64 expandAVX512_60_inShuf3<>(SB), Z4
-       VMOVDQU64 expandAVX512_60_outShufLo(SB), Z1
-       VMOVDQU64 expandAVX512_60_outShufHi0(SB), Z5
-       VMOVDQU64 expandAVX512_60_outShufHi1(SB), Z6
+GLOBL ·expandAVX512_60_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_inShuf0<>+0x00(SB)/8, $0x0100000000000000
+DATA  ·expandAVX512_60_inShuf0<>+0x08(SB)/8, $0xffffffffffffff00
+DATA  ·expandAVX512_60_inShuf0<>+0x10(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_60_inShuf0<>+0x18(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_60_inShuf0<>+0x20(SB)/8, $0xffffffffffffff00
+DATA  ·expandAVX512_60_inShuf0<>+0x28(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_60_inShuf0<>+0x30(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_60_inShuf0<>+0x38(SB)/8, $0xffffffffffffff00
+
+GLOBL ·expandAVX512_60_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_60_mat0<>+0x08(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_60_mat0<>+0x10(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_60_mat0<>+0x18(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_60_mat0<>+0x20(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_60_mat0<>+0x28(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_60_mat0<>+0x30(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_60_mat0<>+0x38(SB)/8, $0x1010101020202020
+
+GLOBL ·expandAVX512_60_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_inShuf1<>+0x00(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_60_inShuf1<>+0x08(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_60_inShuf1<>+0x10(SB)/8, $0xffffffffffffff00
+DATA  ·expandAVX512_60_inShuf1<>+0x18(SB)/8, $0xff00000000000000
+DATA  ·expandAVX512_60_inShuf1<>+0x20(SB)/8, $0xffffffffff010101
+DATA  ·expandAVX512_60_inShuf1<>+0x28(SB)/8, $0x0202020202010101
+DATA  ·expandAVX512_60_inShuf1<>+0x30(SB)/8, $0xffffffffffff0201
+DATA  ·expandAVX512_60_inShuf1<>+0x38(SB)/8, $0xff01010101010101
+
+GLOBL ·expandAVX512_60_mat1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_mat1<>+0x00(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_60_mat1<>+0x08(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_60_mat1<>+0x10(SB)/8, $0x4040404080808080
+DATA  ·expandAVX512_60_mat1<>+0x18(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_60_mat1<>+0x20(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_60_mat1<>+0x28(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_60_mat1<>+0x30(SB)/8, $0x0101010102020202
+DATA  ·expandAVX512_60_mat1<>+0x38(SB)/8, $0x0202020202020202
+
+GLOBL ·expandAVX512_60_inShuf2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_inShuf2<>+0x00(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_60_inShuf2<>+0x08(SB)/8, $0xffffffffffffff01
+DATA  ·expandAVX512_60_inShuf2<>+0x10(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_60_inShuf2<>+0x18(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_60_inShuf2<>+0x20(SB)/8, $0xffffffffffffff01
+DATA  ·expandAVX512_60_inShuf2<>+0x28(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_60_inShuf2<>+0x30(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_60_inShuf2<>+0x38(SB)/8, $0xffffffffffffff01
+
+GLOBL ·expandAVX512_60_mat2<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_mat2<>+0x00(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_60_mat2<>+0x08(SB)/8, $0x0404040408080808
+DATA  ·expandAVX512_60_mat2<>+0x10(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_60_mat2<>+0x18(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_60_mat2<>+0x20(SB)/8, $0x1010101020202020
+DATA  ·expandAVX512_60_mat2<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_60_mat2<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_60_mat2<>+0x38(SB)/8, $0x4040404080808080
+
+GLOBL ·expandAVX512_60_inShuf3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_inShuf3<>+0x00(SB)/8, $0xff01010101010101
+DATA  ·expandAVX512_60_inShuf3<>+0x08(SB)/8, $0xffffffffffff0202
+DATA  ·expandAVX512_60_inShuf3<>+0x10(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_inShuf3<>+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_inShuf3<>+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_inShuf3<>+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_inShuf3<>+0x30(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_inShuf3<>+0x38(SB)/8, $0xffffffffffffffff
+
+GLOBL ·expandAVX512_60_mat3<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_mat3<>+0x00(SB)/8, $0x8080808080808080
+DATA  ·expandAVX512_60_mat3<>+0x08(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_60_mat3<>+0x10(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_60_mat3<>+0x18(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_60_mat3<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_60_mat3<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_60_mat3<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_60_mat3<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_60_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_outShufLo+0x00(SB)/8, $0x0806050403020100
+DATA  ·expandAVX512_60_outShufLo+0x08(SB)/8, $0x1816151413121110
+DATA  ·expandAVX512_60_outShufLo+0x10(SB)/8, $0x28201e1d1c1b1a19
+DATA  ·expandAVX512_60_outShufLo+0x18(SB)/8, $0x31302e2d2c2b2a29
+DATA  ·expandAVX512_60_outShufLo+0x20(SB)/8, $0x4140383635343332
+DATA  ·expandAVX512_60_outShufLo+0x28(SB)/8, $0x4a49484645444342
+DATA  ·expandAVX512_60_outShufLo+0x30(SB)/8, $0x5a5958504e4d4c4b
+DATA  ·expandAVX512_60_outShufLo+0x38(SB)/8, $0x626160075e5d5c5b
+
+GLOBL ·expandAVX512_60_outShufHi0(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_outShufHi0+0x00(SB)/8, $0x3b3a3938302a2928
+DATA  ·expandAVX512_60_outShufHi0+0x08(SB)/8, $0x44434241403e3d3c
+DATA  ·expandAVX512_60_outShufHi0+0x10(SB)/8, $0x5453525150484645
+DATA  ·expandAVX512_60_outShufHi0+0x18(SB)/8, $0x5d5c5b5a59585655
+DATA  ·expandAVX512_60_outShufHi0+0x20(SB)/8, $0x6d6c6b6a6968605e
+DATA  ·expandAVX512_60_outShufHi0+0x28(SB)/8, $0x767574737271706e
+DATA  ·expandAVX512_60_outShufHi0+0x30(SB)/8, $0xffffffffffffff78
+DATA  ·expandAVX512_60_outShufHi0+0x38(SB)/8, $0x31ffff2f2e2d2c2b
+
+GLOBL ·expandAVX512_60_outShufHi1(SB), RODATA, $0x40
+DATA  ·expandAVX512_60_outShufHi1+0x00(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_outShufHi1+0x08(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_outShufHi1+0x10(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_outShufHi1+0x18(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_outShufHi1+0x20(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_outShufHi1+0x28(SB)/8, $0xffffffffffffffff
+DATA  ·expandAVX512_60_outShufHi1+0x30(SB)/8, $0x06050403020100ff
+DATA  ·expandAVX512_60_outShufHi1+0x38(SB)/8, $0xff0908ffffffffff
+
+TEXT ·expandAVX512_60<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_60_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_60_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_60_inShuf2<>(SB), Z3
+       VMOVDQU64 ·expandAVX512_60_inShuf3<>(SB), Z4
+       VMOVDQU64 ·expandAVX512_60_outShufLo(SB), Z1
+       VMOVDQU64 ·expandAVX512_60_outShufHi0(SB), Z5
+       VMOVDQU64 ·expandAVX512_60_outShufHi1(SB), Z6
        VMOVDQU64 (AX), Z7
        VPERMB Z7, Z0, Z0
-       VGF2P8AFFINEQB $0, expandAVX512_60_mat0<>(SB), Z0, Z0
+       VGF2P8AFFINEQB $0, ·expandAVX512_60_mat0<>(SB), Z0, Z0
        VPERMB Z7, Z2, Z2
-       VGF2P8AFFINEQB $0, expandAVX512_60_mat1<>(SB), Z2, Z2
+       VGF2P8AFFINEQB $0, ·expandAVX512_60_mat1<>(SB), Z2, Z2
        VPERMB Z7, Z3, Z3
-       VGF2P8AFFINEQB $0, expandAVX512_60_mat2<>(SB), Z3, Z3
+       VGF2P8AFFINEQB $0, ·expandAVX512_60_mat2<>(SB), Z3, Z3
        VPERMB Z7, Z4, Z4
-       VGF2P8AFFINEQB $0, expandAVX512_60_mat3<>(SB), Z4, Z4
+       VGF2P8AFFINEQB $0, ·expandAVX512_60_mat3<>(SB), Z4, Z4
        VPERMI2B Z2, Z0, Z1
        MOVQ $0x9f01ffffffffffff, AX
        KMOVQ AX, K1
@@ -2575,51 +2575,51 @@ TEXT expandAVX512_60<>(SB), NOSPLIT, $0-0
        VPORQ Z0, Z5, Z2
        RET
 
-GLOBL expandAVX512_64_inShuf0<>(SB), RODATA, $0x40
-DATA  expandAVX512_64_inShuf0<>+0x00(SB)/8, $0x0000000000000000
-DATA  expandAVX512_64_inShuf0<>+0x08(SB)/8, $0x0000000000000000
-DATA  expandAVX512_64_inShuf0<>+0x10(SB)/8, $0x0000000000000000
-DATA  expandAVX512_64_inShuf0<>+0x18(SB)/8, $0x0000000000000000
-DATA  expandAVX512_64_inShuf0<>+0x20(SB)/8, $0x0000000000000000
-DATA  expandAVX512_64_inShuf0<>+0x28(SB)/8, $0x0000000000000000
-DATA  expandAVX512_64_inShuf0<>+0x30(SB)/8, $0x0000000000000000
-DATA  expandAVX512_64_inShuf0<>+0x38(SB)/8, $0x0000000000000000
-
-GLOBL expandAVX512_64_mat0<>(SB), RODATA, $0x40
-DATA  expandAVX512_64_mat0<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_64_mat0<>+0x08(SB)/8, $0x0202020202020202
-DATA  expandAVX512_64_mat0<>+0x10(SB)/8, $0x0404040404040404
-DATA  expandAVX512_64_mat0<>+0x18(SB)/8, $0x0808080808080808
-DATA  expandAVX512_64_mat0<>+0x20(SB)/8, $0x1010101010101010
-DATA  expandAVX512_64_mat0<>+0x28(SB)/8, $0x2020202020202020
-DATA  expandAVX512_64_mat0<>+0x30(SB)/8, $0x4040404040404040
-DATA  expandAVX512_64_mat0<>+0x38(SB)/8, $0x8080808080808080
-
-GLOBL expandAVX512_64_inShuf1<>(SB), RODATA, $0x40
-DATA  expandAVX512_64_inShuf1<>+0x00(SB)/8, $0x0101010101010101
-DATA  expandAVX512_64_inShuf1<>+0x08(SB)/8, $0x0101010101010101
-DATA  expandAVX512_64_inShuf1<>+0x10(SB)/8, $0x0101010101010101
-DATA  expandAVX512_64_inShuf1<>+0x18(SB)/8, $0x0101010101010101
-DATA  expandAVX512_64_inShuf1<>+0x20(SB)/8, $0x0101010101010101
-DATA  expandAVX512_64_inShuf1<>+0x28(SB)/8, $0x0101010101010101
-DATA  expandAVX512_64_inShuf1<>+0x30(SB)/8, $0x0101010101010101
-DATA  expandAVX512_64_inShuf1<>+0x38(SB)/8, $0x0101010101010101
-
-GLOBL expandAVX512_64_outShufLo(SB), RODATA, $0x40
-DATA  expandAVX512_64_outShufLo+0x00(SB)/8, $0x0706050403020100
-DATA  expandAVX512_64_outShufLo+0x08(SB)/8, $0x0f0e0d0c0b0a0908
-DATA  expandAVX512_64_outShufLo+0x10(SB)/8, $0x1716151413121110
-DATA  expandAVX512_64_outShufLo+0x18(SB)/8, $0x1f1e1d1c1b1a1918
-DATA  expandAVX512_64_outShufLo+0x20(SB)/8, $0x2726252423222120
-DATA  expandAVX512_64_outShufLo+0x28(SB)/8, $0x2f2e2d2c2b2a2928
-DATA  expandAVX512_64_outShufLo+0x30(SB)/8, $0x3736353433323130
-DATA  expandAVX512_64_outShufLo+0x38(SB)/8, $0x3f3e3d3c3b3a3938
-
-TEXT expandAVX512_64<>(SB), NOSPLIT, $0-0
-       VMOVDQU64 expandAVX512_64_inShuf0<>(SB), Z0
-       VMOVDQU64 expandAVX512_64_mat0<>(SB), Z1
-       VMOVDQU64 expandAVX512_64_inShuf1<>(SB), Z2
-       VMOVDQU64 expandAVX512_64_outShufLo(SB), Z3
+GLOBL ·expandAVX512_64_inShuf0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_64_inShuf0<>+0x00(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_64_inShuf0<>+0x08(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_64_inShuf0<>+0x10(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_64_inShuf0<>+0x18(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_64_inShuf0<>+0x20(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_64_inShuf0<>+0x28(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_64_inShuf0<>+0x30(SB)/8, $0x0000000000000000
+DATA  ·expandAVX512_64_inShuf0<>+0x38(SB)/8, $0x0000000000000000
+
+GLOBL ·expandAVX512_64_mat0<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_64_mat0<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_64_mat0<>+0x08(SB)/8, $0x0202020202020202
+DATA  ·expandAVX512_64_mat0<>+0x10(SB)/8, $0x0404040404040404
+DATA  ·expandAVX512_64_mat0<>+0x18(SB)/8, $0x0808080808080808
+DATA  ·expandAVX512_64_mat0<>+0x20(SB)/8, $0x1010101010101010
+DATA  ·expandAVX512_64_mat0<>+0x28(SB)/8, $0x2020202020202020
+DATA  ·expandAVX512_64_mat0<>+0x30(SB)/8, $0x4040404040404040
+DATA  ·expandAVX512_64_mat0<>+0x38(SB)/8, $0x8080808080808080
+
+GLOBL ·expandAVX512_64_inShuf1<>(SB), RODATA, $0x40
+DATA  ·expandAVX512_64_inShuf1<>+0x00(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_64_inShuf1<>+0x08(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_64_inShuf1<>+0x10(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_64_inShuf1<>+0x18(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_64_inShuf1<>+0x20(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_64_inShuf1<>+0x28(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_64_inShuf1<>+0x30(SB)/8, $0x0101010101010101
+DATA  ·expandAVX512_64_inShuf1<>+0x38(SB)/8, $0x0101010101010101
+
+GLOBL ·expandAVX512_64_outShufLo(SB), RODATA, $0x40
+DATA  ·expandAVX512_64_outShufLo+0x00(SB)/8, $0x0706050403020100
+DATA  ·expandAVX512_64_outShufLo+0x08(SB)/8, $0x0f0e0d0c0b0a0908
+DATA  ·expandAVX512_64_outShufLo+0x10(SB)/8, $0x1716151413121110
+DATA  ·expandAVX512_64_outShufLo+0x18(SB)/8, $0x1f1e1d1c1b1a1918
+DATA  ·expandAVX512_64_outShufLo+0x20(SB)/8, $0x2726252423222120
+DATA  ·expandAVX512_64_outShufLo+0x28(SB)/8, $0x2f2e2d2c2b2a2928
+DATA  ·expandAVX512_64_outShufLo+0x30(SB)/8, $0x3736353433323130
+DATA  ·expandAVX512_64_outShufLo+0x38(SB)/8, $0x3f3e3d3c3b3a3938
+
+TEXT ·expandAVX512_64<>(SB), NOSPLIT, $0-0
+       VMOVDQU64 ·expandAVX512_64_inShuf0<>(SB), Z0
+       VMOVDQU64 ·expandAVX512_64_mat0<>(SB), Z1
+       VMOVDQU64 ·expandAVX512_64_inShuf1<>(SB), Z2
+       VMOVDQU64 ·expandAVX512_64_outShufLo(SB), Z3
        VMOVDQU64 (AX), Z4
        VPERMB Z4, Z0, Z0
        VGF2P8AFFINEQB $0, Z1, Z0, Z0
index e36defb2e18056770d16be63a225cf39f974d233..a6c6cc994d7c7671ee857b88c32697aa259447c4 100644 (file)
@@ -63,7 +63,7 @@ func genExpanders(file *gen.File) {
                xf := int(ob) / 8
                log.Printf("size class %d bytes, expansion %dx", ob, xf)
 
-               fn := gen.NewFunc(fmt.Sprintf("expandAVX512_%d<>", xf))
+               fn := gen.NewFunc(fmt.Sprintf("·expandAVX512_%d<>", xf))
                ptrObjBits := gen.Arg[gen.Ptr[gen.Uint8x64]](fn)
 
                if xf == 1 {