Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/x86: add ADX extension
author Ilya Tocar <ilya.tocar@intel.com>
Tue, 31 Oct 2017 20:38:06 +0000 (15:38 -0500)
committer Ilya Tocar <ilya.tocar@intel.com>
Thu, 2 Nov 2017 15:41:50 +0000 (15:41 +0000)
Add support for ADX cpuid bit detection and all instructions
implied by that bit (ADOX/ADCX). They are useful for RSA and math/big in
general.

Change-Id: Idaa93303ead48fd18b9b3da09b3e79de2f7e2193
Reviewed-on: https://go-review.googlesource.com/74850
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/asm/internal/asm/testdata/amd64enc.s
src/cmd/internal/obj/x86/aenum.go
src/cmd/internal/obj/x86/anames.go
src/cmd/internal/obj/x86/asm6.go
src/internal/cpu/cpu.go
src/internal/cpu/cpu_x86.go

index cc4d6658e9e799f39cb6ed05246550434dc02836..cbfe8b4717dbdd3054439330888f488453cc8374 100644 (file)
@@ -84,22 +84,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
        ADCB (R11), DL                          // 411213
        ADCB (BX), R11                          // 44121b
        ADCB (R11), R11                         // 45121b
-       //TODO: ADCXL (BX), DX                  // 660f38f613
-       //TODO: ADCXL (R11), DX                 // 66410f38f613
-       //TODO: ADCXL DX, DX                    // 660f38f6d2
-       //TODO: ADCXL R11, DX                   // 66410f38f6d3
-       //TODO: ADCXL (BX), R11                 // 66440f38f61b
-       //TODO: ADCXL (R11), R11                // 66450f38f61b
-       //TODO: ADCXL DX, R11                   // 66440f38f6da
-       //TODO: ADCXL R11, R11                  // 66450f38f6db
-       //TODO: ADCXQ (BX), DX                  // 66480f38f613
-       //TODO: ADCXQ (R11), DX                 // 66490f38f613
-       //TODO: ADCXQ DX, DX                    // 66480f38f6d2
-       //TODO: ADCXQ R11, DX                   // 66490f38f6d3
-       //TODO: ADCXQ (BX), R11                 // 664c0f38f61b
-       //TODO: ADCXQ (R11), R11                // 664d0f38f61b
-       //TODO: ADCXQ DX, R11                   // 664c0f38f6da
-       //TODO: ADCXQ R11, R11                  // 664d0f38f6db
+       ADCXL (BX), DX                          // 660f38f613
+       ADCXL (R11), DX                         // 66410f38f613
+       ADCXL DX, DX                            // 660f38f6d2
+       ADCXL R11, DX                           // 66410f38f6d3
+       ADCXL (BX), R11                         // 66440f38f61b
+       ADCXL (R11), R11                        // 66450f38f61b
+       ADCXL DX, R11                           // 66440f38f6da
+       ADCXL R11, R11                          // 66450f38f6db
+       ADCXQ (BX), DX                          // 66480f38f613
+       ADCXQ (R11), DX                         // 66490f38f613
+       ADCXQ DX, DX                            // 66480f38f6d2
+       ADCXQ R11, DX                           // 66490f38f6d3
+       ADCXQ (BX), R11                         // 664c0f38f61b
+       ADCXQ (R11), R11                        // 664d0f38f61b
+       ADCXQ DX, R11                           // 664c0f38f6da
+       ADCXQ R11, R11                          // 664d0f38f6db
        ADDB $7, AL                             // 0407
        ADDW $61731, AX                         // 660523f1
        ADDL $4045620583, AX                    // 05674523f1
@@ -228,22 +228,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
        ADDSUBPS (R11), X11                     // f2450fd01b
        ADDSUBPS X2, X11                        // f2440fd0da
        ADDSUBPS X11, X11                       // f2450fd0db
-       //TODO: ADOXL (BX), DX                  // f30f38f613
-       //TODO: ADOXL (R11), DX                 // f3410f38f613
-       //TODO: ADOXL DX, DX                    // f30f38f6d2
-       //TODO: ADOXL R11, DX                   // f3410f38f6d3
-       //TODO: ADOXL (BX), R11                 // f3440f38f61b
-       //TODO: ADOXL (R11), R11                // f3450f38f61b
-       //TODO: ADOXL DX, R11                   // f3440f38f6da
-       //TODO: ADOXL R11, R11                  // f3450f38f6db
-       //TODO: ADOXQ (BX), DX                  // f3480f38f613
-       //TODO: ADOXQ (R11), DX                 // f3490f38f613
-       //TODO: ADOXQ DX, DX                    // f3480f38f6d2
-       //TODO: ADOXQ R11, DX                   // f3490f38f6d3
-       //TODO: ADOXQ (BX), R11                 // f34c0f38f61b
-       //TODO: ADOXQ (R11), R11                // f34d0f38f61b
-       //TODO: ADOXQ DX, R11                   // f34c0f38f6da
-       //TODO: ADOXQ R11, R11                  // f34d0f38f6db
+       ADOXL (BX), DX                          // f30f38f613
+       ADOXL (R11), DX                         // f3410f38f613
+       ADOXL DX, DX                            // f30f38f6d2
+       ADOXL R11, DX                           // f3410f38f6d3
+       ADOXL (BX), R11                         // f3440f38f61b
+       ADOXL (R11), R11                        // f3450f38f61b
+       ADOXL DX, R11                           // f3440f38f6da
+       ADOXL R11, R11                          // f3450f38f6db
+       ADOXQ (BX), DX                          // f3480f38f613
+       ADOXQ (R11), DX                         // f3490f38f613
+       ADOXQ DX, DX                            // f3480f38f6d2
+       ADOXQ R11, DX                           // f3490f38f6d3
+       ADOXQ (BX), R11                         // f34c0f38f61b
+       ADOXQ (R11), R11                        // f34d0f38f61b
+       ADOXQ DX, R11                           // f34c0f38f6da
+       ADOXQ R11, R11                          // f34d0f38f6db
        AESDEC (BX), X2                         // 660f38de13
        AESDEC (R11), X2                        // 66410f38de13
        AESDEC X2, X2                           // 660f38ded2
index c02e1aa1552d45144adb24c3a8617512204126ca..7e1c48d50f6dfb809c99dbf54ef3ddfd1a3fe9be 100644 (file)
@@ -14,9 +14,13 @@ const (
        AADCB
        AADCL
        AADCW
+       AADCXL
+       AADCXQ
        AADDB
        AADDL
        AADDW
+       AADOXL
+       AADOXQ
        AADJSP
        AANDB
        AANDL
index c12729fcab0cf309d49bf1d586f4f5290ebf6a1a..0bbf1036eb322fe3192177a2f2a624545cdfd5e9 100644 (file)
@@ -13,9 +13,13 @@ var Anames = []string{
        "ADCB",
        "ADCL",
        "ADCW",
+       "ADCXL",
+       "ADCXQ",
        "ADDB",
        "ADDL",
        "ADDW",
+       "ADOXL",
+       "ADOXQ",
        "ADJSP",
        "ANDB",
        "ANDL",
index e0c03ea5b67881fc4cc0021c1c6bd1d4d96d9b9b..6b5cb29cb419466de850e144e1a15cacb579b677 100644 (file)
@@ -225,6 +225,9 @@ const (
        Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
        Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
        Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
+       Pq4w = 0x69 /* Pq4 with Rex.w 66 0F 38 */
+       Pq5  = 0x6a /* xmm escape 5: F3 0F 38 */
+       Pq5w = 0x6b /* Pq5 with Rex.w F3 0F 38 */
        Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
        Pw   = 0x48 /* Rex.w */
        Pw8  = 0x90 // symbolic; exact value doesn't matter
@@ -956,6 +959,8 @@ var optab =
        {AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
        {AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
        {AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
+       {AADCXL, yml_rl, Pq4, [23]uint8{0xf6}},
+       {AADCXQ, yml_rl, Pq4w, [23]uint8{0xf6}},
        {AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
        {AADDPD, yxm, Pq, [23]uint8{0x58}},
@@ -966,6 +971,8 @@ var optab =
        {AADDSUBPD, yxm, Pq, [23]uint8{0xd0}},
        {AADDSUBPS, yxm, Pf2, [23]uint8{0xd0}},
        {AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
+       {AADOXL, yml_rl, Pq5, [23]uint8{0xf6}},
+       {AADOXQ, yml_rl, Pq5w, [23]uint8{0xf6}},
        {AADJSP, nil, 0, [23]uint8{}},
        {AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
        {AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
@@ -3432,6 +3439,17 @@ func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
                        case Pq4: /*  66 0F 38 */
                                asmbuf.Put3(0x66, 0x0F, 0x38)
 
+                       case Pq4w: /*  66 0F 38 + REX.W */
+                               asmbuf.rexflag |= Pw
+                               asmbuf.Put3(0x66, 0x0F, 0x38)
+
+                       case Pq5: /*  F3 0F 38 */
+                               asmbuf.Put3(0xF3, 0x0F, 0x38)
+
+                       case Pq5w: /*  F3 0F 38 + REX.W */
+                               asmbuf.rexflag |= Pw
+                               asmbuf.Put3(0xF3, 0x0F, 0x38)
+
                        case Pf2, /* xmm opcode escape */
                                Pf3:
                                asmbuf.Put2(o.prefix, Pm)
index d84a92ce56f177c3ffefe05936398fd3dbdba3a4..38fedc4e2b4aacd7bc97d333db771124fee59fd7 100644 (file)
@@ -15,6 +15,7 @@ var X86 x86
 type x86 struct {
        _            [CacheLineSize]byte
        HasAES       bool
+       HasADX       bool
        HasAVX       bool
        HasAVX2      bool
        HasBMI1      bool
index 6a7b5c2271e8ffdbca8efb38d3157b1ff0b7dd15..34c632f2f993d98e4c66e3e1e3f53bd5b89577b1 100644 (file)
@@ -53,6 +53,7 @@ func init() {
        X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
        X86.HasBMI2 = isSet(8, ebx7)
        X86.HasERMS = isSet(9, ebx7)
+       X86.HasADX = isSet(19, ebx7)
 }
 
 func isSet(bitpos uint, value uint32) bool {