From 6056cc5df602938a97e8d70176fa90e2b09f3020 Mon Sep 17 00:00:00 2001
From: Michael Hudson-Doyle
Date: Wed, 28 Oct 2015 12:10:28 +1300
Subject: [PATCH] runtime: handle volatility of CX when dynamically linking on
 linux/386

Mostly by avoiding CX entirely, sometimes by reloading it.

I also vetted the assembly in other packages, it's all fine.

Change-Id: I50059669aaaa04efa303cf22ac228f9d14d83db0
Reviewed-on: https://go-review.googlesource.com/16386
Reviewed-by: Ian Lance Taylor
---
 src/runtime/asm_386.s       | 76 ++++++++++++++++++++------------------
 src/runtime/sys_linux_386.s | 16 ++++----
 2 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index 3654db4d0c..e816f07efc 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -45,7 +45,8 @@ notintel:
 
 	MOVL	$1, AX
 	CPUID
-	MOVL	CX, runtime·cpuid_ecx(SB)
+	MOVL	CX, AX // Move to global variable clobbers CX when generating PIC
+	MOVL	AX, runtime·cpuid_ecx(SB)
 	MOVL	DX, runtime·cpuid_edx(SB)
 
 nocpuinfo:
@@ -90,14 +91,14 @@ needtls:
 ok:
 	// set up m and g "registers"
 	get_tls(BX)
-	LEAL	runtime·g0(SB), CX
-	MOVL	CX, g(BX)
+	LEAL	runtime·g0(SB), DX
+	MOVL	DX, g(BX)
 	LEAL	runtime·m0(SB), AX
 
 	// save m->g0 = g0
-	MOVL	CX, m_g0(AX)
+	MOVL	DX, m_g0(AX)
 	// save g0->m = m0
-	MOVL	AX, g_m(CX)
+	MOVL	AX, g_m(DX)
 
 	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
 
@@ -187,9 +188,9 @@ TEXT runtime·gogo(SB), NOSPLIT, $0-4
 // to keep running g.
 TEXT runtime·mcall(SB), NOSPLIT, $0-4
 	MOVL	fn+0(FP), DI
-	
-	get_tls(CX)
-	MOVL	g(CX), AX	// save state in g->sched
+
+	get_tls(DX)
+	MOVL	g(DX), AX	// save state in g->sched
 	MOVL	0(SP), BX	// caller's PC
 	MOVL	BX, (g_sched+gobuf_pc)(AX)
 	LEAL	fn+0(FP), BX	// caller's SP
@@ -197,14 +198,14 @@ TEXT runtime·mcall(SB), NOSPLIT, $0-4
 	MOVL	AX, (g_sched+gobuf_g)(AX)
 
 	// switch to m->g0 & its stack, call fn
-	MOVL	g(CX), BX
+	MOVL	g(DX), BX
 	MOVL	g_m(BX), BX
 	MOVL	m_g0(BX), SI
 	CMPL	SI, AX	// if g == m->g0 call badmcall
 	JNE	3(PC)
 	MOVL	$runtime·badmcall(SB), AX
 	JMP	AX
-	MOVL	SI, g(CX)	// g = m->g0
+	MOVL	SI, g(DX)	// g = m->g0
 	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
 	PUSHL	AX
 	MOVL	DI, DX
@@ -255,6 +256,7 @@ switch:
 	MOVL	AX, (g_sched+gobuf_g)(AX)
 
 	// switch to g0
+	get_tls(CX)
 	MOVL	DX, g(CX)
 	MOVL	(g_sched+gobuf_sp)(DX), BX
 	// make it look like mstart called systemstack on g0, to stop traceback
@@ -530,11 +532,13 @@ TEXT ·asmcgocall(SB),NOSPLIT,$0-12
 	MOVL	m_g0(BP), SI
 	MOVL	g(CX), DI
 	CMPL	SI, DI
-	JEQ	4(PC)
+	JEQ	noswitch
 	CALL	gosave<>(SB)
+	get_tls(CX)
 	MOVL	SI, g(CX)
 	MOVL	(g_sched+gobuf_sp)(SI), SP
 
+noswitch:
 	// Now on a scheduling stack (a pthread-created stack).
 	SUBL	$32, SP
 	ANDL	$~15, SP	// alignment, perhaps unnecessary
@@ -732,8 +736,8 @@ nobar:
 TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
 	MOVL	argp+0(FP),AX		// addr of first arg
 	MOVL	pc+4(FP), BX
-	MOVL	-4(AX), CX
-	CMPL	CX, runtime·stackBarrierPC(SB)
+	MOVL	-4(AX), DX
+	CMPL	DX, runtime·stackBarrierPC(SB)
 	JEQ	setbar
 	MOVL	BX, -4(AX)		// set calling pc
 	RET
@@ -801,39 +805,39 @@ TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
 // hash function using AES hardware instructions
 TEXT runtime·aeshash(SB),NOSPLIT,$0-16
 	MOVL	p+0(FP), AX	// ptr to data
-	MOVL	s+8(FP), CX	// size
+	MOVL	s+8(FP), BX	// size
 	LEAL	ret+12(FP), DX
 	JMP	runtime·aeshashbody(SB)
 
 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
 	MOVL	p+0(FP), AX	// ptr to string object
-	MOVL	4(AX), CX	// length of string
+	MOVL	4(AX), BX	// length of string
 	MOVL	(AX), AX	// string data
 	LEAL	ret+8(FP), DX
 	JMP	runtime·aeshashbody(SB)
 
 // AX: data
-// CX: length
+// BX: length
 // DX: address to put return value
 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
 	MOVL	h+4(FP), X0			// 32 bits of per-table hash seed
-	PINSRW	$4, CX, X0			// 16 bits of length
+	PINSRW	$4, BX, X0			// 16 bits of length
 	PSHUFHW	$0, X0, X0			// replace size with its low 2 bytes repeated 4 times
 	MOVO	X0, X1				// save unscrambled seed
 	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
 	AESENC	X0, X0				// scramble seed
 
-	CMPL	CX, $16
+	CMPL	BX, $16
 	JB	aes0to15
 	JE	aes16
-	CMPL	CX, $32
+	CMPL	BX, $32
 	JBE	aes17to32
-	CMPL	CX, $64
+	CMPL	BX, $64
 	JBE	aes33to64
 	JMP	aes65plus
 
 aes0to15:
-	TESTL	CX, CX
+	TESTL	BX, BX
 	JE	aes0
 
 	ADDL	$16, AX
@@ -843,8 +847,8 @@ aes0to15:
 	// 16 bytes loaded at this address won't cross
 	// a page boundary, so we can load it directly.
 	MOVOU	-16(AX), X1
-	ADDL	CX, CX
-	PAND	masks<>(SB)(CX*8), X1
+	ADDL	BX, BX
+	PAND	masks<>(SB)(BX*8), X1
 
 final1:
 	AESENC	X0, X1	// scramble input, xor in seed
@@ -857,9 +861,9 @@ endofpage:
 	// address ends in 1111xxxx. Might be up against
 	// a page boundary, so load ending at last byte.
 	// Then shift bytes down using pshufb.
-	MOVOU	-32(AX)(CX*1), X1
-	ADDL	CX, CX
-	PSHUFB	shifts<>(SB)(CX*8), X1
+	MOVOU	-32(AX)(BX*1), X1
+	ADDL	BX, BX
+	PSHUFB	shifts<>(SB)(BX*8), X1
 	JMP	final1
 
 aes0:
@@ -879,7 +883,7 @@ aes17to32:
 
 	// load data to be hashed
 	MOVOU	(AX), X2
-	MOVOU	-16(AX)(CX*1), X3
+	MOVOU	-16(AX)(BX*1), X3
 
 	// scramble 3 times
 	AESENC	X0, X2
@@ -907,8 +911,8 @@ aes33to64:
 
 	MOVOU	(AX), X4
 	MOVOU	16(AX), X5
-	MOVOU	-32(AX)(CX*1), X6
-	MOVOU	-16(AX)(CX*1), X7
+	MOVOU	-32(AX)(BX*1), X6
+	MOVOU	-16(AX)(BX*1), X7
 
 	AESENC	X0, X4
 	AESENC	X1, X5
@@ -943,10 +947,10 @@ aes65plus:
 	AESENC	X3, X3
 
 	// start with last (possibly overlapping) block
-	MOVOU	-64(AX)(CX*1), X4
-	MOVOU	-48(AX)(CX*1), X5
-	MOVOU	-32(AX)(CX*1), X6
-	MOVOU	-16(AX)(CX*1), X7
+	MOVOU	-64(AX)(BX*1), X4
+	MOVOU	-48(AX)(BX*1), X5
+	MOVOU	-32(AX)(BX*1), X6
+	MOVOU	-16(AX)(BX*1), X7
 
 	// scramble state once
 	AESENC	X0, X4
@@ -955,8 +959,8 @@ aes65plus:
 	AESENC	X3, X7
 
 	// compute number of remaining 64-byte blocks
-	DECL	CX
-	SHRL	$6, CX
+	DECL	BX
+	SHRL	$6, BX
 
 aesloop:
 	// scramble state, xor in a block
@@ -976,7 +980,7 @@ aesloop:
 	AESENC	X7, X7
 
 	ADDL	$64, AX
-	DECL	CX
+	DECL	BX
 	JNE	aesloop
 
 	// 2 more scrambles to finish
diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s
index 3067c03d0e..1a3aaf0104 100644
--- a/src/runtime/sys_linux_386.s
+++ b/src/runtime/sys_linux_386.s
@@ -408,7 +408,7 @@ TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
 // setldt(int entry, int address, int limit)
 TEXT runtime·setldt(SB),NOSPLIT,$32
 	MOVL	entry+0(FP), BX	// entry
-	MOVL	address+4(FP), CX	// base address
+	MOVL	address+4(FP), DX	// base address
 
 #ifdef GOOS_android
 	/*
@@ -416,8 +416,8 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
 	 * address currently holds m->tls, which must be %gs:0xf8.
 	 * See cgo/gcc_android_386.c for the derivation of the constant.
 	 */
-	SUBL	$0xf8, CX
-	MOVL	CX, 0(CX)
+	SUBL	$0xf8, DX
+	MOVL	DX, 0(DX)
 #else
 	/*
 	 * When linking against the system libraries,
@@ -429,23 +429,23 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
 	 * To accommodate that rewrite, we translate
 	 * the address here and bump the limit to 0xffffffff (no limit)
 	 * so that -4(GS) maps to 0(address).
-	 * Also, the final 0(GS) (current 4(CX)) has to point
+	 * Also, the final 0(GS) (current 4(DX)) has to point
 	 * to itself, to mimic ELF.
 	 */
-	ADDL	$0x4, CX	// address
-	MOVL	CX, 0(CX)
+	ADDL	$0x4, DX	// address
+	MOVL	DX, 0(DX)
 
 	// We copy the glibc dynamic linker behaviour of storing the
 	// __kernel_vsyscall entry point at 0x10(GS) so that it can be invoked
 	// by "CALL 0x10(GS)" in all situations, not only those where the
 	// binary is actually dynamically linked.
 	MOVL	runtime·_vdso(SB), AX
-	MOVL	AX, 0x10(CX)
+	MOVL	AX, 0x10(DX)
 #endif
 
 	// set up user_desc
 	LEAL	16(SP), AX	// struct user_desc
 	MOVL	BX, 0(AX)
-	MOVL	CX, 4(AX)
+	MOVL	DX, 4(AX)
 	MOVL	$0xfffff, 8(AX)
 	MOVL	$(SEG_32BIT|LIMIT_IN_PAGES|USEABLE|CONTENTS_DATA), 12(AX)	// flag bits
-- 
2.48.1
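
Background for reviewers: on linux/386, when the toolchain generates position-independent
code, an access to a global symbol such as runtime·cpuid_ecx(SB) goes through the GOT and
the emitted sequence may use CX as a scratch register, so a value sitting in CX cannot be
assumed to survive the access. Below is a rough sketch of the two coping patterns the
patch relies on, written against a hypothetical global runtime·someVar (the get_tls and g
macros are the usual ones from runtime/go_tls.h); it is an illustration, not code from
the patch.

#include "go_tls.h"
#include "textflag.h"

// Illustrative only; runtime·someVar and ·cxVolatilitySketch are hypothetical.
TEXT ·cxVolatilitySketch(SB), NOSPLIT, $0-0
	// Pattern 1: avoid CX entirely. Stage the value in AX first,
	// since the PIC sequence for the global store may clobber CX.
	MOVL	CX, AX
	MOVL	AX, runtime·someVar(SB)

	// Pattern 2: reload CX afterwards. Following a CALL or a global
	// access that may have trashed CX, re-derive the TLS base rather
	// than assuming the old contents survived.
	get_tls(CX)
	MOVL	g(CX), BX
	RET

The asm_386.s hunks above mostly use pattern 1, moving values into AX, BX or DX in the
startup, mcall, setcallerpc and AES hash code, and pattern 2 in systemstack and
asmcgocall, where get_tls(CX) is re-issued before CX is used again; sys_linux_386.s
simply switches setldt from CX to DX.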