From: Russ Cox Date: Thu, 3 Apr 2014 23:07:33 +0000 (-0400) Subject: runtime: test malformed address fault and fix on OS X X-Git-Tag: go1.3beta1~192 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=17f9423e75db40a08369c7ea23449db1c26a4890;p=gostls13.git runtime: test malformed address fault and fix on OS X The garbage collector poison pointers (0x6969696969696969 and 0x6868686868686868) are malformed addresses on amd64. That is, they are not 48-bit addresses sign extended to 64 bits. This causes a different kind of hardware fault than the usual 'unmapped page' when accessing such an address, and OS X 10.9.2 sends the resulting SIGSEGV incorrectly, making it look like it was user-generated rather than kernel-generated and does not include the faulting address. This means that in GODEBUG=gcdead=1 mode, if there is a bug and something tries to dereference a poisoned pointer, the runtime delivers the SIGSEGV to os/signal and returns to the faulting code, which faults again, causing the process to hang instead of crashing. Fix by rewriting "user-generated" SIGSEGV on OS X to look like a kernel-generated SIGSEGV with fault address 0xb01dfacedebac1e. I chose that address because (1) when printed in hex during a crash, it is obviously spelling out English text, (2) there are no current Google hits for that pointer, which will make its origin easy to find once this CL is indexed, and (3) it is not an altogether inaccurate description of the situation. Add a test. Maybe other systems will break too. LGTM=khr R=golang-codereviews, khr CC=golang-codereviews, iant, ken https://golang.org/cl/83270049 --- diff --git a/src/pkg/runtime/runtime_test.go b/src/pkg/runtime/runtime_test.go index 9aca68e1a1..67d39218d4 100644 --- a/src/pkg/runtime/runtime_test.go +++ b/src/pkg/runtime/runtime_test.go @@ -134,6 +134,43 @@ func TestStopCPUProfilingWithProfilerOff(t *testing.T) { SetCPUProfileRate(0) } +// Addresses to test for faulting behavior. +// This is less a test of SetPanicOnFault and more a check that +// the operating system and the runtime can process these faults +// correctly. That is, we're indirectly testing that without SetPanicOnFault +// these would manage to turn into ordinary crashes. +// Note that these are truncated on 32-bit systems, so the bottom 32 bits +// of the larger addresses must themselves be invalid addresses. +// We might get unlucky and the OS might have mapped one of these +// addresses, but probably not: they're all in the first page, very high +// adderesses that normally an OS would reserve for itself, or malformed +// addresses. Even so, we might have to remove one or two on different +// systems. We will see. + +var faultAddrs = []uint64{ + // low addresses + 0, + 1, + 0xfff, + // high (kernel) addresses + // or else malformed. + 0xffffffffffffffff, + 0xfffffffffffff001, + // no 0xffffffffffff0001; 0xffff0001 is mapped for 32-bit user space on OS X + 0xfffffffffff00001, + 0xffffffffff000001, + 0xfffffffff0000001, + 0xffffffff00000001, + 0xfffffff000000001, + 0xffffff0000000001, + 0xfffff00000000001, + 0xffff000000000001, + 0xfff0000000000001, + 0xff00000000000001, + 0xf000000000000001, + 0x8000000000000001, +} + func TestSetPanicOnFault(t *testing.T) { // This currently results in a fault in the signal trampoline on // dragonfly/386 - see issue 7421. @@ -144,6 +181,12 @@ func TestSetPanicOnFault(t *testing.T) { old := debug.SetPanicOnFault(true) defer debug.SetPanicOnFault(old) + for _, addr := range faultAddrs { + testSetPanicOnFault(t, uintptr(addr)) + } +} + +func testSetPanicOnFault(t *testing.T, addr uintptr) { defer func() { if err := recover(); err == nil { t.Fatalf("did not find error in recover") @@ -151,7 +194,7 @@ func TestSetPanicOnFault(t *testing.T) { }() var p *int - p = (*int)(unsafe.Pointer(^uintptr(0))) + p = (*int)(unsafe.Pointer(addr)) println(*p) - t.Fatalf("still here - should have faulted") + t.Fatalf("still here - should have faulted on address %#x", addr) } diff --git a/src/pkg/runtime/signal_amd64x.c b/src/pkg/runtime/signal_amd64x.c index 309bad3ba6..04026f32f6 100644 --- a/src/pkg/runtime/signal_amd64x.c +++ b/src/pkg/runtime/signal_amd64x.c @@ -52,6 +52,27 @@ runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp) return; } +#ifdef GOOS_darwin + // x86-64 has 48-bit virtual addresses. The top 16 bits must echo bit 47. + // The hardware delivers a different kind of fault for a malformed address + // than it does for an attempt to access a valid but unmapped address. + // OS X 10.9.2 mishandles the malformed address case, making it look like + // a user-generated signal (like someone ran kill -SEGV ourpid). + // We pass user-generated signals to os/signal, or else ignore them. + // Doing that here - and returning to the faulting code - results in an + // infinite loop. It appears the best we can do is rewrite what the kernel + // delivers into something more like the truth. The address used below + // has very little chance of being the one that caused the fault, but it is + // malformed, it is clearly not a real pointer, and if it does get printed + // in real life, people will probably search for it and find this code. + // There are no Google hits for b01dfacedebac1e or 0xb01dfacedebac1e + // as I type this comment. + if(sig == SIGSEGV && SIG_CODE0(info, ctxt) == SI_USER) { + SIG_CODE0(info, ctxt) = SI_USER+1; + info->si_addr = (void*)(uintptr)0xb01dfacedebac1eULL; + } +#endif + t = &runtime·sigtab[sig]; if(SIG_CODE0(info, ctxt) != SI_USER && (t->flags & SigPanic)) { // Make it look like a call to the signal func.