Cypherpunks repositories - gostls13.git/commitdiff
runtime: use MADV_FREE on Linux if available
author Tobias Klauser <tklauser@distanz.ch>
Fri, 14 Sep 2018 07:57:06 +0000 (09:57 +0200)
committer Tobias Klauser <tobias.klauser@gmail.com>
Tue, 18 Sep 2018 15:41:44 +0000 (15:41 +0000)
On Linux, sysUnused currently uses madvise(MADV_DONTNEED) to signal the
kernel that a range of allocated memory contains unneeded data. After a
successful call, the range (but not the data it contained before the
call to madvise) is still available but the first access to that range
will unconditionally incur a page fault (needed to 0-fill the range).

A faster alternative is MADV_FREE, available since Linux 4.5. The
mechanism is very similar, but the page fault will only be incurred if
the kernel, between the call to madvise and the first access, decides to
reuse that memory for something else.

In sysUnused, test whether MADV_FREE is supported and fall back to
MADV_DONTNEED in case it isn't. This requires making the return value of
the madvise syscall available to the caller, so change runtime.madvise
to return it.

Fixes #23687

Change-Id: I962c3429000dd9f4a00846461ad128b71201bb04
Reviewed-on: https://go-review.googlesource.com/135395
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
31 files changed:
src/runtime/defs2_linux.go
src/runtime/defs_linux.go
src/runtime/defs_linux_386.go
src/runtime/defs_linux_amd64.go
src/runtime/defs_linux_arm.go
src/runtime/defs_linux_arm64.go
src/runtime/defs_linux_mips64x.go
src/runtime/defs_linux_mipsx.go
src/runtime/defs_linux_ppc64.go
src/runtime/defs_linux_ppc64le.go
src/runtime/defs_linux_s390x.go
src/runtime/mem_linux.go
src/runtime/stubs2.go
src/runtime/sys_dragonfly_amd64.s
src/runtime/sys_freebsd_386.s
src/runtime/sys_freebsd_amd64.s
src/runtime/sys_freebsd_arm.s
src/runtime/sys_linux_386.s
src/runtime/sys_linux_amd64.s
src/runtime/sys_linux_arm.s
src/runtime/sys_linux_arm64.s
src/runtime/sys_linux_mips64x.s
src/runtime/sys_linux_mipsx.s
src/runtime/sys_linux_ppc64x.s
src/runtime/sys_linux_s390x.s
src/runtime/sys_netbsd_386.s
src/runtime/sys_netbsd_amd64.s
src/runtime/sys_netbsd_arm.s
src/runtime/sys_openbsd_386.s
src/runtime/sys_openbsd_amd64.s
src/runtime/sys_openbsd_arm.s

index c10dfb86240002607a5d0f9cd413114137decd10..b08c0dafe12f2fa26ed69bb441b4546620a62f31 100644 (file)
@@ -58,7 +58,10 @@ const (
        MAP_PRIVATE = C.MAP_PRIVATE
        MAP_FIXED   = C.MAP_FIXED
 
-       MADV_DONTNEED = C.MADV_DONTNEED
+       MADV_DONTNEED   = C.MADV_DONTNEED
+       MADV_FREE       = C.MADV_FREE
+       MADV_HUGEPAGE   = C.MADV_HUGEPAGE
+       MADV_NOHUGEPAGE = C.MADV_NOHUGEPAGE
 
        SA_RESTART  = C.SA_RESTART
        SA_ONSTACK  = C.SA_ONSTACK
index 553366a50ba6c5adc70f4cf1cb390d0bf363bb86..2d810136d987133e71eaf75eaa602fae9cc0039f 100644 (file)
@@ -47,7 +47,10 @@ const (
        MAP_PRIVATE = C.MAP_PRIVATE
        MAP_FIXED   = C.MAP_FIXED
 
-       MADV_DONTNEED = C.MADV_DONTNEED
+       MADV_DONTNEED   = C.MADV_DONTNEED
+       MADV_FREE       = C.MADV_FREE
+       MADV_HUGEPAGE   = C.MADV_HUGEPAGE
+       MADV_NOHUGEPAGE = C.MADV_NOHUGEPAGE
 
        SA_RESTART = C.SA_RESTART
        SA_ONSTACK = C.SA_ONSTACK
index a7e435f854fe3f90e5f9c58b41b7da5f2c04b653..0ebac17aefa9e87dedb6792a19a88f5d50b8c418 100644 (file)
@@ -18,6 +18,7 @@ const (
        _MAP_FIXED   = 0x10
 
        _MADV_DONTNEED   = 0x4
+       _MADV_FREE       = 0x8
        _MADV_HUGEPAGE   = 0xe
        _MADV_NOHUGEPAGE = 0xf
 
index e8c6a212db770ba71f16eb2d9b19b7fb29e3a41c..c0a0ef0dd4ec5069628676e786af248625afb489 100644 (file)
@@ -18,6 +18,7 @@ const (
        _MAP_FIXED   = 0x10
 
        _MADV_DONTNEED   = 0x4
+       _MADV_FREE       = 0x8
        _MADV_HUGEPAGE   = 0xe
        _MADV_NOHUGEPAGE = 0xf
 
index 62ec8fab5e9b55d9c764b09fccacf128dc186ef0..43946bb79ca8e507639348dc5513e96727e61143 100644 (file)
@@ -16,6 +16,7 @@ const (
        _MAP_FIXED   = 0x10
 
        _MADV_DONTNEED   = 0x4
+       _MADV_FREE       = 0x8
        _MADV_HUGEPAGE   = 0xe
        _MADV_NOHUGEPAGE = 0xf
 
index c295bc0257520948ffffb38212dadd89d33ae9f9..c2cc281ab4f5c23e63c0a7db60d1199113e0a3a1 100644 (file)
@@ -18,6 +18,7 @@ const (
        _MAP_FIXED   = 0x10
 
        _MADV_DONTNEED   = 0x4
+       _MADV_FREE       = 0x8
        _MADV_HUGEPAGE   = 0xe
        _MADV_NOHUGEPAGE = 0xf
 
index df11cb0965d69c7cb448ddde861c5f2395a7bc8b..9dacd5d1e9bacbc99547e6adac3143caf3a0a7ff 100644 (file)
@@ -18,6 +18,7 @@ const (
        _MAP_FIXED   = 0x10
 
        _MADV_DONTNEED   = 0x4
+       _MADV_FREE       = 0x8
        _MADV_HUGEPAGE   = 0xe
        _MADV_NOHUGEPAGE = 0xf
 
index 702fbb51c861c4a3a2912efc5f922c0342707650..9532ac54ee83da0b3c9c8b9e2f3bd46d9367c6e1 100644 (file)
@@ -22,6 +22,7 @@ const (
        _MAP_FIXED   = 0x10
 
        _MADV_DONTNEED   = 0x4
+       _MADV_FREE       = 0x8
        _MADV_HUGEPAGE   = 0xe
        _MADV_NOHUGEPAGE = 0xf
 
index 45363d12854becdec3a59dfd2d9ddbfb97f9032e..5a4326da07a94e17dc2e1afdeefe281f9b227927 100644 (file)
@@ -18,6 +18,7 @@ const (
        _MAP_FIXED   = 0x10
 
        _MADV_DONTNEED   = 0x4
+       _MADV_FREE       = 0x8
        _MADV_HUGEPAGE   = 0xe
        _MADV_NOHUGEPAGE = 0xf
 
index 45363d12854becdec3a59dfd2d9ddbfb97f9032e..5a4326da07a94e17dc2e1afdeefe281f9b227927 100644 (file)
@@ -18,6 +18,7 @@ const (
        _MAP_FIXED   = 0x10
 
        _MADV_DONTNEED   = 0x4
+       _MADV_FREE       = 0x8
        _MADV_HUGEPAGE   = 0xe
        _MADV_NOHUGEPAGE = 0xf
 
index ab90723f754858bb2ac64636e85fbd8022f3d04d..a6cc9c48e91de22e7561c02931e120069b35877c 100644 (file)
@@ -19,6 +19,7 @@ const (
        _MAP_FIXED   = 0x10
 
        _MADV_DONTNEED   = 0x4
+       _MADV_FREE       = 0x8
        _MADV_HUGEPAGE   = 0xe
        _MADV_NOHUGEPAGE = 0xf
 
index 7aa48170a1164d6c270fecf11fa3e023c68a77f8..845f72ded2c1634f3a6eb4b1779a8569a5541ee2 100644 (file)
@@ -5,6 +5,7 @@
 package runtime
 
 import (
+       "runtime/internal/atomic"
        "runtime/internal/sys"
        "unsafe"
 )
@@ -34,10 +35,12 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
        return p
 }
 
+var adviseUnused = uint32(_MADV_FREE)
+
 func sysUnused(v unsafe.Pointer, n uintptr) {
        // By default, Linux's "transparent huge page" support will
        // merge pages into a huge page if there's even a single
-       // present regular page, undoing the effects of the DONTNEED
+       // present regular page, undoing the effects of madvise(adviseUnused)
        // below. On amd64, that means khugepaged can turn a single
        // 4KB page to 2MB, bloating the process's RSS by as much as
        // 512X. (See issue #8832 and Linux kernel bug
@@ -102,7 +105,13 @@ func sysUnused(v unsafe.Pointer, n uintptr) {
                throw("unaligned sysUnused")
        }
 
-       madvise(v, n, _MADV_DONTNEED)
+       advise := atomic.Load(&adviseUnused)
+       if errno := madvise(v, n, int32(advise)); advise == _MADV_FREE && errno != 0 {
+               // MADV_FREE was added in Linux 4.5. Fall back to MADV_DONTNEED if it is
+               // not supported.
+               atomic.Store(&adviseUnused, _MADV_DONTNEED)
+               madvise(v, n, _MADV_DONTNEED)
+       }
 }
 
 func sysUsed(v unsafe.Pointer, n uintptr) {
index 02249d0aadc671f1e39ca58c9f9d98770141f701..c14db74003fe2fc12d41eaa60a0d6bfecd3b742a 100644 (file)
@@ -25,7 +25,8 @@ func write(fd uintptr, p unsafe.Pointer, n int32) int32
 //go:noescape
 func open(name *byte, mode, perm int32) int32
 
-func madvise(addr unsafe.Pointer, n uintptr, flags int32)
+// The return value is checked by sysUnused() on linux to detect whether MADV_FREE is supported.
+func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32
 
 // exitThread terminates the current thread, writing *wait = 0 when
 // the stack is safe to reclaim.
index f0eb5f4e21614958aecf89ecf5865f91a39b5302..b18e9676513d7efc6fcf17d36d1c48d912644f22 100644 (file)
@@ -260,9 +260,11 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
        MOVL    flags+16(FP), DX
        MOVQ    $75, AX // madvise
        SYSCALL
-       // ignore failure - maybe pages are locked
+       JCC     2(PC)
+       MOVL    $-1, AX
+       MOVL    AX, ret+24(FP)
        RET
-       
+
 TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
        MOVQ    new+0(FP), DI
        MOVQ    old+8(FP), SI
index b8f685a32374a78c27ebec0615f0781aada19e46..754689ba0558b12a959f57cf8ff91f9661433b0b 100644 (file)
@@ -163,7 +163,9 @@ TEXT runtime·munmap(SB),NOSPLIT,$-4
 TEXT runtime·madvise(SB),NOSPLIT,$-4
        MOVL    $75, AX // madvise
        INT     $0x80
-       // ignore failure - maybe pages are locked
+       JAE     2(PC)
+       MOVL    $-1, AX
+       MOVL    AX, ret+12(FP)
        RET
 
 TEXT runtime·setitimer(SB), NOSPLIT, $-4
index be191a078458fcdd793b2da7bb0966cb8b8709fa..55959b3e3a333a41affdcbc87a47a9014c784ae0 100644 (file)
@@ -337,9 +337,11 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
        MOVL    flags+16(FP), DX
        MOVQ    $75, AX // madvise
        SYSCALL
-       // ignore failure - maybe pages are locked
+       JCC     2(PC)
+       MOVL    $-1, AX
+       MOVL    AX, ret+24(FP)
        RET
-       
+
 TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
        MOVQ    new+0(FP), DI
        MOVQ    old+8(FP), SI
index 93bf569367e922080858079c2767f5b97d603fab..f347b9fa961b99a334a31f3f0e42033466570c06 100644 (file)
@@ -264,14 +264,15 @@ TEXT runtime·munmap(SB),NOSPLIT,$0
        RET
 
 TEXT runtime·madvise(SB),NOSPLIT,$0
-       MOVW addr+0(FP), R0             // arg 1 addr
-       MOVW n+4(FP), R1                // arg 2 len
-       MOVW flags+8(FP), R2            // arg 3 flags
-       MOVW $SYS_madvise, R7
-       SWI $0
-       // ignore failure - maybe pages are locked
+       MOVW    addr+0(FP), R0          // arg 1 addr
+       MOVW    n+4(FP), R1             // arg 2 len
+       MOVW    flags+8(FP), R2         // arg 3 flags
+       MOVW    $SYS_madvise, R7
+       SWI     $0
+       MOVW.CS $-1, R0
+       MOVW    R0, ret+12(FP)
        RET
-       
+
 TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
        MOVW new+0(FP), R0
        MOVW old+4(FP), R1
index 4e914f3e6013d0839e54aed9727b62c512ff6771..40b55a67eb8799bce85d96323ef0b9649a77a819 100644 (file)
@@ -427,7 +427,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
        MOVL    n+4(FP), CX
        MOVL    flags+8(FP), DX
        INVOKE_SYSCALL
-       // ignore failure - maybe pages are locked
+       MOVL    AX, ret+12(FP)
        RET
 
 // int32 futex(int32 *uaddr, int32 op, int32 val,
index 4492dad02e7a64ca9ee49d289b2af6b4afd4eedd..7e846371e5620c067109beafe069d601bf07e6f0 100644 (file)
@@ -519,7 +519,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
        MOVL    flags+16(FP), DX
        MOVQ    $SYS_madvise, AX
        SYSCALL
-       // ignore failure - maybe pages are locked
+       MOVL    AX, ret+24(FP)
        RET
 
 // int64 futex(int32 *uaddr, int32 op, int32 val,
index a709c4cbd050b7aa6d197b0f3e9d376b2838d799..43a58335c8068a105fc31005a3f5ea5cefbb8995 100644 (file)
@@ -195,7 +195,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
        MOVW    flags+8(FP), R2
        MOVW    $SYS_madvise, R7
        SWI     $0
-       // ignore failure - maybe pages are locked
+       MOVW    R0, ret+12(FP)
        RET
 
 TEXT runtime·setitimer(SB),NOSPLIT,$0
index 086c8ddc637fe3ab808ac23f491bb188345a8ae5..8b344be8f83b9e4c5c2888c1b62922e554f1480e 100644 (file)
@@ -401,7 +401,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
        MOVW    flags+16(FP), R2
        MOVD    $SYS_madvise, R8
        SVC
-       // ignore failure - maybe pages are locked
+       MOVW    R0, ret+24(FP)
        RET
 
 // int64 futex(int32 *uaddr, int32 op, int32 val,
index 337299ba5fe084556645ecd31a1b0a0e0d90fcfd..c45703d22801c7aa08393db59c4fd61d6b1d5bc1 100644 (file)
@@ -291,7 +291,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
        MOVW    flags+16(FP), R6
        MOVV    $SYS_madvise, R2
        SYSCALL
-       // ignore failure - maybe pages are locked
+       MOVW    R2, ret+24(FP)
        RET
 
 // int64 futex(int32 *uaddr, int32 op, int32 val,
index dca5f1ee459a8f4d2c7a82249b382e384f6a7e6a..f362b0f3f1c95d888042f2b454d3ea82b48aa9e2 100644 (file)
@@ -302,13 +302,13 @@ TEXT runtime·munmap(SB),NOSPLIT,$0-8
        UNDEF   // crash
        RET
 
-TEXT runtime·madvise(SB),NOSPLIT,$0-12
+TEXT runtime·madvise(SB),NOSPLIT,$0-16
        MOVW    addr+0(FP), R4
        MOVW    n+4(FP), R5
        MOVW    flags+8(FP), R6
        MOVW    $SYS_madvise, R2
        SYSCALL
-       // ignore failure - maybe pages are locked
+       MOVW    R2, ret+12(FP)
        RET
 
 // int32 futex(int32 *uaddr, int32 op, int32 val, struct timespec *timeout, int32 *uaddr2, int32 val2);
index 7c2f8ea637176872563a885d0711490388ba7d06..ed79b69257848d4a47c667051eaabde06a62f22f 100644 (file)
@@ -454,7 +454,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
        MOVD    n+8(FP), R4
        MOVW    flags+16(FP), R5
        SYSCALL $SYS_madvise
-       // ignore failure - maybe pages are locked
+       MOVW    R3, ret+24(FP)
        RET
 
 // int64 futex(int32 *uaddr, int32 op, int32 val,
index 95401af62ecc2e68907b32e50dd1551f6bf5417c..c79ceea7512f6400967ea2b8aea56dc61c1549b3 100644 (file)
@@ -290,7 +290,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
        MOVW    flags+16(FP), R4
        MOVW    $SYS_madvise, R1
        SYSCALL
-       // ignore failure - maybe pages are locked
+       MOVW    R2, ret+24(FP)
        RET
 
 // int64 futex(int32 *uaddr, int32 op, int32 val,
index 4042ab4f8abd83627e9535ad251928a6fecab084..66f4620cab59ad7f36e0de4434714b72a7a5f6ab 100644 (file)
@@ -135,7 +135,9 @@ TEXT runtime·munmap(SB),NOSPLIT,$-4
 TEXT runtime·madvise(SB),NOSPLIT,$-4
        MOVL    $75, AX                 // sys_madvise
        INT     $0x80
-       // ignore failure - maybe pages are locked
+       JAE     2(PC)
+       MOVL    $-1, AX
+       MOVL    AX, ret+12(FP)
        RET
 
 TEXT runtime·setitimer(SB),NOSPLIT,$-4
index 11b9c1b417375323018ad7f1be4fd27e312a5a84..55236591965cdeff800d86830480c407d5e5c4ee 100644 (file)
@@ -319,7 +319,9 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
        MOVL    flags+16(FP), DX        // arg 3 - behav
        MOVQ    $75, AX                 // sys_madvise
        SYSCALL
-       // ignore failure - maybe pages are locked
+       JCC     2(PC)
+       MOVL    $-1, AX
+       MOVL    AX, ret+24(FP)
        RET
 
 TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
index 6b2c5a83572c2ae8ec6636cf1424dcb427e053fe..304075f295e31e7402dce0fe855886e17aa06a15 100644 (file)
@@ -284,11 +284,12 @@ TEXT runtime·munmap(SB),NOSPLIT,$0
        RET
 
 TEXT runtime·madvise(SB),NOSPLIT,$0
-       MOVW addr+0(FP), R0     // arg 1 - addr
-       MOVW n+4(FP), R1        // arg 2 - len
-       MOVW flags+8(FP), R2    // arg 3 - behav
-       SWI $0xa0004b   // sys_madvise
-       // ignore failure - maybe pages are locked
+       MOVW    addr+0(FP), R0  // arg 1 - addr
+       MOVW    n+4(FP), R1     // arg 2 - len
+       MOVW    flags+8(FP), R2 // arg 3 - behav
+       SWI     $0xa0004b       // sys_madvise
+       MOVW.CS $-1, R0
+       MOVW    R0, ret+12(FP)
        RET
 
 TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
index 21f13c806e6351f1920c605af0a325d3c6438079..8e34ab497afe15843221ab81f64c92f23676c25c 100644 (file)
@@ -136,7 +136,8 @@ TEXT runtime·madvise(SB),NOSPLIT,$-4
        MOVL    $75, AX                 // sys_madvise
        INT     $0x80
        JAE     2(PC)
-       MOVL    $0xf1, 0xf1             // crash
+       MOVL    $-1, AX
+       MOVL    AX, ret+12(FP)
        RET
 
 TEXT runtime·setitimer(SB),NOSPLIT,$-4
index 38ac38d9bf1824fb35ec0e489baf1d3c735e76a8..227e81869c0f67417bc2320a45fc91941defbf4b 100644 (file)
@@ -305,7 +305,9 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
        MOVL    flags+16(FP), DX        // arg 3 - behav
        MOVQ    $75, AX                 // sys_madvise
        SYSCALL
-       // ignore failure - maybe pages are locked
+       JCC     2(PC)
+       MOVL    $-1, AX
+       MOVL    AX, ret+24(FP)
        RET
 
 TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
index ff1c1da9b97ee0c71979421a94bb1b48e714cd4d..52d3638bc18c1f5e7e2e1c0021584691dab5ca99 100644 (file)
@@ -143,8 +143,8 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
        MOVW    flags+8(FP), R2         // arg 2 - flags
        MOVW    $75, R12                // sys_madvise
        SWI     $0
-       MOVW.CS $0, R8                  // crash on syscall failure
-       MOVW.CS R8, (R8)
+       MOVW.CS $-1, R0
+       MOVW    R0, ret+12(FP)
        RET
 
 TEXT runtime·setitimer(SB),NOSPLIT,$0