Merge pull request 'zig libc: implement malloc' (#31177) from libc-malloc into master

Reviewed-on: https://codeberg.org/ziglang/zig/pulls/31177
2026-03-08 03:24:46 +01:00 · 2026-02-13 06:30:42 +01:00 · 2026-02-13 06:30:42 +01:00 · 4a3adaaa23
commit 4a3adaaa23
parent 37109fa4ef 0957761d5c
52 changed files with 342 additions and 3923 deletions
--- a/build.zig
+++ b/build.zig
@ -483,7 +483,7 @@ pub fn build(b: *std.Build) !void {
        .skip_linux = skip_linux,
        .skip_llvm = skip_llvm,
        .skip_libc = skip_libc,
-        .max_rss = 3_300_000_000,
+        .max_rss = 3_500_000_000,
    }));

    test_modules_step.dependOn(tests.addModuleTests(b, .{
@ -518,7 +518,7 @@ pub fn build(b: *std.Build) !void {
        .test_extra_targets = test_extra_targets,
        .root_src = "lib/c.zig",
        .name = "zigc",
-        .desc = "Run the zigc tests",
+        .desc = "Run the zig libc implementation unit tests",
        .optimize_modes = optimization_modes,
        .include_paths = &.{},
        .skip_single_threaded = true,
@ -560,22 +560,7 @@ pub fn build(b: *std.Build) !void {
        .skip_linux = skip_linux,
        .skip_llvm = skip_llvm,
        .skip_libc = skip_libc,
-        .max_rss = switch (b.graph.host.result.os.tag) {
-            .freebsd => switch (b.graph.host.result.cpu.arch) {
-                .x86_64 => 3_756_422_348,
-                else => 3_800_000_000,
-            },
-            .linux => 6_800_000_000,
-            .macos => switch (b.graph.host.result.cpu.arch) {
-                .aarch64 => 8_273_795_481,
-                else => 8_300_000_000,
-            },
-            .windows => switch (b.graph.host.result.cpu.arch) {
-                .x86_64 => 3_750_236_160,
-                else => 3_800_000_000,
-            },
-            else => 8_300_000_000,
-        },
+        .max_rss = 8_500_000_000,
    }));

    const unit_tests_step = b.step("test-unit", "Run the compiler source unit tests");
--- a/lib/c.zig
+++ b/lib/c.zig
@ -15,17 +15,32 @@ pub const panic = if (builtin.is_test)
 else
    std.debug.no_panic;

-/// It is incorrect to make this conditional on `builtin.is_test`, because it is possible that
-/// libzigc is being linked into a different test compilation, as opposed to being tested itself.
-pub const linkage: std.builtin.GlobalLinkage = .strong;
-
-/// Determines the symbol's visibility to other objects.
-/// For WebAssembly this allows the symbol to be resolved to other modules, but will not
-/// export it to the host runtime.
-pub const visibility: std.builtin.SymbolVisibility = .hidden;
-
+/// It is possible that this libc is being linked into a different test
+/// compilation, as opposed to being tested itself. In such a case,
+/// `builtin.link_libc` will be `true` along with `builtin.is_test`.
+///
+/// When we don't have a complete libc, `builtin.link_libc` will be `false` and
+/// we will be missing externally provided symbols, such as `_errno` from
+/// ucrtbase.dll. In such a case, we must avoid analyzing otherwise exported
+/// functions because it would cause undefined symbol usage.
+///
+/// Unfortunately, such logic cannot be done automatically in this function
+/// body since `func` will always be analyzed by the time we get here, so each
+/// `comptime` block will need to check for `builtin.link_libc` and skip
+/// exports when the exported functions have libc dependencies not provided by
+/// this compilation unit.
 pub inline fn symbol(comptime func: *const anyopaque, comptime name: []const u8) void {
-    @export(func, .{ .name = name, .linkage = linkage, .visibility = visibility });
+    @export(func, .{
+        .name = name,
+        // Normally, libc goes into a static archive, making all symbols
+        // overridable. However, Zig supports including the libc functions as part
+        // of the Zig Compilation Unit, so to support this use case we make all
+        // symbols weak.
+        .linkage = .weak,
+        // For WebAssembly, hidden visibility allows the symbol to be resolved to
+        // other modules, but will not export it to the host runtime.
+        .visibility = .hidden,
+    });
 }

 /// Given a low-level syscall return value, sets errno and returns `-1`, or on
@ -47,19 +62,22 @@ pub fn errno(syscall_return_value: usize) c_int {
 }

 comptime {
-    _ = @import("c/inttypes.zig");
    _ = @import("c/ctype.zig");
-    _ = @import("c/stdlib.zig");
+    _ = @import("c/inttypes.zig");
+    if (!builtin.target.isMinGW()) {
+        _ = @import("c/malloc.zig");
+    }
    _ = @import("c/math.zig");
+    _ = @import("c/stdlib.zig");
    _ = @import("c/string.zig");
    _ = @import("c/strings.zig");
-    _ = @import("c/wchar.zig");

-    _ = @import("c/sys/mman.zig");
-    _ = @import("c/sys/file.zig");
-    _ = @import("c/sys/reboot.zig");
    _ = @import("c/sys/capability.zig");
+    _ = @import("c/sys/file.zig");
+    _ = @import("c/sys/mman.zig");
+    _ = @import("c/sys/reboot.zig");
    _ = @import("c/sys/utsname.zig");

    _ = @import("c/unistd.zig");
+    _ = @import("c/wchar.zig");
 }
--- a/lib/c/malloc.zig
+++ b/lib/c/malloc.zig
@ -0,0 +1,195 @@
+//! Based on wrapping a stateless Zig Allocator implementation, appropriate for:
+//! - ReleaseFast and ReleaseSmall optimization modes, with multi-threading
+//!   enabled.
+//! - WebAssembly or Linux in single-threaded release modes.
+//!
+//! Because the libc APIs don't have client alignment and size tracking, in
+//! order to take advantage of Zig allocator implementations, additional
+//! metadata must be stored in the allocations.
+//!
+//! This implementation stores the metadata just before the pointer returned
+//! from `malloc`, just like many libc malloc implementations do, including
+//! musl. This has the downside of causing fragmentation for allocations with
+//! higher alignment, however most of that memory can be recovered by
+//! preemptively putting the gap onto the freelist.
+const builtin = @import("builtin");
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Alignment = std.mem.Alignment;
+const alignment_bytes = @max(@alignOf(std.c.max_align_t), @sizeOf(Header));
+const alignment: Alignment = .fromByteUnits(alignment_bytes);
+
+const symbol = @import("../c.zig").symbol;
+
+comptime {
+    // `nomem` writes through `std.c._errno()`, which is supplied externally,
+    // so these functions are only exported when a libc is being linked.
+    if (builtin.link_libc) {
+        // Standard C and POSIX entry points.
+        symbol(&aligned_alloc, "aligned_alloc");
+        symbol(&calloc, "calloc");
+        symbol(&free, "free");
+        symbol(&malloc, "malloc");
+        symbol(&posix_memalign, "posix_memalign");
+        symbol(&realloc, "realloc");
+
+        // Common extensions.
+        symbol(&malloc_usable_size, "malloc_usable_size");
+        symbol(&memalign, "memalign");
+        symbol(&reallocarray, "reallocarray");
+        symbol(&valloc, "valloc");
+    }
+}
+
+// Placeholder context pointer passed to every vtable call below. It is left
+// `undefined`; presumably the selected allocators never read it, since the
+// module documentation describes them as stateless.
+const no_context: *anyopaque = undefined;
+// Placeholder return address passed to every vtable call below.
+const no_ra: usize = undefined;
+// Backing allocator vtable, chosen at compile time: `WasmAllocator` on
+// wasm32/wasm64; otherwise `BrkAllocator` for single-threaded builds and
+// `SmpAllocator` for multi-threaded builds.
+const vtable = switch (builtin.cpu.arch) {
+    .wasm32, .wasm64 => std.heap.WasmAllocator.vtable,
+    else => if (builtin.single_threaded) std.heap.BrkAllocator.vtable else std.heap.SmpAllocator.vtable,
+};
+
+/// Bookkeeping record stored in the bytes directly below every pointer handed
+/// out by this file. The libc API never passes the original size or alignment
+/// back in, yet the wrapped Zig allocator vtable requires both.
+const Header = packed struct(u64) {
+    alignment: Alignment,
+    /// Byte count requested by the client; excludes the extra alignment bytes
+    /// added in front of the allocation.
+    size: Size,
+    padding: Padding = 0,
+
+    /// Unsigned integer occupying the bits left next to `alignment` in the
+    /// 64-bit header, capped at the width of `usize`.
+    const Size = @Int(.unsigned, @min(64 - @bitSizeOf(Alignment), @bitSizeOf(usize)));
+    /// Whatever bits remain once `alignment` and `size` are accounted for.
+    const Padding = @Int(.unsigned, 64 - @bitSizeOf(Alignment) - @bitSizeOf(Size));
+
+    comptime {
+        assert(alignment_bytes >= @sizeOf(Header));
+    }
+
+    /// Returns the header located `@sizeOf(Header)` bytes below `base`.
+    fn fromBase(base: [*]align(alignment_bytes) u8) *Header {
+        const header_address = base - @sizeOf(Header);
+        return @ptrCast(header_address);
+    }
+};
+
+/// Allocates `n` bytes aligned to `alignment_bytes`. On failure returns null
+/// with errno set to ENOMEM (via `nomem`).
+///
+/// The backing allocation is `alignment_bytes` larger than requested; the
+/// `Header` is written just below the returned pointer so that `free` and
+/// `realloc` can recover the size and alignment later.
+fn malloc(n: usize) callconv(.c) ?[*]align(alignment_bytes) u8 {
+    const size = std.math.cast(Header.Size, n) orelse return nomem();
+    // When `Header.Size` is as wide as `usize` the cast above cannot reject
+    // anything, so the padded length must be checked for wraparound
+    // explicitly; otherwise a huge request could turn into a tiny allocation.
+    const padded_len = std.math.add(usize, n, alignment_bytes) catch return nomem();
+    const ptr: [*]align(alignment_bytes) u8 = @alignCast(
+        vtable.alloc(no_context, padded_len, alignment, no_ra) orelse return nomem(),
+    );
+    const base = ptr + alignment_bytes;
+    const header: *Header = .fromBase(base);
+    header.* = .{
+        .alignment = alignment,
+        .size = size,
+    };
+    return base;
+}
+
+/// Allocates `n` bytes aligned to at least `alloc_alignment`.
+///
+/// Returns null with errno set to EINVAL when `alloc_alignment` is zero or not
+/// a power of two, and null with errno set to ENOMEM when the allocation
+/// itself fails.
+fn aligned_alloc(alloc_alignment: usize, n: usize) callconv(.c) ?[*]align(alignment_bytes) u8 {
+    // Reject invalid alignments here: converting them to an `Alignment`
+    // further down would trip an assertion instead of reporting an error.
+    // The zero check comes first because `isPowerOfTwo` requires a positive
+    // argument.
+    if (alloc_alignment == 0 or !std.math.isPowerOfTwo(alloc_alignment)) {
+        @branchHint(.cold);
+        std.c._errno().* = @intFromEnum(std.c.E.INVAL);
+        return null;
+    }
+    return aligned_alloc_inner(alloc_alignment, n) orelse return nomem();
+}
+
+/// Shared implementation of the over-aligned allocation entry points.
+///
+/// Avoids setting errno so it can be called by `posix_memalign`; every failure
+/// (invalid alignment, oversized request, allocator failure) is reported as
+/// null and the caller decides which error code to surface.
+fn aligned_alloc_inner(alloc_alignment: usize, n: usize) ?[*]align(alignment_bytes) u8 {
+    // `Alignment.fromByteUnits` asserts a power of two, so invalid values
+    // must be filtered out before the conversion below. The zero check comes
+    // first because `isPowerOfTwo` requires a positive argument.
+    if (alloc_alignment == 0 or !std.math.isPowerOfTwo(alloc_alignment)) return null;
+    const size = std.math.cast(Header.Size, n) orelse return null;
+    // Never go below the default malloc alignment, so the header always fits
+    // in the gap in front of the returned pointer.
+    const max_align = alignment.max(.fromByteUnits(alloc_alignment));
+    const max_align_bytes = max_align.toByteUnits();
+    // Guard the padded length against wraparound; a wrapped value would
+    // request far less memory than the caller believes it received.
+    const padded_len = std.math.add(usize, n, max_align_bytes) catch return null;
+    const ptr: [*]align(alignment_bytes) u8 = @alignCast(
+        vtable.alloc(no_context, padded_len, max_align, no_ra) orelse return null,
+    );
+    const base: [*]align(alignment_bytes) u8 = @alignCast(ptr + max_align_bytes);
+    const header: *Header = .fromBase(base);
+    header.* = .{
+        .alignment = max_align,
+        .size = size,
+    };
+    return base;
+}
+
+/// Allocates zero-filled storage for `elems` items of `len` bytes each.
+/// Returns null with errno set to ENOMEM when the product overflows `usize`;
+/// otherwise failure is whatever `malloc` reports.
+fn calloc(elems: usize, len: usize) callconv(.c) ?[*]align(alignment_bytes) u8 {
+    const total = std.math.mul(usize, elems, len) catch return nomem();
+    if (malloc(total)) |base| {
+        @memset(base[0..total], 0);
+        return base;
+    }
+    return null;
+}
+
+/// Resizes the allocation at `opt_old_base` to `n` bytes, keeping the
+/// alignment recorded in its header.
+///
+/// - `n == 0` frees the allocation and returns null.
+/// - A null `opt_old_base` behaves like `malloc(n)`.
+/// - Otherwise the backing allocator's `remap` is tried first; if it declines,
+///   a new allocation is made, the smaller of the old and new sizes is copied
+///   over, and the old allocation is released.
+///
+/// On failure returns null with errno set to ENOMEM and leaves the old
+/// allocation untouched.
+fn realloc(opt_old_base: ?[*]align(alignment_bytes) u8, n: usize) callconv(.c) ?[*]align(alignment_bytes) u8 {
+    if (n == 0) {
+        free(opt_old_base);
+        return null;
+    }
+    const old_base = opt_old_base orelse return malloc(n);
+    const new_size = std.math.cast(Header.Size, n) orelse return nomem();
+    const old_header: *Header = .fromBase(old_base);
+    assert(old_header.padding == 0);
+    const old_size = old_header.size;
+    const old_alignment = old_header.alignment;
+    const old_alignment_bytes = old_alignment.toByteUnits();
+    // Compute the padded length once, with an explicit wraparound check:
+    // where `Header.Size` is as wide as `usize` the cast above cannot reject
+    // anything, and a wrapped length would yield an undersized allocation.
+    const new_len = std.math.add(usize, n, old_alignment_bytes) catch return nomem();
+    // The backing allocation starts one alignment unit below the client pointer.
+    const old_ptr = old_base - old_alignment_bytes;
+    const old_slice = old_ptr[0 .. old_size + old_alignment_bytes];
+    const new_base: [*]align(alignment_bytes) u8 = if (vtable.remap(
+        no_context,
+        old_slice,
+        old_alignment,
+        new_len,
+        no_ra,
+    )) |new_ptr| @alignCast(new_ptr + old_alignment_bytes) else b: {
+        const new_ptr: [*]align(alignment_bytes) u8 = @alignCast(
+            vtable.alloc(no_context, new_len, old_alignment, no_ra) orelse
+                return nomem(),
+        );
+        const new_base: [*]align(alignment_bytes) u8 = @alignCast(new_ptr + old_alignment_bytes);
+        const copy_len = @min(new_size, old_size);
+        @memcpy(new_base[0..copy_len], old_base[0..copy_len]);
+        vtable.free(no_context, old_slice, old_alignment, no_ra);
+        break :b new_base;
+    };
+    // Rewrite the header in both paths: the size changed, and after a move
+    // the bytes below the new base are not initialized.
+    const new_header: *Header = .fromBase(new_base);
+    new_header.* = .{
+        .alignment = old_alignment,
+        .size = new_size,
+    };
+    return new_base;
+}
+
+/// `realloc` for an array of `elems` items of `len` bytes each. Returns null
+/// with errno set to ENOMEM when `elems * len` overflows `usize`.
+fn reallocarray(opt_base: ?[*]align(alignment_bytes) u8, elems: usize, len: usize) callconv(.c) ?[*]align(alignment_bytes) u8 {
+    if (std.math.mul(usize, elems, len)) |total| {
+        return realloc(opt_base, total);
+    } else |_| {
+        return nomem();
+    }
+}
+
+/// Releases an allocation previously returned by this file's allocation
+/// functions. A null pointer is a no-op.
+fn free(opt_old_base: ?[*]align(alignment_bytes) u8) callconv(.c) void {
+    if (opt_old_base) |base| {
+        const header: *Header = .fromBase(base);
+        assert(header.padding == 0);
+        const recorded_size = header.size;
+        const recorded_alignment = header.alignment;
+        const gap = recorded_alignment.toByteUnits();
+        // The backing allocation begins one alignment unit below `base` and
+        // spans that gap plus the client-visible bytes.
+        const start = base - gap;
+        vtable.free(no_context, start[0 .. recorded_size + gap], recorded_alignment, no_ra);
+    }
+}
+
+/// Returns the size recorded in the allocation's header, i.e. the byte count
+/// the client originally requested, or 0 for a null pointer.
+fn malloc_usable_size(opt_old_base: ?[*]align(alignment_bytes) u8) callconv(.c) usize {
+    if (opt_old_base) |base| {
+        const header: *Header = .fromBase(base);
+        assert(header.padding == 0);
+        return header.size;
+    }
+    return 0;
+}
+
+/// Allocates `n` bytes aligned to the page size reported by
+/// `std.heap.pageSize()`, by delegating to `aligned_alloc`.
+fn valloc(n: usize) callconv(.c) ?[*]align(alignment_bytes) u8 {
+    const page_bytes = std.heap.pageSize();
+    return aligned_alloc(page_bytes, n);
+}
+
+/// Same contract as `aligned_alloc`; the call is forwarded unchanged.
+fn memalign(alloc_alignment: usize, n: usize) callconv(.c) ?[*]align(alignment_bytes) u8 {
+    const base = aligned_alloc(alloc_alignment, n);
+    return base;
+}
+
+/// Stores a pointer to `n` bytes aligned to `alloc_alignment` in `result`.
+///
+/// Reports errors through the return value rather than errno:
+/// - EINVAL when `alloc_alignment` is smaller than a pointer or is not a
+///   power of two (`result` is left untouched),
+/// - ENOMEM when the allocation fails (`result` is left untouched),
+/// - 0 on success.
+fn posix_memalign(result: *?[*]align(alignment_bytes) u8, alloc_alignment: usize, n: usize) callconv(.c) c_int {
+    // The size check comes first so `isPowerOfTwo`, which requires a positive
+    // argument, never sees zero. Any power of two that is at least pointer
+    // sized is also a multiple of the pointer size.
+    if (alloc_alignment < @sizeOf(*anyopaque) or !std.math.isPowerOfTwo(alloc_alignment)) {
+        return @intFromEnum(std.c.E.INVAL);
+    }
+    result.* = aligned_alloc_inner(alloc_alignment, n) orelse return @intFromEnum(std.c.E.NOMEM);
+    return 0;
+}
+
+/// Failure path shared by the allocation functions: libc requires errno to be
+/// set to ENOMEM in addition to returning `null`.
+fn nomem() ?[*]align(alignment_bytes) u8 {
+    @branchHint(.cold);
+    const errno_location = std.c._errno();
+    errno_location.* = @intFromEnum(std.c.E.NOMEM);
+    return null;
+}
--- a/lib/libc/musl/src/aio/aio.c
+++ b/lib/libc/musl/src/aio/aio.c
@ -11,11 +11,6 @@
 #include "pthread_impl.h"
 #include "aio_impl.h"

-#define malloc __libc_malloc
-#define calloc __libc_calloc
-#define realloc __libc_realloc
-#define free __libc_free
-
 /* The following is a threads-based implementation of AIO with minimal
 * dependence on implementation details. Most synchronization is
 * performed with pthread primitives, but atomics and futex operations
--- a/lib/libc/musl/src/exit/atexit.c
+++ b/lib/libc/musl/src/exit/atexit.c
@ -4,11 +4,6 @@
 #include "lock.h"
 #include "fork_impl.h"

-#define malloc __libc_malloc
-#define calloc __libc_calloc
-#define realloc undef
-#define free undef
-
 /* Ensure that at least 32 atexit handlers can be registered without malloc */
 #define COUNT 32

--- a/lib/libc/musl/src/include/stdlib.h
+++ b/lib/libc/musl/src/include/stdlib.h
@ -10,10 +10,4 @@ hidden int __ptsname_r(int, char *, size_t);
 hidden char *__randname(char *);
 hidden void __qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *);

-hidden void *__libc_malloc(size_t);
-hidden void *__libc_malloc_impl(size_t);
-hidden void *__libc_calloc(size_t, size_t);
-hidden void *__libc_realloc(void *, size_t);
-hidden void __libc_free(void *);
-
 #endif
--- a/lib/libc/musl/src/ldso/dlerror.c
+++ b/lib/libc/musl/src/ldso/dlerror.c
@ -5,11 +5,6 @@
 #include "dynlink.h"
 #include "atomic.h"

-#define malloc __libc_malloc
-#define calloc __libc_calloc
-#define realloc __libc_realloc
-#define free __libc_free
-
 char *dlerror()
 {
 	pthread_t self = __pthread_self();
--- a/lib/libc/musl/src/locale/dcngettext.c
+++ b/lib/libc/musl/src/locale/dcngettext.c
@ -12,11 +12,6 @@
 #include "lock.h"
 #include "fork_impl.h"

-#define malloc __libc_malloc
-#define calloc __libc_calloc
-#define realloc undef
-#define free undef
-
 struct binding {
 	struct binding *next;
 	int dirlen;
--- a/lib/libc/musl/src/locale/duplocale.c
+++ b/lib/libc/musl/src/locale/duplocale.c
@ -3,11 +3,6 @@
 #include "locale_impl.h"
 #include "libc.h"

-#define malloc __libc_malloc
-#define calloc undef
-#define realloc undef
-#define free undef
-
 locale_t __duplocale(locale_t old)
 {
 	locale_t new = malloc(sizeof *new);
--- a/lib/libc/musl/src/locale/freelocale.c
+++ b/lib/libc/musl/src/locale/freelocale.c
@ -1,11 +1,6 @@
 #include <stdlib.h>
 #include "locale_impl.h"

-#define malloc undef
-#define calloc undef
-#define realloc undef
-#define free __libc_free
-
 void freelocale(locale_t l)
 {
 	if (__loc_is_allocated(l)) free(l);
--- a/lib/libc/musl/src/locale/locale_map.c
+++ b/lib/libc/musl/src/locale/locale_map.c
@ -7,11 +7,6 @@
 #include "lock.h"
 #include "fork_impl.h"

-#define malloc __libc_malloc
-#define calloc undef
-#define realloc undef
-#define free undef
-
 const char *__lctrans_impl(const char *msg, const struct __locale_map *lm)
 {
 	const char *trans = 0;
--- a/lib/libc/musl/src/locale/newlocale.c
+++ b/lib/libc/musl/src/locale/newlocale.c
@ -4,11 +4,6 @@
 #include "locale_impl.h"
 #include "lock.h"

-#define malloc __libc_malloc
-#define calloc undef
-#define realloc undef
-#define free undef
-
 static int default_locale_init_done;
 static struct __locale_struct default_locale, default_ctype_locale;

--- a/lib/libc/musl/src/malloc/calloc.c
+++ b/lib/libc/musl/src/malloc/calloc.c
@ -1,45 +0,0 @@
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <errno.h>
-#include "dynlink.h"
-
-static size_t mal0_clear(char *p, size_t n)
-{
-	const size_t pagesz = 4096; /* arbitrary */
-	if (n < pagesz) return n;
-#ifdef __GNUC__
-	typedef uint64_t __attribute__((__may_alias__)) T;
-#else
-	typedef unsigned char T;
-#endif
-	char *pp = p + n;
-	size_t i = (uintptr_t)pp & (pagesz - 1);
-	for (;;) {
-		pp = memset(pp - i, 0, i);
-		if (pp - p < pagesz) return pp - p;
-		for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T))
-		        if (((T *)pp)[-1] | ((T *)pp)[-2])
-				break;
-	}
-}
-
-static int allzerop(void *p)
-{
-	return 0;
-}
-weak_alias(allzerop, __malloc_allzerop);
-
-void *calloc(size_t m, size_t n)
-{
-	if (n && m > (size_t)-1/n) {
-		errno = ENOMEM;
-		return 0;
-	}
-	n *= m;
-	void *p = malloc(n);
-	if (!p || (!__malloc_replaced && __malloc_allzerop(p)))
-		return p;
-	n = mal0_clear(p, n);
-	return memset(p, 0, n);
-}
--- a/lib/libc/musl/src/malloc/free.c
+++ b/lib/libc/musl/src/malloc/free.c
@ -1,6 +0,0 @@
-#include <stdlib.h>
-
-void free(void *p)
-{
-	__libc_free(p);
-}
--- a/lib/libc/musl/src/malloc/libc_calloc.c
+++ b/lib/libc/musl/src/malloc/libc_calloc.c
@ -1,4 +0,0 @@
-#define calloc __libc_calloc
-#define malloc __libc_malloc
-
-#include "calloc.c"
--- a/lib/libc/musl/src/malloc/lite_malloc.c
+++ b/lib/libc/musl/src/malloc/lite_malloc.c
@ -1,118 +0,0 @@
-#include <stdlib.h>
-#include <stdint.h>
-#include <limits.h>
-#include <errno.h>
-#include <sys/mman.h>
-#include "libc.h"
-#include "lock.h"
-#include "syscall.h"
-#include "fork_impl.h"
-
-#define ALIGN 16
-
-/* This function returns true if the interval [old,new]
- * intersects the 'len'-sized interval below &libc.auxv
- * (interpreted as the main-thread stack) or below &b
- * (the current stack). It is used to defend against
- * buggy brk implementations that can cross the stack. */
-
-static int traverses_stack_p(uintptr_t old, uintptr_t new)
-{
-	const uintptr_t len = 8<<20;
-	uintptr_t a, b;
-
-	b = (uintptr_t)libc.auxv;
-	a = b > len ? b-len : 0;
-	if (new>a && old<b) return 1;
-
-	b = (uintptr_t)&b;
-	a = b > len ? b-len : 0;
-	if (new>a && old<b) return 1;
-
-	return 0;
-}
-
-static volatile int lock[1];
-volatile int *const __bump_lockptr = lock;
-
-static void *__simple_malloc(size_t n)
-{
-	static uintptr_t brk, cur, end;
-	static unsigned mmap_step;
-	size_t align=1;
-	void *p;
-
-	if (n > SIZE_MAX/2) {
-		errno = ENOMEM;
-		return 0;
-	}
-
-	if (!n) n++;
-	while (align<n && align<ALIGN)
-		align += align;
-
-	LOCK(lock);
-
-	cur += -cur & align-1;
-
-	if (n > end-cur) {
-		size_t req = n - (end-cur) + PAGE_SIZE-1 & -PAGE_SIZE;
-
-		if (!cur) {
-			brk = __syscall(SYS_brk, 0);
-			brk += -brk & PAGE_SIZE-1;
-			cur = end = brk;
-		}
-
-		if (brk == end && req < SIZE_MAX-brk
-		    && !traverses_stack_p(brk, brk+req)
-		    && __syscall(SYS_brk, brk+req)==brk+req) {
-			brk = end += req;
-		} else {
-			int new_area = 0;
-			req = n + PAGE_SIZE-1 & -PAGE_SIZE;
-			/* Only make a new area rather than individual mmap
-			 * if wasted space would be over 1/8 of the map. */
-			if (req-n > req/8) {
-				/* Geometric area size growth up to 64 pages,
-				 * bounding waste by 1/8 of the area. */
-				size_t min = PAGE_SIZE<<(mmap_step/2);
-				if (min-n > end-cur) {
-					if (req < min) {
-						req = min;
-						if (mmap_step < 12)
-							mmap_step++;
-					}
-					new_area = 1;
-				}
-			}
-			void *mem = __mmap(0, req, PROT_READ|PROT_WRITE,
-				MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-			if (mem == MAP_FAILED || !new_area) {
-				UNLOCK(lock);
-				return mem==MAP_FAILED ? 0 : mem;
-			}
-			cur = (uintptr_t)mem;
-			end = cur + req;
-		}
-	}
-
-	p = (void *)cur;
-	cur += n;
-	UNLOCK(lock);
-	return p;
-}
-
-weak_alias(__simple_malloc, __libc_malloc_impl);
-
-void *__libc_malloc(size_t n)
-{
-	return __libc_malloc_impl(n);
-}
-
-static void *default_malloc(size_t n)
-{
-	return __libc_malloc_impl(n);
-}
-
-weak_alias(default_malloc, malloc);
--- a/lib/libc/musl/src/malloc/mallocng/aligned_alloc.c
+++ b/lib/libc/musl/src/malloc/mallocng/aligned_alloc.c
@ -1,60 +0,0 @@
-#include <stdlib.h>
-#include <errno.h>
-#include "meta.h"
-
-void *aligned_alloc(size_t align, size_t len)
-{
-	if ((align & -align) != align) {
-		errno = EINVAL;
-		return 0;
-	}
-
-	if (len > SIZE_MAX - align || align >= (1ULL<<31)*UNIT) {
-		errno = ENOMEM;
-		return 0;
-	}
-
-	if (DISABLE_ALIGNED_ALLOC) {
-		errno = ENOMEM;
-		return 0;
-	}
-
-	if (align <= UNIT) align = UNIT;
-
-	unsigned char *p = malloc(len + align - UNIT);
-	if (!p)
-		return 0;
-
-	struct meta *g = get_meta(p);
-	int idx = get_slot_index(p);
-	size_t stride = get_stride(g);
-	unsigned char *start = g->mem->storage + stride*idx;
-	unsigned char *end = g->mem->storage + stride*(idx+1) - IB;
-	size_t adj = -(uintptr_t)p & (align-1);
-
-	if (!adj) {
-		set_size(p, end, len);
-		return p;
-	}
-	p += adj;
-	uint32_t offset = (size_t)(p-g->mem->storage)/UNIT;
-	if (offset <= 0xffff) {
-		*(uint16_t *)(p-2) = offset;
-		p[-4] = 0;
-	} else {
-		// use a 32-bit offset if 16-bit doesn't fit. for this,
-		// 16-bit field must be zero, [-4] byte nonzero.
-		*(uint16_t *)(p-2) = 0;
-		*(uint32_t *)(p-8) = offset;
-		p[-4] = 1;
-	}
-	p[-3] = idx;
-	set_size(p, end, len);
-	// store offset to aligned enframing. this facilitates cycling
-	// offset and also iteration of heap for debugging/measurement.
-	// for extreme overalignment it won't fit but these are classless
-	// allocations anyway.
-	*(uint16_t *)(start - 2) = (size_t)(p-start)/UNIT;
-	start[-3] = 7<<5;
-	return p;
-}
--- a/lib/libc/musl/src/malloc/mallocng/donate.c
+++ b/lib/libc/musl/src/malloc/mallocng/donate.c
@ -1,39 +0,0 @@
-#include <stdlib.h>
-#include <stdint.h>
-#include <limits.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <errno.h>
-
-#include "meta.h"
-
-static void donate(unsigned char *base, size_t len)
-{
-	uintptr_t a = (uintptr_t)base;
-	uintptr_t b = a + len;
-	a += -a & (UNIT-1);
-	b -= b & (UNIT-1);
-	memset(base, 0, len);
-	for (int sc=47; sc>0 && b>a; sc-=4) {
-		if (b-a < (size_classes[sc]+1)*UNIT) continue;
-		struct meta *m = alloc_meta();
-		m->avail_mask = 0;
-		m->freed_mask = 1;
-		m->mem = (void *)a;
-		m->mem->meta = m;
-		m->last_idx = 0;
-		m->freeable = 0;
-		m->sizeclass = sc;
-		m->maplen = 0;
-		*((unsigned char *)m->mem+UNIT-4) = 0;
-		*((unsigned char *)m->mem+UNIT-3) = 255;
-		m->mem->storage[size_classes[sc]*UNIT-4] = 0;
-		queue(&ctx.active[sc], m);
-		a += (size_classes[sc]+1)*UNIT;
-	}
-}
-
-void __malloc_donate(char *start, char *end)
-{
-	donate((void *)start, end-start);
-}
--- a/lib/libc/musl/src/malloc/mallocng/free.c
+++ b/lib/libc/musl/src/malloc/mallocng/free.c
@ -1,151 +0,0 @@
-#define _BSD_SOURCE
-#include <stdlib.h>
-#include <sys/mman.h>
-
-#include "meta.h"
-
-struct mapinfo {
-	void *base;
-	size_t len;
-};
-
-static struct mapinfo nontrivial_free(struct meta *, int);
-
-static struct mapinfo free_group(struct meta *g)
-{
-	struct mapinfo mi = { 0 };
-	int sc = g->sizeclass;
-	if (sc < 48) {
-		ctx.usage_by_class[sc] -= g->last_idx+1;
-	}
-	if (g->maplen) {
-		step_seq();
-		record_seq(sc);
-		mi.base = g->mem;
-		mi.len = g->maplen*4096UL;
-	} else {
-		void *p = g->mem;
-		struct meta *m = get_meta(p);
-		int idx = get_slot_index(p);
-		g->mem->meta = 0;
-		// not checking size/reserved here; it's intentionally invalid
-		mi = nontrivial_free(m, idx);
-	}
-	free_meta(g);
-	return mi;
-}
-
-static int okay_to_free(struct meta *g)
-{
-	int sc = g->sizeclass;
-
-	if (!g->freeable) return 0;
-
-	// always free individual mmaps not suitable for reuse
-	if (sc >= 48 || get_stride(g) < UNIT*size_classes[sc])
-		return 1;
-
-	// always free groups allocated inside another group's slot
-	// since recreating them should not be expensive and they
-	// might be blocking freeing of a much larger group.
-	if (!g->maplen) return 1;
-
-	// if there is another non-full group, free this one to
-	// consolidate future allocations, reduce fragmentation.
-	if (g->next != g) return 1;
-
-	// free any group in a size class that's not bouncing
-	if (!is_bouncing(sc)) return 1;
-
-	size_t cnt = g->last_idx+1;
-	size_t usage = ctx.usage_by_class[sc];
-
-	// if usage is high enough that a larger count should be
-	// used, free the low-count group so a new one will be made.
-	if (9*cnt <= usage && cnt < 20)
-		return 1;
-
-	// otherwise, keep the last group in a bouncing class.
-	return 0;
-}
-
-static struct mapinfo nontrivial_free(struct meta *g, int i)
-{
-	uint32_t self = 1u<<i;
-	int sc = g->sizeclass;
-	uint32_t mask = g->freed_mask | g->avail_mask;
-
-	if (mask+self == (2u<<g->last_idx)-1 && okay_to_free(g)) {
-		// any multi-slot group is necessarily on an active list
-		// here, but single-slot groups might or might not be.
-		if (g->next) {
-			assert(sc < 48);
-			int activate_new = (ctx.active[sc]==g);
-			dequeue(&ctx.active[sc], g);
-			if (activate_new && ctx.active[sc])
-				activate_group(ctx.active[sc]);
-		}
-		return free_group(g);
-	} else if (!mask) {
-		assert(sc < 48);
-		// might still be active if there were no allocations
-		// after last available slot was taken.
-		if (ctx.active[sc] != g) {
-			queue(&ctx.active[sc], g);
-		}
-	}
-	a_or(&g->freed_mask, self);
-	return (struct mapinfo){ 0 };
-}
-
-void free(void *p)
-{
-	if (!p) return;
-
-	struct meta *g = get_meta(p);
-	int idx = get_slot_index(p);
-	size_t stride = get_stride(g);
-	unsigned char *start = g->mem->storage + stride*idx;
-	unsigned char *end = start + stride - IB;
-	get_nominal_size(p, end);
-	uint32_t self = 1u<<idx, all = (2u<<g->last_idx)-1;
-	((unsigned char *)p)[-3] = 255;
-	// invalidate offset to group header, and cycle offset of
-	// used region within slot if current offset is zero.
-	*(uint16_t *)((char *)p-2) = 0;
-
-	// release any whole pages contained in the slot to be freed
-	// unless it's a single-slot group that will be unmapped.
-	if (((uintptr_t)(start-1) ^ (uintptr_t)end) >= 2*PGSZ && g->last_idx) {
-		unsigned char *base = start + (-(uintptr_t)start & (PGSZ-1));
-		size_t len = (end-base) & -PGSZ;
-		if (len && USE_MADV_FREE) {
-			int e = errno;
-			madvise(base, len, MADV_FREE);
-			errno = e;
-		}
-	}
-
-	// atomic free without locking if this is neither first or last slot
-	for (;;) {
-		uint32_t freed = g->freed_mask;
-		uint32_t avail = g->avail_mask;
-		uint32_t mask = freed | avail;
-		assert(!(mask&self));
-		if (!freed || mask+self==all) break;
-		if (!MT)
-			g->freed_mask = freed+self;
-		else if (a_cas(&g->freed_mask, freed, freed+self)!=freed)
-			continue;
-		return;
-	}
-
-	wrlock();
-	struct mapinfo mi = nontrivial_free(g, idx);
-	unlock();
-	if (mi.len) {
-		int e = errno;
-		munmap(mi.base, mi.len);
-		errno = e;
-	}
-}
--- a/lib/libc/musl/src/malloc/mallocng/glue.h
+++ b/lib/libc/musl/src/malloc/mallocng/glue.h
@ -1,95 +0,0 @@
-#ifndef MALLOC_GLUE_H
-#define MALLOC_GLUE_H
-
-#include <stdint.h>
-#include <sys/mman.h>
-#include <pthread.h>
-#include <unistd.h>
-#include <elf.h>
-#include <string.h>
-#include "atomic.h"
-#include "syscall.h"
-#include "libc.h"
-#include "lock.h"
-#include "dynlink.h"
-
-// use macros to appropriately namespace these.
-#define size_classes __malloc_size_classes
-#define ctx __malloc_context
-#define alloc_meta __malloc_alloc_meta
-#define is_allzero __malloc_allzerop
-#define dump_heap __dump_heap
-
-#define malloc __libc_malloc_impl
-#define realloc __libc_realloc
-#define free __libc_free
-
-#define USE_MADV_FREE 0
-
-#if USE_REAL_ASSERT
-#include <assert.h>
-#else
-#undef assert
-#define assert(x) do { if (!(x)) a_crash(); } while(0)
-#endif
-
-#define brk(p) ((uintptr_t)__syscall(SYS_brk, p))
-
-#define mmap __mmap
-#define madvise __madvise
-#define mremap __mremap
-
-#define DISABLE_ALIGNED_ALLOC (__malloc_replaced && !__aligned_alloc_replaced)
-
-static inline uint64_t get_random_secret()
-{
-	uint64_t secret = (uintptr_t)&secret * 1103515245;
-	for (size_t i=0; libc.auxv[i]; i+=2)
-		if (libc.auxv[i]==AT_RANDOM)
-			memcpy(&secret, (char *)libc.auxv[i+1]+8, sizeof secret);
-	return secret;
-}
-
-#ifndef PAGESIZE
-#define PAGESIZE PAGE_SIZE
-#endif
-
-#define MT (libc.need_locks)
-
-#define RDLOCK_IS_EXCLUSIVE 1
-
-__attribute__((__visibility__("hidden")))
-extern int __malloc_lock[1];
-
-#define LOCK_OBJ_DEF \
-int __malloc_lock[1]; \
-void __malloc_atfork(int who) { malloc_atfork(who); }
-
-static inline void rdlock()
-{
-	if (MT) LOCK(__malloc_lock);
-}
-static inline void wrlock()
-{
-	if (MT) LOCK(__malloc_lock);
-}
-static inline void unlock()
-{
-	UNLOCK(__malloc_lock);
-}
-static inline void upgradelock()
-{
-}
-static inline void resetlock()
-{
-	__malloc_lock[0] = 0;
-}
-
-static inline void malloc_atfork(int who)
-{
-	if (who<0) rdlock();
-	else if (who>0) resetlock();
-	else unlock();
-}
-
-#endif
--- a/lib/libc/musl/src/malloc/mallocng/malloc.c
+++ b/lib/libc/musl/src/malloc/mallocng/malloc.c
@ -1,387 +0,0 @@
-#include <stdlib.h>
-#include <stdint.h>
-#include <limits.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <errno.h>
-
-#include "meta.h"
-
-LOCK_OBJ_DEF;
-
-// Slot size for each of the 48 size classes, expressed in multiples of
-// UNIT (users multiply by UNIT to obtain bytes). The first ten entries
-// are consecutive; the remaining entries come in rows of four.
-const uint16_t size_classes[] = {
-	1, 2, 3, 4, 5, 6, 7, 8,
-	9, 10, 12, 15,
-	18, 20, 25, 31,
-	36, 42, 50, 63,
-	72, 84, 102, 127,
-	146, 170, 204, 255,
-	292, 340, 409, 511,
-	584, 682, 818, 1023,
-	1169, 1364, 1637, 2047,
-	2340, 2730, 3276, 4095,
-	4680, 5460, 6552, 8191,
-};
-
-// Slot counts for new groups of size classes 0..8. The row is the size
-// class; alloc_group() walks the columns left to right while 4x the
-// entry still exceeds the class's current usage.
-static const uint8_t small_cnt_tab[][3] = {
-	{ 30, 30, 30 },
-	{ 31, 15, 15 },
-	{ 20, 10, 10 },
-	{ 31, 15, 7 },
-	{ 25, 12, 6 },
-	{ 21, 10, 5 },
-	{ 18, 8, 4 },
-	{ 31, 15, 7 },
-	{ 28, 14, 6 },
-};
-
-// Starting slot count for size classes >= 9, indexed by (sc & 3).
-static const uint8_t med_cnt_tab[4] = { 28, 24, 20, 32 };
-
-struct malloc_context ctx = { 0 };
-
-// Hand out one struct meta with prev/next cleared, or return 0 on
-// failure. Sources, in order of preference:
-//   1. the ring of recycled metas at ctx.free_meta_head;
-//   2. the next unused slot of the current 4096-byte meta area;
-//   3. a new meta area, obtained by growing the brk or, failing that,
-//      from a PROT_NONE mmap reservation that is made accessible one
-//      page at a time.
-// The first call also initializes ctx (page size and secret).
-struct meta *alloc_meta(void)
-{
-	struct meta *m;
-	unsigned char *p;
-	if (!ctx.init_done) {
-#ifndef PAGESIZE
-		ctx.pagesize = get_page_size();
-#endif
-		ctx.secret = get_random_secret();
-		ctx.init_done = 1;
-	}
-	size_t pagesize = PGSZ;
-	// meta areas are always 4096 bytes; never work in smaller steps.
-	if (pagesize < 4096) pagesize = 4096;
-	// fast path: reuse a meta released through free_meta().
-	if ((m = dequeue_head(&ctx.free_meta_head))) return m;
-	if (!ctx.avail_meta_count) {
-		int need_unprotect = 1;
-		// ctx.brk: 0 = not queried yet, -1 = brk found unusable.
-		if (!ctx.avail_meta_area_count && ctx.brk!=-1) {
-			uintptr_t new = ctx.brk + pagesize;
-			int need_guard = 0;
-			if (!ctx.brk) {
-				need_guard = 1;
-				ctx.brk = brk(0);
-				// some ancient kernels returned _ebss
-				// instead of next page as initial brk.
-				ctx.brk += -ctx.brk & (pagesize-1);
-				new = ctx.brk + 2*pagesize;
-			}
-			if (brk(new) != new) {
-				ctx.brk = -1;
-			} else {
-				// first use: turn the lowest new page into an
-				// inaccessible guard page.
-				if (need_guard) mmap((void *)ctx.brk, pagesize,
-					PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-				ctx.brk = new;
-				ctx.avail_meta_areas = (void *)(new - pagesize);
-				ctx.avail_meta_area_count = pagesize>>12;
-				need_unprotect = 0;
-			}
-		}
-		if (!ctx.avail_meta_area_count) {
-			// reserve 2<<shift pages; the shift grows on every
-			// reservation, and the first page is left unused.
-			size_t n = 2UL << ctx.meta_alloc_shift;
-			p = mmap(0, n*pagesize, PROT_NONE,
-				MAP_PRIVATE|MAP_ANON, -1, 0);
-			if (p==MAP_FAILED) return 0;
-			ctx.avail_meta_areas = p + pagesize;
-			ctx.avail_meta_area_count = (n-1)*(pagesize>>12);
-			ctx.meta_alloc_shift++;
-		}
-		p = ctx.avail_meta_areas;
-		// only the first area of each page needs the mprotect call.
-		if ((uintptr_t)p & (pagesize-1)) need_unprotect = 0;
-		if (need_unprotect)
-			if (mprotect(p, pagesize, PROT_READ|PROT_WRITE)
-			    && errno != ENOSYS)
-				return 0;
-		ctx.avail_meta_area_count--;
-		ctx.avail_meta_areas = p + 4096;
-		// append the new area to the singly linked list of areas.
-		if (ctx.meta_area_tail) {
-			ctx.meta_area_tail->next = (void *)p;
-		} else {
-			ctx.meta_area_head = (void *)p;
-		}
-		ctx.meta_area_tail = (void *)p;
-		ctx.meta_area_tail->check = ctx.secret;
-		ctx.avail_meta_count = ctx.meta_area_tail->nslots
-			= (4096-sizeof(struct meta_area))/sizeof *m;
-		ctx.avail_meta = ctx.meta_area_tail->slots;
-	}
-	ctx.avail_meta_count--;
-	m = ctx.avail_meta++;
-	m->prev = m->next = 0;
-	return m;
-}
-
-// Take one available slot from the ring of active groups headed by *pm.
-// Returns a single-bit mask naming the slot (already cleared from that
-// group's avail_mask), or 0 when there is no group to take a slot from.
-// When the head group has no available slots, the head may be dequeued
-// or *pm rotated to another group, freed slots are moved to the
-// available set via activate_group(), and a group's active_idx may be
-// raised.
-static uint32_t try_avail(struct meta **pm)
-{
-	struct meta *m = *pm;
-	uint32_t first;
-	if (!m) return 0;
-	uint32_t mask = m->avail_mask;
-	if (!mask) {
-		// m is known to be non-null here (checked above and not
-		// reassigned since), so no second null test is needed.
-		if (!m->freed_mask) {
-			// nothing available and nothing freed: the group is
-			// fully in use, drop it from the active ring.
-			dequeue(pm, m);
-			m = *pm;
-			if (!m) return 0;
-		} else {
-			m = m->next;
-			*pm = m;
-		}
-
-		mask = m->freed_mask;
-
-		// skip fully-free group unless it's the only one
-		// or it's a permanently non-freeable group
-		if (mask == (2u<<m->last_idx)-1 && m->freeable) {
-			m = m->next;
-			*pm = m;
-			mask = m->freed_mask;
-		}
-
-		// activate more slots in a not-fully-active group
-		// if needed, but only as a last resort. prefer using
-		// any other group with free slots. this avoids
-		// touching & dirtying as-yet-unused pages.
-		if (!(mask & ((2u<<m->mem->active_idx)-1))) {
-			if (m->next != m) {
-				m = m->next;
-				*pm = m;
-			} else {
-				int cnt = m->mem->active_idx + 2;
-				int size = size_classes[m->sizeclass]*UNIT;
-				int span = UNIT + size*cnt;
-				// activate up to next 4k boundary
-				while ((span^(span+size-1)) < 4096) {
-					cnt++;
-					span += size;
-				}
-				if (cnt > m->last_idx+1)
-					cnt = m->last_idx+1;
-				m->mem->active_idx = cnt-1;
-			}
-		}
-		mask = activate_group(m);
-		assert(mask);
-		decay_bounces(m->sizeclass);
-	}
-	// isolate the lowest set bit and remove it from the available set.
-	first = mask&-mask;
-	m->avail_mask = mask-first;
-	return first;
-}
-
-static int alloc_slot(int, size_t);
-
-// Create a new group of slots for size class sc and return its meta,
-// or 0 on failure. req is the size of the request that triggered the
-// allocation; it is only used when deciding whether to produce a
-// single-slot, individually mmapped group. The slot count is chosen
-// from the count tables and the class's current usage. Groups whose
-// total size exceeds half a page get their own mmap; smaller ones are
-// nested inside a slot obtained from alloc_slot(). The returned meta
-// is fully initialized but not yet queued on ctx.active[sc].
-static struct meta *alloc_group(int sc, size_t req)
-{
-	size_t size = UNIT*size_classes[sc];
-	int i = 0, cnt;
-	unsigned char *p;
-	struct meta *m = alloc_meta();
-	if (!m) return 0;
-	size_t usage = ctx.usage_by_class[sc];
-	size_t pagesize = PGSZ;
-	int active_idx;
-	if (sc < 9) {
-		while (i<2 && 4*small_cnt_tab[sc][i] > usage)
-			i++;
-		cnt = small_cnt_tab[sc][i];
-	} else {
-		// lookup max number of slots fitting in power-of-two size
-		// from a table, along with number of factors of two we
-		// can divide out without a remainder or reaching 1.
-		cnt = med_cnt_tab[sc&3];
-
-		// reduce cnt to avoid excessive eager allocation.
-		while (!(cnt&1) && 4*cnt > usage)
-			cnt >>= 1;
-
-		// data structures don't support groups whose slot offsets
-		// in units don't fit in 16 bits.
-		while (size*cnt >= 65536*UNIT)
-			cnt >>= 1;
-	}
-
-	// If we selected a count of 1 above but it's not sufficient to use
-	// mmap, increase to 2. Then it might be; if not it will nest.
-	if (cnt==1 && size*cnt+UNIT <= pagesize/2) cnt = 2;
-
-	// All choices of size*cnt are "just below" a power of two, so anything
-	// larger than half the page size should be allocated as whole pages.
-	if (size*cnt+UNIT > pagesize/2) {
-		// check/update bounce counter to start/increase retention
-		// of freed maps, and inhibit use of low-count, odd-size
-		// small mappings and single-slot groups if activated.
-		int nosmall = is_bouncing(sc);
-		account_bounce(sc);
-		step_seq();
-
-		// since the following count reduction opportunities have
-		// an absolute memory usage cost, don't overdo them. count
-		// coarse usage as part of usage.
-		if (!(sc&1) && sc<32) usage += ctx.usage_by_class[sc+1];
-
-		// try to drop to a lower count if the one found above
-		// increases usage by more than 25%. these reduced counts
-		// roughly fill an integral number of pages, just not a
-		// power of two, limiting amount of unusable space.
-		if (4*cnt > usage && !nosmall) {
-			if (0);
-			else if ((sc&3)==1 && size*cnt>8*pagesize) cnt = 2;
-			else if ((sc&3)==2 && size*cnt>4*pagesize) cnt = 3;
-			else if ((sc&3)==0 && size*cnt>8*pagesize) cnt = 3;
-			else if ((sc&3)==0 && size*cnt>2*pagesize) cnt = 5;
-		}
-		// round the mapping length up to a whole number of pages.
-		size_t needed = size*cnt + UNIT;
-		needed += -needed & (pagesize-1);
-
-		// produce an individually-mmapped allocation if usage is low,
-		// bounce counter hasn't triggered, and either it saves memory
-		// or it avoids eager slot allocation without wasting too much.
-		if (!nosmall && cnt<=7) {
-			req += IB + UNIT;
-			req += -req & (pagesize-1);
-			if (req<size+UNIT || (req>=4*pagesize && 2*cnt>usage)) {
-				cnt = 1;
-				needed = req;
-			}
-		}
-
-		p = mmap(0, needed, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
-		if (p==MAP_FAILED) {
-			free_meta(m);
-			return 0;
-		}
-		// maplen is kept in 4096-byte units.
-		m->maplen = needed>>12;
-		ctx.mmap_counter++;
-		// initially activate only the slots that fit in the first
-		// 4096 bytes, clamped to [0, cnt-1].
-		active_idx = (4096-UNIT)/size-1;
-		if (active_idx > cnt-1) active_idx = cnt-1;
-		if (active_idx < 0) active_idx = 0;
-	} else {
-		// nested group: carve the storage out of a slot belonging
-		// to a larger size class j.
-		int j = size_to_class(UNIT+cnt*size-IB);
-		int idx = alloc_slot(j, UNIT+cnt*size-IB);
-		if (idx < 0) {
-			free_meta(m);
-			return 0;
-		}
-		struct meta *g = ctx.active[j];
-		p = enframe(g, idx, UNIT*size_classes[j]-IB, ctx.mmap_counter);
-		m->maplen = 0;
-		// set the top three bits of the enclosing slot's header
-		// byte to 6, keeping its slot index.
-		p[-3] = (p[-3]&31) | (6<<5);
-		for (int i=0; i<=cnt; i++)
-			p[UNIT+i*size-4] = 0;
-		active_idx = cnt-1;
-	}
-	ctx.usage_by_class[sc] += cnt;
-	// slots 0..active_idx start out available; the remaining slots up
-	// to cnt-1 start out in freed_mask.
-	m->avail_mask = (2u<<active_idx)-1;
-	m->freed_mask = (2u<<(cnt-1))-1 - m->avail_mask;
-	m->mem = (void *)p;
-	m->mem->meta = m;
-	m->mem->active_idx = active_idx;
-	m->last_idx = cnt-1;
-	m->freeable = 1;
-	m->sizeclass = sc;
-	return m;
-}
-
-// Obtain a slot index in the active group of size class sc, creating a
-// new group when the existing ones have nothing to offer. Returns the
-// slot index within ctx.active[sc], or -1 if no group could be
-// allocated.
-static int alloc_slot(int sc, size_t req)
-{
-	uint32_t bit = try_avail(&ctx.active[sc]);
-	if (!bit) {
-		struct meta *fresh = alloc_group(sc, req);
-		if (!fresh) return -1;
-
-		// claim slot 0 of the brand-new group before publishing it.
-		fresh->avail_mask--;
-		queue(&ctx.active[sc], fresh);
-		return 0;
-	}
-	return a_ctz_32(bit);
-}
-
-// Allocate n bytes. Returns 0 when n is rejected by size_overflows()
-// or when memory/metadata cannot be obtained.
-//   - Requests of MMAP_THRESHOLD bytes or more get a dedicated mmap
-//     described by a single-slot group of size class 63.
-//   - Smaller requests take a slot from the active group of their size
-//     class, going through alloc_slot() when the active group has no
-//     slot immediately available.
-// Both paths finish at the success label, where the slot is framed by
-// enframe() after the lock has been dropped.
-void *malloc(size_t n)
-{
-	if (size_overflows(n)) return 0;
-	struct meta *g;
-	uint32_t mask, first;
-	int sc;
-	int idx;
-	int ctr;
-
-	if (n >= MMAP_THRESHOLD) {
-		size_t needed = n + IB + UNIT;
-		// the mapping is created before the lock is taken.
-		void *p = mmap(0, needed, PROT_READ|PROT_WRITE,
-			MAP_PRIVATE|MAP_ANON, -1, 0);
-		if (p==MAP_FAILED) return 0;
-		wrlock();
-		step_seq();
-		g = alloc_meta();
-		if (!g) {
-			unlock();
-			munmap(p, needed);
-			return 0;
-		}
-		g->mem = p;
-		g->mem->meta = g;
-		g->last_idx = 0;
-		g->freeable = 1;
-		g->sizeclass = 63;
-		g->maplen = (needed+4095)/4096;
-		g->avail_mask = g->freed_mask = 0;
-		// use a global counter to cycle offset in
-		// individually-mmapped allocations.
-		ctx.mmap_counter++;
-		idx = 0;
-		goto success;
-	}
-
-	sc = size_to_class(n);
-
-	rdlock();
-	g = ctx.active[sc];
-
-	// use coarse size classes initially when there are not yet
-	// any groups of desired size. this allows counts of 2 or 3
-	// to be allocated at first rather than having to start with
-	// 7 or 5, the min counts for even size classes.
-	if (!g && sc>=4 && sc<32 && sc!=6 && !(sc&1) && !ctx.usage_by_class[sc]) {
-		size_t usage = ctx.usage_by_class[sc|1];
-		// if a new group may be allocated, count it toward
-		// usage in deciding if we can use coarse class.
-		if (!ctx.active[sc|1] || (!ctx.active[sc|1]->avail_mask
-		    && !ctx.active[sc|1]->freed_mask))
-			usage += 3;
-		if (usage <= 12)
-			sc |= 1;
-		g = ctx.active[sc];
-	}
-
-	// fast path: grab the lowest available slot of the active group.
-	// the loop only repeats when the a_cas below loses a race.
-	for (;;) {
-		mask = g ? g->avail_mask : 0;
-		first = mask&-mask;
-		if (!first) break;
-		if (RDLOCK_IS_EXCLUSIVE || !MT)
-			g->avail_mask = mask-first;
-		else if (a_cas(&g->avail_mask, mask, mask-first)!=mask)
-			continue;
-		idx = a_ctz_32(first);
-		goto success;
-	}
-	upgradelock();
-
-	idx = alloc_slot(sc, n);
-	if (idx < 0) {
-		unlock();
-		return 0;
-	}
-	g = ctx.active[sc];
-
-success:
-	// snapshot the counter while still holding the lock.
-	ctr = ctx.mmap_counter;
-	unlock();
-	return enframe(g, idx, n, ctr);
-}
-
-// Returns nonzero when the group holding p has a size class of 48 or
-// above, or when the group's stride is smaller than the nominal slot
-// size of its size class; returns 0 otherwise.
-// NOTE(review): the name suggests callers use this to skip zeroing
-// memory that is already zero - confirm against the caller.
-int is_allzero(void *p)
-{
-	struct meta *g = get_meta(p);
-	if (g->sizeclass >= 48) return 1;
-	return get_stride(g) < UNIT*size_classes[g->sizeclass];
-}
--- a/lib/libc/musl/src/malloc/mallocng/malloc_usable_size.c
+++ b/lib/libc/musl/src/malloc/mallocng/malloc_usable_size.c
@ -1,13 +0,0 @@
-#include <stdlib.h>
-#include "meta.h"
-
-// Report the size recorded for allocation p (0 for a null pointer).
-// The value is the nominal size computed by get_nominal_size() from the
-// slot's header and end-of-slot bookkeeping.
-size_t malloc_usable_size(void *p)
-{
-	if (p == 0) return 0;
-	struct meta *g = get_meta(p);
-	int idx = get_slot_index(p);
-	size_t stride = get_stride(g);
-	// end of the slot's payload area: slot start + stride - IB.
-	unsigned char *slot_end = g->mem->storage + stride*idx + stride - IB;
-	return get_nominal_size(p, slot_end);
-}
--- a/lib/libc/musl/src/malloc/mallocng/meta.h
+++ b/lib/libc/musl/src/malloc/mallocng/meta.h
@ -1,288 +0,0 @@
-#ifndef MALLOC_META_H
-#define MALLOC_META_H
-
-#include <stdint.h>
-#include <errno.h>
-#include <limits.h>
-#include "glue.h"
-
-__attribute__((__visibility__("hidden")))
-extern const uint16_t size_classes[];
-
-#define MMAP_THRESHOLD 131052
-
-#define UNIT 16
-#define IB 4
-
-// Header placed in front of a group's slot storage. The pad member
-// sizes the header so that storage begins UNIT bytes after the start
-// (pointer + 1 byte + pad == UNIT).
-struct group {
-	// back pointer to the out-of-band metadata for this group
-	struct meta *meta;
-	// highest slot index currently activated (5 bits)
-	unsigned char active_idx:5;
-	char pad[UNIT - sizeof(struct meta *) - 1];
-	// slot storage, addressed as storage + stride*index
-	unsigned char storage[];
-};
-
-// Out-of-band metadata describing one group of equally sized slots.
-struct meta {
-	// links of the circular list maintained by queue()/dequeue()
-	struct meta *prev, *next;
-	// the group header and storage this meta describes
-	struct group *mem;
-	// bitmasks with one bit per slot: slots ready to hand out, and
-	// slots that have been freed but not reactivated
-	volatile int avail_mask, freed_mask;
-	// index of the last slot in the group (slot count - 1)
-	uintptr_t last_idx:5;
-	uintptr_t freeable:1;
-	// size class index; 63 marks an individually mmapped allocation
-	uintptr_t sizeclass:6;
-	// mapping length in 4096-byte units, 0 for a nested group
-	uintptr_t maplen:8*sizeof(uintptr_t)-12;
-};
-
-// A 4096-byte aligned block holding an array of struct meta. get_meta()
-// finds the area by masking a meta pointer with -4096.
-struct meta_area {
-	// copy of ctx.secret, verified by get_meta()
-	uint64_t check;
-	// next area in the list starting at ctx.meta_area_head
-	struct meta_area *next;
-	// number of entries in slots
-	int nslots;
-	struct meta slots[];
-};
-
-// Global allocator state (the single instance is ctx).
-struct malloc_context {
-	// value stamped into every meta_area.check
-	uint64_t secret;
-#ifndef PAGESIZE
-	// runtime page size, used when PAGESIZE is not a macro
-	size_t pagesize;
-#endif
-	// nonzero once alloc_meta() has initialized the context
-	int init_done;
-	// bumped for each mmap-backed group; passed to enframe()
-	unsigned mmap_counter;
-	// ring of metas released by free_meta()
-	struct meta *free_meta_head;
-	// next unused meta in the current meta area
-	struct meta *avail_meta;
-	// remaining metas in the current area, remaining areas in the
-	// current reservation, and growth shift for the next reservation
-	size_t avail_meta_count, avail_meta_area_count, meta_alloc_shift;
-	struct meta_area *meta_area_head, *meta_area_tail;
-	// address of the next unused meta area
-	unsigned char *avail_meta_areas;
-	// per size class: head of the ring of active groups
-	struct meta *active[48];
-	// per size class: running total of slots in allocated groups
-	size_t usage_by_class[48];
-	// bounce tracking state, indexed by (size class - 7)
-	uint8_t unmap_seq[32], bounces[32];
-	// sequence counter advanced by step_seq()
-	uint8_t seq;
-	// current program break; 0 = not yet queried, -1 = brk unusable
-	uintptr_t brk;
-};
-
-__attribute__((__visibility__("hidden")))
-extern struct malloc_context ctx;
-
-#ifdef PAGESIZE
-#define PGSZ PAGESIZE
-#else
-#define PGSZ ctx.pagesize
-#endif
-
-__attribute__((__visibility__("hidden")))
-struct meta *alloc_meta(void);
-
-__attribute__((__visibility__("hidden")))
-int is_allzero(void *);
-
-// Append m to the circular list headed by *phead, i.e. insert it just
-// before the head. m must be unlinked (both links null). An empty list
-// becomes a one-element ring with m as its head.
-static inline void queue(struct meta **phead, struct meta *m)
-{
-	assert(!m->next);
-	assert(!m->prev);
-	struct meta *head = *phead;
-	if (!head) {
-		m->prev = m->next = m;
-		*phead = m;
-		return;
-	}
-	struct meta *tail = head->prev;
-	m->next = head;
-	m->prev = tail;
-	tail->next = m;
-	head->prev = m;
-}
-
-// Unlink m from the circular list headed by *phead and clear its links.
-// If m was the only element the list becomes empty; if m was the head,
-// the head advances to m's successor.
-static inline void dequeue(struct meta **phead, struct meta *m)
-{
-	if (m->next == m) {
-		*phead = 0;
-	} else {
-		m->prev->next = m->next;
-		m->next->prev = m->prev;
-		if (*phead == m) *phead = m->next;
-	}
-	m->prev = m->next = 0;
-}
-
-// Remove and return the head of the list, or return 0 if it is empty.
-static inline struct meta *dequeue_head(struct meta **phead)
-{
-	struct meta *m = *phead;
-	if (!m) return 0;
-	dequeue(phead, m);
-	return m;
-}
-
-// Recycle a meta: wipe all of its fields, then put it on the
-// ctx.free_meta_head ring for alloc_meta() to reuse.
-static inline void free_meta(struct meta *m)
-{
-	// zeroing also clears prev/next, which queue() asserts on.
-	*m = (struct meta){0};
-	queue(&ctx.free_meta_head, m);
-}
-
-// Move the freed slots that lie within the activated range
-// (indices 0..active_idx) from freed_mask to avail_mask. Requires
-// avail_mask to be empty on entry. Returns the new avail_mask.
-static inline uint32_t activate_group(struct meta *m)
-{
-	assert(!m->avail_mask);
-	uint32_t mask, act = (2u<<m->mem->active_idx)-1;
-	// retry until freed_mask is updated from an unchanged snapshot.
-	do mask = m->freed_mask;
-	while (a_cas(&m->freed_mask, mask, mask&~act)!=mask);
-	return m->avail_mask = mask & act;
-}
-
-// Slot index of allocation p within its group: the low 5 bits of the
-// header byte at p[-3].
-static inline int get_slot_index(const unsigned char *p)
-{
-	return p[-3] & 31;
-}
-
-// Locate the struct meta for allocation p from the header bytes stored
-// just below p, cross-checking everything along the way. Any
-// inconsistency fails an assert.
-static inline struct meta *get_meta(const unsigned char *p)
-{
-	assert(!((uintptr_t)p & 15));
-	// distance from the start of group storage to p, in UNITs
-	int offset = *(const uint16_t *)(p - 2);
-	int index = get_slot_index(p);
-	// a nonzero p[-4] means the 16-bit field is zero and the offset
-	// is stored as 32 bits at p-8 instead.
-	if (p[-4]) {
-		assert(!offset);
-		offset = *(uint32_t *)(p - 8);
-		assert(offset > 0xffff);
-	}
-	const struct group *base = (const void *)(p - UNIT*offset - UNIT);
-	const struct meta *meta = base->meta;
-	assert(meta->mem == base);
-	assert(index <= meta->last_idx);
-	// the slot must be neither available nor freed.
-	assert(!(meta->avail_mask & (1u<<index)));
-	assert(!(meta->freed_mask & (1u<<index)));
-	// the meta must live in a meta area carrying the secret.
-	const struct meta_area *area = (void *)((uintptr_t)meta & -4096);
-	assert(area->check == ctx.secret);
-	if (meta->sizeclass < 48) {
-		// the offset must fall inside slot number index.
-		assert(offset >= size_classes[meta->sizeclass]*index);
-		assert(offset < size_classes[meta->sizeclass]*(index+1));
-	} else {
-		assert(meta->sizeclass == 63);
-	}
-	if (meta->maplen) {
-		assert(offset <= meta->maplen*4096UL/UNIT - 1);
-	}
-	return (struct meta *)meta;
-}
-
-// Compute the size recorded for allocation p, where end is the end of
-// its slot's payload area. The number of reserved (unused) trailing
-// bytes is held in the top 3 bits of p[-3]; the value 5 means the real
-// count is stored as a 32-bit value at end-4. Returns end - reserved - p.
-static inline size_t get_nominal_size(const unsigned char *p, const unsigned char *end)
-{
-	size_t reserved = p[-3] >> 5;
-	if (reserved >= 5) {
-		assert(reserved == 5);
-		reserved = *(const uint32_t *)(end-4);
-		assert(reserved >= 5);
-		assert(!end[-5]);
-	}
-	assert(reserved <= end-p);
-	// the byte right after the nominal size must be zero.
-	assert(!*(end-reserved));
-	// also check the slot's overflow byte
-	assert(!*end);
-	return end-reserved-p;
-}
-
-// Distance in bytes between consecutive slots of group g. A single-slot
-// group with its own mapping spans the mapping minus the UNIT-sized
-// group header; every other group uses the nominal size of its class.
-static inline size_t get_stride(const struct meta *g)
-{
-	if (g->last_idx || !g->maplen)
-		return UNIT*size_classes[g->sizeclass];
-	return g->maplen*4096UL - UNIT;
-}
-
-// Record that the allocation at p, whose slot payload ends at end, has
-// nominal size n. This is the writer counterpart of get_nominal_size().
-static inline void set_size(unsigned char *p, unsigned char *end, size_t n)
-{
-	int reserved = end-p-n;
-	// zero the byte directly after the n usable bytes.
-	if (reserved) end[-reserved] = 0;
-	// counts of 5 or more do not fit the 3-bit header field: store
-	// the real count at end-4 and use 5 as the marker value.
-	if (reserved >= 5) {
-		*(uint32_t *)(end-4) = reserved;
-		end[-5] = 0;
-		reserved = 5;
-	}
-	// keep the slot index (low 5 bits), set the reserved field.
-	p[-3] = (p[-3]&31) + (reserved<<5);
-}
-
-// Turn slot idx of group g into a user allocation of n bytes and return
-// the user pointer. The pointer may be placed a multiple of UNIT bytes
-// into the slot, as far as the slack beyond n allows. ctr seeds that
-// offset when the slot's header byte p[-3] is still zero.
-static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
-{
-	size_t stride = get_stride(g);
-	// number of whole UNITs left over in the slot beyond n bytes
-	size_t slack = (stride-IB-n)/UNIT;
-	unsigned char *p = g->mem->storage + stride*idx;
-	unsigned char *end = p+stride-IB;
-	// cycle offset within slot to increase interval to address
-	// reuse, facilitate trapping double-free.
-	int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255;
-	assert(!p[-4]);
-	// reduce off into the range 0..slack.
-	if (off > slack) {
-		size_t m = slack;
-		m |= m>>1; m |= m>>2; m |= m>>4;
-		off &= m;
-		if (off > slack) off -= slack+1;
-		assert(off <= slack);
-	}
-	if (off) {
-		// store offset in unused header at offset zero
-		// if enframing at non-zero offset.
-		*(uint16_t *)(p-2) = off;
-		p[-3] = 7<<5;
-		p += UNIT*off;
-		// for nonzero offset there is no permanent check
-		// byte, so make one.
-		p[-4] = 0;
-	}
-	// header: offset from group storage in UNITs, then slot index.
-	*(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT;
-	p[-3] = idx;
-	set_size(p, end, n);
-	return p;
-}
-
-// Map a request of n bytes to an index into size_classes. n is first
-// reduced to (n+IB-1)>>4; results below 10 are returned as the class
-// directly. Larger values pick a candidate class from the position of
-// the highest set bit and then step upward by comparing against the
-// table entries.
-static inline int size_to_class(size_t n)
-{
-	n = (n+IB-1)>>4;
-	if (n<10) return n;
-	n++;
-	int i = (28-a_clz_32(n))*4 + 8;
-	if (n>size_classes[i+1]) i+=2;
-	if (n>size_classes[i]) i++;
-	return i;
-}
-
-// Reject requests of SIZE_MAX/2 - 4096 bytes or more. Returns 1 and
-// sets errno to ENOMEM for such a request, 0 otherwise.
-static inline int size_overflows(size_t n)
-{
-	if (n < SIZE_MAX/2 - 4096) return 0;
-	errno = ENOMEM;
-	return 1;
-}
-
-// Advance ctx.seq by one. On reaching 255 the counter restarts at 1 and
-// every recorded unmap_seq entry is cleared.
-static inline void step_seq(void)
-{
-	if (ctx.seq != 255) {
-		ctx.seq++;
-		return;
-	}
-	for (int i=0; i<32; i++) ctx.unmap_seq[i] = 0;
-	ctx.seq = 1;
-}
-
-// Remember the current sequence number for size class sc. Only classes
-// 7..38 are tracked; the unsigned subtraction rejects everything else,
-// including sc < 7.
-static inline void record_seq(int sc)
-{
-	if (sc-7U < 32) ctx.unmap_seq[sc-7] = ctx.seq;
-}
-
-// Update the bounce counter of size class sc (classes 7..38 only). The
-// counter changes only when the class has a recorded unmap sequence
-// number less than 10 steps behind ctx.seq: it is incremented while the
-// result stays below 100, and otherwise set to 150.
-static inline void account_bounce(int sc)
-{
-	if (sc-7U >= 32) return;
-	int seq = ctx.unmap_seq[sc-7];
-	if (!seq || ctx.seq-seq >= 10) return;
-	if (ctx.bounces[sc-7]+1 < 100)
-		ctx.bounces[sc-7]++;
-	else
-		ctx.bounces[sc-7] = 150;
-}
-
-// Lower the bounce counter of size class sc by one, never below zero.
-// Classes outside 7..38 are ignored.
-static inline void decay_bounces(int sc)
-{
-	if (sc-7U >= 32) return;
-	if (ctx.bounces[sc-7]) ctx.bounces[sc-7]--;
-}
-
-// Returns 1 when size class sc is tracked (7..38) and its bounce
-// counter has reached 100, otherwise 0.
-static inline int is_bouncing(int sc)
-{
-	if (sc-7U >= 32) return 0;
-	return ctx.bounces[sc-7] >= 100;
-}
-
-#endif
--- a/lib/libc/musl/src/malloc/mallocng/realloc.c
+++ b/lib/libc/musl/src/malloc/mallocng/realloc.c
@ -1,51 +0,0 @@
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <string.h>
-#include "meta.h"
-
-// Resize allocation p to n bytes. A null p behaves like malloc(n).
-// Strategies, tried in order:
-//   1. adjust the recorded size in place when n fits the slot and the
-//      size class of n is at most one below the current class;
-//   2. mremap the mapping when both the old allocation and the new size
-//      are mmap-sized;
-//   3. allocate a new block, copy min(n, old size) bytes, free the old.
-// Returns 0 on failure, in which case p is left untouched.
-void *realloc(void *p, size_t n)
-{
-	if (!p) return malloc(n);
-	if (size_overflows(n)) return 0;
-
-	struct meta *g = get_meta(p);
-	int idx = get_slot_index(p);
-	size_t stride = get_stride(g);
-	unsigned char *start = g->mem->storage + stride*idx;
-	unsigned char *end = start + stride - IB;
-	size_t old_size = get_nominal_size(p, end);
-	size_t avail_size = end-(unsigned char *)p;
-	void *new;
-
-	// only resize in-place if size class matches
-	if (n <= avail_size && n<MMAP_THRESHOLD
-	    && size_to_class(n)+1 >= g->sizeclass) {
-		set_size(p, end, n);
-		return p;
-	}
-
-	// use mremap if old and new size are both mmap-worthy
-	if (g->sizeclass>=48 && n>=MMAP_THRESHOLD) {
-		assert(g->sizeclass==63);
-		// keep p's existing offset within the slot.
-		size_t base = (unsigned char *)p-start;
-		size_t needed = (n + base + UNIT + IB + 4095) & -4096;
-		// skip the syscall when the mapping length is unchanged.
-		new = g->maplen*4096UL == needed ? g->mem :
-			mremap(g->mem, g->maplen*4096UL, needed, MREMAP_MAYMOVE);
-		if (new!=MAP_FAILED) {
-			g->mem = new;
-			g->maplen = needed/4096;
-			p = g->mem->storage + base;
-			end = g->mem->storage + (needed - UNIT) - IB;
-			*end = 0;
-			set_size(p, end, n);
-			return p;
-		}
-		// on mremap failure fall through to allocate-and-copy.
-	}
-
-	new = malloc(n);
-	if (!new) return 0;
-	memcpy(new, p, n < old_size ? n : old_size);
-	free(p);
-	return new;
-}
--- a/lib/libc/musl/src/malloc/memalign.c
+++ b/lib/libc/musl/src/malloc/memalign.c
@ -1,7 +0,0 @@
-#define _BSD_SOURCE
-#include <stdlib.h>
-
-/* Thin wrapper: forwards both arguments unchanged to aligned_alloc()
- * and returns its result. */
-void *memalign(size_t align, size_t len)
-{
-	return aligned_alloc(align, len);
-}
--- a/lib/libc/musl/src/malloc/oldmalloc/aligned_alloc.c
+++ b/lib/libc/musl/src/malloc/oldmalloc/aligned_alloc.c
@ -1,53 +0,0 @@
-#include <stdlib.h>
-#include <stdint.h>
-#include <errno.h>
-#include "malloc_impl.h"
-
-/* Allocate len bytes aligned to align.
- *
- * Fails with EINVAL when align has more than one bit set, and with
- * ENOMEM when len + align would overflow or when malloc has been
- * replaced without aligned_alloc also being replaced. Alignments up to
- * SIZE_ALIGN are served by plain malloc. Otherwise the request is
- * over-allocated by align-1 bytes and the unused front part is either
- * folded into the mmapped chunk's bookkeeping or split off and freed. */
-void *aligned_alloc(size_t align, size_t len)
-{
-	unsigned char *mem, *new;
-
-	if ((align & -align) != align) {
-		errno = EINVAL;
-		return 0;
-	}
-
-	if (len > SIZE_MAX - align ||
-	    (__malloc_replaced && !__aligned_alloc_replaced)) {
-		errno = ENOMEM;
-		return 0;
-	}
-
-	if (align <= SIZE_ALIGN)
-		return malloc(len);
-
-	if (!(mem = malloc(len + align-1)))
-		return 0;
-
-	/* Round mem up to the next multiple of align. */
-	new = (void *)((uintptr_t)mem + align-1 & -align);
-	if (new == mem) return mem;
-
-	struct chunk *c = MEM_TO_CHUNK(mem);
-	struct chunk *n = MEM_TO_CHUNK(new);
-
-	if (IS_MMAPPED(c)) {
-		/* Apply difference between aligned and original
-		 * address to the "extra" field of mmapped chunk. */
-		n->psize = c->psize + (new-mem);
-		n->csize = c->csize - (new-mem);
-		return new;
-	}
-
-	struct chunk *t = NEXT_CHUNK(c);
-
-	/* Split the allocated chunk into two chunks. The aligned part
-	 * that will be used has the size in its footer reduced by the
-	 * difference between the aligned and original addresses, and
-	 * the resulting size copied to its header. A new header and
-	 * footer are written for the split-off part to be freed. */
-	n->psize = c->csize = C_INUSE | (new-mem);
-	n->csize = t->psize -= new-mem;
-
-	/* Return the split-off front part to the free bins. */
-	__bin_chunk(c);
-	return new;
-}
--- a/lib/libc/musl/src/malloc/oldmalloc/malloc.c
+++ b/lib/libc/musl/src/malloc/oldmalloc/malloc.c
@ -1,556 +0,0 @@
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <stdint.h>
-#include <errno.h>
-#include <sys/mman.h>
-#include "libc.h"
-#include "atomic.h"
-#include "pthread_impl.h"
-#include "malloc_impl.h"
-#include "fork_impl.h"
-
-#define malloc __libc_malloc_impl
-#define realloc __libc_realloc
-#define free __libc_free
-
-#if defined(__GNUC__) && defined(__PIC__)
-#define inline inline __attribute__((always_inline))
-#endif
-
-/* Global allocator state for this implementation. */
-static struct {
-	/* One bit per bin, maintained by bin_chunk() and unbin(). */
-	volatile uint64_t binmap;
-	struct bin bins[64];
-	/* Lock taken around chunk splitting and merging. */
-	volatile int split_merge_lock[2];
-} mal;
-
-/* Synchronization tools */
-
-/* Acquire the two-word lock lk. Nothing happens while libc.need_locks
- * is zero. When need_locks was negative at entry, it is reset to zero
- * once the lock has been obtained. */
-static inline void lock(volatile int *lk)
-{
-	int need_locks = libc.need_locks;
-	if (!need_locks) return;
-	while (a_swap(lk, 1))
-		__wait(lk, lk+1, 1, 1);
-	if (need_locks < 0) libc.need_locks = 0;
-}
-
-/* Release the two-word lock lk if it is currently held, waking one
- * waiter when lk[1] is nonzero. A lock that is not held is left
- * untouched. */
-static inline void unlock(volatile int *lk)
-{
-	if (!lk[0]) return;
-	a_store(lk, 0);
-	if (lk[1]) __wake(lk, 1, 1);
-}
-
-/* Lock bin i. On first use, when the head pointer is still null, the
- * bin's list is initialized so that head and tail both refer to the
- * bin's own sentinel chunk. */
-static inline void lock_bin(int i)
-{
-	lock(mal.bins[i].lock);
-	if (mal.bins[i].head) return;
-	mal.bins[i].head = mal.bins[i].tail = BIN_TO_CHUNK(i);
-}
-
-/* Release the lock of bin i. */
-static inline void unlock_bin(int i)
-{
-	unlock(mal.bins[i].lock);
-}
-
-/* Index of the lowest set bit of x. The active branch delegates to
- * a_ctz_64(); the #else branch is a disabled table-driven (de Bruijn)
- * alternative. */
-static int first_set(uint64_t x)
-{
-#if 1
-	return a_ctz_64(x);
-#else
-	static const char debruijn64[64] = {
-		0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
-		62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
-		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
-		51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
-	};
-	static const char debruijn32[32] = {
-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
-	};
-	/* Where long is narrower than 64 bits, handle the two 32-bit
-	 * halves separately. */
-	if (sizeof(long) < 8) {
-		uint32_t y = x;
-		if (!y) {
-			y = x>>32;
-			return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
-		}
-		return debruijn32[(y&-y)*0x076be629 >> 27];
-	}
-	return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
-#endif
-}
-
-/* Lookup table behind bin_index() and bin_index_up() for sizes past
- * the directly mapped range. It is indexed with x/8-4 or x/128-4, where
- * x is the size in SIZE_ALIGN units minus one. */
-static const unsigned char bin_tab[60] = {
-	            32,33,34,35,36,36,37,37,38,38,39,39,
-	40,40,40,40,41,41,41,41,42,42,42,42,43,43,43,43,
-	44,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45,
-	46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,
-};
-
-/* Bin index for a chunk of x bytes. With the size expressed in
- * SIZE_ALIGN units minus one: values up to 32 map to themselves, values
- * above 0x1c00 map to bin 63, and everything in between goes through
- * bin_tab. */
-static int bin_index(size_t x)
-{
-	size_t units = x / SIZE_ALIGN - 1;
-	if (units <= 32)
-		return units;
-	else if (units < 512)
-		return bin_tab[units/8-4];
-	else if (units > 0x1c00)
-		return 63;
-	else
-		return bin_tab[units/128-4] + 16;
-}
-
-/* Variant of bin_index() that rounds up: past the directly mapped range
- * (unit values up to 32) the value is decremented before the table
- * lookup and the resulting bin number is one higher. */
-static int bin_index_up(size_t x)
-{
-	size_t units = x / SIZE_ALIGN - 1;
-	if (units <= 32)
-		return units;
-	units--;
-	if (units < 512)
-		return bin_tab[units/8-4] + 1;
-	else
-		return bin_tab[units/128-4] + 17;
-}
-
-/* Debugging aid, compiled out by the #if 0 below. It prints every chunk
- * reachable from mal.heap, followed by a consistency check of each
- * bin's list against its binmap bit. The parameter x is unused. */
-#if 0
-void __dump_heap(int x)
-{
-	struct chunk *c;
-	int i;
-	for (c = (void *)mal.heap; CHUNK_SIZE(c); c = NEXT_CHUNK(c))
-		fprintf(stderr, "base %p size %zu (%d) flags %d/%d\n",
-			c, CHUNK_SIZE(c), bin_index(CHUNK_SIZE(c)),
-			c->csize & 15,
-			NEXT_CHUNK(c)->psize & 15);
-	for (i=0; i<64; i++) {
-		if (mal.bins[i].head != BIN_TO_CHUNK(i) && mal.bins[i].head) {
-			fprintf(stderr, "bin %d: %p\n", i, mal.bins[i].head);
-			if (!(mal.binmap & 1ULL<<i))
-				fprintf(stderr, "missing from binmap!\n");
-		} else if (mal.binmap & 1ULL<<i)
-			fprintf(stderr, "binmap wrongly contains %d!\n", i);
-	}
-}
-#endif
-
-/* This function returns true if the interval [old,new]
- * intersects the 'len'-sized interval below &libc.auxv
- * (interpreted as the main-thread stack) or below &b
- * (the current stack). It is used to defend against
- * buggy brk implementations that can cross the stack. */
-
-static int traverses_stack_p(uintptr_t old, uintptr_t new)
-{
-	/* Size of the window checked below each reference address. */
-	const uintptr_t len = 8<<20;
-	uintptr_t a, b;
-
-	/* Window ending at libc.auxv; the lower bound is clamped at 0. */
-	b = (uintptr_t)libc.auxv;
-	a = b > len ? b-len : 0;
-	if (new>a && old<b) return 1;
-
-	/* Window ending at the address of the local variable b itself. */
-	b = (uintptr_t)&b;
-	a = b > len ? b-len : 0;
-	if (new>a && old<b) return 1;
-
-	return 0;
-}
-
-/* Expand the heap in-place if brk can be used, or otherwise via mmap,
- * using an exponential lower bound on growth by mmap to make
- * fragmentation asymptotically irrelevant. The size argument is both
- * an input and an output, since the caller needs to know the size
- * allocated, which will be larger than requested due to page alignment
- * and mmap minimum size rules. The caller is responsible for locking
- * to prevent concurrent calls. */
-
-static void *__expand_heap(size_t *pn)
-{
-	/* End of the brk-managed heap, and the number of mmap fallbacks
-	 * performed so far; both persist across calls. */
-	static uintptr_t brk;
-	static unsigned mmap_step;
-	size_t n = *pn;
-
-	if (n > SIZE_MAX/2 - PAGE_SIZE) {
-		errno = ENOMEM;
-		return 0;
-	}
-	/* Round n up to a multiple of the page size. */
-	n += -n & PAGE_SIZE-1;
-
-	/* First call: query the current break and page-align it. */
-	if (!brk) {
-		brk = __syscall(SYS_brk, 0);
-		brk += -brk & PAGE_SIZE-1;
-	}
-
-	/* Prefer extending the break, unless that would overflow or run
-	 * into a stack region. */
-	if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n)
-	    && __syscall(SYS_brk, brk+n)==brk+n) {
-		*pn = n;
-		brk += n;
-		return (void *)(brk-n);
-	}
-
-	/* Fallback: mmap, with a minimum size that doubles every two
-	 * successful fallbacks. */
-	size_t min = (size_t)PAGE_SIZE << mmap_step/2;
-	if (n < min) n = min;
-	void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
-		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-	if (area == MAP_FAILED) return 0;
-	*pn = n;
-	mmap_step++;
-	return area;
-}
-
-/* Obtain at least n more bytes of heap and return them as one in-use
- * chunk, or return 0 on failure. New memory that directly follows the
- * previous heap end is merged with it by overwriting the old sentinel;
- * otherwise a fresh sentinel is written in front of the new region. */
-static struct chunk *expand_heap(size_t n)
-{
-	/* End of the most recently obtained region. */
-	static void *end;
-	void *p;
-	struct chunk *w;
-
-	/* The argument n already accounts for the caller's chunk
-	 * overhead needs, but if the heap can't be extended in-place,
-	 * we need room for an extra zero-sized sentinel chunk. */
-	n += SIZE_ALIGN;
-
-	p = __expand_heap(&n);
-	if (!p) return 0;
-
-	/* If not just expanding existing space, we need to make a
-	 * new sentinel chunk below the allocated space. */
-	if (p != end) {
-		/* Valid/safe because of the prologue increment. */
-		n -= SIZE_ALIGN;
-		p = (char *)p + SIZE_ALIGN;
-		w = MEM_TO_CHUNK(p);
-		w->psize = 0 | C_INUSE;
-	}
-
-	/* Record new heap end and fill in footer. */
-	end = (char *)p + n;
-	w = MEM_TO_CHUNK(end);
-	w->psize = n | C_INUSE;
-	w->csize = 0 | C_INUSE;
-
-	/* Fill in header, which may be new or may be replacing a
-	 * zero-size sentinel header at the old end-of-heap. */
-	w = MEM_TO_CHUNK(p);
-	w->csize = n | C_INUSE;
-
-	return w;
-}
-
-/* Convert a requested size into an internal chunk size, in place.
- * A request of 0 becomes SIZE_ALIGN; an in-range request has OVERHEAD
- * added and is rounded up to a multiple of SIZE_ALIGN. Returns 0 on
- * success, or -1 with errno set to ENOMEM when the request is too
- * large. */
-static int adjust_size(size_t *n)
-{
-	/* Result of pointer difference must fit in ptrdiff_t. */
-	if (*n-1 <= PTRDIFF_MAX - SIZE_ALIGN - PAGE_SIZE) {
-		*n = (*n + OVERHEAD + SIZE_ALIGN - 1) & SIZE_MASK;
-		return 0;
-	}
-	/* *n == 0 wraps around in the test above and lands here. */
-	if (!*n) {
-		*n = SIZE_ALIGN;
-		return 0;
-	}
-	errno = ENOMEM;
-	return -1;
-}
-
-/* Remove free chunk c from bin i and mark it in use in both its own
- * header and the following chunk's footer. */
-static void unbin(struct chunk *c, int i)
-{
-	/* prev == next means c is the only chunk in the bin (both links
-	 * refer to the sentinel), so the bin becomes empty. */
-	if (c->prev == c->next)
-		a_and_64(&mal.binmap, ~(1ULL<<i));
-	c->prev->next = c->next;
-	c->next->prev = c->prev;
-	c->csize |= C_INUSE;
-	NEXT_CHUNK(c)->psize |= C_INUSE;
-}
-
-/* Append chunk self to the tail of bin i's list, setting the bin's
- * binmap bit when the bin was previously empty. */
-static void bin_chunk(struct chunk *self, int i)
-{
-	self->next = BIN_TO_CHUNK(i);
-	self->prev = mal.bins[i].tail;
-	self->next->prev = self;
-	self->prev->next = self;
-	/* The old tail being the sentinel means the bin was empty. */
-	if (self->prev == BIN_TO_CHUNK(i))
-		a_or_64(&mal.binmap, 1ULL<<i);
-}
-
-/* Shrink in-use chunk self to n bytes, placing the remainder in the
- * appropriate free bin. Nothing is done when the remainder would not
- * exceed DONTCARE bytes. */
-static void trim(struct chunk *self, size_t n)
-{
-	size_t n1 = CHUNK_SIZE(self);
-	struct chunk *next, *split;
-
-	if (n >= n1 - DONTCARE) return;
-
-	next = NEXT_CHUNK(self);
-	split = (void *)((char *)self + n);
-
-	/* The remainder's sizes are written without C_INUSE, which
-	 * marks it as free. */
-	split->psize = n | C_INUSE;
-	split->csize = n1-n;
-	next->psize = n1-n;
-	self->csize = n | C_INUSE;
-
-	int i = bin_index(n1-n);
-	lock_bin(i);
-
-	bin_chunk(split, i);
-
-	unlock_bin(i);
-}
-
-/* Allocate n bytes; returns 0 on failure.
- *   - Sizes above MMAP_THRESHOLD (after adjustment) get their own
- *     mapping.
- *   - Otherwise the exact-fit bin is tried first without the global
- *     lock, then all larger bins under split_merge_lock, and finally
- *     the heap is expanded. A chunk found by the search or by expansion
- *     is trimmed to size. */
-void *malloc(size_t n)
-{
-	struct chunk *c;
-	int i, j;
-	uint64_t mask;
-
-	if (adjust_size(&n) < 0) return 0;
-
-	if (n > MMAP_THRESHOLD) {
-		size_t len = n + OVERHEAD + PAGE_SIZE - 1 & -PAGE_SIZE;
-		char *base = __mmap(0, len, PROT_READ|PROT_WRITE,
-			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-		if (base == (void *)-1) return 0;
-		/* The chunk header sits SIZE_ALIGN - OVERHEAD bytes into
-		 * the mapping; that offset is recorded in psize. */
-		c = (void *)(base + SIZE_ALIGN - OVERHEAD);
-		c->csize = len - (SIZE_ALIGN - OVERHEAD);
-		c->psize = SIZE_ALIGN - OVERHEAD;
-		return CHUNK_TO_MEM(c);
-	}
-
-	i = bin_index_up(n);
-	/* Fast path: take the head of bin i if it is a close fit. */
-	if (i<63 && (mal.binmap & (1ULL<<i))) {
-		lock_bin(i);
-		c = mal.bins[i].head;
-		if (c != BIN_TO_CHUNK(i) && CHUNK_SIZE(c)-n <= DONTCARE) {
-			unbin(c, i);
-			unlock_bin(i);
-			return CHUNK_TO_MEM(c);
-		}
-		unlock_bin(i);
-	}
-	lock(mal.split_merge_lock);
-	/* Visit every nonempty bin with index >= i, lowest first. */
-	for (mask = mal.binmap & -(1ULL<<i); mask; mask -= (mask&-mask)) {
-		j = first_set(mask);
-		lock_bin(j);
-		c = mal.bins[j].head;
-		if (c != BIN_TO_CHUNK(j)) {
-			unbin(c, j);
-			unlock_bin(j);
-			break;
-		}
-		unlock_bin(j);
-	}
-	/* mask == 0 here means no bin yielded a chunk. */
-	if (!mask) {
-		c = expand_heap(n);
-		if (!c) {
-			unlock(mal.split_merge_lock);
-			return 0;
-		}
-	}
-	trim(c, n);
-	unlock(mal.split_merge_lock);
-	return CHUNK_TO_MEM(c);
-}
-
-/* Returns nonzero exactly when the chunk holding p is an mmapped chunk
- * (its csize lacks the C_INUSE bit). */
-int __malloc_allzerop(void *p)
-{
-	return IS_MMAPPED(MEM_TO_CHUNK(p));
-}
-
-/* Resize allocation p to n bytes; a null p behaves like malloc(n).
- * Returns 0 on failure, leaving p intact.
- *   - An already suitable chunk is returned as is.
- *   - Mmapped chunks are resized with mremap, or moved into the heap
- *     when they shrink below one page.
- *   - Heap chunks shrink by splitting off the tail, and grow by
- *     absorbing a free successor when that provides enough room.
- *   - Otherwise a new block is allocated and the data copied over. */
-void *realloc(void *p, size_t n)
-{
-	struct chunk *self, *next;
-	size_t n0, n1;
-	void *new;
-
-	if (!p) return malloc(n);
-
-	if (adjust_size(&n) < 0) return 0;
-
-	self = MEM_TO_CHUNK(p);
-	n1 = n0 = CHUNK_SIZE(self);
-
-	/* Same size, or smaller by at most DONTCARE: nothing to do. */
-	if (n<=n0 && n0-n<=DONTCARE) return p;
-
-	if (IS_MMAPPED(self)) {
-		size_t extra = self->psize;
-		char *base = (char *)self - extra;
-		size_t oldlen = n0 + extra;
-		size_t newlen = n + extra;
-		/* Crash on realloc of freed chunk */
-		if (extra & 1) a_crash();
-		if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
-			n0 = n;
-			goto copy_free_ret;
-		}
-		newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
-		if (oldlen == newlen) return p;
-		base = __mremap(base, oldlen, newlen, MREMAP_MAYMOVE);
-		if (base == (void *)-1)
-			goto copy_realloc;
-		self = (void *)(base + extra);
-		self->csize = newlen - extra;
-		return CHUNK_TO_MEM(self);
-	}
-
-	next = NEXT_CHUNK(self);
-
-	/* Crash on corrupted footer (likely from buffer overflow) */
-	if (next->psize != self->csize) a_crash();
-
-	if (n < n0) {
-		int i = bin_index_up(n);
-		int j = bin_index(n0);
-		/* Prefer a copy into a smaller chunk when the bin for the
-		 * new size is nonempty. */
-		if (i<j && (mal.binmap & (1ULL << i)))
-			goto copy_realloc;
-		/* Split in place and free the tail. */
-		struct chunk *split = (void *)((char *)self + n);
-		self->csize = split->psize = n | C_INUSE;
-		split->csize = next->psize = n0-n | C_INUSE;
-		__bin_chunk(split);
-		return CHUNK_TO_MEM(self);
-	}
-
-	lock(mal.split_merge_lock);
-
-	/* Try to grow in place by absorbing a free successor. */
-	size_t nsize = next->csize & C_INUSE ? 0 : CHUNK_SIZE(next);
-	if (n0+nsize >= n) {
-		int i = bin_index(nsize);
-		lock_bin(i);
-		/* Recheck under the bin lock. */
-		if (!(next->csize & C_INUSE)) {
-			unbin(next, i);
-			unlock_bin(i);
-			next = NEXT_CHUNK(next);
-			self->csize = next->psize = n0+nsize | C_INUSE;
-			trim(self, n);
-			unlock(mal.split_merge_lock);
-			return CHUNK_TO_MEM(self);
-		}
-		unlock_bin(i);
-	}
-	unlock(mal.split_merge_lock);
-
-copy_realloc:
-	/* As a last resort, allocate a new chunk and copy to it. */
-	new = malloc(n-OVERHEAD);
-	if (!new) return 0;
-copy_free_ret:
-	memcpy(new, p, (n<n0 ? n : n0) - OVERHEAD);
-	free(CHUNK_TO_MEM(self));
-	return new;
-}
-
-/* Free heap chunk self: merge it with a free predecessor and/or
- * successor, place the result in its bin, and hand the interior pages
- * of a large result back to the kernel. errno is preserved across the
- * madvise call. */
-void __bin_chunk(struct chunk *self)
-{
-	struct chunk *next = NEXT_CHUNK(self);
-
-	/* Crash on corrupted footer (likely from buffer overflow) */
-	if (next->psize != self->csize) a_crash();
-
-	lock(mal.split_merge_lock);
-
-	size_t osize = CHUNK_SIZE(self), size = osize;
-
-	/* Since we hold split_merge_lock, only transition from free to
-	 * in-use can race; in-use to free is impossible */
-	size_t psize = self->psize & C_INUSE ? 0 : CHUNK_PSIZE(self);
-	size_t nsize = next->csize & C_INUSE ? 0 : CHUNK_SIZE(next);
-
-	if (psize) {
-		int i = bin_index(psize);
-		lock_bin(i);
-		/* Recheck under the bin lock before merging backward. */
-		if (!(self->psize & C_INUSE)) {
-			struct chunk *prev = PREV_CHUNK(self);
-			unbin(prev, i);
-			self = prev;
-			size += psize;
-		}
-		unlock_bin(i);
-	}
-	if (nsize) {
-		int i = bin_index(nsize);
-		lock_bin(i);
-		/* Recheck under the bin lock before merging forward. */
-		if (!(next->csize & C_INUSE)) {
-			unbin(next, i);
-			next = NEXT_CHUNK(next);
-			size += nsize;
-		}
-		unlock_bin(i);
-	}
-
-	int i = bin_index(size);
-	lock_bin(i);
-
-	/* Sizes stored without C_INUSE mark the merged chunk as free. */
-	self->csize = size;
-	next->psize = size;
-	bin_chunk(self, i);
-	unlock(mal.split_merge_lock);
-
-	/* Replace middle of large chunks with fresh zero pages */
-	if (size > RECLAIM && (size^(size-osize)) > size-osize) {
-		uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE;
-		uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE;
-		int e = errno;
-#if 1
-		__madvise((void *)a, b-a, MADV_DONTNEED);
-#else
-		__mmap((void *)a, b-a, PROT_READ|PROT_WRITE,
-			MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
-#endif
-		errno = e;
-	}
-
-	/* The bin lock is held until after the reclaim step. */
-	unlock_bin(i);
-}
-
-/* Release an mmapped chunk with munmap. For such chunks psize holds the
- * offset of the chunk header from the start of the mapping. errno is
- * preserved across the call. */
-static void unmap_chunk(struct chunk *self)
-{
-	size_t extra = self->psize;
-	char *base = (char *)self - extra;
-	size_t len = CHUNK_SIZE(self) + extra;
-	/* Crash on double free */
-	if (extra & 1) a_crash();
-	int e = errno;
-	__munmap(base, len);
-	errno = e;
-}
-
-/* Release allocation p; a null pointer is ignored. Mmapped chunks are
- * unmapped, all other chunks go back to the free bins. */
-void free(void *p)
-{
-	if (!p) return;
-
-	struct chunk *self = MEM_TO_CHUNK(p);
-
-	if (!IS_MMAPPED(self)) {
-		__bin_chunk(self);
-		return;
-	}
-	unmap_chunk(self);
-}
-
-/* Donate the memory range [start, end) to the allocator as a free
- * chunk. The range is trimmed for alignment and header overhead first;
- * a range too small to hold a chunk after trimming is silently
- * ignored. */
-void __malloc_donate(char *start, char *end)
-{
-	size_t align_start_up = (SIZE_ALIGN-1) & (-(uintptr_t)start - OVERHEAD);
-	size_t align_end_down = (SIZE_ALIGN-1) & (uintptr_t)end;
-
-	/* Getting past this condition ensures that the padding for alignment
-	 * and header overhead will not overflow and will leave a nonzero
-	 * multiple of SIZE_ALIGN bytes between start and end. */
-	if (end - start <= OVERHEAD + align_start_up + align_end_down)
-		return;
-	start += align_start_up + OVERHEAD;
-	end   -= align_end_down;
-
-	/* Build an in-use chunk with in-use neighbours on both sides,
-	 * then free it through the regular path. */
-	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
-	c->psize = n->csize = C_INUSE;
-	c->csize = n->psize = C_INUSE | (end-start);
-	__bin_chunk(c);
-}
-
-/* Fork hook, dispatching on the sign of who:
- *   who < 0  -> acquire split_merge_lock, then every bin lock;
- *   who == 0 -> release every bin lock, then split_merge_lock;
- *   who > 0  -> reset all lock words to zero without unlocking.
- * NOTE(review): presumably the prepare / parent / child phases of
- * fork - confirm against the caller. */
-void __malloc_atfork(int who)
-{
-	if (who > 0) {
-		for (int i=0; i<64; i++)
-			mal.bins[i].lock[0] = mal.bins[i].lock[1] = 0;
-		mal.split_merge_lock[1] = 0;
-		mal.split_merge_lock[0] = 0;
-	} else if (who == 0) {
-		for (int i=0; i<64; i++)
-			unlock(mal.bins[i].lock);
-		unlock(mal.split_merge_lock);
-	} else {
-		lock(mal.split_merge_lock);
-		for (int i=0; i<64; i++)
-			lock(mal.bins[i].lock);
-	}
-}
--- a/lib/libc/musl/src/malloc/oldmalloc/malloc_impl.h
+++ b/lib/libc/musl/src/malloc/oldmalloc/malloc_impl.h
@ -1,39 +0,0 @@
-#ifndef MALLOC_IMPL_H
-#define MALLOC_IMPL_H
-
-#include <sys/mman.h>
-#include "dynlink.h"
-
-struct chunk {
-	size_t psize, csize;
-	struct chunk *next, *prev;
-};
-
-struct bin {
-	volatile int lock[2];
-	struct chunk *head;
-	struct chunk *tail;
-};
-
-#define SIZE_ALIGN (4*sizeof(size_t))
-#define SIZE_MASK (-SIZE_ALIGN)
-#define OVERHEAD (2*sizeof(size_t))
-#define MMAP_THRESHOLD (0x1c00*SIZE_ALIGN)
-#define DONTCARE 16
-#define RECLAIM 163840
-
-#define CHUNK_SIZE(c) ((c)->csize & -2)
-#define CHUNK_PSIZE(c) ((c)->psize & -2)
-#define PREV_CHUNK(c) ((struct chunk *)((char *)(c) - CHUNK_PSIZE(c)))
-#define NEXT_CHUNK(c) ((struct chunk *)((char *)(c) + CHUNK_SIZE(c)))
-#define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD)
-#define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD)
-#define BIN_TO_CHUNK(i) (MEM_TO_CHUNK(&mal.bins[i].head))
-
-#define C_INUSE  ((size_t)1)
-
-#define IS_MMAPPED(c) !((c)->csize & (C_INUSE))
-
-hidden void __bin_chunk(struct chunk *);
-
-#endif
--- a/lib/libc/musl/src/malloc/oldmalloc/malloc_usable_size.c
+++ b/lib/libc/musl/src/malloc/oldmalloc/malloc_usable_size.c
@ -1,9 +0,0 @@
-#include <malloc.h>
-#include "malloc_impl.h"
- 
-hidden void *(*const __realloc_dep)(void *, size_t) = realloc;
-
-size_t malloc_usable_size(void *p)
-{
-	return p ? CHUNK_SIZE(MEM_TO_CHUNK(p)) - OVERHEAD : 0;
-}
--- a/lib/libc/musl/src/malloc/posix_memalign.c
+++ b/lib/libc/musl/src/malloc/posix_memalign.c
@ -1,11 +0,0 @@
-#include <stdlib.h>
-#include <errno.h>
-
-int posix_memalign(void **res, size_t align, size_t len)
-{
-	if (align < sizeof(void *)) return EINVAL;
-	void *mem = aligned_alloc(align, len);
-	if (!mem) return errno;
-	*res = mem;
-	return 0;
-}
--- a/lib/libc/musl/src/malloc/realloc.c
+++ b/lib/libc/musl/src/malloc/realloc.c
@ -1,6 +0,0 @@
-#include <stdlib.h>
-
-void *realloc(void *p, size_t n)
-{
-	return __libc_realloc(p, n);
-}
--- a/lib/libc/musl/src/malloc/reallocarray.c
+++ b/lib/libc/musl/src/malloc/reallocarray.c
@ -1,13 +0,0 @@
-#define _BSD_SOURCE
-#include <errno.h>
-#include <stdlib.h>
-
-void *reallocarray(void *ptr, size_t m, size_t n)
-{
-	if (n && m > -1 / n) {
-		errno = ENOMEM;
-		return 0;
-	}
-
-	return realloc(ptr, m * n);
-}
--- a/lib/libc/musl/src/malloc/replaced.c
+++ b/lib/libc/musl/src/malloc/replaced.c
@ -1,4 +0,0 @@
-#include "dynlink.h"
-
-int __malloc_replaced;
-int __aligned_alloc_replaced;
--- a/lib/libc/musl/src/process/fdop.h
+++ b/lib/libc/musl/src/process/fdop.h
@ -10,8 +10,3 @@ struct fdop {
 	mode_t mode;
 	char path[];
 };
-
-#define malloc __libc_malloc
-#define calloc __libc_calloc
-#define realloc undef
-#define free __libc_free
--- a/lib/libc/musl/src/thread/pthread_atfork.c
+++ b/lib/libc/musl/src/thread/pthread_atfork.c
@ -3,11 +3,6 @@
 #include "libc.h"
 #include "lock.h"

-#define malloc __libc_malloc
-#define calloc undef
-#define realloc undef
-#define free undef
-
 static struct atfork_funcs {
 	void (*prepare)(void);
 	void (*parent)(void);
--- a/lib/libc/musl/src/thread/sem_open.c
+++ b/lib/libc/musl/src/thread/sem_open.c
@ -14,11 +14,6 @@
 #include "lock.h"
 #include "fork_impl.h"

-#define malloc __libc_malloc
-#define calloc __libc_calloc
-#define realloc undef
-#define free undef
-
 static struct {
 	ino_t ino;
 	sem_t *sem;
--- a/lib/libc/musl/src/time/__tz.c
+++ b/lib/libc/musl/src/time/__tz.c
@ -9,11 +9,6 @@
 #include "lock.h"
 #include "fork_impl.h"

-#define malloc __libc_malloc
-#define calloc undef
-#define realloc undef
-#define free undef
-
 long  __timezone = 0;
 int   __daylight = 0;
 char *__tzname[2] = { 0, 0 };
--- a/lib/libc/wasi/emmalloc/emmalloc.c
+++ b/lib/libc/wasi/emmalloc/emmalloc.c
--- a/lib/libc/wasi/libc-top-half/musl/src/exit/atexit.c
+++ b/lib/libc/wasi/libc-top-half/musl/src/exit/atexit.c
@ -4,11 +4,6 @@
 #include "lock.h"
 #include "fork_impl.h"

-#define malloc __libc_malloc
-#define calloc __libc_calloc
-#define realloc undef
-#define free undef
-
 /* Ensure that at least 32 atexit handlers can be registered without malloc */
 #define COUNT 32

--- a/lib/libc/wasi/libc-top-half/musl/src/include/stdlib.h
+++ b/lib/libc/wasi/libc-top-half/musl/src/include/stdlib.h
@ -10,10 +10,4 @@ hidden int __ptsname_r(int, char *, size_t);
 hidden char *__randname(char *);
 hidden void __qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *);

-hidden void *__libc_malloc(size_t);
-hidden void *__libc_malloc_impl(size_t);
-hidden void *__libc_calloc(size_t, size_t);
-hidden void *__libc_realloc(void *, size_t);
-hidden void __libc_free(void *);
-
 #endif
--- a/lib/libc/wasi/libc-top-half/musl/src/locale/locale_map.c
+++ b/lib/libc/wasi/libc-top-half/musl/src/locale/locale_map.c
@ -9,11 +9,6 @@
 #include "lock.h"
 #include "fork_impl.h"

-#define malloc __libc_malloc
-#define calloc undef
-#define realloc undef
-#define free undef
-
 const char *__lctrans_impl(const char *msg, const struct __locale_map *lm)
 {
 	const char *trans = 0;
--- a/lib/libc/wasi/libc-top-half/musl/src/locale/newlocale.c
+++ b/lib/libc/wasi/libc-top-half/musl/src/locale/newlocale.c
@ -6,11 +6,6 @@
 #include "locale_impl.h"
 #include "lock.h"

-#define malloc __libc_malloc
-#define calloc undef
-#define realloc undef
-#define free undef
-
 static int default_locale_init_done;
 static struct __locale_struct default_locale, default_ctype_locale;

--- a/lib/libc/wasi/libc-top-half/musl/src/time/__tz.c
+++ b/lib/libc/wasi/libc-top-half/musl/src/time/__tz.c
@ -11,11 +11,6 @@
 #include "lock.h"
 #include "fork_impl.h"

-#define malloc __libc_malloc
-#define calloc undef
-#define realloc undef
-#define free undef
-
 #ifdef __wasilibc_unmodified_upstream // timezone data
 long  __timezone = 0;
 int   __daylight = 0;
--- a/lib/std/heap.zig
+++ b/lib/std/heap.zig
@ -13,9 +13,9 @@ pub const ArenaAllocator = @import("heap/arena_allocator.zig").ArenaAllocator;
 pub const SmpAllocator = @import("heap/SmpAllocator.zig");
 pub const FixedBufferAllocator = @import("heap/FixedBufferAllocator.zig");
 pub const PageAllocator = @import("heap/PageAllocator.zig");
-pub const SbrkAllocator = @import("heap/sbrk_allocator.zig").SbrkAllocator;
 pub const ThreadSafeAllocator = @import("heap/ThreadSafeAllocator.zig");
-pub const WasmAllocator = @import("heap/WasmAllocator.zig");
+pub const WasmAllocator = if (builtin.single_threaded) BrkAllocator else @compileError("unimplemented");
+pub const BrkAllocator = @import("heap/BrkAllocator.zig");

 pub const DebugAllocatorConfig = @import("heap/debug_allocator.zig").Config;
 pub const DebugAllocator = @import("heap/debug_allocator.zig").DebugAllocator;
@ -356,9 +356,6 @@ pub const page_allocator: Allocator = if (@hasDecl(root, "os") and
 else if (builtin.target.cpu.arch.isWasm()) .{
    .ptr = undefined,
    .vtable = &WasmAllocator.vtable,
-} else if (builtin.target.os.tag == .plan9) .{
-    .ptr = undefined,
-    .vtable = &SbrkAllocator(std.os.plan9.sbrk).vtable,
 } else .{
    .ptr = undefined,
    .vtable = &PageAllocator.vtable,
@ -369,16 +366,18 @@ pub const smp_allocator: Allocator = .{
    .vtable = &SmpAllocator.vtable,
 };

-/// This allocator is fast, small, and specific to WebAssembly. In the future,
-/// this will be the implementation automatically selected by
-/// `GeneralPurposeAllocator` when compiling in `ReleaseSmall` mode for wasm32
-/// and wasm64 architectures.
-/// Until then, it is available here to play with.
+/// This allocator is fast, small, and specific to WebAssembly.
 pub const wasm_allocator: Allocator = .{
    .ptr = undefined,
    .vtable = &WasmAllocator.vtable,
 };

+/// Supports single-threaded WebAssembly and Linux.
+pub const brk_allocator: Allocator = .{
+    .ptr = undefined,
+    .vtable = &BrkAllocator.vtable,
+};
+
 /// Returns a `StackFallbackAllocator` allocating using either a
 /// `FixedBufferAllocator` on an array of size `size` and falling back to
 /// `fallback_allocator` if that fails.
@ -1014,9 +1013,11 @@ test {
    _ = GeneralPurposeAllocator;
    _ = FixedBufferAllocator;
    _ = ThreadSafeAllocator;
-    _ = SbrkAllocator;
-    if (builtin.target.cpu.arch.isWasm()) {
-        _ = WasmAllocator;
+    if (builtin.single_threaded) {
+        if (builtin.cpu.arch.isWasm() or (builtin.os.tag == .linux and !builtin.link_libc)) {
+            _ = brk_allocator;
+        }
+    } else {
+        _ = smp_allocator;
    }
-    if (!builtin.single_threaded) _ = smp_allocator;
 }
--- a/lib/std/heap/WasmAllocator.zig
+++ b/lib/std/heap/WasmAllocator.zig
@ -1,20 +1,29 @@
-const std = @import("../std.zig");
+//! Supports single-threaded targets that have an sbrk-like primitive, which
+//! includes Linux and WebAssembly.
+//!
+//! On Linux, assumes exclusive access to the brk syscall.
+const BrkAllocator = @This();
 const builtin = @import("builtin");
+
+const std = @import("../std.zig");
 const Allocator = std.mem.Allocator;
-const mem = std.mem;
+const Alignment = std.mem.Alignment;
 const assert = std.debug.assert;
-const wasm = std.wasm;
 const math = std.math;

 comptime {
-    if (!builtin.target.cpu.arch.isWasm()) {
-        @compileError("only available for wasm32 arch");
-    }
-    if (!builtin.single_threaded) {
-        @compileError("TODO implement support for multi-threaded wasm");
-    }
+    if (!builtin.single_threaded) @compileError("unsupported");
 }

+next_addrs: [size_class_count]usize = @splat(0),
+/// For each size class, points to the freed pointer.
+frees: [size_class_count]usize = @splat(0),
+/// For each big size class, points to the freed pointer.
+big_frees: [big_size_class_count]usize = @splat(0),
+prev_brk: usize = 0,
+
+var global: BrkAllocator = .{};
+
 pub const vtable: Allocator.VTable = .{
    .alloc = alloc,
    .resize = resize,
@ -26,8 +35,7 @@ pub const Error = Allocator.Error;

 const max_usize = math.maxInt(usize);
 const ushift = math.Log2Int(usize);
-const bigpage_size = 64 * 1024;
-const pages_per_bigpage = bigpage_size / wasm.page_size;
+const bigpage_size: comptime_int = @max(64 * 1024, std.heap.page_size_max);
 const bigpage_count = max_usize / bigpage_size;

 /// Because of storing free list pointers, the minimum size class is 3.
@ -39,13 +47,7 @@ const size_class_count = math.log2(bigpage_size) - min_class;
 /// etc.
 const big_size_class_count = math.log2(bigpage_count);

-var next_addrs: [size_class_count]usize = @splat(0);
-/// For each size class, points to the freed pointer.
-var frees: [size_class_count]usize = @splat(0);
-/// For each big size class, points to the freed pointer.
-var big_frees: [big_size_class_count]usize = @splat(0);
-
-fn alloc(ctx: *anyopaque, len: usize, alignment: mem.Alignment, return_address: usize) ?[*]u8 {
+fn alloc(ctx: *anyopaque, len: usize, alignment: Alignment, return_address: usize) ?[*]u8 {
    _ = ctx;
    _ = return_address;
    // Make room for the freelist next pointer.
@ -54,24 +56,24 @@ fn alloc(ctx: *anyopaque, len: usize, alignment: mem.Alignment, return_address:
    const class = math.log2(slot_size) - min_class;
    if (class < size_class_count) {
        const addr = a: {
-            const top_free_ptr = frees[class];
+            const top_free_ptr = global.frees[class];
            if (top_free_ptr != 0) {
                const node: *usize = @ptrFromInt(top_free_ptr + (slot_size - @sizeOf(usize)));
-                frees[class] = node.*;
+                global.frees[class] = node.*;
                break :a top_free_ptr;
            }

-            const next_addr = next_addrs[class];
-            if (next_addr % wasm.page_size == 0) {
+            const next_addr = global.next_addrs[class];
+            if (next_addr % bigpage_size == 0) {
                const addr = allocBigPages(1);
                if (addr == 0) return null;
                //std.debug.print("allocated fresh slot_size={d} class={d} addr=0x{x}\n", .{
                //    slot_size, class, addr,
                //});
-                next_addrs[class] = addr + slot_size;
+                global.next_addrs[class] = addr + slot_size;
                break :a addr;
            } else {
-                next_addrs[class] = next_addr + slot_size;
+                global.next_addrs[class] = next_addr + slot_size;
                break :a next_addr;
            }
        };
@ -84,7 +86,7 @@ fn alloc(ctx: *anyopaque, len: usize, alignment: mem.Alignment, return_address:
 fn resize(
    ctx: *anyopaque,
    buf: []u8,
-    alignment: mem.Alignment,
+    alignment: Alignment,
    new_len: usize,
    return_address: usize,
 ) bool {
@ -112,7 +114,7 @@ fn resize(
 fn remap(
    context: *anyopaque,
    memory: []u8,
-    alignment: mem.Alignment,
+    alignment: Alignment,
    new_len: usize,
    return_address: usize,
 ) ?[*]u8 {
@ -122,7 +124,7 @@ fn remap(
 fn free(
    ctx: *anyopaque,
    buf: []u8,
-    alignment: mem.Alignment,
+    alignment: Alignment,
    return_address: usize,
 ) void {
    _ = ctx;
@ -134,16 +136,16 @@ fn free(
    const addr = @intFromPtr(buf.ptr);
    if (class < size_class_count) {
        const node: *usize = @ptrFromInt(addr + (slot_size - @sizeOf(usize)));
-        node.* = frees[class];
-        frees[class] = addr;
+        node.* = global.frees[class];
+        global.frees[class] = addr;
    } else {
        const bigpages_needed = bigPagesNeeded(actual_len);
        const pow2_pages = math.ceilPowerOfTwoAssert(usize, bigpages_needed);
        const big_slot_size_bytes = pow2_pages * bigpage_size;
        const node: *usize = @ptrFromInt(addr + (big_slot_size_bytes - @sizeOf(usize)));
        const big_class = math.log2(pow2_pages);
-        node.* = big_frees[big_class];
-        big_frees[big_class] = addr;
+        node.* = global.big_frees[big_class];
+        global.big_frees[big_class] = addr;
    }
 }

@ -156,16 +158,34 @@ fn allocBigPages(n: usize) usize {
    const slot_size_bytes = pow2_pages * bigpage_size;
    const class = math.log2(pow2_pages);

-    const top_free_ptr = big_frees[class];
+    const top_free_ptr = global.big_frees[class];
    if (top_free_ptr != 0) {
        const node: *usize = @ptrFromInt(top_free_ptr + (slot_size_bytes - @sizeOf(usize)));
-        big_frees[class] = node.*;
+        global.big_frees[class] = node.*;
        return top_free_ptr;
    }

-    const page_index = @wasmMemoryGrow(0, pow2_pages * pages_per_bigpage);
-    if (page_index == -1) return 0;
-    return @as(usize, @intCast(page_index)) * wasm.page_size;
+    if (builtin.cpu.arch.isWasm()) {
+        comptime assert(std.heap.page_size_max == std.heap.page_size_min);
+        const page_size = std.heap.page_size_max;
+        const pages_per_bigpage = bigpage_size / page_size;
+        const page_index = @wasmMemoryGrow(0, pow2_pages * pages_per_bigpage);
+        if (page_index == -1) return 0;
+        return @as(usize, @intCast(page_index)) * page_size;
+    } else if (builtin.os.tag == .linux) {
+        // Commit `prev_brk` only on success: a failed call returns the current,
+        // possibly unaligned break, which must never become the next start.
+        const start_brk = if (global.prev_brk == 0)
+            std.mem.alignForward(usize, std.os.linux.brk(0), bigpage_size)
+        else
+            global.prev_brk;
+        const end_brk = math.add(usize, start_brk, slot_size_bytes) catch return 0;
+        if (std.os.linux.brk(end_brk) != end_brk) return 0;
+        global.prev_brk = end_brk;
+        return start_brk;
+    } else {
+        @compileError("no sbrk-like OS primitive available");
+    }
 }

 const test_ally: Allocator = .{
@ -257,12 +277,14 @@ test "shrink" {
 }

 test "large object - grow" {
+    if (builtin.os.tag == .linux) return error.SkipZigTest;
+
    var slice1 = try test_ally.alloc(u8, bigpage_size * 2 - 20);
    defer test_ally.free(slice1);

    const old = slice1;
    slice1 = try test_ally.realloc(slice1, bigpage_size * 2 - 10);
-    try std.testing.expect(slice1.ptr == old.ptr);
+    try std.testing.expectEqual(old.ptr, slice1.ptr);

    slice1 = try test_ally.realloc(slice1, bigpage_size * 2);
    slice1 = try test_ally.realloc(slice1, bigpage_size * 2 + 1);
--- a/lib/std/heap/PageAllocator.zig
+++ b/lib/std/heap/PageAllocator.zig
@ -1,19 +1,17 @@
-const std = @import("../std.zig");
 const builtin = @import("builtin");
+const native_os = builtin.os.tag;
+
+const std = @import("../std.zig");
 const Allocator = std.mem.Allocator;
+const Alignment = std.mem.Alignment;
 const mem = std.mem;
 const maxInt = std.math.maxInt;
 const assert = std.debug.assert;
-const native_os = builtin.os.tag;
 const windows = std.os.windows;
-const ntdll = windows.ntdll;
+const ntdll = std.os.windows.ntdll;
 const posix = std.posix;
 const page_size_min = std.heap.page_size_min;

-const SUCCESS = @import("../os/windows/ntstatus.zig").NTSTATUS.SUCCESS;
-const MEM_RESERVE_PLACEHOLDER = windows.MEM_RESERVE_PLACEHOLDER;
-const MEM_PRESERVE_PLACEHOLDER = windows.MEM_PRESERVE_PLACEHOLDER;
-
 pub const vtable: Allocator.VTable = .{
    .alloc = alloc,
    .resize = resize,
@ -21,7 +19,7 @@ pub const vtable: Allocator.VTable = .{
    .free = free,
 };

-pub fn map(n: usize, alignment: mem.Alignment) ?[*]u8 {
+pub fn map(n: usize, alignment: Alignment) ?[*]u8 {
    const page_size = std.heap.pageSize();
    if (n >= maxInt(usize) - page_size) return null;
    const alignment_bytes = alignment.toByteUnits();
@ -33,11 +31,11 @@ pub fn map(n: usize, alignment: mem.Alignment) ?[*]u8 {
        const current_process = windows.GetCurrentProcess();
        var status = ntdll.NtAllocateVirtualMemory(current_process, @ptrCast(&base_addr), 0, &size, .{ .COMMIT = true, .RESERVE = true }, .{ .READWRITE = true });

-        if (status == SUCCESS and mem.isAligned(@intFromPtr(base_addr), alignment_bytes)) {
+        if (status == .SUCCESS and mem.isAligned(@intFromPtr(base_addr), alignment_bytes)) {
            return @ptrCast(base_addr);
        }

-        if (status == SUCCESS) {
+        if (status == .SUCCESS) {
            var region_size: windows.SIZE_T = 0;
            _ = ntdll.NtFreeVirtualMemory(current_process, @ptrCast(&base_addr), &region_size, .{ .RELEASE = true });
        }
@ -50,7 +48,7 @@ pub fn map(n: usize, alignment: mem.Alignment) ?[*]u8 {

        status = ntdll.NtAllocateVirtualMemory(current_process, @ptrCast(&base_addr), 0, &size, .{ .RESERVE = true, .RESERVE_PLACEHOLDER = true }, .{ .NOACCESS = true });

-        if (status != SUCCESS) return null;
+        if (status != .SUCCESS) return null;

        const placeholder_addr = @intFromPtr(base_addr);
        const aligned_addr = mem.alignForward(usize, placeholder_addr, alignment_bytes);
@ -75,7 +73,7 @@ pub fn map(n: usize, alignment: mem.Alignment) ?[*]u8 {

        status = ntdll.NtAllocateVirtualMemory(current_process, @ptrCast(&base_addr), 0, &size, .{ .COMMIT = true }, .{ .READWRITE = true });

-        if (status == SUCCESS) {
+        if (status == .SUCCESS) {
            return @ptrCast(base_addr);
        }

@ -116,31 +114,29 @@ pub fn map(n: usize, alignment: mem.Alignment) ?[*]u8 {
    return result_ptr;
 }

-fn alloc(context: *anyopaque, n: usize, alignment: mem.Alignment, ra: usize) ?[*]u8 {
+fn alloc(context: *anyopaque, n: usize, alignment: Alignment, ra: usize) ?[*]u8 {
    _ = context;
    _ = ra;
    assert(n > 0);
    return map(n, alignment);
 }

-fn resize(context: *anyopaque, memory: []u8, alignment: mem.Alignment, new_len: usize, return_address: usize) bool {
+fn resize(context: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, return_address: usize) bool {
    _ = context;
-    _ = alignment;
    _ = return_address;
-    return realloc(memory, new_len, false) != null;
+    return realloc(memory, alignment, new_len, false) != null;
 }

-fn remap(context: *anyopaque, memory: []u8, alignment: mem.Alignment, new_len: usize, return_address: usize) ?[*]u8 {
+fn remap(context: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, return_address: usize) ?[*]u8 {
    _ = context;
-    _ = alignment;
    _ = return_address;
-    return realloc(memory, new_len, true);
+    return realloc(memory, alignment, new_len, true);
 }

-fn free(context: *anyopaque, memory: []u8, alignment: mem.Alignment, return_address: usize) void {
+fn free(context: *anyopaque, memory: []u8, alignment: Alignment, return_address: usize) void {
    _ = context;
-    _ = alignment;
    _ = return_address;
+    _ = alignment;
    return unmap(@alignCast(memory));
 }

@ -155,9 +151,10 @@ pub fn unmap(memory: []align(page_size_min) u8) void {
    }
 }

-pub fn realloc(uncasted_memory: []u8, new_len: usize, may_move: bool) ?[*]u8 {
+pub fn realloc(uncasted_memory: []u8, alignment: Alignment, new_len: usize, may_move: bool) ?[*]u8 {
    const memory: []align(page_size_min) u8 = @alignCast(uncasted_memory);
    const page_size = std.heap.pageSize();
+    if (alignment.toByteUnits() > page_size) return null;
    const new_size_aligned = mem.alignForward(usize, new_len, page_size);

    if (native_os == .windows) {
--- a/lib/std/heap/SmpAllocator.zig
+++ b/lib/std/heap/SmpAllocator.zig
@ -26,6 +26,7 @@
 //! By limiting the thread-local metadata array to the same number as the CPU
 //! count, ensures that as threads are created and destroyed, they cycle
 //! through the full set of freelists.
+const SmpAllocator = @This();

 const builtin = @import("builtin");

@ -34,7 +35,7 @@ const assert = std.debug.assert;
 const mem = std.mem;
 const math = std.math;
 const Allocator = std.mem.Allocator;
-const SmpAllocator = @This();
+const Alignment = std.mem.Alignment;
 const PageAllocator = std.heap.PageAllocator;

 cpu_count: u32,
@ -114,7 +115,7 @@ comptime {
    assert(!builtin.single_threaded); // you're holding it wrong
 }

-fn alloc(context: *anyopaque, len: usize, alignment: mem.Alignment, ra: usize) ?[*]u8 {
+fn alloc(context: *anyopaque, len: usize, alignment: Alignment, ra: usize) ?[*]u8 {
    _ = context;
    _ = ra;
    const class = sizeClassIndex(len, alignment);
@ -172,31 +173,31 @@ fn alloc(context: *anyopaque, len: usize, alignment: mem.Alignment, ra: usize) ?
    }
 }

-fn resize(context: *anyopaque, memory: []u8, alignment: mem.Alignment, new_len: usize, ra: usize) bool {
+fn resize(context: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ra: usize) bool {
    _ = context;
    _ = ra;
    const class = sizeClassIndex(memory.len, alignment);
    const new_class = sizeClassIndex(new_len, alignment);
    if (class >= size_class_count) {
        if (new_class < size_class_count) return false;
-        return PageAllocator.realloc(memory, new_len, false) != null;
+        return PageAllocator.realloc(memory, alignment, new_len, false) != null;
    }
    return new_class == class;
 }

-fn remap(context: *anyopaque, memory: []u8, alignment: mem.Alignment, new_len: usize, ra: usize) ?[*]u8 {
+fn remap(context: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ra: usize) ?[*]u8 {
    _ = context;
    _ = ra;
    const class = sizeClassIndex(memory.len, alignment);
    const new_class = sizeClassIndex(new_len, alignment);
    if (class >= size_class_count) {
        if (new_class < size_class_count) return null;
-        return PageAllocator.realloc(memory, new_len, true);
+        return PageAllocator.realloc(memory, alignment, new_len, true);
    }
    return if (new_class == class) memory.ptr else null;
 }

-fn free(context: *anyopaque, memory: []u8, alignment: mem.Alignment, ra: usize) void {
+fn free(context: *anyopaque, memory: []u8, alignment: Alignment, ra: usize) void {
    _ = context;
    _ = ra;
    const class = sizeClassIndex(memory.len, alignment);
@ -214,7 +215,7 @@ fn free(context: *anyopaque, memory: []u8, alignment: mem.Alignment, ra: usize)
    t.frees[class] = @intFromPtr(node);
 }

-fn sizeClassIndex(len: usize, alignment: mem.Alignment) usize {
+fn sizeClassIndex(len: usize, alignment: Alignment) usize {
    return @max(@bitSizeOf(usize) - @clz(len - 1), @intFromEnum(alignment), min_class) - min_class;
 }

--- a/lib/std/heap/sbrk_allocator.zig
+++ b/lib/std/heap/sbrk_allocator.zig
@ -1,180 +0,0 @@
-const builtin = @import("builtin");
-
-const std = @import("../std.zig");
-const Io = std.Io;
-const math = std.math;
-const Allocator = std.mem.Allocator;
-const mem = std.mem;
-const heap = std.heap;
-const assert = std.debug.assert;
-
-pub fn SbrkAllocator(comptime sbrk: *const fn (n: usize) usize) type {
-    return struct {
-        pub const vtable: Allocator.VTable = .{
-            .alloc = alloc,
-            .resize = resize,
-            .remap = remap,
-            .free = free,
-        };
-
-        pub const Error = Allocator.Error;
-
-        const max_usize = math.maxInt(usize);
-        const ushift = math.Log2Int(usize);
-        const bigpage_size = 64 * 1024;
-        const pages_per_bigpage = bigpage_size / heap.pageSize();
-        const bigpage_count = max_usize / bigpage_size;
-
-        /// Because of storing free list pointers, the minimum size class is 3.
-        const min_class = math.log2(math.ceilPowerOfTwoAssert(usize, 1 + @sizeOf(usize)));
-        const size_class_count = math.log2(bigpage_size) - min_class;
-        /// 0 - 1 bigpage
-        /// 1 - 2 bigpages
-        /// 2 - 4 bigpages
-        /// etc.
-        const big_size_class_count = math.log2(bigpage_count);
-
-        var next_addrs = [1]usize{0} ** size_class_count;
-        /// For each size class, points to the freed pointer.
-        var frees = [1]usize{0} ** size_class_count;
-        /// For each big size class, points to the freed pointer.
-        var big_frees = [1]usize{0} ** big_size_class_count;
-
-        // TODO don't do the naive locking strategy
-        var mutex: Io.Mutex = .{};
-        fn alloc(ctx: *anyopaque, len: usize, alignment: mem.Alignment, return_address: usize) ?[*]u8 {
-            _ = ctx;
-            _ = return_address;
-            Io.Threaded.mutexLock(&mutex);
-            defer Io.Threaded.mutexUnlock(&mutex);
-            // Make room for the freelist next pointer.
-            const actual_len = @max(len +| @sizeOf(usize), alignment.toByteUnits());
-            const slot_size = math.ceilPowerOfTwo(usize, actual_len) catch return null;
-            const class = math.log2(slot_size) - min_class;
-            if (class < size_class_count) {
-                const addr = a: {
-                    const top_free_ptr = frees[class];
-                    if (top_free_ptr != 0) {
-                        const node = @as(*usize, @ptrFromInt(top_free_ptr + (slot_size - @sizeOf(usize))));
-                        frees[class] = node.*;
-                        break :a top_free_ptr;
-                    }
-
-                    const next_addr = next_addrs[class];
-                    if (next_addr % heap.pageSize() == 0) {
-                        const addr = allocBigPages(1);
-                        if (addr == 0) return null;
-                        //std.debug.print("allocated fresh slot_size={d} class={d} addr=0x{x}\n", .{
-                        //    slot_size, class, addr,
-                        //});
-                        next_addrs[class] = addr + slot_size;
-                        break :a addr;
-                    } else {
-                        next_addrs[class] = next_addr + slot_size;
-                        break :a next_addr;
-                    }
-                };
-                return @as([*]u8, @ptrFromInt(addr));
-            }
-            const bigpages_needed = bigPagesNeeded(actual_len);
-            const addr = allocBigPages(bigpages_needed);
-            return @as([*]u8, @ptrFromInt(addr));
-        }
-
-        fn resize(
-            ctx: *anyopaque,
-            buf: []u8,
-            alignment: mem.Alignment,
-            new_len: usize,
-            return_address: usize,
-        ) bool {
-            _ = ctx;
-            _ = return_address;
-            Io.Threaded.mutexLock(&mutex);
-            defer Io.Threaded.mutexUnlock(&mutex);
-            // We don't want to move anything from one size class to another, but we
-            // can recover bytes in between powers of two.
-            const buf_align = alignment.toByteUnits();
-            const old_actual_len = @max(buf.len + @sizeOf(usize), buf_align);
-            const new_actual_len = @max(new_len +| @sizeOf(usize), buf_align);
-            const old_small_slot_size = math.ceilPowerOfTwoAssert(usize, old_actual_len);
-            const old_small_class = math.log2(old_small_slot_size) - min_class;
-            if (old_small_class < size_class_count) {
-                const new_small_slot_size = math.ceilPowerOfTwo(usize, new_actual_len) catch return false;
-                return old_small_slot_size == new_small_slot_size;
-            } else {
-                const old_bigpages_needed = bigPagesNeeded(old_actual_len);
-                const old_big_slot_pages = math.ceilPowerOfTwoAssert(usize, old_bigpages_needed);
-                const new_bigpages_needed = bigPagesNeeded(new_actual_len);
-                const new_big_slot_pages = math.ceilPowerOfTwo(usize, new_bigpages_needed) catch return false;
-                return old_big_slot_pages == new_big_slot_pages;
-            }
-        }
-
-        fn remap(
-            context: *anyopaque,
-            memory: []u8,
-            alignment: mem.Alignment,
-            new_len: usize,
-            return_address: usize,
-        ) ?[*]u8 {
-            return if (resize(context, memory, alignment, new_len, return_address)) memory.ptr else null;
-        }
-
-        fn free(
-            ctx: *anyopaque,
-            buf: []u8,
-            alignment: mem.Alignment,
-            return_address: usize,
-        ) void {
-            _ = ctx;
-            _ = return_address;
-            Io.Threaded.mutexLock(&mutex);
-            defer Io.Threaded.mutexUnlock(&mutex);
-            const buf_align = alignment.toByteUnits();
-            const actual_len = @max(buf.len + @sizeOf(usize), buf_align);
-            const slot_size = math.ceilPowerOfTwoAssert(usize, actual_len);
-            const class = math.log2(slot_size) - min_class;
-            const addr = @intFromPtr(buf.ptr);
-            if (class < size_class_count) {
-                const node = @as(*usize, @ptrFromInt(addr + (slot_size - @sizeOf(usize))));
-                node.* = frees[class];
-                frees[class] = addr;
-            } else {
-                const bigpages_needed = bigPagesNeeded(actual_len);
-                const pow2_pages = math.ceilPowerOfTwoAssert(usize, bigpages_needed);
-                const big_slot_size_bytes = pow2_pages * bigpage_size;
-                const node = @as(*usize, @ptrFromInt(addr + (big_slot_size_bytes - @sizeOf(usize))));
-                const big_class = math.log2(pow2_pages);
-                node.* = big_frees[big_class];
-                big_frees[big_class] = addr;
-            }
-        }
-
-        inline fn bigPagesNeeded(byte_count: usize) usize {
-            return (byte_count + (bigpage_size + (@sizeOf(usize) - 1))) / bigpage_size;
-        }
-
-        fn allocBigPages(n: usize) usize {
-            const pow2_pages = math.ceilPowerOfTwoAssert(usize, n);
-            const slot_size_bytes = pow2_pages * bigpage_size;
-            const class = math.log2(pow2_pages);
-
-            const top_free_ptr = big_frees[class];
-            if (top_free_ptr != 0) {
-                const node = @as(*usize, @ptrFromInt(top_free_ptr + (slot_size_bytes - @sizeOf(usize))));
-                big_frees[class] = node.*;
-                return top_free_ptr;
-            }
-            return sbrk(pow2_pages * pages_per_bigpage * heap.pageSize());
-        }
-    };
-}
-
-test SbrkAllocator {
-    _ = SbrkAllocator(struct {
-        fn sbrk(_: usize) usize {
-            return 0;
-        }
-    }.sbrk);
-}
--- a/lib/std/os/linux.zig
+++ b/lib/std/os/linux.zig
@ -594,6 +594,10 @@ pub fn errno(r: usize) E {
    return @enumFromInt(int);
 }

+/// Invokes the raw `brk` system call with `addr` and returns its unprocessed result.
+/// Per brk(2), the kernel reports the resulting program break: the new break on
+/// success, or the unchanged current break on failure, so `brk(0)` queries it.
+pub fn brk(addr: usize) usize {
+    return syscall1(.brk, addr);
+}
+
 pub fn dup(old: i32) usize {
    return syscall1(.dup, @as(usize, @bitCast(@as(isize, old))));
 }
--- a/src/libs/musl.zig
+++ b/src/libs/musl.zig
@ -352,8 +352,7 @@ const Ext = enum {
 fn addSrcFile(arena: Allocator, source_table: *std.StringArrayHashMap(Ext), file_path: []const u8) !void {
    const ext: Ext = ext: {
        if (mem.endsWith(u8, file_path, ".c")) {
-            if (mem.startsWith(u8, file_path, "musl/src/malloc/") or
-                mem.startsWith(u8, file_path, "musl/src/string/") or
+            if (mem.startsWith(u8, file_path, "musl/src/string/") or
                mem.startsWith(u8, file_path, "musl/src/internal/"))
            {
                break :ext .o3;
@ -786,24 +785,6 @@ const src_files = [_][]const u8{
    "musl/src/locale/uselocale.c",
    "musl/src/locale/wcscoll.c",
    "musl/src/locale/wcsxfrm.c",
-    "musl/src/malloc/calloc.c",
-    "musl/src/malloc/free.c",
-    "musl/src/malloc/libc_calloc.c",
-    "musl/src/malloc/lite_malloc.c",
-    "musl/src/malloc/mallocng/aligned_alloc.c",
-    "musl/src/malloc/mallocng/donate.c",
-    "musl/src/malloc/mallocng/free.c",
-    "musl/src/malloc/mallocng/malloc.c",
-    "musl/src/malloc/mallocng/malloc_usable_size.c",
-    "musl/src/malloc/mallocng/realloc.c",
-    "musl/src/malloc/memalign.c",
-    "musl/src/malloc/oldmalloc/aligned_alloc.c",
-    "musl/src/malloc/oldmalloc/malloc.c",
-    "musl/src/malloc/oldmalloc/malloc_usable_size.c",
-    "musl/src/malloc/posix_memalign.c",
-    "musl/src/malloc/reallocarray.c",
-    "musl/src/malloc/realloc.c",
-    "musl/src/malloc/replaced.c",
    "musl/src/math/aarch64/fma.c",
    "musl/src/math/aarch64/fmaf.c",
    "musl/src/math/aarch64/llrint.c",
--- a/src/libs/wasi_libc.zig
+++ b/src/libs/wasi_libc.zig
@ -77,22 +77,6 @@ pub fn buildCrtFile(comp: *Compilation, crt_file: CrtFile, prog_node: std.Progre
        .libc_a => {
            var libc_sources = std.array_list.Managed(Compilation.CSourceFile).init(arena);

-            {
-                // Compile emmalloc.
-                var args = std.array_list.Managed([]const u8).init(arena);
-                try addCCArgs(comp, arena, &args, .{ .want_O3 = true, .no_strict_aliasing = true });
-
-                for (emmalloc_src_files) |file_path| {
-                    try libc_sources.append(.{
-                        .src_path = try comp.dirs.zig_lib.join(arena, &.{
-                            "libc", try sanitize(arena, file_path),
-                        }),
-                        .extra_flags = args.items,
-                        .owner = undefined,
-                    });
-                }
-            }
-
            {
                // Compile libc-bottom-half.
                var args = std.array_list.Managed([]const u8).init(arena);
@ -472,10 +456,6 @@ fn addLibcTopHalfIncludes(
    });
 }

-const emmalloc_src_files = [_][]const u8{
-    "wasi/emmalloc/emmalloc.c",
-};
-
 const libc_bottom_half_src_files = [_][]const u8{
    "wasi/libc-bottom-half/cloudlibc/src/libc/dirent/closedir.c",
    "wasi/libc-bottom-half/cloudlibc/src/libc/dirent/dirfd.c",
--- a/test/src/Libc.zig
+++ b/test/src/Libc.zig
@ -60,7 +60,9 @@ pub fn addTarget(libc: *const Libc, target: std.Build.ResolvedTarget) void {
            .link_libc = true,
        });

-        var libtest_c_source_files: []const []const u8 = &.{ "print.c", "rand.c", "mtest.c", "setrlim.c", "memfill.c", "vmfill.c", "fdfill.c", "utf8.c" };
+        var libtest_c_source_files: []const []const u8 = &.{
+            "print.c", "rand.c", "mtest.c", "setrlim.c", "memfill.c", "vmfill.c", "fdfill.c", "utf8.c",
+        };
        libtest_mod.addCSourceFiles(.{
            .root = common,
            .files = libtest_c_source_files[0..if (target.result.isMuslLibC()) 8 else 3],