From c8dd05030519fc52dd457531f6b2654e1ad129b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20R=C3=B8nne=20Petersen?= Date: Wed, 18 Feb 2026 04:54:04 +0100 Subject: [PATCH 1/4] std.heap.PageAllocator: hint mmaps in the same direction as stack growth The old logic was fine for targets where the stack grows up (so, literally just hppa), but problematic on targets where it grows down, because we could hint that we wanted an allocation to happen in an area of the address space that the kernel expects to be able to expand the stack into. The kernel is happy to satisfy such a hint despite the obvious problems this leads to later down the road. Co-authored-by: rpkak --- lib/std/heap.zig | 3 -- lib/std/heap/PageAllocator.zig | 72 +++++++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/lib/std/heap.zig b/lib/std/heap.zig index 562d0846ef..bf28555e10 100644 --- a/lib/std/heap.zig +++ b/lib/std/heap.zig @@ -41,9 +41,6 @@ pub const MemoryPoolExtra = memory_pool.Extra; /// Deprecated; use `memory_pool.Options`. pub const MemoryPoolOptions = memory_pool.Options; -/// TODO Utilize this on Windows. -pub var next_mmap_addr_hint: ?[*]align(page_size_min) u8 = null; - /// comptime-known minimum page size of the target. /// /// All pointers from `mmap` or `NtAllocateVirtualMemory` are aligned to at least diff --git a/lib/std/heap/PageAllocator.zig b/lib/std/heap/PageAllocator.zig index bed9be5449..66abe7d4da 100644 --- a/lib/std/heap/PageAllocator.zig +++ b/lib/std/heap/PageAllocator.zig @@ -19,6 +19,28 @@ pub const vtable: Allocator.VTable = .{ .free = free, }; +/// Hinting is disabled on operating systems that make an effort to not reuse +/// mappings. For example, OpenBSD aggressively randomizes addresses of mappings +/// that don't provide a hint (for security reasons, but it serves our needs +/// too). 
+const enable_hints = switch (builtin.target.os.tag) { + .openbsd => false, + else => true, +}; + +/// On operating systems that don't immediately map in the whole stack, we need +/// to be careful to not hint into the pages after the stack guard gap, which +/// the stack will expand into. The easiest way to avoid that is to hint in the +/// same direction as stack growth. +const stack_direction = builtin.target.stackGrowth(); + +/// When hinting upwards, this points to the next page that we hope to allocate +/// at; when hinting downwards, this points to the beginning of the last +/// successful allocation. +/// +/// TODO: Utilize this on Windows. +var addr_hint: ?[*]align(page_size_min) u8 = null; + pub fn map(n: usize, alignment: Alignment) ?[*]u8 { const page_size = std.heap.pageSize(); if (n >= maxInt(usize) - page_size) return null; @@ -41,7 +63,7 @@ pub fn map(n: usize, alignment: Alignment) ?[*]u8 { } const overalloc_len = n + alignment_bytes - page_size; - const aligned_len = mem.alignForward(usize, n, page_size); + const page_aligned_len = mem.alignForward(usize, n, page_size); base_addr = null; size = overalloc_len; @@ -60,7 +82,7 @@ pub fn map(n: usize, alignment: Alignment) ?[*]u8 { _ = ntdll.NtFreeVirtualMemory(current_process, @ptrCast(&prefix_base), &prefix_size_param, .{ .RELEASE = true, .PRESERVE_PLACEHOLDER = true }); } - const suffix_start = aligned_addr + aligned_len; + const suffix_start = aligned_addr + page_aligned_len; const suffix_size = (placeholder_addr + overalloc_len) - suffix_start; if (suffix_size > 0) { var suffix_base = @as(?*anyopaque, @ptrFromInt(suffix_start)); @@ -69,7 +91,7 @@ pub fn map(n: usize, alignment: Alignment) ?[*]u8 { } base_addr = @ptrFromInt(aligned_addr); - size = aligned_len; + size = page_aligned_len; status = ntdll.NtAllocateVirtualMemory(current_process, @ptrCast(&base_addr), 0, &size, .{ .COMMIT = true }, .{ .READWRITE = true }); @@ -78,20 +100,34 @@ pub fn map(n: usize, alignment: Alignment) ?[*]u8 { } 
base_addr = @as(?*anyopaque, @ptrFromInt(aligned_addr)); - size = aligned_len; + size = page_aligned_len; _ = ntdll.NtFreeVirtualMemory(current_process, @ptrCast(&base_addr), &size, .{ .RELEASE = true }); return null; } - const aligned_len = mem.alignForward(usize, n, page_size); + const page_aligned_len = mem.alignForward(usize, n, page_size); const max_drop_len = alignment_bytes -| page_size; - const overalloc_len = aligned_len + max_drop_len; - const maybe_unaligned_hint = @atomicLoad(@TypeOf(std.heap.next_mmap_addr_hint), &std.heap.next_mmap_addr_hint, .unordered); + const overalloc_len = page_aligned_len + max_drop_len; - // Aligning hint does not use mem.alignPointer, because it is slow. - // Aligning hint does not use mem.alignForward, because it asserts that there will be no overflow. - const hint: ?[*]align(page_size_min) u8 = @ptrFromInt(((@intFromPtr(maybe_unaligned_hint)) +% (alignment_bytes - 1)) & ~(alignment_bytes - 1)); + const maybe_unaligned_hint, const hint = blk: { + if (!enable_hints) break :blk .{ null, null }; + + const maybe_unaligned_hint = @atomicLoad(@TypeOf(addr_hint), &addr_hint, .unordered); + + // For the very first mmap, let the kernel pick a good starting address; + // we'll begin doing our hinting from there. + if (maybe_unaligned_hint == null) break :blk .{ null, null }; + + // Aligning hint does not use mem.alignPointer, because it is slow. + // Aligning hint does not use mem.alignForward, because it asserts that there will be no overflow. 
+ const hint: ?[*]align(page_size_min) u8 = @ptrFromInt(switch (stack_direction) { + .down => ((@intFromPtr(maybe_unaligned_hint) -% page_aligned_len) & ~(alignment_bytes - 1)) -% max_drop_len, + .up => (@intFromPtr(maybe_unaligned_hint) +% (alignment_bytes - 1)) & ~(alignment_bytes - 1), + }); + + break :blk .{ maybe_unaligned_hint, hint }; + }; const slice = posix.mmap( hint, @@ -101,16 +137,24 @@ pub fn map(n: usize, alignment: Alignment) ?[*]u8 { -1, 0, ) catch return null; - const result_ptr = mem.alignPointer(slice.ptr, alignment_bytes) orelse return null; + const result_ptr = mem.alignPointer(slice.ptr, alignment_bytes).?; + // Unmap the extra bytes that were only requested in order to guarantee // that the range of memory we were provided had a proper alignment in it // somewhere. The extra bytes could be at the beginning, or end, or both. const drop_len = result_ptr - slice.ptr; if (drop_len != 0) posix.munmap(slice[0..drop_len]); const remaining_len = overalloc_len - drop_len; - if (remaining_len > aligned_len) posix.munmap(@alignCast(result_ptr[aligned_len..remaining_len])); - const new_hint: [*]align(page_size_min) u8 = @alignCast(result_ptr + aligned_len); - _ = @cmpxchgStrong(@TypeOf(std.heap.next_mmap_addr_hint), &std.heap.next_mmap_addr_hint, maybe_unaligned_hint, new_hint, .monotonic, .monotonic); + if (remaining_len > page_aligned_len) posix.munmap(@alignCast(result_ptr[page_aligned_len..remaining_len])); + + if (enable_hints) { + const new_hint: [*]align(page_size_min) u8 = @alignCast(result_ptr + switch (stack_direction) { + .up => page_aligned_len, + .down => 0, + }); + _ = @cmpxchgStrong(@TypeOf(addr_hint), &addr_hint, maybe_unaligned_hint, new_hint, .monotonic, .monotonic); + } + return result_ptr; } From a9d18c4a0c2be3d5e7dcedbedc32a9998b1e5515 Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 18 Feb 2026 14:51:59 +0100 Subject: [PATCH 2/4] std.heap.PageAllocator: avoid mremaps which may reserve potential stack space Linux's approach to 
mapping the main thread's stack is quite odd: it essentially tries to select an mmap address (assuming unhinted mmap calls) which does not cover the region of virtual address space into which the stack *would* grow (based on the stack rlimit), but it doesn't actually *prevent* those pages from being mapped. It also doesn't try particularly hard: it's been observed that the first (unhinted) mmap call in a simple application is usually put at an address which is within a gigabyte or two of the stack, which is close enough to make issues somewhat likely. In particular, if we get an address which is close-ish to the stack, and then `mremap` it without the MAYMOVE flag, we are *very* likely to map pages in this "theoretical stack region". This is particularly a problem on loongarch64, where the initial mmap address is empirically only around 200 megabytes from the stack (whereas on most other 64-bit targets it's closer to a gigabyte). To work around this, we just need to avoid mremap in some cases. Unfortunately, this system call isn't used too heavily by musl or glibc, so design issues like this can and do exist without being caught. So, when `PageAllocator.resize` is called, let's not try to `mremap` to grow the pages. We can still call `mremap` in the `PageAllocator.remap` path, because in that case we can set the `MAYMOVE` flag, which empirically appears to make the Linux kernel avoid the problematic "theoretical stack region". 
--- lib/std/heap/PageAllocator.zig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/std/heap/PageAllocator.zig b/lib/std/heap/PageAllocator.zig index 66abe7d4da..db736036b3 100644 --- a/lib/std/heap/PageAllocator.zig +++ b/lib/std/heap/PageAllocator.zig @@ -225,7 +225,10 @@ pub fn realloc(uncasted_memory: []u8, alignment: Alignment, new_len: usize, may_ if (new_size_aligned == page_aligned_len) return memory.ptr; - if (posix.MREMAP != void) { + // When the stack grows down, only use `mremap` if the allocation may move. + // Otherwise, we might grow the allocation and intrude on virtual address + // space which we want to keep available to the stack. + if (posix.MREMAP != void and (stack_direction == .up or may_move)) { // TODO: if the next_mmap_addr_hint is within the remapped range, update it const new_memory = posix.mremap(memory.ptr, page_aligned_len, new_size_aligned, .{ .MAYMOVE = may_move }, null) catch return null; return new_memory.ptr; From b5bcbf2a62aa513022fdb12df75699308f87b7bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20R=C3=B8nne=20Petersen?= Date: Fri, 20 Feb 2026 22:14:35 +0100 Subject: [PATCH 3/4] std.heap.DebugAllocator: make BucketHeader.fromPage() use wrapping arithmetic If we've allocated the very last page in the address space then these operations will overflow and underflow respectively - which is fine. 
--- lib/std/heap/debug_allocator.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/heap/debug_allocator.zig b/lib/std/heap/debug_allocator.zig index d150bb0927..d6f2bf1796 100644 --- a/lib/std/heap/debug_allocator.zig +++ b/lib/std/heap/debug_allocator.zig @@ -266,7 +266,7 @@ pub fn DebugAllocator(comptime config: Config) type { canary: usize = config.canary, fn fromPage(page_addr: usize, slot_count: usize) *BucketHeader { - const unaligned = page_addr + page_size - bucketSize(slot_count); + const unaligned = page_addr +% page_size -% bucketSize(slot_count); return @ptrFromInt(unaligned & ~(@as(usize, @alignOf(BucketHeader)) - 1)); } From bd80ad46479fc39e0d629b7a1ff48a96760bb7f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20R=C3=B8nne=20Petersen?= Date: Wed, 18 Feb 2026 16:46:42 +0100 Subject: [PATCH 4/4] Revert "ci: disable loongarch64-linux" This reverts commit f061c0dc2851e5fab34dca1991ebe64cfd156bd5. --- .forgejo/workflows/ci.yaml | 41 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/.forgejo/workflows/ci.yaml b/.forgejo/workflows/ci.yaml index 8ae897599a..48813b9001 100644 --- a/.forgejo/workflows/ci.yaml +++ b/.forgejo/workflows/ci.yaml @@ -59,27 +59,26 @@ jobs: run: sh ci/aarch64-macos-release.sh timeout-minutes: 120 - # https://codeberg.org/ziglang/zig/issues/30800 - #loongarch64-linux-debug: - # runs-on: [self-hosted, loongarch64-linux] - # steps: - # - name: Checkout - # uses: https://codeberg.org/ziglang/checkout@19af6bac491e2534a4687a50ee84fa7f13258d28 - # with: - # fetch-depth: 0 - # - name: Build and Test - # run: sh ci/loongarch64-linux-debug.sh - # timeout-minutes: 240 - #loongarch64-linux-release: - # runs-on: [self-hosted, loongarch64-linux] - # steps: - # - name: Checkout - # uses: https://codeberg.org/ziglang/checkout@19af6bac491e2534a4687a50ee84fa7f13258d28 - # with: - # fetch-depth: 0 - # - name: Build and Test - # run: sh ci/loongarch64-linux-release.sh - # 
timeout-minutes: 180 + loongarch64-linux-debug: + runs-on: [self-hosted, loongarch64-linux] + steps: + - name: Checkout + uses: https://codeberg.org/ziglang/checkout@19af6bac491e2534a4687a50ee84fa7f13258d28 + with: + fetch-depth: 0 + - name: Build and Test + run: sh ci/loongarch64-linux-debug.sh + timeout-minutes: 240 + loongarch64-linux-release: + runs-on: [self-hosted, loongarch64-linux] + steps: + - name: Checkout + uses: https://codeberg.org/ziglang/checkout@19af6bac491e2534a4687a50ee84fa7f13258d28 + with: + fetch-depth: 0 + - name: Build and Test + run: sh ci/loongarch64-linux-release.sh + timeout-minutes: 180 powerpc64le-linux-debug: runs-on: [self-hosted, powerpc64le-linux]