From 65922a2d4319ca2e9f3e67650d018e269d018a77 Mon Sep 17 00:00:00 2001
From: Matthew Lugg
Date: Sat, 6 Dec 2025 10:05:23 +0000
Subject: [PATCH] std: make stack unwinding faster on macOS

https://github.com/ziglang/zig/issues/26027#issuecomment-3571227050 tracked
some bad performance in `DebugAllocator` on macOS down to a function in
dyld which `std.debug.SelfInfo` was calling into. It turns out `dladdr`'s
symbol lookup logic is horrendously slow (looking at its source code, it
appears to be doing a *linear scan* over all symbols in the image?!).
However, we don't actually need the symbol, so we want to try and avoid
this logic.

Luckily, dyld has more precise APIs for what we need! Unluckily, Apple, in
their infinite wisdom, decided they should be deprecated in favour of
`dladdr`, despite the latter being several times slower (and by "several
times", I have measured a 50x slowdown on repeated calls to `dladdr`
compared to the other API). But luckily again, the deprecated APIs are
still exposed.

So, after a careful analysis of the situation (reading dyld code and
cursing Apple engineers), I think it makes sense to just use these
deprecated APIs for now. If they ever go away, we can write our own cache
for this data to bypass Apple's awfully slow code, but I suspect these
functions will stick around for the foreseeable future.

Uh, and if `_dyld_get_image_header_containing_address` goes away, there's
also `dyld_image_header_containing_address`, which is a seemingly identical
function, exported by dyld just the same, but with a separate (functionally
identical) implementation, and not documented in the public header file.
Apple work in mysterious ways, I guess.
---
 lib/std/c.zig                    |  2 ++
 lib/std/c/darwin.zig             |  2 ++
 lib/std/debug/SelfInfo/MachO.zig | 50 ++++++++++++++++++--------------
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/lib/std/c.zig b/lib/std/c.zig
index f3fdeb6d95..816074652b 100644
--- a/lib/std/c.zig
+++ b/lib/std/c.zig
@@ -11324,6 +11324,8 @@ pub const _dyld_get_image_header = darwin._dyld_get_image_header;
 pub const _dyld_get_image_name = darwin._dyld_get_image_name;
 pub const _dyld_get_image_vmaddr_slide = darwin._dyld_get_image_vmaddr_slide;
 pub const _dyld_image_count = darwin._dyld_image_count;
+pub const _dyld_get_image_header_containing_address = darwin._dyld_get_image_header_containing_address;
+pub const dyld_image_path_containing_address = darwin.dyld_image_path_containing_address;
 pub const _host_page_size = darwin._host_page_size;
 pub const boolean_t = darwin.boolean_t;
 pub const clock_get_time = darwin.clock_get_time;
diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig
index cf7d3127eb..f0c4f4c278 100644
--- a/lib/std/c/darwin.zig
+++ b/lib/std/c/darwin.zig
@@ -354,6 +354,8 @@ pub extern "c" fn _dyld_image_count() u32;
 pub extern "c" fn _dyld_get_image_header(image_index: u32) ?*mach_header;
 pub extern "c" fn _dyld_get_image_vmaddr_slide(image_index: u32) usize;
 pub extern "c" fn _dyld_get_image_name(image_index: u32) [*:0]const u8;
+pub extern "c" fn _dyld_get_image_header_containing_address(address: *const anyopaque) ?*mach_header;
+pub extern "c" fn dyld_image_path_containing_address(address: *const anyopaque) ?[*:0]const u8;
 pub extern "c" fn dladdr(addr: *const anyopaque, info: *dl_info) c_int;
 
 pub const dl_info = extern struct {
diff --git a/lib/std/debug/SelfInfo/MachO.zig b/lib/std/debug/SelfInfo/MachO.zig
index 94d50bbf77..dd11b4c8bf 100644
--- a/lib/std/debug/SelfInfo/MachO.zig
+++ b/lib/std/debug/SelfInfo/MachO.zig
@@ -78,9 +78,14 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st
     };
 }
 pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 {
-    const module = try si.findModule(gpa, address);
-    defer si.mutex.unlock();
-    return module.name;
+    _ = si;
+    _ = gpa;
+    // This function is marked as deprecated; however, it is significantly more
+    // performant than `dladdr` (since the latter also does a very slow symbol
+    // lookup), so let's use it since it's still available.
+    return std.mem.span(std.c.dyld_image_path_containing_address(
+        @ptrFromInt(address),
+    ) orelse return error.MissingDebugInfo);
 }
 pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
     const module = try si.findModule(gpa, address);
@@ -426,28 +431,26 @@ fn unwindFrameInner(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usi
 
 /// Acquires the mutex on success.
 fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module {
-    var info: std.c.dl_info = undefined;
-    if (std.c.dladdr(@ptrFromInt(address), &info) == 0) {
-        return error.MissingDebugInfo;
-    }
+    // This function is marked as deprecated; however, it is significantly more
+    // performant than `dladdr` (since the latter also does a very slow symbol
+    // lookup), so let's use it since it's still available.
+    const text_base = std.c._dyld_get_image_header_containing_address(
+        @ptrFromInt(address),
+    ) orelse return error.MissingDebugInfo;
     si.mutex.lock();
     errdefer si.mutex.unlock();
-    const gop = try si.modules.getOrPutAdapted(gpa, @intFromPtr(info.fbase), Module.Adapter{});
+    const gop = try si.modules.getOrPutAdapted(gpa, @intFromPtr(text_base), Module.Adapter{});
     errdefer comptime unreachable;
-    if (!gop.found_existing) {
-        gop.key_ptr.* = .{
-            .text_base = @intFromPtr(info.fbase),
-            .name = std.mem.span(info.fname),
-            .unwind = null,
-            .file = null,
-        };
-    }
+    if (!gop.found_existing) gop.key_ptr.* = .{
+        .text_base = @intFromPtr(text_base),
+        .unwind = null,
+        .file = null,
+    };
     return gop.key_ptr;
 }
 
 const Module = struct {
     text_base: usize,
-    name: []const u8,
     unwind: ?(Error!Unwind),
     file: ?(Error!MachOFile),
 
@@ -544,10 +547,15 @@ const Module = struct {
     }
 
     fn getFile(module: *Module, gpa: Allocator) Error!*MachOFile {
-        if (module.file == null) module.file = MachOFile.load(gpa, module.name, builtin.cpu.arch) catch |err| switch (err) {
-            error.InvalidMachO, error.InvalidDwarf => error.InvalidDebugInfo,
-            error.MissingDebugInfo, error.OutOfMemory, error.UnsupportedDebugInfo, error.ReadFailed => |e| e,
-        };
+        if (module.file == null) {
+            // The path lookup can return null; report that as missing debug info, as `getModuleName` does.
+            const path_ptr = std.c.dyld_image_path_containing_address(@ptrFromInt(module.text_base));
+            const path = std.mem.span(path_ptr orelse return error.MissingDebugInfo);
+            module.file = MachOFile.load(gpa, path, builtin.cpu.arch) catch |err| switch (err) {
+                error.InvalidMachO, error.InvalidDwarf => error.InvalidDebugInfo,
+                error.MissingDebugInfo, error.OutOfMemory, error.UnsupportedDebugInfo, error.ReadFailed => |e| e,
+            };
+        }
         return if (module.file.?) |*f| f else |err| err;
     }
 };