WindowsSdk: Use ntdll instead of advapi32 APIs for registry querying

The dependency on advapi32.dll silently brings along 3 other DLLs at runtime (msvcrt.dll, sechost.dll, bcrypt.dll), even if no advapi32 APIs are called. So, this commit reduces the number of DLLs loaded at runtime by 4 (but only when LLVM is not linked, since LLVM has its own dependency on advapi32.dll).

The data is not super conclusive, but the ntdll version of WindowsSdk appears to run slightly faster than the previous advapi32 version:

Benchmark 1: libc-ntdll.exe ..
  Time (mean ± σ):       6.0 ms ±   0.6 ms    [User: 3.9 ms, System: 7.1 ms]
  Range (min … max):     4.8 ms …   7.9 ms    112 runs

Benchmark 2: libc-advapi32.exe ..
  Time (mean ± σ):       7.2 ms ±   0.5 ms    [User: 5.4 ms, System: 9.2 ms]
  Range (min … max):     6.1 ms …   8.9 ms    103 runs

Summary
  'libc-ntdll.exe ..' ran
    1.21 ± 0.15 times faster than 'libc-advapi32.exe ..'

and this mostly seems to be due to changes in the implementation (the advapi32 APIs do a lot of NtQueryKey calls that the new implementation doesn't do) rather than due to the decrease in DLL loading. LLVM-less Zig binaries don't show the same reduction (the only difference here is the DLLs being loaded):

Benchmark 1: stage4-ntdll\bin\zig.exe version
  Time (mean ± σ):       3.0 ms ±   0.6 ms    [User: 5.3 ms, System: 4.8 ms]
  Range (min … max):     1.3 ms …   4.2 ms    112 runs

Benchmark 2: stage4-advapi32\bin\zig.exe version
  Time (mean ± σ):       3.5 ms ±   0.6 ms    [User: 6.9 ms, System: 5.5 ms]
  Range (min … max):     2.5 ms …   5.9 ms    111 runs

Summary
  'stage4-ntdll\bin\zig.exe version' ran
    1.16 ± 0.28 times faster than 'stage4-advapi32\bin\zig.exe version'

---

With the removal of the advapi32 dependency, the non-ntdll dependencies that remain in an LLVM-less Zig binary are ws2_32.dll (which brings along rpcrt4.dll at runtime), kernel32.dll (which brings along kernelbase.dll at runtime), and crypt32.dll (which brings along ucrtbase.dll at runtime).
This commit is contained in:
Ryan Liptak 2026-02-26 23:02:11 -08:00
parent 421c3c3cc5
commit 70058471e3
4 changed files with 703 additions and 433 deletions

View file

@ -3506,6 +3506,16 @@ pub const GUID = extern struct {
}
return @as(GUID, @bitCast(bytes));
}
/// Writes the GUID as lowercase hexadecimal wrapped in braces, in the form
/// `{Data1-Data2-Data3-Data4[0..2]-Data4[2..8]}`.
/// `Data1` is zero-padded to 8 digits; `Data2` and `Data3` to 4 digits each.
pub fn format(self: GUID, w: *std.Io.Writer) std.Io.Writer.Error!void {
    // `Data4` is split after its first two bytes to form the last two groups.
    const head = self.Data4[0..2];
    const tail = self.Data4[2..8];
    return w.print(
        "{{{x:0>8}-{x:0>4}-{x:0>4}-{x}-{x}}}",
        .{ self.Data1, self.Data2, self.Data3, head, tail },
    );
}
};
test GUID {
@ -3518,6 +3528,16 @@ test GUID {
},
GUID.parse("{01234567-89AB-EF10-3254-7698badcfe91}"),
);
try std.testing.expectFmt(
"{01234567-89ab-ef10-3254-7698badcfe91}",
"{f}",
.{GUID.parse("{01234567-89AB-EF10-3254-7698badcfe91}")},
);
try std.testing.expectFmt(
"{00000001-0001-0001-0001-000000000001}",
"{f}",
.{GUID{ .Data1 = 1, .Data2 = 1, .Data3 = 1, .Data4 = [_]u8{ 0, 1, 0, 0, 0, 0, 0, 1 } }},
);
}
pub const COORD = extern struct {
@ -3560,7 +3580,7 @@ pub const RTL_QUERY_REGISTRY_TABLE = extern struct {
Flags: ULONG,
Name: ?PWSTR,
EntryContext: ?*anyopaque,
DefaultType: ULONG,
DefaultType: REG.ValueType,
DefaultData: ?*anyopaque,
DefaultLength: ULONG,
};
@ -3625,34 +3645,120 @@ pub const RTL_QUERY_REGISTRY_DELETE = 0x00000040;
/// If the types do not match, the call fails.
pub const RTL_QUERY_REGISTRY_TYPECHECK = 0x00000100;
/// REG_ is a crowded namespace with a lot of overlapping and unrelated
/// defines in the Windows headers, so instead of strictly following the
/// Windows headers names, extra namespaces are added here for clarity.
pub const REG = struct {
/// No value type
pub const NONE: ULONG = 0;
/// Unicode nul terminated string
pub const SZ: ULONG = 1;
/// Unicode nul terminated string (with environment variable references)
pub const EXPAND_SZ: ULONG = 2;
/// Free form binary
pub const BINARY: ULONG = 3;
/// 32-bit number
pub const DWORD: ULONG = 4;
/// 32-bit number (same as REG_DWORD)
pub const DWORD_LITTLE_ENDIAN: ULONG = 4;
/// 32-bit number
pub const DWORD_BIG_ENDIAN: ULONG = 5;
/// Symbolic Link (unicode)
pub const LINK: ULONG = 6;
/// Multiple Unicode strings
pub const MULTI_SZ: ULONG = 7;
/// Resource list in the resource map
pub const RESOURCE_LIST: ULONG = 8;
/// Resource list in the hardware description
pub const FULL_RESOURCE_DESCRIPTOR: ULONG = 9;
pub const RESOURCE_REQUIREMENTS_LIST: ULONG = 10;
/// 64-bit number
pub const QWORD: ULONG = 11;
/// 64-bit number (same as REG_QWORD)
pub const QWORD_LITTLE_ENDIAN: ULONG = 11;
/// Type tag of a registry value, stored as a `ULONG`.
/// The enum is non-exhaustive (`_`), so tag values outside the named set
/// remain representable.
pub const ValueType = enum(ULONG) {
/// No value type
NONE = 0,
/// Unicode nul terminated string
SZ = 1,
/// Unicode nul terminated string (with environment variable references)
EXPAND_SZ = 2,
/// Free form binary
BINARY = 3,
/// 32-bit number
DWORD = 4,
/// 32-bit number
DWORD_BIG_ENDIAN = 5,
/// Symbolic Link (unicode)
LINK = 6,
/// Multiple Unicode strings
MULTI_SZ = 7,
/// Resource list in the resource map
RESOURCE_LIST = 8,
/// Resource list in the hardware description
FULL_RESOURCE_DESCRIPTOR = 9,
// NOTE(review): no description was given for this tag here; presumably it
// mirrors REG_RESOURCE_REQUIREMENTS_LIST from the Windows headers — confirm.
RESOURCE_REQUIREMENTS_LIST = 10,
/// 64-bit number
QWORD = 11,
_,
// The aliases below are declarations rather than enum fields because two
// fields of one enum cannot share the same tag value.
/// 32-bit number (same as REG_DWORD)
pub const DWORD_LITTLE_ENDIAN: ValueType = .DWORD;
/// 64-bit number (same as REG_QWORD)
pub const QWORD_LITTLE_ENDIAN: ValueType = .QWORD;
};
/// Used with NtOpenKeyEx, maybe others
///
/// Bit flags packed into a single `ULONG`. Fields are laid out starting at
/// the least significant bit: two reserved bits, then `BACKUP_RESTORE`
/// (bit 2, 0x4), then `OPEN_LINK` (bit 3, 0x8), then 28 reserved bits.
/// All fields default to unset.
pub const OpenOptions = packed struct(ULONG) {
// Bits 0-1: unused here.
Reserved0: u2 = 0,
/// Open for backup or restore
/// special access rules privilege required
BACKUP_RESTORE: bool = false,
/// Open symbolic link
OPEN_LINK: bool = false,
// Remaining 28 bits (bits 4-31) pad the struct out to 32 bits.
Reserved3: u28 = 0,
};
/// Used with NtLoadKeyEx, maybe others
///
/// Bit flags packed into a single `ULONG`: sixteen one-bit flags occupying
/// bits 0 through 15 in declaration order (`WHOLE_HIVE_VOLATILE` is bit 0,
/// `NO_IMPERSONATION_FALLBACK` is bit 15), followed by 16 reserved bits.
/// All fields default to unset.
pub const LoadOptions = packed struct(ULONG) {
/// Restore whole hive volatile
WHOLE_HIVE_VOLATILE: bool = false,
/// Unwind changes to last flush
REFRESH_HIVE: bool = false,
/// Never lazy flush this hive
NO_LAZY_FLUSH: bool = false,
/// Force the restore process even when we have open handles on subkeys
FORCE_RESTORE: bool = false,
/// Loads the hive visible to the calling process
APP_HIVE: bool = false,
/// Hive cannot be mounted by any other process while in use
PROCESS_PRIVATE: bool = false,
/// Starts Hive Journal
START_JOURNAL: bool = false,
/// Grow hive file in exact 4k increments
HIVE_EXACT_FILE_GROWTH: bool = false,
/// No RM is started for this hive (no transactions)
HIVE_NO_RM: bool = false,
/// Legacy single logging is used for this hive
HIVE_SINGLE_LOG: bool = false,
/// This hive might be used by the OS loader
BOOT_HIVE: bool = false,
/// Load the hive and return a handle to its root kcb
LOAD_HIVE_OPEN_HANDLE: bool = false,
/// Flush changes to primary hive file size as part of all flushes
FLUSH_HIVE_FILE_GROWTH: bool = false,
/// Open a hive's files in read-only mode
/// The same flag is used for REG_APP_HIVE_OPEN_READ_ONLY:
/// Open an app hive's files in read-only mode (if the hive was not previously loaded).
OPEN_READ_ONLY: bool = false,
/// Load the hive, but don't allow any modification of it
IMMUTABLE: bool = false,
/// Do not fall back to impersonating the caller if hive file access fails
NO_IMPERSONATION_FALLBACK: bool = false,
// Upper 16 bits (bits 16-31) pad the struct out to 32 bits.
Reserved16: u16 = 0,
};
};
/// Namespace for registry-key related types. Only the `VALUE` sub-namespace
/// (types describing a key's values) is declared here.
pub const KEY = struct {
/// Types used when querying information about a value stored under a key.
pub const VALUE = struct {
/// Selects which layout of value information is being requested.
/// Non-exhaustive (`_`), so unnamed class numbers remain representable.
/// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/ne-wdm-_key_value_information_class
pub const INFORMATION_CLASS = enum(c_int) {
Basic = 0,
Full = 1,
Partial = 2,
FullAlign64 = 3,
PartialAlign64 = 4,
Layer = 5,
_,
/// Number of named fields in this enum (currently 6), typed as the enum's
/// tag type. Because the named values run contiguously from 0, this is also
/// one past the largest named value.
pub const Max: @typeInfo(@This()).@"enum".tag_type = @typeInfo(@This()).@"enum".fields.len;
};
/// Fixed-size header that is immediately followed in memory by the value's
/// data bytes.
pub const PARTIAL_INFORMATION = extern struct {
TitleIndex: ULONG,
/// Type tag of the value.
Type: REG.ValueType,
/// Number of data bytes that follow the header.
DataLength: ULONG,
/// Zero-length marker for where the trailing data begins; it contributes
/// no storage of its own.
Data: [0]UCHAR,
/// Returns the `DataLength` bytes starting at the address of `Data`.
/// No bounds check is performed: the memory behind `info` must actually
/// extend at least `DataLength` bytes past the header.
pub fn data(info: *const PARTIAL_INFORMATION) []const UCHAR {
// Reinterpret the zero-length array's address as a many-item pointer so
// it can be sliced with a runtime length.
const ptr: [*]const UCHAR = @ptrCast(&info.Data);
return ptr[0..info.DataLength];
}
};
};
};
/// Flag with value 0x4.
/// NOTE(review): presumably the console output-mode bit of the same name from
/// the Windows headers (used with `SetConsoleMode`) — confirm against callers.
pub const ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x4;

View file

@ -22,6 +22,7 @@ const HANDLE = windows.HANDLE;
const HEAP = windows.HEAP;
const IO_APC_ROUTINE = windows.IO_APC_ROUTINE;
const IO_STATUS_BLOCK = windows.IO_STATUS_BLOCK;
const KEY = windows.KEY;
const KNONVOLATILE_CONTEXT_POINTERS = windows.KNONVOLATILE_CONTEXT_POINTERS;
const LARGE_INTEGER = windows.LARGE_INTEGER;
const LDR = windows.LDR;
@ -37,6 +38,7 @@ const PCWSTR = windows.PCWSTR;
const PROCESS = windows.PROCESS;
const PVOID = windows.PVOID;
const PWSTR = windows.PWSTR;
const REG = windows.REG;
const RTL_OSVERSIONINFOW = windows.RTL_OSVERSIONINFOW;
const RTL_QUERY_REGISTRY_TABLE = windows.RTL_QUERY_REGISTRY_TABLE;
const RUNTIME_FUNCTION = windows.RUNTIME_FUNCTION;
@ -724,3 +726,36 @@ pub extern "ntdll" fn RtlWakeConditionVariable(
pub extern "ntdll" fn RtlWakeAllConditionVariable(
ConditionVariable: *CONDITION_VARIABLE,
) callconv(.winapi) void;
/// Binding to the `NtOpenKeyEx` export of ntdll; returns an `NTSTATUS`.
/// NOTE(review): presumably opens the registry key described by
/// `ObjectAttributes` and stores the resulting handle in `KeyHandle` — see
/// Microsoft's `ZwOpenKeyEx` documentation to confirm the exact contract.
pub extern "ntdll" fn NtOpenKeyEx(
// Mutable pointer; presumably written by the callee — TODO confirm.
KeyHandle: *HANDLE,
DesiredAccess: ACCESS_MASK,
ObjectAttributes: *const OBJECT.ATTRIBUTES,
// Bit flags; see `REG.OpenOptions`.
OpenOptions: REG.OpenOptions,
) callconv(.winapi) NTSTATUS;
/// Binding to the `RtlOpenCurrentUser` export of ntdll; returns an `NTSTATUS`.
/// NOTE(review): presumably opens the current user's registry root with the
/// requested access and stores the handle in `CurrentUserKey` — confirm
/// against external documentation, as this file does not show its semantics.
pub extern "ntdll" fn RtlOpenCurrentUser(
DesiredAccess: ACCESS_MASK,
// Mutable pointer; presumably written by the callee — TODO confirm.
CurrentUserKey: *HANDLE,
) callconv(.winapi) NTSTATUS;
/// Binding to the `NtQueryValueKey` export of ntdll; returns an `NTSTATUS`.
/// Takes a key handle, a value name, the requested information class, and a
/// caller-provided buffer with its length; reports a byte count through
/// `ResultLength` as described on that parameter.
pub extern "ntdll" fn NtQueryValueKey(
KeyHandle: HANDLE,
ValueName: *const UNICODE_STRING,
// Selects the layout expected in `KeyValueInformation`
// (e.g. `.Partial` presumably pairs with `KEY.VALUE.PARTIAL_INFORMATION` — confirm).
KeyValueInformationClass: KEY.VALUE.INFORMATION_CLASS,
KeyValueInformation: *anyopaque,
/// Length of KeyValueInformation buffer in bytes
Length: ULONG,
/// On STATUS_SUCCESS, contains the length of the populated portion of the
/// provided buffer. On STATUS_BUFFER_OVERFLOW or STATUS_BUFFER_TOO_SMALL,
/// contains the minimum `Length` value that would be required to hold the information.
ResultLength: *ULONG,
) callconv(.winapi) NTSTATUS;
/// Binding to the `NtLoadKeyEx` export of ntdll; returns an `NTSTATUS`.
/// NOTE(review): presumably loads a registry hive from `SourceFile` and mounts
/// it at `TargetKey` — this file does not show its semantics, so confirm
/// against external documentation before relying on any parameter's meaning.
pub extern "ntdll" fn NtLoadKeyEx(
TargetKey: *const OBJECT.ATTRIBUTES,
SourceFile: *const OBJECT.ATTRIBUTES,
// Bit flags; see `REG.LoadOptions`.
Flags: REG.LoadOptions,
// The remaining handle/pointer parameters are optional (nullable).
TrustClassKey: ?HANDLE,
Event: ?HANDLE,
DesiredAccess: ACCESS_MASK,
// Presumably receives a handle when `LOAD_HIVE_OPEN_HANDLE` is set — TODO confirm.
RootHandle: ?*HANDLE,
Reserved: ?*anyopaque,
) callconv(.winapi) NTSTATUS;

File diff suppressed because it is too large Load diff

View file

@ -85,7 +85,7 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
.Flags = std.os.windows.RTL_QUERY_REGISTRY_SUBKEY | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED,
.Name = subkey[0..subkey_len :0],
.EntryContext = null,
.DefaultType = REG.NONE,
.DefaultType = .NONE,
.DefaultData = null,
.DefaultLength = 0,
};
@ -95,9 +95,9 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
inline for (fields_info, 0..) |field, i| {
const ctx: *anyopaque = blk: {
switch (@field(args, field.name).value_type) {
REG.SZ,
REG.EXPAND_SZ,
REG.MULTI_SZ,
.SZ,
.EXPAND_SZ,
.MULTI_SZ,
=> {
comptime assert(@sizeOf(std.os.windows.UNICODE_STRING) % 2 == 0);
const unicode: *std.os.windows.UNICODE_STRING = @ptrCast(&tmp_bufs[i]);
@ -109,9 +109,9 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
break :blk unicode;
},
REG.DWORD,
REG.DWORD_BIG_ENDIAN,
REG.QWORD,
.DWORD,
.DWORD_BIG_ENDIAN,
.QWORD,
=> break :blk &tmp_bufs[i],
else => unreachable,
@ -127,7 +127,7 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
.Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED,
.Name = key_buf[0..key_len :0],
.EntryContext = ctx,
.DefaultType = REG.NONE,
.DefaultType = .NONE,
.DefaultData = null,
.DefaultLength = 0,
};
@ -139,7 +139,7 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
.Flags = 0,
.Name = null,
.EntryContext = null,
.DefaultType = 0,
.DefaultType = .NONE,
.DefaultData = null,
.DefaultLength = 0,
};
@ -154,9 +154,9 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
switch (res) {
.SUCCESS => {
inline for (fields_info, 0..) |field, i| switch (@field(args, field.name).value_type) {
REG.SZ,
REG.EXPAND_SZ,
REG.MULTI_SZ,
.SZ,
.EXPAND_SZ,
.MULTI_SZ,
=> {
var buf = @field(args, field.name).value_buf;
const entry: *const std.os.windows.UNICODE_STRING = @ptrCast(table[i + 1].EntryContext);
@ -164,16 +164,16 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
buf[len] = 0;
},
REG.DWORD,
REG.DWORD_BIG_ENDIAN,
REG.QWORD,
.DWORD,
.DWORD_BIG_ENDIAN,
.QWORD,
=> {
const entry: [*]const u8 = @ptrCast(table[i + 1].EntryContext);
switch (@field(args, field.name).value_type) {
REG.DWORD, REG.DWORD_BIG_ENDIAN => {
.DWORD, .DWORD_BIG_ENDIAN => {
@memcpy(@field(args, field.name).value_buf[0..4], entry[0..4]);
},
REG.QWORD => {
.QWORD => {
@memcpy(@field(args, field.name).value_buf[0..8], entry[0..8]);
},
else => unreachable,
@ -254,18 +254,18 @@ pub fn detectNativeCpuAndFeatures() ?Target.Cpu {
// CP 4039 -> ID_AA64MMFR1_EL1
// CP 403A -> ID_AA64MMFR2_EL1
getCpuInfoFromRegistry(i, .{
.{ .key = "CP 4000", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[0])) },
.{ .key = "CP 4020", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[1])) },
.{ .key = "CP 4021", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[2])) },
.{ .key = "CP 4028", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[3])) },
.{ .key = "CP 4029", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[4])) },
.{ .key = "CP 402C", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[5])) },
.{ .key = "CP 402D", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[6])) },
.{ .key = "CP 4030", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[7])) },
.{ .key = "CP 4031", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[8])) },
.{ .key = "CP 4038", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[9])) },
.{ .key = "CP 4039", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[10])) },
.{ .key = "CP 403A", .value_type = REG.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[11])) },
.{ .key = "CP 4000", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[0])) },
.{ .key = "CP 4020", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[1])) },
.{ .key = "CP 4021", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[2])) },
.{ .key = "CP 4028", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[3])) },
.{ .key = "CP 4029", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[4])) },
.{ .key = "CP 402C", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[5])) },
.{ .key = "CP 402D", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[6])) },
.{ .key = "CP 4030", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[7])) },
.{ .key = "CP 4031", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[8])) },
.{ .key = "CP 4038", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[9])) },
.{ .key = "CP 4039", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[10])) },
.{ .key = "CP 403A", .value_type = REG.ValueType.QWORD, .value_buf = @as(*[8]u8, @ptrCast(&registers[11])) },
}) catch break :blk null;
cores[i] = @import("arm.zig").aarch64.detectNativeCpuAndFeatures(current_arch, registers) orelse