From 514f6e589ca441c3ac115b8c764bdc9f6d4aca01 Mon Sep 17 00:00:00 2001
From: GasInfinity
Date: Sun, 11 Jan 2026 16:58:04 +0100
Subject: [PATCH] feat(libzigc): use common integer `ato*` and `strto*` implementations

* also removes their musl implementation
---
 lib/c/stdlib.zig                  | 259 ++++++++++++++++++++++++++++++
 lib/libc/musl/src/stdlib/atoi.c   |  16 --
 lib/libc/musl/src/stdlib/atol.c   |  17 --
 lib/libc/musl/src/stdlib/atoll.c  |  17 --
 lib/libc/musl/src/stdlib/strtol.c |  56 -------
 src/libs/musl.zig                 |   4 -
 src/libs/wasi_libc.zig            |   4 -
 7 files changed, 259 insertions(+), 114 deletions(-)
 delete mode 100644 lib/libc/musl/src/stdlib/atoi.c
 delete mode 100644 lib/libc/musl/src/stdlib/atol.c
 delete mode 100644 lib/libc/musl/src/stdlib/atoll.c
 delete mode 100644 lib/libc/musl/src/stdlib/strtol.c

diff --git a/lib/c/stdlib.zig b/lib/c/stdlib.zig
index c8db9b8579..78757f38b9 100644
--- a/lib/c/stdlib.zig
+++ b/lib/c/stdlib.zig
@@ -1,6 +1,7 @@
 const std = @import("std");
 const common = @import("common.zig");
 const builtin = @import("builtin");
+const assert = std.debug.assert;
 const div_t = std.c.div_t;
 const ldiv_t = std.c.ldiv_t;
 const lldiv_t = std.c.lldiv_t;
@@ -16,6 +17,24 @@ comptime {
     @export(&ldiv, .{ .name = "ldiv", .linkage = common.linkage, .visibility = common.visibility });
     @export(&lldiv, .{ .name = "lldiv", .linkage = common.linkage, .visibility = common.visibility });
 
+    @export(&atoi, .{ .name = "atoi", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&atol, .{ .name = "atol", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&atoll, .{ .name = "atoll", .linkage = common.linkage, .visibility = common.visibility });
+
+    @export(&strtol, .{ .name = "strtol", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoll, .{ .name = "strtoll", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoul, .{ .name = "strtoul", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoull, .{ .name = "strtoull", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoimax, .{ .name = "strtoimax", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoumax, .{ .name = "strtoumax", .linkage = common.linkage, .visibility = common.visibility });
+
+    @export(&strtol, .{ .name = "__strtol_internal", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoll, .{ .name = "__strtoll_internal", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoul, .{ .name = "__strtoul_internal", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoull, .{ .name = "__strtoull_internal", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoimax, .{ .name = "__strtoimax_internal", .linkage = common.linkage, .visibility = common.visibility });
+    @export(&strtoumax, .{ .name = "__strtoumax_internal", .linkage = common.linkage, .visibility = common.visibility });
+
     @export(&qsort_r, .{ .name = "qsort_r", .linkage = common.linkage, .visibility = common.visibility });
     @export(&qsort, .{ .name = "qsort", .linkage = common.linkage, .visibility = common.visibility });
 
@@ -56,6 +75,190 @@ fn lldiv(a: c_longlong, b: c_longlong) callconv(.c) lldiv_t {
     };
 }
 
+fn atoi(str: [*:0]const c_char) callconv(.c) c_int {
+    return asciiToInteger(c_int, @ptrCast(str));
+}
+
+fn atol(str: [*:0]const c_char) callconv(.c) c_long {
+    return asciiToInteger(c_long, @ptrCast(str));
+}
+
+fn atoll(str: [*:0]const c_char) callconv(.c) c_longlong {
+    return asciiToInteger(c_longlong, @ptrCast(str));
+}
+
+/// Shared implementation of `atoi`, `atol` and `atoll`: skips leading whitespace, accepts an optional
+/// sign and parses decimal digits, saturating to the limits of `T` instead of overflowing.
+fn asciiToInteger(comptime T: type, buf: [*:0]const u8) T {
+    comptime assert(std.math.isPowerOfTwo(@bitSizeOf(T)));
+
+    var current = buf;
+    while (std.ascii.isWhitespace(current[0])) : (current += 1) {}
+
+    // The behaviour *is* undefined if the result cannot be represented
+    // but as they are usually called with untrusted input we can just handle overflow gracefully.
+    if (current[0] == '-') return parseDigitsWithSignGenericCharacter(T, u8, current + 1, null, 10, .neg) catch std.math.minInt(T);
+    if (current[0] == '+') current += 1;
+    return parseDigitsWithSignGenericCharacter(T, u8, current, null, 10, .pos) catch std.math.maxInt(T);
+}
+
+fn strtol(noalias str: [*:0]const c_char, noalias str_end: ?*[*:0]const c_char, base: c_int) callconv(.c) c_long {
+    return stringToInteger(c_long, @ptrCast(str), if (str_end) |end| @ptrCast(end) else null, base);
+}
+
+fn strtoll(noalias str: [*:0]const c_char, noalias str_end: ?*[*:0]const c_char, base: c_int) callconv(.c) c_longlong {
+    return stringToInteger(c_longlong, @ptrCast(str), if (str_end) |end| @ptrCast(end) else null, base);
+}
+
+fn strtoul(noalias str: [*:0]const c_char, noalias str_end: ?*[*:0]const c_char, base: c_int) callconv(.c) c_ulong {
+    return stringToInteger(c_ulong, @ptrCast(str), if (str_end) |end| @ptrCast(end) else null, base);
+}
+
+fn strtoull(noalias str: [*:0]const c_char, noalias str_end: ?*[*:0]const c_char, base: c_int) callconv(.c) c_ulonglong {
+    return stringToInteger(c_ulonglong, @ptrCast(str), if (str_end) |end| @ptrCast(end) else null, base);
+}
+
+// XXX: These belong in inttypes.zig but we'd have to make stringToInteger pub or move it somewhere else.
+fn strtoimax(noalias str: [*:0]const c_char, noalias str_end: ?*[*:0]const c_char, base: c_int) callconv(.c) std.c.intmax_t {
+    return stringToInteger(std.c.intmax_t, @ptrCast(str), if (str_end) |end| @ptrCast(end) else null, base);
+}
+
+fn strtoumax(noalias str: [*:0]const c_char, noalias str_end: ?*[*:0]const c_char, base: c_int) callconv(.c) std.c.uintmax_t {
+    return stringToInteger(std.c.uintmax_t, @ptrCast(str), if (str_end) |end| @ptrCast(end) else null, base);
+}
+
+/// Shared implementation of the `strto*` family. Sets `errno` to `EINVAL` for an unsupported `base`
+/// and to `ERANGE` when the value does not fit in `T`. If `maybe_end` is non-null it receives the
+/// address of the first unconverted character, or `buf` itself when no conversion was performed.
+fn stringToInteger(comptime T: type, noalias buf: [*:0]const u8, noalias maybe_end: ?*[*:0]const u8, base: c_int) T {
+    comptime assert(std.math.isPowerOfTwo(@bitSizeOf(T)));
+
+    // A negative base has to be rejected as well, otherwise it reaches the `@intCast` to `u6` below.
+    if (base < 0 or base == 1 or base > 36) {
+        if (maybe_end) |end| {
+            end.* = buf;
+        }
+
+        std.c._errno().* = @intFromEnum(std.c.E.INVAL);
+        return 0;
+    }
+
+    var current = buf;
+    while (std.ascii.isWhitespace(current[0])) : (current += 1) {}
+
+    const negative: bool = switch (current[0]) {
+        '-' => blk: {
+            current += 1;
+            break :blk true;
+        },
+        '+' => blk: {
+            current += 1;
+            break :blk false;
+        },
+        else => false,
+    };
+
+    // A leading '0' is itself a digit, so once it is consumed the subject sequence cannot be empty.
+    const has_leading_zero = current[0] == '0';
+
+    // The prefix is allowed iff base == 0 or base == base of the prefix
+    const real_base: u6 = if (has_leading_zero) blk: {
+        current += 1;
+
+        if ((base == 0 or base == 16) and std.ascii.toLower(current[0]) == 'x' and std.ascii.isHex(current[1])) {
+            current += 1;
+            break :blk 16;
+        }
+
+        if ((base == 0 or base == 8) and std.ascii.isDigit(current[0])) {
+            break :blk 8;
+        }
+
+        break :blk switch (base) {
+            0 => 10,
+            else => @intCast(base),
+        };
+    } else switch (base) {
+        0 => 10,
+        else => @intCast(base),
+    };
+
+    // If nothing was converted, `str_end` must receive `str` itself rather than
+    // the position after the skipped whitespace and sign.
+    const digits_start = current;
+    var digits_end = digits_start;
+    defer if (maybe_end) |end| {
+        end.* = if (has_leading_zero or digits_end != digits_start) digits_end else buf;
+    };
+
+    if (@typeInfo(T).int.signedness == .unsigned) {
+        const result = parseDigitsWithSignGenericCharacter(T, u8, digits_start, &digits_end, real_base, .pos) catch {
+            std.c._errno().* = @intFromEnum(std.c.E.RANGE);
+            return std.math.maxInt(T);
+        };
+
+        return if (negative) -%result else result;
+    }
+
+    if (negative) return parseDigitsWithSignGenericCharacter(T, u8, digits_start, &digits_end, real_base, .neg) catch blk: {
+        std.c._errno().* = @intFromEnum(std.c.E.RANGE);
+        break :blk std.math.minInt(T);
+    };
+
+    return parseDigitsWithSignGenericCharacter(T, u8, digits_start, &digits_end, real_base, .pos) catch blk: {
+        std.c._errno().* = @intFromEnum(std.c.E.RANGE);
+        break :blk std.math.maxInt(T);
+    };
+}
+
+/// Parses the longest run of digits valid in `base` at the start of `buf`, accumulating towards the
+/// given `sign` so that `minInt(T)` is representable. `maybe_end`, if non-null, always receives the
+/// position after the last digit, even when `error.Overflow` is returned.
+fn parseDigitsWithSignGenericCharacter(
+    comptime T: type,
+    comptime Char: type,
+    noalias buf: [*:0]const Char,
+    noalias maybe_end: ?*[*:0]const Char,
+    base: u6,
+    comptime sign: enum { pos, neg },
+) error{Overflow}!T {
+    assert(base >= 2 and base <= 36);
+
+    var current = buf;
+    defer if (maybe_end) |end| {
+        end.* = current;
+    };
+
+    const add = switch (sign) {
+        .pos => std.math.add,
+        .neg => std.math.sub,
+    };
+
+    var value: T = 0;
+    var overflowed = false;
+    while (true) : (current += 1) {
+        const c: u8 = std.math.cast(u8, current[0]) orelse break;
+        if (!std.ascii.isAlphanumeric(c)) break;
+
+        const digit: u6 = @intCast(std.fmt.charToDigit(c, base) catch break);
+
+        // Keep consuming digits after an overflow so that `maybe_end` still
+        // ends up past the whole digit sequence, as `strto*` requires.
+        if (overflowed) continue;
+
+        const scaled = std.math.mul(T, value, base) catch {
+            overflowed = true;
+            continue;
+        };
+        value = add(T, scaled, digit) catch {
+            overflowed = true;
+            continue;
+        };
+    }
+
+    return if (overflowed) error.Overflow else value;
+}
+
 // NOTE: Despite its name, `qsort` doesn't have to use quicksort or make any complexity or stability guarantee.
 fn qsort_r(base: *anyopaque, n: usize, size: usize, compare: *const fn (a: *const anyopaque, b: *const anyopaque, arg: ?*anyopaque) callconv(.c) c_int, arg: ?*anyopaque) callconv(.c) void {
     const Context = struct {
@@ -147,6 +350,62 @@ test lldiv {
     try std.testing.expectEqual(expected, lldiv(5, 3));
 }
 
+test atoi {
+    try std.testing.expectEqual(0, atoi(@ptrCast("stop42true")));
+    try std.testing.expectEqual(42, atoi(@ptrCast("42true")));
+    try std.testing.expectEqual(-1, atoi(@ptrCast("-01")));
+    try std.testing.expectEqual(1, atoi(@ptrCast("+001")));
+    try std.testing.expectEqual(100, atoi(@ptrCast(" 100")));
+    try std.testing.expectEqual(500, atoi(@ptrCast("000000000000500")));
+    try std.testing.expectEqual(1111, atoi(@ptrCast("0000000000001111_0000")));
+    try std.testing.expectEqual(0, atoi(@ptrCast("0xAA")));
+    try std.testing.expectEqual(700, atoi(@ptrCast("700B")));
+    try std.testing.expectEqual(32453, atoi(@ptrCast("+32453more")));
+    try std.testing.expectEqual(std.math.maxInt(c_int), atoi(@ptrCast(std.fmt.comptimePrint("{d}", .{std.math.maxInt(c_int)}))));
+    try std.testing.expectEqual(std.math.minInt(c_int), atoi(@ptrCast(std.fmt.comptimePrint("{d}", .{std.math.minInt(c_int)}))));
+}
+
+test atol {
+    try std.testing.expectEqual(0, atol(@ptrCast("stop42true")));
+    try std.testing.expectEqual(42, atol(@ptrCast("42true")));
+    try std.testing.expectEqual(-1, atol(@ptrCast("-01")));
+    try std.testing.expectEqual(1, atol(@ptrCast("+001")));
+    try std.testing.expectEqual(100, atol(@ptrCast(" 100")));
+    try std.testing.expectEqual(500, atol(@ptrCast("000000000000500")));
+    try std.testing.expectEqual(1111, atol(@ptrCast("0000000000001111_0000")));
+    try std.testing.expectEqual(0, atol(@ptrCast("0xAA")));
+    try std.testing.expectEqual(700, atol(@ptrCast("700B")));
+    try std.testing.expectEqual(32453, atol(@ptrCast("+32453more")));
+    try std.testing.expectEqual(std.math.maxInt(c_long), atol(@ptrCast(std.fmt.comptimePrint("{d}", .{std.math.maxInt(c_long)}))));
+    try std.testing.expectEqual(std.math.minInt(c_long), atol(@ptrCast(std.fmt.comptimePrint("{d}", .{std.math.minInt(c_long)}))));
+}
+
+test atoll {
+    try std.testing.expectEqual(0, atoll(@ptrCast("stop42true")));
+    try std.testing.expectEqual(42, atoll(@ptrCast("42true")));
+    try std.testing.expectEqual(-1, atoll(@ptrCast("-01")));
+    try std.testing.expectEqual(1, atoll(@ptrCast("+001")));
+    try std.testing.expectEqual(100, atoll(@ptrCast(" 100")));
+    try std.testing.expectEqual(500, atoll(@ptrCast("000000000000500")));
+    try std.testing.expectEqual(1111, atoll(@ptrCast("0000000000001111_0000")));
+    try std.testing.expectEqual(0, atoll(@ptrCast("0xAA")));
+    try std.testing.expectEqual(700, atoll(@ptrCast("700B")));
+    try std.testing.expectEqual(32453, atoll(@ptrCast(" +32453more")));
+    try std.testing.expectEqual(std.math.maxInt(c_longlong), atoll(@ptrCast(std.fmt.comptimePrint("{d}", .{std.math.maxInt(c_longlong)}))));
+    try std.testing.expectEqual(std.math.minInt(c_longlong), atoll(@ptrCast(std.fmt.comptimePrint("{d}", .{std.math.minInt(c_longlong)}))));
+}
+
+test parseDigitsWithSignGenericCharacter {
+    const input: [*:0]const u8 = "99999999999999999999999999999999999999999999more";
+    var end = input;
+    // On overflow the end pointer must still be placed after the whole digit sequence.
+    try std.testing.expectError(error.Overflow, parseDigitsWithSignGenericCharacter(c_longlong, u8, input, &end, 10, .pos));
+    try std.testing.expectEqual('m', end[0]);
+    try std.testing.expectEqual(-255, try parseDigitsWithSignGenericCharacter(c_int, u8, "ff", null, 16, .neg));
+}
+
+// FIXME: We cannot test strtol, strtoll, strtoul, etc. here as they must modify errno and libc is not linked in tests
+
 test bsearch {
     const Comparison = struct {
         pub fn compare(a: *const anyopaque, b: *const anyopaque) callconv(.c) c_int {
diff --git a/lib/libc/musl/src/stdlib/atoi.c b/lib/libc/musl/src/stdlib/atoi.c
deleted file mode 100644
index 9baca7b895..0000000000
--- a/lib/libc/musl/src/stdlib/atoi.c
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <stdlib.h>
-#include <ctype.h>
-
-int atoi(const char *s)
-{
-	int n=0, neg=0;
-	while (isspace(*s)) s++;
-	switch (*s) {
-	case '-': neg=1;
-	case '+': s++;
-	}
-	/* Compute n as a negative number to avoid overflow on INT_MIN */
-	while (isdigit(*s))
-		n = 10*n - (*s++ - '0');
-	return neg ? n : -n;
-}
diff --git a/lib/libc/musl/src/stdlib/atol.c b/lib/libc/musl/src/stdlib/atol.c
deleted file mode 100644
index 140ea3ea3f..0000000000
--- a/lib/libc/musl/src/stdlib/atol.c
+++ /dev/null
@@ -1,17 +0,0 @@
-#include <stdlib.h>
-#include <ctype.h>
-
-long atol(const char *s)
-{
-	long n=0;
-	int neg=0;
-	while (isspace(*s)) s++;
-	switch (*s) {
-	case '-': neg=1;
-	case '+': s++;
-	}
-	/* Compute n as a negative number to avoid overflow on LONG_MIN */
-	while (isdigit(*s))
-		n = 10*n - (*s++ - '0');
-	return neg ? n : -n;
-}
diff --git a/lib/libc/musl/src/stdlib/atoll.c b/lib/libc/musl/src/stdlib/atoll.c
deleted file mode 100644
index b69304895a..0000000000
--- a/lib/libc/musl/src/stdlib/atoll.c
+++ /dev/null
@@ -1,17 +0,0 @@
-#include <stdlib.h>
-#include <ctype.h>
-
-long long atoll(const char *s)
-{
-	long long n=0;
-	int neg=0;
-	while (isspace(*s)) s++;
-	switch (*s) {
-	case '-': neg=1;
-	case '+': s++;
-	}
-	/* Compute n as a negative number to avoid overflow on LLONG_MIN */
-	while (isdigit(*s))
-		n = 10*n - (*s++ - '0');
-	return neg ? n : -n;
-}
diff --git a/lib/libc/musl/src/stdlib/strtol.c b/lib/libc/musl/src/stdlib/strtol.c
deleted file mode 100644
index bfefea69d1..0000000000
--- a/lib/libc/musl/src/stdlib/strtol.c
+++ /dev/null
@@ -1,56 +0,0 @@
-#include "stdio_impl.h"
-#include "intscan.h"
-#include "shgetc.h"
-#include <inttypes.h>
-#include <limits.h>
-#include <ctype.h>
-
-static unsigned long long strtox(const char *s, char **p, int base, unsigned long long lim)
-{
-	FILE f;
-	sh_fromstring(&f, s);
-	shlim(&f, 0);
-	unsigned long long y = __intscan(&f, base, 1, lim);
-	if (p) {
-		size_t cnt = shcnt(&f);
-		*p = (char *)s + cnt;
-	}
-	return y;
-}
-
-unsigned long long strtoull(const char *restrict s, char **restrict p, int base)
-{
-	return strtox(s, p, base, ULLONG_MAX);
-}
-
-long long strtoll(const char *restrict s, char **restrict p, int base)
-{
-	return strtox(s, p, base, LLONG_MIN);
-}
-
-unsigned long strtoul(const char *restrict s, char **restrict p, int base)
-{
-	return strtox(s, p, base, ULONG_MAX);
-}
-
-long strtol(const char *restrict s, char **restrict p, int base)
-{
-	return strtox(s, p, base, 0UL+LONG_MIN);
-}
-
-intmax_t strtoimax(const char *restrict s, char **restrict p, int base)
-{
-	return strtoll(s, p, base);
-}
-
-uintmax_t strtoumax(const char *restrict s, char **restrict p, int base)
-{
-	return strtoull(s, p, base);
-}
-
-weak_alias(strtol, __strtol_internal);
-weak_alias(strtoul, __strtoul_internal);
-weak_alias(strtoll, __strtoll_internal);
-weak_alias(strtoull, __strtoull_internal);
-weak_alias(strtoimax, __strtoimax_internal);
-weak_alias(strtoumax, __strtoumax_internal);
diff --git a/src/libs/musl.zig b/src/libs/musl.zig
index 302d22b062..e2d683ae69 100644
--- a/src/libs/musl.zig
+++ b/src/libs/musl.zig
@@ -1620,14 +1620,10 @@ const src_files = [_][]const u8{
     "musl/src/stdio/wprintf.c",
     "musl/src/stdio/wscanf.c",
     "musl/src/stdlib/atof.c",
-    "musl/src/stdlib/atoi.c",
-    "musl/src/stdlib/atol.c",
-    "musl/src/stdlib/atoll.c",
     "musl/src/stdlib/ecvt.c",
     "musl/src/stdlib/fcvt.c",
     "musl/src/stdlib/gcvt.c",
     "musl/src/stdlib/strtod.c",
-    "musl/src/stdlib/strtol.c",
     "musl/src/stdlib/wcstod.c",
     "musl/src/stdlib/wcstol.c",
     "musl/src/string/bcopy.c",
diff --git a/src/libs/wasi_libc.zig b/src/libs/wasi_libc.zig
index 161b60d552..3b4fa178d1 100644
--- a/src/libs/wasi_libc.zig
+++ b/src/libs/wasi_libc.zig
@@ -975,13 +975,9 @@ const libc_top_half_src_files = [_][]const u8{
     "musl/src/stdio/wprintf.c",
     "musl/src/stdio/wscanf.c",
     "musl/src/stdlib/atof.c",
-    "musl/src/stdlib/atoi.c",
-    "musl/src/stdlib/atol.c",
-    "musl/src/stdlib/atoll.c",
     "musl/src/stdlib/ecvt.c",
     "musl/src/stdlib/fcvt.c",
     "musl/src/stdlib/gcvt.c",
-    "musl/src/stdlib/strtol.c",
     "musl/src/string/bcopy.c",
     "musl/src/string/explicit_bzero.c",
     "musl/src/string/index.c",