initial support for integrated fuzzing

* Add the `-ffuzz` and `-fno-fuzz` CLI arguments.
* Detect fuzz testing flags from zig cc.
* Set the correct clang flags when fuzz testing is requested. Fuzzing can
  be combined with TSAN and UBSAN.
* Compilation: build the fuzzer library when needed; it is currently an
  empty Zig file.
* Add the `optforfuzzing` attribute to every function in the LLVM backend
  for modules that have requested fuzzing.
* In ZigLLVMTargetMachineEmitToFile, add the optimization passes for
  sanitizer coverage.
* std.mem.eql uses a naive implementation optimized for fuzzing when
  builtin.fuzz is true.

Tracked by #20702
This commit is contained in:
Andrew Kelley 2024-07-21 18:12:22 -07:00
parent eac7fd4da5
commit 54b7e144b1
11 changed files with 133 additions and 53 deletions

0
lib/fuzzer.zig Normal file
View file

View file

@ -636,18 +636,20 @@ test lessThan {
try testing.expect(lessThan(u8, "", "a"));
}
const backend_can_use_eql_bytes = switch (builtin.zig_backend) {
const eqlBytes_allowed = switch (builtin.zig_backend) {
// The SPIR-V backend does not support the optimized path yet.
.stage2_spirv64 => false,
// The RISC-V backend does not support vectors.
.stage2_riscv64 => false,
else => true,
// The naive memory comparison implementation is more useful for fuzzers to
// find interesting inputs.
else => !builtin.fuzz,
};
/// Compares two slices and returns whether they are equal.
pub fn eql(comptime T: type, a: []const T, b: []const T) bool {
if (@sizeOf(T) == 0) return true;
if (!@inComptime() and std.meta.hasUniqueRepresentation(T) and backend_can_use_eql_bytes) return eqlBytes(sliceAsBytes(a), sliceAsBytes(b));
if (!@inComptime() and std.meta.hasUniqueRepresentation(T) and eqlBytes_allowed) return eqlBytes(sliceAsBytes(a), sliceAsBytes(b));
if (a.len != b.len) return false;
if (a.len == 0 or a.ptr == b.ptr) return true;
@ -660,9 +662,7 @@ pub fn eql(comptime T: type, a: []const T, b: []const T) bool {
/// std.mem.eql heavily optimized for slices of bytes.
fn eqlBytes(a: []const u8, b: []const u8) bool {
if (!backend_can_use_eql_bytes) {
return eql(u8, a, b);
}
comptime assert(eqlBytes_allowed);
if (a.len != b.len) return false;
if (a.len == 0 or a.ptr == b.ptr) return true;

View file

@ -10,6 +10,7 @@ optimize_mode: std.builtin.OptimizeMode,
error_tracing: bool,
valgrind: bool,
sanitize_thread: bool,
fuzz: bool,
pic: bool,
pie: bool,
strip: bool,
@ -185,6 +186,7 @@ pub fn append(opts: @This(), buffer: *std.ArrayList(u8)) Allocator.Error!void {
\\pub const have_error_return_tracing = {};
\\pub const valgrind_support = {};
\\pub const sanitize_thread = {};
\\pub const fuzz = {};
\\pub const position_independent_code = {};
\\pub const position_independent_executable = {};
\\pub const strip_debug_info = {};
@ -199,6 +201,7 @@ pub fn append(opts: @This(), buffer: *std.ArrayList(u8)) Allocator.Error!void {
opts.error_tracing,
opts.valgrind,
opts.sanitize_thread,
opts.fuzz,
opts.pic,
opts.pie,
opts.strip,

View file

@ -190,6 +190,7 @@ debug_compile_errors: bool,
incremental: bool,
job_queued_compiler_rt_lib: bool = false,
job_queued_compiler_rt_obj: bool = false,
job_queued_fuzzer_lib: bool = false,
job_queued_update_builtin_zig: bool,
alloc_failure_occurred: bool = false,
formatted_panics: bool = false,
@ -231,6 +232,10 @@ compiler_rt_lib: ?CRTFile = null,
/// Populated when we build the compiler_rt_obj object. A Job to build this is indicated
/// by setting `job_queued_compiler_rt_obj` and resolved before calling linker.flush().
compiler_rt_obj: ?CRTFile = null,
/// Populated when we build the libfuzzer static library. A Job to build this
/// is indicated by setting `job_queued_fuzzer_lib` and resolved before
/// calling linker.flush().
fuzzer_lib: ?CRTFile = null,
glibc_so_files: ?glibc.BuiltSharedObjects = null,
wasi_emulated_libs: []const wasi_libc.CRTFile,
@ -799,6 +804,7 @@ pub const MiscTask = enum {
libcxx,
libcxxabi,
libtsan,
libfuzzer,
wasi_libc_crt_file,
compiler_rt,
zig_libc,
@ -887,6 +893,7 @@ pub const cache_helpers = struct {
hh.add(mod.red_zone);
hh.add(mod.sanitize_c);
hh.add(mod.sanitize_thread);
hh.add(mod.fuzz);
hh.add(mod.unwind_tables);
hh.add(mod.structured_cfg);
hh.addListOfBytes(mod.cc_argv);
@ -1302,6 +1309,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
const any_unwind_tables = options.config.any_unwind_tables or options.root_mod.unwind_tables;
const any_non_single_threaded = options.config.any_non_single_threaded or !options.root_mod.single_threaded;
const any_sanitize_thread = options.config.any_sanitize_thread or options.root_mod.sanitize_thread;
const any_fuzz = options.config.any_fuzz or options.root_mod.fuzz;
const link_eh_frame_hdr = options.link_eh_frame_hdr or any_unwind_tables;
const build_id = options.build_id orelse .none;
@ -1563,6 +1571,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
comp.config.any_unwind_tables = any_unwind_tables;
comp.config.any_non_single_threaded = any_non_single_threaded;
comp.config.any_sanitize_thread = any_sanitize_thread;
comp.config.any_fuzz = any_fuzz;
const lf_open_opts: link.File.OpenOptions = .{
.linker_script = options.linker_script,
@ -1908,6 +1917,13 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
}
}
if (comp.config.any_fuzz and capable_of_building_compiler_rt) {
if (is_exe_or_dyn_lib) {
log.debug("queuing a job to build libfuzzer", .{});
comp.job_queued_fuzzer_lib = true;
}
}
if (!comp.skip_linker_dependencies and is_exe_or_dyn_lib and
!comp.config.link_libc and capable_of_building_zig_libc)
{
@ -1956,6 +1972,9 @@ pub fn destroy(comp: *Compilation) void {
if (comp.compiler_rt_obj) |*crt_file| {
crt_file.deinit(gpa);
}
if (comp.fuzzer_lib) |*crt_file| {
crt_file.deinit(gpa);
}
if (comp.libc_static_lib) |*crt_file| {
crt_file.deinit(gpa);
}
@ -2721,6 +2740,7 @@ pub fn emitLlvmObject(
.is_small = comp.root_mod.optimize_mode == .ReleaseSmall,
.time_report = comp.time_report,
.sanitize_thread = comp.config.any_sanitize_thread,
.fuzz = comp.config.any_fuzz,
.lto = comp.config.lto,
});
}
@ -3641,15 +3661,9 @@ fn performAllTheWorkInner(
break;
}
if (comp.job_queued_compiler_rt_lib) {
comp.job_queued_compiler_rt_lib = false;
buildCompilerRtOneShot(comp, .Lib, &comp.compiler_rt_lib, main_progress_node);
}
if (comp.job_queued_compiler_rt_obj) {
comp.job_queued_compiler_rt_obj = false;
buildCompilerRtOneShot(comp, .Obj, &comp.compiler_rt_obj, main_progress_node);
}
buildCompilerRtOneShot(comp, &comp.job_queued_compiler_rt_lib, "compiler_rt.zig", .compiler_rt, .Lib, &comp.compiler_rt_lib, main_progress_node);
buildCompilerRtOneShot(comp, &comp.job_queued_compiler_rt_obj, "compiler_rt.zig", .compiler_rt, .Obj, &comp.compiler_rt_obj, main_progress_node);
buildCompilerRtOneShot(comp, &comp.job_queued_fuzzer_lib, "fuzzer.zig", .libfuzzer, .Lib, &comp.fuzzer_lib, main_progress_node);
}
const JobError = Allocator.Error;
@ -4655,23 +4669,27 @@ fn workerUpdateWin32Resource(
fn buildCompilerRtOneShot(
comp: *Compilation,
job_queued: *bool,
root_source_name: []const u8,
misc_task: MiscTask,
output_mode: std.builtin.OutputMode,
out: *?CRTFile,
prog_node: std.Progress.Node,
) void {
if (!job_queued.*) return;
job_queued.* = false;
comp.buildOutputFromZig(
"compiler_rt.zig",
root_source_name,
output_mode,
out,
.compiler_rt,
misc_task,
prog_node,
) catch |err| switch (err) {
error.SubCompilationFailed => return, // error reported already
else => comp.lockAndSetMiscFailure(
.compiler_rt,
"unable to build compiler_rt: {s}",
.{@errorName(err)},
),
else => comp.lockAndSetMiscFailure(misc_task, "unable to build {s}: {s}", .{
@tagName(misc_task), @errorName(err),
}),
};
}
@ -5602,23 +5620,32 @@ pub fn addCCArgs(
try argv.append("-mthumb");
}
if (mod.sanitize_c and !mod.sanitize_thread) {
try argv.append("-fsanitize=undefined");
try argv.append("-fsanitize-trap=undefined");
// It is very common, and well-defined, for a pointer on one side of a C ABI
// to have a different but compatible element type. Examples include:
// `char*` vs `uint8_t*` on a system with 8-bit bytes
// `const char*` vs `char*`
// `char*` vs `unsigned char*`
// Without this flag, Clang would invoke UBSAN when such an extern
// function was called.
try argv.append("-fno-sanitize=function");
} else if (mod.sanitize_c and mod.sanitize_thread) {
try argv.append("-fsanitize=undefined,thread");
try argv.append("-fsanitize-trap=undefined");
try argv.append("-fno-sanitize=function");
} else if (!mod.sanitize_c and mod.sanitize_thread) {
try argv.append("-fsanitize=thread");
{
var san_arg: std.ArrayListUnmanaged(u8) = .{};
const prefix = "-fsanitize=";
if (mod.sanitize_c) {
if (san_arg.items.len == 0) try san_arg.appendSlice(arena, prefix);
try san_arg.appendSlice(arena, "undefined,");
try argv.append("-fsanitize-trap=undefined");
// It is very common, and well-defined, for a pointer on one side of a C ABI
// to have a different but compatible element type. Examples include:
// `char*` vs `uint8_t*` on a system with 8-bit bytes
// `const char*` vs `char*`
// `char*` vs `unsigned char*`
// Without this flag, Clang would invoke UBSAN when such an extern
// function was called.
try argv.append("-fno-sanitize=function");
}
if (mod.sanitize_thread) {
if (san_arg.items.len == 0) try san_arg.appendSlice(arena, prefix);
try san_arg.appendSlice(arena, "thread,");
}
if (mod.fuzz) {
if (san_arg.items.len == 0) try san_arg.appendSlice(arena, prefix);
try san_arg.appendSlice(arena, "fuzzer-no-link,");
}
// Chop off the trailing comma and append to argv.
if (san_arg.popOrNull()) |_| try argv.append(san_arg.items);
}
if (mod.red_zone) {

View file

@ -32,6 +32,7 @@ any_non_single_threaded: bool,
/// per-Module setting.
any_error_tracing: bool,
any_sanitize_thread: bool,
any_fuzz: bool,
pie: bool,
/// If this is true then linker code is responsible for making an LLVM IR
/// Module, outputting it to an object file, and then linking that together
@ -82,6 +83,7 @@ pub const Options = struct {
ensure_libcpp_on_non_freestanding: bool = false,
any_non_single_threaded: bool = false,
any_sanitize_thread: bool = false,
any_fuzz: bool = false,
any_unwind_tables: bool = false,
any_dyn_libs: bool = false,
any_c_source_files: bool = false,
@ -486,6 +488,7 @@ pub fn resolve(options: Options) ResolveError!Config {
.any_non_single_threaded = options.any_non_single_threaded,
.any_error_tracing = any_error_tracing,
.any_sanitize_thread = options.any_sanitize_thread,
.any_fuzz = options.any_fuzz,
.root_error_tracing = root_error_tracing,
.pie = pie,
.lto = lto,

View file

@ -26,6 +26,7 @@ stack_protector: u32,
red_zone: bool,
sanitize_c: bool,
sanitize_thread: bool,
fuzz: bool,
unwind_tables: bool,
cc_argv: []const []const u8,
/// (SPIR-V) whether to generate a structured control flow graph or not
@ -92,6 +93,7 @@ pub const CreateOptions = struct {
unwind_tables: ?bool = null,
sanitize_c: ?bool = null,
sanitize_thread: ?bool = null,
fuzz: ?bool = null,
structured_cfg: ?bool = null,
};
};
@ -106,6 +108,7 @@ pub const ResolvedTarget = struct {
/// At least one of `parent` and `resolved_target` must be non-null.
pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
if (options.inherited.sanitize_thread == true) assert(options.global.any_sanitize_thread);
if (options.inherited.fuzz == true) assert(options.global.any_fuzz);
if (options.inherited.single_threaded == false) assert(options.global.any_non_single_threaded);
if (options.inherited.unwind_tables == true) assert(options.global.any_unwind_tables);
if (options.inherited.error_tracing == true) assert(options.global.any_error_tracing);
@ -210,6 +213,12 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
break :b false;
};
const fuzz = b: {
if (options.inherited.fuzz) |x| break :b x;
if (options.parent) |p| break :b p.fuzz;
break :b false;
};
const code_model = b: {
if (options.inherited.code_model) |x| break :b x;
if (options.parent) |p| break :b p.code_model;
@ -337,6 +346,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
.red_zone = red_zone,
.sanitize_c = sanitize_c,
.sanitize_thread = sanitize_thread,
.fuzz = fuzz,
.unwind_tables = unwind_tables,
.cc_argv = options.cc_argv,
.structured_cfg = structured_cfg,
@ -359,6 +369,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
.error_tracing = error_tracing,
.valgrind = valgrind,
.sanitize_thread = sanitize_thread,
.fuzz = fuzz,
.pic = pic,
.pie = options.global.pie,
.strip = strip,
@ -427,6 +438,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
.red_zone = red_zone,
.sanitize_c = sanitize_c,
.sanitize_thread = sanitize_thread,
.fuzz = fuzz,
.unwind_tables = unwind_tables,
.cc_argv = &.{},
.structured_cfg = structured_cfg,
@ -485,6 +497,7 @@ pub fn createLimited(gpa: Allocator, options: LimitedOptions) Allocator.Error!*P
.red_zone = undefined,
.sanitize_c = undefined,
.sanitize_thread = undefined,
.fuzz = undefined,
.unwind_tables = undefined,
.cc_argv = undefined,
.structured_cfg = undefined,

View file

@ -1101,6 +1101,7 @@ pub const Object = struct {
is_small: bool,
time_report: bool,
sanitize_thread: bool,
fuzz: bool,
lto: bool,
};
@ -1287,6 +1288,7 @@ pub const Object = struct {
options.is_small,
options.time_report,
options.sanitize_thread,
options.fuzz,
options.lto,
null,
emit_bin_path,
@ -1311,6 +1313,7 @@ pub const Object = struct {
options.is_small,
options.time_report,
options.sanitize_thread,
options.fuzz,
options.lto,
options.asm_path,
emit_bin_path,
@ -2982,6 +2985,9 @@ pub const Object = struct {
if (owner_mod.sanitize_thread) {
try attributes.addFnAttr(.sanitize_thread, &o.builder);
}
if (owner_mod.fuzz) {
try attributes.addFnAttr(.optforfuzzing, &o.builder);
}
const target = owner_mod.resolved_target.result;
if (target.cpu.model.llvm_name) |s| {
try attributes.addFnAttr(.{ .string = .{

View file

@ -93,6 +93,7 @@ pub const TargetMachine = opaque {
is_small: bool,
time_report: bool,
tsan: bool,
sancov: bool,
lto: bool,
asm_filename: ?[*:0]const u8,
bin_filename: ?[*:0]const u8,

View file

@ -499,12 +499,14 @@ const usage_build_generic =
\\ -fno-stack-check Disable stack probing in safe builds
\\ -fstack-protector Enable stack protection in unsafe builds
\\ -fno-stack-protector Disable stack protection in safe builds
\\ -fsanitize-c Enable C undefined behavior detection in unsafe builds
\\ -fno-sanitize-c Disable C undefined behavior detection in safe builds
\\ -fvalgrind Include valgrind client requests in release builds
\\ -fno-valgrind Omit valgrind client requests in debug builds
\\ -fsanitize-c Enable C undefined behavior detection in unsafe builds
\\ -fno-sanitize-c Disable C undefined behavior detection in safe builds
\\ -fsanitize-thread Enable Thread Sanitizer
\\ -fno-sanitize-thread Disable Thread Sanitizer
\\ -ffuzz Enable fuzz testing instrumentation
\\ -fno-fuzz Disable fuzz testing instrumentation
\\ -funwind-tables Always produce unwind table entries for all functions
\\ -fno-unwind-tables Never produce unwind table entries
\\ -ferror-tracing Enable error tracing in ReleaseFast mode
@ -1429,6 +1431,10 @@ fn buildOutputType(
mod_opts.sanitize_thread = true;
} else if (mem.eql(u8, arg, "-fno-sanitize-thread")) {
mod_opts.sanitize_thread = false;
} else if (mem.eql(u8, arg, "-ffuzz")) {
mod_opts.fuzz = true;
} else if (mem.eql(u8, arg, "-fno-fuzz")) {
mod_opts.fuzz = false;
} else if (mem.eql(u8, arg, "-fllvm")) {
create_module.opts.use_llvm = true;
} else if (mem.eql(u8, arg, "-fno-llvm")) {
@ -2060,11 +2066,21 @@ fn buildOutputType(
create_module.opts.debug_format = .{ .dwarf = .@"64" };
},
.sanitize => {
if (mem.eql(u8, it.only_arg, "undefined")) {
mod_opts.sanitize_c = true;
} else if (mem.eql(u8, it.only_arg, "thread")) {
mod_opts.sanitize_thread = true;
} else {
var san_it = std.mem.splitScalar(u8, it.only_arg, ',');
var recognized_any = false;
while (san_it.next()) |sub_arg| {
if (mem.eql(u8, sub_arg, "undefined")) {
mod_opts.sanitize_c = true;
recognized_any = true;
} else if (mem.eql(u8, sub_arg, "thread")) {
mod_opts.sanitize_thread = true;
recognized_any = true;
} else if (mem.eql(u8, sub_arg, "fuzzer") or mem.eql(u8, sub_arg, "fuzzer-no-link")) {
mod_opts.fuzz = true;
recognized_any = true;
}
}
if (!recognized_any) {
try cc_argv.appendSlice(arena, it.other_args);
}
},
@ -2642,6 +2658,8 @@ fn buildOutputType(
create_module.opts.any_non_single_threaded = true;
if (mod_opts.sanitize_thread == true)
create_module.opts.any_sanitize_thread = true;
if (mod_opts.fuzz == true)
create_module.opts.any_fuzz = true;
if (mod_opts.unwind_tables == true)
create_module.opts.any_unwind_tables = true;
if (mod_opts.strip == false)
@ -7491,6 +7509,8 @@ fn handleModArg(
create_module.opts.any_non_single_threaded = true;
if (mod_opts.sanitize_thread == true)
create_module.opts.any_sanitize_thread = true;
if (mod_opts.fuzz == true)
create_module.opts.any_fuzz = true;
if (mod_opts.unwind_tables == true)
create_module.opts.any_unwind_tables = true;
if (mod_opts.strip == false)

View file

@ -54,6 +54,7 @@
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
#include <llvm/Transforms/Instrumentation/SanitizerCoverage.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/Transforms/Utils/AddDiscriminators.h>
@ -188,9 +189,10 @@ struct TimeTracerRAII {
};
} // end anonymous namespace
bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMModuleRef module_ref,
char **error_message, bool is_debug,
bool is_small, bool time_report, bool tsan, bool lto,
bool is_small, bool time_report, bool tsan, bool sancov, bool lto,
const char *asm_filename, const char *bin_filename,
const char *llvm_ir_filename, const char *bitcode_filename)
{
@ -303,13 +305,18 @@ bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMM
});
}
// Thread sanitizer
if (tsan) {
pass_builder.registerOptimizerLastEPCallback([](ModulePassManager &module_pm, OptimizationLevel level) {
pass_builder.registerOptimizerLastEPCallback([&](ModulePassManager &module_pm, OptimizationLevel level) {
// Code coverage instrumentation.
if (sancov) {
module_pm.addPass(SanitizerCoveragePass());
}
// Thread sanitizer
if (tsan) {
module_pm.addPass(ModuleThreadSanitizerPass());
module_pm.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
});
}
}
});
ModulePassManager module_pm;
OptimizationLevel opt_level;

View file

@ -26,7 +26,7 @@
ZIG_EXTERN_C bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMModuleRef module_ref,
char **error_message, bool is_debug,
bool is_small, bool time_report, bool tsan, bool lto,
bool is_small, bool time_report, bool tsan, bool sancov, bool lto,
const char *asm_filename, const char *bin_filename,
const char *llvm_ir_filename, const char *bitcode_filename);