From 97643c1ecc9621b088807f446f06fe51e2db8fb6 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 29 Jul 2024 21:44:38 -0700 Subject: [PATCH 01/34] fuzzer: track code coverage from all runs When a unique run is encountered, track it in a bit set memory-mapped into the fuzz directory so it can be observed by other processes, even while the fuzzer is running. --- lib/compiler/test_runner.zig | 2 + lib/fuzzer.zig | 195 +++++++++++++++++++++++++++++++---- 2 files changed, 177 insertions(+), 20 deletions(-) diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index be793376a570..64f6f230fe9c 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -41,6 +41,7 @@ pub fn main() void { } fba.reset(); + if (builtin.fuzz) fuzzer_init(); if (listen) { return mainServer() catch @panic("internal test runner failure"); @@ -323,6 +324,7 @@ const FuzzerSlice = extern struct { var is_fuzz_test: bool = undefined; extern fn fuzzer_next() FuzzerSlice; +extern fn fuzzer_init() void; pub fn fuzzInput(options: testing.FuzzInputOptions) []const u8 { @disableInstrumentation(); diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index 60876e0bfb25..4673bf6e31f3 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -2,6 +2,7 @@ const builtin = @import("builtin"); const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; +const fatal = std.process.fatal; pub const std_options = .{ .logFn = logOverride, @@ -17,7 +18,7 @@ fn logOverride( ) void { if (builtin.mode != .Debug) return; const f = if (log_file) |f| f else f: { - const f = std.fs.cwd().createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file"); + const f = fuzzer.dir.createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file"); log_file = f; break :f f; }; @@ -28,16 +29,17 @@ fn logOverride( export threadlocal var __sancov_lowest_stack: usize = 0; -export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, stop: [*]u8) 
void { - std.log.debug("__sanitizer_cov_8bit_counters_init start={*}, stop={*}", .{ start, stop }); +var module_count_8bc: usize = 0; +var module_count_pcs: usize = 0; + +export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, end: [*]u8) void { + assert(@atomicRmw(usize, &module_count_8bc, .Add, 1, .monotonic) == 0); + fuzzer.pc_counters = start[0 .. end - start]; } -export fn __sanitizer_cov_pcs_init(pc_start: [*]const usize, pc_end: [*]const usize) void { - std.log.debug("__sanitizer_cov_pcs_init pc_start={*}, pc_end={*}", .{ pc_start, pc_end }); - fuzzer.pc_range = .{ - .start = @intFromPtr(pc_start), - .end = @intFromPtr(pc_start), - }; +export fn __sanitizer_cov_pcs_init(start: [*]const Fuzzer.FlaggedPc, end: [*]const Fuzzer.FlaggedPc) void { + assert(@atomicRmw(usize, &module_count_pcs, .Add, 1, .monotonic) == 0); + fuzzer.flagged_pcs = start[0 .. end - start]; } export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void { @@ -102,11 +104,25 @@ const Fuzzer = struct { gpa: Allocator, rng: std.Random.DefaultPrng, input: std.ArrayListUnmanaged(u8), - pc_range: PcRange, - count: usize, + flagged_pcs: []const FlaggedPc, + pc_counters: []u8, + n_runs: usize, recent_cases: RunMap, deduplicated_runs: usize, + /// Data collected from code coverage instrumentation from one execution of + /// the test function. coverage: Coverage, + /// Tracks which PCs have been seen across all runs that do not crash the fuzzer process. + /// Stored in a memory-mapped file so that it can be shared with other + /// processes and viewed while the fuzzer is running. 
+ seen_pcs: MemoryMappedList, + dir: std.fs.Dir, + + const SeenPcsHeader = extern struct { + n_runs: usize, + pcs_len: usize, + lowest_stack: usize, + }; const RunMap = std.ArrayHashMapUnmanaged(Run, void, Run.HashContext, false); @@ -161,9 +177,12 @@ const Fuzzer = struct { } }; - const PcRange = struct { - start: usize, - end: usize, + const FlaggedPc = extern struct { + addr: usize, + flags: packed struct(usize) { + entry: bool, + _: @Type(.{ .Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(usize) - 1 } }), + }, }; const Analysis = struct { @@ -171,6 +190,56 @@ const Fuzzer = struct { id: Run.Id, }; + fn init(f: *Fuzzer, dir: std.fs.Dir) !void { + f.dir = dir; + + // Layout of this file: + // - Header + // - list of PC addresses (usize elements) + // - list of hit flag, 1 bit per address (stored in u8 elements) + const coverage_file = dir.createFile("coverage", .{ + .read = true, + .truncate = false, + }) catch |err| fatal("unable to create coverage file: {s}", .{@errorName(err)}); + const flagged_pcs = f.flagged_pcs; + const n_bitset_elems = (flagged_pcs.len + 7) / 8; + const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems; + const existing_len = coverage_file.getEndPos() catch |err| { + fatal("unable to check len of coverage file: {s}", .{@errorName(err)}); + }; + if (existing_len == 0) { + coverage_file.setEndPos(bytes_len) catch |err| { + fatal("unable to set len of coverage file: {s}", .{@errorName(err)}); + }; + } else if (existing_len != bytes_len) { + fatal("incompatible existing coverage file (differing lengths)", .{}); + } + f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| { + fatal("unable to init coverage memory map: {s}", .{@errorName(err)}); + }; + if (existing_len != 0) { + const existing_pcs = std.mem.bytesAsSlice(usize, f.seen_pcs.items[@sizeOf(SeenPcsHeader)..][0 .. flagged_pcs.len * @sizeOf(usize)]); + for (existing_pcs, flagged_pcs, 0..) 
|old, new, i| { + if (old != new.addr) { + fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{ + i, old, new.addr, + }); + } + } + } else { + const header: SeenPcsHeader = .{ + .n_runs = 0, + .pcs_len = flagged_pcs.len, + .lowest_stack = std.math.maxInt(usize), + }; + f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header)); + for (flagged_pcs) |flagged_pc| { + f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&flagged_pc.addr)); + } + f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems); + } + } + fn analyzeLastRun(f: *Fuzzer) Analysis { return .{ .id = f.coverage.run_id_hasher.final(), @@ -194,7 +263,7 @@ const Fuzzer = struct { .score = 0, }, {}); } else { - if (f.count % 1000 == 0) f.dumpStats(); + if (f.n_runs % 1000 == 0) f.dumpStats(); const analysis = f.analyzeLastRun(); const gop = f.recent_cases.getOrPutAssumeCapacity(.{ @@ -217,6 +286,25 @@ const Fuzzer = struct { .input = try gpa.dupe(u8, f.input.items), .score = analysis.score, }; + + // Track code coverage from all runs. + { + const seen_pcs = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + f.flagged_pcs.len * @sizeOf(usize) ..]; + for (seen_pcs, 0..) 
|*elem, i| { + const byte_i = i / 8; + const mask: u8 = + (@as(u8, @intFromBool(f.pc_counters[byte_i + 0] != 0)) << 0) | + (@as(u8, @intFromBool(f.pc_counters[byte_i + 1] != 0)) << 1) | + (@as(u8, @intFromBool(f.pc_counters[byte_i + 2] != 0)) << 2) | + (@as(u8, @intFromBool(f.pc_counters[byte_i + 3] != 0)) << 3) | + (@as(u8, @intFromBool(f.pc_counters[byte_i + 4] != 0)) << 4) | + (@as(u8, @intFromBool(f.pc_counters[byte_i + 5] != 0)) << 5) | + (@as(u8, @intFromBool(f.pc_counters[byte_i + 6] != 0)) << 6) | + (@as(u8, @intFromBool(f.pc_counters[byte_i + 7] != 0)) << 7); + + _ = @atomicRmw(u8, elem, .Or, mask, .monotonic); + } + } } if (f.recent_cases.entries.len >= 100) { @@ -244,8 +332,12 @@ const Fuzzer = struct { f.input.appendSliceAssumeCapacity(run.input); try f.mutate(); + f.n_runs += 1; + const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); + _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic); + _ = @atomicRmw(usize, &header.lowest_stack, .Min, __sancov_lowest_stack, .monotonic); + @memset(f.pc_counters, 0); f.coverage.reset(); - f.count += 1; return f.input.items; } @@ -257,8 +349,7 @@ const Fuzzer = struct { fn dumpStats(f: *Fuzzer) void { std.log.info("stats: runs={d} deduplicated={d}", .{ - f.count, - f.deduplicated_runs, + f.n_runs, f.deduplicated_runs, }); for (f.recent_cases.keys()[0..@min(f.recent_cases.entries.len, 5)], 0..) 
|run, i| { std.log.info("best[{d}] id={x} score={d} input: '{}'", .{ @@ -303,11 +394,14 @@ var fuzzer: Fuzzer = .{ .gpa = general_purpose_allocator.allocator(), .rng = std.Random.DefaultPrng.init(0), .input = .{}, - .pc_range = .{ .start = 0, .end = 0 }, - .count = 0, + .flagged_pcs = undefined, + .pc_counters = undefined, + .n_runs = 0, .deduplicated_runs = 0, .recent_cases = .{}, .coverage = undefined, + .dir = undefined, + .seen_pcs = undefined, }; export fn fuzzer_next() Fuzzer.Slice { @@ -315,3 +409,64 @@ export fn fuzzer_next() Fuzzer.Slice { error.OutOfMemory => @panic("out of memory"), }); } + +export fn fuzzer_init() void { + if (module_count_8bc == 0) fatal("__sanitizer_cov_8bit_counters_init was never called", .{}); + if (module_count_pcs == 0) fatal("__sanitizer_cov_pcs_init was never called", .{}); + + // TODO: move this to .zig-cache/f + const fuzz_dir = std.fs.cwd().makeOpenPath("f", .{ .iterate = true }) catch |err| { + fatal("unable to open fuzz directory 'f': {s}", .{@errorName(err)}); + }; + fuzzer.init(fuzz_dir) catch |err| fatal("unable to init fuzzer: {s}", .{@errorName(err)}); +} + +/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping. +pub const MemoryMappedList = struct { + /// Contents of the list. + /// + /// Pointers to elements in this slice are invalidated by various functions + /// of this ArrayList in accordance with the respective documentation. In + /// all cases, "invalidated" means that the memory has been passed to this + /// allocator's resize or free function. + items: []align(std.mem.page_size) volatile u8, + /// How many bytes this list can hold without allocating additional memory. 
+ capacity: usize, + + pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList { + const ptr = try std.posix.mmap( + null, + capacity, + std.posix.PROT.READ | std.posix.PROT.WRITE, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); + return .{ + .items = ptr[0..length], + .capacity = capacity, + }; + } + + /// Append the slice of items to the list. + /// Asserts that the list can hold the additional items. + pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void { + const old_len = l.items.len; + const new_len = old_len + items.len; + assert(new_len <= l.capacity); + l.items.len = new_len; + @memcpy(l.items[old_len..][0..items.len], items); + } + + /// Append a value to the list `n` times. + /// Never invalidates element pointers. + /// The function is inline so that a comptime-known `value` parameter will + /// have better memset codegen in case it has a repeated byte pattern. + /// Asserts that the list can hold the additional items. + pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void { + const new_len = l.items.len + n; + assert(new_len <= l.capacity); + @memset(l.items.ptr[l.items.len..new_len], value); + l.items.len = new_len; + } +}; From ffc050e0557c5d951cd293ca365ed0cd3cdf83db Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 29 Jul 2024 21:59:23 -0700 Subject: [PATCH 02/34] fuzzer: log errors and move deduplicated runs to shared mem --- lib/fuzzer.zig | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index 4673bf6e31f3..7edab785a9f2 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -16,7 +16,6 @@ fn logOverride( comptime format: []const u8, args: anytype, ) void { - if (builtin.mode != .Debug) return; const f = if (log_file) |f| f else f: { const f = fuzzer.dir.createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file"); log_file = f; @@ -27,7 +26,7 @@ fn logOverride( 
f.writer().print(prefix1 ++ prefix2 ++ format ++ "\n", args) catch @panic("failed to write to fuzzer log"); } -export threadlocal var __sancov_lowest_stack: usize = 0; +export threadlocal var __sancov_lowest_stack: usize = std.math.maxInt(usize); var module_count_8bc: usize = 0; var module_count_pcs: usize = 0; @@ -108,7 +107,6 @@ const Fuzzer = struct { pc_counters: []u8, n_runs: usize, recent_cases: RunMap, - deduplicated_runs: usize, /// Data collected from code coverage instrumentation from one execution of /// the test function. coverage: Coverage, @@ -120,6 +118,7 @@ const Fuzzer = struct { const SeenPcsHeader = extern struct { n_runs: usize, + deduplicated_runs: usize, pcs_len: usize, lowest_stack: usize, }; @@ -229,6 +228,7 @@ const Fuzzer = struct { } else { const header: SeenPcsHeader = .{ .n_runs = 0, + .deduplicated_runs = 0, .pcs_len = flagged_pcs.len, .lowest_stack = std.math.maxInt(usize), }; @@ -273,7 +273,8 @@ const Fuzzer = struct { }); if (gop.found_existing) { //std.log.info("duplicate analysis: score={d} id={d}", .{ analysis.score, analysis.id }); - f.deduplicated_runs += 1; + const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); + _ = @atomicRmw(usize, &header.deduplicated_runs, .Add, 1, .monotonic); if (f.input.items.len < gop.key_ptr.input.len or gop.key_ptr.score == 0) { gpa.free(gop.key_ptr.input); gop.key_ptr.input = try gpa.dupe(u8, f.input.items); @@ -348,9 +349,6 @@ const Fuzzer = struct { } fn dumpStats(f: *Fuzzer) void { - std.log.info("stats: runs={d} deduplicated={d}", .{ - f.n_runs, f.deduplicated_runs, - }); for (f.recent_cases.keys()[0..@min(f.recent_cases.entries.len, 5)], 0..) 
|run, i| { std.log.info("best[{d}] id={x} score={d} input: '{}'", .{ i, run.id, run.score, std.zig.fmtEscapes(run.input), @@ -397,7 +395,6 @@ var fuzzer: Fuzzer = .{ .flagged_pcs = undefined, .pc_counters = undefined, .n_runs = 0, - .deduplicated_runs = 0, .recent_cases = .{}, .coverage = undefined, .dir = undefined, From e0ffac4e3c2271a617616760f3084f5f01fb0785 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 31 Jul 2024 22:54:05 -0700 Subject: [PATCH 03/34] introduce a web interface for fuzzing * new .zig-cache subdirectory: 'v' - stores coverage information with filename of hash of PCs that want coverage. This hash is a hex encoding of the 64-bit coverage ID. * build runner * fixed bug in file system inputs when a compile step has an overridden zig_lib_dir field set. * set some std lib options optimized for the build runner - no side channel mitigations - no Transport Layer Security - no crypto fork safety * add a --port CLI arg for choosing the port the fuzzing web interface listens on. it defaults to choosing a random open port. * introduce a web server, and serve a basic single page application - shares wasm code with autodocs - assets are created live on request, for convenient development experience. main.wasm is properly cached if nothing changes. - sources.tar comes from file system inputs (introduced with the `--watch` feature) * receives coverage ID from test runner and sends it on a thread-safe queue to the WebServer. * test runner - takes a zig cache directory argument now, for where to put coverage information. - sends coverage ID to parent process * fuzzer - puts its logs (in debug mode) in .zig-cache/tmp/libfuzzer.log - computes coverage_id and makes it available with `fuzzer_coverage_id` exported function. 
- the memory-mapped coverage file is now namespaced by the coverage id in hex encoding, in `.zig-cache/v` * tokenizer - add a fuzz test to check that several properties are upheld --- lib/compiler/build_runner.zig | 29 ++- lib/compiler/test_runner.zig | 22 +- lib/docs/wasm/main.zig | 4 +- lib/fuzzer.zig | 73 ++++-- lib/fuzzer/index.html | 76 ++++++ lib/fuzzer/main.js | 40 +++ lib/fuzzer/wasm/main.zig | 99 ++++++++ lib/std/Build.zig | 12 +- lib/std/Build/Fuzz.zig | 455 ++++++++++++++++++++++++++++++++-- lib/std/Build/Step.zig | 3 +- lib/std/Build/Step/Run.zig | 52 ++-- lib/std/zig/Server.zig | 24 +- lib/std/zig/tokenizer.zig | 45 ++++ 13 files changed, 872 insertions(+), 62 deletions(-) create mode 100644 lib/fuzzer/index.html create mode 100644 lib/fuzzer/main.js create mode 100644 lib/fuzzer/wasm/main.zig diff --git a/lib/compiler/build_runner.zig b/lib/compiler/build_runner.zig index 8bb03939dc6f..76be8610efc1 100644 --- a/lib/compiler/build_runner.zig +++ b/lib/compiler/build_runner.zig @@ -17,6 +17,12 @@ const runner = @This(); pub const root = @import("@build"); pub const dependencies = @import("@dependencies"); +pub const std_options: std.Options = .{ + .side_channels_mitigations = .none, + .http_disable_tls = true, + .crypto_fork_safety = false, +}; + pub fn main() !void { // Here we use an ArenaAllocator backed by a page allocator because a build is a short-lived, // one shot program. 
We don't need to waste time freeing memory and finding places to squish @@ -106,6 +112,7 @@ pub fn main() !void { var watch = false; var fuzz = false; var debounce_interval_ms: u16 = 50; + var listen_port: u16 = 0; while (nextArg(args, &arg_idx)) |arg| { if (mem.startsWith(u8, arg, "-Z")) { @@ -203,6 +210,14 @@ pub fn main() !void { next_arg, @errorName(err), }); }; + } else if (mem.eql(u8, arg, "--port")) { + const next_arg = nextArg(args, &arg_idx) orelse + fatalWithHint("expected u16 after '{s}'", .{arg}); + listen_port = std.fmt.parseUnsigned(u16, next_arg, 10) catch |err| { + fatal("unable to parse port '{s}' as unsigned 16-bit integer: {s}\n", .{ + next_arg, @errorName(err), + }); + }; } else if (mem.eql(u8, arg, "--debug-log")) { const next_arg = nextArgOrFatal(args, &arg_idx); try debug_log_scopes.append(next_arg); @@ -403,7 +418,19 @@ pub fn main() !void { else => return err, }; if (fuzz) { - Fuzz.start(&run.thread_pool, run.step_stack.keys(), run.ttyconf, main_progress_node); + const listen_address = std.net.Address.parseIp("127.0.0.1", listen_port) catch unreachable; + try Fuzz.start( + gpa, + arena, + global_cache_directory, + zig_lib_directory, + zig_exe, + &run.thread_pool, + run.step_stack.keys(), + run.ttyconf, + listen_address, + main_progress_node, + ); } if (!watch) return cleanExit(); diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index 64f6f230fe9c..65580fcd3f27 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -28,6 +28,7 @@ pub fn main() void { @panic("unable to parse command line args"); var listen = false; + var opt_cache_dir: ?[]const u8 = null; for (args[1..]) |arg| { if (std.mem.eql(u8, arg, "--listen=-")) { @@ -35,13 +36,18 @@ pub fn main() void { } else if (std.mem.startsWith(u8, arg, "--seed=")) { testing.random_seed = std.fmt.parseUnsigned(u32, arg["--seed=".len..], 0) catch @panic("unable to parse --seed command line argument"); + } else if (std.mem.startsWith(u8, arg, 
"--cache-dir")) { + opt_cache_dir = arg["--cache-dir=".len..]; } else { @panic("unrecognized command line argument"); } } fba.reset(); - if (builtin.fuzz) fuzzer_init(); + if (builtin.fuzz) { + const cache_dir = opt_cache_dir orelse @panic("missing --cache-dir=[path] argument"); + fuzzer_init(FuzzerSlice.fromSlice(cache_dir)); + } if (listen) { return mainServer() catch @panic("internal test runner failure"); @@ -60,6 +66,11 @@ fn mainServer() !void { }); defer server.deinit(); + if (builtin.fuzz) { + const coverage_id = fuzzer_coverage_id(); + try server.serveU64Message(.coverage_id, coverage_id); + } + while (true) { const hdr = try server.receiveMessage(); switch (hdr.tag) { @@ -316,15 +327,22 @@ const FuzzerSlice = extern struct { ptr: [*]const u8, len: usize, + /// Inline to avoid fuzzer instrumentation. inline fn toSlice(s: FuzzerSlice) []const u8 { return s.ptr[0..s.len]; } + + /// Inline to avoid fuzzer instrumentation. + inline fn fromSlice(s: []const u8) FuzzerSlice { + return .{ .ptr = s.ptr, .len = s.len }; + } }; var is_fuzz_test: bool = undefined; extern fn fuzzer_next() FuzzerSlice; -extern fn fuzzer_init() void; +extern fn fuzzer_init(cache_dir: FuzzerSlice) void; +extern fn fuzzer_coverage_id() u64; pub fn fuzzInput(options: testing.FuzzInputOptions) []const u8 { @disableInstrumentation(); diff --git a/lib/docs/wasm/main.zig b/lib/docs/wasm/main.zig index f5ce02d7d669..214f28c24b95 100644 --- a/lib/docs/wasm/main.zig +++ b/lib/docs/wasm/main.zig @@ -53,7 +53,7 @@ export fn unpack(tar_ptr: [*]u8, tar_len: usize) void { const tar_bytes = tar_ptr[0..tar_len]; //log.debug("received {d} bytes of tar file", .{tar_bytes.len}); - unpack_inner(tar_bytes) catch |err| { + unpackInner(tar_bytes) catch |err| { fatal("unable to unpack tar: {s}", .{@errorName(err)}); }; } @@ -750,7 +750,7 @@ export fn decl_type_html(decl_index: Decl.Index) String { const Oom = error{OutOfMemory}; -fn unpack_inner(tar_bytes: []u8) !void { +fn unpackInner(tar_bytes: []u8) !void { 
var fbs = std.io.fixedBufferStream(tar_bytes); var file_name_buffer: [1024]u8 = undefined; var link_name_buffer: [1024]u8 = undefined; diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index 7edab785a9f2..ede3663cdca0 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -17,7 +17,8 @@ fn logOverride( args: anytype, ) void { const f = if (log_file) |f| f else f: { - const f = fuzzer.dir.createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file"); + const f = fuzzer.cache_dir.createFile("tmp/libfuzzer.log", .{}) catch + @panic("failed to open fuzzer log file"); log_file = f; break :f f; }; @@ -114,7 +115,10 @@ const Fuzzer = struct { /// Stored in a memory-mapped file so that it can be shared with other /// processes and viewed while the fuzzer is running. seen_pcs: MemoryMappedList, - dir: std.fs.Dir, + cache_dir: std.fs.Dir, + /// Identifies the file name that will be used to store coverage + /// information, available to other processes. + coverage_id: u64, const SeenPcsHeader = extern struct { n_runs: usize, @@ -189,18 +193,31 @@ const Fuzzer = struct { id: Run.Id, }; - fn init(f: *Fuzzer, dir: std.fs.Dir) !void { - f.dir = dir; + fn init(f: *Fuzzer, cache_dir: std.fs.Dir) !void { + const flagged_pcs = f.flagged_pcs; + + f.cache_dir = cache_dir; + + // Choose a file name for the coverage based on a hash of the PCs that will be stored within. 
+ const pc_digest = d: { + var hasher = std.hash.Wyhash.init(0); + for (flagged_pcs) |flagged_pc| { + hasher.update(std.mem.asBytes(&flagged_pc.addr)); + } + break :d f.coverage.run_id_hasher.final(); + }; + f.coverage_id = pc_digest; + const hex_digest = std.fmt.hex(pc_digest); + const coverage_file_path = "v/" ++ hex_digest; // Layout of this file: // - Header // - list of PC addresses (usize elements) // - list of hit flag, 1 bit per address (stored in u8 elements) - const coverage_file = dir.createFile("coverage", .{ + const coverage_file = createFileBail(cache_dir, coverage_file_path, .{ .read = true, .truncate = false, - }) catch |err| fatal("unable to create coverage file: {s}", .{@errorName(err)}); - const flagged_pcs = f.flagged_pcs; + }); const n_bitset_elems = (flagged_pcs.len + 7) / 8; const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems; const existing_len = coverage_file.getEndPos() catch |err| { @@ -217,7 +234,8 @@ const Fuzzer = struct { fatal("unable to init coverage memory map: {s}", .{@errorName(err)}); }; if (existing_len != 0) { - const existing_pcs = std.mem.bytesAsSlice(usize, f.seen_pcs.items[@sizeOf(SeenPcsHeader)..][0 .. flagged_pcs.len * @sizeOf(usize)]); + const existing_pcs_bytes = f.seen_pcs.items[@sizeOf(SeenPcsHeader)..][0 .. flagged_pcs.len * @sizeOf(usize)]; + const existing_pcs = std.mem.bytesAsSlice(usize, existing_pcs_bytes); for (existing_pcs, flagged_pcs, 0..) 
|old, new, i| { if (old != new.addr) { fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{ @@ -380,6 +398,21 @@ const Fuzzer = struct { } }; +fn createFileBail(dir: std.fs.Dir, sub_path: []const u8, flags: std.fs.File.CreateFlags) std.fs.File { + return dir.createFile(sub_path, flags) catch |err| switch (err) { + error.FileNotFound => { + const dir_name = std.fs.path.dirname(sub_path).?; + dir.makePath(dir_name) catch |e| { + fatal("unable to make path '{s}': {s}", .{ dir_name, @errorName(e) }); + }; + return dir.createFile(sub_path, flags) catch |e| { + fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(e) }); + }; + }, + else => fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(err) }), + }; +} + fn oom(err: anytype) noreturn { switch (err) { error.OutOfMemory => @panic("out of memory"), @@ -397,25 +430,35 @@ var fuzzer: Fuzzer = .{ .n_runs = 0, .recent_cases = .{}, .coverage = undefined, - .dir = undefined, + .cache_dir = undefined, .seen_pcs = undefined, + .coverage_id = undefined, }; +/// Invalid until `fuzzer_init` is called. 
+export fn fuzzer_coverage_id() u64 { + return fuzzer.coverage_id; +} + export fn fuzzer_next() Fuzzer.Slice { return Fuzzer.Slice.fromZig(fuzzer.next() catch |err| switch (err) { error.OutOfMemory => @panic("out of memory"), }); } -export fn fuzzer_init() void { +export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void { if (module_count_8bc == 0) fatal("__sanitizer_cov_8bit_counters_init was never called", .{}); if (module_count_pcs == 0) fatal("__sanitizer_cov_pcs_init was never called", .{}); - // TODO: move this to .zig-cache/f - const fuzz_dir = std.fs.cwd().makeOpenPath("f", .{ .iterate = true }) catch |err| { - fatal("unable to open fuzz directory 'f': {s}", .{@errorName(err)}); - }; - fuzzer.init(fuzz_dir) catch |err| fatal("unable to init fuzzer: {s}", .{@errorName(err)}); + const cache_dir_path = cache_dir_struct.toZig(); + const cache_dir = if (cache_dir_path.len == 0) + std.fs.cwd() + else + std.fs.cwd().makeOpenPath(cache_dir_path, .{ .iterate = true }) catch |err| { + fatal("unable to open fuzz directory '{s}': {s}", .{ cache_dir_path, @errorName(err) }); + }; + + fuzzer.init(cache_dir) catch |err| fatal("unable to init fuzzer: {s}", .{@errorName(err)}); } /// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping. 
diff --git a/lib/fuzzer/index.html b/lib/fuzzer/index.html new file mode 100644 index 000000000000..c1ef059ad6e5 --- /dev/null +++ b/lib/fuzzer/index.html @@ -0,0 +1,76 @@ + + + + + Zig Documentation + + + + + + + diff --git a/lib/fuzzer/main.js b/lib/fuzzer/main.js new file mode 100644 index 000000000000..9b0d4cd8c3c6 --- /dev/null +++ b/lib/fuzzer/main.js @@ -0,0 +1,40 @@ +(function() { + let wasm_promise = fetch("main.wasm"); + let sources_promise = fetch("sources.tar").then(function(response) { + if (!response.ok) throw new Error("unable to download sources"); + return response.arrayBuffer(); + }); + var wasm_exports = null; + + const text_decoder = new TextDecoder(); + const text_encoder = new TextEncoder(); + + WebAssembly.instantiateStreaming(wasm_promise, { + js: { + log: function(ptr, len) { + const msg = decodeString(ptr, len); + console.log(msg); + }, + panic: function (ptr, len) { + const msg = decodeString(ptr, len); + throw new Error("panic: " + msg); + }, + }, + }).then(function(obj) { + wasm_exports = obj.instance.exports; + window.wasm = obj; // for debugging + + sources_promise.then(function(buffer) { + const js_array = new Uint8Array(buffer); + const ptr = wasm_exports.alloc(js_array.length); + const wasm_array = new Uint8Array(wasm_exports.memory.buffer, ptr, js_array.length); + wasm_array.set(js_array); + wasm_exports.unpack(ptr, js_array.length); + }); + }); + + function decodeString(ptr, len) { + if (len === 0) return ""; + return text_decoder.decode(new Uint8Array(wasm_exports.memory.buffer, ptr, len)); + } +})(); diff --git a/lib/fuzzer/wasm/main.zig b/lib/fuzzer/wasm/main.zig new file mode 100644 index 000000000000..09b9d8106817 --- /dev/null +++ b/lib/fuzzer/wasm/main.zig @@ -0,0 +1,99 @@ +const std = @import("std"); +const assert = std.debug.assert; + +const Walk = @import("Walk"); + +const gpa = std.heap.wasm_allocator; +const log = std.log; + +const js = struct { + extern "js" fn log(ptr: [*]const u8, len: usize) void; + extern "js" fn 
panic(ptr: [*]const u8, len: usize) noreturn; +}; + +pub const std_options: std.Options = .{ + .logFn = logFn, +}; + +pub fn panic(msg: []const u8, st: ?*std.builtin.StackTrace, addr: ?usize) noreturn { + _ = st; + _ = addr; + log.err("panic: {s}", .{msg}); + @trap(); +} + +fn logFn( + comptime message_level: log.Level, + comptime scope: @TypeOf(.enum_literal), + comptime format: []const u8, + args: anytype, +) void { + const level_txt = comptime message_level.asText(); + const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): "; + var buf: [500]u8 = undefined; + const line = std.fmt.bufPrint(&buf, level_txt ++ prefix2 ++ format, args) catch l: { + buf[buf.len - 3 ..][0..3].* = "...".*; + break :l &buf; + }; + js.log(line.ptr, line.len); +} + +export fn alloc(n: usize) [*]u8 { + const slice = gpa.alloc(u8, n) catch @panic("OOM"); + return slice.ptr; +} + +export fn unpack(tar_ptr: [*]u8, tar_len: usize) void { + const tar_bytes = tar_ptr[0..tar_len]; + log.debug("received {d} bytes of tar file", .{tar_bytes.len}); + + unpackInner(tar_bytes) catch |err| { + fatal("unable to unpack tar: {s}", .{@errorName(err)}); + }; +} + +fn unpackInner(tar_bytes: []u8) !void { + var fbs = std.io.fixedBufferStream(tar_bytes); + var file_name_buffer: [1024]u8 = undefined; + var link_name_buffer: [1024]u8 = undefined; + var it = std.tar.iterator(fbs.reader(), .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }); + while (try it.next()) |tar_file| { + switch (tar_file.kind) { + .file => { + if (tar_file.size == 0 and tar_file.name.len == 0) break; + if (std.mem.endsWith(u8, tar_file.name, ".zig")) { + log.debug("found file: '{s}'", .{tar_file.name}); + const file_name = try gpa.dupe(u8, tar_file.name); + if (std.mem.indexOfScalar(u8, file_name, '/')) |pkg_name_end| { + const pkg_name = file_name[0..pkg_name_end]; + const gop = try Walk.modules.getOrPut(gpa, pkg_name); + const file: Walk.File.Index = 
@enumFromInt(Walk.files.entries.len); + if (!gop.found_existing or + std.mem.eql(u8, file_name[pkg_name_end..], "/root.zig") or + std.mem.eql(u8, file_name[pkg_name_end + 1 .. file_name.len - ".zig".len], pkg_name)) + { + gop.value_ptr.* = file; + } + const file_bytes = tar_bytes[fbs.pos..][0..@intCast(tar_file.size)]; + assert(file == try Walk.add_file(file_name, file_bytes)); + } + } else { + log.warn("skipping: '{s}' - the tar creation should have done that", .{tar_file.name}); + } + }, + else => continue, + } + } +} + +fn fatal(comptime format: []const u8, args: anytype) noreturn { + var buf: [500]u8 = undefined; + const line = std.fmt.bufPrint(&buf, format, args) catch l: { + buf[buf.len - 3 ..][0..3].* = "...".*; + break :l &buf; + }; + js.panic(line.ptr, line.len); +} diff --git a/lib/std/Build.zig b/lib/std/Build.zig index 7612ad0d6d1a..03743cf52e3a 100644 --- a/lib/std/Build.zig +++ b/lib/std/Build.zig @@ -2300,22 +2300,26 @@ pub const LazyPath = union(enum) { } pub fn path(lazy_path: LazyPath, b: *Build, sub_path: []const u8) LazyPath { + return lazy_path.join(b.allocator, sub_path) catch @panic("OOM"); + } + + pub fn join(lazy_path: LazyPath, arena: Allocator, sub_path: []const u8) Allocator.Error!LazyPath { return switch (lazy_path) { .src_path => |src| .{ .src_path = .{ .owner = src.owner, - .sub_path = b.pathResolve(&.{ src.sub_path, sub_path }), + .sub_path = try fs.path.resolve(arena, &.{ src.sub_path, sub_path }), } }, .generated => |gen| .{ .generated = .{ .file = gen.file, .up = gen.up, - .sub_path = b.pathResolve(&.{ gen.sub_path, sub_path }), + .sub_path = try fs.path.resolve(arena, &.{ gen.sub_path, sub_path }), } }, .cwd_relative => |cwd_relative| .{ - .cwd_relative = b.pathResolve(&.{ cwd_relative, sub_path }), + .cwd_relative = try fs.path.resolve(arena, &.{ cwd_relative, sub_path }), }, .dependency => |dep| .{ .dependency = .{ .dependency = dep.dependency, - .sub_path = b.pathResolve(&.{ dep.sub_path, sub_path }), + .sub_path = try 
fs.path.resolve(arena, &.{ dep.sub_path, sub_path }), } }, }; } diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig index 2628b9251621..e26f587eacb1 100644 --- a/lib/std/Build/Fuzz.zig +++ b/lib/std/Build/Fuzz.zig @@ -1,59 +1,479 @@ +const builtin = @import("builtin"); const std = @import("../std.zig"); -const Fuzz = @This(); +const Build = std.Build; const Step = std.Build.Step; const assert = std.debug.assert; const fatal = std.process.fatal; +const Allocator = std.mem.Allocator; +const log = std.log; + +const Fuzz = @This(); const build_runner = @import("root"); pub fn start( + gpa: Allocator, + arena: Allocator, + global_cache_directory: Build.Cache.Directory, + zig_lib_directory: Build.Cache.Directory, + zig_exe_path: []const u8, thread_pool: *std.Thread.Pool, all_steps: []const *Step, ttyconf: std.io.tty.Config, + listen_address: std.net.Address, prog_node: std.Progress.Node, -) void { - const count = block: { +) Allocator.Error!void { + const fuzz_run_steps = block: { const rebuild_node = prog_node.start("Rebuilding Unit Tests", 0); defer rebuild_node.end(); - var count: usize = 0; var wait_group: std.Thread.WaitGroup = .{}; defer wait_group.wait(); + var fuzz_run_steps: std.ArrayListUnmanaged(*Step.Run) = .{}; + defer fuzz_run_steps.deinit(gpa); for (all_steps) |step| { const run = step.cast(Step.Run) orelse continue; if (run.fuzz_tests.items.len > 0 and run.producer != null) { thread_pool.spawnWg(&wait_group, rebuildTestsWorkerRun, .{ run, ttyconf, rebuild_node }); - count += 1; + try fuzz_run_steps.append(gpa, run); } } - if (count == 0) fatal("no fuzz tests found", .{}); - rebuild_node.setEstimatedTotalItems(count); - break :block count; + if (fuzz_run_steps.items.len == 0) fatal("no fuzz tests found", .{}); + rebuild_node.setEstimatedTotalItems(fuzz_run_steps.items.len); + break :block try arena.dupe(*Step.Run, fuzz_run_steps.items); }; // Detect failure. 
- for (all_steps) |step| { - const run = step.cast(Step.Run) orelse continue; - if (run.fuzz_tests.items.len > 0 and run.rebuilt_executable == null) + for (fuzz_run_steps) |run| { + assert(run.fuzz_tests.items.len > 0); + if (run.rebuilt_executable == null) fatal("one or more unit tests failed to be rebuilt in fuzz mode", .{}); } + var web_server: WebServer = .{ + .gpa = gpa, + .global_cache_directory = global_cache_directory, + .zig_lib_directory = zig_lib_directory, + .zig_exe_path = zig_exe_path, + .msg_queue = .{}, + .mutex = .{}, + .listen_address = listen_address, + .fuzz_run_steps = fuzz_run_steps, + }; + + const web_server_thread = std.Thread.spawn(.{}, WebServer.run, .{&web_server}) catch |err| { + fatal("unable to spawn web server thread: {s}", .{@errorName(err)}); + }; + defer web_server_thread.join(); + { - const fuzz_node = prog_node.start("Fuzzing", count); + const fuzz_node = prog_node.start("Fuzzing", fuzz_run_steps.len); defer fuzz_node.end(); var wait_group: std.Thread.WaitGroup = .{}; defer wait_group.wait(); - for (all_steps) |step| { - const run = step.cast(Step.Run) orelse continue; + for (fuzz_run_steps) |run| { for (run.fuzz_tests.items) |unit_test_index| { assert(run.rebuilt_executable != null); - thread_pool.spawnWg(&wait_group, fuzzWorkerRun, .{ run, unit_test_index, ttyconf, fuzz_node }); + thread_pool.spawnWg(&wait_group, fuzzWorkerRun, .{ + run, &web_server, unit_test_index, ttyconf, fuzz_node, + }); } } } - fatal("all fuzz workers crashed", .{}); + log.err("all fuzz workers crashed", .{}); } +pub const WebServer = struct { + gpa: Allocator, + global_cache_directory: Build.Cache.Directory, + zig_lib_directory: Build.Cache.Directory, + zig_exe_path: []const u8, + /// Messages from fuzz workers. Protected by mutex. 
+ msg_queue: std.ArrayListUnmanaged(Msg), + mutex: std.Thread.Mutex, + listen_address: std.net.Address, + fuzz_run_steps: []const *Step.Run, + + const Msg = union(enum) { + coverage_id: u64, + }; + + fn run(ws: *WebServer) void { + var http_server = ws.listen_address.listen(.{ + .reuse_address = true, + }) catch |err| { + log.err("failed to listen to port {d}: {s}", .{ ws.listen_address.in.getPort(), @errorName(err) }); + return; + }; + const port = http_server.listen_address.in.getPort(); + log.info("web interface listening at http://127.0.0.1:{d}/", .{port}); + + while (true) { + const connection = http_server.accept() catch |err| { + log.err("failed to accept connection: {s}", .{@errorName(err)}); + return; + }; + _ = std.Thread.spawn(.{}, accept, .{ ws, connection }) catch |err| { + log.err("unable to spawn connection thread: {s}", .{@errorName(err)}); + connection.stream.close(); + continue; + }; + } + } + + fn accept(ws: *WebServer, connection: std.net.Server.Connection) void { + defer connection.stream.close(); + + var read_buffer: [8000]u8 = undefined; + var server = std.http.Server.init(connection, &read_buffer); + while (server.state == .ready) { + var request = server.receiveHead() catch |err| switch (err) { + error.HttpConnectionClosing => return, + else => { + log.err("closing http connection: {s}", .{@errorName(err)}); + return; + }, + }; + serveRequest(ws, &request) catch |err| switch (err) { + error.AlreadyReported => return, + else => |e| { + log.err("unable to serve {s}: {s}", .{ request.head.target, @errorName(e) }); + return; + }, + }; + } + } + + fn serveRequest(ws: *WebServer, request: *std.http.Server.Request) !void { + if (std.mem.eql(u8, request.head.target, "/") or + std.mem.eql(u8, request.head.target, "/debug") or + std.mem.eql(u8, request.head.target, "/debug/")) + { + try serveFile(ws, request, "fuzzer/index.html", "text/html"); + } else if (std.mem.eql(u8, request.head.target, "/main.js") or + std.mem.eql(u8, request.head.target, 
"/debug/main.js")) + { + try serveFile(ws, request, "fuzzer/main.js", "application/javascript"); + } else if (std.mem.eql(u8, request.head.target, "/main.wasm")) { + try serveWasm(ws, request, .ReleaseFast); + } else if (std.mem.eql(u8, request.head.target, "/debug/main.wasm")) { + try serveWasm(ws, request, .Debug); + } else if (std.mem.eql(u8, request.head.target, "/sources.tar") or + std.mem.eql(u8, request.head.target, "/debug/sources.tar")) + { + try serveSourcesTar(ws, request); + } else { + try request.respond("not found", .{ + .status = .not_found, + .extra_headers = &.{ + .{ .name = "content-type", .value = "text/plain" }, + }, + }); + } + } + + fn serveFile( + ws: *WebServer, + request: *std.http.Server.Request, + name: []const u8, + content_type: []const u8, + ) !void { + const gpa = ws.gpa; + // The desired API is actually sendfile, which will require enhancing std.http.Server. + // We load the file with every request so that the user can make changes to the file + // and refresh the HTML page without restarting this server. + const file_contents = ws.zig_lib_directory.handle.readFileAlloc(gpa, name, 10 * 1024 * 1024) catch |err| { + log.err("failed to read '{}{s}': {s}", .{ ws.zig_lib_directory, name, @errorName(err) }); + return error.AlreadyReported; + }; + defer gpa.free(file_contents); + try request.respond(file_contents, .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = content_type }, + cache_control_header, + }, + }); + } + + fn serveWasm( + ws: *WebServer, + request: *std.http.Server.Request, + optimize_mode: std.builtin.OptimizeMode, + ) !void { + const gpa = ws.gpa; + + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + // Do the compilation every request, so that the user can edit the files + // and see the changes without restarting the server. 
+ const wasm_binary_path = try buildWasmBinary(ws, arena, optimize_mode); + // std.http.Server does not have a sendfile API yet. + const file_contents = try std.fs.cwd().readFileAlloc(gpa, wasm_binary_path, 10 * 1024 * 1024); + defer gpa.free(file_contents); + try request.respond(file_contents, .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = "application/wasm" }, + cache_control_header, + }, + }); + } + + fn buildWasmBinary( + ws: *WebServer, + arena: Allocator, + optimize_mode: std.builtin.OptimizeMode, + ) ![]const u8 { + const gpa = ws.gpa; + + const main_src_path: Build.Cache.Path = .{ + .root_dir = ws.zig_lib_directory, + .sub_path = "fuzzer/wasm/main.zig", + }; + const walk_src_path: Build.Cache.Path = .{ + .root_dir = ws.zig_lib_directory, + .sub_path = "docs/wasm/Walk.zig", + }; + + var argv: std.ArrayListUnmanaged([]const u8) = .{}; + + try argv.appendSlice(arena, &.{ + ws.zig_exe_path, + "build-exe", + "-fno-entry", + "-O", + @tagName(optimize_mode), + "-target", + "wasm32-freestanding", + "-mcpu", + "baseline+atomics+bulk_memory+multivalue+mutable_globals+nontrapping_fptoint+reference_types+sign_ext", + "--cache-dir", + ws.global_cache_directory.path orelse ".", + "--global-cache-dir", + ws.global_cache_directory.path orelse ".", + "--name", + "fuzzer", + "-rdynamic", + "--dep", + "Walk", + try std.fmt.allocPrint(arena, "-Mroot={}", .{main_src_path}), + try std.fmt.allocPrint(arena, "-MWalk={}", .{walk_src_path}), + "--listen=-", + }); + + var child = std.process.Child.init(argv.items, gpa); + child.stdin_behavior = .Pipe; + child.stdout_behavior = .Pipe; + child.stderr_behavior = .Pipe; + try child.spawn(); + + var poller = std.io.poll(gpa, enum { stdout, stderr }, .{ + .stdout = child.stdout.?, + .stderr = child.stderr.?, + }); + defer poller.deinit(); + + try sendMessage(child.stdin.?, .update); + try sendMessage(child.stdin.?, .exit); + + const Header = std.zig.Server.Message.Header; + var result: ?[]const u8 = null; + var 
result_error_bundle = std.zig.ErrorBundle.empty; + + const stdout = poller.fifo(.stdout); + + poll: while (true) { + while (stdout.readableLength() < @sizeOf(Header)) { + if (!(try poller.poll())) break :poll; + } + const header = stdout.reader().readStruct(Header) catch unreachable; + while (stdout.readableLength() < header.bytes_len) { + if (!(try poller.poll())) break :poll; + } + const body = stdout.readableSliceOfLen(header.bytes_len); + + switch (header.tag) { + .zig_version => { + if (!std.mem.eql(u8, builtin.zig_version_string, body)) { + return error.ZigProtocolVersionMismatch; + } + }, + .error_bundle => { + const EbHdr = std.zig.Server.Message.ErrorBundle; + const eb_hdr = @as(*align(1) const EbHdr, @ptrCast(body)); + const extra_bytes = + body[@sizeOf(EbHdr)..][0 .. @sizeOf(u32) * eb_hdr.extra_len]; + const string_bytes = + body[@sizeOf(EbHdr) + extra_bytes.len ..][0..eb_hdr.string_bytes_len]; + // TODO: use @ptrCast when the compiler supports it + const unaligned_extra = std.mem.bytesAsSlice(u32, extra_bytes); + const extra_array = try arena.alloc(u32, unaligned_extra.len); + @memcpy(extra_array, unaligned_extra); + result_error_bundle = .{ + .string_bytes = try arena.dupe(u8, string_bytes), + .extra = extra_array, + }; + }, + .emit_bin_path => { + const EbpHdr = std.zig.Server.Message.EmitBinPath; + const ebp_hdr = @as(*align(1) const EbpHdr, @ptrCast(body)); + if (!ebp_hdr.flags.cache_hit) { + log.info("source changes detected; rebuilt wasm component", .{}); + } + result = try arena.dupe(u8, body[@sizeOf(EbpHdr)..]); + }, + else => {}, // ignore other messages + } + + stdout.discard(body.len); + } + + const stderr = poller.fifo(.stderr); + if (stderr.readableLength() > 0) { + const owned_stderr = try stderr.toOwnedSlice(); + defer gpa.free(owned_stderr); + std.debug.print("{s}", .{owned_stderr}); + } + + // Send EOF to stdin. 
+ child.stdin.?.close(); + child.stdin = null; + + switch (try child.wait()) { + .Exited => |code| { + if (code != 0) { + log.err( + "the following command exited with error code {d}:\n{s}", + .{ code, try Build.Step.allocPrintCmd(arena, null, argv.items) }, + ); + return error.WasmCompilationFailed; + } + }, + .Signal, .Stopped, .Unknown => { + log.err( + "the following command terminated unexpectedly:\n{s}", + .{try Build.Step.allocPrintCmd(arena, null, argv.items)}, + ); + return error.WasmCompilationFailed; + }, + } + + if (result_error_bundle.errorMessageCount() > 0) { + const color = std.zig.Color.auto; + result_error_bundle.renderToStdErr(color.renderOptions()); + log.err("the following command failed with {d} compilation errors:\n{s}", .{ + result_error_bundle.errorMessageCount(), + try Build.Step.allocPrintCmd(arena, null, argv.items), + }); + return error.WasmCompilationFailed; + } + + return result orelse { + log.err("child process failed to report result\n{s}", .{ + try Build.Step.allocPrintCmd(arena, null, argv.items), + }); + return error.WasmCompilationFailed; + }; + } + + fn sendMessage(file: std.fs.File, tag: std.zig.Client.Message.Tag) !void { + const header: std.zig.Client.Message.Header = .{ + .tag = tag, + .bytes_len = 0, + }; + try file.writeAll(std.mem.asBytes(&header)); + } + + fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { + const gpa = ws.gpa; + + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + var send_buffer: [0x4000]u8 = undefined; + var response = request.respondStreaming(.{ + .send_buffer = &send_buffer, + .respond_options = .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = "application/x-tar" }, + cache_control_header, + }, + }, + }); + const w = response.writer(); + + const DedupeTable = std.ArrayHashMapUnmanaged(Build.Cache.Path, void, Build.Cache.Path.TableAdapter, false); + var dedupe_table: 
DedupeTable = .{}; + defer dedupe_table.deinit(gpa); + + for (ws.fuzz_run_steps) |run_step| { + const compile_step_inputs = run_step.producer.?.step.inputs.table; + for (compile_step_inputs.keys(), compile_step_inputs.values()) |dir_path, *file_list| { + try dedupe_table.ensureUnusedCapacity(gpa, file_list.items.len); + for (file_list.items) |sub_path| { + // Special file "." means the entire directory. + if (std.mem.eql(u8, sub_path, ".")) continue; + const joined_path = try dir_path.join(arena, sub_path); + _ = dedupe_table.getOrPutAssumeCapacity(joined_path); + } + } + } + + const deduped_paths = dedupe_table.keys(); + const SortContext = struct { + pub fn lessThan(this: @This(), lhs: Build.Cache.Path, rhs: Build.Cache.Path) bool { + _ = this; + return switch (std.mem.order(u8, lhs.root_dir.path orelse ".", rhs.root_dir.path orelse ".")) { + .lt => true, + .gt => false, + .eq => std.mem.lessThan(u8, lhs.sub_path, rhs.sub_path), + }; + } + }; + std.mem.sortUnstable(Build.Cache.Path, deduped_paths, SortContext{}, SortContext.lessThan); + + for (deduped_paths) |joined_path| { + var file = joined_path.root_dir.handle.openFile(joined_path.sub_path, .{}) catch |err| { + log.err("failed to open {}: {s}", .{ joined_path, @errorName(err) }); + continue; + }; + defer file.close(); + + const stat = file.stat() catch |err| { + log.err("failed to stat {}: {s}", .{ joined_path, @errorName(err) }); + continue; + }; + if (stat.kind != .file) + continue; + + const padding = p: { + const remainder = stat.size % 512; + break :p if (remainder > 0) 512 - remainder else 0; + }; + + var file_header = std.tar.output.Header.init(); + file_header.typeflag = .regular; + try file_header.setPath(joined_path.root_dir.path orelse ".", joined_path.sub_path); + try file_header.setSize(stat.size); + try file_header.updateChecksum(); + try w.writeAll(std.mem.asBytes(&file_header)); + try w.writeFile(file); + try w.writeByteNTimes(0, padding); + } + + // intentionally omitting the pointless 
trailer + //try w.writeByteNTimes(0, 512 * 2); + try response.end(); + } + + const cache_control_header: std.http.Header = .{ + .name = "cache-control", + .value = "max-age=0, must-revalidate", + }; +}; + fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog_node: std.Progress.Node) void { const gpa = run.step.owner.allocator; const stderr = std.io.getStdErr(); @@ -88,6 +508,7 @@ fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog fn fuzzWorkerRun( run: *Step.Run, + web_server: *WebServer, unit_test_index: u32, ttyconf: std.io.tty.Config, parent_prog_node: std.Progress.Node, @@ -98,7 +519,7 @@ fn fuzzWorkerRun( const prog_node = parent_prog_node.start(test_name, 0); defer prog_node.end(); - run.rerunInFuzzMode(unit_test_index, prog_node) catch |err| switch (err) { + run.rerunInFuzzMode(web_server, unit_test_index, prog_node) catch |err| switch (err) { error.MakeFailed => { const stderr = std.io.getStdErr(); std.debug.lockStdErr(); diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig index 8f3236d867a5..47a6e49a82c2 100644 --- a/lib/std/Build/Step.zig +++ b/lib/std/Build/Step.zig @@ -559,7 +559,8 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool) !?[]const u8 { }, .zig_lib => zl: { if (s.cast(Step.Compile)) |compile| { - if (compile.zig_lib_dir) |lp| { + if (compile.zig_lib_dir) |zig_lib_dir| { + const lp = try zig_lib_dir.join(arena, sub_path); try addWatchInput(s, lp); break :zl; } diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index c2d25cd82cbb..e494e969f0f7 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -205,6 +205,7 @@ pub fn enableTestRunnerMode(run: *Run) void { run.stdio = .zig_test; run.addArgs(&.{ std.fmt.allocPrint(arena, "--seed=0x{x}", .{b.graph.random_seed}) catch @panic("OOM"), + std.fmt.allocPrint(arena, "--cache-dir={s}", .{b.cache_root.path orelse ""}) catch @panic("OOM"), "--listen=-", }); } @@ -845,7 +846,12 @@ fn 
make(step: *Step, options: Step.MakeOptions) !void { ); } -pub fn rerunInFuzzMode(run: *Run, unit_test_index: u32, prog_node: std.Progress.Node) !void { +pub fn rerunInFuzzMode( + run: *Run, + web_server: *std.Build.Fuzz.WebServer, + unit_test_index: u32, + prog_node: std.Progress.Node, +) !void { const step = &run.step; const b = step.owner; const arena = b.allocator; @@ -877,7 +883,10 @@ pub fn rerunInFuzzMode(run: *Run, unit_test_index: u32, prog_node: std.Progress. const has_side_effects = false; const rand_int = std.crypto.random.int(u64); const tmp_dir_path = "tmp" ++ fs.path.sep_str ++ std.fmt.hex(rand_int); - try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, prog_node, unit_test_index); + try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, prog_node, .{ + .unit_test_index = unit_test_index, + .web_server = web_server, + }); } fn populateGeneratedPaths( @@ -952,13 +961,18 @@ fn termMatches(expected: ?std.process.Child.Term, actual: std.process.Child.Term }; } +const FuzzContext = struct { + web_server: *std.Build.Fuzz.WebServer, + unit_test_index: u32, +}; + fn runCommand( run: *Run, argv: []const []const u8, has_side_effects: bool, output_dir_path: []const u8, prog_node: std.Progress.Node, - fuzz_unit_test_index: ?u32, + fuzz_context: ?FuzzContext, ) !void { const step = &run.step; const b = step.owner; @@ -977,7 +991,7 @@ fn runCommand( var interp_argv = std.ArrayList([]const u8).init(b.allocator); defer interp_argv.deinit(); - const result = spawnChildAndCollect(run, argv, has_side_effects, prog_node, fuzz_unit_test_index) catch |err| term: { + const result = spawnChildAndCollect(run, argv, has_side_effects, prog_node, fuzz_context) catch |err| term: { // InvalidExe: cpu arch mismatch // FileNotFound: can happen with a wrong dynamic linker path if (err == error.InvalidExe or err == error.FileNotFound) interpret: { @@ -1113,7 +1127,7 @@ fn runCommand( try Step.handleVerbose2(step.owner, cwd, run.env_map, 
interp_argv.items); - break :term spawnChildAndCollect(run, interp_argv.items, has_side_effects, prog_node, fuzz_unit_test_index) catch |e| { + break :term spawnChildAndCollect(run, interp_argv.items, has_side_effects, prog_node, fuzz_context) catch |e| { if (!run.failing_to_execute_foreign_is_an_error) return error.MakeSkipped; return step.fail("unable to spawn interpreter {s}: {s}", .{ @@ -1133,7 +1147,7 @@ fn runCommand( const final_argv = if (interp_argv.items.len == 0) argv else interp_argv.items; - if (fuzz_unit_test_index != null) { + if (fuzz_context != null) { try step.handleChildProcessTerm(result.term, cwd, final_argv); return; } @@ -1298,12 +1312,12 @@ fn spawnChildAndCollect( argv: []const []const u8, has_side_effects: bool, prog_node: std.Progress.Node, - fuzz_unit_test_index: ?u32, + fuzz_context: ?FuzzContext, ) !ChildProcResult { const b = run.step.owner; const arena = b.allocator; - if (fuzz_unit_test_index != null) { + if (fuzz_context != null) { assert(!has_side_effects); assert(run.stdio == .zig_test); } @@ -1357,7 +1371,7 @@ fn spawnChildAndCollect( var timer = try std.time.Timer.start(); const result = if (run.stdio == .zig_test) - evalZigTest(run, &child, prog_node, fuzz_unit_test_index) + evalZigTest(run, &child, prog_node, fuzz_context) else evalGeneric(run, &child); @@ -1383,7 +1397,7 @@ fn evalZigTest( run: *Run, child: *std.process.Child, prog_node: std.Progress.Node, - fuzz_unit_test_index: ?u32, + fuzz_context: ?FuzzContext, ) !StdIoResult { const gpa = run.step.owner.allocator; const arena = run.step.owner.allocator; @@ -1394,8 +1408,8 @@ fn evalZigTest( }); defer poller.deinit(); - if (fuzz_unit_test_index) |index| { - try sendRunTestMessage(child.stdin.?, .start_fuzzing, index); + if (fuzz_context) |fuzz| { + try sendRunTestMessage(child.stdin.?, .start_fuzzing, fuzz.unit_test_index); } else { run.fuzz_tests.clearRetainingCapacity(); try sendMessage(child.stdin.?, .query_test_metadata); @@ -1437,7 +1451,7 @@ fn evalZigTest( } }, 
.test_metadata => { - assert(fuzz_unit_test_index == null); + assert(fuzz_context == null); const TmHdr = std.zig.Server.Message.TestMetadata; const tm_hdr = @as(*align(1) const TmHdr, @ptrCast(body)); test_count = tm_hdr.tests_len; @@ -1466,7 +1480,7 @@ fn evalZigTest( try requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node); }, .test_results => { - assert(fuzz_unit_test_index == null); + assert(fuzz_context == null); const md = metadata.?; const TrHdr = std.zig.Server.Message.TestResults; @@ -1500,6 +1514,16 @@ fn evalZigTest( try requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node); }, + .coverage_id => { + const web_server = fuzz_context.?.web_server; + const msg_ptr: *align(1) const u64 = @ptrCast(body); + const coverage_id = msg_ptr.*; + { + web_server.mutex.lock(); + defer web_server.mutex.unlock(); + try web_server.msg_queue.append(web_server.gpa, .{ .coverage_id = coverage_id }); + } + }, else => {}, // ignore other messages } diff --git a/lib/std/zig/Server.zig b/lib/std/zig/Server.zig index f1e564d43e02..93ce6cc01fdc 100644 --- a/lib/std/zig/Server.zig +++ b/lib/std/zig/Server.zig @@ -28,6 +28,10 @@ pub const Message = struct { /// The remaining bytes is the file path relative to that prefix. /// The prefixes are hard-coded in Compilation.create (cwd, zig lib dir, local cache dir) file_system_inputs, + /// Body is a u64le that indicates the file path within the cache used + /// to store coverage information. The integer is a hash of the PCs + /// stored within that file. + coverage_id, _, }; @@ -180,6 +184,14 @@ pub fn serveMessage( try s.out.writevAll(iovecs[0 .. 
bufs.len + 1]); } +pub fn serveU64Message(s: *Server, tag: OutMessage.Tag, int: u64) !void { + const msg_le = bswap(int); + return s.serveMessage(.{ + .tag = tag, + .bytes_len = @sizeOf(u64), + }, &.{std.mem.asBytes(&msg_le)}); +} + pub fn serveEmitBinPath( s: *Server, fs_path: []const u8, @@ -187,7 +199,7 @@ pub fn serveEmitBinPath( ) !void { try s.serveMessage(.{ .tag = .emit_bin_path, - .bytes_len = @as(u32, @intCast(fs_path.len + @sizeOf(OutMessage.EmitBinPath))), + .bytes_len = @intCast(fs_path.len + @sizeOf(OutMessage.EmitBinPath)), }, &.{ std.mem.asBytes(&header), fs_path, @@ -201,7 +213,7 @@ pub fn serveTestResults( const msg_le = bswap(msg); try s.serveMessage(.{ .tag = .test_results, - .bytes_len = @as(u32, @intCast(@sizeOf(OutMessage.TestResults))), + .bytes_len = @intCast(@sizeOf(OutMessage.TestResults)), }, &.{ std.mem.asBytes(&msg_le), }); @@ -209,14 +221,14 @@ pub fn serveTestResults( pub fn serveErrorBundle(s: *Server, error_bundle: std.zig.ErrorBundle) !void { const eb_hdr: OutMessage.ErrorBundle = .{ - .extra_len = @as(u32, @intCast(error_bundle.extra.len)), - .string_bytes_len = @as(u32, @intCast(error_bundle.string_bytes.len)), + .extra_len = @intCast(error_bundle.extra.len), + .string_bytes_len = @intCast(error_bundle.string_bytes.len), }; const bytes_len = @sizeOf(OutMessage.ErrorBundle) + 4 * error_bundle.extra.len + error_bundle.string_bytes.len; try s.serveMessage(.{ .tag = .error_bundle, - .bytes_len = @as(u32, @intCast(bytes_len)), + .bytes_len = @intCast(bytes_len), }, &.{ std.mem.asBytes(&eb_hdr), // TODO: implement @ptrCast between slices changing the length @@ -251,7 +263,7 @@ pub fn serveTestMetadata(s: *Server, test_metadata: TestMetadata) !void { return s.serveMessage(.{ .tag = .test_metadata, - .bytes_len = @as(u32, @intCast(bytes_len)), + .bytes_len = @intCast(bytes_len), }, &.{ std.mem.asBytes(&header), // TODO: implement @ptrCast between slices changing the length diff --git a/lib/std/zig/tokenizer.zig 
b/lib/std/zig/tokenizer.zig index c375818770ab..b63bde563385 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -1840,3 +1840,48 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v try std.testing.expectEqual(source.len, last_token.loc.start); try std.testing.expectEqual(source.len, last_token.loc.end); } + +test "fuzzable properties upheld" { + const source = std.testing.fuzzInput(.{}); + const source0 = try std.testing.allocator.dupeZ(u8, source); + defer std.testing.allocator.free(source0); + var tokenizer = Tokenizer.init(source0); + var tokenization_failed = false; + while (true) { + const token = tokenizer.next(); + + // Property: token end location after start location (or equal) + try std.testing.expect(token.loc.end >= token.loc.start); + + switch (token.tag) { + .invalid => { + tokenization_failed = true; + + // Property: invalid token always ends at newline or eof + try std.testing.expect(source0[token.loc.end] == '\n' or source0[token.loc.end] == 0); + }, + .eof => { + // Property: EOF token is always 0-length at end of source. + try std.testing.expectEqual(source0.len, token.loc.start); + try std.testing.expectEqual(source0.len, token.loc.end); + break; + }, + else => continue, + } + } + + if (source0.len > 0) for (source0, source0[1..][0..source0.len]) |cur, next| { + // Property: No null byte allowed except at end. + if (cur == 0) { + try std.testing.expect(tokenization_failed); + } + // Property: No ASCII control characters other than \n and \t are allowed. + if (std.ascii.isControl(cur) and cur != '\n' and cur != '\t') { + try std.testing.expect(tokenization_failed); + } + // Property: All '\r' must be followed by '\n'. 
+ if (cur == '\r' and next != '\n') { + try std.testing.expect(tokenization_failed); + } + }; +} From 107b27276602d1935a90ae97c57f640b090088b5 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 31 Jul 2024 23:45:43 -0700 Subject: [PATCH 04/34] fuzzer: share zig to html rendering with autodocs --- lib/compiler/std-docs.zig | 9 +- lib/docs/wasm/Decl.zig | 18 +- lib/docs/wasm/Walk.zig | 20 +- lib/docs/wasm/html_render.zig | 388 ++++++++++++++++++++++++++++++++ lib/docs/wasm/main.zig | 406 ++-------------------------------- lib/fuzzer/index.html | 60 ++++- lib/fuzzer/main.js | 48 ++++ lib/fuzzer/wasm/main.zig | 44 ++++ lib/std/Build/Fuzz.zig | 39 ++-- 9 files changed, 598 insertions(+), 434 deletions(-) create mode 100644 lib/docs/wasm/html_render.zig diff --git a/lib/compiler/std-docs.zig b/lib/compiler/std-docs.zig index eaabcfa93643..c11665101c39 100644 --- a/lib/compiler/std-docs.zig +++ b/lib/compiler/std-docs.zig @@ -275,10 +275,6 @@ fn buildWasmBinary( ) ![]const u8 { const gpa = context.gpa; - const main_src_path = try std.fs.path.join(arena, &.{ - context.zig_lib_directory, "docs", "wasm", "main.zig", - }); - var argv: std.ArrayListUnmanaged([]const u8) = .{}; try argv.appendSlice(arena, &.{ @@ -298,7 +294,10 @@ fn buildWasmBinary( "--name", "autodoc", "-rdynamic", - main_src_path, + "--dep", + "Walk", + try std.fmt.allocPrint(arena, "-Mroot={s}/docs/wasm/main.zig", .{context.zig_lib_directory}), + try std.fmt.allocPrint(arena, "-MWalk={s}/docs/wasm/Walk.zig", .{context.zig_lib_directory}), "--listen=-", }); diff --git a/lib/docs/wasm/Decl.zig b/lib/docs/wasm/Decl.zig index 0260ce02850d..254635598799 100644 --- a/lib/docs/wasm/Decl.zig +++ b/lib/docs/wasm/Decl.zig @@ -1,3 +1,12 @@ +const Decl = @This(); +const std = @import("std"); +const Ast = std.zig.Ast; +const Walk = @import("Walk.zig"); +const gpa = std.heap.wasm_allocator; +const assert = std.debug.assert; +const log = std.log; +const Oom = error{OutOfMemory}; + ast_node: Ast.Node.Index, file: 
Walk.File.Index, /// The decl whose namespace this is in. @@ -215,12 +224,3 @@ pub fn find(search_string: []const u8) Decl.Index { } return current_decl_index; } - -const Decl = @This(); -const std = @import("std"); -const Ast = std.zig.Ast; -const Walk = @import("Walk.zig"); -const gpa = std.heap.wasm_allocator; -const assert = std.debug.assert; -const log = std.log; -const Oom = error{OutOfMemory}; diff --git a/lib/docs/wasm/Walk.zig b/lib/docs/wasm/Walk.zig index a22da861a8ac..ae924b8c3825 100644 --- a/lib/docs/wasm/Walk.zig +++ b/lib/docs/wasm/Walk.zig @@ -1,4 +1,15 @@ //! Find and annotate identifiers with links to their declarations. + +const Walk = @This(); +const std = @import("std"); +const Ast = std.zig.Ast; +const assert = std.debug.assert; +const log = std.log; +const gpa = std.heap.wasm_allocator; +const Oom = error{OutOfMemory}; + +pub const Decl = @import("Decl.zig"); + pub var files: std.StringArrayHashMapUnmanaged(File) = .{}; pub var decls: std.ArrayListUnmanaged(Decl) = .{}; pub var modules: std.StringArrayHashMapUnmanaged(File.Index) = .{}; @@ -1120,15 +1131,6 @@ pub fn isPrimitiveNonType(name: []const u8) bool { // try w.root(); //} -const Walk = @This(); -const std = @import("std"); -const Ast = std.zig.Ast; -const assert = std.debug.assert; -const Decl = @import("Decl.zig"); -const log = std.log; -const gpa = std.heap.wasm_allocator; -const Oom = error{OutOfMemory}; - fn shrinkToFit(m: anytype) void { m.shrinkAndFree(gpa, m.entries.len); } diff --git a/lib/docs/wasm/html_render.zig b/lib/docs/wasm/html_render.zig new file mode 100644 index 000000000000..cce201049d7d --- /dev/null +++ b/lib/docs/wasm/html_render.zig @@ -0,0 +1,388 @@ +const std = @import("std"); +const Ast = std.zig.Ast; +const assert = std.debug.assert; + +const Walk = @import("Walk"); +const Decl = Walk.Decl; + +const gpa = std.heap.wasm_allocator; +const Oom = error{OutOfMemory}; + +/// Delete this to find out where URL escaping needs to be added. 
+pub const missing_feature_url_escape = true; + +pub const RenderSourceOptions = struct { + skip_doc_comments: bool = false, + skip_comments: bool = false, + collapse_whitespace: bool = false, + fn_link: Decl.Index = .none, +}; + +pub fn fileSourceHtml( + file_index: Walk.File.Index, + out: *std.ArrayListUnmanaged(u8), + root_node: Ast.Node.Index, + options: RenderSourceOptions, +) !void { + const ast = file_index.get_ast(); + const file = file_index.get(); + + const g = struct { + var field_access_buffer: std.ArrayListUnmanaged(u8) = .{}; + }; + + const token_tags = ast.tokens.items(.tag); + const token_starts = ast.tokens.items(.start); + const main_tokens = ast.nodes.items(.main_token); + + const start_token = ast.firstToken(root_node); + const end_token = ast.lastToken(root_node) + 1; + + var cursor: usize = token_starts[start_token]; + + var indent: usize = 0; + if (std.mem.lastIndexOf(u8, ast.source[0..cursor], "\n")) |newline_index| { + for (ast.source[newline_index + 1 .. cursor]) |c| { + if (c == ' ') { + indent += 1; + } else { + break; + } + } + } + + for ( + token_tags[start_token..end_token], + token_starts[start_token..end_token], + start_token.., + ) |tag, start, token_index| { + const between = ast.source[cursor..start]; + if (std.mem.trim(u8, between, " \t\r\n").len > 0) { + if (!options.skip_comments) { + try out.appendSlice(gpa, ""); + try appendUnindented(out, between, indent); + try out.appendSlice(gpa, ""); + } + } else if (between.len > 0) { + if (options.collapse_whitespace) { + if (out.items.len > 0 and out.items[out.items.len - 1] != ' ') + try out.append(gpa, ' '); + } else { + try appendUnindented(out, between, indent); + } + } + if (tag == .eof) break; + const slice = ast.tokenSlice(token_index); + cursor = start + slice.len; + switch (tag) { + .eof => unreachable, + + .keyword_addrspace, + .keyword_align, + .keyword_and, + .keyword_asm, + .keyword_async, + .keyword_await, + .keyword_break, + .keyword_catch, + .keyword_comptime, + 
.keyword_const, + .keyword_continue, + .keyword_defer, + .keyword_else, + .keyword_enum, + .keyword_errdefer, + .keyword_error, + .keyword_export, + .keyword_extern, + .keyword_for, + .keyword_if, + .keyword_inline, + .keyword_noalias, + .keyword_noinline, + .keyword_nosuspend, + .keyword_opaque, + .keyword_or, + .keyword_orelse, + .keyword_packed, + .keyword_anyframe, + .keyword_pub, + .keyword_resume, + .keyword_return, + .keyword_linksection, + .keyword_callconv, + .keyword_struct, + .keyword_suspend, + .keyword_switch, + .keyword_test, + .keyword_threadlocal, + .keyword_try, + .keyword_union, + .keyword_unreachable, + .keyword_usingnamespace, + .keyword_var, + .keyword_volatile, + .keyword_allowzero, + .keyword_while, + .keyword_anytype, + .keyword_fn, + => { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + }, + + .string_literal, + .char_literal, + .multiline_string_literal_line, + => { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + }, + + .builtin => { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + }, + + .doc_comment, + .container_doc_comment, + => { + if (!options.skip_doc_comments) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + } + }, + + .identifier => i: { + if (options.fn_link != .none) { + const fn_link = options.fn_link.get(); + const fn_token = main_tokens[fn_link.ast_node]; + if (token_index == fn_token + 1) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + break :i; + } + } + + if (token_index > 0 and token_tags[token_index - 1] == .keyword_fn) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + break :i; + } + + if (Walk.isPrimitiveNonType(slice)) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); 
+ break :i; + } + + if (std.zig.primitives.isPrimitive(slice)) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + break :i; + } + + if (file.token_parents.get(token_index)) |field_access_node| { + g.field_access_buffer.clearRetainingCapacity(); + try walkFieldAccesses(file_index, &g.field_access_buffer, field_access_node); + if (g.field_access_buffer.items.len > 0) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + } else { + try appendEscaped(out, slice); + } + break :i; + } + + { + g.field_access_buffer.clearRetainingCapacity(); + try resolveIdentLink(file_index, &g.field_access_buffer, token_index); + if (g.field_access_buffer.items.len > 0) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + break :i; + } + } + + try appendEscaped(out, slice); + }, + + .number_literal => { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + }, + + .bang, + .pipe, + .pipe_pipe, + .pipe_equal, + .equal, + .equal_equal, + .equal_angle_bracket_right, + .bang_equal, + .l_paren, + .r_paren, + .semicolon, + .percent, + .percent_equal, + .l_brace, + .r_brace, + .l_bracket, + .r_bracket, + .period, + .period_asterisk, + .ellipsis2, + .ellipsis3, + .caret, + .caret_equal, + .plus, + .plus_plus, + .plus_equal, + .plus_percent, + .plus_percent_equal, + .plus_pipe, + .plus_pipe_equal, + .minus, + .minus_equal, + .minus_percent, + .minus_percent_equal, + .minus_pipe, + .minus_pipe_equal, + .asterisk, + .asterisk_equal, + .asterisk_asterisk, + .asterisk_percent, + .asterisk_percent_equal, + .asterisk_pipe, + .asterisk_pipe_equal, + .arrow, + .colon, + .slash, + .slash_equal, + .comma, + .ampersand, + .ampersand_equal, + .question_mark, + .angle_bracket_left, + .angle_bracket_left_equal, + .angle_bracket_angle_bracket_left, + .angle_bracket_angle_bracket_left_equal, + 
.angle_bracket_angle_bracket_left_pipe, + .angle_bracket_angle_bracket_left_pipe_equal, + .angle_bracket_right, + .angle_bracket_right_equal, + .angle_bracket_angle_bracket_right, + .angle_bracket_angle_bracket_right_equal, + .tilde, + => try appendEscaped(out, slice), + + .invalid, .invalid_periodasterisks => return error.InvalidToken, + } + } +} + +fn appendUnindented(out: *std.ArrayListUnmanaged(u8), s: []const u8, indent: usize) !void { + var it = std.mem.splitScalar(u8, s, '\n'); + var is_first_line = true; + while (it.next()) |line| { + if (is_first_line) { + try appendEscaped(out, line); + is_first_line = false; + } else { + try out.appendSlice(gpa, "\n"); + try appendEscaped(out, unindent(line, indent)); + } + } +} + +pub fn appendEscaped(out: *std.ArrayListUnmanaged(u8), s: []const u8) !void { + for (s) |c| { + try out.ensureUnusedCapacity(gpa, 6); + switch (c) { + '&' => out.appendSliceAssumeCapacity("&"), + '<' => out.appendSliceAssumeCapacity("<"), + '>' => out.appendSliceAssumeCapacity(">"), + '"' => out.appendSliceAssumeCapacity("""), + else => out.appendAssumeCapacity(c), + } + } +} + +fn walkFieldAccesses( + file_index: Walk.File.Index, + out: *std.ArrayListUnmanaged(u8), + node: Ast.Node.Index, +) Oom!void { + const ast = file_index.get_ast(); + const node_tags = ast.nodes.items(.tag); + assert(node_tags[node] == .field_access); + const node_datas = ast.nodes.items(.data); + const main_tokens = ast.nodes.items(.main_token); + const object_node = node_datas[node].lhs; + const dot_token = main_tokens[node]; + const field_ident = dot_token + 1; + switch (node_tags[object_node]) { + .identifier => { + const lhs_ident = main_tokens[object_node]; + try resolveIdentLink(file_index, out, lhs_ident); + }, + .field_access => { + try walkFieldAccesses(file_index, out, object_node); + }, + else => {}, + } + if (out.items.len > 0) { + try out.append(gpa, '.'); + try out.appendSlice(gpa, ast.tokenSlice(field_ident)); + } +} + +fn resolveIdentLink( + file_index: 
Walk.File.Index, + out: *std.ArrayListUnmanaged(u8), + ident_token: Ast.TokenIndex, +) Oom!void { + const decl_index = file_index.get().lookup_token(ident_token); + if (decl_index == .none) return; + try resolveDeclLink(decl_index, out); +} + +fn unindent(s: []const u8, indent: usize) []const u8 { + var indent_idx: usize = 0; + for (s) |c| { + if (c == ' ' and indent_idx < indent) { + indent_idx += 1; + } else { + break; + } + } + return s[indent_idx..]; +} + +pub fn resolveDeclLink(decl_index: Decl.Index, out: *std.ArrayListUnmanaged(u8)) Oom!void { + const decl = decl_index.get(); + switch (decl.categorize()) { + .alias => |alias_decl| try alias_decl.get().fqn(out), + else => try decl.fqn(out), + } +} diff --git a/lib/docs/wasm/main.zig b/lib/docs/wasm/main.zig index 214f28c24b95..55882aaf7df5 100644 --- a/lib/docs/wasm/main.zig +++ b/lib/docs/wasm/main.zig @@ -1,15 +1,17 @@ -/// Delete this to find out where URL escaping needs to be added. -const missing_feature_url_escape = true; - -const gpa = std.heap.wasm_allocator; - const std = @import("std"); const log = std.log; const assert = std.debug.assert; const Ast = std.zig.Ast; -const Walk = @import("Walk.zig"); +const Walk = @import("Walk"); const markdown = @import("markdown.zig"); -const Decl = @import("Decl.zig"); +const Decl = Walk.Decl; + +const fileSourceHtml = @import("html_render.zig").fileSourceHtml; +const appendEscaped = @import("html_render.zig").appendEscaped; +const resolveDeclLink = @import("html_render.zig").resolveDeclLink; +const missing_feature_url_escape = @import("html_render.zig").missing_feature_url_escape; + +const gpa = std.heap.wasm_allocator; const js = struct { extern "js" fn log(ptr: [*]const u8, len: usize) void; @@ -439,7 +441,7 @@ fn decl_field_html_fallible( const decl = decl_index.get(); const ast = decl.file.get_ast(); try out.appendSlice(gpa, "
");
-    try file_source_html(decl.file, out, field_node, .{});
+    try fileSourceHtml(decl.file, out, field_node, .{});
     try out.appendSlice(gpa, "
"); const field = ast.fullContainerField(field_node).?; @@ -478,7 +480,7 @@ fn decl_param_html_fallible( try out.appendSlice(gpa, "
");
     try appendEscaped(out, name);
     try out.appendSlice(gpa, ": ");
-    try file_source_html(decl.file, out, param_node, .{});
+    try fileSourceHtml(decl.file, out, param_node, .{});
     try out.appendSlice(gpa, "
"); if (ast.tokens.items(.tag)[first_doc_comment] == .doc_comment) { @@ -506,7 +508,7 @@ export fn decl_fn_proto_html(decl_index: Decl.Index, linkify_fn_name: bool) Stri }; string_result.clearRetainingCapacity(); - file_source_html(decl.file, &string_result, proto_node, .{ + fileSourceHtml(decl.file, &string_result, proto_node, .{ .skip_doc_comments = true, .skip_comments = true, .collapse_whitespace = true, @@ -521,7 +523,7 @@ export fn decl_source_html(decl_index: Decl.Index) String { const decl = decl_index.get(); string_result.clearRetainingCapacity(); - file_source_html(decl.file, &string_result, decl.ast_node, .{}) catch |err| { + fileSourceHtml(decl.file, &string_result, decl.ast_node, .{}) catch |err| { fatal("unable to render source: {s}", .{@errorName(err)}); }; return String.init(string_result.items); @@ -533,7 +535,7 @@ export fn decl_doctest_html(decl_index: Decl.Index) String { return String.init(""); string_result.clearRetainingCapacity(); - file_source_html(decl.file, &string_result, doctest_ast_node, .{}) catch |err| { + fileSourceHtml(decl.file, &string_result, doctest_ast_node, .{}) catch |err| { fatal("unable to render source: {s}", .{@errorName(err)}); }; return String.init(string_result.items); @@ -691,7 +693,7 @@ fn render_docs( const content = doc.string(data.text.content); if (resolve_decl_path(r.context, content)) |resolved_decl_index| { g.link_buffer.clearRetainingCapacity(); - try resolve_decl_link(resolved_decl_index, &g.link_buffer); + try resolveDeclLink(resolved_decl_index, &g.link_buffer); try writer.writeAll("") catch @panic("OOM"); - file_source_html(decl.file, &string_result, var_decl.ast.type_node, .{ + fileSourceHtml(decl.file, &string_result, var_decl.ast.type_node, .{ .skip_comments = true, .collapse_whitespace = true, }) catch |e| { @@ -902,382 +904,6 @@ export fn namespace_members(parent: Decl.Index, include_private: bool) Slice(Dec return Slice(Decl.Index).init(g.members.items); } -const RenderSourceOptions = struct { - 
skip_doc_comments: bool = false, - skip_comments: bool = false, - collapse_whitespace: bool = false, - fn_link: Decl.Index = .none, -}; - -fn file_source_html( - file_index: Walk.File.Index, - out: *std.ArrayListUnmanaged(u8), - root_node: Ast.Node.Index, - options: RenderSourceOptions, -) !void { - const ast = file_index.get_ast(); - const file = file_index.get(); - - const g = struct { - var field_access_buffer: std.ArrayListUnmanaged(u8) = .{}; - }; - - const token_tags = ast.tokens.items(.tag); - const token_starts = ast.tokens.items(.start); - const main_tokens = ast.nodes.items(.main_token); - - const start_token = ast.firstToken(root_node); - const end_token = ast.lastToken(root_node) + 1; - - var cursor: usize = token_starts[start_token]; - - var indent: usize = 0; - if (std.mem.lastIndexOf(u8, ast.source[0..cursor], "\n")) |newline_index| { - for (ast.source[newline_index + 1 .. cursor]) |c| { - if (c == ' ') { - indent += 1; - } else { - break; - } - } - } - - for ( - token_tags[start_token..end_token], - token_starts[start_token..end_token], - start_token.., - ) |tag, start, token_index| { - const between = ast.source[cursor..start]; - if (std.mem.trim(u8, between, " \t\r\n").len > 0) { - if (!options.skip_comments) { - try out.appendSlice(gpa, ""); - try appendUnindented(out, between, indent); - try out.appendSlice(gpa, ""); - } - } else if (between.len > 0) { - if (options.collapse_whitespace) { - if (out.items.len > 0 and out.items[out.items.len - 1] != ' ') - try out.append(gpa, ' '); - } else { - try appendUnindented(out, between, indent); - } - } - if (tag == .eof) break; - const slice = ast.tokenSlice(token_index); - cursor = start + slice.len; - switch (tag) { - .eof => unreachable, - - .keyword_addrspace, - .keyword_align, - .keyword_and, - .keyword_asm, - .keyword_async, - .keyword_await, - .keyword_break, - .keyword_catch, - .keyword_comptime, - .keyword_const, - .keyword_continue, - .keyword_defer, - .keyword_else, - .keyword_enum, - 
.keyword_errdefer, - .keyword_error, - .keyword_export, - .keyword_extern, - .keyword_for, - .keyword_if, - .keyword_inline, - .keyword_noalias, - .keyword_noinline, - .keyword_nosuspend, - .keyword_opaque, - .keyword_or, - .keyword_orelse, - .keyword_packed, - .keyword_anyframe, - .keyword_pub, - .keyword_resume, - .keyword_return, - .keyword_linksection, - .keyword_callconv, - .keyword_struct, - .keyword_suspend, - .keyword_switch, - .keyword_test, - .keyword_threadlocal, - .keyword_try, - .keyword_union, - .keyword_unreachable, - .keyword_usingnamespace, - .keyword_var, - .keyword_volatile, - .keyword_allowzero, - .keyword_while, - .keyword_anytype, - .keyword_fn, - => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - }, - - .string_literal, - .char_literal, - .multiline_string_literal_line, - => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - }, - - .builtin => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - }, - - .doc_comment, - .container_doc_comment, - => { - if (!options.skip_doc_comments) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - } - }, - - .identifier => i: { - if (options.fn_link != .none) { - const fn_link = options.fn_link.get(); - const fn_token = main_tokens[fn_link.ast_node]; - if (token_index == fn_token + 1) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - } - - if (token_index > 0 and token_tags[token_index - 1] == .keyword_fn) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - - if (Walk.isPrimitiveNonType(slice)) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - - if (std.zig.primitives.isPrimitive(slice)) { - try out.appendSlice(gpa, 
""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - - if (file.token_parents.get(token_index)) |field_access_node| { - g.field_access_buffer.clearRetainingCapacity(); - try walk_field_accesses(file_index, &g.field_access_buffer, field_access_node); - if (g.field_access_buffer.items.len > 0) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - } else { - try appendEscaped(out, slice); - } - break :i; - } - - { - g.field_access_buffer.clearRetainingCapacity(); - try resolve_ident_link(file_index, &g.field_access_buffer, token_index); - if (g.field_access_buffer.items.len > 0) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - } - - try appendEscaped(out, slice); - }, - - .number_literal => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - }, - - .bang, - .pipe, - .pipe_pipe, - .pipe_equal, - .equal, - .equal_equal, - .equal_angle_bracket_right, - .bang_equal, - .l_paren, - .r_paren, - .semicolon, - .percent, - .percent_equal, - .l_brace, - .r_brace, - .l_bracket, - .r_bracket, - .period, - .period_asterisk, - .ellipsis2, - .ellipsis3, - .caret, - .caret_equal, - .plus, - .plus_plus, - .plus_equal, - .plus_percent, - .plus_percent_equal, - .plus_pipe, - .plus_pipe_equal, - .minus, - .minus_equal, - .minus_percent, - .minus_percent_equal, - .minus_pipe, - .minus_pipe_equal, - .asterisk, - .asterisk_equal, - .asterisk_asterisk, - .asterisk_percent, - .asterisk_percent_equal, - .asterisk_pipe, - .asterisk_pipe_equal, - .arrow, - .colon, - .slash, - .slash_equal, - .comma, - .ampersand, - .ampersand_equal, - .question_mark, - .angle_bracket_left, - .angle_bracket_left_equal, - .angle_bracket_angle_bracket_left, - .angle_bracket_angle_bracket_left_equal, - .angle_bracket_angle_bracket_left_pipe, - .angle_bracket_angle_bracket_left_pipe_equal, - .angle_bracket_right, - 
.angle_bracket_right_equal, - .angle_bracket_angle_bracket_right, - .angle_bracket_angle_bracket_right_equal, - .tilde, - => try appendEscaped(out, slice), - - .invalid, .invalid_periodasterisks => return error.InvalidToken, - } - } -} - -fn unindent(s: []const u8, indent: usize) []const u8 { - var indent_idx: usize = 0; - for (s) |c| { - if (c == ' ' and indent_idx < indent) { - indent_idx += 1; - } else { - break; - } - } - return s[indent_idx..]; -} - -fn appendUnindented(out: *std.ArrayListUnmanaged(u8), s: []const u8, indent: usize) !void { - var it = std.mem.splitScalar(u8, s, '\n'); - var is_first_line = true; - while (it.next()) |line| { - if (is_first_line) { - try appendEscaped(out, line); - is_first_line = false; - } else { - try out.appendSlice(gpa, "\n"); - try appendEscaped(out, unindent(line, indent)); - } - } -} - -fn resolve_ident_link( - file_index: Walk.File.Index, - out: *std.ArrayListUnmanaged(u8), - ident_token: Ast.TokenIndex, -) Oom!void { - const decl_index = file_index.get().lookup_token(ident_token); - if (decl_index == .none) return; - try resolve_decl_link(decl_index, out); -} - -fn resolve_decl_link(decl_index: Decl.Index, out: *std.ArrayListUnmanaged(u8)) Oom!void { - const decl = decl_index.get(); - switch (decl.categorize()) { - .alias => |alias_decl| try alias_decl.get().fqn(out), - else => try decl.fqn(out), - } -} - -fn walk_field_accesses( - file_index: Walk.File.Index, - out: *std.ArrayListUnmanaged(u8), - node: Ast.Node.Index, -) Oom!void { - const ast = file_index.get_ast(); - const node_tags = ast.nodes.items(.tag); - assert(node_tags[node] == .field_access); - const node_datas = ast.nodes.items(.data); - const main_tokens = ast.nodes.items(.main_token); - const object_node = node_datas[node].lhs; - const dot_token = main_tokens[node]; - const field_ident = dot_token + 1; - switch (node_tags[object_node]) { - .identifier => { - const lhs_ident = main_tokens[object_node]; - try resolve_ident_link(file_index, out, lhs_ident); 
- }, - .field_access => { - try walk_field_accesses(file_index, out, object_node); - }, - else => {}, - } - if (out.items.len > 0) { - try out.append(gpa, '.'); - try out.appendSlice(gpa, ast.tokenSlice(field_ident)); - } -} - -fn appendEscaped(out: *std.ArrayListUnmanaged(u8), s: []const u8) !void { - for (s) |c| { - try out.ensureUnusedCapacity(gpa, 6); - switch (c) { - '&' => out.appendSliceAssumeCapacity("&"), - '<' => out.appendSliceAssumeCapacity("<"), - '>' => out.appendSliceAssumeCapacity(">"), - '"' => out.appendSliceAssumeCapacity("""), - else => out.appendAssumeCapacity(c), - } - } -} - fn count_scalar(haystack: []const u8, needle: u8) usize { var total: usize = 0; for (haystack) |elem| { diff --git a/lib/fuzzer/index.html b/lib/fuzzer/index.html index c1ef059ad6e5..dadc2f91d3f1 100644 --- a/lib/fuzzer/index.html +++ b/lib/fuzzer/index.html @@ -2,12 +2,56 @@ - Zig Documentation + Zig Build System Interface + diff --git a/lib/fuzzer/main.js b/lib/fuzzer/main.js index 9b0d4cd8c3c6..71e6b5fa54e8 100644 --- a/lib/fuzzer/main.js +++ b/lib/fuzzer/main.js @@ -1,4 +1,7 @@ (function() { + const domSectSource = document.getElementById("sectSource"); + const domSourceText = document.getElementById("sourceText"); + let wasm_promise = fetch("main.wasm"); let sources_promise = fetch("sources.tar").then(function(response) { if (!response.ok) throw new Error("unable to download sources"); @@ -30,11 +33,56 @@ const wasm_array = new Uint8Array(wasm_exports.memory.buffer, ptr, js_array.length); wasm_array.set(js_array); wasm_exports.unpack(ptr, js_array.length); + + render(); }); }); + function render() { + domSectSource.classList.add("hidden"); + + // TODO this is temporary debugging data + renderSource("/home/andy/dev/zig/lib/std/zig/tokenizer.zig"); + } + + function renderSource(path) { + const decl_index = findFileRoot(path); + if (decl_index == null) throw new Error("file not found: " + path); + + const h2 = domSectSource.children[0]; + h2.innerText = path; + 
domSourceText.innerHTML = declSourceHtml(decl_index); + + domSectSource.classList.remove("hidden"); + } + + function findFileRoot(path) { + setInputString(path); + const result = wasm_exports.find_file_root(); + if (result === -1) return null; + return result; + } + function decodeString(ptr, len) { if (len === 0) return ""; return text_decoder.decode(new Uint8Array(wasm_exports.memory.buffer, ptr, len)); } + + function setInputString(s) { + const jsArray = text_encoder.encode(s); + const len = jsArray.length; + const ptr = wasm_exports.set_input_string(len); + const wasmArray = new Uint8Array(wasm_exports.memory.buffer, ptr, len); + wasmArray.set(jsArray); + } + + function declSourceHtml(decl_index) { + return unwrapString(wasm_exports.decl_source_html(decl_index)); + } + + function unwrapString(bigint) { + const ptr = Number(bigint & 0xffffffffn); + const len = Number(bigint >> 32n); + return decodeString(ptr, len); + } })(); diff --git a/lib/fuzzer/wasm/main.zig b/lib/fuzzer/wasm/main.zig index 09b9d8106817..5045f784ccbc 100644 --- a/lib/fuzzer/wasm/main.zig +++ b/lib/fuzzer/wasm/main.zig @@ -2,6 +2,8 @@ const std = @import("std"); const assert = std.debug.assert; const Walk = @import("Walk"); +const Decl = Walk.Decl; +const html_render = @import("html_render"); const gpa = std.heap.wasm_allocator; const log = std.log; @@ -52,6 +54,48 @@ export fn unpack(tar_ptr: [*]u8, tar_len: usize) void { }; } +/// Set by `set_input_string`. +var input_string: std.ArrayListUnmanaged(u8) = .{}; +var string_result: std.ArrayListUnmanaged(u8) = .{}; + +export fn set_input_string(len: usize) [*]u8 { + input_string.resize(gpa, len) catch @panic("OOM"); + return input_string.items.ptr; +} + +/// Looks up the root struct decl corresponding to a file by path. +/// Uses `input_string`. 
+export fn find_file_root() Decl.Index { + const file: Walk.File.Index = @enumFromInt(Walk.files.getIndex(input_string.items) orelse return .none); + return file.findRootDecl(); +} + +export fn decl_source_html(decl_index: Decl.Index) String { + const decl = decl_index.get(); + + string_result.clearRetainingCapacity(); + html_render.fileSourceHtml(decl.file, &string_result, decl.ast_node, .{}) catch |err| { + fatal("unable to render source: {s}", .{@errorName(err)}); + }; + return String.init(string_result.items); +} + +const String = Slice(u8); + +fn Slice(T: type) type { + return packed struct(u64) { + ptr: u32, + len: u32, + + fn init(s: []const T) @This() { + return .{ + .ptr = @intFromPtr(s.ptr), + .len = s.len, + }; + } + }; +} + fn unpackInner(tar_bytes: []u8) !void { var fbs = std.io.fixedBufferStream(tar_bytes); var file_name_buffer: [1024]u8 = undefined; diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig index e26f587eacb1..46d9bfc8fd49 100644 --- a/lib/std/Build/Fuzz.zig +++ b/lib/std/Build/Fuzz.zig @@ -235,30 +235,29 @@ pub const WebServer = struct { .root_dir = ws.zig_lib_directory, .sub_path = "docs/wasm/Walk.zig", }; + const html_render_src_path: Build.Cache.Path = .{ + .root_dir = ws.zig_lib_directory, + .sub_path = "docs/wasm/html_render.zig", + }; var argv: std.ArrayListUnmanaged([]const u8) = .{}; try argv.appendSlice(arena, &.{ - ws.zig_exe_path, - "build-exe", - "-fno-entry", - "-O", - @tagName(optimize_mode), - "-target", - "wasm32-freestanding", - "-mcpu", - "baseline+atomics+bulk_memory+multivalue+mutable_globals+nontrapping_fptoint+reference_types+sign_ext", - "--cache-dir", - ws.global_cache_directory.path orelse ".", - "--global-cache-dir", - ws.global_cache_directory.path orelse ".", - "--name", - "fuzzer", - "-rdynamic", - "--dep", - "Walk", - try std.fmt.allocPrint(arena, "-Mroot={}", .{main_src_path}), - try std.fmt.allocPrint(arena, "-MWalk={}", .{walk_src_path}), + ws.zig_exe_path, "build-exe", // + "-fno-entry", // + 
"-O", @tagName(optimize_mode), // + "-target", "wasm32-freestanding", // + "-mcpu", "baseline+atomics+bulk_memory+multivalue+mutable_globals+nontrapping_fptoint+reference_types+sign_ext", // + "--cache-dir", ws.global_cache_directory.path orelse ".", // + "--global-cache-dir", ws.global_cache_directory.path orelse ".", // + "--name", "fuzzer", // + "-rdynamic", // + "--dep", "Walk", // + "--dep", "html_render", // + try std.fmt.allocPrint(arena, "-Mroot={}", .{main_src_path}), // + try std.fmt.allocPrint(arena, "-MWalk={}", .{walk_src_path}), // + "--dep", "Walk", // + try std.fmt.allocPrint(arena, "-Mhtml_render={}", .{html_render_src_path}), // "--listen=-", }); From 2e12b45d8b43d69e144887df4b04a2d383ff25d4 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 16:31:49 -0700 Subject: [PATCH 05/34] introduce tool for dumping coverage file with debug info resolved. begin efforts of providing `std.debug.Info`, a cross-platform abstraction for loading debug information into an in-memory format that supports queries such as "what is the source location of this virtual memory address?" Unlike `std.debug.SelfInfo`, this API does not assume the debug information in question happens to match the host CPU architecture, OS, or other target properties. 
--- lib/std/Build/Cache/Path.zig | 8 +- lib/std/debug.zig | 33 +-- lib/std/debug/Dwarf.zig | 394 ++++++++++++++++++++++++++++++++--- lib/std/debug/Info.zig | 57 +++++ lib/std/debug/SelfInfo.zig | 264 ++--------------------- tools/dump-cov.zig | 70 +++++++ 6 files changed, 541 insertions(+), 285 deletions(-) create mode 100644 lib/std/debug/Info.zig create mode 100644 tools/dump-cov.zig diff --git a/lib/std/Build/Cache/Path.zig b/lib/std/Build/Cache/Path.zig index b81786d0a8b6..65c6f6a9bc1a 100644 --- a/lib/std/Build/Cache/Path.zig +++ b/lib/std/Build/Cache/Path.zig @@ -32,16 +32,16 @@ pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.E }; } -pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 { +pub fn joinString(p: Path, gpa: Allocator, sub_path: []const u8) Allocator.Error![]u8 { const parts: []const []const u8 = if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; - return p.root_dir.join(allocator, parts); + return p.root_dir.join(gpa, parts); } -pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 { +pub fn joinStringZ(p: Path, gpa: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 { const parts: []const []const u8 = if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; - return p.root_dir.joinZ(allocator, parts); + return p.root_dir.joinZ(gpa, parts); } pub fn openFile( diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 4d3437f665c6..907f7711a79a 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -17,6 +17,7 @@ pub const MemoryAccessor = @import("debug/MemoryAccessor.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); +pub const Info = @import("debug/Info.zig"); /// Unresolved source locations can be represented with a single `usize` that /// corresponds to a virtual memory address of the program 
counter. Combined @@ -28,6 +29,12 @@ pub const SourceLocation = struct { file_name: []const u8, }; +pub const Symbol = struct { + name: []const u8 = "???", + compile_unit_name: []const u8 = "???", + source_location: ?SourceLocation = null, +}; + /// Deprecated because it returns the optimization mode of the standard /// library, when the caller probably wants to use the optimization mode of /// their own module. @@ -871,13 +878,13 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), else => return err, }; - defer symbol_info.deinit(debug_info.allocator); + defer if (symbol_info.source_location) |sl| debug_info.allocator.free(sl.file_name); return printLineInfo( out_stream, - symbol_info.line_info, + symbol_info.source_location, address, - symbol_info.symbol_name, + symbol_info.name, symbol_info.compile_unit_name, tty_config, printLineFromFileAnyOs, @@ -886,7 +893,7 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: fn printLineInfo( out_stream: anytype, - line_info: ?SourceLocation, + source_location: ?SourceLocation, address: usize, symbol_name: []const u8, compile_unit_name: []const u8, @@ -896,8 +903,8 @@ fn printLineInfo( nosuspend { try tty_config.setColor(out_stream, .bold); - if (line_info) |*li| { - try out_stream.print("{s}:{d}:{d}", .{ li.file_name, li.line, li.column }); + if (source_location) |*sl| { + try out_stream.print("{s}:{d}:{d}", .{ sl.file_name, sl.line, sl.column }); } else { try out_stream.writeAll("???:?:?"); } @@ -910,11 +917,11 @@ fn printLineInfo( try out_stream.writeAll("\n"); // Show the matching source code line if possible - if (line_info) |li| { - if (printLineFromFile(out_stream, li)) { - if (li.column > 0) { + if (source_location) |sl| { + if (printLineFromFile(out_stream, sl)) { + if (sl.column > 0) { // The caret already takes one char - const 
space_needed = @as(usize, @intCast(li.column - 1)); + const space_needed = @as(usize, @intCast(sl.column - 1)); try out_stream.writeByteNTimes(' ', space_needed); try tty_config.setColor(out_stream, .green); @@ -932,10 +939,10 @@ fn printLineInfo( } } -fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void { +fn printLineFromFileAnyOs(out_stream: anytype, source_location: SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. - var f = try fs.cwd().openFile(line_info.file_name, .{}); + var f = try fs.cwd().openFile(source_location.file_name, .{}); defer f.close(); // TODO fstat and make sure that the file has the correct size @@ -944,7 +951,7 @@ fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void const line_start = seek: { var current_line_start: usize = 0; var next_line: usize = 1; - while (next_line != line_info.line) { + while (next_line != source_location.line) { const slice = buf[current_line_start..amt_read]; if (mem.indexOfScalar(u8, slice, '\n')) |pos| { next_line += 1; diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 991c7315492c..3c150b3b18a5 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -12,6 +12,8 @@ const native_endian = builtin.cpu.arch.endian(); const std = @import("../std.zig"); const Allocator = std.mem.Allocator; +const elf = std.elf; +const mem = std.mem; const DW = std.dwarf; const AT = DW.AT; const EH = DW.EH; @@ -22,8 +24,8 @@ const UT = DW.UT; const assert = std.debug.assert; const cast = std.math.cast; const maxInt = std.math.maxInt; -const readInt = std.mem.readInt; const MemoryAccessor = std.debug.MemoryAccessor; +const Path = std.Build.Cache.Path; /// Did I mention this is deprecated? 
const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader; @@ -252,13 +254,13 @@ pub const Die = struct { .@"32" => { const byte_offset = compile_unit.str_offsets_base + 4 * index; if (byte_offset + 4 > debug_str_offsets.len) return bad(); - const offset = readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); + const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); return getStringGeneric(opt_str, offset); }, .@"64" => { const byte_offset = compile_unit.str_offsets_base + 8 * index; if (byte_offset + 8 > debug_str_offsets.len) return bad(); - const offset = readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); + const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); return getStringGeneric(opt_str, offset); }, } @@ -721,12 +723,14 @@ const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); pub const SectionArray = [num_sections]?Section; pub const null_section_array = [_]?Section{null} ** num_sections; +pub const OpenError = ScanError; + /// Initialize DWARF info. The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). 
-pub fn open(di: *Dwarf, allocator: Allocator) !void { - try di.scanAllFunctions(allocator); - try di.scanAllCompileUnits(allocator); +pub fn open(di: *Dwarf, gpa: Allocator) OpenError!void { + try di.scanAllFunctions(gpa); + try di.scanAllCompileUnits(gpa); } const PcRange = struct { @@ -747,21 +751,21 @@ pub fn sectionVirtualOffset(di: Dwarf, dwarf_section: Section.Id, base_address: return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null; } -pub fn deinit(di: *Dwarf, allocator: Allocator) void { +pub fn deinit(di: *Dwarf, gpa: Allocator) void { for (di.sections) |opt_section| { - if (opt_section) |s| if (s.owned) allocator.free(s.data); + if (opt_section) |s| if (s.owned) gpa.free(s.data); } for (di.abbrev_table_list.items) |*abbrev| { - abbrev.deinit(allocator); + abbrev.deinit(gpa); } - di.abbrev_table_list.deinit(allocator); + di.abbrev_table_list.deinit(gpa); for (di.compile_unit_list.items) |*cu| { - cu.die.deinit(allocator); + cu.die.deinit(gpa); } - di.compile_unit_list.deinit(allocator); - di.func_list.deinit(allocator); - di.cie_map.deinit(allocator); - di.fde_list.deinit(allocator); + di.compile_unit_list.deinit(gpa); + di.func_list.deinit(gpa); + di.cie_map.deinit(gpa); + di.fde_list.deinit(gpa); di.* = undefined; } @@ -777,7 +781,12 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { return null; } -fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { +const ScanError = error{ + InvalidDebugInfo, + MissingDebugInfo, +} || Allocator.Error || std.debug.DeprecatedFixedBufferReader.Error; + +fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; @@ -964,7 +973,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { } } -fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { +fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) 
ScanError!void { var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; @@ -1070,13 +1079,13 @@ const DebugRangeIterator = struct { .@"32" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); if (offset_loc + 4 > debug_ranges.len) return bad(); - const offset = readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); + const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); break :off compile_unit.rnglists_base + offset; }, .@"64" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); if (offset_loc + 8 > debug_ranges.len) return bad(); - const offset = readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); + const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); break :off compile_unit.rnglists_base + offset; }, } @@ -1287,7 +1296,7 @@ fn parseDie( attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, -) !?Die { +) ScanError!?Die { const abbrev_code = try fbr.readUleb128(u64); if (abbrev_code == 0) return null; const table_entry = abbrev_table.get(abbrev_code) orelse return bad(); @@ -1588,7 +1597,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { // The header is 8 or 12 bytes depending on is_64. 
if (compile_unit.addr_base < 8) return bad(); - const version = readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); + const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); if (version != 5) return bad(); const addr_size = debug_addr[compile_unit.addr_base - 2]; @@ -1598,9 +1607,9 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { if (byte_offset + addr_size > debug_addr.len) return bad(); return switch (addr_size) { 1 => debug_addr[byte_offset], - 2 => readInt(u16, debug_addr[byte_offset..][0..2], di.endian), - 4 => readInt(u32, debug_addr[byte_offset..][0..4], di.endian), - 8 => readInt(u64, debug_addr[byte_offset..][0..8], di.endian), + 2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian), + 4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian), + 8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian), else => bad(), }; } @@ -1699,7 +1708,7 @@ fn parseFormValue( form_id: u64, format: Format, implicit_const: ?i64, -) anyerror!FormValue { +) ScanError!FormValue { return switch (form_id) { FORM.addr => .{ .addr = try fbr.readAddress(switch (@bitSizeOf(usize)) { 32 => .@"32", @@ -1892,7 +1901,8 @@ const UnitHeader = struct { header_length: u4, unit_length: u64, }; -fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) !UnitHeader { + +fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", @@ -2023,3 +2033,335 @@ fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); } } + +pub const ElfModule = struct { + base_address: usize, + dwarf: Dwarf, + mapped_memory: []align(std.mem.page_size) const u8, + external_mapped_memory: ?[]align(std.mem.page_size) 
const u8, + + pub fn deinit(self: *@This(), allocator: Allocator) void { + self.dwarf.deinit(allocator); + std.posix.munmap(self.mapped_memory); + if (self.external_mapped_memory) |m| std.posix.munmap(m); + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { + // Translate the VA into an address into this object + const relocated_address = address - self.base_address; + return self.dwarf.getSymbol(allocator, relocated_address); + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + return &self.dwarf; + } + + pub const LoadError = error{ + InvalidDebugInfo, + MissingDebugInfo, + InvalidElfMagic, + InvalidElfVersion, + InvalidElfEndian, + /// TODO: implement this and then remove this error code + UnimplementedDwarfForeignEndian, + /// The debug info may be valid but this implementation uses memory + /// mapping which limits things to usize. If the target debug info is + /// 64-bit and host is 32-bit, there may be debug info that is not + /// supportable using this method. + Overflow, + + PermissionDenied, + LockedMemoryLimitExceeded, + MemoryMappingNotSupported, + } || Allocator.Error || std.fs.File.OpenError || OpenError; + + /// Reads debug info from an already mapped ELF file. + /// + /// If the required sections aren't present but a reference to external debug + /// info is, then this function will recurse to attempt to load the debug + /// sections from an external file. 
+ pub fn load( + gpa: Allocator, + mapped_mem: []align(std.mem.page_size) const u8, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(std.mem.page_size) const u8, + elf_filename: ?[]const u8, + ) LoadError!Dwarf.ElfModule { + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; + + const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); + if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .little, + elf.ELFDATA2MSB => .big, + else => return error.InvalidElfEndian, + }; + if (endian != native_endian) return error.UnimplementedDwarfForeignEndian; + + const shoff = hdr.e_shoff; + const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); + const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[cast(usize, str_section_off) orelse return error.Overflow])); + const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; + const shdrs = @as( + [*]const elf.Shdr, + @ptrCast(@alignCast(&mapped_mem[shoff])), + )[0..hdr.e_shnum]; + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + + // Combine section list. This takes ownership over any owned sections from the parent scope. 
+ for (parent_sections, &sections) |*parent, *section_elem| { + if (parent.*) |*p| { + section_elem.* = p.*; + p.owned = false; + } + } + errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); + + var separate_debug_filename: ?[]const u8 = null; + var separate_debug_crc: ?u32 = null; + + for (shdrs) |*shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); + + if (mem.eql(u8, name, ".gnu_debuglink")) { + const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); + const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); + const crc_bytes = gnu_debuglink[crc_offset..][0..4]; + separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); + separate_debug_filename = debug_filename; + continue; + } + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |sect, i| { + if (mem.eql(u8, "." ++ sect.name, name)) section_index = i; + } + if (section_index == null) continue; + if (sections[section_index.?] != null) continue; + + const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { + var section_stream = std.io.fixedBufferStream(section_bytes); + const section_reader = section_stream.reader(); + const chdr = section_reader.readStruct(elf.Chdr) catch continue; + if (chdr.ch_type != .ZLIB) continue; + + var zlib_stream = std.compress.zlib.decompressor(section_reader); + + const decompressed_section = try gpa.alloc(u8, chdr.ch_size); + errdefer gpa.free(decompressed_section); + + const read = zlib_stream.reader().readAll(decompressed_section) catch continue; + assert(read == decompressed_section.len); + + break :blk .{ + .data = decompressed_section, + .virtual_address = shdr.sh_addr, + .owned = true, + }; + } else .{ + .data = section_bytes, + .virtual_address = shdr.sh_addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + + // Attempt to load debug info from an external file + // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html + if (missing_debug_info) { + + // Only allow one level of debug info nesting + if (parent_mapped_mem) |_| { + return error.MissingDebugInfo; + } + + const global_debug_directories = [_][]const u8{ + "/usr/lib/debug", + }; + + // /.build-id/<2-character id prefix>/.debug + if (build_id) |id| blk: { + if (id.len < 3) break :blk; + + // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice + const extension = ".debug"; + var id_prefix_buf: [2]u8 = undefined; + var filename_buf: [38 + extension.len]u8 = undefined; + + _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; + const filename = std.fmt.bufPrint( + &filename_buf, + "{s}" ++ extension, + .{std.fmt.fmtSliceHexLower(id[1..])}, + ) catch break :blk; + + for 
(global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ + global_directory, ".build-id", &id_prefix_buf, filename, + }), + }; + defer gpa.free(path.sub_path); + + return loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem) catch continue; + } + } + + // use the path from .gnu_debuglink, in the same search order as gdb + if (separate_debug_filename) |separate_filename| blk: { + if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) + return error.MissingDebugInfo; + + // / + if (loadPath( + gpa, + .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = separate_filename, + }, + null, + separate_debug_crc, + &sections, + mapped_mem, + )) |debug_info| { + return debug_info; + } else |_| {} + + // /.debug/ + { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }), + }; + defer gpa.free(path.sub_path); + + if (loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} + } + + var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; + const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :blk; + + // // + for (global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), + }; + defer gpa.free(path.sub_path); + if (loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} + } + } + + return error.MissingDebugInfo; + } + + var di: Dwarf = .{ + .endian = endian, + .sections = sections, + .is_macho = false, + }; + + try Dwarf.open(&di, gpa); + + return .{ + .base_address = 0, + .dwarf = di, + .mapped_memory = parent_mapped_mem orelse mapped_mem, + .external_mapped_memory = if 
(parent_mapped_mem != null) mapped_mem else null, + }; + } + + pub fn loadPath( + gpa: Allocator, + elf_file_path: Path, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(std.mem.page_size) const u8, + ) LoadError!Dwarf.ElfModule { + const elf_file = elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}) catch |err| switch (err) { + error.FileNotFound => return missing(), + else => return err, + }; + defer elf_file.close(); + + const end_pos = elf_file.getEndPos() catch return bad(); + const file_len = cast(usize, end_pos) orelse return error.Overflow; + + const mapped_mem = try std.posix.mmap( + null, + file_len, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + elf_file.handle, + 0, + ); + errdefer std.posix.munmap(mapped_mem); + + return load( + gpa, + mapped_mem, + build_id, + expected_crc, + parent_sections, + parent_mapped_mem, + elf_file_path.sub_path, + ); + } +}; + +/// Given an array of virtual memory addresses, sorted ascending, outputs a +/// corresponding array of source locations, by appending to the provided +/// array list. +pub fn resolveSourceLocations( + d: *Dwarf, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. 
+ output: []std.debug.SourceLocation, +) error{ MissingDebugInfo, InvalidDebugInfo }!void { + assert(sorted_pc_addrs.len == output.len); + _ = d; + _ = gpa; + @panic("TODO"); +} + +fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { + if (di.findCompileUnit(address)) |compile_unit| { + return .{ + .name = di.getSymbolName(address) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .source_location = di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => return .{}, + else => return err, + } +} + +pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { + const start = cast(usize, offset) orelse return error.Overflow; + const end = start + (cast(usize, size) orelse return error.Overflow); + return ptr[start..end]; +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig new file mode 100644 index 000000000000..5276ba68ec01 --- /dev/null +++ b/lib/std/debug/Info.zig @@ -0,0 +1,57 @@ +//! Cross-platform abstraction for loading debug information into an in-memory +//! format that supports queries such as "what is the source location of this +//! virtual memory address?" +//! +//! Unlike `std.debug.SelfInfo`, this API does not assume the debug information +//! in question happens to match the host CPU architecture, OS, or other target +//! properties. + +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const Path = std.Build.Cache.Path; +const Dwarf = std.debug.Dwarf; +const page_size = std.mem.page_size; +const assert = std.debug.assert; + +const Info = @This(); + +/// Sorted by key, ascending. 
+address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), + +pub const LoadError = Dwarf.ElfModule.LoadError; + +pub fn load(gpa: Allocator, path: Path) LoadError!Info { + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + const elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); + var info: Info = .{ + .address_map = .{}, + }; + try info.address_map.put(gpa, elf_module.base_address, elf_module); + return info; +} + +pub fn deinit(info: *Info, gpa: Allocator) void { + for (info.address_map.values()) |*elf_module| { + elf_module.dwarf.deinit(gpa); + } + info.address_map.deinit(gpa); + info.* = undefined; +} + +pub const ResolveSourceLocationsError = error{ + MissingDebugInfo, + InvalidDebugInfo, +} || Allocator.Error; + +pub fn resolveSourceLocations( + info: *Info, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []std.debug.SourceLocation, +) ResolveSourceLocationsError!void { + assert(sorted_pc_addrs.len == output.len); + if (info.address_map.entries.len != 1) @panic("TODO"); + const elf_module = &info.address_map.values()[0]; + return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); +} diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index f9747a088ea7..79cbd19a4187 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -587,7 +587,7 @@ pub const Module = switch (native_os) { } if (section_index == null) continue; - const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); sections[section_index.?] 
= .{ .data = section_bytes, .virtual_address = sect.addr, @@ -622,7 +622,7 @@ pub const Module = switch (native_os) { return result.value_ptr; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !Dwarf.SymbolInfo { nosuspend { const result = try self.getOFileInfoForAddress(allocator, address); if (result.symbol == null) return .{}; @@ -641,7 +641,7 @@ pub const Module = switch (native_os) { const addr_off = result.relocated_address - result.symbol.?.addr; const o_file_di = &result.o_file_info.?.di; if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return SymbolInfo{ + return .{ .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", .compile_unit_name = compile_unit.die.getAttrString( o_file_di, @@ -662,7 +662,7 @@ pub const Module = switch (native_os) { }; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{ .symbol_name = stab_symbol }; + return .{ .symbol_name = stab_symbol }; }, else => return err, } @@ -729,7 +729,7 @@ pub const Module = switch (native_os) { } } - fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { + fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?std.debug.Symbol { var coff_section: *align(1) const coff.SectionHeader = undefined; const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { if (sect_contrib.Section > self.coff_section_headers.len) continue; @@ -759,14 +759,14 @@ pub const Module = switch (native_os) { relocated_address - coff_section.virtual_address, ); - return SymbolInfo{ + return .{ .symbol_name = symbol_name, .compile_unit_name = obj_basename, .line_info = opt_line_info, }; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) 
!std.debug.Symbol { // Translate the VA into an address into this object const relocated_address = address - self.base_address; @@ -776,10 +776,10 @@ pub const Module = switch (native_os) { if (self.dwarf) |*dwarf| { const dwarf_address = relocated_address + self.coff_image_base; - return getSymbolFromDwarf(allocator, dwarf_address, dwarf); + return dwarf.getSymbol(allocator, dwarf_address); } - return SymbolInfo{}; + return .{}; } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { @@ -792,41 +792,18 @@ pub const Module = switch (native_os) { }; } }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { - base_address: usize, - dwarf: Dwarf, - mapped_memory: []align(mem.page_size) const u8, - external_mapped_memory: ?[]align(mem.page_size) const u8, - - pub fn deinit(self: *@This(), allocator: Allocator) void { - self.dwarf.deinit(allocator); - posix.munmap(self.mapped_memory); - if (self.external_mapped_memory) |m| posix.munmap(m); - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { - _ = allocator; - _ = address; - return &self.dwarf; - } - }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule, .wasi, .emscripten => struct { pub fn deinit(self: *@This(), allocator: Allocator) void { _ = self; _ = allocator; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { _ = self; _ = allocator; _ = address; - return SymbolInfo{}; + return .{}; } pub fn 
getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { @@ -1068,7 +1045,7 @@ pub fn readElfDebugInfo( expected_crc: ?u32, parent_sections: *Dwarf.SectionArray, parent_mapped_mem: ?[]align(mem.page_size) const u8, -) !Module { +) !Dwarf.ElfModule { nosuspend { const elf_file = (if (elf_filename) |filename| blk: { break :blk fs.cwd().openFile(filename, .{}); @@ -1078,176 +1055,15 @@ pub fn readElfDebugInfo( }; const mapped_mem = try mapWholeFile(elf_file); - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; - - const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); - if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; - if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; - - const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { - elf.ELFDATA2LSB => .little, - elf.ELFDATA2MSB => .big, - else => return error.InvalidElfEndian, - }; - assert(endian == native_endian); // this is our own debug info - - const shoff = hdr.e_shoff; - const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); - const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; - const shdrs = @as( - [*]const elf.Shdr, - @ptrCast(@alignCast(&mapped_mem[shoff])), - )[0..hdr.e_shnum]; - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - - // Combine section list. This takes ownership over any owned sections from the parent scope. 
- for (parent_sections, &sections) |*parent, *section| { - if (parent.*) |*p| { - section.* = p.*; - p.owned = false; - } - } - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - - var separate_debug_filename: ?[]const u8 = null; - var separate_debug_crc: ?u32 = null; - - for (shdrs) |*shdr| { - if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; - const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); - - if (mem.eql(u8, name, ".gnu_debuglink")) { - const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); - const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); - const crc_bytes = gnu_debuglink[crc_offset..][0..4]; - separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); - separate_debug_filename = debug_filename; - continue; - } - - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "." ++ section.name, name)) section_index = i; - } - if (section_index == null) continue; - if (sections[section_index.?] != null) continue; - - const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { - var section_stream = std.io.fixedBufferStream(section_bytes); - var section_reader = section_stream.reader(); - const chdr = section_reader.readStruct(elf.Chdr) catch continue; - if (chdr.ch_type != .ZLIB) continue; - - var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); - - const decompressed_section = try allocator.alloc(u8, chdr.ch_size); - errdefer allocator.free(decompressed_section); - - const read = zlib_stream.reader().readAll(decompressed_section) catch continue; - assert(read == decompressed_section.len); - - break :blk .{ - .data = decompressed_section, - .virtual_address = shdr.sh_addr, - .owned = true, - }; - } else .{ - .data = section_bytes, - .virtual_address = shdr.sh_addr, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - - // Attempt to load debug info from an external file - // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html - if (missing_debug_info) { - - // Only allow one level of debug info nesting - if (parent_mapped_mem) |_| { - return error.MissingDebugInfo; - } - - const global_debug_directories = [_][]const u8{ - "/usr/lib/debug", - }; - - // /.build-id/<2-character id prefix>/.debug - if (build_id) |id| blk: { - if (id.len < 3) break :blk; - - // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice - const extension = ".debug"; - var id_prefix_buf: [2]u8 = undefined; - var filename_buf: [38 + extension.len]u8 = undefined; - - _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; - const filename = std.fmt.bufPrint( - &filename_buf, - "{s}" ++ extension, - .{std.fmt.fmtSliceHexLower(id[1..])}, - ) catch break 
:blk; - - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); - defer allocator.free(path); - - return readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem) catch continue; - } - } - - // use the path from .gnu_debuglink, in the same search order as gdb - if (separate_debug_filename) |separate_filename| blk: { - if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; - - // / - if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} - - // /.debug/ - { - const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); - defer allocator.free(path); - - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} - } - - var cwd_buf: [fs.max_path_bytes]u8 = undefined; - const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk; - - // // - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); - defer allocator.free(path); - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} - } - } - - return error.MissingDebugInfo; - } - - var di = Dwarf{ - .endian = endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&di, allocator); - - return .{ - .base_address = undefined, - .dwarf = di, - .mapped_memory = parent_mapped_mem orelse mapped_mem, - .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, - }; + return Dwarf.ElfModule.load( + allocator, + mapped_mem, + build_id, + expected_crc, + parent_sections, + parent_mapped_mem, + elf_filename, + ); } } @@ -1289,22 +1105,6 @@ fn mapWholeFile(file: File) 
![]align(mem.page_size) const u8 { } } -fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { - const start = math.cast(usize, offset) orelse return error.Overflow; - const end = start + (math.cast(usize, size) orelse return error.Overflow); - return ptr[start..end]; -} - -pub const SymbolInfo = struct { - symbol_name: []const u8 = "???", - compile_unit_name: []const u8 = "???", - line_info: ?std.debug.SourceLocation = null, - - pub fn deinit(self: SymbolInfo, allocator: Allocator) void { - if (self.line_info) |li| allocator.free(li.file_name); - } -}; - fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { var min: usize = 0; var max: usize = symbols.len - 1; @@ -1350,26 +1150,6 @@ test machoSearchSymbols { try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); } -fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInfo { - if (nosuspend di.findCompileUnit(address)) |compile_unit| { - return SymbolInfo{ - .symbol_name = nosuspend di.getSymbolName(address) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{}; - }, - else => return err, - } -} - /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig new file mode 100644 index 000000000000..aba2911a9156 --- /dev/null +++ b/tools/dump-cov.zig @@ -0,0 +1,70 @@ +//! Reads a Zig coverage file and prints human-readable information to stdout, +//! including file:line:column information for each PC. + +const std = @import("std"); +const fatal = std.process.fatal; +const Path = std.Build.Cache.Path; +const assert = std.debug.assert; + +pub fn main() !void { + var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .{}; + defer _ = general_purpose_allocator.deinit(); + const gpa = general_purpose_allocator.allocator(); + + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + const args = try std.process.argsAlloc(arena); + const exe_file_name = args[1]; + const cov_file_name = args[2]; + + const exe_path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = exe_file_name, + }; + const cov_path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = cov_file_name, + }; + + var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { + fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); + }; + defer debug_info.deinit(gpa); + + const cov_bytes = cov_path.root_dir.handle.readFileAlloc(arena, cov_path.sub_path, 1 << 30) catch |err| { + fatal("failed to load coverage file {}: {s}", .{ cov_path, @errorName(err) }); + }; + + var bw = std.io.bufferedWriter(std.io.getStdOut().writer()); + const stdout = bw.writer(); + + const header: *align(1) SeenPcsHeader = @ptrCast(cov_bytes); + try stdout.print("{any}\n", .{header.*}); + //const n_bitset_elems = (header.pcs_len + 7) / 8; + const pcs_bytes = cov_bytes[@sizeOf(SeenPcsHeader)..][0 .. 
header.pcs_len * @sizeOf(usize)]; + const pcs = try arena.alloc(usize, header.pcs_len); + for (0..pcs_bytes.len / @sizeOf(usize), pcs) |i, *pc| { + pc.* = std.mem.readInt(usize, pcs_bytes[i * @sizeOf(usize) ..][0..@sizeOf(usize)], .little); + } + assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); + + const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len); + try debug_info.resolveSourceLocations(gpa, pcs, source_locations); + + for (pcs, source_locations) |pc, sl| { + try stdout.print("{x}: {s}:{d}:{d}\n", .{ + pc, sl.file_name, sl.line, sl.column, + }); + } + + try bw.flush(); +} + +const SeenPcsHeader = extern struct { + n_runs: usize, + deduplicated_runs: usize, + pcs_len: usize, + lowest_stack: usize, +}; From de47acd732dca8b4d2f2b3559307f488ccac940d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 17:45:31 -0700 Subject: [PATCH 06/34] code coverage dumping tool basic implementation * std.debug.Dwarf: add `sortCompileUnits` along with a field to track the state for the purpose of assertions and correct API usage. This makes batch lookups faster. - in the future, findCompileUnit should be enhanced to rely on sorted compile units as well. * implement `std.debug.Dwarf.resolveSourceLocations` as well as `std.debug.Info.resolveSourceLocations`. It's still pretty slow, since it calls getLineNumberInfo for each array element, repeating a lot of work unnecessarily. * integrate these APIs with `std.Progress` to understand what is taking so long. The output I'm seeing from this tool shows a lot of missing source locations. In particular, the main area of interest is missing for my tokenizer fuzzing example. 
--- lib/std/debug.zig | 6 +++ lib/std/debug/Dwarf.zig | 85 +++++++++++++++++++++++++++++++++++++---- lib/std/debug/Info.zig | 17 +++++---- tools/dump-cov.zig | 10 ++++- 4 files changed, 102 insertions(+), 16 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 907f7711a79a..6d034146c3c2 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -27,6 +27,12 @@ pub const SourceLocation = struct { line: u64, column: u64, file_name: []const u8, + + pub const invalid: SourceLocation = .{ + .line = 0, + .column = 0, + .file_name = &.{}, + }; }; pub const Symbol = struct { diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 3c150b3b18a5..170fa774c088 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -39,6 +39,7 @@ pub const call_frame = @import("Dwarf/call_frame.zig"); endian: std.builtin.Endian, sections: SectionArray = null_section_array, is_macho: bool, +compile_units_sorted: bool, // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{}, @@ -728,9 +729,9 @@ pub const OpenError = ScanError; /// Initialize DWARF info. The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). -pub fn open(di: *Dwarf, gpa: Allocator) OpenError!void { - try di.scanAllFunctions(gpa); - try di.scanAllCompileUnits(gpa); +pub fn open(d: *Dwarf, gpa: Allocator) OpenError!void { + try d.scanAllFunctions(gpa); + try d.scanAllCompileUnits(gpa); } const PcRange = struct { @@ -1061,6 +1062,39 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { } } +/// Populate missing PC ranges in compilation units, and then sort them by start address. +/// Does not guarantee pc_range to be non-null because there could be missing debug info. 
+pub fn sortCompileUnits(d: *Dwarf) ScanError!void { + assert(!d.compile_units_sorted); + + for (d.compile_unit_list.items) |*cu| { + if (cu.pc_range != null) continue; + const ranges_value = cu.die.getAttr(AT.ranges) orelse continue; + var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue; + var start: u64 = maxInt(u64); + var end: u64 = 0; + while (try iter.next()) |range| { + start = @min(start, range.start_addr); + end = @max(end, range.end_addr); + } + if (end != 0) cu.pc_range = .{ + .start = start, + .end = end, + }; + } + + std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct { + fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { + _ = ctx; + const a_range = a.pc_range orelse return false; + const b_range = b.pc_range orelse return true; + return a_range.start < b_range.start; + } + }.lessThan); + + d.compile_units_sorted = true; +} + const DebugRangeIterator = struct { base_address: u64, section_type: Section.Id, @@ -1208,6 +1242,7 @@ const DebugRangeIterator = struct { } }; +/// TODO: change this to binary searching the sorted compile unit list pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { @@ -2275,6 +2310,7 @@ pub const ElfModule = struct { .endian = endian, .sections = sections, .is_macho = false, + .compile_units_sorted = false, }; try Dwarf.open(&di, gpa); @@ -2326,6 +2362,8 @@ pub const ElfModule = struct { } }; +pub const ResolveSourceLocationsError = Allocator.Error || DeprecatedFixedBufferReader.Error; + /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding array of source locations, by appending to the provided /// array list. @@ -2335,11 +2373,44 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. 
output: []std.debug.SourceLocation, -) error{ MissingDebugInfo, InvalidDebugInfo }!void { + parent_prog_node: std.Progress.Node, +) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); - _ = d; - _ = gpa; - @panic("TODO"); + assert(d.compile_units_sorted); + + const prog_node = parent_prog_node.start("Resolve Source Locations", sorted_pc_addrs.len); + defer prog_node.end(); + + var cu_i: usize = 0; + var cu: *const CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + defer prog_node.completeOne(); + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + range = cu.pc_range orelse { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + } + // TODO: instead of calling this function, break the function up into one that parses the + // information once and prepares a context that can be reused for the entire batch. 
+ if (getLineNumberInfo(d, gpa, cu.*, pc)) |src_loc| { + out.* = src_loc; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid, + else => |e| return e, + } + } } fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index 5276ba68ec01..3c61c4072fb1 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -20,9 +20,14 @@ address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), pub const LoadError = Dwarf.ElfModule.LoadError; -pub fn load(gpa: Allocator, path: Path) LoadError!Info { +pub fn load(gpa: Allocator, path: Path, parent_prog_node: std.Progress.Node) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; - const elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); + var prog_node = parent_prog_node.start("Loading Debug Info", 0); + defer prog_node.end(); + var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); + prog_node.end(); + prog_node = parent_prog_node.start("Sort Compile Units", 0); + try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, }; @@ -38,10 +43,7 @@ pub fn deinit(info: *Info, gpa: Allocator) void { info.* = undefined; } -pub const ResolveSourceLocationsError = error{ - MissingDebugInfo, - InvalidDebugInfo, -} || Allocator.Error; +pub const ResolveSourceLocationsError = Dwarf.ResolveSourceLocationsError; pub fn resolveSourceLocations( info: *Info, @@ -49,9 +51,10 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. 
output: []std.debug.SourceLocation, + parent_prog_node: std.Progress.Node, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); + return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output, parent_prog_node); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index aba2911a9156..8449dec33e1e 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -28,7 +28,10 @@ pub fn main() !void { .sub_path = cov_file_name, }; - var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { + const prog_node = std.Progress.start(.{}); + defer prog_node.end(); + + var debug_info = std.debug.Info.load(gpa, exe_path, prog_node) catch |err| { fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); @@ -51,7 +54,10 @@ pub fn main() !void { assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len); - try debug_info.resolveSourceLocations(gpa, pcs, source_locations); + try debug_info.resolveSourceLocations(gpa, pcs, source_locations, prog_node); + defer for (source_locations) |sl| { + gpa.free(sl.file_name); + }; for (pcs, source_locations) |pc, sl| { try stdout.print("{x}: {s}:{d}:{d}\n", .{ From 66954e833051872308641b3a1af12aa865d5d59a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 21:22:33 -0700 Subject: [PATCH 07/34] std.debug.FixedBufferReader is fine it does not need to be deprecated --- lib/std/debug.zig | 95 +---------------------------- lib/std/debug/Dwarf.zig | 41 ++++++------- lib/std/debug/FixedBufferReader.zig | 91 +++++++++++++++++++++++++++ lib/std/debug/SelfInfo.zig | 2 +- 4 files changed, 114 insertions(+), 115 deletions(-) create mode 100644 lib/std/debug/FixedBufferReader.zig 
diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 6d034146c3c2..80c196e9d848 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -14,6 +14,7 @@ const native_os = builtin.os.tag; const native_endian = native_arch.endian(); pub const MemoryAccessor = @import("debug/MemoryAccessor.zig"); +pub const FixedBufferReader = @import("debug/FixedBufferReader.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); @@ -1494,99 +1495,6 @@ pub const SafetyLock = struct { } }; -/// Deprecated. Don't use this, just read from your memory directly. -/// -/// This only exists because someone was too lazy to rework logic that used to -/// operate on an open file to operate on a memory buffer instead. -pub const DeprecatedFixedBufferReader = struct { - buf: []const u8, - pos: usize = 0, - endian: std.builtin.Endian, - - pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; - - pub fn seekTo(fbr: *DeprecatedFixedBufferReader, pos: u64) Error!void { - if (pos > fbr.buf.len) return error.EndOfBuffer; - fbr.pos = @intCast(pos); - } - - pub fn seekForward(fbr: *DeprecatedFixedBufferReader, amount: u64) Error!void { - if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; - fbr.pos += @intCast(amount); - } - - pub inline fn readByte(fbr: *DeprecatedFixedBufferReader) Error!u8 { - if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; - defer fbr.pos += 1; - return fbr.buf[fbr.pos]; - } - - pub fn readByteSigned(fbr: *DeprecatedFixedBufferReader) Error!i8 { - return @bitCast(try fbr.readByte()); - } - - pub fn readInt(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - const size = @divExact(@typeInfo(T).Int.bits, 8); - if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; - defer fbr.pos += size; - return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); - } - - pub fn readIntChecked( - fbr: *DeprecatedFixedBufferReader, - comptime T: type, - 
ma: *MemoryAccessor, - ) Error!T { - if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) - return error.InvalidBuffer; - - return fbr.readInt(T); - } - - pub fn readUleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readUleb128(T, fbr); - } - - pub fn readIleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readIleb128(T, fbr); - } - - pub fn readAddress(fbr: *DeprecatedFixedBufferReader, format: std.dwarf.Format) Error!u64 { - return switch (format) { - .@"32" => try fbr.readInt(u32), - .@"64" => try fbr.readInt(u64), - }; - } - - pub fn readAddressChecked( - fbr: *DeprecatedFixedBufferReader, - format: std.dwarf.Format, - ma: *MemoryAccessor, - ) Error!u64 { - return switch (format) { - .@"32" => try fbr.readIntChecked(u32, ma), - .@"64" => try fbr.readIntChecked(u64, ma), - }; - } - - pub fn readBytes(fbr: *DeprecatedFixedBufferReader, len: usize) Error![]const u8 { - if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; - defer fbr.pos += len; - return fbr.buf[fbr.pos..][0..len]; - } - - pub fn readBytesTo(fbr: *DeprecatedFixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { - const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ - u8, - fbr.buf, - fbr.pos, - sentinel, - }) orelse return error.EndOfBuffer; - defer fbr.pos = end + 1; - return fbr.buf[fbr.pos..end :sentinel]; - } -}; - /// Detect whether the program is being executed in the Valgrind virtual machine. /// /// When Valgrind integrations are disabled, this returns comptime-known false. 
@@ -1600,6 +1508,7 @@ pub inline fn inValgrind() bool { test { _ = &Dwarf; _ = &MemoryAccessor; + _ = &FixedBufferReader; _ = &Pdb; _ = &SelfInfo; _ = &dumpHex; diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 170fa774c088..446dc589904d 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -27,8 +27,7 @@ const maxInt = std.math.maxInt; const MemoryAccessor = std.debug.MemoryAccessor; const Path = std.Build.Cache.Path; -/// Did I mention this is deprecated? -const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader; +const FixedBufferReader = std.debug.FixedBufferReader; const Dwarf = @This(); @@ -328,7 +327,7 @@ pub const ExceptionFrameHeader = struct { var left: usize = 0; var len: usize = self.fde_count; - var fbr: DeprecatedFixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; while (len > 1) { const mid = left + len / 2; @@ -371,7 +370,7 @@ pub const ExceptionFrameHeader = struct { const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse maxInt(u32)]; const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: DeprecatedFixedBufferReader = .{ + var eh_frame_fbr: FixedBufferReader = .{ .buf = eh_frame, .pos = fde_offset, .endian = native_endian, @@ -429,9 +428,9 @@ pub const EntryHeader = struct { } /// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure. - /// `fbr` must be a DeprecatedFixedBufferReader backed by either the .eh_frame or .debug_frame sections. + /// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections. 
pub fn read( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor, dwarf_section: Section.Id, ) !EntryHeader { @@ -544,7 +543,7 @@ pub const CommonInformationEntry = struct { ) !CommonInformationEntry { if (addr_size_bytes > 8) return error.UnsupportedAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; const version = try fbr.readByte(); switch (dwarf_section) { @@ -678,7 +677,7 @@ pub const FrameDescriptionEntry = struct { ) !FrameDescriptionEntry { if (addr_size_bytes > 8) return error.InvalidAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset), @@ -785,10 +784,10 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { const ScanError = error{ InvalidDebugInfo, MissingDebugInfo, -} || Allocator.Error || std.debug.DeprecatedFixedBufferReader.Error; +} || Allocator.Error || std.debug.FixedBufferReader.Error; fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; while (this_unit_offset < fbr.buf.len) { @@ -975,7 +974,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { } fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; var attrs_buf = 
std.ArrayList(Die.Attr).init(allocator); @@ -1100,7 +1099,7 @@ const DebugRangeIterator = struct { section_type: Section.Id, di: *const Dwarf, compile_unit: *const CompileUnit, - fbr: DeprecatedFixedBufferReader, + fbr: FixedBufferReader, pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges; @@ -1275,7 +1274,7 @@ fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const } fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { - var fbr: DeprecatedFixedBufferReader = .{ + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_abbrev).?, .pos = cast(usize, offset) orelse return bad(), .endian = di.endian, @@ -1327,7 +1326,7 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table } fn parseDie( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, @@ -1362,7 +1361,7 @@ pub fn getLineNumberInfo( const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; try fbr.seekTo(line_info_offset); const unit_header = try readUnitHeader(&fbr, null); @@ -1655,7 +1654,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { /// of FDEs is built for binary searching during unwinding. 
pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: DeprecatedFixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; const version = try fbr.readByte(); if (version != 1) break :blk; @@ -1695,7 +1694,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; for (frame_sections) |frame_section| { if (di.section(frame_section)) |section_data| { - var fbr: DeprecatedFixedBufferReader = .{ .buf = section_data, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian }; while (fbr.pos < fbr.buf.len) { const entry_header = try EntryHeader.read(&fbr, null, frame_section); switch (entry_header.type) { @@ -1739,7 +1738,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) } fn parseFormValue( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, form_id: u64, format: Format, implicit_const: ?i64, @@ -1937,7 +1936,7 @@ const UnitHeader = struct { unit_length: u64, }; -fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { +fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", @@ -2002,7 +2001,7 @@ const EhPointerContext = struct { text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; -fn readEhPointer(fbr: *DeprecatedFixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { +fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { if (enc == EH.PE.omit) return null; const value: union(enum) { @@ -2362,7 
+2361,7 @@ pub const ElfModule = struct { } }; -pub const ResolveSourceLocationsError = Allocator.Error || DeprecatedFixedBufferReader.Error; +pub const ResolveSourceLocationsError = Allocator.Error || FixedBufferReader.Error; /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding array of source locations, by appending to the provided diff --git a/lib/std/debug/FixedBufferReader.zig b/lib/std/debug/FixedBufferReader.zig new file mode 100644 index 000000000000..2a90ba569e33 --- /dev/null +++ b/lib/std/debug/FixedBufferReader.zig @@ -0,0 +1,91 @@ +const std = @import("std.zig"); +const MemoryAccessor = std.debug.MemoryAccessor; + +const FixedBufferReader = @This(); + +buf: []const u8, +pos: usize = 0, +endian: std.builtin.Endian, + +pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; + +pub fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void { + if (pos > fbr.buf.len) return error.EndOfBuffer; + fbr.pos = @intCast(pos); +} + +pub fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void { + if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; + fbr.pos += @intCast(amount); +} + +pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 { + if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; + defer fbr.pos += 1; + return fbr.buf[fbr.pos]; +} + +pub fn readByteSigned(fbr: *FixedBufferReader) Error!i8 { + return @bitCast(try fbr.readByte()); +} + +pub fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T { + const size = @divExact(@typeInfo(T).Int.bits, 8); + if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; + defer fbr.pos += size; + return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); +} + +pub fn readIntChecked( + fbr: *FixedBufferReader, + comptime T: type, + ma: *MemoryAccessor, +) Error!T { + if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) + return error.InvalidBuffer; + + return fbr.readInt(T); +} + +pub fn readUleb128(fbr: *FixedBufferReader, 
comptime T: type) Error!T { + return std.leb.readUleb128(T, fbr); +} + +pub fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { + return std.leb.readIleb128(T, fbr); +} + +pub fn readAddress(fbr: *FixedBufferReader, format: std.dwarf.Format) Error!u64 { + return switch (format) { + .@"32" => try fbr.readInt(u32), + .@"64" => try fbr.readInt(u64), + }; +} + +pub fn readAddressChecked( + fbr: *FixedBufferReader, + format: std.dwarf.Format, + ma: *MemoryAccessor, +) Error!u64 { + return switch (format) { + .@"32" => try fbr.readIntChecked(u32, ma), + .@"64" => try fbr.readIntChecked(u64, ma), + }; +} + +pub fn readBytes(fbr: *FixedBufferReader, len: usize) Error![]const u8 { + if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; + defer fbr.pos += len; + return fbr.buf[fbr.pos..][0..len]; +} + +pub fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { + const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ + u8, + fbr.buf, + fbr.pos, + sentinel, + }) orelse return error.EndOfBuffer; + defer fbr.pos = end + 1; + return fbr.buf[fbr.pos..end :sentinel]; +} diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 79cbd19a4187..ba0d7bc03917 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -1576,7 +1576,7 @@ pub fn unwindFrameDwarf( const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; if (fde_offset >= frame_section.len) return error.MissingFDE; - var fbr: std.debug.DeprecatedFixedBufferReader = .{ + var fbr: std.debug.FixedBufferReader = .{ .buf = frame_section, .pos = fde_offset, .endian = di.endian, From 1792258dc813cde7083fd7860442e6ec92afd4ba Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 23:31:28 -0700 Subject: [PATCH 08/34] std.debug.Dwarf: precompute .debug_line table yields a 60x speedup for resolveSourceLocations in debug builds --- lib/std/debug.zig | 2 +- lib/std/debug/Dwarf.zig | 310 
+++++++++++++--------------- lib/std/debug/FixedBufferReader.zig | 4 +- 3 files changed, 150 insertions(+), 166 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 80c196e9d848..7f4f6b7df20c 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -762,7 +762,7 @@ pub fn writeCurrentStackTrace( // an overflow. We do not need to signal `StackIterator` as it will correctly detect this // condition on the subsequent iteration and return `null` thus terminating the loop. // same behaviour for x86-windows-msvc - const address = if (return_address == 0) return_address else return_address - 1; + const address = return_address -| 1; try printSourceAtAddress(debug_info, out_stream, address, tty_config); } else printLastUnwindError(&it, debug_info, out_stream, tty_config); } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 446dc589904d..06ffad9441de 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -138,6 +138,29 @@ pub const CompileUnit = struct { rnglists_base: usize, loclists_base: usize, frame_base: ?*const FormValue, + + src_loc_cache: ?SrcLocCache, + + pub const SrcLocCache = struct { + line_table: LineTable, + directories: []const FileEntry, + files: []FileEntry, + version: u16, + + pub const LineTable = std.AutoArrayHashMapUnmanaged(u64, LineEntry); + + pub const LineEntry = struct { + line: u32, + column: u32, + file: u32, + }; + + pub fn findSource(slc: *const SrcLocCache, address: u64) !LineEntry { + const index = std.sort.upperBound(u64, address, slc.line_table.keys(), {}, std.sort.asc(u64)); + if (index == 0) return missing(); + return slc.line_table.values()[index - 1]; + } + }; }; pub const FormValue = union(enum) { @@ -760,6 +783,11 @@ pub fn deinit(di: *Dwarf, gpa: Allocator) void { } di.abbrev_table_list.deinit(gpa); for (di.compile_unit_list.items) |*cu| { + if (cu.src_loc_cache) |*slc| { + slc.line_table.deinit(gpa); + gpa.free(slc.directories); + gpa.free(slc.files); + } cu.die.deinit(gpa); } 
di.compile_unit_list.deinit(gpa); @@ -846,6 +874,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { .rnglists_base = 0, .loclists_base = 0, .frame_base = null, + .src_loc_cache = null, }; while (true) { @@ -1032,6 +1061,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { .rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0, .loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0, .frame_base = compile_unit_die.getAttr(AT.frame_base), + .src_loc_cache = null, }; compile_unit.pc_range = x: { @@ -1242,7 +1272,7 @@ const DebugRangeIterator = struct { }; /// TODO: change this to binary searching the sorted compile unit list -pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit { +pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; @@ -1352,34 +1382,36 @@ fn parseDie( }; } -pub fn getLineNumberInfo( - di: *Dwarf, - allocator: Allocator, - compile_unit: CompileUnit, - target_address: u64, -) !std.debug.SourceLocation { - const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); +fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) !CompileUnit.SrcLocCache { + const compile_unit_cwd = try compile_unit.die.getAttrString(d, AT.comp_dir, d.section(.debug_line_str), compile_unit.*); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ + .buf = d.section(.debug_line).?, + .endian = d.endian, + }; try fbr.seekTo(line_info_offset); const unit_header = try readUnitHeader(&fbr, 
null); if (unit_header.unit_length == 0) return missing(); + const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); if (version < 2) return bad(); - var addr_size: u8 = switch (unit_header.format) { - .@"32" => 4, - .@"64" => 8, + const addr_size: u8, const seg_size: u8 = if (version >= 5) .{ + try fbr.readByte(), + try fbr.readByte(), + } else .{ + switch (unit_header.format) { + .@"32" => 4, + .@"64" => 8, + }, + 0, }; - var seg_size: u8 = 0; - if (version >= 5) { - addr_size = try fbr.readByte(); - seg_size = try fbr.readByte(); - } + _ = addr_size; + _ = seg_size; const prologue_length = try fbr.readAddress(unit_header.format); const prog_start_offset = fbr.pos + prologue_length; @@ -1388,8 +1420,8 @@ pub fn getLineNumberInfo( if (minimum_instruction_length == 0) return bad(); if (version >= 4) { - // maximum_operations_per_instruction - _ = try fbr.readByte(); + const maximum_operations_per_instruction = try fbr.readByte(); + _ = maximum_operations_per_instruction; } const default_is_stmt = (try fbr.readByte()) != 0; @@ -1402,18 +1434,18 @@ pub fn getLineNumberInfo( const standard_opcode_lengths = try fbr.readBytes(opcode_base - 1); - var include_directories = std.ArrayList(FileEntry).init(allocator); - defer include_directories.deinit(); - var file_entries = std.ArrayList(FileEntry).init(allocator); - defer file_entries.deinit(); + var directories: std.ArrayListUnmanaged(FileEntry) = .{}; + defer directories.deinit(gpa); + var file_entries: std.ArrayListUnmanaged(FileEntry) = .{}; + defer file_entries.deinit(gpa); if (version < 5) { - try include_directories.append(.{ .path = compile_unit_cwd }); + try directories.append(gpa, .{ .path = compile_unit_cwd }); while (true) { const dir = try fbr.readBytesTo(0); if (dir.len == 0) break; - try include_directories.append(.{ .path = dir }); + try directories.append(gpa, .{ .path = dir }); } while (true) { @@ -1422,7 +1454,7 @@ pub fn getLineNumberInfo( const 
dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = file_name, .dir_index = dir_index, .mtime = mtime, @@ -1446,52 +1478,10 @@ pub fn getLineNumberInfo( } const directories_count = try fbr.readUleb128(usize); - try include_directories.ensureUnusedCapacity(directories_count); - { - var i: usize = 0; - while (i < directories_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue( - &fbr, - ent_fmt.form_code, - unit_header.format, - null, - ); - switch (ent_fmt.content_type_code) { - DW.LNCT.path => e.path = try form_value.getString(di.*), - DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), - DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), - DW.LNCT.size => e.size = try form_value.getUInt(u64), - DW.LNCT.MD5 => e.md5 = switch (form_value) { - .data16 => |data16| data16.*, - else => return bad(), - }, - else => continue, - } - } - include_directories.appendAssumeCapacity(e); - } - } - } - var file_ent_fmt_buf: [10]FileEntFmt = undefined; - const file_name_entry_format_count = try fbr.readByte(); - if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { - ent_fmt.* = .{ - .content_type_code = try fbr.readUleb128(u8), - .form_code = try fbr.readUleb128(u16), - }; - } - - const file_names_count = try fbr.readUleb128(usize); - try file_entries.ensureUnusedCapacity(file_names_count); - { - var i: usize = 0; - while (i < file_names_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + for (try directories.addManyAsSlice(gpa, directories_count)) |*e| { + e.* = .{ .path = &.{} }; + for (dir_ent_fmt_buf[0..directory_entry_format_count]) 
|ent_fmt| { const form_value = try parseFormValue( &fbr, ent_fmt.form_code, @@ -1499,7 +1489,7 @@ pub fn getLineNumberInfo( null, ); switch (ent_fmt.content_type_code) { - DW.LNCT.path => e.path = try form_value.getString(di.*), + DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), DW.LNCT.size => e.size = try form_value.getUInt(u64), @@ -1510,17 +1500,49 @@ pub fn getLineNumberInfo( else => continue, } } - file_entries.appendAssumeCapacity(e); + } + } + + var file_ent_fmt_buf: [10]FileEntFmt = undefined; + const file_name_entry_format_count = try fbr.readByte(); + if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { + ent_fmt.* = .{ + .content_type_code = try fbr.readUleb128(u8), + .form_code = try fbr.readUleb128(u16), + }; + } + + const file_names_count = try fbr.readUleb128(usize); + try file_entries.ensureUnusedCapacity(gpa, file_names_count); + + for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| { + e.* = .{ .path = &.{} }; + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + const form_value = try parseFormValue( + &fbr, + ent_fmt.form_code, + unit_header.format, + null, + ); + switch (ent_fmt.content_type_code) { + DW.LNCT.path => e.path = try form_value.getString(d.*), + DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), + DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), + DW.LNCT.size => e.size = try form_value.getUInt(u64), + DW.LNCT.MD5 => e.md5 = switch (form_value) { + .data16 => |data16| data16.*, + else => return bad(), + }, + else => continue, + } } } } - var prog = LineNumberProgram.init( - default_is_stmt, - include_directories.items, - target_address, - version, - ); + var prog = LineNumberProgram.init(default_is_stmt, version); + var line_table: 
CompileUnit.SrcLocCache.LineTable = .{}; + errdefer line_table.deinit(gpa); try fbr.seekTo(prog_start_offset); @@ -1536,7 +1558,7 @@ pub fn getLineNumberInfo( switch (sub_op) { DW.LNE.end_sequence => { prog.end_sequence = true; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.reset(); }, DW.LNE.set_address => { @@ -1548,7 +1570,7 @@ pub fn getLineNumberInfo( const dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = path, .dir_index = dir_index, .mtime = mtime, @@ -1564,12 +1586,12 @@ pub fn getLineNumberInfo( const inc_line = @as(i32, line_base) + @as(i32, adjusted_opcode % line_range); prog.line += inc_line; prog.address += inc_addr; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; } else { switch (opcode) { DW.LNS.copy => { - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; }, DW.LNS.advance_pc => { @@ -1611,7 +1633,35 @@ pub fn getLineNumberInfo( } } - return missing(); + return .{ + .line_table = line_table, + .directories = try directories.toOwnedSlice(gpa), + .files = try file_entries.toOwnedSlice(gpa), + .version = version, + }; +} + +pub fn getLineNumberInfo( + d: *Dwarf, + gpa: Allocator, + compile_unit: *CompileUnit, + target_address: u64, +) !std.debug.SourceLocation { + if (compile_unit.src_loc_cache == null) + compile_unit.src_loc_cache = try runLineNumberProgram(d, gpa, compile_unit); + const slc = &compile_unit.src_loc_cache.?; + const entry = try slc.findSource(target_address); + const file_index = entry.file - @intFromBool(slc.version < 5); + if (file_index >= slc.files.len) return bad(); + const file_entry = &slc.files[file_index]; + if 
(file_entry.dir_index >= slc.directories.len) return bad(); + const dir_name = slc.directories[file_entry.dir_index].path; + const file_name = try std.fs.path.join(gpa, &.{ dir_name, file_entry.path }); + return .{ + .line = entry.line, + .column = entry.column, + .file_name = file_name, + }; } fn getString(di: Dwarf, offset: u64) ![:0]const u8 { @@ -1826,17 +1876,6 @@ const LineNumberProgram = struct { end_sequence: bool, default_is_stmt: bool, - target_address: u64, - include_dirs: []const FileEntry, - - prev_valid: bool, - prev_address: u64, - prev_file: usize, - prev_line: i64, - prev_column: u64, - prev_is_stmt: bool, - prev_basic_block: bool, - prev_end_sequence: bool, // Reset the state machine following the DWARF specification pub fn reset(self: *LineNumberProgram) void { @@ -1847,24 +1886,10 @@ const LineNumberProgram = struct { self.is_stmt = self.default_is_stmt; self.basic_block = false; self.end_sequence = false; - // Invalidate all the remaining fields - self.prev_valid = false; - self.prev_address = 0; - self.prev_file = undefined; - self.prev_line = undefined; - self.prev_column = undefined; - self.prev_is_stmt = undefined; - self.prev_basic_block = undefined; - self.prev_end_sequence = undefined; } - pub fn init( - is_stmt: bool, - include_dirs: []const FileEntry, - target_address: u64, - version: u16, - ) LineNumberProgram { - return LineNumberProgram{ + pub fn init(is_stmt: bool, version: u16) LineNumberProgram { + return .{ .address = 0, .file = 1, .line = 1, @@ -1873,60 +1898,17 @@ const LineNumberProgram = struct { .is_stmt = is_stmt, .basic_block = false, .end_sequence = false, - .include_dirs = include_dirs, .default_is_stmt = is_stmt, - .target_address = target_address, - .prev_valid = false, - .prev_address = 0, - .prev_file = undefined, - .prev_line = undefined, - .prev_column = undefined, - .prev_is_stmt = undefined, - .prev_basic_block = undefined, - .prev_end_sequence = undefined, }; } - pub fn checkLineMatch( - self: 
*LineNumberProgram, - allocator: Allocator, - file_entries: []const FileEntry, - ) !?std.debug.SourceLocation { - if (self.prev_valid and - self.target_address >= self.prev_address and - self.target_address < self.address) - { - const file_index = if (self.version >= 5) self.prev_file else i: { - if (self.prev_file == 0) return missing(); - break :i self.prev_file - 1; - }; - - if (file_index >= file_entries.len) return bad(); - const file_entry = &file_entries[file_index]; - - if (file_entry.dir_index >= self.include_dirs.len) return bad(); - const dir_name = self.include_dirs[file_entry.dir_index].path; - - const file_name = try std.fs.path.join(allocator, &[_][]const u8{ - dir_name, file_entry.path, - }); - - return std.debug.SourceLocation{ - .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, - .column = self.prev_column, - .file_name = file_name, - }; - } - - self.prev_valid = true; - self.prev_address = self.address; - self.prev_file = self.file; - self.prev_line = self.line; - self.prev_column = self.column; - self.prev_is_stmt = self.is_stmt; - self.prev_basic_block = self.basic_block; - self.prev_end_sequence = self.end_sequence; - return null; + pub fn addRow(prog: *LineNumberProgram, gpa: Allocator, table: *CompileUnit.SrcLocCache.LineTable) !void { + if (prog.line == 0) return; // garbage data + try table.put(gpa, prog.address, .{ + .line = cast(u32, prog.line) orelse maxInt(u32), + .column = cast(u32, prog.column) orelse maxInt(u32), + .file = cast(u32, prog.file) orelse return bad(), + }); } }; @@ -2381,7 +2363,7 @@ pub fn resolveSourceLocations( defer prog_node.end(); var cu_i: usize = 0; - var cu: *const CompileUnit = &d.compile_unit_list.items[0]; + var cu: *CompileUnit = &d.compile_unit_list.items[0]; var range = cu.pc_range.?; next_pc: for (sorted_pc_addrs, output) |pc, *out| { defer prog_node.completeOne(); @@ -2403,7 +2385,7 @@ pub fn resolveSourceLocations( } // TODO: instead of calling this function, break the 
function up into one that parses the // information once and prepares a context that can be reused for the entire batch. - if (getLineNumberInfo(d, gpa, cu.*, pc)) |src_loc| { + if (getLineNumberInfo(d, gpa, cu, pc)) |src_loc| { out.* = src_loc; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid, @@ -2419,7 +2401,7 @@ fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", }, - .source_location = di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { + .source_location = di.getLineNumberInfo(allocator, compile_unit, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, else => return err, }, diff --git a/lib/std/debug/FixedBufferReader.zig b/lib/std/debug/FixedBufferReader.zig index 2a90ba569e33..494245a9e979 100644 --- a/lib/std/debug/FixedBufferReader.zig +++ b/lib/std/debug/FixedBufferReader.zig @@ -1,4 +1,6 @@ -const std = @import("std.zig"); +//! Optimized for performance in debug builds. 
+ +const std = @import("../std.zig"); const MemoryAccessor = std.debug.MemoryAccessor; const FixedBufferReader = @This(); From c2ab4614b69a2303d640837df357c2336b0cedf2 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 23:38:34 -0700 Subject: [PATCH 09/34] std.Debug.Info: remove std.Progress integration it's too fast to need it now --- lib/std/debug/Dwarf.zig | 5 ----- lib/std/debug/Info.zig | 9 ++------- tools/dump-cov.zig | 7 ++----- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 06ffad9441de..9689ac98b3a3 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -2354,19 +2354,14 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. output: []std.debug.SourceLocation, - parent_prog_node: std.Progress.Node, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); assert(d.compile_units_sorted); - const prog_node = parent_prog_node.start("Resolve Source Locations", sorted_pc_addrs.len); - defer prog_node.end(); - var cu_i: usize = 0; var cu: *CompileUnit = &d.compile_unit_list.items[0]; var range = cu.pc_range.?; next_pc: for (sorted_pc_addrs, output) |pc, *out| { - defer prog_node.completeOne(); while (pc >= range.end) { cu_i += 1; if (cu_i >= d.compile_unit_list.items.len) { diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index 3c61c4072fb1..f31b2f22c4d6 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -20,13 +20,9 @@ address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), pub const LoadError = Dwarf.ElfModule.LoadError; -pub fn load(gpa: Allocator, path: Path, parent_prog_node: std.Progress.Node) LoadError!Info { +pub fn load(gpa: Allocator, path: Path) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; - var prog_node = parent_prog_node.start("Loading Debug Info", 0); - defer prog_node.end(); var 
elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); - prog_node.end(); - prog_node = parent_prog_node.start("Sort Compile Units", 0); try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, @@ -51,10 +47,9 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. output: []std.debug.SourceLocation, - parent_prog_node: std.Progress.Node, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output, parent_prog_node); + return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index 8449dec33e1e..f821dde61129 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -28,10 +28,7 @@ pub fn main() !void { .sub_path = cov_file_name, }; - const prog_node = std.Progress.start(.{}); - defer prog_node.end(); - - var debug_info = std.debug.Info.load(gpa, exe_path, prog_node) catch |err| { + var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); @@ -54,7 +51,7 @@ pub fn main() !void { assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len); - try debug_info.resolveSourceLocations(gpa, pcs, source_locations, prog_node); + try debug_info.resolveSourceLocations(gpa, pcs, source_locations); defer for (source_locations) |sl| { gpa.free(sl.file_name); }; From 53aa9d75a9b10c9cd277031e604a631452d34e8c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 3 Aug 2024 17:42:08 -0700 Subject: [PATCH 10/34] std.debug.Info.resolveSourceLocations: O(N) implementation --- lib/std/debug/Dwarf.zig |
59 +++------------- lib/std/debug/Info.zig | 146 +++++++++++++++++++++++++++++++++++++++- tools/dump-cov.zig | 11 ++- 3 files changed, 157 insertions(+), 59 deletions(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 9689ac98b3a3..cd37795351e8 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -152,6 +152,7 @@ pub const CompileUnit = struct { pub const LineEntry = struct { line: u32, column: u32, + /// Offset by 1 depending on whether Dwarf version is >= 5. file: u32, }; @@ -809,7 +810,7 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { return null; } -const ScanError = error{ +pub const ScanError = error{ InvalidDebugInfo, MissingDebugInfo, } || Allocator.Error || std.debug.FixedBufferReader.Error; @@ -1113,7 +1114,7 @@ pub fn sortCompileUnits(d: *Dwarf) ScanError!void { } std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct { - fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { + pub fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { _ = ctx; const a_range = a.pc_range orelse return false; const b_range = b.pc_range orelse return true; @@ -1641,14 +1642,18 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
}; } +pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, cu: *CompileUnit) ScanError!void { + if (cu.src_loc_cache != null) return; + cu.src_loc_cache = try runLineNumberProgram(d, gpa, cu); +} + pub fn getLineNumberInfo( d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit, target_address: u64, ) !std.debug.SourceLocation { - if (compile_unit.src_loc_cache == null) - compile_unit.src_loc_cache = try runLineNumberProgram(d, gpa, compile_unit); + try populateSrcLocCache(d, gpa, compile_unit); const slc = &compile_unit.src_loc_cache.?; const entry = try slc.findSource(target_address); const file_index = entry.file - @intFromBool(slc.version < 5); @@ -2343,52 +2348,6 @@ pub const ElfModule = struct { } }; -pub const ResolveSourceLocationsError = Allocator.Error || FixedBufferReader.Error; - -/// Given an array of virtual memory addresses, sorted ascending, outputs a -/// corresponding array of source locations, by appending to the provided -/// array list. -pub fn resolveSourceLocations( - d: *Dwarf, - gpa: Allocator, - sorted_pc_addrs: []const u64, - /// Asserts its length equals length of `sorted_pc_addrs`. 
- output: []std.debug.SourceLocation, -) ResolveSourceLocationsError!void { - assert(sorted_pc_addrs.len == output.len); - assert(d.compile_units_sorted); - - var cu_i: usize = 0; - var cu: *CompileUnit = &d.compile_unit_list.items[0]; - var range = cu.pc_range.?; - next_pc: for (sorted_pc_addrs, output) |pc, *out| { - while (pc >= range.end) { - cu_i += 1; - if (cu_i >= d.compile_unit_list.items.len) { - out.* = std.debug.SourceLocation.invalid; - continue :next_pc; - } - cu = &d.compile_unit_list.items[cu_i]; - range = cu.pc_range orelse { - out.* = std.debug.SourceLocation.invalid; - continue :next_pc; - }; - } - if (pc < range.start) { - out.* = std.debug.SourceLocation.invalid; - continue :next_pc; - } - // TODO: instead of calling this function, break the function up into one that parses the - // information once and prepares a context that can be reused for the entire batch. - if (getLineNumberInfo(d, gpa, cu, pc)) |src_loc| { - out.* = src_loc; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid, - else => |e| return e, - } - } -} - fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { if (di.findCompileUnit(address)) |compile_unit| { return .{ diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index f31b2f22c4d6..a52de6549b53 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -12,12 +12,66 @@ const Path = std.Build.Cache.Path; const Dwarf = std.debug.Dwarf; const page_size = std.mem.page_size; const assert = std.debug.assert; +const Hash = std.hash.Wyhash; const Info = @This(); /// Sorted by key, ascending. address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), +/// Provides a globally-scoped integer index for directories. +/// +/// As opposed to, for example, a directory index that is compilation-unit +/// scoped inside a single ELF module. +/// +/// String memory references the memory-mapped debug information. 
+/// +/// Protected by `mutex`. +directories: std.StringArrayHashMapUnmanaged(void), +/// Provides a globally-scoped integer index for files. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), +/// Protects `directories` and `files`. +mutex: std.Thread.Mutex, + +pub const SourceLocation = struct { + file: File.Index, + line: u32, + column: u32, + + pub const invalid: SourceLocation = .{ + .file = .invalid, + .line = 0, + .column = 0, + }; +}; + +pub const File = struct { + directory_index: u32, + basename: []const u8, + + pub const Index = enum(u32) { + invalid = std.math.maxInt(u32), + _, + }; + + pub const MapContext = struct { + pub fn hash(ctx: MapContext, a: File) u32 { + _ = ctx; + return @truncate(Hash.hash(a.directory_index, a.basename)); + } + + pub fn eql(ctx: MapContext, a: File, b: File, b_index: usize) bool { + _ = ctx; + _ = b_index; + return a.directory_index == b.directory_index and std.mem.eql(u8, a.basename, b.basename); + } + }; +}; + pub const LoadError = Dwarf.ElfModule.LoadError; pub fn load(gpa: Allocator, path: Path) LoadError!Info { @@ -26,12 +80,17 @@ pub fn load(gpa: Allocator, path: Path) LoadError!Info { try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, + .directories = .{}, + .files = .{}, + .mutex = .{}, }; try info.address_map.put(gpa, elf_module.base_address, elf_module); return info; } pub fn deinit(info: *Info, gpa: Allocator) void { + info.directories.deinit(gpa); + info.files.deinit(gpa); for (info.address_map.values()) |*elf_module| { elf_module.dwarf.deinit(gpa); } @@ -39,17 +98,98 @@ pub fn deinit(info: *Info, gpa: Allocator) void { info.* = undefined; } -pub const ResolveSourceLocationsError = Dwarf.ResolveSourceLocationsError; +pub fn fileAt(info: *Info, index: File.Index) *File { + return &info.files.keys()[@intFromEnum(index)]; +} + +pub const 
ResolveSourceLocationsError = Dwarf.ScanError; +/// Given an array of virtual memory addresses, sorted ascending, outputs a +/// corresponding array of source locations. pub fn resolveSourceLocations( info: *Info, gpa: Allocator, sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. - output: []std.debug.SourceLocation, + output: []SourceLocation, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); + return resolveSourceLocationsDwarf(info, gpa, sorted_pc_addrs, output, &elf_module.dwarf); +} + +pub fn resolveSourceLocationsDwarf( + info: *Info, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []SourceLocation, + d: *Dwarf, +) ResolveSourceLocationsError!void { + assert(sorted_pc_addrs.len == output.len); + assert(d.compile_units_sorted); + + var cu_i: usize = 0; + var line_table_i: usize = 0; + var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + // Protects directories and files tables from other threads. 
+ info.mutex.lock(); + defer info.mutex.unlock(); + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + range = cu.pc_range orelse { + out.* = SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + if (line_table_i == 0) { + line_table_i = 1; + info.mutex.unlock(); + defer info.mutex.lock(); + d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + out.* = SourceLocation.invalid; + cu_i += 1; + if (cu_i < d.compile_unit_list.items.len) { + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + if (cu.pc_range) |r| range = r; + } + continue :next_pc; + }, + else => |e| return e, + }; + } + const slc = &cu.src_loc_cache.?; + const table_addrs = slc.line_table.keys(); + while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; + + const entry = slc.line_table.values()[line_table_i - 1]; + const corrected_file_index = entry.file - @intFromBool(slc.version < 5); + const file_entry = slc.files[corrected_file_index]; + const dir_path = slc.directories[file_entry.dir_index].path; + const dir_gop = try info.directories.getOrPut(gpa, dir_path); + const file_gop = try info.files.getOrPut(gpa, .{ + .directory_index = @intCast(dir_gop.index), + .basename = file_entry.path, + }); + out.* = .{ + .file = @enumFromInt(file_gop.index), + .line = entry.line, + .column = entry.column, + }; + } } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index f821dde61129..bd096b9fc0df 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -50,15 +50,14 @@ pub fn main() !void { } assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); - const source_locations = try 
arena.alloc(std.debug.SourceLocation, pcs.len); + const source_locations = try arena.alloc(std.debug.Info.SourceLocation, pcs.len); try debug_info.resolveSourceLocations(gpa, pcs, source_locations); - defer for (source_locations) |sl| { - gpa.free(sl.file_name); - }; for (pcs, source_locations) |pc, sl| { - try stdout.print("{x}: {s}:{d}:{d}\n", .{ - pc, sl.file_name, sl.line, sl.column, + const file = debug_info.fileAt(sl.file); + const dir_name = debug_info.directories.keys()[file.directory_index]; + try stdout.print("{x}: {s}/{s}:{d}:{d}\n", .{ + pc, dir_name, file.basename, sl.line, sl.column, }); } From 5f92a036f9a9a137e4276d0f605e4cb940eca3a7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 3 Aug 2024 18:28:58 -0700 Subject: [PATCH 11/34] README: update how std lib docs are found in a release build --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 51c39f079679..7c4adbf82aea 100644 --- a/README.md +++ b/README.md @@ -13,11 +13,9 @@ Documentation** corresponding to the version of Zig that you are using by following the appropriate link on the [download page](https://ziglang.org/download). -Otherwise, you're looking at a release of Zig, and you can find documentation -here: - - * doc/langref.html - * doc/std/index.html +Otherwise, you're looking at a release of Zig, so you can find the language +reference at `doc/langref.html`, and the standard library documentation by +running `zig std`, which will open a browser tab. ## Installation From 517cfb0dd1e2b5b8efc8e90ce4e5593a38fa158c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 4 Aug 2024 00:16:28 -0700 Subject: [PATCH 12/34] fuzzing: progress towards web UI * libfuzzer: close file after mmap * fuzzer/main.js: connect with EventSource and debug dump the messages. currently this prints how many fuzzer runs have been attempted to console.log. * extract some `std.debug.Info` logic into `std.debug.Coverage`. 
Prepares for consolidation across multiple different executables which share source files, and makes it possible to send all the PC/SourceLocation mapping data with 4 memcpy'd arrays. * std.Build.Fuzz: - spawn a thread to watch the message queue and signal event subscribers. - track coverage map data - respond to /events URL with EventSource messages on a timer --- lib/fuzzer.zig | 1 + lib/fuzzer/main.js | 13 +- lib/std/Build/Fuzz.zig | 209 +++++++++++++++++++++++++++++-- lib/std/Build/Step/Run.zig | 6 +- lib/std/debug.zig | 1 + lib/std/debug/Coverage.zig | 244 +++++++++++++++++++++++++++++++++++++ lib/std/debug/Info.zig | 153 ++--------------------- tools/dump-cov.zig | 16 ++- 8 files changed, 478 insertions(+), 165 deletions(-) create mode 100644 lib/std/debug/Coverage.zig diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index ede3663cdca0..0d968cd60db3 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -218,6 +218,7 @@ const Fuzzer = struct { .read = true, .truncate = false, }); + defer coverage_file.close(); const n_bitset_elems = (flagged_pcs.len + 7) / 8; const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems; const existing_len = coverage_file.getEndPos() catch |err| { diff --git a/lib/fuzzer/main.js b/lib/fuzzer/main.js index 71e6b5fa54e8..872ac3d4b529 100644 --- a/lib/fuzzer/main.js +++ b/lib/fuzzer/main.js @@ -12,6 +12,9 @@ const text_decoder = new TextDecoder(); const text_encoder = new TextEncoder(); + const eventSource = new EventSource("events"); + eventSource.addEventListener('message', onMessage, false); + WebAssembly.instantiateStreaming(wasm_promise, { js: { log: function(ptr, len) { @@ -38,11 +41,15 @@ }); }); + function onMessage(e) { + console.log("Message", e.data); + } + function render() { - domSectSource.classList.add("hidden"); + domSectSource.classList.add("hidden"); - // TODO this is temporary debugging data - renderSource("/home/andy/dev/zig/lib/std/zig/tokenizer.zig"); + // TODO this is temporary 
debugging data + renderSource("/home/andy/dev/zig/lib/std/zig/tokenizer.zig"); } function renderSource(path) { diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig index 46d9bfc8fd49..0ff82f367708 100644 --- a/lib/std/Build/Fuzz.zig +++ b/lib/std/Build/Fuzz.zig @@ -6,6 +6,7 @@ const assert = std.debug.assert; const fatal = std.process.fatal; const Allocator = std.mem.Allocator; const log = std.log; +const Coverage = std.debug.Coverage; const Fuzz = @This(); const build_runner = @import("root"); @@ -53,17 +54,30 @@ pub fn start( .global_cache_directory = global_cache_directory, .zig_lib_directory = zig_lib_directory, .zig_exe_path = zig_exe_path, - .msg_queue = .{}, - .mutex = .{}, .listen_address = listen_address, .fuzz_run_steps = fuzz_run_steps, + + .msg_queue = .{}, + .mutex = .{}, + .condition = .{}, + + .coverage_files = .{}, + .coverage_mutex = .{}, + .coverage_condition = .{}, }; + // For accepting HTTP connections. const web_server_thread = std.Thread.spawn(.{}, WebServer.run, .{&web_server}) catch |err| { fatal("unable to spawn web server thread: {s}", .{@errorName(err)}); }; defer web_server_thread.join(); + // For polling messages and sending updates to subscribers. + const coverage_thread = std.Thread.spawn(.{}, WebServer.coverageRun, .{&web_server}) catch |err| { + fatal("unable to spawn coverage thread: {s}", .{@errorName(err)}); + }; + defer coverage_thread.join(); + { const fuzz_node = prog_node.start("Fuzzing", fuzz_run_steps.len); defer fuzz_node.end(); @@ -88,14 +102,38 @@ pub const WebServer = struct { global_cache_directory: Build.Cache.Directory, zig_lib_directory: Build.Cache.Directory, zig_exe_path: []const u8, + listen_address: std.net.Address, + fuzz_run_steps: []const *Step.Run, + /// Messages from fuzz workers. Protected by mutex. msg_queue: std.ArrayListUnmanaged(Msg), + /// Protects `msg_queue` only. 
mutex: std.Thread.Mutex, - listen_address: std.net.Address, - fuzz_run_steps: []const *Step.Run, + /// Signaled when there is a message in `msg_queue`. + condition: std.Thread.Condition, + + coverage_files: std.AutoArrayHashMapUnmanaged(u64, CoverageMap), + /// Protects `coverage_files` only. + coverage_mutex: std.Thread.Mutex, + /// Signaled when `coverage_files` changes. + coverage_condition: std.Thread.Condition, + + const CoverageMap = struct { + mapped_memory: []align(std.mem.page_size) const u8, + coverage: Coverage, + + fn deinit(cm: *CoverageMap, gpa: Allocator) void { + std.posix.munmap(cm.mapped_memory); + cm.coverage.deinit(gpa); + cm.* = undefined; + } + }; const Msg = union(enum) { - coverage_id: u64, + coverage: struct { + id: u64, + run: *Step.Run, + }, }; fn run(ws: *WebServer) void { @@ -162,6 +200,10 @@ pub const WebServer = struct { std.mem.eql(u8, request.head.target, "/debug/sources.tar")) { try serveSourcesTar(ws, request); + } else if (std.mem.eql(u8, request.head.target, "/events") or + std.mem.eql(u8, request.head.target, "/debug/events")) + { + try serveEvents(ws, request); } else { try request.respond("not found", .{ .status = .not_found, @@ -384,6 +426,58 @@ pub const WebServer = struct { try file.writeAll(std.mem.asBytes(&header)); } + fn serveEvents(ws: *WebServer, request: *std.http.Server.Request) !void { + var send_buffer: [0x4000]u8 = undefined; + var response = request.respondStreaming(.{ + .send_buffer = &send_buffer, + .respond_options = .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = "text/event-stream" }, + }, + .transfer_encoding = .none, + }, + }); + + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + if (getStats(ws)) |stats| { + try response.writer().print("data: {d}\n\n", .{stats.n_runs}); + } else { + try response.writeAll("data: loading debug information\n\n"); + } + try response.flush(); + + while (true) { + ws.coverage_condition.timedWait(&ws.coverage_mutex, std.time.ns_per_ms * 500) 
catch {}; + if (getStats(ws)) |stats| { + try response.writer().print("data: {d}\n\n", .{stats.n_runs}); + try response.flush(); + } + } + } + + const Stats = struct { + n_runs: u64, + }; + + fn getStats(ws: *WebServer) ?Stats { + const coverage_maps = ws.coverage_files.values(); + if (coverage_maps.len == 0) return null; + // TODO: make each events URL correspond to one coverage map + const ptr = coverage_maps[0].mapped_memory; + const SeenPcsHeader = extern struct { + n_runs: usize, + deduplicated_runs: usize, + pcs_len: usize, + lowest_stack: usize, + }; + const header: *const SeenPcsHeader = @ptrCast(ptr[0..@sizeOf(SeenPcsHeader)]); + return .{ + .n_runs = @atomicLoad(usize, &header.n_runs, .monotonic), + }; + } + fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { const gpa = ws.gpa; @@ -471,6 +565,95 @@ pub const WebServer = struct { .name = "cache-control", .value = "max-age=0, must-revalidate", }; + + fn coverageRun(ws: *WebServer) void { + ws.mutex.lock(); + defer ws.mutex.unlock(); + + while (true) { + ws.condition.wait(&ws.mutex); + for (ws.msg_queue.items) |msg| switch (msg) { + .coverage => |coverage| prepareTables(ws, coverage.run, coverage.id) catch |err| switch (err) { + error.AlreadyReported => continue, + else => |e| log.err("failed to prepare code coverage tables: {s}", .{@errorName(e)}), + }, + }; + ws.msg_queue.clearRetainingCapacity(); + } + } + + fn prepareTables( + ws: *WebServer, + run_step: *Step.Run, + coverage_id: u64, + ) error{ OutOfMemory, AlreadyReported }!void { + const gpa = ws.gpa; + + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + const gop = try ws.coverage_files.getOrPut(gpa, coverage_id); + if (gop.found_existing) { + // We are fuzzing the same executable with multiple threads. + // Perhaps the same unit test; perhaps a different one. 
In any + // case, since the coverage file is the same, we only have to + // notice changes to that one file in order to learn coverage for + // this particular executable. + return; + } + errdefer _ = ws.coverage_files.pop(); + + gop.value_ptr.* = .{ + .coverage = std.debug.Coverage.init, + .mapped_memory = undefined, // populated below + }; + errdefer gop.value_ptr.coverage.deinit(gpa); + + const rebuilt_exe_path: Build.Cache.Path = .{ + .root_dir = Build.Cache.Directory.cwd(), + .sub_path = run_step.rebuilt_executable.?, + }; + var debug_info = std.debug.Info.load(gpa, rebuilt_exe_path, &gop.value_ptr.coverage) catch |err| { + log.err("step '{s}': failed to load debug information for '{}': {s}", .{ + run_step.step.name, rebuilt_exe_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer debug_info.deinit(gpa); + + const coverage_file_path: Build.Cache.Path = .{ + .root_dir = run_step.step.owner.cache_root, + .sub_path = "v/" ++ std.fmt.hex(coverage_id), + }; + var coverage_file = coverage_file_path.root_dir.handle.openFile(coverage_file_path.sub_path, .{}) catch |err| { + log.err("step '{s}': failed to load coverage file '{}': {s}", .{ + run_step.step.name, coverage_file_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer coverage_file.close(); + + const file_size = coverage_file.getEndPos() catch |err| { + log.err("unable to check len of coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + + const mapped_memory = std.posix.mmap( + null, + file_size, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + coverage_file.handle, + 0, + ) catch |err| { + log.err("failed to map coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + + gop.value_ptr.mapped_memory = mapped_memory; + + ws.coverage_condition.broadcast(); + } }; fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog_node: std.Progress.Node) 
void { @@ -493,16 +676,16 @@ fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog build_runner.printErrorMessages(gpa, &compile.step, ttyconf, stderr, false) catch {}; } - if (result) |rebuilt_bin_path| { - run.rebuilt_executable = rebuilt_bin_path; - } else |err| switch (err) { - error.MakeFailed => {}, + const rebuilt_bin_path = result catch |err| switch (err) { + error.MakeFailed => return, else => { - std.debug.print("step '{s}': failed to rebuild in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rebuild in fuzz mode: {s}", .{ compile.step.name, @errorName(err), }); + return; }, - } + }; + run.rebuilt_executable = rebuilt_bin_path; } fn fuzzWorkerRun( @@ -524,11 +707,13 @@ fn fuzzWorkerRun( std.debug.lockStdErr(); defer std.debug.unlockStdErr(); build_runner.printErrorMessages(gpa, &run.step, ttyconf, stderr, false) catch {}; + return; }, else => { - std.debug.print("step '{s}': failed to rebuild '{s}' in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rerun '{s}' in fuzz mode: {s}", .{ run.step.name, test_name, @errorName(err), }); + return; }, }; } diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index e494e969f0f7..b08ecfee78a6 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -1521,7 +1521,11 @@ fn evalZigTest( { web_server.mutex.lock(); defer web_server.mutex.unlock(); - try web_server.msg_queue.append(web_server.gpa, .{ .coverage_id = coverage_id }); + try web_server.msg_queue.append(web_server.gpa, .{ .coverage = .{ + .id = coverage_id, + .run = run, + } }); + web_server.condition.signal(); } }, else => {}, // ignore other messages diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 7f4f6b7df20c..a3a8a533eed0 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -19,6 +19,7 @@ pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); pub const Info = @import("debug/Info.zig"); 
+pub const Coverage = @import("debug/Coverage.zig"); /// Unresolved source locations can be represented with a single `usize` that /// corresponds to a virtual memory address of the program counter. Combined diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig new file mode 100644 index 000000000000..d9cc7fdebd82 --- /dev/null +++ b/lib/std/debug/Coverage.zig @@ -0,0 +1,244 @@ +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const Hash = std.hash.Wyhash; +const Dwarf = std.debug.Dwarf; +const assert = std.debug.assert; + +const Coverage = @This(); + +/// Provides a globally-scoped integer index for directories. +/// +/// As opposed to, for example, a directory index that is compilation-unit +/// scoped inside a single ELF module. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +directories: std.ArrayHashMapUnmanaged(String, void, String.MapContext, false), +/// Provides a globally-scoped integer index for files. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), +string_bytes: std.ArrayListUnmanaged(u8), +/// Protects the other fields. 
+mutex: std.Thread.Mutex, + +pub const init: Coverage = .{ + .directories = .{}, + .files = .{}, + .mutex = .{}, + .string_bytes = .{}, +}; + +pub const String = enum(u32) { + _, + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a: String, b: String, b_index: usize) bool { + _ = b_index; + const a_slice = span(self.string_bytes[@intFromEnum(a)..]); + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + + pub fn hash(self: @This(), a: String) u32 { + return @truncate(Hash.hash(0, span(self.string_bytes[@intFromEnum(a)..]))); + } + }; + + pub const SliceAdapter = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a_slice: []const u8, b: String, b_index: usize) bool { + _ = b_index; + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + pub fn hash(self: @This(), a: []const u8) u32 { + _ = self; + return @truncate(Hash.hash(0, a)); + } + }; +}; + +pub const SourceLocation = struct { + file: File.Index, + line: u32, + column: u32, + + pub const invalid: SourceLocation = .{ + .file = .invalid, + .line = 0, + .column = 0, + }; +}; + +pub const File = struct { + directory_index: u32, + basename: String, + + pub const Index = enum(u32) { + invalid = std.math.maxInt(u32), + _, + }; + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn hash(self: MapContext, a: File) u32 { + const a_basename = span(self.string_bytes[@intFromEnum(a.basename)..]); + return @truncate(Hash.hash(a.directory_index, a_basename)); + } + + pub fn eql(self: MapContext, a: File, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const a_basename = span(self.string_bytes[@intFromEnum(a.basename)..]); + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a_basename, b_basename); + } + }; + + pub const 
SliceAdapter = struct { + string_bytes: []const u8, + + pub const Entry = struct { + directory_index: u32, + basename: []const u8, + }; + + pub fn hash(self: @This(), a: Entry) u32 { + _ = self; + return @truncate(Hash.hash(a.directory_index, a.basename)); + } + + pub fn eql(self: @This(), a: Entry, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a.basename, b_basename); + } + }; +}; + +pub fn deinit(cov: *Coverage, gpa: Allocator) void { + cov.directories.deinit(gpa); + cov.files.deinit(gpa); + cov.string_bytes.deinit(gpa); + cov.* = undefined; +} + +pub fn fileAt(cov: *Coverage, index: File.Index) *File { + return &cov.files.keys()[@intFromEnum(index)]; +} + +pub fn stringAt(cov: *Coverage, index: String) [:0]const u8 { + return span(cov.string_bytes.items[@intFromEnum(index)..]); +} + +pub const ResolveAddressesDwarfError = Dwarf.ScanError; + +pub fn resolveAddressesDwarf( + cov: *Coverage, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []SourceLocation, + d: *Dwarf, +) ResolveAddressesDwarfError!void { + assert(sorted_pc_addrs.len == output.len); + assert(d.compile_units_sorted); + + var cu_i: usize = 0; + var line_table_i: usize = 0; + var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + // Protects directories and files tables from other threads. 
+ cov.mutex.lock(); + defer cov.mutex.unlock(); + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + range = cu.pc_range orelse { + out.* = SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + if (line_table_i == 0) { + line_table_i = 1; + cov.mutex.unlock(); + defer cov.mutex.lock(); + d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + out.* = SourceLocation.invalid; + cu_i += 1; + if (cu_i < d.compile_unit_list.items.len) { + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + if (cu.pc_range) |r| range = r; + } + continue :next_pc; + }, + else => |e| return e, + }; + } + const slc = &cu.src_loc_cache.?; + const table_addrs = slc.line_table.keys(); + while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; + + const entry = slc.line_table.values()[line_table_i - 1]; + const corrected_file_index = entry.file - @intFromBool(slc.version < 5); + const file_entry = slc.files[corrected_file_index]; + const dir_path = slc.directories[file_entry.dir_index].path; + try cov.string_bytes.ensureUnusedCapacity(gpa, dir_path.len + file_entry.path.len + 2); + const dir_gop = try cov.directories.getOrPutContextAdapted(gpa, dir_path, String.SliceAdapter{ + .string_bytes = cov.string_bytes.items, + }, String.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!dir_gop.found_existing) + dir_gop.key_ptr.* = addStringAssumeCapacity(cov, dir_path); + const file_gop = try cov.files.getOrPutContextAdapted(gpa, File.SliceAdapter.Entry{ + .directory_index = @intCast(dir_gop.index), + .basename = file_entry.path, + }, File.SliceAdapter{ + .string_bytes = 
cov.string_bytes.items, + }, File.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!file_gop.found_existing) file_gop.key_ptr.* = .{ + .directory_index = @intCast(dir_gop.index), + .basename = addStringAssumeCapacity(cov, file_entry.path), + }; + out.* = .{ + .file = @enumFromInt(file_gop.index), + .line = entry.line, + .column = entry.column, + }; + } +} + +pub fn addStringAssumeCapacity(cov: *Coverage, s: []const u8) String { + const result: String = @enumFromInt(cov.string_bytes.items.len); + cov.string_bytes.appendSliceAssumeCapacity(s); + cov.string_bytes.appendAssumeCapacity(0); + return result; +} + +fn span(s: []const u8) [:0]const u8 { + return std.mem.sliceTo(@as([:0]const u8, @ptrCast(s)), 0); +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index a52de6549b53..ee191d2c128d 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -12,85 +12,31 @@ const Path = std.Build.Cache.Path; const Dwarf = std.debug.Dwarf; const page_size = std.mem.page_size; const assert = std.debug.assert; -const Hash = std.hash.Wyhash; +const Coverage = std.debug.Coverage; +const SourceLocation = std.debug.Coverage.SourceLocation; const Info = @This(); /// Sorted by key, ascending. address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), - -/// Provides a globally-scoped integer index for directories. -/// -/// As opposed to, for example, a directory index that is compilation-unit -/// scoped inside a single ELF module. -/// -/// String memory references the memory-mapped debug information. -/// -/// Protected by `mutex`. -directories: std.StringArrayHashMapUnmanaged(void), -/// Provides a globally-scoped integer index for files. -/// -/// String memory references the memory-mapped debug information. -/// -/// Protected by `mutex`. -files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), -/// Protects `directories` and `files`. 
-mutex: std.Thread.Mutex, - -pub const SourceLocation = struct { - file: File.Index, - line: u32, - column: u32, - - pub const invalid: SourceLocation = .{ - .file = .invalid, - .line = 0, - .column = 0, - }; -}; - -pub const File = struct { - directory_index: u32, - basename: []const u8, - - pub const Index = enum(u32) { - invalid = std.math.maxInt(u32), - _, - }; - - pub const MapContext = struct { - pub fn hash(ctx: MapContext, a: File) u32 { - _ = ctx; - return @truncate(Hash.hash(a.directory_index, a.basename)); - } - - pub fn eql(ctx: MapContext, a: File, b: File, b_index: usize) bool { - _ = ctx; - _ = b_index; - return a.directory_index == b.directory_index and std.mem.eql(u8, a.basename, b.basename); - } - }; -}; +/// Externally managed, outlives this `Info` instance. +coverage: *Coverage, pub const LoadError = Dwarf.ElfModule.LoadError; -pub fn load(gpa: Allocator, path: Path) LoadError!Info { +pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, - .directories = .{}, - .files = .{}, - .mutex = .{}, + .coverage = coverage, }; try info.address_map.put(gpa, elf_module.base_address, elf_module); return info; } pub fn deinit(info: *Info, gpa: Allocator) void { - info.directories.deinit(gpa); - info.files.deinit(gpa); for (info.address_map.values()) |*elf_module| { elf_module.dwarf.deinit(gpa); } @@ -98,98 +44,19 @@ pub fn deinit(info: *Info, gpa: Allocator) void { info.* = undefined; } -pub fn fileAt(info: *Info, index: File.Index) *File { - return &info.files.keys()[@intFromEnum(index)]; -} - -pub const ResolveSourceLocationsError = Dwarf.ScanError; +pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError; /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding
array of source locations. -pub fn resolveSourceLocations( +pub fn resolveAddresses( info: *Info, gpa: Allocator, sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. output: []SourceLocation, -) ResolveSourceLocationsError!void { +) ResolveAddressesError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return resolveSourceLocationsDwarf(info, gpa, sorted_pc_addrs, output, &elf_module.dwarf); -} - -pub fn resolveSourceLocationsDwarf( - info: *Info, - gpa: Allocator, - sorted_pc_addrs: []const u64, - /// Asserts its length equals length of `sorted_pc_addrs`. - output: []SourceLocation, - d: *Dwarf, -) ResolveSourceLocationsError!void { - assert(sorted_pc_addrs.len == output.len); - assert(d.compile_units_sorted); - - var cu_i: usize = 0; - var line_table_i: usize = 0; - var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; - var range = cu.pc_range.?; - // Protects directories and files tables from other threads. 
- info.mutex.lock(); - defer info.mutex.unlock(); - next_pc: for (sorted_pc_addrs, output) |pc, *out| { - while (pc >= range.end) { - cu_i += 1; - if (cu_i >= d.compile_unit_list.items.len) { - out.* = SourceLocation.invalid; - continue :next_pc; - } - cu = &d.compile_unit_list.items[cu_i]; - line_table_i = 0; - range = cu.pc_range orelse { - out.* = SourceLocation.invalid; - continue :next_pc; - }; - } - if (pc < range.start) { - out.* = SourceLocation.invalid; - continue :next_pc; - } - if (line_table_i == 0) { - line_table_i = 1; - info.mutex.unlock(); - defer info.mutex.lock(); - d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - out.* = SourceLocation.invalid; - cu_i += 1; - if (cu_i < d.compile_unit_list.items.len) { - cu = &d.compile_unit_list.items[cu_i]; - line_table_i = 0; - if (cu.pc_range) |r| range = r; - } - continue :next_pc; - }, - else => |e| return e, - }; - } - const slc = &cu.src_loc_cache.?; - const table_addrs = slc.line_table.keys(); - while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; - - const entry = slc.line_table.values()[line_table_i - 1]; - const corrected_file_index = entry.file - @intFromBool(slc.version < 5); - const file_entry = slc.files[corrected_file_index]; - const dir_path = slc.directories[file_entry.dir_index].path; - const dir_gop = try info.directories.getOrPut(gpa, dir_path); - const file_gop = try info.files.getOrPut(gpa, .{ - .directory_index = @intCast(dir_gop.index), - .basename = file_entry.path, - }); - out.* = .{ - .file = @enumFromInt(file_gop.index), - .line = entry.line, - .column = entry.column, - }; - } + return info.coverage.resolveAddressesDwarf(gpa, sorted_pc_addrs, output, &elf_module.dwarf); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index bd096b9fc0df..fb08907cadd4 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -28,7 +28,10 @@ pub fn main() !void { .sub_path = cov_file_name, 
}; - var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { + var coverage = std.debug.Coverage.init; + defer coverage.deinit(gpa); + + var debug_info = std.debug.Info.load(gpa, exe_path, &coverage) catch |err| { fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); @@ -50,14 +53,15 @@ pub fn main() !void { } assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); - const source_locations = try arena.alloc(std.debug.Info.SourceLocation, pcs.len); - try debug_info.resolveSourceLocations(gpa, pcs, source_locations); + const source_locations = try arena.alloc(std.debug.Coverage.SourceLocation, pcs.len); + try debug_info.resolveAddresses(gpa, pcs, source_locations); for (pcs, source_locations) |pc, sl| { - const file = debug_info.fileAt(sl.file); - const dir_name = debug_info.directories.keys()[file.directory_index]; + const file = debug_info.coverage.fileAt(sl.file); + const dir_name = debug_info.coverage.directories.keys()[file.directory_index]; + const dir_name_slice = debug_info.coverage.stringAt(dir_name); try stdout.print("{x}: {s}/{s}:{d}:{d}\n", .{ - pc, dir_name, file.basename, sl.line, sl.column, + pc, dir_name_slice, debug_info.coverage.stringAt(file.basename), sl.line, sl.column, }); } From d36c18274813268fd9edb633a31dd9e94ae11040 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 4 Aug 2024 15:24:54 -0700 Subject: [PATCH 13/34] std.posix: add some more void bits prevents unnecessary compilation errors on wasm32-freestanding --- lib/std/posix.zig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/std/posix.zig b/lib/std/posix.zig index e04efbbcc061..02f2d975ddad 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -47,6 +47,11 @@ else switch (native_os) { .plan9 => std.os.plan9, else => struct { pub const ucontext_t = void; + pub const pid_t = void; + pub const pollfd = void; + pub const fd_t = void; + pub const uid_t = void; + pub const gid_t = void; }, }; From 
b9fd0eeca6ba45058ed67fc211bb50a93c971a28 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 4 Aug 2024 15:25:42 -0700 Subject: [PATCH 14/34] add std.http.WebSocket --- lib/std/http.zig | 2 + lib/std/http/WebSocket.zig | 243 +++++++++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+) create mode 100644 lib/std/http/WebSocket.zig diff --git a/lib/std/http.zig b/lib/std/http.zig index 621c7a5f0d74..d5d5583299e2 100644 --- a/lib/std/http.zig +++ b/lib/std/http.zig @@ -4,6 +4,7 @@ pub const protocol = @import("http/protocol.zig"); pub const HeadParser = @import("http/HeadParser.zig"); pub const ChunkParser = @import("http/ChunkParser.zig"); pub const HeaderIterator = @import("http/HeaderIterator.zig"); +pub const WebSocket = @import("http/WebSocket.zig"); pub const Version = enum { @"HTTP/1.0", @@ -318,6 +319,7 @@ test { _ = Status; _ = HeadParser; _ = ChunkParser; + _ = WebSocket; _ = @import("http/test.zig"); } } diff --git a/lib/std/http/WebSocket.zig b/lib/std/http/WebSocket.zig new file mode 100644 index 000000000000..ad513fddf8af --- /dev/null +++ b/lib/std/http/WebSocket.zig @@ -0,0 +1,243 @@ +//! See https://tools.ietf.org/html/rfc6455 + +const builtin = @import("builtin"); +const std = @import("std"); +const WebSocket = @This(); +const assert = std.debug.assert; +const native_endian = builtin.cpu.arch.endian(); + +key: []const u8, +request: *std.http.Server.Request, +recv_fifo: std.fifo.LinearFifo(u8, .Slice), +reader: std.io.AnyReader, +response: std.http.Server.Response, +/// Number of bytes that have been peeked but not discarded yet. 
+outstanding_len: usize, + +pub const InitError = error{WebSocketUpgradeMissingKey} || + std.http.Server.Request.ReaderError; + +pub fn init( + ws: *WebSocket, + request: *std.http.Server.Request, + send_buffer: []u8, + recv_buffer: []align(4) u8, +) InitError!bool { + var sec_websocket_key: ?[]const u8 = null; + var upgrade_websocket: bool = false; + var it = request.iterateHeaders(); + while (it.next()) |header| { + if (std.ascii.eqlIgnoreCase(header.name, "sec-websocket-key")) { + sec_websocket_key = header.value; + } else if (std.ascii.eqlIgnoreCase(header.name, "upgrade")) { + if (!std.mem.eql(u8, header.value, "websocket")) + return false; + upgrade_websocket = true; + } + } + if (!upgrade_websocket) + return false; + + const key = sec_websocket_key orelse return error.WebSocketUpgradeMissingKey; + + var sha1 = std.crypto.hash.Sha1.init(.{}); + sha1.update(key); + sha1.update("258EAFA5-E914-47DA-95CA-C5AB0DC85B11"); + var digest: [std.crypto.hash.Sha1.digest_length]u8 = undefined; + sha1.final(&digest); + var base64_digest: [28]u8 = undefined; + assert(std.base64.standard.Encoder.encode(&base64_digest, &digest).len == base64_digest.len); + + request.head.content_length = std.math.maxInt(u64); + + ws.* = .{ + .key = key, + .recv_fifo = std.fifo.LinearFifo(u8, .Slice).init(recv_buffer), + .reader = try request.reader(), + .response = request.respondStreaming(.{ + .send_buffer = send_buffer, + .respond_options = .{ + .status = .switching_protocols, + .extra_headers = &.{ + .{ .name = "upgrade", .value = "websocket" }, + .{ .name = "connection", .value = "upgrade" }, + .{ .name = "sec-websocket-accept", .value = &base64_digest }, + }, + .transfer_encoding = .none, + }, + }), + .request = request, + .outstanding_len = 0, + }; + return true; +} + +pub const Header0 = packed struct(u8) { + opcode: Opcode, + rsv3: u1 = 0, + rsv2: u1 = 0, + rsv1: u1 = 0, + fin: bool, +}; + +pub const Header1 = packed struct(u8) { + payload_len: enum(u7) { + len16 = 126, + len64 = 127, 
+ _, + }, + mask: bool, +}; + +pub const Opcode = enum(u4) { + continuation = 0, + text = 1, + binary = 2, + connection_close = 8, + ping = 9, + /// "A Pong frame MAY be sent unsolicited. This serves as a unidirectional + /// heartbeat. A response to an unsolicited Pong frame is not expected." + pong = 10, + _, +}; + +pub const ReadSmallTextMessageError = error{ + ConnectionClose, + UnexpectedOpCode, + MessageTooBig, + MissingMaskBit, +} || RecvError; + +pub const SmallMessage = struct { + /// Can be text, binary, or ping. + opcode: Opcode, + data: []u8, +}; + +/// Reads the next message from the WebSocket stream, failing if the message does not fit +/// into `recv_buffer`. +pub fn readSmallMessage(ws: *WebSocket) ReadSmallTextMessageError!SmallMessage { + while (true) { + const header_bytes = (try recv(ws, 2))[0..2]; + const h0: Header0 = @bitCast(header_bytes[0]); + const h1: Header1 = @bitCast(header_bytes[1]); + + switch (h0.opcode) { + .text, .binary, .pong, .ping => {}, + .connection_close => return error.ConnectionClose, + .continuation => return error.UnexpectedOpCode, + _ => return error.UnexpectedOpCode, + } + + if (!h0.fin) return error.MessageTooBig; + if (!h1.mask) return error.MissingMaskBit; + + const len: usize = switch (h1.payload_len) { + .len16 => try recvReadInt(ws, u16), + .len64 => std.math.cast(usize, try recvReadInt(ws, u64)) orelse return error.MessageTooBig, + else => @intFromEnum(h1.payload_len), + }; + if (len > ws.recv_fifo.buf.len) return error.MessageTooBig; + + const mask: u32 = @bitCast((try recv(ws, 4))[0..4].*); + const payload = try recv(ws, len); + + // Skip pongs. + if (h0.opcode == .pong) continue; + + // The last item may contain a partial word of unused data. + const floored_len = (payload.len / 4) * 4; + const u32_payload: []align(1) u32 = @alignCast(std.mem.bytesAsSlice(u32, payload[0..floored_len])); + for (u32_payload) |*elem| elem.* ^= mask; + const mask_bytes = std.mem.asBytes(&mask)[0 .. 
payload.len - floored_len]; + for (payload[floored_len..], mask_bytes) |*leftover, m| leftover.* ^= m; + + return .{ + .opcode = h0.opcode, + .data = payload, + }; + } +} + +const RecvError = std.http.Server.Request.ReadError || error{EndOfStream}; + +fn recv(ws: *WebSocket, len: usize) RecvError![]u8 { + ws.recv_fifo.discard(ws.outstanding_len); + assert(len <= ws.recv_fifo.buf.len); + if (len > ws.recv_fifo.count) { + const small_buf = ws.recv_fifo.writableSlice(0); + const needed = len - ws.recv_fifo.count; + const buf = if (small_buf.len >= needed) small_buf else b: { + ws.recv_fifo.realign(); + break :b ws.recv_fifo.writableSlice(0); + }; + const n = try @as(RecvError!usize, @errorCast(ws.reader.readAtLeast(buf, needed))); + if (n < needed) return error.EndOfStream; + ws.recv_fifo.update(n); + } + ws.outstanding_len = len; + // TODO: improve the std lib API so this cast isn't necessary. + return @constCast(ws.recv_fifo.readableSliceOfLen(len)); +} + +fn recvReadInt(ws: *WebSocket, comptime I: type) !I { + const unswapped: I = @bitCast((try recv(ws, @sizeOf(I)))[0..@sizeOf(I)].*); + return switch (native_endian) { + .little => @byteSwap(unswapped), + .big => unswapped, + }; +} + +pub const WriteError = std.http.Server.Response.WriteError; + +pub fn writeMessage(ws: *WebSocket, message: []const u8, opcode: Opcode) WriteError!void { + const iovecs: [1]std.posix.iovec_const = .{ + .{ .base = message.ptr, .len = message.len }, + }; + return writeMessagev(ws, &iovecs, opcode); +} + +pub fn writeMessagev(ws: *WebSocket, message: []const std.posix.iovec_const, opcode: Opcode) WriteError!void { + const total_len = l: { + var total_len: u64 = 0; + for (message) |iovec| total_len += iovec.len; + break :l total_len; + }; + + var header_buf: [2 + 8]u8 = undefined; + header_buf[0] = @bitCast(@as(Header0, .{ + .opcode = opcode, + .fin = true, + })); + const header = switch (total_len) { + 0...125 => blk: { + header_buf[1] = @bitCast(@as(Header1, .{ + .payload_len = 
@enumFromInt(total_len), + .mask = false, + })); + break :blk header_buf[0..2]; + }, + 126...0xffff => blk: { + header_buf[1] = @bitCast(@as(Header1, .{ + .payload_len = .len16, + .mask = false, + })); + std.mem.writeInt(u16, header_buf[2..4], @intCast(total_len), .big); + break :blk header_buf[0..4]; + }, + else => blk: { + header_buf[1] = @bitCast(@as(Header1, .{ + .payload_len = .len64, + .mask = false, + })); + std.mem.writeInt(u64, header_buf[2..10], total_len, .big); + break :blk header_buf[0..10]; + }, + }; + + const response = &ws.response; + try response.writeAll(header); + for (message) |iovec| + try response.writeAll(iovec.base[0..iovec.len]); + try response.flush(); +} From 22925636f7afc0f334f1d44257c007a1d2ccd63f Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 4 Aug 2024 15:26:18 -0700 Subject: [PATCH 15/34] std.debug.Coverage: use extern structs helps the serialization use case --- lib/std/debug/Coverage.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig index d9cc7fdebd82..f341efaffbc2 100644 --- a/lib/std/debug/Coverage.zig +++ b/lib/std/debug/Coverage.zig @@ -65,7 +65,7 @@ pub const String = enum(u32) { }; }; -pub const SourceLocation = struct { +pub const SourceLocation = extern struct { file: File.Index, line: u32, column: u32, @@ -77,7 +77,7 @@ pub const SourceLocation = struct { }; }; -pub const File = struct { +pub const File = extern struct { directory_index: u32, basename: String, From dec7e45f7c7e61a3778767bbc7f8e1e9a33b01fa Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 4 Aug 2024 15:27:13 -0700 Subject: [PATCH 16/34] fuzzer web UI: receive coverage information * libfuzzer: track unique runs instead of deduplicated runs - easier for consumers to notice when to recheck the covered bits. * move common definitions to `std.Build.Fuzz.abi`. 
The build runner sends the fuzzer web interface client all the information needed to display inline coverage information along with source code. --- lib/fuzzer.zig | 15 +- lib/fuzzer/index.html | 1 + lib/fuzzer/main.js | 233 +++++++----- lib/fuzzer/wasm/main.zig | 83 ++++- lib/std/Build/Fuzz.zig | 563 +--------------------------- lib/std/Build/Fuzz/WebServer.zig | 605 +++++++++++++++++++++++++++++++ lib/std/Build/Fuzz/abi.zig | 55 +++ 7 files changed, 892 insertions(+), 663 deletions(-) create mode 100644 lib/std/Build/Fuzz/WebServer.zig create mode 100644 lib/std/Build/Fuzz/abi.zig diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index 0d968cd60db3..a3446f982363 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -3,6 +3,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; const fatal = std.process.fatal; +const SeenPcsHeader = std.Build.Fuzz.abi.SeenPcsHeader; pub const std_options = .{ .logFn = logOverride, @@ -120,13 +121,6 @@ const Fuzzer = struct { /// information, available to other processes.
coverage_id: u64, - const SeenPcsHeader = extern struct { - n_runs: usize, - deduplicated_runs: usize, - pcs_len: usize, - lowest_stack: usize, - }; - const RunMap = std.ArrayHashMapUnmanaged(Run, void, Run.HashContext, false); const Coverage = struct { @@ -247,7 +241,7 @@ const Fuzzer = struct { } else { const header: SeenPcsHeader = .{ .n_runs = 0, - .deduplicated_runs = 0, + .unique_runs = 0, .pcs_len = flagged_pcs.len, .lowest_stack = std.math.maxInt(usize), }; @@ -292,8 +286,6 @@ const Fuzzer = struct { }); if (gop.found_existing) { //std.log.info("duplicate analysis: score={d} id={d}", .{ analysis.score, analysis.id }); - const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); - _ = @atomicRmw(usize, &header.deduplicated_runs, .Add, 1, .monotonic); if (f.input.items.len < gop.key_ptr.input.len or gop.key_ptr.score == 0) { gpa.free(gop.key_ptr.input); gop.key_ptr.input = try gpa.dupe(u8, f.input.items); @@ -325,6 +317,9 @@ const Fuzzer = struct { _ = @atomicRmw(u8, elem, .Or, mask, .monotonic); } } + + const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); + _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic); } if (f.recent_cases.entries.len >= 100) { diff --git a/lib/fuzzer/index.html b/lib/fuzzer/index.html index dadc2f91d3f1..0753bcae67e7 100644 --- a/lib/fuzzer/index.html +++ b/lib/fuzzer/index.html @@ -124,6 +124,7 @@ +

Loading JavaScript...