From fe9e34d47839ef273af7e4ee00320e02967b4030 Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Thu, 25 Jul 2024 18:48:00 +0200 Subject: [PATCH] fix crash in html tokenizer --- build.zig | 18 ++++++++++-------- build.zig.zon | 4 ++-- src/cli/lsp/logic.zig | 5 +++-- src/fuzz/afl.c | 31 ------------------------------ src/fuzz/afl.zig | 43 ++++++++++++++++++++---------------------- src/html/Ast.zig | 42 ++++++++++++++++++++++++++++++++++++++++- src/html/Tokenizer.zig | 15 +++++++++------ 7 files changed, 85 insertions(+), 73 deletions(-) delete mode 100644 src/fuzz/afl.c diff --git a/build.zig b/build.zig index 64d3e81..1c2467d 100644 --- a/build.zig +++ b/build.zig @@ -107,23 +107,26 @@ fn setupFuzzStep( .name = "superfuzz-afl", .root_source_file = b.path("src/fuzz/afl.zig"), .target = target, - .optimize = .Debug, - .single_threaded = true, + .optimize = .ReleaseSafe, }); afl_obj.root_module.addImport("superhtml", superhtml); afl_obj.root_module.stack_check = false; // not linking with compiler-rt afl_obj.root_module.link_libc = true; // afl runtime depends on libc - const afl_fuzz = afl.addInstrumentedExe(b, afl_obj); - fuzz.dependOn(&b.addInstallFile(afl_fuzz, "superfuzz-afl").step); + // const afl_fuzz = afl.addInstrumentedExe( + // b, + // target, + // .ReleaseSafe, + // afl_obj, + // ); + // fuzz.dependOn(&b.addInstallFile(afl_fuzz, "superfuzz-afl").step); const super_fuzz = b.addExecutable(.{ .name = "superfuzz", .root_source_file = b.path("src/fuzz.zig"), .target = target, - .optimize = .Debug, - .single_threaded = true, + .optimize = .ReleaseSafe, }); super_fuzz.root_module.addImport("superhtml", superhtml); @@ -134,7 +137,6 @@ fn setupFuzzStep( .root_source_file = b.path("src/fuzz/astgen.zig"), .target = target, .optimize = .Debug, - .single_threaded = true, }); supergen.root_module.addImport("superhtml", superhtml); @@ -286,7 +288,7 @@ fn getVersion(b: *std.Build) Version { 0 => return .{ .tag = git_describe }, 2 => { // Untagged development build (e.g. 0.8.0-684-gbbe2cca1a). - var it = std.mem.split(u8, git_describe, "-"); + var it = std.mem.splitScalar(u8, git_describe, '-'); const tagged_ancestor = it.next() orelse unreachable; const commit_height = it.next() orelse unreachable; const commit_id = it.next() orelse unreachable; diff --git a/build.zig.zon b/build.zig.zon index 10041ff..b147f2a 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -15,8 +15,8 @@ .hash = "1220165019c862f801952d1529b990ec72de3d3b53c3a81be23500b1a10cac7266c2", }, .@"zig-afl-kit" = .{ - .url = "git+https://github.com/kristoff-it/zig-afl-kit#8784c51828b0fc9774a57d83d345f4ab73c23ffd", - .hash = "12205cb4a9a2c74fe9c501ff290367e22e93e69811e4fd8bdb8a614b3fdabd2b5fe7", + .url = "git+https://github.com/kristoff-it/zig-afl-kit#f003bfe714f2964c90939fdc940d5993190a66ec", + .hash = "1220f2d8402bb7bbc4786b9c0aad73910929ea209cbd3b063842371d68abfed33c1e", }, }, .paths = .{ diff --git a/src/cli/lsp/logic.zig b/src/cli/lsp/logic.zig index f12ba6a..95c6354 100644 --- a/src/cli/lsp/logic.zig +++ b/src/cli/lsp/logic.zig @@ -15,17 +15,18 @@ pub fn loadFile( uri: []const u8, language: super.Language, ) !void { + errdefer @panic("error while loading document!"); + var res: lsp.types.PublishDiagnosticsParams = .{ .uri = uri, .diagnostics = &.{}, }; - var doc = try Document.init( + const doc = try Document.init( self.gpa, new_text, language, ); - errdefer doc.deinit(self.gpa); log.debug("document init", .{}); diff --git a/src/fuzz/afl.c b/src/fuzz/afl.c deleted file mode 100644 index c49dff7..0000000 --- a/src/fuzz/afl.c +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include -#include -#include -#include - -/* Main entry point. */ - -/* To ensure checks are not optimized out it is recommended to disable - code optimization for the fuzzer harness main() */ -#pragma clang optimize off -#pragma GCC optimize("O0") - -void zig_fuzz_test_direct(unsigned char *, ssize_t); -void zig_fuzz_test_astgen(unsigned char *, ssize_t); - -__AFL_FUZZ_INIT(); - -int main(int argc, char **argv) { - - __AFL_INIT(); - unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; - - while (__AFL_LOOP(UINT_MAX)) { - int len = __AFL_FUZZ_TESTCASE_LEN; - zig_fuzz_test_astgen(buf, len); - } - - return 0; -} diff --git a/src/fuzz/afl.zig b/src/fuzz/afl.zig index e4885fe..3b6eb7f 100644 --- a/src/fuzz/afl.zig +++ b/src/fuzz/afl.zig @@ -4,46 +4,43 @@ const astgen = @import("astgen.zig"); pub const std_options = .{ .log_level = .err }; -const mem = std.mem; +export fn zig_fuzz_init() void {} -// const toggle_me = std.mem.backend_can_use_eql_bytes; -// comptime { -// std.debug.assert(toggle_me == false); -// } +export fn zig_fuzz_test(buf: [*]u8, len: isize) void { + var gpa_impl: std.heap.GeneralPurposeAllocator(.{}) = .{}; + defer std.debug.assert(gpa_impl.deinit() == .ok); -var gpa_impl: std.heap.GeneralPurposeAllocator(.{}) = .{}; -export fn zig_fuzz_test_direct(buf: [*]u8, len: isize) void { const gpa = gpa_impl.allocator(); const src = buf[0..@intCast(len)]; const html_ast = super.html.Ast.init(gpa, src, .superhtml) catch unreachable; defer html_ast.deinit(gpa); - if (html_ast.errors.len == 0) { - const super_ast = super.Ast.init(gpa, html_ast, src) catch unreachable; - defer super_ast.deinit(gpa); - } - // if (html_ast.errors.len == 0) { - // var out = std.ArrayList(u8).init(gpa); - // defer out.deinit(); - // html_ast.render(src, out.writer()) catch unreachable; + // const super_ast = super.Ast.init(gpa, html_ast, src) catch unreachable; + // defer super_ast.deinit(gpa); + // } - // eqlIgnoreWhitespace(src, out.items); + if (html_ast.errors.len == 0) { + var out = std.ArrayList(u8).init(gpa); + defer out.deinit(); + html_ast.render(src, out.writer()) catch unreachable; - // var full_circle = std.ArrayList(u8).init(gpa); - // defer full_circle.deinit(); - // html_ast.render(out.items, full_circle.writer()) catch unreachable; + eqlIgnoreWhitespace(src, out.items); - // std.debug.assert(std.mem.eql(u8, out.items, full_circle.items)); + var full_circle = std.ArrayList(u8).init(gpa); + defer full_circle.deinit(); + html_ast.render(out.items, full_circle.writer()) catch unreachable; - // const super_ast = super.Ast.init(gpa, html_ast, src) catch unreachable; - // defer super_ast.deinit(gpa); - // } + std.debug.assert(std.mem.eql(u8, out.items, full_circle.items)); + const super_ast = super.Ast.init(gpa, html_ast, src) catch unreachable; + defer super_ast.deinit(gpa); + } } export fn zig_fuzz_test_astgen(buf: [*]u8, len: isize) void { + var gpa_impl: std.heap.GeneralPurposeAllocator(.{}) = .{}; const gpa = gpa_impl.allocator(); const astgen_src = buf[0..@intCast(len)]; diff --git a/src/html/Ast.zig b/src/html/Ast.zig index 2f7f5c0..36f3f60 100644 --- a/src/html/Ast.zig +++ b/src/html/Ast.zig @@ -695,6 +695,7 @@ pub fn render(ast: Ast, src: []const u8, w: anytype) !void { log.debug("retokenizing: '{s}'", .{current.open.slice(src)}); const name = tt.next(src[0..current.open.end]).?.tag_name.slice(src); + log.debug("tag name: '{s}'", .{name}); if (std.ascii.eqlIgnoreCase("pre", name)) { pre += 1; } @@ -707,11 +708,26 @@ pub fn render(ast: Ast, src: []const u8, w: anytype) !void { src[current.open.end - 2], ); + // if (std.mem.eql(u8, name, "path")) @breakpoint(); + while (tt.next(src[0..current.open.end])) |maybe_attr| { log.debug("tt: {s}", .{@tagName(maybe_attr)}); log.debug("tt: {any}", .{maybe_attr}); switch (maybe_attr) { - else => unreachable, + else => { + log.debug( + "got unexpected {any}", + .{maybe_attr}, + ); + unreachable; + }, + .tag_name => { + log.debug( + "got unexpected tag_name '{s}'", + .{maybe_attr.tag_name.slice(src)}, + ); + unreachable; + }, .tag => break, .attr => |attr| { if (vertical) { @@ -1122,6 +1138,30 @@ test "arrow span" { try std.testing.expectFmt(expected, "{s}", .{ast.formatter(case)}); } +test "self-closing tag complex example" { + const case = + \\extend template="base.html"/> + \\ + \\
+ \\ + \\ + \\ + \\
+ ; + const expected = + \\extend template="base.html"/> + \\
+ \\ + \\ + \\ + \\
+ ; + const ast = try Ast.init(std.testing.allocator, case, .html); + defer ast.deinit(std.testing.allocator); + + try std.testing.expectFmt(expected, "{s}", .{ast.formatter(case)}); +} + pub const Cursor = struct { ast: Ast, idx: u32, diff --git a/src/html/Tokenizer.zig b/src/html/Tokenizer.zig index 30e3f4e..b428b71 100644 --- a/src/html/Tokenizer.zig +++ b/src/html/Tokenizer.zig @@ -2248,11 +2248,14 @@ fn next2(self: *Tokenizer, src: []const u8) ?struct { // Anything else // Start a new attribute in the current tag token. Set that attribute name and value to the empty string. Reconsume in the attribute name state. - else => self.state = .{ - .attribute_name = .{ - .tag = state, - .name_start = self.idx - 1, - }, + else => { + self.idx -= 1; + self.state = .{ + .attribute_name = .{ + .tag = state, + .name_start = self.idx, + }, + }; }, } }, @@ -2864,7 +2867,7 @@ fn next2(self: *Tokenizer, src: []const u8) ?struct { else => unreachable, }; - if (self.return_attrs) { + if (self.return_attrs and tag.attr_count == 0) { const deferred: Token = if (tag.kind == .end_self) .{ .parse_error = .{ .tag = .end_tag_with_trailing_solidus,