fuzz subcommand & fixes from first round of fuzzing
Loris Cro authored and Loris Cro committed Jul 15, 2024
1 parent 0657499 commit 303969a
Showing 9 changed files with 154 additions and 62 deletions.
28 changes: 17 additions & 11 deletions build.zig
@@ -6,15 +6,9 @@ pub fn build(b: *std.Build) !void {
const mode = .{ .target = target, .optimize = optimize };

const scripty = b.dependency("scripty", mode);

const super = b.addModule("super", .{
.root_source_file = b.path("src/root.zig"),
});

const super = b.addModule("super", .{ .root_source_file = b.path("src/root.zig") });
super.addImport("scripty", scripty.module("scripty"));

// super.include_dirs.append(b.allocator, .{ .other_step = ts.artifact("tree-sitter") }) catch unreachable;

const unit_tests = b.addTest(.{
.root_source_file = b.path("src/root.zig"),
.target = target,
@@ -164,14 +158,26 @@ pub fn build(b: *std.Build) !void {
super_fuzz.root_module.link_libc = true; // afl runtime depends on libc
_ = super_fuzz.getEmittedBin(); // hack around build system bug

const afl_clang_fast_path = try b.findProgram(
const afl_clang_fast_path = b.findProgram(
&.{ "afl-clang-fast", "afl-clang" },
if (b.option([]const u8, "afl-path", "Path to AFLplusplus")) |afl_path| &.{afl_path} else &.{},
);
) catch "afl-clang";

const fuzz = b.step("fuzz", "Generate an executable to fuzz html/Parser");
const run_afl_clang_fast = b.addSystemCommand(&.{ afl_clang_fast_path, "-o" });
const prog_exe = run_afl_clang_fast.addOutputFileArg(super_fuzz_name);
run_afl_clang_fast.addFileArg(super_fuzz.getEmittedLlvmBc());

const fuzz = b.step("fuzz", "Generate an executable to fuzz html/Parser");
fuzz.dependOn(&b.addInstallBinFile(prog_exe, super_fuzz_name).step);

const fuzz_tests = b.addTest(.{
.root_source_file = b.path("src/fuzz.zig"),
.target = target,
.optimize = .Debug,
// .strip = true,
// .filter = "nesting",
});

fuzz_tests.root_module.addImport("super", super);
const run_fuzz_tests = b.addRunArtifact(fuzz_tests);
test_step.dependOn(&run_fuzz_tests.step);
}
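
For orientation, the new fuzz wiring above reduces to the pattern sketched below: locate afl-clang-fast (falling back to a bare "afl-clang" so configuration no longer fails when AFL++ is absent), hand it the harness's emitted LLVM bitcode, and install the instrumented binary. This is a condensed sketch rather than the commit's exact code; "harness" stands in for the super_fuzz compile step configured above.

    // Sketch of the AFL++ wiring used in the new fuzz step.
    const cc = b.findProgram(
        &.{ "afl-clang-fast", "afl-clang" },
        &.{},
    ) catch "afl-clang"; // only the fuzz step fails if AFL++ is missing
    const afl = b.addSystemCommand(&.{ cc, "-o" });
    const instrumented = afl.addOutputFileArg("fuzz-harness");
    afl.addFileArg(harness.getEmittedLlvmBc());
    const fuzz_step = b.step("fuzz", "Build the AFL++ harness");
    fuzz_step.dependOn(&b.addInstallBinFile(instrumented, "fuzz-harness").step);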
15 changes: 15 additions & 0 deletions src/fuzz.zig
@@ -17,3 +17,18 @@ pub fn main() !void {
const ast = try super.html.Ast.init(gpa, data, .html);
defer ast.deinit(gpa);
}

test "afl++ fuzz cases" {
const cases: []const []const u8 = &.{
@embedFile("fuzz/2.html"),
@embedFile("fuzz/3.html"),
@embedFile("fuzz/12.html"),
};

for (cases) |c| {
std.debug.print("test: \n\n{s}\n\n", .{c});
const ast = try super.html.Ast.init(std.testing.allocator, c, .html);
defer ast.deinit(std.testing.allocator);
ast.debug(c);
}
}
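
The hunk above only shows the tail of main plus the new regression test built from AFL-discovered inputs. For readers without the full file, a harness of this shape looks roughly like the sketch below; the stdin read and allocator setup are assumptions, only the Ast.init/deinit calls are taken from the diff.

    const std = @import("std");
    const super = @import("super");

    pub fn main() !void {
        var gpa_impl: std.heap.GeneralPurposeAllocator(.{}) = .{};
        const gpa = gpa_impl.allocator();

        // AFL++ feeds each generated input on stdin; cap the read at the
        // library's 4 GiB limit.
        const data = try std.io.getStdIn().reader().readAllAlloc(gpa, super.html.max_size);
        defer gpa.free(data);

        const ast = try super.html.Ast.init(gpa, data, .html);
        defer ast.deinit(gpa);
    }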
Binary file added src/fuzz/12.html
Binary file added src/fuzz/2.html
Binary file added src/fuzz/3.html
6 changes: 5 additions & 1 deletion src/html.zig
@@ -1,3 +1,7 @@
pub const Ast = @import("html/Ast.zig");
pub const Tokenizer = @import("html/Tokenizer.zig");
pub const max_size = 4 * 1024 * 1024 * 1024;

test {
_ = @import("html/Tokenizer.zig");
_ = @import("html/Ast.zig");
}
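
The max_size limit now lives next to the HTML parser (root.zig re-exports it, see below). A hedged caller-side sketch of using it as a read bound; the file name and gpa allocator are hypothetical.

    // Refuse to load more than html.max_size bytes before parsing.
    const data = try std.fs.cwd().readFileAlloc(gpa, "index.html", super.html.max_size);
    defer gpa.free(data);
    const ast = try super.html.Ast.init(gpa, data, .html);
    defer ast.deinit(gpa);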
67 changes: 60 additions & 7 deletions src/html/Ast.zig
@@ -360,6 +360,10 @@ pub fn init(
const original_current_idx = current_idx;

if (current.isClosed()) {
log.debug("current {} is closed, going up to {}", .{
current_idx,
current.parent_idx,
});
current_idx = current.parent_idx;
current = &nodes.items[current.parent_idx];
}
@@ -385,12 +389,12 @@
// malformed HTML and we also expect in all of
// those cases that errors were already emitted
// by the tokenizer
const maybe_name = temp_tok.next(tag_src) orelse break;

switch (maybe_name) {
.tag_name => |n| break :blk n.slice(tag_src),
else => break,
}
const name_span = temp_tok.getName(tag_src) orelse {
current = original_current;
current_idx = original_current_idx;
break;
};
break :blk name_span.slice(tag_src);
};

log.debug("matching cn: {s} tag: {s}", .{
@@ -415,7 +419,7 @@ pub fn init(
.return_attrs = true,
};
const tag_src = cur.open.slice(src);
const rel_name = temp_tok.next(tag_src).?.tag_name;
const rel_name = temp_tok.getName(tag_src).?;
break :blk .{
.start = rel_name.start + cur.open.start,
.end = rel_name.end + cur.open.start,
@@ -447,6 +451,9 @@
switch (current.direction()) {
.in => {
new.parent_idx = current_idx;
if (current.first_child_idx != 0) {
debugNodes(nodes.items, src);
}
std.debug.assert(current.first_child_idx == 0);
current_idx = @intCast(nodes.items.len);
current.first_child_idx = current_idx;
@@ -826,6 +833,52 @@ const Formatter = struct {
}
};

pub fn debug(ast: Ast, src: []const u8) void {
var c = ast.cursor(0);
var last_depth: u32 = 0;
std.debug.print(" \n node count: {}\n", .{ast.nodes.len});
while (c.next()) |n| {
if (c.dir == .out) {
std.debug.print("\n", .{});
while (last_depth > c.depth) : (last_depth -= 1) {
for (0..last_depth - 2) |_| std.debug.print(" ", .{});
std.debug.print(")", .{});
if (last_depth - c.depth > 1) {
std.debug.print("\n", .{});
}
}
last_depth = c.depth;
continue;
}
std.debug.print("\n", .{});
for (0..c.depth - 1) |_| std.debug.print(" ", .{});
const range = n.open.range(src);
std.debug.print("({s} #{} @{} [{}, {}] - [{}, {}]", .{
@tagName(n.kind),
c.idx,
c.depth,
range.start.row,
range.start.col,
range.end.row,
range.end.col,
});
if (n.first_child_idx == 0) {
std.debug.print(")", .{});
}
last_depth = c.depth;
}
std.debug.print("\n", .{});
while (last_depth > 1) : (last_depth -= 1) {
for (0..last_depth - 2) |_| std.debug.print(" ", .{});
std.debug.print(")\n", .{});
}
}

fn debugNodes(nodes: []const Node, src: []const u8) void {
const ast = Ast{ .nodes = nodes, .errors = &.{}, .language = .html };
ast.debug(src);
}

test "basics" {
const case = "<html><head></head><body><div><link></div></body></html>";

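
The new Ast.debug helper walks the tree with a cursor and prints each node as "(kind #index @depth [start row, col] - [end row, col])", closing parentheses as it climbs back out. A minimal usage sketch, mirroring the new regression test in src/fuzz.zig; allocator choice is up to the caller.

    const std = @import("std");
    const super = @import("super");

    test "dump node tree" {
        const src = "<html><head></head><body><div><link></div></body></html>";
        const ast = try super.html.Ast.init(std.testing.allocator, src, .html);
        defer ast.deinit(std.testing.allocator);
        ast.debug(src); // prints the indented node tree to stderr
    }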
90 changes: 52 additions & 38 deletions src/html/Tokenizer.zig
@@ -382,6 +382,17 @@ fn consume(self: *Tokenizer, src: []const u8) bool {
return true;
}

pub fn getName(tokenizer: *Tokenizer, tag_src: []const u8) ?Span {
std.debug.assert(tokenizer.return_attrs);
return while (tokenizer.next(tag_src)) |maybe_name| {
switch (maybe_name) {
.tag_name => |n| break n,
.tag => break null,
else => continue,
}
} else null;
}

pub fn next(self: *Tokenizer, src: []const u8) ?Token {
if (self.deferred_token) |t| {
const token_copy = t;
@@ -2131,20 +2142,17 @@ fn next2(self: *Tokenizer, src: []const u8) ?struct {
.before_attribute_name => |state| {
// See EOF case from below
if (!self.consume(src)) {
self.state = .data;
var tag = state;
tag.span.end = self.idx;
return .{ .token = .{ .tag = tag } };
// self.idx -= 1;
// self.state = .{
// .after_attribute_name = .{
// .tag = state,
// .name = .{
// .start = self.idx,
// .end = self.idx + 1,
// },
// },
// };
// self.state = .data;
// var tag = state;
// tag.span.end = self.idx;
// return .{ .token = .{ .tag = tag } };
self.idx -= 1;
self.state = .{
.after_attribute_name = .{
.tag = state,
.name = .{ .start = 0, .end = 0 },
},
};
} else switch (self.current) {
// U+0009 CHARACTER TABULATION (tab)
// U+000A LINE FEED (LF)
@@ -2157,27 +2165,22 @@ fn next2(self: *Tokenizer, src: []const u8) ?struct {
// U+003E GREATER-THAN SIGN (>)
// EOF
// Reconsume in the after attribute name state.
// NOTE: handled differently to simplify
// control flow in after_attribute_name
// as otherwise it would need to keep
// track of when we don't have an attribute
// at all
'/', '>' => {
self.state = .data;
var tag = state;
tag.span.end = self.idx;
if (self.current == '/') {
std.debug.assert(tag.kind == .start);
tag.kind = .start_self;
}
return .{ .token = .{ .tag = tag } };
// self.idx -= 1;
// self.state = .{
// .after_attribute_name = .{
// .tag = state,
// .name = .{ .start = 0, .end = 0 },
// },
// };
// self.state = .data;
// var tag = state;
// tag.span.end = self.idx;
// if (self.current == '/') {
// std.debug.assert(tag.kind == .start);
// tag.kind = .start_self;
// }
// return .{ .token = .{ .tag = tag } };
self.idx -= 1;
self.state = .{
.after_attribute_name = .{
.tag = state,
.name = .{ .start = 0, .end = 0 },
},
};
},

//U+003D EQUALS SIGN (=)
@@ -2301,6 +2304,8 @@ fn next2(self: *Tokenizer, src: []const u8) ?struct {
// EOF
// This is an eof-in-tag parse error. Emit an end-of-file token.
self.state = .eof;
// here, in > and in / there could be no attr name
// to return.
return .{
.token = .{
.parse_error = .{
@@ -2323,9 +2328,14 @@ fn next2(self: *Tokenizer, src: []const u8) ?struct {
// Switch to the self-closing start tag state.
'/' => {
var tag = state.tag;
tag.attr_count += 1;

// here, in > and in EOF there could be no attr name
// to return.
if (state.name.len() > 0) {
tag.attr_count += 1;
}
self.state = .{ .self_closing_start_tag = tag };
if (self.return_attrs) {
if (self.return_attrs and state.name.len() > 0) {
return .{
.token = .{
.attr = .{
@@ -2350,10 +2360,14 @@ fn next2(self: *Tokenizer, src: []const u8) ?struct {
'>' => {
var tag = state.tag;
tag.span.end = self.idx;
tag.attr_count += 1;
// here, in / and in EOF there could be no attr name
// to return.
if (state.name.len() > 0) {
tag.attr_count += 1;
}

self.state = .data;
if (self.return_attrs) {
if (self.return_attrs and state.name.len() > 0) {
return .{
.token = .{
.attr = .{
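
The new getName helper replaces the inlined next() loops that Ast.init previously used for tag-name extraction; it asserts return_attrs and expects only the source slice of a single tag. A small sketch from the consumer side; the tag literal is made up for illustration.

    const std = @import("std");
    const super = @import("super");

    test "getName sketch" {
        // getName asserts return_attrs, matching how Ast.init builds its temporary tokenizers.
        var temp_tok: super.html.Tokenizer = .{ .return_attrs = true };
        const tag_src = "<div class=\"hero\">"; // hypothetical single-tag slice
        const name = temp_tok.getName(tag_src) orelse return error.NoTagName;
        try std.testing.expectEqualStrings("div", name.slice(tag_src));
    }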
10 changes: 5 additions & 5 deletions src/root.zig
@@ -1,14 +1,13 @@
// const interpreter = @import("interpreter.zig");
const std = @import("std");
pub const html = @import("html.zig");
pub const max_size = html.max_size;
pub const Ast = @import("Ast.zig");

pub const Language = enum { html, superhtml };

// pub const SuperVM = interpreter.SuperVM;
// pub const Exception = interpreter.Exception;

pub const Language = enum { html, superhtml };
pub const max_size = 4 * 1024 * 1024 * 1024;

const Range = struct {
start: Pos,
end: Pos,
@@ -78,6 +77,7 @@ pub const Span = struct {
};

test {
_ = @import("html.zig");
// _ = @import("Ast.zig");
// _ = @import("template.zig");
_ = @import("Ast.zig");
}
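
After this reshuffle the public surface a consumer sees is unchanged in spirit: Language stays in root.zig and max_size is now the re-exported html.zig constant. A tiny sketch, assuming the module is imported under the name "super" as build.zig does.

    const std = @import("std");
    const super = @import("super");

    comptime {
        // root.zig simply re-exports the limit defined in html.zig.
        std.debug.assert(super.max_size == super.html.max_size);
    }

    const language: super.Language = .html;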
