Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix july 2024 #476

Merged
merged 16 commits into from
Jul 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
0969a98
tests(fuzzing): adding one more ramdisk for afl docker image
SuperFola Jul 6, 2024
3506cc3
feat(fuzzing): return exit code 0 when fuzzing and a diagnostic is pr…
SuperFola Jul 6, 2024
938d563
feat(diagnostics): adding bound checking when generating an error con…
SuperFola Jul 6, 2024
fc795ff
fix(parser): checking for invalid characters when decoding utf8 codep…
SuperFola Jul 6, 2024
cd890a2
fix(macros): adding a max recursion depth when evaluating macros to a…
SuperFola Jul 6, 2024
a19fb5c
fix(vm): futures can be awaited multiple times, however they will ret…
SuperFola Jul 6, 2024
41dd545
fix(parser): do not allow reusing argument names inside macros
SuperFola Jul 6, 2024
d329344
fix(parser, formatter): enhanced comment after node handling inside m…
SuperFola Jul 6, 2024
b71201b
fix(parser): adding a hard limit on package names length (255 chars)
SuperFola Jul 6, 2024
5937829
fix(compiler): disallow passing invalid nodes (that doesn't produce a…
SuperFola Jul 7, 2024
8295be5
fix(macro processor): adding check for unevaluated spread inside macros
SuperFola Jul 7, 2024
29c98e8
fix(macro processor): checking for invalid symbols when defining a fu…
SuperFola Jul 7, 2024
147ed3d
feat(tools, fuzzing): fixing an indexing bug in the fuzzer-crash-tria…
SuperFola Jul 7, 2024
3ccf57f
fix(macro processor): added a max macro unification depth of 256 to a…
SuperFola Jul 7, 2024
8d49e31
feat(tools, fuzzing): better fuzzing tooling to start the docker imag…
SuperFola Jul 7, 2024
c1c39c6
fix(macro processor): set max macro processing depth to 256
SuperFola Jul 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@
- fixed a bug when passing the wrong number of arguments to a function inside an async call was crashing the VM because the function couldn't be named
- fixed a bug in the compiler generating invalid `fun` nodes
- fixed a bug when generating `let`, `mut` or `set` nodes inside macros with an invalid node type
- fixed a bug when reading invalid UTF8 codepoints in the parser caused out of bounds reads
- fixed a bug with recursive macro, exhausting the stack space due to recursive evaluation
- futures can be awaited again, they will return nil on all the tries
- checking for reused argument name in macros during parsing
- enhanced comment after node handling in macros
- adding a hard limit on package names length (255 characters, to comply with posix limits)
- disallow passing invalid nodes as arguments to functions and operators
- checking for unevaluated spread inside macros
- checking for invalid symbols when defining a function through a macro
- added a max macro unification depth

### Removed
- removed unused `NodeType::Closure`
Expand Down
11 changes: 10 additions & 1 deletion include/Ark/Compiler/AST/utf8_char.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@ namespace Ark::internal
m_codepoint(cp), m_length(len), m_repr(repr) {}

// https://github.com/sheredom/utf8.h/blob/4e4d828174c35e4564c31a9e35580c299c69a063/utf8.h#L1178
static std::pair<std::string::iterator, utf8_char_t> at(std::string::iterator it)
static std::pair<std::string::iterator, utf8_char_t> at(std::string::iterator it, const std::string::iterator end)
{
codepoint_t codepoint;
length_t length;
repr_t repr = {};

if (0xf0 == (0xf8 & *it)) // 4 byte utf8 codepoint
{
if (it + 3 == end || it + 2 == end || it + 1 == end)
return std::make_pair(end, utf8_char_t {});

codepoint = (static_cast<codepoint_t>(0x07 & *it) << 18) |
(static_cast<codepoint_t>(0x3f & *(it + 1)) << 12) |
(static_cast<codepoint_t>(0x3f & *(it + 2)) << 6) |
Expand All @@ -39,13 +42,19 @@ namespace Ark::internal
}
else if (0xe0 == (0xf0 & *it)) // 3 byte utf8 codepoint
{
if (it + 2 == end || it + 1 == end)
return std::make_pair(end, utf8_char_t {});

codepoint = (static_cast<codepoint_t>(0x0f & *it) << 12) |
(static_cast<codepoint_t>(0x3f & *(it + 1)) << 6) |
static_cast<codepoint_t>(0x3f & *(it + 2));
length = 3;
}
else if (0xc0 == (0xe0 & *it)) // 2 byte utf8 codepoint
{
if (it + 1 == end)
return std::make_pair(end, utf8_char_t {});

codepoint = (static_cast<codepoint_t>(0x1f & *it) << 6) |
static_cast<codepoint_t>(0x3f & *(it + 1));
length = 2;
Expand Down
8 changes: 5 additions & 3 deletions include/Ark/Compiler/Macros/Executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ namespace Ark::internal
unsigned int m_debug;
MacroProcessor* m_processor; ///< This is a non-owned pointer.

void setWithFileAttributes(const Node origin, Node& output, const Node& macro);

/**
* @brief Find the nearest macro matching a giving name
*
Expand All @@ -69,7 +71,7 @@ namespace Ark::internal
* @param name
* @return const Node* nullptr if no macro was found
*/
const Node* findNearestMacro(const std::string& name) const;
[[nodiscard]] const Node* findNearestMacro(const std::string& name) const;

/**
* @brief Registers macros based on their type
Expand All @@ -88,7 +90,7 @@ namespace Ark::internal
* @return true
* @return false
*/
bool isTruthy(const Node& node) const;
[[nodiscard]] bool isTruthy(const Node& node) const;

/**
* @brief Evaluate only the macros
Expand Down Expand Up @@ -133,7 +135,7 @@ namespace Ark::internal
* @return true
* @return false
*/
bool isPredefined(const std::string& symbol) const;
[[nodiscard]] bool isPredefined(const std::string& symbol) const;
};

}
Expand Down
5 changes: 4 additions & 1 deletion include/Ark/Compiler/Macros/Processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,11 @@ namespace Ark::internal
* @param target
* @param parent
* @param index position of target inside parent->list()
* @param unify_depth call depth to unify, to avoid deep recursive unification
*/
void unify(const std::unordered_map<std::string, Node>& map, Node& target, Node* parent, std::size_t index = 0);
void unify(const std::unordered_map<std::string, Node>& map, Node& target, Node* parent, std::size_t index, std::size_t unify_depth = 0);

void setWithFileAttributes(const Node origin, Node& output, const Node& macro);

/**
* @brief Evaluate only the macros
Expand Down
2 changes: 2 additions & 0 deletions include/Ark/Constants.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ namespace Ark
// Default features for the VM x Compiler x Parser
constexpr uint16_t DefaultFeatures = FeatureRemoveUnusedVars | FeatureShowWarnings;

constexpr std::size_t MaxMacroProcessingDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::processNode
constexpr std::size_t MaxMacroUnificationDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::unify
constexpr std::size_t VMStackSize = 8192;
}

Expand Down
1 change: 0 additions & 1 deletion src/arkreactor/Builtins/Async.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ namespace Ark::internal::Builtins::Async

auto& f = n[0].usertypeRef().as<Future>();
Value res = f.resolve();
vm->deleteFuture(&f);

return res;
}
Expand Down
5 changes: 2 additions & 3 deletions src/arkreactor/Compiler/AST/BaseParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace Ark::internal
}

// getting a character from the stream
auto [it, sym] = utf8_char_t::at(m_it);
auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
m_next_it = it;
m_sym = sym;

Expand Down Expand Up @@ -80,11 +80,10 @@ namespace Ark::internal
return;

m_it = m_str.begin() + n;
auto [it, sym] = utf8_char_t::at(m_it);
auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
m_next_it = it;
m_sym = sym;

// TODO: create a kind of map vec<pair<it, row>>
// search for the nearest it < m_it in the map to know the line number
for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i)
{
Expand Down
37 changes: 36 additions & 1 deletion src/arkreactor/Compiler/AST/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,15 @@ namespace Ark::internal

Import import_data;

const auto pos = getCount();
if (!packageName(&import_data.prefix))
errorWithNextToken("Import expected a package name");

if (import_data.prefix.size() > 255)
{
backtrack(pos);
errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", import_data.prefix.size()));
}
import_data.package.push_back(import_data.prefix);

const auto [row, col] = getCursor();
Expand All @@ -312,6 +319,12 @@ namespace Ark::internal
setNodePosAndFilename(packageNode.list().back());
import_data.package.push_back(path);
import_data.prefix = path; // in the end we will store the last element of the package, which is what we want

if (path.size() > 255)
{
backtrack(pos);
errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", path.size()));
}
}
}
else if (accept(IsChar(':')) && accept(IsChar('*'))) // parsing :*
Expand Down Expand Up @@ -595,16 +608,27 @@ namespace Ark::internal
newlineOrComment(&comment);
args->attachNearestCommentBefore(comment);

std::vector<std::string> names;
while (!isEOF())
{
const auto pos = getCount();

std::string arg_name;
if (!name(&arg_name))
break;
comment.clear();
newlineOrComment(&comment);
args->push_back(Node(NodeType::Symbol, arg_name).attachNearestCommentBefore(comment));

if (std::ranges::find(names, arg_name) != names.end())
{
backtrack(pos);
errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", arg_name));
}
names.push_back(arg_name);
}

const auto pos = getCount();
if (sequence("..."))
{
std::string spread_name;
Expand All @@ -615,13 +639,24 @@ namespace Ark::internal
comment.clear();
if (newlineOrComment(&comment))
args->list().back().attachCommentAfter(comment);

if (std::ranges::find(names, spread_name) != names.end())
{
backtrack(pos);
errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", spread_name));
}
}

if (!accept(IsChar(')')))
return std::nullopt;
comment.clear();
if (newlineOrComment(&comment))
args->list().back().attachCommentAfter(comment);
{
if (args->list().empty())
args->attachCommentAfter(comment);
else
args->list().back().attachCommentAfter(comment);
}

return args;
}
Expand Down
25 changes: 20 additions & 5 deletions src/arkreactor/Compiler/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,9 @@ namespace Ark
bool Compiler::nodeProducesOutput(const Node& node)
{
if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword)
return node.constList()[0].keyword() == Keyword::Begin || node.constList()[0].keyword() == Keyword::Fun || node.constList()[0].keyword() == Keyword::If;
return (node.constList()[0].keyword() == Keyword::Begin && node.constList().size() > 1) ||
node.constList()[0].keyword() == Keyword::Fun ||
node.constList()[0].keyword() == Keyword::If;
return true; // any other node, function call, symbol, number...
}

Expand Down Expand Up @@ -563,7 +565,7 @@ namespace Ark
if (node.nodeType() == NodeType::Symbol && maybe_operator.has_value())
page(proc_page).emplace_back(static_cast<uint8_t>(FIRST_OPERATOR + maybe_operator.value()));
else
// closure chains have been handled: closure.field.field.function
// closure chains have been handled (eg: closure.field.field.function)
compileExpression(node, proc_page, false, false); // storing proc

if (m_temp_pages.back().empty())
Expand All @@ -579,7 +581,12 @@ namespace Ark

// push the arguments in reverse order
for (std::size_t i = x.constList().size() - 1; i >= start_index; --i)
compileExpression(x.constList()[i], p, false, false);
{
if (nodeProducesOutput(x.constList()[i]))
compileExpression(x.constList()[i], p, false, false);
else
throwCompilerError(fmt::format("Invalid node inside tail call to `{}'", node.repr()), x);
}

// jump to the top of the function
page(p).emplace_back(JUMP, 0_u16);
Expand All @@ -589,7 +596,12 @@ namespace Ark
{
// push arguments on current page
for (auto exp = x.constList().begin() + start_index, exp_end = x.constList().end(); exp != exp_end; ++exp)
compileExpression(*exp, p, false, false);
{
if (nodeProducesOutput(*exp))
compileExpression(*exp, p, false, false);
else
throwCompilerError(fmt::format("Invalid node inside call to `{}'", node.repr()), x);
}
// push proc from temp page
for (const Word& word : m_temp_pages.back())
page(p).push_back(word);
Expand Down Expand Up @@ -619,7 +631,10 @@ namespace Ark
std::size_t exp_count = 0;
for (std::size_t index = start_index, size = x.constList().size(); index < size; ++index)
{
compileExpression(x.constList()[index], p, false, false);
if (nodeProducesOutput(x.constList()[index]))
compileExpression(x.constList()[index], p, false, false);
else
throwCompilerError(fmt::format("Invalid node inside call to operator `{}'", node.repr()), x);

if ((index + 1 < size && x.constList()[index + 1].nodeType() != NodeType::Capture) || index + 1 == size)
exp_count++;
Expand Down
9 changes: 8 additions & 1 deletion src/arkreactor/Compiler/Macros/Executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ namespace Ark::internal
m_processor(processor)
{}

void MacroExecutor::setWithFileAttributes(const Node origin, Node& output, const Node& macro)
{
output = macro;
output.setFilename(origin.filename());
output.setPos(origin.line(), origin.col());
}

const Node* MacroExecutor::findNearestMacro(const std::string& name) const
{
return m_processor->findNearestMacro(name);
Expand All @@ -31,7 +38,7 @@ namespace Ark::internal

void MacroExecutor::unify(const std::unordered_map<std::string, Node>& map, Node& target, Node* parent) const
{
m_processor->unify(map, target, parent);
m_processor->unify(map, target, parent, /* index= */ 0, /* unify_depth= */ 0);
}

void MacroExecutor::throwMacroProcessingError(const std::string& message, const Node& node) const
Expand Down
4 changes: 2 additions & 2 deletions src/arkreactor/Compiler/Macros/Executors/Conditional.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ namespace Ark::internal

// evaluate cond
if (isTruthy(temp))
node = if_true;
setWithFileAttributes(node, node, if_true);
else if (node.constList().size() > 3)
node = if_false;
setWithFileAttributes(node, node, if_false);
else
{
// remove node because nothing matched
Expand Down
4 changes: 2 additions & 2 deletions src/arkreactor/Compiler/Macros/Executors/Function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ namespace Ark::internal
if (!args_applied.empty())
unify(args_applied, temp_body, nullptr);

node = evaluate(temp_body, false);
setWithFileAttributes(node, node, evaluate(temp_body, false));
applyMacroProxy(node); // todo: this seems useless
return true;
}
}
else if (isPredefined(first.string()))
{
node = evaluate(node, false);
setWithFileAttributes(node, node, evaluate(node, false));
return true;
}

Expand Down
3 changes: 2 additions & 1 deletion src/arkreactor/Compiler/Macros/Executors/Symbol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ namespace Ark::internal
// ($ name value)
if (macro->constList().size() == 2)
{
node = macro->constList()[1];
setWithFileAttributes(node, node, macro->constList()[1]);
evaluate(node, false);
return true;
}
}
Expand Down
Loading
Loading