diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a4dbde9b..a63ed24b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -188,6 +188,7 @@ if (ARK_TESTS) add_executable(unittests ${UT_SOURCES}) add_subdirectory(${ark_SOURCE_DIR}/lib/ut) + target_include_directories(unittests PUBLIC ${ark_SOURCE_DIR}/include) target_link_libraries(unittests PUBLIC ArkReactor termcolor ut) add_compile_definitions(BOOST_UT_DISABLE_MODULE) diff --git a/README.md b/README.md index 271b3c453..b487b13fe 100644 --- a/README.md +++ b/README.md @@ -186,20 +186,35 @@ SYNOPSIS arkscript --dev-info arkscript -e arkscript -c [-d] + arkscript [-d] [-L ] + arkscript -f [--dry-run] + arkscript --ast [-d] [-L ] arkscript -bcr -on arkscript -bcr -a [-s ] arkscript -bcr -st [-s ] arkscript -bcr -vt [-s ] arkscript -bcr [-cs] [-p ] [-s ] - arkscript [-d] [-L ] OPTIONS -h, --help Display this message -v, --version Display ArkScript version and exit --dev-info Display development information and exit -e, --eval Evaluate ArkScript expression + -c, --compile Compile the given program to bytecode, but do not run -d, --debug... Increase debug level (default: 0) + + -L, --lib Set the location of the ArkScript standard library. Paths can be + delimited by ';' + + -f, --format Format the given source file in place + --dry-run Do not modify the file, only print out the changes + + --ast Compile the given program and output its AST as JSON to stdout + -d, --debug... Increase debug level (default: 0) + -L, --lib Set the location of the ArkScript standard library. Paths can be + delimited by ';' + -bcr, --bytecode-reader Launch the bytecode reader -on, --only-names Display only the bytecode segments names and sizes -a, --all Display all the bytecode segments (default) @@ -208,8 +223,9 @@ OPTIONS -cs, --code Display only the code segments -p, --page Set the bytecode reader code segment to display -s, --slice Select a slice of instructions in the bytecode - -L, --lib Set the location of the ArkScript standard library. Paths can be - delimited by ';' + +VERSION + 4.0.0-86587c14 LICENSE Mozilla Public License 2.0 diff --git a/include/CLI/Formatter.hpp b/include/CLI/Formatter.hpp new file mode 100644 index 000000000..e32267c20 --- /dev/null +++ b/include/CLI/Formatter.hpp @@ -0,0 +1,66 @@ +#ifndef ARK_FORMATTER_HPP +#define ARK_FORMATTER_HPP + +#include + +#include + +class Formatter final +{ +public: + Formatter(std::string filename, bool dry_run); + + void run(); + + const std::string& output() const; + +private: + const std::string m_filename; + bool m_dry_run; ///< If true, only prints the formatted file instead of saving it to disk + Ark::internal::Parser m_parser; + std::string m_output; + + bool isListStartingWithKeyword(const Ark::internal::Node& node, Ark::internal::Keyword keyword); + bool isBeginBlock(const Ark::internal::Node& node); + bool isFuncDef(const Ark::internal::Node& node); + bool isFuncCall(const Ark::internal::Node& node); + + /** + * @param node + * @return true if the node is a String|Number|Symbol|Field + * @return false + */ + bool isPlainValue(const Ark::internal::Node& node); + + /** + * @brief Compute the line on which the deepest right most node of node is at + * @param node + * @return + */ + std::size_t lineOfLastNodeIn(const Ark::internal::Node& node); + + bool should_split_on_newline(const Ark::internal::Node& node); + + /** + * @brief Handles all node formatting + * @param node + * @param indent indentation level, starting at 0, increment by 1 + * @param after_newline when false, do not add prefix + * @return + */ + std::string format(const Ark::internal::Node& node, std::size_t indent, bool after_newline); + + std::string formatBlock(const Ark::internal::Node& node, std::size_t indent, bool after_newline); + + std::string formatFunction(const Ark::internal::Node& node, std::size_t indent); + std::string formatVariable(const Ark::internal::Node& node, std::size_t indent); + std::string formatCondition(const Ark::internal::Node& node, std::size_t indent, bool is_macro = false); + std::string formatLoop(const Ark::internal::Node& node, std::size_t indent); + std::string formatBegin(const Ark::internal::Node& node, std::size_t indent, bool after_newline); + std::string formatImport(const Ark::internal::Node& node, std::size_t indent); + std::string formatDel(const Ark::internal::Node& node, std::size_t indent); + std::string formatCall(const Ark::internal::Node& node, std::size_t indent); + std::string formatMacro(const Ark::internal::Node& node, std::size_t indent); +}; + +#endif // ARK_FORMATTER_HPP diff --git a/src/arkscript/Formatter.cpp b/src/arkscript/Formatter.cpp new file mode 100644 index 000000000..6cfadab24 --- /dev/null +++ b/src/arkscript/Formatter.cpp @@ -0,0 +1,374 @@ +#include + +#include +#include +#include + +#include +#include + +using namespace Ark; +using namespace Ark::internal; + +Formatter::Formatter(std::string filename, bool dry_run) : + m_filename(std::move(filename)), m_dry_run(dry_run), m_parser(/* interpret= */ false) +{} + +void Formatter::run() +{ + try + { + m_parser.processFile(m_filename); + // TODO remove trailing whitespaces on each line + + // remove useless surrounding begin (generated by the parser) + if (isBeginBlock(m_parser.ast())) + { + std::size_t previous_line = 0; + for (std::size_t i = 1, end = m_parser.ast().constList().size(); i < end; ++i) + { + const Node node = m_parser.ast().constList()[i]; + if (node.line() - previous_line > 1 && !m_output.empty()) + m_output += "\n"; + previous_line = lineOfLastNodeIn(node); + m_output += format(node, 0, false) + "\n"; + } + } + else + m_output = format(m_parser.ast(), 0, false); + + if (!m_dry_run) + { + std::ofstream stream(m_filename); + stream << m_output; + } + } + catch (const CodeError& e) + { + Diagnostics::generate(e); + } +} + +const std::string& Formatter::output() const +{ + return m_output; +} + +bool Formatter::isListStartingWithKeyword(const Ark::internal::Node& node, Ark::internal::Keyword keyword) +{ + return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword && node.constList()[0].keyword() == keyword; +} + +bool Formatter::isBeginBlock(const Node& node) +{ + return isListStartingWithKeyword(node, Keyword::Begin); +} + +bool Formatter::isFuncDef(const Ark::internal::Node& node) +{ + return isListStartingWithKeyword(node, Keyword::Fun); +} + +bool Formatter::isFuncCall(const Ark::internal::Node& node) +{ + return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol; +} + +bool Formatter::isPlainValue(const Ark::internal::Node& node) +{ + switch (node.nodeType()) + { + case NodeType::Symbol: [[fallthrough]]; + case NodeType::Number: [[fallthrough]]; + case NodeType::String: [[fallthrough]]; + case NodeType::Field: return true; + + case NodeType::Capture: [[fallthrough]]; + case NodeType::Keyword: [[fallthrough]]; + case NodeType::List: [[fallthrough]]; + case NodeType::Spread: [[fallthrough]]; + case NodeType::Macro: [[fallthrough]]; + case NodeType::Unused: return false; + } +} + +std::size_t Formatter::lineOfLastNodeIn(const Ark::internal::Node& node) +{ + if (node.isListLike() && !node.constList().empty()) + return lineOfLastNodeIn(node.constList().back()); + else + return node.line(); +} + +bool Formatter::should_split_on_newline(const Ark::internal::Node& node) +{ + std::string formatted = format(node, 0, false); + std::string::size_type sz = formatted.find_first_of('\n'); + + bool is_long_line = !((sz < 32 || (sz == std::string::npos && formatted.size() < 32))); + if (isBeginBlock(node) || isFuncCall(node)) + return false; + else if (is_long_line) + return true; + else if (node.isListLike() && node.constList().size() > 1) + return true; + return false; +} + +std::string Formatter::format(const Ark::internal::Node& node, std::size_t indent, bool after_newline) +{ + const std::string prefix = std::string(indent * 4, ' '); + + std::string output; + if (!node.comment().empty()) + { + std::string comment = node.comment(); + output += prefix; + for (std::size_t i = 0, end = comment.size(); i < end; ++i) + { + output += comment[i]; + if (comment[i] == '\n' && i != end - 1) + output += prefix; + } + } + if (after_newline) + output += prefix; + + switch (node.nodeType()) + { + case NodeType::Symbol: + output += node.string(); + break; + case NodeType::Capture: + output += "&" + node.string(); + break; + case NodeType::Keyword: + output += std::string(keywords[static_cast(node.keyword())]); + break; + case NodeType::String: + output += fmt::format("\"{}\"", node.string()); + break; + case NodeType::Number: + output += fmt::format("{}", node.number()); + break; + case NodeType::List: + output += formatBlock(node, indent, after_newline); + break; + case NodeType::Spread: + output += fmt::format("...{}", node.string()); + break; + case NodeType::Field: + { + std::string field = format(node.constList()[0], indent, false); + for (std::size_t i = 1, end = node.constList().size(); i < end; ++i) + field += "." + format(node.constList()[1], indent, false); + output += field; + break; + } + case NodeType::Macro: + output += formatMacro(node, indent); + break; + case NodeType::Unused: + break; + } + + return output; +} + +std::string Formatter::formatBlock(const Ark::internal::Node& node, std::size_t indent, bool after_newline) +{ + if (node.constList().empty()) + return "()"; + + const Node first = node.constList().front(); + if (first.nodeType() == NodeType::Keyword) + { + switch (first.keyword()) + { + case Keyword::Fun: + return formatFunction(node, indent); + case Keyword::Let: + [[fallthrough]]; + case Keyword::Mut: + [[fallthrough]]; + case Keyword::Set: + return formatVariable(node, indent); + case Keyword::If: + return formatCondition(node, indent); + case Keyword::While: + return formatLoop(node, indent); + case Keyword::Begin: + return formatBegin(node, indent, after_newline); + case Keyword::Import: + return formatImport(node, indent); + case Keyword::Del: + return formatDel(node, indent); + } + } + else + return formatCall(node, indent); +} + +std::string Formatter::formatFunction(const Ark::internal::Node& node, std::size_t indent) +{ + const Node args_node = node.constList()[1]; + const Node body_node = node.constList()[2]; + std::string args; + bool comment_in_args = false; + for (std::size_t i = 0, end = args_node.constList().size(); i < end; ++i) + { + const Node arg_i = args_node.constList()[i]; + bool has_comment = !arg_i.comment().empty(); + if (has_comment) + comment_in_args = true; + + args += format(arg_i, indent + (comment_in_args ? 1 : 0), comment_in_args); + if (i != end - 1) + args += comment_in_args ? '\n' : ' '; + } + + std::string formatted_body = format(body_node, indent + 1, false); + std::string formatted_args = fmt::format("({}{})", (comment_in_args ? "\n" : ""), args); + + if (!should_split_on_newline(body_node)) + return fmt::format("(fun {} {})", formatted_args, formatted_body); + else + return fmt::format("(fun {}\n{})", formatted_args, format(body_node, indent + 1, true)); +} + +std::string Formatter::formatVariable(const Ark::internal::Node& node, std::size_t indent) +{ + std::string keyword = std::string(keywords[static_cast(node.constList()[0].keyword())]); + + const Node body_node = node.constList()[2]; + std::string formatted_body = format(body_node, indent, false); + + if (!should_split_on_newline(body_node) || isFuncDef(body_node)) + return fmt::format("({} {} {})", keyword, format(node.constList()[1], indent, false), formatted_body); + else + return fmt::format("({} {}\n{})", keyword, format(node.constList()[1], indent, false), format(node.constList()[2], indent + 1, true)); +} + +std::string Formatter::formatCondition(const Ark::internal::Node& node, std::size_t indent, bool is_macro) +{ + const Node cond_node = node.constList()[1]; + const Node then_node = node.constList()[2]; + + std::string formatted_cond = format(cond_node, indent, false); + if (formatted_cond.find('\n') != std::string::npos) + formatted_cond = format(cond_node, indent, true); + + std::string start_formatted = fmt::format( + "({}if {}\n{}", + is_macro ? "$" : "", + formatted_cond, + format(then_node, indent + 1, true)); + + // if cond then + if (node.constList().size() == 3) + return start_formatted + ")"; + else // if cond then else + return fmt::format("{}\n{})", start_formatted, format(node.constList()[3], indent + 1, true)); +} + +std::string Formatter::formatLoop(const Ark::internal::Node& node, std::size_t indent) +{ + const Node cond_node = node.constList()[1]; + const Node body_node = node.constList()[2]; + return fmt::format("(while {} {})", format(cond_node, indent, false), format(body_node, indent + 1, false)); +} + +std::string Formatter::formatBegin(const Ark::internal::Node& node, std::size_t indent, bool after_newline) +{ + std::string output = "{\n"; + std::size_t previous_line = 0; + // skip begin keyword + for (std::size_t i = 1, end = node.constList().size(); i < end; ++i) + { + const Node child = node.constList()[i]; + // we want to preserve the node grouping by the user, but remove useless duplicate new line + // but that shouldn't apply to the first node of the block + if (child.line() - previous_line > 1 && i > 1) + output += "\n"; + previous_line = lineOfLastNodeIn(child); + if (previous_line < child.line()) // FIXME: shouldn't happen, but here we are + previous_line = child.line(); + + output += format(child, indent + (after_newline ? 1 : 0), true); + if (i != end - 1) + output += "\n"; + } + output += " }"; + return output; +} + +std::string Formatter::formatImport(const Ark::internal::Node& node, std::size_t indent) +{ + const Node package_node = node.constList()[1]; + std::string package; + for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i) + { + package += package_node.constList()[i].string(); + if (i != end - 1) + package += "."; + } + + const Node symbols = node.constList()[2]; + if (symbols.nodeType() == NodeType::Symbol && symbols.string() == "*") + package += ":*"; + else // symbols is a list + { + for (const auto& sym : symbols.constList()) + package += " :" + sym.string(); // TODO: handle comments? + } + + return fmt::format("(import {})", package); +} + +std::string Formatter::formatDel(const Ark::internal::Node& node, std::size_t indent) +{ + return fmt::format("(del {})", format(node.constList()[1], indent, false)); +} + +std::string Formatter::formatCall(const Ark::internal::Node& node, std::size_t indent) +{ + bool is_list = false; + if (!node.constList().empty() && node.constList().front().nodeType() == NodeType::Symbol && + node.constList().front().string() == "list") + is_list = true; + + bool is_multiline = false; + + std::vector formatted_args; + for (std::size_t i = 1, end = node.constList().size(); i < end; ++i) + { + formatted_args.push_back(format(node.constList()[i], indent, false)); + // if we have at least one argument taking multiple lines, split them all on their own line + if (formatted_args.back().find('\n') != std::string::npos) + is_multiline = true; + } + + std::string output = is_list ? "[" : ("(" + format(node.constList()[0], indent, false)); + for (std::size_t i = 0, end = formatted_args.size(); i < end; ++i) + { + const std::string formatted_node = formatted_args[i]; + if (is_multiline && formatted_args.size() > 1) + output += "\n" + format(node.constList()[i + 1], indent + 1, true); + else + output += (is_list && i == 0 ? "" : " ") + formatted_node; + } + output += is_list ? "]" : ")"; + return output; +} + +std::string Formatter::formatMacro(const Ark::internal::Node& node, std::size_t indent) +{ + if (isListStartingWithKeyword(node, Keyword::If)) + return formatCondition(node, indent, /* is_macro= */ true); + + std::string output = "($"; + for (const auto& child : node.constList()) + output += " " + format(child, indent + 1, false); + + return output + ")"; +} diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index 2ba892724..474c38fb9 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -3,18 +3,16 @@ #include #include #include -#include #include #include #include -#include -#include - #include #include #include +#include +#include int main(int argc, char** argv) { @@ -30,25 +28,28 @@ int main(int argc, char** argv) repl, compile, eval, - ast + ast, + format }; mode selected = mode::repl; - std::string file, eval_expression; - unsigned debug = 0; constexpr uint16_t max_uint16 = std::numeric_limits::max(); + // Bytecode reader // by default, select all pages and segment types, without slicing anything uint16_t bcr_page = max_uint16; uint16_t bcr_start = max_uint16; uint16_t bcr_end = max_uint16; Ark::BytecodeSegment segment = Ark::BytecodeSegment::All; - - std::vector wrong, script_args; - + // Eval / Run / AST dump + std::string file, eval_expression; std::string libdir; + // Formatting + bool dry_run = false; + // Generic arguments + std::vector wrong, script_args; // clang-format off auto cli = ( @@ -75,6 +76,11 @@ int main(int argc, char** argv) ) , any_other(script_args) ) + | ( + required("-f", "--format").set(selected, mode::format).doc("Format the given source file in place") + & value("file", file) + , option("--dry-run").set(dry_run, true).doc("Do not modify the file, only print out the changes\n") + ) | ( required("--ast").set(selected, mode::ast).doc("Compile the given program and output its AST as JSON to stdout") & value("file", file) @@ -282,6 +288,14 @@ int main(int argc, char** argv) } break; } + + case mode::format: + { + Formatter formatter(file, dry_run); + formatter.run(); + if (dry_run) + std::cout << formatter.output() << std::endl; + } } } else diff --git a/tests/unittests/FormatterSuite.cpp b/tests/unittests/FormatterSuite.cpp new file mode 100644 index 000000000..d667dfabe --- /dev/null +++ b/tests/unittests/FormatterSuite.cpp @@ -0,0 +1,24 @@ +#include + +#include + +#include "TestsHelper.hpp" + +using namespace boost; + +ut::suite<"Formatter"> formatter_suite = [] { + using namespace ut; + + iter_test_files( + "FormatterSuite", + [](TestData&& data) { + Formatter formatter(data.path, /* dry_run= */ true); + should("output a correctly formatted code for " + data.stem) = [&] { + expect(nothrow([&] { + mut(formatter).run(); + })); + std::string code = formatter.output(); + expect(that % code == data.expected); + }; + }); +}; \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/functions.ark b/tests/unittests/resources/FormatterSuite/functions.ark new file mode 100644 index 000000000..9756f6e91 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/functions.ark @@ -0,0 +1,12 @@ +(fun () ()) +(fun ( a b ) ( + a b)) +( +fun +( +a) { +a +}) +(call me maybe) +(call (fun () { + hello +}) maybe) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/functions.expected b/tests/unittests/resources/FormatterSuite/functions.expected new file mode 100644 index 000000000..6ed234c00 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/functions.expected @@ -0,0 +1,9 @@ +(fun () ()) +(fun (a b) (+ a b)) +(fun (a) { + a }) +(call me maybe) +(call + (fun () { + hello }) + maybe)