From 2682f2e3a65ca416dfb73038b620a4134f95a497 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sun, 21 Jul 2024 22:51:41 +0200 Subject: [PATCH 1/8] feat(compiler): introducing compiler passes --- CHANGELOG.md | 1 + include/Ark/Compiler/AST/Optimizer.hpp | 12 +-- include/Ark/Compiler/AST/Parser.hpp | 10 +- include/Ark/Compiler/ImportSolver.hpp | 22 +++- include/Ark/Compiler/Macros/Processor.hpp | 8 +- include/Ark/Compiler/Pass.hpp | 100 +++++++++++++++++++ include/Ark/Compiler/Welder.hpp | 21 ++-- src/arkreactor/Compiler/AST/Optimizer.cpp | 13 +-- src/arkreactor/Compiler/AST/Parser.cpp | 5 +- src/arkreactor/Compiler/ImportSolver.cpp | 27 ++--- src/arkreactor/Compiler/Macros/Processor.cpp | 11 +- src/arkreactor/Compiler/Pass.cpp | 14 +++ src/arkreactor/Compiler/Welder.cpp | 34 +++++-- src/arkscript/JsonCompiler.cpp | 1 + 14 files changed, 215 insertions(+), 64 deletions(-) create mode 100644 include/Ark/Compiler/Pass.hpp create mode 100644 src/arkreactor/Compiler/Pass.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index bdb547990..beb9ae230 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - warning when the formatter deletes comment(s) by mistake - check on arguments passed to `list`, `concat`, `append` and friends to only push valid nodes (that produces a value) - `$paste` to paste a node inside a maro without evaluating it further ; useful to stop recursive evaluation of nodes inside function macros +- introduced `Ark::internal::Pass` to describe compiler passes: they all output an AST (parser, import solver, macro processor, and optimizer for now) ### Changed - instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument diff --git a/include/Ark/Compiler/AST/Optimizer.hpp b/include/Ark/Compiler/AST/Optimizer.hpp index de3b3da7d..378ac1ccf 100644 --- a/include/Ark/Compiler/AST/Optimizer.hpp +++ b/include/Ark/Compiler/AST/Optimizer.hpp @@ -2,7 +2,7 @@ * @file Optimizer.hpp * @author Alexandre Plateau (lexplt.dev@gmail.com) * @brief Optimizes a given ArkScript AST - * @version 1.0 + * @version 1.1 * @date 2024-07-09 * * @copyright Copyright (c) 2020-2024 @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -26,7 +27,7 @@ namespace Ark::internal * @brief The ArkScript AST optimizer * */ - class Optimizer + class Optimizer final : public Pass { public: /** @@ -41,18 +42,17 @@ namespace Ark::internal * * @param ast */ - void process(const Node& ast); + void process(const Node& ast) override; /** * @brief Returns the modified AST * * @return const Node& */ - [[nodiscard]] const Node& ast() const noexcept; + [[nodiscard]] const Node& ast() const noexcept override; private: Node m_ast; - unsigned m_debug; std::unordered_map m_sym_appearances; /** @@ -67,7 +67,7 @@ namespace Ark::internal * @brief Iterate over the AST and remove unused top level functions and constants * */ - void remove_unused(); + void removeUnused(); /** * @brief Run a given functor on the global scope symbols diff --git a/include/Ark/Compiler/AST/Parser.hpp b/include/Ark/Compiler/AST/Parser.hpp index e402a52b4..77a253df4 100644 --- a/include/Ark/Compiler/AST/Parser.hpp +++ b/include/Ark/Compiler/AST/Parser.hpp @@ -2,7 +2,7 @@ * @file Parser.hpp * @author Alexandre Plateau (lexplt.dev@gmail.com) * @brief Parse ArkScript code, but do not handle any import declarations - * @version 0.1 + * @version 0.2 * @date 2024-05-12 * * @copyright Copyright (c) 2024 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -26,7 +27,7 @@ namespace Ark::internal { - class ARK_API Parser : public BaseParser + class ARK_API Parser final : public BaseParser, public Pass { public: /** @@ -37,7 +38,7 @@ namespace Ark::internal void process(const std::string& filename, const std::string& code); - [[nodiscard]] const Node& ast() const; + [[nodiscard]] const Node& ast() const noexcept override; [[nodiscard]] const std::vector& imports() const; private: @@ -46,6 +47,9 @@ namespace Ark::internal std::vector m_imports; unsigned m_allow_macro_behavior; ///< Toggled on when inside a macro definition, off afterward + // HACK so that the parser can be a pass and use the loggers + void process([[maybe_unused]] const Node&) override {} + void run(); Node& setNodePosAndFilename(Node& node, const std::optional& cursor = std::nullopt) const; diff --git a/include/Ark/Compiler/ImportSolver.hpp b/include/Ark/Compiler/ImportSolver.hpp index 6b8e87670..b09b1cf10 100644 --- a/include/Ark/Compiler/ImportSolver.hpp +++ b/include/Ark/Compiler/ImportSolver.hpp @@ -1,31 +1,45 @@ +/** + * @file ImportSolver.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief Handle imports, resolve them with modules and everything + * @version 2.0 + * @date 2024-07-21 + * + * @copyright Copyright (c) 2020-2024 + * + */ + #ifndef ARK_COMPILER_IMPORTSOLVER_HPP #define ARK_COMPILER_IMPORTSOLVER_HPP +#include #include #include #include #include +#include #include #include #include namespace Ark::internal { - class ImportSolver final + class ImportSolver final : public Pass { public: ImportSolver(unsigned debug, const std::vector& libenv); - void process(const std::filesystem::path& root, const Node& origin_ast, const std::vector& origin_imports); + ImportSolver& setup(const std::filesystem::path& root, const std::vector& origin_imports); + void process(const Node& origin_ast) override; - [[nodiscard]] const Node& ast() const noexcept; + [[nodiscard]] const Node& ast() const noexcept override; private: - unsigned m_debug; std::vector m_libenv; std::filesystem::path m_root; ///< Folder were the entry file is Node m_ast; + std::stack m_imports; std::unordered_map m_modules; ///< Package to module map // TODO is this ok? is this fine? this is sort of ugly std::vector m_imported; ///< List of imports, in the order they were found and parsed diff --git a/include/Ark/Compiler/Macros/Processor.hpp b/include/Ark/Compiler/Macros/Processor.hpp index a02cefd44..ff73ed260 100644 --- a/include/Ark/Compiler/Macros/Processor.hpp +++ b/include/Ark/Compiler/Macros/Processor.hpp @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -27,7 +28,7 @@ namespace Ark::internal * @brief The class handling the macros definitions and calls, given an AST * */ - class MacroProcessor + class MacroProcessor final : public Pass { public: /** @@ -42,19 +43,18 @@ namespace Ark::internal * * @param ast */ - void process(const Node& ast); + void process(const Node& ast) override; /** * @brief Return the modified AST * * @return Node& */ - [[nodiscard]] const Node& ast() const noexcept; + [[nodiscard]] const Node& ast() const noexcept override; friend class MacroExecutor; private: - unsigned m_debug; ///< The debug level Node m_ast; ///< The modified AST std::vector m_macros; ///< Handling macros in a scope fashion std::vector> m_executors; diff --git a/include/Ark/Compiler/Pass.hpp b/include/Ark/Compiler/Pass.hpp new file mode 100644 index 000000000..bf81d847b --- /dev/null +++ b/include/Ark/Compiler/Pass.hpp @@ -0,0 +1,100 @@ +/** + * @file Pass.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief Interface for a compiler pass (take in an AST, output an AST) + * @version 0.1 + * @date 2024-07-21 + * + * @copyright Copyright (c) 2024 + * + */ +#ifndef ARK_COMPILER_PASS_HPP +#define ARK_COMPILER_PASS_HPP + +#include + +#include +#include + +namespace Ark::internal +{ + /** + * @brief An interface to describe compiler passes + */ + class Pass + { + public: + /** + * @brief Construct a new Pass object + */ + Pass(); + + /** + * @brief Construct a new Pass object + * + * @param name the pass name, used for logging + * @param debug_level debug level + */ + Pass(std::string name, unsigned debug_level); + + virtual ~Pass() = default; + + virtual void process(const Node& ast) = 0; + + /** + * @brief Output of the compiler pass + * + * @return const Node& the modified AST + */ + [[nodiscard]] virtual const Node& ast() const noexcept = 0; + + enum class LogLevel + { + None, + Info, + Debug, + Trace, + Other + }; + + protected: + inline unsigned debugLevel() const { return m_debug; } + + inline bool shouldInfo() const { return m_debug >= 1; } + inline bool shouldDebug() const { return m_debug >= 2; } + inline bool shouldTrace() const { return m_debug >= 3; } + + template + void log(const char* fmt, Args&&... args) + { + if (shouldInfo()) + std::cout << fmt::format("[INFO ][{}] ", m_name) + << fmt::vformat(fmt, fmt::make_format_args(args...)) + << std::endl; + } + + template + void logDebug(const char* fmt, Args&&... args) + { + if (shouldDebug()) + std::cout << fmt::format("[DEBUG][{}] ", m_name) + << fmt::vformat(fmt, fmt::make_format_args(args...)) + << std::endl; + } + + template + void logTrace(const char* fmt, Args&&... args) + { + if (shouldTrace()) + std::cout << fmt::format("[TRACE][{}] ", m_name) + << fmt::vformat(fmt, fmt::make_format_args(args...)) + << std::endl; + } + + private: + unsigned m_debug; + std::string m_name; + }; +} + +#endif // ARK_COMPILER_PASS_HPP diff --git a/include/Ark/Compiler/Welder.hpp b/include/Ark/Compiler/Welder.hpp index 94f3a15a6..2ea206886 100644 --- a/include/Ark/Compiler/Welder.hpp +++ b/include/Ark/Compiler/Welder.hpp @@ -19,14 +19,15 @@ #include #include #include -#include -#include -#include #include +#include namespace Ark { - class ARK_API Welder final + /** + * @brief The welder joins all the compiler passes, being itself one since it can output an AST too + */ + class ARK_API Welder final : public internal::Pass { public: Welder(unsigned debug, const std::vector& lib_env); @@ -44,22 +45,24 @@ namespace Ark bool generateBytecode(); bool saveBytecodeToFile(const std::string& filename); - [[nodiscard]] const internal::Node& ast() const noexcept; + [[nodiscard]] const internal::Node& ast() const noexcept override; [[nodiscard]] const bytecode_t& bytecode() const noexcept; private: - unsigned m_debug; ///< The debug level + std::vector m_lib_env; + std::filesystem::path m_root_file; std::vector m_imports; bytecode_t m_bytecode; + std::vector> m_passes; internal::Parser m_parser; - internal::ImportSolver m_importer; - internal::MacroProcessor m_macro_processor; - internal::Optimizer m_optimizer; Compiler m_compiler; bool computeAST(const std::string& filename, const std::string& code); + + // HACK so that the parser can be a pass and use the loggers + void process([[maybe_unused]] const internal::Node&) override {} }; } // namespace Ark diff --git a/src/arkreactor/Compiler/AST/Optimizer.cpp b/src/arkreactor/Compiler/AST/Optimizer.cpp index 09639d5f2..1fb45e3b0 100644 --- a/src/arkreactor/Compiler/AST/Optimizer.cpp +++ b/src/arkreactor/Compiler/AST/Optimizer.cpp @@ -3,14 +3,13 @@ namespace Ark::internal { Optimizer::Optimizer(const unsigned debug) noexcept : - m_ast(), - m_debug(debug) + Pass("Optimizer", debug), m_ast() {} void Optimizer::process(const Node& ast) { m_ast = ast; - // FIXME activate this remove_unused(); + // FIXME activate this removeUnused(); } const Node& Optimizer::ast() const noexcept @@ -23,7 +22,7 @@ namespace Ark::internal throw CodeError(message, node.filename(), node.line(), node.col(), node.repr()); } - void Optimizer::remove_unused() + void Optimizer::removeUnused() { // do not handle non-list nodes if (m_ast.nodeType() != NodeType::List) @@ -40,8 +39,7 @@ namespace Ark::internal // a variable was only declared and never used if (m_sym_appearances.contains(name) && m_sym_appearances[name] == 1 && parent.list()[idx].list()[2].nodeType() != NodeType::List) { - if (m_debug > 1) - std::cout << "Removing unused variable '" << name << "'" << std::endl; + logDebug("Removing unused variable '{}'", name); // erase the node from the list parent.list().erase(parent.list().begin() + static_cast::difference_type>(idx)); } @@ -79,9 +77,8 @@ namespace Ark::internal { if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::Capture) { - const std::string& name = node.string(); // check if it's the name of something declared in global scope - if (m_sym_appearances.find(name) != m_sym_appearances.end()) + if (const auto name = node.string(); m_sym_appearances.contains(name)) m_sym_appearances[name]++; } else if (node.nodeType() == NodeType::List) diff --git a/src/arkreactor/Compiler/AST/Parser.cpp b/src/arkreactor/Compiler/AST/Parser.cpp index cddbffaec..ac51a3895 100644 --- a/src/arkreactor/Compiler/AST/Parser.cpp +++ b/src/arkreactor/Compiler/AST/Parser.cpp @@ -5,7 +5,8 @@ namespace Ark::internal { Parser::Parser(const bool interpret) : - BaseParser(), m_interpret(interpret), m_ast(NodeType::List), m_imports({}), m_allow_macro_behavior(0) + BaseParser(), Pass("Parser", 0), m_interpret(interpret), + m_ast(NodeType::List), m_imports({}), m_allow_macro_behavior(0) { m_ast.push_back(Node(Keyword::Begin)); } @@ -16,7 +17,7 @@ namespace Ark::internal run(); } - const Node& Parser::ast() const + const Node& Parser::ast() const noexcept { return m_ast; } diff --git a/src/arkreactor/Compiler/ImportSolver.cpp b/src/arkreactor/Compiler/ImportSolver.cpp index a3b22232f..b10fef5be 100644 --- a/src/arkreactor/Compiler/ImportSolver.cpp +++ b/src/arkreactor/Compiler/ImportSolver.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include @@ -12,35 +11,39 @@ namespace Ark::internal { ImportSolver::ImportSolver(const unsigned debug, const std::vector& libenv) : - m_debug(debug), m_libenv(libenv), m_ast() + Pass("ImportSolver", debug), m_libenv(libenv), m_ast() {} - void ImportSolver::process(const std::filesystem::path& root, const Node& origin_ast, const std::vector& origin_imports) + ImportSolver& ImportSolver::setup(const std::filesystem::path& root, const std::vector& origin_imports) { if (is_directory(root)) m_root = root; else m_root = root.parent_path(); - std::stack imports; for (const auto& origin_import : std::ranges::reverse_view(origin_imports)) - imports.push(origin_import); + m_imports.push(origin_import); - while (!imports.empty()) + return *this; + } + + void ImportSolver::process(const Node& origin_ast) + { + while (!m_imports.empty()) { - Import import = imports.top(); - if (m_debug >= 2) - std::cout << "Importing " << import.toPackageString() << std::endl; + Import import = m_imports.top(); + logDebug("Importing {}", import.toPackageString()); + // Remove the top element to process the other imports // It needs to be removed first because we might be adding // other imports later and don't want to pop THEM - imports.pop(); + m_imports.pop(); // TODO: add special handling for each type of import (prefixed, with symbols, glob pattern) if (!m_modules.contains(import.toPackageString())) { // NOTE: since the "file" (=root) argument doesn't change between all calls, we could get rid of it - std::vector additional_imports = parseImport(root, import); + std::vector additional_imports = parseImport(m_root, import); // TODO import and store the new node as a Module node. // Module nodes should be scoped relatively to their packages // They should provide specific methods to resolve symbols, @@ -49,7 +52,7 @@ namespace Ark::internal // accordingly, and once we are done concat all the nodes // in a single AST. for (auto& additional_import : std::ranges::reverse_view(additional_imports)) - imports.push(additional_import); + m_imports.push(additional_import); } else { diff --git a/src/arkreactor/Compiler/Macros/Processor.cpp b/src/arkreactor/Compiler/Macros/Processor.cpp index f53451d55..8526e2f06 100644 --- a/src/arkreactor/Compiler/Macros/Processor.cpp +++ b/src/arkreactor/Compiler/Macros/Processor.cpp @@ -17,7 +17,7 @@ namespace Ark::internal { MacroProcessor::MacroProcessor(const unsigned debug) noexcept : - m_debug(debug) + Pass("MacroProcessor", debug) { // create executors pipeline m_executors = { { std::make_shared(this), @@ -34,18 +34,15 @@ namespace Ark::internal void MacroProcessor::process(const Node& ast) { - if (m_debug >= 2) - std::cout << "Processing macros...\n"; + logDebug("Processing macros..."); // to be able to modify it m_ast = ast; processNode(m_ast, 0); - if (m_debug >= 3) - { - std::cout << "(MacroProcessor) AST after processing macros\n"; + logTrace("AST after processing macros"); + if (shouldTrace()) m_ast.debugPrint(std::cout) << '\n'; - } } const Node& MacroProcessor::ast() const noexcept diff --git a/src/arkreactor/Compiler/Pass.cpp b/src/arkreactor/Compiler/Pass.cpp new file mode 100644 index 000000000..74263ca03 --- /dev/null +++ b/src/arkreactor/Compiler/Pass.cpp @@ -0,0 +1,14 @@ +#include +#include + +namespace Ark::internal +{ + Pass::Pass() : + m_debug(0) + {} + + Pass::Pass(std::string name, const unsigned debug_level) : + m_debug(debug_level), m_name(std::move(name)) + {} + +} diff --git a/src/arkreactor/Compiler/Welder.cpp b/src/arkreactor/Compiler/Welder.cpp index 2e816ad2e..dcf90ba76 100644 --- a/src/arkreactor/Compiler/Welder.cpp +++ b/src/arkreactor/Compiler/Welder.cpp @@ -1,7 +1,9 @@ #include #include -#include +#include +#include +#include #include #include @@ -9,7 +11,8 @@ namespace Ark { Welder::Welder(const unsigned debug, const std::vector& lib_env) : - m_debug(debug), m_importer(debug, lib_env), m_macro_processor(debug), m_optimizer(debug), m_compiler(debug) + Pass("Welder", debug), m_lib_env(lib_env), + m_compiler(debug) {} void Welder::registerSymbol(const std::string& name) @@ -36,7 +39,7 @@ namespace Ark { try { - m_compiler.process(m_optimizer.ast()); + m_compiler.process(m_passes.back()->ast()); m_bytecode = m_compiler.bytecode(); return true; @@ -50,8 +53,7 @@ namespace Ark bool Welder::saveBytecodeToFile(const std::string& filename) { - if (m_debug >= 1) - std::cout << "Final bytecode size: " << m_bytecode.size() * sizeof(uint8_t) << "B\n"; + log("Final bytecode size: {}B", m_bytecode.size() * sizeof(uint8_t)); if (m_bytecode.empty()) return false; @@ -66,7 +68,7 @@ namespace Ark const internal::Node& Welder::ast() const noexcept { - return m_optimizer.ast(); + return m_passes.back()->ast(); } const bytecode_t& Welder::bytecode() const noexcept @@ -79,9 +81,23 @@ namespace Ark try { m_parser.process(filename, code); - m_importer.process(m_root_file, m_parser.ast(), m_parser.imports()); - m_macro_processor.process(m_importer.ast()); - m_optimizer.process(m_macro_processor.ast()); + + { + auto import_solver_pass = std::make_unique(debugLevel(), m_lib_env); + import_solver_pass->setup(m_root_file, m_parser.imports()); + m_passes.push_back(std::move(import_solver_pass)); + } + m_passes.emplace_back(std::make_unique(debugLevel())); + m_passes.emplace_back(std::make_unique(debugLevel())); + + std::accumulate( + m_passes.begin(), + m_passes.end(), + m_parser.ast(), + [](const internal::Node& ast, const std::unique_ptr& pass) { + pass->process(ast); + return pass->ast(); + }); return true; } diff --git a/src/arkscript/JsonCompiler.cpp b/src/arkscript/JsonCompiler.cpp index b50b8b458..c4e91d337 100644 --- a/src/arkscript/JsonCompiler.cpp +++ b/src/arkscript/JsonCompiler.cpp @@ -1,6 +1,7 @@ #include #include +#include #include From 8b1f9ec9204d6984194e8566cd3e6cb2a17130a7 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Mon, 22 Jul 2024 20:58:10 +0200 Subject: [PATCH 2/8] feat(cli): allow passing feature flag to toggle compiler passes --- CHANGELOG.md | 1 + include/Ark/Compiler/BytecodeReader.hpp | 2 +- include/Ark/Compiler/Welder.hpp | 5 ++- include/Ark/Constants.hpp.in | 7 ++-- include/Ark/VM/State.hpp | 10 +++-- lib/std | 2 +- src/arkreactor/Compiler/BytecodeReader.cpp | 9 ++--- src/arkreactor/Compiler/Welder.cpp | 19 ++++++---- src/arkreactor/VM/State.cpp | 12 +++--- src/arkscript/main.cpp | 43 +++++++++++++++------- 10 files changed, 68 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index beb9ae230..d157f6e94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - check on arguments passed to `list`, `concat`, `append` and friends to only push valid nodes (that produces a value) - `$paste` to paste a node inside a maro without evaluating it further ; useful to stop recursive evaluation of nodes inside function macros - introduced `Ark::internal::Pass` to describe compiler passes: they all output an AST (parser, import solver, macro processor, and optimizer for now) +- add `-f(no-)importsolver`, `-f(no-)macroprocessor` and `-f(no-)optimizer` to toggle on and off those compiler passes ### Changed - instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument diff --git a/include/Ark/Compiler/BytecodeReader.hpp b/include/Ark/Compiler/BytecodeReader.hpp index 833bb98c3..14df13ecd 100644 --- a/include/Ark/Compiler/BytecodeReader.hpp +++ b/include/Ark/Compiler/BytecodeReader.hpp @@ -151,7 +151,7 @@ namespace Ark void display(BytecodeSegment segment = BytecodeSegment::All, std::optional sStart = std::nullopt, std::optional sEnd = std::nullopt, - std::optional cPage = std::nullopt); + std::optional cPage = std::nullopt) const; friend class Ark::State; diff --git a/include/Ark/Compiler/Welder.hpp b/include/Ark/Compiler/Welder.hpp index 2ea206886..b00eb0c2a 100644 --- a/include/Ark/Compiler/Welder.hpp +++ b/include/Ark/Compiler/Welder.hpp @@ -21,6 +21,7 @@ #include #include #include +#include namespace Ark { @@ -30,7 +31,7 @@ namespace Ark class ARK_API Welder final : public internal::Pass { public: - Welder(unsigned debug, const std::vector& lib_env); + Welder(unsigned debug, const std::vector& lib_env, uint16_t features = DefaultFeatures); /** * @brief Register a symbol as a global in the compiler @@ -50,10 +51,12 @@ namespace Ark private: std::vector m_lib_env; + uint16_t m_features; std::filesystem::path m_root_file; std::vector m_imports; bytecode_t m_bytecode; + internal::Node m_computed_ast; std::vector> m_passes; internal::Parser m_parser; diff --git a/include/Ark/Constants.hpp.in b/include/Ark/Constants.hpp.in index 2d8480f3e..13af41fc3 100644 --- a/include/Ark/Constants.hpp.in +++ b/include/Ark/Constants.hpp.in @@ -46,11 +46,12 @@ constexpr std::string_view ARK_FULL_VERSION { "@ARK_VERSION_MAJOR@.@ARK_VERSION_ namespace Ark { // Compiler options - constexpr uint16_t FeatureRemoveUnusedVars = 1 << 4; - constexpr uint16_t FeatureShowWarnings = 1 << 5; + constexpr uint16_t FeatureImportSolver = 1 << 0; + constexpr uint16_t FeatureMacroProcessor = 1 << 1; + constexpr uint16_t FeatureASTOptimizer = 1 << 2; // Default features for the VM x Compiler x Parser - constexpr uint16_t DefaultFeatures = FeatureRemoveUnusedVars | FeatureShowWarnings; + constexpr uint16_t DefaultFeatures = FeatureImportSolver | FeatureMacroProcessor | FeatureASTOptimizer; constexpr std::size_t MaxMacroProcessingDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::processNode constexpr std::size_t MaxMacroUnificationDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::unify diff --git a/include/Ark/VM/State.hpp b/include/Ark/VM/State.hpp index 5e2c2b3a4..653e1c6cf 100644 --- a/include/Ark/VM/State.hpp +++ b/include/Ark/VM/State.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -59,19 +60,21 @@ namespace Ark * @brief Compile a file, and use the resulting bytecode * * @param filename path to an ArkScript code file + * @param features compiler features to enable/disable * @return true on success * @return false on failure */ - bool doFile(const std::string& filename); + bool doFile(const std::string& filename, uint16_t features = DefaultFeatures); /** * @brief Compile a string (representing ArkScript code) and store resulting bytecode in m_bytecode * * @param code the ArkScript code + * @param features compiler features to enable/disable * @return true on success * @return false on failure */ - bool doString(const std::string& code); + bool doString(const std::string& code, uint16_t features = DefaultFeatures); /** * @brief Register a function in the virtual machine @@ -125,10 +128,11 @@ namespace Ark * * @param file the path of file code to compile * @param output set path of .arkc file + * @param features compiler features to enable/disable * @return true on success * @return false on failure and raise an exception */ - bool compile(const std::string& file, const std::string& output) const; + bool compile(const std::string& file, const std::string& output, uint16_t features) const; static void throwStateError(const std::string& message) { diff --git a/lib/std b/lib/std index 9ec80d651..107dc1f8a 160000 --- a/lib/std +++ b/lib/std @@ -1 +1 @@ -Subproject commit 9ec80d651ede7f9130befcffd984a9aa941af60e +Subproject commit 107dc1f8a2aa0482a12465c650c102370d143591 diff --git a/src/arkreactor/Compiler/BytecodeReader.cpp b/src/arkreactor/Compiler/BytecodeReader.cpp index a54e07e99..078f98ce3 100644 --- a/src/arkreactor/Compiler/BytecodeReader.cpp +++ b/src/arkreactor/Compiler/BytecodeReader.cpp @@ -42,7 +42,6 @@ namespace Ark m_bytecode[3] == internal::Instruction::NOP; } - const bytecode_t& BytecodeReader::bytecode() noexcept { return m_bytecode; @@ -60,7 +59,6 @@ namespace Ark }; } - unsigned long long BytecodeReader::timestamp() const { // 4 (ark\0) + version (2 bytes / number) + timestamp = 18 bytes @@ -195,11 +193,10 @@ namespace Ark return block; } - void BytecodeReader::display(const BytecodeSegment segment, const std::optional sStart, const std::optional sEnd, - const std::optional cPage) + const std::optional cPage) const { std::ostream& os = std::cout; @@ -359,7 +356,7 @@ namespace Ark { const uint8_t padding = page[j]; const uint8_t inst = page[j + 1]; - const uint16_t arg = static_cast((page[j + 2] << 8) + page[j + 3]); + const auto arg = static_cast((page[j + 2] << 8) + page[j + 3]); // instruction number os << termcolor::cyan << fmt::format("{:>4}", j / 4) << termcolor::reset; @@ -489,7 +486,7 @@ namespace Ark uint16_t BytecodeReader::readNumber(std::size_t& i) const { - const uint16_t x = static_cast(m_bytecode[i] << 8); + const auto x = static_cast(m_bytecode[i] << 8); const uint16_t y = m_bytecode[++i]; return x + y; } diff --git a/src/arkreactor/Compiler/Welder.cpp b/src/arkreactor/Compiler/Welder.cpp index dcf90ba76..f99add738 100644 --- a/src/arkreactor/Compiler/Welder.cpp +++ b/src/arkreactor/Compiler/Welder.cpp @@ -10,9 +10,9 @@ namespace Ark { - Welder::Welder(const unsigned debug, const std::vector& lib_env) : - Pass("Welder", debug), m_lib_env(lib_env), - m_compiler(debug) + Welder::Welder(const unsigned debug, const std::vector& lib_env, const uint16_t features) : + Pass("Welder", debug), m_lib_env(lib_env), m_features(features), + m_computed_ast(internal::NodeType::Unused), m_compiler(debug) {} void Welder::registerSymbol(const std::string& name) @@ -39,7 +39,7 @@ namespace Ark { try { - m_compiler.process(m_passes.back()->ast()); + m_compiler.process(m_computed_ast); m_bytecode = m_compiler.bytecode(); return true; @@ -68,7 +68,7 @@ namespace Ark const internal::Node& Welder::ast() const noexcept { - return m_passes.back()->ast(); + return m_computed_ast; } const bytecode_t& Welder::bytecode() const noexcept @@ -82,15 +82,18 @@ namespace Ark { m_parser.process(filename, code); + if ((m_features & FeatureImportSolver) != 0) { auto import_solver_pass = std::make_unique(debugLevel(), m_lib_env); import_solver_pass->setup(m_root_file, m_parser.imports()); m_passes.push_back(std::move(import_solver_pass)); } - m_passes.emplace_back(std::make_unique(debugLevel())); - m_passes.emplace_back(std::make_unique(debugLevel())); + if ((m_features & FeatureMacroProcessor) != 0) + m_passes.emplace_back(std::make_unique(debugLevel())); + if ((m_features & FeatureASTOptimizer) != 0) + m_passes.emplace_back(std::make_unique(debugLevel())); - std::accumulate( + m_computed_ast = std::accumulate( m_passes.begin(), m_passes.end(), m_parser.ast(), diff --git a/src/arkreactor/VM/State.cpp b/src/arkreactor/VM/State.cpp index b908137b6..2bea50255 100644 --- a/src/arkreactor/VM/State.cpp +++ b/src/arkreactor/VM/State.cpp @@ -51,9 +51,9 @@ namespace Ark } } - bool State::compile(const std::string& file, const std::string& output) const + bool State::compile(const std::string& file, const std::string& output, const uint16_t features) const { - Welder welder(m_debug_level, m_libenv); + Welder welder(m_debug_level, m_libenv, features); if (!welder.computeASTFromFile(file)) return false; @@ -71,7 +71,7 @@ namespace Ark return true; } - bool State::doFile(const std::string& file) + bool State::doFile(const std::string& file, const uint16_t features) { if (!Utils::fileExists(file)) { @@ -95,7 +95,7 @@ namespace Ark if (!exists(directory)) // create ark cache directory create_directory(directory); - if (compile(file, path) && feed(path)) + if (compile(file, path, features) && feed(path)) return true; } else if (feed(bytecode)) // it's a bytecode file @@ -103,9 +103,9 @@ namespace Ark return false; } - bool State::doString(const std::string& code) + bool State::doString(const std::string& code, const uint16_t features) { - Welder welder(m_debug_level, m_libenv); + Welder welder(m_debug_level, m_libenv, features); if (!welder.computeASTFromString(code)) return false; diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index 932406429..91019cbb8 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -51,7 +51,28 @@ int main(int argc, char** argv) // Generic arguments std::vector wrong, script_args; + uint16_t passes = Ark::DefaultFeatures; + // clang-format off + auto debug_flag = joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }) + .doc("Increase debug level (default: 0)\n"))); + auto lib_dir_flag = option("-L", "--lib").doc("Set the location of the ArkScript standard library. Paths can be delimited by ';'\n") + & value("lib_dir", libdir); + + auto import_solver_pass_flag = ( + option("-fimportsolver").call([&] { passes |= Ark::FeatureImportSolver; }) + | option("-fno-importsolver").call([&] { passes &= ~Ark::FeatureImportSolver; }) + ).doc("Toggle on and off the import solver pass"); + auto macro_proc_pass_flag = ( + option("-fmacroprocessor").call([&] { passes |= Ark::FeatureMacroProcessor; }) + | option("-fno-macroprocessor").call([&] { passes &= ~Ark::FeatureMacroProcessor; }) + ).doc("Toggle on and off the macro processor pass"); + auto optimizer_pass_flag = ( + option("-foptimizer").call([&] { passes |= Ark::FeatureASTOptimizer; }) + | option("-fno-optimizer").call([&] { passes &= ~Ark::FeatureASTOptimizer; }) + ).doc("Toggle on and off the optimizer pass"); + auto compiler_passes_flag = (import_solver_pass_flag, macro_proc_pass_flag, optimizer_pass_flag); + auto cli = ( option("-h", "--help").set(selected, mode::help).doc("Display this message") | option("-v", "--version").set(selected, mode::version).doc("Display ArkScript version and exit") @@ -63,16 +84,15 @@ int main(int argc, char** argv) | ( required("-c", "--compile").set(selected, mode::compile).doc("Compile the given program to bytecode, but do not run") & value("file", file) - , joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }).doc("Increase debug level (default: 0)\n"))) + , debug_flag + , compiler_passes_flag ) | ( value("file", file).set(selected, mode::run) , ( - joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }))) - , ( - option("-L", "--lib").doc("Set the location of the ArkScript standard library. Paths can be delimited by ';'\n") - & value("lib_dir", libdir) - ) + debug_flag + , lib_dir_flag + , compiler_passes_flag ) , any_other(script_args) ) @@ -84,11 +104,8 @@ int main(int argc, char** argv) | ( required("--ast").set(selected, mode::ast).doc("Compile the given program and output its AST as JSON to stdout") & value("file", file) - , joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }).doc("Increase debug level (default: 0)"))) - , ( - option("-L", "--lib").doc("Set the location of the ArkScript standard library. Paths can be delimited by ';'") - & value("lib_dir", libdir) - ) + , debug_flag + , lib_dir_flag ) | ( required("-bcr", "--bytecode-reader").set(selected, mode::bytecode_reader).doc("Launch the bytecode reader") @@ -215,7 +232,7 @@ int main(int argc, char** argv) Ark::State state(lib_paths); state.setDebug(debug); - if (!state.doFile(file)) + if (!state.doFile(file, passes)) return -1; break; @@ -227,7 +244,7 @@ int main(int argc, char** argv) state.setDebug(debug); state.setArgs(script_args); - if (!state.doFile(file)) + if (!state.doFile(file, passes)) return -1; Ark::VM vm(state); From a7d51d430d65ece12309965fd4bb01bfda8592dc Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Tue, 23 Jul 2024 14:13:40 +0200 Subject: [PATCH 3/8] feat(compiler): adding a name resolution pass, which was originally done by the compiler --- include/Ark/Compiler/Compiler.hpp | 22 -- include/Ark/Compiler/NameResolutionPass.hpp | 87 ++++++++ include/Ark/Compiler/Welder.hpp | 10 +- src/arkreactor/Compiler/Compiler.cpp | 63 +----- .../Compiler/NameResolutionPass.cpp | 197 ++++++++++++++++++ src/arkreactor/Compiler/Welder.cpp | 41 ++-- src/arkreactor/VM/State.cpp | 15 +- tests/errors/compiler/invalid_while.ark | 5 +- tests/errors/compiler/invalid_while.expected | 12 +- tests/errors/compiler/well_formed_args.ark | 5 +- .../errors/compiler/well_formed_args.expected | 12 +- 11 files changed, 343 insertions(+), 126 deletions(-) create mode 100644 include/Ark/Compiler/NameResolutionPass.hpp create mode 100644 src/arkreactor/Compiler/NameResolutionPass.cpp diff --git a/include/Ark/Compiler/Compiler.hpp b/include/Ark/Compiler/Compiler.hpp index 46ad57cbb..cb35a2645 100644 --- a/include/Ark/Compiler/Compiler.hpp +++ b/include/Ark/Compiler/Compiler.hpp @@ -68,7 +68,6 @@ namespace Ark // tables: symbols, values, plugins and codes std::vector m_symbols; - std::vector m_defined_symbols; std::vector m_plugins; std::vector m_values; std::vector> m_code_pages; @@ -254,27 +253,6 @@ namespace Ark * @return std::size_t */ uint16_t addValue(std::size_t page_id, const internal::Node& current); - - /** - * @brief Register a symbol as defined, so that later we can throw errors on undefined symbols - * - * @param sym - */ - void addDefinedSymbol(const std::string& sym); - - /** - * @brief Checks for undefined symbols, not present in the defined symbols table - * - */ - void checkForUndefinedSymbol(); - - /** - * @brief Suggest a symbol of what the user may have meant to input - * - * @param str the string - * @return std::string - */ - std::string offerSuggestion(const std::string& str) const; }; } diff --git a/include/Ark/Compiler/NameResolutionPass.hpp b/include/Ark/Compiler/NameResolutionPass.hpp new file mode 100644 index 000000000..5056505a1 --- /dev/null +++ b/include/Ark/Compiler/NameResolutionPass.hpp @@ -0,0 +1,87 @@ +/** + * @file NameResolutionPass.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief + * @version 0.1 + * @date 2024-07-22 + * + * @copyright Copyright (c) 2024 + * + */ + +#ifndef ARK_COMPILER_NAMERESOLUTIONPASS_HPP +#define ARK_COMPILER_NAMERESOLUTIONPASS_HPP + +#include +#include + +#include + +namespace Ark::internal +{ + class NameResolutionPass final : public Pass + { + public: + explicit NameResolutionPass(unsigned debug); + + void process(const Node& ast) override; + + [[nodiscard]] const Node& ast() const noexcept override; + + /** + * @brief Register a symbol as defined, so that later we can throw errors on undefined symbols + * + * @param sym + */ + void addDefinedSymbol(const std::string& sym); + + private: + Node m_ast; + // fixme: use a map/unordered_map/set/unordered_set for faster lookups? + // we can pre compute the operators+builtins+list operators + std::vector m_symbol_nodes; + std::vector m_defined_symbols; + std::vector m_plugin_names; + + void visit(const Node& node); + + /** + * + * @param node a list node whose first child is a keyword + * @param keyword + */ + void visitKeyword(const Node& node, Keyword keyword); + + /** + * @brief Register a given node in the symbol table + * + * @param symbol + */ + void addSymbolNode(const Node& symbol); + + /** + * @brief Checking if a symbol may be coming from a plugin + * + * @param name symbol name + * @return true the symbol may be from a plugin, loaded at runtime + * @return false + */ + [[nodiscard]] bool mayBeFromPlugin(const std::string& name) const noexcept; + + /** + * @brief Checks for undefined symbols, not present in the defined symbols table + * + */ + void checkForUndefinedSymbol() const; + + /** + * @brief Suggest a symbol of what the user may have meant to input + * + * @param str the string + * @return std::string + */ + [[nodiscard]] std::string offerSuggestion(const std::string& str) const; + }; +} + +#endif // ARK_COMPILER_NAMERESOLUTIONPASS_HPP diff --git a/include/Ark/Compiler/Welder.hpp b/include/Ark/Compiler/Welder.hpp index b00eb0c2a..f3f1625a9 100644 --- a/include/Ark/Compiler/Welder.hpp +++ b/include/Ark/Compiler/Welder.hpp @@ -22,6 +22,10 @@ #include #include #include +#include +#include +#include +#include namespace Ark { @@ -58,8 +62,12 @@ namespace Ark bytecode_t m_bytecode; internal::Node m_computed_ast; - std::vector> m_passes; internal::Parser m_parser; + internal::ImportSolver m_import_solver; + internal::MacroProcessor m_macro_processor; + internal::Optimizer m_ast_optimizer; + internal::NameResolutionPass m_name_resolver; + Compiler m_compiler; bool computeAST(const std::string& filename, const std::string& code); diff --git a/src/arkreactor/Compiler/Compiler.cpp b/src/arkreactor/Compiler/Compiler.cpp index 513a9cf96..f23524eac 100644 --- a/src/arkreactor/Compiler/Compiler.cpp +++ b/src/arkreactor/Compiler/Compiler.cpp @@ -36,8 +36,6 @@ namespace Ark /* current_page */ Page { .index = 0, .is_temp = false }, /* is_result_unused */ false, /* is_terminal */ false); - // throw an error on undefined symbol uses - checkForUndefinedSymbol(); pushSymAndValTables(); @@ -454,17 +452,7 @@ namespace Ark for (const auto& node : x.constList()[1].constList()) { if (node.nodeType() == NodeType::Capture) - { - // first check that the capture is a defined symbol - if (std::ranges::find(m_defined_symbols, node.string()) == m_defined_symbols.end()) - { - // we didn't find node in the defined symbol list, thus we can't capture node - throwCompilerError("Can not capture " + node.string() + " because node is referencing an unbound variable.", node); - } - - addDefinedSymbol(node.string()); page(p).emplace_back(CAPTURE, addSymbol(node)); - } } // create new page for function body @@ -477,10 +465,7 @@ namespace Ark for (const auto& node : x.constList()[1].constList()) { if (node.nodeType() == NodeType::Symbol) - { - addDefinedSymbol(node.string()); page(function_body_page).emplace_back(MUT, addSymbol(node)); - } } // push body of the function @@ -506,8 +491,6 @@ namespace Ark const std::string name = x.constList()[1].string(); uint16_t i = addSymbol(x.constList()[1]); - if (n != Keyword::Set) - addDefinedSymbol(name); // put value before symbol id // starting at index = 2 because x is a (let|mut|set variable ...) node @@ -567,7 +550,7 @@ namespace Ark void Compiler::handleCalls(const Node& x, const Page p, bool is_result_unused, const bool is_terminal, const std::string& var_name) { m_temp_pages.emplace_back(); - const Page proc_page = Page { .index = m_temp_pages.size() - 1u, .is_temp = true }; + const auto proc_page = Page { .index = m_temp_pages.size() - 1u, .is_temp = true }; constexpr std::size_t start_index = 1; const auto node = x.constList()[0]; @@ -744,48 +727,4 @@ namespace Ark return static_cast(distance); throwCompilerError("Too many values (exceeds 65'536), aborting compilation.", current); } - - void Compiler::addDefinedSymbol(const std::string& sym) - { - // otherwise, add the symbol, and return its id in the table - if (std::ranges::find(m_defined_symbols, sym) == m_defined_symbols.end()) - m_defined_symbols.push_back(sym); - } - - void Compiler::checkForUndefinedSymbol() - { - for (const Node& sym : m_symbols) - { - const std::string& str = sym.string(); - const bool is_plugin = mayBeFromPlugin(str); - - if (auto it = std::ranges::find(m_defined_symbols, str); it == m_defined_symbols.end() && !is_plugin) - { - const std::string suggestion = offerSuggestion(str); - if (suggestion.empty()) - throwCompilerError("Unbound variable error \"" + str + "\" (variable is used but not defined)", sym); - - throwCompilerError("Unbound variable error \"" + str + "\" (did you mean \"" + suggestion + "\"?)", sym); - } - } - } - - std::string Compiler::offerSuggestion(const std::string& str) const - { - std::string suggestion; - // our suggestion shouldn't require more than half the string to change - std::size_t suggestion_distance = str.size() / 2; - - for (const std::string& symbol : m_defined_symbols) - { - const std::size_t current_distance = Utils::levenshteinDistance(str, symbol); - if (current_distance <= suggestion_distance) - { - suggestion_distance = current_distance; - suggestion = symbol; - } - } - - return suggestion; - } } diff --git a/src/arkreactor/Compiler/NameResolutionPass.cpp b/src/arkreactor/Compiler/NameResolutionPass.cpp new file mode 100644 index 000000000..fd8e4c530 --- /dev/null +++ b/src/arkreactor/Compiler/NameResolutionPass.cpp @@ -0,0 +1,197 @@ +#include + +#include +#include +#include + +#include + +namespace Ark::internal +{ + // todo add scope resolution to this pass + NameResolutionPass::NameResolutionPass(const unsigned debug) : + Pass("NameResolution", debug), + m_ast() + {} + + void NameResolutionPass::process(const Node& ast) + { + m_ast = ast; + visit(ast); + checkForUndefinedSymbol(); + } + + const Node& NameResolutionPass::ast() const noexcept + { + return m_ast; + } + + void NameResolutionPass::addDefinedSymbol(const std::string& sym) + { + // otherwise, add the symbol, and return its id in the table + if (std::ranges::find(m_defined_symbols, sym) == m_defined_symbols.end()) + m_defined_symbols.push_back(sym); + } + + void NameResolutionPass::visit(const Node& node) + { + switch (node.nodeType()) + { + case NodeType::Symbol: + addSymbolNode(node); + break; + + case NodeType::Field: + for (const auto& child : node.constList()) + addSymbolNode(child); + break; + + case NodeType::List: + if (!node.constList().empty()) + { + if (node.constList()[0].nodeType() == NodeType::Keyword) + visitKeyword(node, node.constList()[0].keyword()); + else + { + // function calls + for (const auto& child : node.constList()) + visit(child); + } + } + break; + + default: + break; + } + } + + void NameResolutionPass::visitKeyword(const Node& node, const Keyword keyword) + { + switch (keyword) + { + case Keyword::Set: + [[fallthrough]]; + case Keyword::Let: + [[fallthrough]]; + case Keyword::Mut: + if (node.constList().size() > 1 && node.constList()[1].nodeType() == NodeType::Symbol) + addDefinedSymbol(node.constList()[1].string()); + if (node.constList().size() > 2) + visit(node.constList()[2]); + break; + + case Keyword::Import: + if (!node.constList().empty()) + m_plugin_names.push_back(node.constList()[1].constList().back().string()); + break; + + case Keyword::Fun: + if (node.constList()[1].nodeType() == NodeType::List) + { + for (const auto& child : node.constList()[1].constList()) + { + if (child.nodeType() == NodeType::Capture || child.nodeType() == NodeType::Symbol) + { + // FIXME first check that the capture is a defined symbol + // if (std::ranges::find(m_defined_symbols, node.string()) == m_defined_symbols.end()) + // { + // // we didn't find node in the defined symbol list, thus we can't capture node + // throwCompilerError("Can not capture " + node.string() + " because node is referencing an unbound variable.", node); + // } + + addDefinedSymbol(child.string()); + } + } + } + if (node.constList().size() > 2) + visit(node.constList()[2]); + break; + + default: + for (const auto& child : node.constList()) + visit(child); + break; + } + } + + void NameResolutionPass::addSymbolNode(const Node& symbol) + { + const std::string& name = symbol.string(); + + // we don't accept builtins as a user symbol + const auto it_builtins = std::ranges::find_if(Builtins::builtins, + [&name](const std::pair& element) -> bool { + return name == element.first; + }); + if (it_builtins != Builtins::builtins.end()) + return; + + // we don't accept operators as a user symbol + if (std::ranges::find(operators, name) != operators.end()) + return; + + // FIXME find a way to mutualise this + // something like name -> instruction so that we can reuse it in the compiler as well + if (name == "list" || name == "append" || name == "append!" || + name == "concat" || name == "concat!" || name == "pop" || name == "pop!") + return; + + const auto it = std::ranges::find_if(m_symbol_nodes, [&name](const Node& sym_node) -> bool { + return sym_node.string() == name; + }); + if (it == m_symbol_nodes.end()) + m_symbol_nodes.push_back(symbol); + } + + bool NameResolutionPass::mayBeFromPlugin(const std::string& name) const noexcept + { + std::string splitted = Utils::splitString(name, ':')[0]; + const auto it = std::ranges::find_if( + m_plugin_names, + [&splitted](const std::string& plugin) -> bool { + return plugin == splitted; + }); + return it != m_plugin_names.end(); + } + + void NameResolutionPass::checkForUndefinedSymbol() const + { + for (const auto& sym : m_symbol_nodes) + { + const auto& str = sym.string(); + const bool is_plugin = mayBeFromPlugin(str); + + if (auto it = std::ranges::find(m_defined_symbols, str); it == m_defined_symbols.end() && !is_plugin) + { + std::string message; + + const std::string suggestion = offerSuggestion(str); + if (suggestion.empty()) + message = fmt::format(R"(Unbound variable error "{}" (variable is used but not defined))", str); + else + message = fmt::format(R"(Unbound variable error "{}" (did you mean "{}"?))", str, suggestion); + + throw CodeError(message, sym.filename(), sym.line(), sym.col(), sym.repr()); + } + } + } + + std::string NameResolutionPass::offerSuggestion(const std::string& str) const + { + std::string suggestion; + // our suggestion shouldn't require more than half the string to change + std::size_t suggestion_distance = str.size() / 2; + + for (const std::string& symbol : m_defined_symbols) + { + const std::size_t current_distance = Utils::levenshteinDistance(str, symbol); + if (current_distance <= suggestion_distance) + { + suggestion_distance = current_distance; + suggestion = symbol; + } + } + + return suggestion; + } +} diff --git a/src/arkreactor/Compiler/Welder.cpp b/src/arkreactor/Compiler/Welder.cpp index f99add738..cc7bcbf2e 100644 --- a/src/arkreactor/Compiler/Welder.cpp +++ b/src/arkreactor/Compiler/Welder.cpp @@ -7,17 +7,23 @@ #include #include +#include namespace Ark { Welder::Welder(const unsigned debug, const std::vector& lib_env, const uint16_t features) : Pass("Welder", debug), m_lib_env(lib_env), m_features(features), - m_computed_ast(internal::NodeType::Unused), m_compiler(debug) + m_computed_ast(internal::NodeType::Unused), + m_import_solver(debug, lib_env), + m_macro_processor(debug), + m_ast_optimizer(debug), + m_name_resolver(debug), + m_compiler(debug) {} void Welder::registerSymbol(const std::string& name) { - m_compiler.addDefinedSymbol(name); + m_name_resolver.addDefinedSymbol(name); } bool Welder::computeASTFromFile(const std::string& filename) @@ -81,26 +87,29 @@ namespace Ark try { m_parser.process(filename, code); + m_computed_ast = m_parser.ast(); if ((m_features & FeatureImportSolver) != 0) { - auto import_solver_pass = std::make_unique(debugLevel(), m_lib_env); - import_solver_pass->setup(m_root_file, m_parser.imports()); - m_passes.push_back(std::move(import_solver_pass)); + m_import_solver.setup(m_root_file, m_parser.imports()); + m_import_solver.process(m_computed_ast); + m_computed_ast = m_import_solver.ast(); } + if ((m_features & FeatureMacroProcessor) != 0) - m_passes.emplace_back(std::make_unique(debugLevel())); + { + m_macro_processor.process(m_computed_ast); + m_computed_ast = m_macro_processor.ast(); + } + if ((m_features & FeatureASTOptimizer) != 0) - m_passes.emplace_back(std::make_unique(debugLevel())); - - m_computed_ast = std::accumulate( - m_passes.begin(), - m_passes.end(), - m_parser.ast(), - [](const internal::Node& ast, const std::unique_ptr& pass) { - pass->process(ast); - return pass->ast(); - }); + { + m_ast_optimizer.process(m_computed_ast); + m_computed_ast = m_ast_optimizer.ast(); + } + + // NOTE: ast isn't modified by the name resolver, no need to update m_computed_ast + m_name_resolver.process(m_computed_ast); return true; } diff --git a/src/arkreactor/VM/State.cpp b/src/arkreactor/VM/State.cpp index 2bea50255..a37e08168 100644 --- a/src/arkreactor/VM/State.cpp +++ b/src/arkreactor/VM/State.cpp @@ -54,13 +54,11 @@ namespace Ark bool State::compile(const std::string& file, const std::string& output, const uint16_t features) const { Welder welder(m_debug_level, m_libenv, features); - - if (!welder.computeASTFromFile(file)) - return false; - for (auto& p : m_binded) welder.registerSymbol(p.first); + if (!welder.computeASTFromFile(file)) + return false; if (!welder.generateBytecode()) return false; @@ -106,14 +104,13 @@ namespace Ark bool State::doString(const std::string& code, const uint16_t features) { Welder welder(m_debug_level, m_libenv, features); - - if (!welder.computeASTFromString(code)) - return false; - for (auto& p : m_binded) welder.registerSymbol(p.first); - welder.generateBytecode(); + if (!welder.computeASTFromString(code)) + return false; + if (!welder.generateBytecode()) + return false; return feed(welder.bytecode()); } diff --git a/tests/errors/compiler/invalid_while.ark b/tests/errors/compiler/invalid_while.ark index 021d422aa..a666e368d 100644 --- a/tests/errors/compiler/invalid_while.ark +++ b/tests/errors/compiler/invalid_while.ark @@ -1,3 +1,2 @@ -(while ($ a b) { - (set acc (+ acc (@ src a))) - (set a (+ 1 a))}) +(while ($ a 1) { + (set acc (+ acc (@ [] a)))}) diff --git a/tests/errors/compiler/invalid_while.expected b/tests/errors/compiler/invalid_while.expected index 68793cc05..6449104ad 100644 --- a/tests/errors/compiler/invalid_while.expected +++ b/tests/errors/compiler/invalid_while.expected @@ -1,7 +1,7 @@ -At (while (begin (set acc (+ acc (@ src b))) (set b (+ 1 b)))) @ 1:1 - 1 | (while ($ a b) { +At (while (begin (set acc (+ acc (@ (list) 1))))) @ 1:1 + 1 | (while ($ a 1) { | ^~~~~~~~~~~~~~~~ - 2 | (set acc (+ acc (@ src a))) - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 3 | (set a (+ 1 a))}) - Invalid node ; if it was computed by a macro, check that a node is returned + 2 | (set acc (+ acc (@ [] a)))}) + | ^~~~~~~~~~~~~~~ + 3 | + Invalid node ; if it was computed by a macro, check that a node is returned \ No newline at end of file diff --git a/tests/errors/compiler/well_formed_args.ark b/tests/errors/compiler/well_formed_args.ark index 5ce688e10..0b78bcd78 100644 --- a/tests/errors/compiler/well_formed_args.ark +++ b/tests/errors/compiler/well_formed_args.ark @@ -1,2 +1,5 @@ -($ defun (let name (fun args body))) +($ defun (let name (fun args nil))) +(let foo 1) +(let a 2) +(let b 3) (defun foo (a b) (+ a b)) diff --git a/tests/errors/compiler/well_formed_args.expected b/tests/errors/compiler/well_formed_args.expected index ce25a2e16..5918725b3 100644 --- a/tests/errors/compiler/well_formed_args.expected +++ b/tests/errors/compiler/well_formed_args.expected @@ -1,6 +1,6 @@ -At args @ 1:30 - 1 | ($ defun (let name (fun args body))) - | ^~~~ - 2 | (defun foo (a b) (+ a b)) - 3 | - Expected a well formed argument(s) list, got a Symbol +At args @ 1:29 + 1 | ($ defun (let name (fun args nil))) + | ^~~~ + 2 | (let foo 1) + 3 | (let a 2) + Expected a well formed argument(s) list, got a Symbol \ No newline at end of file From d14dbb21aa66c7cce6dc10c62e05ba9afe5fb355 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sun, 28 Jul 2024 14:25:30 +0200 Subject: [PATCH 4/8] feat(name resolution pass): precomputing a language symbols list for faster lookup --- include/Ark/Compiler/NameResolutionPass.hpp | 4 +-- .../Compiler/NameResolutionPass.cpp | 30 ++++++++----------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/include/Ark/Compiler/NameResolutionPass.hpp b/include/Ark/Compiler/NameResolutionPass.hpp index 5056505a1..c3ac7b7bf 100644 --- a/include/Ark/Compiler/NameResolutionPass.hpp +++ b/include/Ark/Compiler/NameResolutionPass.hpp @@ -14,6 +14,7 @@ #include #include +#include #include @@ -37,8 +38,7 @@ namespace Ark::internal private: Node m_ast; - // fixme: use a map/unordered_map/set/unordered_set for faster lookups? - // we can pre compute the operators+builtins+list operators + std::unordered_set m_language_symbols; std::vector m_symbol_nodes; std::vector m_defined_symbols; std::vector m_plugin_names; diff --git a/src/arkreactor/Compiler/NameResolutionPass.cpp b/src/arkreactor/Compiler/NameResolutionPass.cpp index fd8e4c530..04acb6a3e 100644 --- a/src/arkreactor/Compiler/NameResolutionPass.cpp +++ b/src/arkreactor/Compiler/NameResolutionPass.cpp @@ -11,8 +11,16 @@ namespace Ark::internal // todo add scope resolution to this pass NameResolutionPass::NameResolutionPass(const unsigned debug) : Pass("NameResolution", debug), - m_ast() - {} + m_ast(), + m_language_symbols(operators.begin(), operators.end()) + { + for (const auto& builtin : Builtins::builtins) + m_language_symbols.emplace(builtin.first); + + // FIXME find a way to mutualise this + // something like name -> instruction so that we can reuse it in the compiler as well + m_language_symbols.insert({ "list", "append", "append!", "concat", "concat!", "pop", "pop!" }); + } void NameResolutionPass::process(const Node& ast) { @@ -118,22 +126,8 @@ namespace Ark::internal { const std::string& name = symbol.string(); - // we don't accept builtins as a user symbol - const auto it_builtins = std::ranges::find_if(Builtins::builtins, - [&name](const std::pair& element) -> bool { - return name == element.first; - }); - if (it_builtins != Builtins::builtins.end()) - return; - - // we don't accept operators as a user symbol - if (std::ranges::find(operators, name) != operators.end()) - return; - - // FIXME find a way to mutualise this - // something like name -> instruction so that we can reuse it in the compiler as well - if (name == "list" || name == "append" || name == "append!" || - name == "concat" || name == "concat!" || name == "pop" || name == "pop!") + // we don't accept builtins/operators as a user symbol + if (m_language_symbols.contains(name)) return; const auto it = std::ranges::find_if(m_symbol_nodes, [&name](const Node& sym_node) -> bool { From 819a21894fd4a7e5425ea7b9f87726340fb02f8f Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Mon, 29 Jul 2024 13:59:12 +0200 Subject: [PATCH 5/8] feat(compiler): refactoring the specific instructions into a single listInstructions vector + lookup by name to get their index and map it to an instruction --- CHANGELOG.md | 1 + include/Ark/Compiler/Common.hpp | 12 +++ include/Ark/Compiler/Compiler.hpp | 44 +++-------- src/arkreactor/Compiler/Compiler.cpp | 74 ++++++++++--------- .../Compiler/NameResolutionPass.cpp | 11 ++- 5 files changed, 68 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d157f6e94..6a94f1cf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ - checking for invalid symbols when defining a function through a macro - added a max macro unification depth - added a max macro evaluation depth +- introduced `internal::listInstructions` with the different instructions, to be used by the compiler and name resolution pass ### Removed - removed unused `NodeType::Closure` diff --git a/include/Ark/Compiler/Common.hpp b/include/Ark/Compiler/Common.hpp index 51ec37d00..765483407 100644 --- a/include/Ark/Compiler/Common.hpp +++ b/include/Ark/Compiler/Common.hpp @@ -79,6 +79,18 @@ namespace Ark::internal "del" }; + // This list is related to include/Ark/Compiler/Instructions.hpp + // The order is very important + constexpr std::array listInstructions = { + "list", + "append", + "concat", + "append!", + "concat!", + "pop", + "pop!" + }; + // This list is related to include/Ark/Compiler/Instructions.hpp // from FIRST_OPERATOR, to LAST_OPERATOR // The order is very important diff --git a/include/Ark/Compiler/Compiler.hpp b/include/Ark/Compiler/Compiler.hpp index cb35a2645..084fbd42d 100644 --- a/include/Ark/Compiler/Compiler.hpp +++ b/include/Ark/Compiler/Compiler.hpp @@ -2,7 +2,7 @@ * @file Compiler.hpp * @author Alexandre Plateau (lexplt.dev@gmail.com) * @brief ArkScript compiler is in charge of transforming the AST into bytecode - * @version 2.1 + * @version 3.0 * @date 2020-10-27 * * @copyright Copyright (c) 2020-2024 @@ -118,43 +118,25 @@ namespace Ark * @brief Checking if a symbol is an operator * * @param name symbol name - * @return std::optional position in the operators' list + * @return std::optional operator instruction */ - static std::optional getOperator(const std::string& name) noexcept; + static std::optional getOperator(const std::string& name) noexcept; /** * @brief Checking if a symbol is a builtin * * @param name symbol name - * @return std::optional position in the builtins' list + * @return std::optional builtin number */ - static std::optional getBuiltin(const std::string& name) noexcept; + static std::optional getBuiltin(const std::string& name) noexcept; /** - * @brief Check if a symbol needs to be compiled to a specific instruction + * @brief Checking if a symbol is a list instruction * * @param name - * @return std::optional corresponding instruction if it exists + * @return std::optional list instruction */ - static std::optional getSpecific(const std::string& name) noexcept - { - if (name == "list") - return internal::Instruction::LIST; - if (name == "append") - return internal::Instruction::APPEND; - if (name == "concat") - return internal::Instruction::CONCAT; - if (name == "append!") - return internal::Instruction::APPEND_IN_PLACE; - if (name == "concat!") - return internal::Instruction::CONCAT_IN_PLACE; - if (name == "pop") - return internal::Instruction::POP_LIST; - if (name == "pop!") - return internal::Instruction::POP_LIST_IN_PLACE; - - return std::nullopt; - } + static std::optional getListInstruction(const std::string& name) noexcept; /** * Checks if a node is a list and has a keyboard as its first node, indicating if it's producing a value on the stack or not @@ -173,14 +155,6 @@ namespace Ark */ static bool isUnaryInst(internal::Instruction inst) noexcept; - /** - * @brief Compute specific instruction argument count - * - * @param inst - * @param previous - */ - static uint16_t computeSpecificInstArgc(internal::Instruction inst, uint16_t previous) noexcept; - /** * @brief Checking if a symbol may be coming from a plugin * @@ -218,7 +192,7 @@ namespace Ark void compileExpression(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name = ""); void compileSymbol(const internal::Node& x, Page p, bool is_result_unused); - void compileSpecific(const internal::Node& c0, const internal::Node& x, Page p, bool is_result_unused); + void compileListInstruction(const internal::Node& c0, const internal::Node& x, Page p, bool is_result_unused); void compileIf(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name); void compileFunction(const internal::Node& x, Page p, bool is_result_unused, const std::string& var_name); void compileLetMutSet(internal::Keyword n, const internal::Node& x, Page p); diff --git a/src/arkreactor/Compiler/Compiler.cpp b/src/arkreactor/Compiler/Compiler.cpp index f23524eac..da653e545 100644 --- a/src/arkreactor/Compiler/Compiler.cpp +++ b/src/arkreactor/Compiler/Compiler.cpp @@ -186,22 +186,30 @@ namespace Ark } } - std::optional Compiler::getOperator(const std::string& name) noexcept + std::optional Compiler::getOperator(const std::string& name) noexcept { const auto it = std::ranges::find(internal::operators, name); if (it != internal::operators.end()) - return std::distance(internal::operators.begin(), it); + return static_cast(std::distance(internal::operators.begin(), it) + FIRST_OPERATOR); return std::nullopt; } - std::optional Compiler::getBuiltin(const std::string& name) noexcept + std::optional Compiler::getBuiltin(const std::string& name) noexcept { const auto it = std::ranges::find_if(Builtins::builtins, [&name](const std::pair& element) -> bool { return name == element.first; }); if (it != Builtins::builtins.end()) - return std::distance(Builtins::builtins.begin(), it); + return static_cast(std::distance(Builtins::builtins.begin(), it)); + return std::nullopt; + } + + std::optional Compiler::getListInstruction(const std::string& name) noexcept + { + const auto it = std::ranges::find(internal::listInstructions, name); + if (it != internal::listInstructions.end()) + return static_cast(std::distance(internal::listInstructions.begin(), it) + LIST); return std::nullopt; } @@ -234,24 +242,6 @@ namespace Ark } } - uint16_t Compiler::computeSpecificInstArgc(const Instruction inst, const uint16_t previous) noexcept - { - switch (inst) - { - case LIST: - return previous; - - case APPEND: - case APPEND_IN_PLACE: - case CONCAT: - case CONCAT_IN_PLACE: - return previous - 1; - - default: - return 0; - } - } - bool Compiler::mayBeFromPlugin(const std::string& name) noexcept { std::string splitted = Utils::splitString(name, ':')[0]; @@ -300,13 +290,13 @@ namespace Ark { if (!is_result_unused) { - static const std::optional nil = getBuiltin("nil"); - page(p).emplace_back(BUILTIN, static_cast(nil.value())); + static const std::optional nil = getBuiltin("nil"); + page(p).emplace_back(BUILTIN, nil.value()); } } - // specific instructions - else if (const auto c0 = x.constList()[0]; c0.nodeType() == NodeType::Symbol && getSpecific(c0.string()).has_value()) - compileSpecific(c0, x, p, is_result_unused); + // list instructions + else if (const auto c0 = x.constList()[0]; c0.nodeType() == NodeType::Symbol && getListInstruction(c0.string()).has_value()) + compileListInstruction(c0, x, p, is_result_unused); // registering structures else if (x.constList()[0].nodeType() == NodeType::Keyword) { @@ -370,7 +360,7 @@ namespace Ark const std::string& name = x.string(); if (const auto it_builtin = getBuiltin(name)) - page(p).emplace_back(Instruction::BUILTIN, static_cast(it_builtin.value())); + page(p).emplace_back(Instruction::BUILTIN, it_builtin.value()); else if (getOperator(name).has_value()) throwCompilerError(fmt::format("Found a free standing operator: `{}`", name), x); else @@ -383,10 +373,10 @@ namespace Ark } } - void Compiler::compileSpecific(const Node& c0, const Node& x, const Page p, const bool is_result_unused) + void Compiler::compileListInstruction(const Node& c0, const Node& x, const Page p, const bool is_result_unused) { std::string name = c0.string(); - Instruction inst = getSpecific(name).value(); + Instruction inst = getListInstruction(name).value(); // length of at least 1 since we got a symbol name const auto argc = x.constList().size() - 1u; @@ -407,7 +397,25 @@ namespace Ark } // put inst and number of arguments - page(p).emplace_back(inst, computeSpecificInstArgc(inst, static_cast(argc))); + std::size_t inst_argc; + switch (inst) + { + case LIST: + inst_argc = argc; + break; + + case APPEND: + case APPEND_IN_PLACE: + case CONCAT: + case CONCAT_IN_PLACE: + inst_argc = argc - 1; + break; + + default: + inst_argc = 0; + break; + } + page(p).emplace_back(inst, static_cast(inst_argc)); if (is_result_unused && name.back() != '!') // in-place functions never push a value { @@ -555,8 +563,8 @@ namespace Ark const auto node = x.constList()[0]; const auto maybe_operator = node.nodeType() == NodeType::Symbol ? getOperator(node.string()) : std::nullopt; - if (node.nodeType() == NodeType::Symbol && maybe_operator.has_value()) - page(proc_page).emplace_back(static_cast(FIRST_OPERATOR + maybe_operator.value())); + if (maybe_operator.has_value()) + page(proc_page).emplace_back(maybe_operator.value()); else // closure chains have been handled (eg: closure.field.field.function) compileExpression(node, proc_page, false, false); // storing proc diff --git a/src/arkreactor/Compiler/NameResolutionPass.cpp b/src/arkreactor/Compiler/NameResolutionPass.cpp index 04acb6a3e..3228663a4 100644 --- a/src/arkreactor/Compiler/NameResolutionPass.cpp +++ b/src/arkreactor/Compiler/NameResolutionPass.cpp @@ -11,15 +11,14 @@ namespace Ark::internal // todo add scope resolution to this pass NameResolutionPass::NameResolutionPass(const unsigned debug) : Pass("NameResolution", debug), - m_ast(), - m_language_symbols(operators.begin(), operators.end()) + m_ast() { for (const auto& builtin : Builtins::builtins) m_language_symbols.emplace(builtin.first); - - // FIXME find a way to mutualise this - // something like name -> instruction so that we can reuse it in the compiler as well - m_language_symbols.insert({ "list", "append", "append!", "concat", "concat!", "pop", "pop!" }); + for (auto ope : operators) + m_language_symbols.emplace(ope); + for (auto inst : listInstructions) + m_language_symbols.emplace(inst); } void NameResolutionPass::process(const Node& ast) From 2aa690724e54dde19d2c4dcb4a9d034cdac8e655 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Mon, 29 Jul 2024 19:29:01 +0200 Subject: [PATCH 6/8] feat(compiler): enhance the name resolution pass to include the unbound capture check --- include/Ark/Compiler/NameResolutionPass.hpp | 2 +- .../Compiler/NameResolutionPass.cpp | 35 +++++++++++-------- tests/errors/capture/unbound.expected | 3 +- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/include/Ark/Compiler/NameResolutionPass.hpp b/include/Ark/Compiler/NameResolutionPass.hpp index c3ac7b7bf..dd378c7d9 100644 --- a/include/Ark/Compiler/NameResolutionPass.hpp +++ b/include/Ark/Compiler/NameResolutionPass.hpp @@ -40,7 +40,7 @@ namespace Ark::internal Node m_ast; std::unordered_set m_language_symbols; std::vector m_symbol_nodes; - std::vector m_defined_symbols; + std::unordered_set m_defined_symbols; std::vector m_plugin_names; void visit(const Node& node); diff --git a/src/arkreactor/Compiler/NameResolutionPass.cpp b/src/arkreactor/Compiler/NameResolutionPass.cpp index 3228663a4..0bd1a9f5f 100644 --- a/src/arkreactor/Compiler/NameResolutionPass.cpp +++ b/src/arkreactor/Compiler/NameResolutionPass.cpp @@ -35,9 +35,7 @@ namespace Ark::internal void NameResolutionPass::addDefinedSymbol(const std::string& sym) { - // otherwise, add the symbol, and return its id in the table - if (std::ranges::find(m_defined_symbols, sym) == m_defined_symbols.end()) - m_defined_symbols.push_back(sym); + m_defined_symbols.emplace(sym); } void NameResolutionPass::visit(const Node& node) @@ -81,10 +79,12 @@ namespace Ark::internal case Keyword::Let: [[fallthrough]]; case Keyword::Mut: - if (node.constList().size() > 1 && node.constList()[1].nodeType() == NodeType::Symbol) - addDefinedSymbol(node.constList()[1].string()); + // first, visit the value, then register the symbol + // this allows us to detect things like (let foo (fun (&foo) ())) if (node.constList().size() > 2) visit(node.constList()[2]); + if (node.constList().size() > 1 && node.constList()[1].nodeType() == NodeType::Symbol) + addDefinedSymbol(node.constList()[1].string()); break; case Keyword::Import: @@ -97,17 +97,24 @@ namespace Ark::internal { for (const auto& child : node.constList()[1].constList()) { - if (child.nodeType() == NodeType::Capture || child.nodeType() == NodeType::Symbol) + if (child.nodeType() == NodeType::Capture) { - // FIXME first check that the capture is a defined symbol - // if (std::ranges::find(m_defined_symbols, node.string()) == m_defined_symbols.end()) - // { - // // we didn't find node in the defined symbol list, thus we can't capture node - // throwCompilerError("Can not capture " + node.string() + " because node is referencing an unbound variable.", node); - // } - + // First, check that the capture is a defined symbol + // TODO add a scope thing to see if we are capturing something in scope + if (!m_defined_symbols.contains(child.string())) + { + // we didn't find node in the defined symbol list, thus we can't capture node + throw CodeError( + fmt::format("Can not capture {} because node is referencing an unbound variable.", child.string()), + child.filename(), + child.line(), + child.col(), + child.repr()); + } addDefinedSymbol(child.string()); } + else if (child.nodeType() == NodeType::Symbol) + addDefinedSymbol(child.string()); } } if (node.constList().size() > 2) @@ -154,7 +161,7 @@ namespace Ark::internal const auto& str = sym.string(); const bool is_plugin = mayBeFromPlugin(str); - if (auto it = std::ranges::find(m_defined_symbols, str); it == m_defined_symbols.end() && !is_plugin) + if (!m_defined_symbols.contains(str) && !is_plugin) { std::string message; diff --git a/tests/errors/capture/unbound.expected b/tests/errors/capture/unbound.expected index 11f3454f0..75c10aed4 100644 --- a/tests/errors/capture/unbound.expected +++ b/tests/errors/capture/unbound.expected @@ -1 +1,2 @@ -ScopeError: Couldn't capture `d' as it is currently unbound +At &d @ 1:0 + Can not capture d because node is referencing an unbound variable. From e0b38bcf7cbc0a11043a5929ef8b52dbe47938a8 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Mon, 29 Jul 2024 19:49:01 +0200 Subject: [PATCH 7/8] feat(compiler): adding basic scope resolution to the name resolution pass --- include/Ark/Compiler/NameResolutionPass.hpp | 33 +++++++++ .../Compiler/NameResolutionPass.cpp | 74 ++++++++++++++++++- tests/errors/capture/capture_out_of_scope.ark | 3 + .../capture/capture_out_of_scope.expected | 2 + tests/errors/capture/unbound.expected | 2 +- 5 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 tests/errors/capture/capture_out_of_scope.ark create mode 100644 tests/errors/capture/capture_out_of_scope.expected diff --git a/include/Ark/Compiler/NameResolutionPass.hpp b/include/Ark/Compiler/NameResolutionPass.hpp index dd378c7d9..1aae59325 100644 --- a/include/Ark/Compiler/NameResolutionPass.hpp +++ b/include/Ark/Compiler/NameResolutionPass.hpp @@ -20,6 +20,38 @@ namespace Ark::internal { + class ScopeResolver + { + public: + ScopeResolver(); + + void createNew(); + + void removeLocalScope(); + + void registerInCurrent(const std::string& name); + + [[nodiscard]] bool isRegistered(const std::string& name) const; + + [[nodiscard]] bool isLocalVar(const std::string& name) const; + + [[nodiscard]] bool isGlobalVar(const std::string& name) const; + + private: + class Scope + { + public: + void add(const std::string& name); + + [[nodiscard]] bool has(const std::string& name) const; + + private: + std::unordered_set m_vars; + }; + + std::vector m_scopes; + }; + class NameResolutionPass final : public Pass { public: @@ -42,6 +74,7 @@ namespace Ark::internal std::vector m_symbol_nodes; std::unordered_set m_defined_symbols; std::vector m_plugin_names; + ScopeResolver m_scope_resolver; void visit(const Node& node); diff --git a/src/arkreactor/Compiler/NameResolutionPass.cpp b/src/arkreactor/Compiler/NameResolutionPass.cpp index 0bd1a9f5f..023743fb7 100644 --- a/src/arkreactor/Compiler/NameResolutionPass.cpp +++ b/src/arkreactor/Compiler/NameResolutionPass.cpp @@ -5,10 +5,66 @@ #include #include +#include namespace Ark::internal { - // todo add scope resolution to this pass + void ScopeResolver::Scope::add(const std::string& name) + { + m_vars.emplace(name); + } + + bool ScopeResolver::Scope::has(const std::string& name) const + { + return m_vars.contains(name); + } + + ScopeResolver::ScopeResolver() + { + createNew(); + } + + void ScopeResolver::createNew() + { + m_scopes.emplace_back(); + } + + void ScopeResolver::removeLocalScope() + { + m_scopes.pop_back(); + } + + void ScopeResolver::registerInCurrent(const std::string& name) + { + m_scopes.back().add(name); + } + + bool ScopeResolver::isRegistered(const std::string& name) const + { + return std::ranges::any_of( + m_scopes.rbegin(), + m_scopes.rend(), + [name](const Scope& scope) { + return scope.has(name); + }); + } + + bool ScopeResolver::isLocalVar(const std::string& name) const + { + // only one scope, this is the global one + if (m_scopes.size() == 1) + return false; + return m_scopes.back().has(name); + } + + bool ScopeResolver::isGlobalVar(const std::string& name) const + { + if (m_scopes.size() == 1) + return m_scopes[0].has(name); + // for a variable to be considered global, it has to not be shadowed in the current local scope + return !m_scopes.back().has(name) && m_scopes[0].has(name); + } + NameResolutionPass::NameResolutionPass(const unsigned debug) : Pass("NameResolution", debug), m_ast() @@ -36,6 +92,7 @@ namespace Ark::internal void NameResolutionPass::addDefinedSymbol(const std::string& sym) { m_defined_symbols.emplace(sym); + m_scope_resolver.registerInCurrent(sym); } void NameResolutionPass::visit(const Node& node) @@ -93,6 +150,8 @@ namespace Ark::internal break; case Keyword::Fun: + // create a new scope to track variables + m_scope_resolver.createNew(); if (node.constList()[1].nodeType() == NodeType::List) { for (const auto& child : node.constList()[1].constList()) @@ -100,12 +159,20 @@ namespace Ark::internal if (child.nodeType() == NodeType::Capture) { // First, check that the capture is a defined symbol - // TODO add a scope thing to see if we are capturing something in scope if (!m_defined_symbols.contains(child.string())) { // we didn't find node in the defined symbol list, thus we can't capture node throw CodeError( - fmt::format("Can not capture {} because node is referencing an unbound variable.", child.string()), + fmt::format("Can not capture {} because it is referencing an unbound variable.", child.string()), + child.filename(), + child.line(), + child.col(), + child.repr()); + } + else if (!m_scope_resolver.isRegistered(child.string())) + { + throw CodeError( + fmt::format("Can not capture {} because it is referencing a variable defined in an unreachable scope.", child.string()), child.filename(), child.line(), child.col(), @@ -119,6 +186,7 @@ namespace Ark::internal } if (node.constList().size() > 2) visit(node.constList()[2]); + m_scope_resolver.removeLocalScope(); // and remove it once the function has been compiled break; default: diff --git a/tests/errors/capture/capture_out_of_scope.ark b/tests/errors/capture/capture_out_of_scope.ark new file mode 100644 index 000000000..5fe57292f --- /dev/null +++ b/tests/errors/capture/capture_out_of_scope.ark @@ -0,0 +1,3 @@ +(let bar (fun () (let cap 0))) +(let foo (fun (&cap) ())) +(print foo bar) diff --git a/tests/errors/capture/capture_out_of_scope.expected b/tests/errors/capture/capture_out_of_scope.expected new file mode 100644 index 000000000..75e3e7c3c --- /dev/null +++ b/tests/errors/capture/capture_out_of_scope.expected @@ -0,0 +1,2 @@ +At &cap @ 1:0 + Can not capture cap because it is referencing a variable defined in an unreachable scope. diff --git a/tests/errors/capture/unbound.expected b/tests/errors/capture/unbound.expected index 75c10aed4..b142540b1 100644 --- a/tests/errors/capture/unbound.expected +++ b/tests/errors/capture/unbound.expected @@ -1,2 +1,2 @@ At &d @ 1:0 - Can not capture d because node is referencing an unbound variable. + Can not capture d because it is referencing an unbound variable. From 052bf89ee69e8a10c6398b05b677f39fc47d3147 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Mon, 29 Jul 2024 22:01:23 +0200 Subject: [PATCH 8/8] feat(benchmarks): adding a welder benchmark to see how long a full compilation takes --- tests/benchmarks/main.cpp | 21 +++++++++++++++++++ tests/benchmarks/resources/parser/big.ark | 8 +++---- tests/benchmarks/resources/parser/medium.ark | 14 ++++++------- tests/benchmarks/resources/parser/simple.ark | 11 +++++----- .../resources/parser/some/important.ark | 2 ++ 5 files changed, 40 insertions(+), 16 deletions(-) create mode 100644 tests/benchmarks/resources/parser/some/important.ark diff --git a/tests/benchmarks/main.cpp b/tests/benchmarks/main.cpp index 5428705fb..d5c63091e 100644 --- a/tests/benchmarks/main.cpp +++ b/tests/benchmarks/main.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -110,4 +111,24 @@ BENCHMARK(BM_Parse)->Name("New parser - Simple - 39 nodes")->Arg(simple)->Unit(b BENCHMARK(BM_Parse)->Name("New parser - Medium - 83 nodes")->Arg(medium)->Unit(benchmark::kMillisecond); BENCHMARK(BM_Parse)->Name("New parser - Big - 665 nodes")->Arg(big)->Unit(benchmark::kMillisecond); +// cppcheck-suppress constParameterCallback +static void BM_Welder(benchmark::State& state) +{ + using namespace std::string_literals; + + const long selection = state.range(0); + const std::string filename = "tests/benchmarks/resources/parser/"s + (selection == simple ? "simple.ark" : (selection == medium ? "medium.ark" : "big.ark")); + + for (auto _ : state) + { + Ark::Welder welder(0, { ARK_TESTS_ROOT "lib" }); + benchmark::DoNotOptimize(welder.computeASTFromFile(filename)); + benchmark::DoNotOptimize(welder.generateBytecode()); + } +} + +BENCHMARK(BM_Welder)->Name("Welder - Simple - 39 nodes")->Arg(simple)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_Welder)->Name("Welder - Medium - 83 nodes")->Arg(medium)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_Welder)->Name("Welder - Big - 665 nodes")->Arg(big)->Unit(benchmark::kMillisecond); + BENCHMARK_MAIN(); diff --git a/tests/benchmarks/resources/parser/big.ark b/tests/benchmarks/resources/parser/big.ark index 2d9cee185..1addbb6b1 100644 --- a/tests/benchmarks/resources/parser/big.ark +++ b/tests/benchmarks/resources/parser/big.ark @@ -51,7 +51,7 @@ (set _index (+ 1 _index))}) _output })) -(import lib.math :min :max) # needed for math:min, math:max +(import std.Math :min :max) # needed for math:min, math:max # @brief Drop the first n elements of a list # @param _L the list to work on @@ -68,7 +68,7 @@ (list:drop (tail _L) (- _n 1)) _L) { - (mut _index (max 0 _n)) + (mut _index (math:max 0 _n)) (mut _output []) (while (< _index (len _L)) { (set _output (append _output (@ _L _index))) @@ -197,7 +197,7 @@ (let list:take (fun (_L _n) { (mut _index 0) (mut _output []) - (set _n (min _n (len _L))) + (set _n (math:min _n (len _L))) (while (< _index _n) { (set _output (append _output (@ _L _index))) @@ -255,7 +255,7 @@ # =end # @author https://github.com/FrenchMasterSword (let list:zip (fun (_a _b) { - (let _m (min (len _a) (len _b))) + (let _m (math:min (len _a) (len _b))) (mut _c []) (mut _index 0) (while (< _index _m) { diff --git a/tests/benchmarks/resources/parser/medium.ark b/tests/benchmarks/resources/parser/medium.ark index 62d05c3e1..2df0da04c 100644 --- a/tests/benchmarks/resources/parser/medium.ark +++ b/tests/benchmarks/resources/parser/medium.ark @@ -9,17 +9,17 @@ (print "Generating callbacks") (mut acc 0) # here we are filling the list callbacks -(while (notEq acc (len data)) { - (mut d (at data acc)) +(while (!= acc (len data)) { + (mut d (@ data acc)) (set callbacks (append callbacks (fun (&d) (egg d)))) - (set acc (add 1 acc))}) + (set acc (+ 1 acc))}) # then we reset the accumulator (set acc 0) -(while (notEq acc (len callbacks)) { - (mut var (at callback acc)) +(while (!= acc (len callbacks)) { + (mut var (@ callbacks acc)) (print "stored: " var.d) (puts "Calling callback number " acc ": ") - (mut stored (at callbacks acc)) + (mut stored (@ callbacks acc)) (stored) - (set acc (add 1 acc))}) + (set acc (+ 1 acc))}) diff --git a/tests/benchmarks/resources/parser/simple.ark b/tests/benchmarks/resources/parser/simple.ark index 89acfc1a6..72164f345 100644 --- a/tests/benchmarks/resources/parser/simple.ark +++ b/tests/benchmarks/resources/parser/simple.ark @@ -1,12 +1,13 @@ (import some.important :code :base) -(let a (if (equal b c) 1 "4")) +(let a (if (= 5 code) 1 "4")) + +(mut foo (fun (bar egg &a) { + (let yes egg) + (+ 1 bar)})) + (if (foo a) (print a " is a very interesting variable")) -(mut foo (fun (bar egg &captured) { - (let yes indeed) - (add 1 yes)})) - (while true { (foo true nil false)}) diff --git a/tests/benchmarks/resources/parser/some/important.ark b/tests/benchmarks/resources/parser/some/important.ark new file mode 100644 index 000000000..c27e09cc3 --- /dev/null +++ b/tests/benchmarks/resources/parser/some/important.ark @@ -0,0 +1,2 @@ +(let code 12) +(let base (fun () 14))