From e77cfbe1dd48ff7fcedb987cbe3a20649da2586e Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sat, 5 Oct 2024 23:41:47 +0200 Subject: [PATCH] feat(compiler): the compiler now outputs an IR instead of bytecode, which is now generated by the IRCompiler --- include/Ark/Compiler/Compiler.hpp | 105 ++++---- .../IntermediateRepresentation/Entity.hpp | 66 +++++ .../IntermediateRepresentation/IRCompiler.hpp | 79 ++++++ include/Ark/Compiler/Welder.hpp | 5 +- src/arkreactor/Compiler/Compiler.cpp | 239 +++--------------- .../IntermediateRepresentation/Entity.cpp | 44 ++++ .../IntermediateRepresentation/IRCompiler.cpp | 225 +++++++++++++++++ src/arkreactor/Compiler/Welder.cpp | 6 +- 8 files changed, 509 insertions(+), 260 deletions(-) create mode 100644 include/Ark/Compiler/IntermediateRepresentation/Entity.hpp create mode 100644 include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp create mode 100644 src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp create mode 100644 src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp diff --git a/include/Ark/Compiler/Compiler.hpp b/include/Ark/Compiler/Compiler.hpp index 084fbd42d..996a5a430 100644 --- a/include/Ark/Compiler/Compiler.hpp +++ b/include/Ark/Compiler/Compiler.hpp @@ -19,11 +19,11 @@ #include #include -#include +#include #include #include -namespace Ark +namespace Ark::internal { class State; class Welder; @@ -32,7 +32,7 @@ namespace Ark * @brief The ArkScript bytecode compiler * */ - class ARK_API Compiler + class ARK_API Compiler final { public: /** @@ -47,17 +47,23 @@ namespace Ark * * @param ast */ - void process(const internal::Node& ast); + void process(const Node& ast); /** - * @brief Return the constructed bytecode object + * @brief Return the IR blocks (one per scope) * - * @return const bytecode_t& + * @return const std::vector& */ - [[nodiscard]] const bytecode_t& bytecode() const noexcept; + [[nodiscard]] const std::vector& intermediateRepresentation() const noexcept; - friend class State; - friend class Welder; + [[nodiscard]] const std::vector& symbols() const noexcept; + + /** + * @brief Return the value table pre-computed + * + * @return const std::vector& + */ + [[nodiscard]] const std::vector& values() const noexcept; private: struct Page @@ -67,34 +73,20 @@ namespace Ark }; // tables: symbols, values, plugins and codes - std::vector m_symbols; - std::vector m_plugins; - std::vector m_values; - std::vector> m_code_pages; - std::vector> m_temp_pages; ///< we need temporary code pages for some compilations passes + std::vector m_symbols; + std::vector m_values; + std::vector m_code_pages; + std::vector m_temp_pages; ///< we need temporary code pages for some compilations passes - bytecode_t m_bytecode; unsigned m_debug; ///< the debug level of the compiler - /** - * @brief Push the file headers (magic, version used, timestamp) - * - */ - void pushFileHeader() noexcept; - - /** - * @brief Push the symbols and values tables - * - */ - void pushSymAndValTables(); - /** * @brief helper functions to get a temp or finalized code page * * @param page page descriptor - * @return std::vector& + * @return std::vector& */ - std::vector& page(const Page page) noexcept + IR::Block& page(const Page page) noexcept { if (!page.is_temp) return m_code_pages[page.index]; @@ -105,9 +97,9 @@ namespace Ark * @brief helper functions to get a temp or finalized code page * * @param page page descriptor - * @return std::vector* + * @return std::vector* */ - std::vector* page_ptr(const Page page) noexcept + IR::Block* page_ptr(const Page page) noexcept { if (!page.is_temp) return &m_code_pages[page.index]; @@ -118,9 +110,9 @@ namespace Ark * @brief Checking if a symbol is an operator * * @param name symbol name - * @return std::optional operator instruction + * @return std::optional operator instruction */ - static std::optional getOperator(const std::string& name) noexcept; + static std::optional getOperator(const std::string& name) noexcept; /** * @brief Checking if a symbol is a builtin @@ -134,9 +126,9 @@ namespace Ark * @brief Checking if a symbol is a list instruction * * @param name - * @return std::optional list instruction + * @return std::optional list instruction */ - static std::optional getListInstruction(const std::string& name) noexcept; + static std::optional getListInstruction(const std::string& name) noexcept; /** * Checks if a node is a list and has a keyboard as its first node, indicating if it's producing a value on the stack or not @@ -144,7 +136,7 @@ namespace Ark * @return true if the node produces an output on the stack (fun, if, begin) * @return false otherwise (let, mut, set, while, import, del) */ - static bool nodeProducesOutput(const internal::Node& node); + static bool nodeProducesOutput(const Node& node); /** * @brief Check if a given instruction is unary (takes only one argument) @@ -153,16 +145,7 @@ namespace Ark * @return true the instruction is unary * @return false */ - static bool isUnaryInst(internal::Instruction inst) noexcept; - - /** - * @brief Checking if a symbol may be coming from a plugin - * - * @param name symbol name - * @return true the symbol may be from a plugin, loaded at runtime - * @return false - */ - bool mayBeFromPlugin(const std::string& name) noexcept; + static bool isUnaryInst(Instruction inst) noexcept; /** * @brief Display a warning message @@ -170,7 +153,7 @@ namespace Ark * @param message * @param node */ - static void compilerWarning(const std::string& message, const internal::Node& node); + static void compilerWarning(const std::string& message, const Node& node); /** * @brief Throw a nice error message @@ -178,27 +161,27 @@ namespace Ark * @param message * @param node */ - [[noreturn]] static void throwCompilerError(const std::string& message, const internal::Node& node); + [[noreturn]] static void throwCompilerError(const std::string& message, const Node& node); /** * @brief Compile an expression (a node) recursively * - * @param x the internal::Node to compile + * @param x the Node to compile * @param p the current page number we're on * @param is_result_unused * @param is_terminal * @param var_name */ - void compileExpression(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name = ""); + void compileExpression(const Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name = ""); - void compileSymbol(const internal::Node& x, Page p, bool is_result_unused); - void compileListInstruction(const internal::Node& c0, const internal::Node& x, Page p, bool is_result_unused); - void compileIf(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name); - void compileFunction(const internal::Node& x, Page p, bool is_result_unused, const std::string& var_name); - void compileLetMutSet(internal::Keyword n, const internal::Node& x, Page p); - void compileWhile(const internal::Node& x, Page p); - void compilePluginImport(const internal::Node& x, Page p); - void handleCalls(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name); + void compileSymbol(const Node& x, Page p, bool is_result_unused); + void compileListInstruction(const Node& c0, const Node& x, Page p, bool is_result_unused); + void compileIf(const Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name); + void compileFunction(const Node& x, Page p, bool is_result_unused, const std::string& var_name); + void compileLetMutSet(Keyword n, const Node& x, Page p); + void compileWhile(const Node& x, Page p); + void compilePluginImport(const Node& x, Page p); + void handleCalls(const Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name); /** * @brief Register a given node in the symbol table @@ -207,7 +190,7 @@ namespace Ark * @param sym * @return uint16_t */ - uint16_t addSymbol(const internal::Node& sym); + uint16_t addSymbol(const Node& sym); /** * @brief Register a given node in the value table @@ -216,7 +199,7 @@ namespace Ark * @param x * @return uint16_t */ - uint16_t addValue(const internal::Node& x); + uint16_t addValue(const Node& x); /** * @brief Register a page id (function reference) in the value table @@ -226,7 +209,7 @@ namespace Ark * @param current A reference to the current node, for context * @return std::size_t */ - uint16_t addValue(std::size_t page_id, const internal::Node& current); + uint16_t addValue(std::size_t page_id, const Node& current); }; } diff --git a/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp b/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp new file mode 100644 index 000000000..fd84e469c --- /dev/null +++ b/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp @@ -0,0 +1,66 @@ +/** + * @file Entity.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief An entity in the IR is a bundle of information + * @version 0.1 + * @date 2024-10-05 + * + * @copyright Copyright (c) 2024 + * + */ + +#ifndef ARK_COMPILER_INTERMEDIATEREPRESENTATION_ENTITY_HPP +#define ARK_COMPILER_INTERMEDIATEREPRESENTATION_ENTITY_HPP + +#include +#include + +#include +#include + +namespace Ark::internal::IR +{ + enum class Kind + { + Label, + Goto, + GotoIfTrue, + GotoIfFalse, + Opcode + }; + + using label_t = std::size_t; + + class Entity + { + public: + explicit Entity(Kind kind); + + explicit Entity(Instruction inst, uint16_t arg = 0); + + static Entity Label(); + + static Entity Goto(const Entity& label); + + static Entity GotoIf(const Entity& label, bool cond); + + [[nodiscard]] Word bytecode() const; + + [[nodiscard]] inline label_t label() const { return m_label; } + + [[nodiscard]] inline Kind kind() const { return m_kind; } + + private: + inline static label_t LabelCounter = 0; + + Kind m_kind; + label_t m_label { 0 }; + Instruction m_inst; + uint8_t m_secondary_arg { 0 }; + uint16_t m_primary_arg { 0 }; + }; + + using Block = std::vector; +} + +#endif // ARK_COMPILER_INTERMEDIATEREPRESENTATION_ENTITY_HPP diff --git a/include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp b/include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp new file mode 100644 index 000000000..f57aeb4fa --- /dev/null +++ b/include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp @@ -0,0 +1,79 @@ +/** + * @file IRCompiler.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief Compile the intermediate representation to bytecode + * @version 0.1 + * @date 2024-10-05 + * + * @copyright Copyright (c) 2024 + * + */ + +#ifndef ARK_COMPILER_INTERMEDIATEREPRESENTATION_IRCOMPILER_HPP +#define ARK_COMPILER_INTERMEDIATEREPRESENTATION_IRCOMPILER_HPP + +#include +#include + +#include +#include +#include +#include +#include + +namespace Ark::internal +{ + class ARK_API IRCompiler final + { + public: + /** + * @brief Create a new IRCompiler + * + * @param debug debug level + */ + explicit IRCompiler(unsigned debug); + + /** + * @brief Turn a given IR into bytecode + * + * @param pages list of lists of IR entities generated by the compiler + * @param symbols symbol table generated by the compiler + * @param values value table generated by the compiler + */ + void process(const std::vector& pages, const std::vector& symbols, const std::vector& values); + + /** + * @brief Return the constructed bytecode object + * + * @return const bytecode_t& + */ + [[nodiscard]] const bytecode_t& bytecode() const noexcept; + + private: + Logger m_logger; + bytecode_t m_bytecode; + std::vector m_ir; + + void compile(); + + /** + * @brief Push a word to the m_bytecode + * @param word + */ + void pushWord(const Word& word); + + /** + * @brief Push the file headers (magic, version used, timestamp) + * + */ + void pushFileHeader() noexcept; + + /** + * @brief Push the symbols and values tables + * + */ + void pushSymAndValTables(const std::vector& symbols, const std::vector& values); + }; +} + +#endif // ARK_COMPILER_INTERMEDIATEREPRESENTATION_IRCOMPILER_HPP diff --git a/include/Ark/Compiler/Welder.hpp b/include/Ark/Compiler/Welder.hpp index ca672967e..ad0c13b5f 100644 --- a/include/Ark/Compiler/Welder.hpp +++ b/include/Ark/Compiler/Welder.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -89,6 +90,7 @@ namespace Ark std::filesystem::path m_root_file; std::vector m_imports; + std::vector m_ir; bytecode_t m_bytecode; internal::Node m_computed_ast; @@ -99,7 +101,8 @@ namespace Ark internal::NameResolutionPass m_name_resolver; internal::Logger m_logger; - Compiler m_compiler; + internal::IRCompiler m_ir_compiler; + internal::Compiler m_compiler; bool computeAST(const std::string& filename, const std::string& code, bool fail_with_exception); }; diff --git a/src/arkreactor/Compiler/Compiler.cpp b/src/arkreactor/Compiler/Compiler.cpp index 5ad2d7ac2..90eb2ae19 100644 --- a/src/arkreactor/Compiler/Compiler.cpp +++ b/src/arkreactor/Compiler/Compiler.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -15,9 +14,8 @@ #include #include -namespace Ark +namespace Ark::internal { - using namespace internal; using namespace literals; Compiler::Compiler(const unsigned debug) : @@ -26,8 +24,6 @@ namespace Ark void Compiler::process(const Node& ast) { - pushFileHeader(); - m_code_pages.emplace_back(); // create empty page // gather symbols, values, and start to create code segments @@ -36,163 +32,28 @@ namespace Ark /* current_page */ Page { .index = 0, .is_temp = false }, /* is_result_unused */ false, /* is_terminal */ false); - - pushSymAndValTables(); - - // push the different code segments - for (std::size_t i = 0, end = m_code_pages.size(); i < end; ++i) - { - std::vector& page = m_code_pages[i]; - // just in case we got too far, always add a HALT to be sure the - // VM won't do anything crazy - page.emplace_back(Instruction::HALT); - - // push number of elements - const std::size_t page_size = page.size(); - if (page_size > std::numeric_limits::max()) - throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i)); - - m_bytecode.push_back(Instruction::CODE_SEGMENT_START); - m_bytecode.push_back(static_cast((page_size & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(page_size & 0x00ff)); - - for (auto inst : page) - { - m_bytecode.push_back(inst.padding); - m_bytecode.push_back(inst.opcode); - - auto [first, second] = inst.bytes(); - m_bytecode.push_back(first); - m_bytecode.push_back(second); - } - } - - if (m_code_pages.empty()) - { - // code segment with a single instruction - m_bytecode.push_back(Instruction::CODE_SEGMENT_START); - m_bytecode.push_back(0_u8); - m_bytecode.push_back(1_u8); - - m_bytecode.push_back(0_u8); - m_bytecode.push_back(Instruction::HALT); - m_bytecode.push_back(0_u8); - m_bytecode.push_back(0_u8); - } - - constexpr std::size_t header_size = 18; - - // generate a hash of the tables + bytecode - std::vector hash_out(picosha2::k_digest_size); - picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out); - m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end()); } - const bytecode_t& Compiler::bytecode() const noexcept + const std::vector& Compiler::intermediateRepresentation() const noexcept { - return m_bytecode; + return m_code_pages; } - void Compiler::pushFileHeader() noexcept + const std::vector& Compiler::symbols() const noexcept { - /* - Generating headers: - - lang name (to be sure we are executing an ArkScript file) - on 4 bytes (ark + padding) - - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes) - - timestamp (8 bytes, unix format) - */ - - m_bytecode.push_back('a'); - m_bytecode.push_back('r'); - m_bytecode.push_back('k'); - m_bytecode.push_back(0_u8); - - // push version - for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH }) - { - m_bytecode.push_back(static_cast((n & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(n & 0x00ff)); - } - - // push timestamp - const long long timestamp = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - for (long i = 0; i < 8; ++i) - { - const long shift = 8 * (7 - i); - const auto ts_byte = static_cast((timestamp & (0xffLL << shift)) >> shift); - m_bytecode.push_back(ts_byte); - } + return m_symbols; } - void Compiler::pushSymAndValTables() + const std::vector& Compiler::values() const noexcept { - const std::size_t symbol_size = m_symbols.size(); - if (symbol_size > std::numeric_limits::max()) - throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size)); - - m_bytecode.push_back(SYM_TABLE_START); - m_bytecode.push_back(static_cast((symbol_size & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(symbol_size & 0x00ff)); - - for (const auto& sym : m_symbols) - { - // push the string, null terminated - std::string s = sym.string(); - std::ranges::transform(s, std::back_inserter(m_bytecode), [](const char i) { - return static_cast(i); - }); - m_bytecode.push_back(0_u8); - } - - const std::size_t value_size = m_values.size(); - if (value_size > std::numeric_limits::max()) - throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size)); - - m_bytecode.push_back(VAL_TABLE_START); - m_bytecode.push_back(static_cast((value_size & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(value_size & 0x00ff)); - - for (const ValTableElem& val : m_values) - { - if (val.type == ValTableElemType::Number) - { - m_bytecode.push_back(NUMBER_TYPE); - const auto n = std::get(val.value); - std::string t = std::to_string(n); - std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) { - return static_cast(i); - }); - } - else if (val.type == ValTableElemType::String) - { - m_bytecode.push_back(STRING_TYPE); - auto t = std::get(val.value); - std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) { - return static_cast(i); - }); - } - else if (val.type == ValTableElemType::PageAddr) - { - m_bytecode.push_back(FUNC_TYPE); - const std::size_t addr = std::get(val.value); - m_bytecode.push_back(static_cast((addr & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(addr & 0x00ff)); - } - else - throw Error("The compiler is trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code"); - - m_bytecode.push_back(0_u8); - } + return m_values; } - std::optional Compiler::getOperator(const std::string& name) noexcept + std::optional Compiler::getOperator(const std::string& name) noexcept { - const auto it = std::ranges::find(internal::Language::operators, name); - if (it != internal::Language::operators.end()) - return static_cast(std::distance(internal::Language::operators.begin(), it) + FIRST_OPERATOR); + const auto it = std::ranges::find(Language::operators, name); + if (it != Language::operators.end()) + return static_cast(std::distance(Language::operators.begin(), it) + FIRST_OPERATOR); return std::nullopt; } @@ -209,9 +70,9 @@ namespace Ark std::optional Compiler::getListInstruction(const std::string& name) noexcept { - const auto it = std::ranges::find(internal::Language::listInstructions, name); - if (it != internal::Language::listInstructions.end()) - return static_cast(std::distance(internal::Language::listInstructions.begin(), it) + LIST); + const auto it = std::ranges::find(Language::listInstructions, name); + if (it != Language::listInstructions.end()) + return static_cast(std::distance(Language::listInstructions.begin(), it) + LIST); return std::nullopt; } @@ -244,16 +105,6 @@ namespace Ark } } - bool Compiler::mayBeFromPlugin(const std::string& name) noexcept - { - std::string splitted = Utils::splitString(name, ':')[0]; - const auto it = std::ranges::find_if(m_plugins, - [&splitted](const std::string& plugin) -> bool { - return std::filesystem::path(plugin).stem().string() == splitted; - }); - return it != m_plugins.end(); - } - void Compiler::compilerWarning(const std::string& message, const Node& node) { fmt::println("{} {}", fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)), Diagnostics::makeContextWithNode(message, node)); @@ -432,23 +283,23 @@ namespace Ark compileExpression(x.constList()[1], p, false, false); // jump only if needed to the if - const std::size_t jump_to_if_pos = page(p).size(); - page(p).emplace_back(Instruction::POP_JUMP_IF_TRUE); + const auto label_then = IR::Entity::Label(); + page(p).emplace_back(IR::Entity::GotoIf(label_then, true)); // else code if (x.constList().size() == 4) // we have an else clause compileExpression(x.constList()[3], p, is_result_unused, is_terminal, var_name); // when else is finished, jump to end - const std::size_t jump_to_end_pos = page(p).size(); - page(p).emplace_back(Instruction::JUMP); + const auto label_end = IR::Entity::Label(); + page(p).emplace_back(IR::Entity::Goto(label_end)); // absolute address to jump to if condition is true - page(p)[jump_to_if_pos].data = static_cast(page(p).size()); + page(p).emplace_back(label_then); // if code compileExpression(x.constList()[2], p, is_result_unused, is_terminal, var_name); // set jump to end pos - page(p)[jump_to_end_pos].data = static_cast(page(p).size()); + page(p).emplace_back(label_end); } void Compiler::compileFunction(const Node& x, const Page p, const bool is_result_unused, const std::string& var_name) @@ -523,20 +374,21 @@ namespace Ark throwCompilerError("Invalid node ; if it was computed by a macro, check that a node is returned", x); // save current position to jump there at the end of the loop - std::size_t current = page(p).size(); + const auto label_loop = IR::Entity::Label(); + page(p).emplace_back(label_loop); // push condition compileExpression(x.constList()[1], p, false, false); // absolute jump to end of block if condition is false - const std::size_t jump_to_end_pos = page(p).size(); - page(p).emplace_back(POP_JUMP_IF_FALSE); + const auto label_end = IR::Entity::Label(); + page(p).emplace_back(IR::Entity::GotoIf(label_end, false)); // push code to page compileExpression(x.constList()[2], p, true, false); // loop, jump to the condition - page(p).emplace_back(JUMP, current); + page(p).emplace_back(IR::Entity::Goto(label_loop)); // absolute address to jump to if condition is false - page(p)[jump_to_end_pos].data = static_cast(page(p).size()); + page(p).emplace_back(label_end); } void Compiler::compilePluginImport(const Node& x, const Page p) @@ -553,8 +405,6 @@ namespace Ark // register plugin path in the constants table uint16_t id = addValue(Node(NodeType::String, path)); - // save plugin name to use it later - m_plugins.push_back(path); // add plugin instruction + id of the constant referring to the plugin path page(p).emplace_back(PLUGIN, id); } @@ -564,7 +414,7 @@ namespace Ark constexpr std::size_t start_index = 1; const auto node = x.constList()[0]; - const auto maybe_operator = node.nodeType() == NodeType::Symbol ? getOperator(node.string()) : std::nullopt; + const std::optional maybe_operator = node.nodeType() == NodeType::Symbol ? getOperator(node.string()) : std::nullopt; enum class ShortcircuitOp { @@ -587,18 +437,16 @@ namespace Ark compileExpression(x.constList()[1], p, false, false); page(p).emplace_back(DUP); - std::vector to_update; + const auto label_shortcircuit = IR::Entity::Label(); for (std::size_t i = 2, end = x.constList().size(); i < end; ++i) { - to_update.push_back(page(p).size()); - switch (maybe_shortcircuit.value()) { case ShortcircuitOp::And: - page(p).emplace_back(POP_JUMP_IF_FALSE); + page(p).emplace_back(IR::Entity::GotoIf(label_shortcircuit, false)); break; case ShortcircuitOp::Or: - page(p).emplace_back(POP_JUMP_IF_TRUE); + page(p).emplace_back(IR::Entity::GotoIf(label_shortcircuit, true)); break; } page(p).emplace_back(POP); @@ -608,8 +456,7 @@ namespace Ark page(p).emplace_back(DUP); } - for (const auto pos : to_update) - page(p)[pos].data = static_cast(page(p).size()); + page(p).emplace_back(label_shortcircuit); } else if (!maybe_operator.has_value()) { @@ -646,8 +493,8 @@ namespace Ark throwCompilerError(fmt::format("Invalid node inside call to `{}'", node.repr()), x); } // push proc from temp page - for (const Word& word : m_temp_pages.back()) - page(p).push_back(word); + for (const auto& inst : m_temp_pages.back()) + page(p).push_back(inst); m_temp_pages.pop_back(); // number of arguments @@ -664,9 +511,9 @@ namespace Ark else // operator { // retrieve operator - auto op = Word(maybe_operator.value()); + auto op = maybe_operator.value(); - if (op.opcode == ASSERT) + if (op == ASSERT) is_result_unused = false; // push arguments on current page @@ -684,14 +531,14 @@ namespace Ark // in order to be able to handle things like (op A B C D...) // which should be transformed into A B op C op D op... if (exp_count >= 2) - page(p).emplace_back(op.opcode, 2); // TODO generalize to n arguments (n >= 2) + page(p).emplace_back(op, 2); // TODO generalize to n arguments (n >= 2) } - if (isUnaryInst(static_cast(op.opcode))) + if (isUnaryInst(op)) { if (exp_count != 1) throwCompilerError(fmt::format("Operator needs one argument, but was called with {}", exp_count), x.constList()[0]); - page(p).emplace_back(op.opcode); + page(p).emplace_back(op); } else if (exp_count <= 1) { @@ -701,7 +548,7 @@ namespace Ark // need to check we didn't push the (op A B C D...) things for operators not supporting it if (exp_count > 2) { - switch (op.opcode) + switch (op) { // authorized instructions case ADD: [[fallthrough]]; @@ -716,7 +563,7 @@ namespace Ark fmt::format( "can not create a chained expression (of length {}) for operator `{}'. You most likely forgot a `)'.", exp_count, - Language::operators[static_cast(op.opcode - FIRST_OPERATOR)]), + Language::operators[static_cast(op - FIRST_OPERATOR)]), x); } } @@ -729,12 +576,10 @@ namespace Ark uint16_t Compiler::addSymbol(const Node& sym) { // otherwise, add the symbol, and return its id in the table - auto it = std::ranges::find_if(m_symbols, [&sym](const Node& sym_node) -> bool { - return sym_node.string() == sym.string(); - }); + auto it = std::ranges::find(m_symbols, sym.string()); if (it == m_symbols.end()) { - m_symbols.push_back(sym); + m_symbols.push_back(sym.string()); it = m_symbols.begin() + static_cast::difference_type>(m_symbols.size() - 1); } diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp new file mode 100644 index 000000000..09ce0e0b4 --- /dev/null +++ b/src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp @@ -0,0 +1,44 @@ +#include + +namespace Ark::internal::IR +{ + Entity::Entity(const Kind kind) : + m_kind(kind), + m_inst(NOP) + {} + + Entity::Entity(const Instruction inst, const uint16_t arg) : + m_kind(Kind::Opcode), + m_inst(inst), m_primary_arg(arg) + {} + + Entity Entity::Label() + { + auto label = Entity(Kind::Label); + label.m_label = Entity::LabelCounter++; + + return label; + } + + Entity Entity::Goto(const Entity& label) + { + auto jump = Entity(Kind::Goto); + jump.m_label = label.m_label; + + return jump; + } + + Entity Entity::GotoIf(const Entity& label, const bool cond) + { + auto jump = Entity(cond ? Kind::GotoIfTrue : Kind::GotoIfFalse); + jump.m_label = label.m_label; + + return jump; + } + + Word Entity::bytecode() const + { + // todo: handle secondary_arg + return Word(m_inst, m_primary_arg + m_secondary_arg); + } +} diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp new file mode 100644 index 000000000..3b28c5d89 --- /dev/null +++ b/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp @@ -0,0 +1,225 @@ +#include + +#include +#include +#include + +#include +#include + +namespace Ark::internal +{ + using namespace literals; + + IRCompiler::IRCompiler(const unsigned debug) : + m_logger("IRCompiler", debug) + {} + + void IRCompiler::process(const std::vector& pages, const std::vector& symbols, const std::vector& values) + { + pushFileHeader(); + pushSymAndValTables(symbols, values); + + m_ir = pages; + compile(); + + if (m_ir.empty()) + { + // code segment with a single instruction + m_bytecode.push_back(CODE_SEGMENT_START); + m_bytecode.push_back(0_u8); + m_bytecode.push_back(1_u8); + + m_bytecode.push_back(0_u8); + m_bytecode.push_back(HALT); + m_bytecode.push_back(0_u8); + m_bytecode.push_back(0_u8); + } + + constexpr std::size_t header_size = 18; + + // generate a hash of the tables + bytecode + std::vector hash_out(picosha2::k_digest_size); + picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out); + m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end()); + } + + const bytecode_t& IRCompiler::bytecode() const noexcept + { + return m_bytecode; + } + + void IRCompiler::compile() + { + // push the different code segments + for (std::size_t i = 0, end = m_ir.size(); i < end; ++i) + { + IR::Block& page = m_ir[i]; + // just in case we got too far, always add a HALT to be sure the + // VM won't do anything crazy + page.emplace_back(HALT); + + // push number of elements + const auto page_size = std::ranges::count_if(page, [](const auto& a) { + return a.kind() != IR::Kind::Label; + }); + if (std::cmp_greater(page_size, std::numeric_limits::max())) + throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i)); + + m_bytecode.push_back(CODE_SEGMENT_START); + m_bytecode.push_back(static_cast((page_size & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(page_size & 0x00ff)); + + // register labels position + uint16_t pos = 0; + std::unordered_map label_to_position; + for (auto inst : page) + { + switch (inst.kind()) + { + case IR::Kind::Label: + label_to_position[inst.label()] = pos; + break; + + default: + ++pos; + } + } + + for (auto inst : page) + { + switch (inst.kind()) + { + case IR::Kind::Goto: + pushWord(Word(JUMP, label_to_position[inst.label()])); + break; + + case IR::Kind::GotoIfTrue: + pushWord(Word(POP_JUMP_IF_TRUE, label_to_position[inst.label()])); + break; + + case IR::Kind::GotoIfFalse: + pushWord(Word(POP_JUMP_IF_FALSE, label_to_position[inst.label()])); + break; + + case IR::Kind::Opcode: + pushWord(inst.bytecode()); + break; + + default: + break; + } + } + } + } + + void IRCompiler::pushWord(const Word& word) + { + m_bytecode.push_back(word.padding); + m_bytecode.push_back(word.opcode); + + auto [first, second] = word.bytes(); + m_bytecode.push_back(first); + m_bytecode.push_back(second); + } + + void IRCompiler::pushFileHeader() noexcept + { + /* + Generating headers: + - lang name (to be sure we are executing an ArkScript file) + on 4 bytes (ark + padding) + - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes) + - timestamp (8 bytes, unix format) + */ + + m_bytecode.push_back('a'); + m_bytecode.push_back('r'); + m_bytecode.push_back('k'); + m_bytecode.push_back(0_u8); + + // push version + for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH }) + { + m_bytecode.push_back(static_cast((n & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(n & 0x00ff)); + } + + // push timestamp + const long long timestamp = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + for (long i = 0; i < 8; ++i) + { + const long shift = 8 * (7 - i); + const auto ts_byte = static_cast((timestamp & (0xffLL << shift)) >> shift); + m_bytecode.push_back(ts_byte); + } + } + + void IRCompiler::pushSymAndValTables(const std::vector& symbols, const std::vector& values) + { + const std::size_t symbol_size = symbols.size(); + if (symbol_size > std::numeric_limits::max()) + throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size)); + + m_bytecode.push_back(SYM_TABLE_START); + m_bytecode.push_back(static_cast((symbol_size & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(symbol_size & 0x00ff)); + + for (const auto& sym : symbols) + { + // push the string, null terminated + std::ranges::transform(sym, std::back_inserter(m_bytecode), [](const char i) { + return static_cast(i); + }); + m_bytecode.push_back(0_u8); + } + + const std::size_t value_size = values.size(); + if (value_size > std::numeric_limits::max()) + throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size)); + + m_bytecode.push_back(VAL_TABLE_START); + m_bytecode.push_back(static_cast((value_size & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(value_size & 0x00ff)); + + for (const ValTableElem& val : values) + { + switch (val.type) + { + case ValTableElemType::Number: + { + m_bytecode.push_back(NUMBER_TYPE); + const auto n = std::get(val.value); + std::string t = std::to_string(n); + std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) { + return static_cast(i); + }); + break; + } + + case ValTableElemType::String: + { + m_bytecode.push_back(STRING_TYPE); + auto t = std::get(val.value); + std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) { + return static_cast(i); + }); + break; + } + + case ValTableElemType::PageAddr: + { + m_bytecode.push_back(FUNC_TYPE); + const std::size_t addr = std::get(val.value); + m_bytecode.push_back(static_cast((addr & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(addr & 0x00ff)); + break; + } + } + + m_bytecode.push_back(0_u8); + } + } +} diff --git a/src/arkreactor/Compiler/Welder.cpp b/src/arkreactor/Compiler/Welder.cpp index a6abd45c9..d57ed4a99 100644 --- a/src/arkreactor/Compiler/Welder.cpp +++ b/src/arkreactor/Compiler/Welder.cpp @@ -20,6 +20,7 @@ namespace Ark m_ast_optimizer(debug), m_name_resolver(debug), m_logger("Welder", debug), + m_ir_compiler(debug), m_compiler(debug) {} @@ -48,7 +49,10 @@ namespace Ark try { m_compiler.process(m_computed_ast); - m_bytecode = m_compiler.bytecode(); + m_ir = m_compiler.intermediateRepresentation(); + + m_ir_compiler.process(m_ir, m_compiler.symbols(), m_compiler.values()); + m_bytecode = m_ir_compiler.bytecode(); return true; }