diff --git a/.github/images/diagram.svg b/.github/images/diagram.svg index 79bc11d69..b39699b05 100644 --- a/.github/images/diagram.svg +++ b/.github/images/diagram.svg @@ -1 +1 @@ -teststestssrcsrcliblibincludeincludefuzzingfuzzingexamplesexamplesdocsdocscmakecmake.vscode.vscode.github.githubunittestsunittestserrorserrorsarkscriptarkscriptarkscriptarkscriptarkreactorarkreactorArkArkuniqueuniqueinputinputcorpuscorpusworkflowsworkflowsresourcesresourcesREPLREPLVMVMCompilerCompilerBuiltinsBuiltinsVMVMREPLREPLCompilerCompilerParserSuiteParserSuiteASTSuiteASTSuiteValueValueMacrosMacrosASTASTinlineinlineValueValueMacrosMacrosASTASTsuccesssuccessfailurefailureExecutorsExecutorsCHANGELO...CHANGELO...CHANGELO...VM.cppVM.cppVM.cppCompiler...Compiler...Compiler...Bytecode...Bytecode...Bytecode...Processo...Processo...Processo...Parser.cppParser.cppParser.cpp.cmake.cpp.gitignore.hpp.inl.json.md.py.sh.svg.txt.xml.yaml.ymleach dot sized by file size +teststestssrcsrcliblibincludeincludeexamplesexamplesdocsdocscmakecmake.github.githubunittestsunittestsfuzzingfuzzingbenchmarksbenchmarksarkscriptarkscriptarkreactorarkreactorCLICLIArkArkgamesgamesworkflowsworkflowsresourcesresourcescorpus-cmin-tmincorpus-cmin-tmincorpus-cmincorpus-cmincorpuscorpusresourcesresourcesREPLREPLVMVMCompilerCompilerBuiltinsBuiltinsVMVMCompilerCompilerParserSuiteParserSuiteLangSuiteLangSuiteDiagnosticsSuiteDiagnosticsSuiteASTSuiteASTSuiteparserparserMacrosMacrosASTASTValueValueMacrosMacrosASTASTsuccesssuccessfailurefailureruntimeruntimecompileTimecompileTime.cmake.cpp.csv.dockerfile.gitignore.hpp.inl.json.md.py.sh.svg.txt.xml.yaml.ymleach dot sized by file size \ No newline at end of file diff --git a/.gitignore b/.gitignore index 07c17cf6d..698b3809e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ __arkscript__/ *.arkc *.arkm /*.ark +/*.ark.ir !tests/unittests/resources/BytecodeReaderSuite/*.arkc # Generated files diff --git a/CHANGELOG.md b/CHANGELOG.md index c07a149e4..e93f837fb 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ - the name & scope resolution pass now checks for mutability errors - compile time checks for mutability errors with `append!`, `concat!` and `pop!` - new `MAKE_CLOSURE ` instruction, generated in place of a `LOAD_CONST` when a closure is made +- added `-fdump-ir` to dump the IR entities to a file named `{file}.ark.ir` +- added 11 super instructions and their implementation to the VM ### Changed - instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument @@ -81,6 +83,9 @@ - the `STORE` instruction has been renamed `SET_VAL` - the `STORE` instruction is emitted in place of the `LET` and `MUT` instructions, without any mutability checking now - `io:writeFile` no longer takes a mode and has been split into `io:writeFile` and `io:appendToFile` +- instructions are now positioned like this: `inst byte1 byte2 byte3` + - byte1 is 0 if the instruction takes a single argument on 16 bits, split on byte2 and byte3 + - if the instruction takes two arguments, they each have 12 bits ; the second one is on byte1 and upper half of byte2, the first on lower half of byte2 and then byte3 ### Removed - removed unused `NodeType::Closure` diff --git a/include/Ark/Compiler/AST/Predicates.hpp b/include/Ark/Compiler/AST/Predicates.hpp index 2c60ff03c..7ccc0dac5 100644 --- a/include/Ark/Compiler/AST/Predicates.hpp +++ b/include/Ark/Compiler/AST/Predicates.hpp @@ -59,26 +59,6 @@ namespace Ark::internal } } IsHex; - inline struct IsUpper final : CharPred - { - IsUpper() : - CharPred("uppercase") {} - bool operator()(const utf8_char_t::codepoint_t c) const override - { - return 0 <= c && c <= 255 && std::isupper(c) != 0; - } - } IsUpper; - - inline struct IsLower final : CharPred - { - IsLower() : - CharPred("lowercase") {} - bool operator()(const utf8_char_t::codepoint_t c) const override - { - return 0 <= c && c <= 255 && std::islower(c) != 0; - } - } IsLower; - inline struct IsAlpha final 
: CharPred { IsAlpha() : @@ -99,16 +79,6 @@ namespace Ark::internal } } IsAlnum; - inline struct IsPrint final : CharPred - { - IsPrint() : - CharPred("printable") {} - bool operator()(const utf8_char_t::codepoint_t c) const override - { - return 0 <= c && c <= 255 && std::isprint(c) != 0; - } - } IsPrint; - struct IsChar final : CharPred { explicit IsChar(const char c) : @@ -186,16 +156,6 @@ namespace Ark::internal } } IsSymbol; - inline struct IsAny final : CharPred - { - IsAny() : - CharPred("any") {} - bool operator()(const utf8_char_t::codepoint_t) const override - { - return true; - } - } IsAny; - const IsChar IsMinus('-'); } diff --git a/include/Ark/Compiler/Compiler.hpp b/include/Ark/Compiler/Compiler.hpp index 084fbd42d..65fc7c9fa 100644 --- a/include/Ark/Compiler/Compiler.hpp +++ b/include/Ark/Compiler/Compiler.hpp @@ -19,11 +19,11 @@ #include #include -#include +#include #include #include -namespace Ark +namespace Ark::internal { class State; class Welder; @@ -32,7 +32,7 @@ namespace Ark * @brief The ArkScript bytecode compiler * */ - class ARK_API Compiler + class ARK_API Compiler final { public: /** @@ -47,17 +47,28 @@ namespace Ark * * @param ast */ - void process(const internal::Node& ast); + void process(const Node& ast); /** - * @brief Return the constructed bytecode object + * @brief Return the IR blocks (one per scope) * - * @return const bytecode_t& + * @return const std::vector& */ - [[nodiscard]] const bytecode_t& bytecode() const noexcept; + [[nodiscard]] const std::vector& intermediateRepresentation() const noexcept; - friend class State; - friend class Welder; + /** + * @brief Return the symbol table pre-computed + * + * @return const std::vector& + */ + [[nodiscard]] const std::vector& symbols() const noexcept; + + /** + * @brief Return the value table pre-computed + * + * @return const std::vector& + */ + [[nodiscard]] const std::vector& values() const noexcept; private: struct Page @@ -67,60 +78,33 @@ namespace Ark }; // tables: symbols, 
values, plugins and codes - std::vector m_symbols; - std::vector m_plugins; - std::vector m_values; - std::vector> m_code_pages; - std::vector> m_temp_pages; ///< we need temporary code pages for some compilations passes + std::vector m_symbols; + std::vector m_values; + std::vector m_code_pages; + std::vector m_temp_pages; ///< we need temporary code pages for some compilations passes - bytecode_t m_bytecode; unsigned m_debug; ///< the debug level of the compiler - /** - * @brief Push the file headers (magic, version used, timestamp) - * - */ - void pushFileHeader() noexcept; - - /** - * @brief Push the symbols and values tables - * - */ - void pushSymAndValTables(); - /** * @brief helper functions to get a temp or finalized code page * * @param page page descriptor - * @return std::vector& + * @return std::vector& */ - std::vector& page(const Page page) noexcept + IR::Block& page(const Page page) noexcept { if (!page.is_temp) return m_code_pages[page.index]; return m_temp_pages[page.index]; } - /** - * @brief helper functions to get a temp or finalized code page - * - * @param page page descriptor - * @return std::vector* - */ - std::vector* page_ptr(const Page page) noexcept - { - if (!page.is_temp) - return &m_code_pages[page.index]; - return &m_temp_pages[page.index]; - } - /** * @brief Checking if a symbol is an operator * * @param name symbol name - * @return std::optional operator instruction + * @return std::optional operator instruction */ - static std::optional getOperator(const std::string& name) noexcept; + static std::optional getOperator(const std::string& name) noexcept; /** * @brief Checking if a symbol is a builtin @@ -134,9 +118,9 @@ namespace Ark * @brief Checking if a symbol is a list instruction * * @param name - * @return std::optional list instruction + * @return std::optional list instruction */ - static std::optional getListInstruction(const std::string& name) noexcept; + static std::optional getListInstruction(const std::string& name) 
noexcept; /** * Checks if a node is a list and has a keyboard as its first node, indicating if it's producing a value on the stack or not @@ -144,7 +128,7 @@ namespace Ark * @return true if the node produces an output on the stack (fun, if, begin) * @return false otherwise (let, mut, set, while, import, del) */ - static bool nodeProducesOutput(const internal::Node& node); + static bool nodeProducesOutput(const Node& node); /** * @brief Check if a given instruction is unary (takes only one argument) @@ -153,16 +137,7 @@ namespace Ark * @return true the instruction is unary * @return false */ - static bool isUnaryInst(internal::Instruction inst) noexcept; - - /** - * @brief Checking if a symbol may be coming from a plugin - * - * @param name symbol name - * @return true the symbol may be from a plugin, loaded at runtime - * @return false - */ - bool mayBeFromPlugin(const std::string& name) noexcept; + static bool isUnaryInst(Instruction inst) noexcept; /** * @brief Display a warning message @@ -170,7 +145,7 @@ namespace Ark * @param message * @param node */ - static void compilerWarning(const std::string& message, const internal::Node& node); + static void compilerWarning(const std::string& message, const Node& node); /** * @brief Throw a nice error message @@ -178,27 +153,27 @@ namespace Ark * @param message * @param node */ - [[noreturn]] static void throwCompilerError(const std::string& message, const internal::Node& node); + [[noreturn]] static void throwCompilerError(const std::string& message, const Node& node); /** * @brief Compile an expression (a node) recursively * - * @param x the internal::Node to compile + * @param x the Node to compile * @param p the current page number we're on * @param is_result_unused * @param is_terminal * @param var_name */ - void compileExpression(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name = ""); + void compileExpression(const Node& x, Page p, bool is_result_unused, bool 
is_terminal, const std::string& var_name = ""); - void compileSymbol(const internal::Node& x, Page p, bool is_result_unused); - void compileListInstruction(const internal::Node& c0, const internal::Node& x, Page p, bool is_result_unused); - void compileIf(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name); - void compileFunction(const internal::Node& x, Page p, bool is_result_unused, const std::string& var_name); - void compileLetMutSet(internal::Keyword n, const internal::Node& x, Page p); - void compileWhile(const internal::Node& x, Page p); - void compilePluginImport(const internal::Node& x, Page p); - void handleCalls(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name); + void compileSymbol(const Node& x, Page p, bool is_result_unused); + void compileListInstruction(const Node& c0, const Node& x, Page p, bool is_result_unused); + void compileIf(const Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name); + void compileFunction(const Node& x, Page p, bool is_result_unused, const std::string& var_name); + void compileLetMutSet(Keyword n, const Node& x, Page p); + void compileWhile(const Node& x, Page p); + void compilePluginImport(const Node& x, Page p); + void handleCalls(const Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name); /** * @brief Register a given node in the symbol table @@ -207,7 +182,7 @@ namespace Ark * @param sym * @return uint16_t */ - uint16_t addSymbol(const internal::Node& sym); + uint16_t addSymbol(const Node& sym); /** * @brief Register a given node in the value table @@ -216,7 +191,7 @@ namespace Ark * @param x * @return uint16_t */ - uint16_t addValue(const internal::Node& x); + uint16_t addValue(const Node& x); /** * @brief Register a page id (function reference) in the value table @@ -226,7 +201,7 @@ namespace Ark * @param current A reference to the current node, for 
context * @return std::size_t */ - uint16_t addValue(std::size_t page_id, const internal::Node& current); + uint16_t addValue(std::size_t page_id, const Node& current); }; } diff --git a/include/Ark/Compiler/Instructions.hpp b/include/Ark/Compiler/Instructions.hpp index 1cb05d058..a634309a9 100644 --- a/include/Ark/Compiler/Instructions.hpp +++ b/include/Ark/Compiler/Instructions.hpp @@ -12,6 +12,8 @@ #ifndef ARK_COMPILER_INSTRUCTIONS_HPP #define ARK_COMPILER_INSTRUCTIONS_HPP +#include + namespace Ark::internal { /** @@ -86,7 +88,86 @@ namespace Ark::internal NOT = 0x30, LAST_OPERATOR = 0x30, - LAST_INSTRUCTION = 0x30 + FIRST_SUPER_INSTRUCTION = 0x31, + LOAD_CONST_LOAD_CONST = 0x31, + LOAD_CONST_STORE = 0x32, + LOAD_CONST_SET_VAL = 0x33, + STORE_FROM = 0x34, + SET_VAL_FROM = 0x35, + INCREMENT = 0x36, + DECREMENT = 0x37, + STORE_TAIL = 0x38, + STORE_HEAD = 0x39, + SET_VAL_TAIL = 0x3a, + SET_VAL_HEAD = 0x3b, + LAST_SUPER_INSTRUCTION = 0x3b, + + LAST_INSTRUCTION = 0x3d + }; + + constexpr std::array InstructionNames = { + "NOP", + "LOAD_SYMBOL", + "LOAD_CONST", + "POP_JUMP_IF_TRUE", + "STORE", + "SET_VAL", + "POP_JUMP_IF_FALSE", + "JUMP", + "RET", + "HALT", + "CALL", + "CAPTURE", + "BUILTIN", + "DEL", + "MAKE_CLOSURE", + "GET_FIELD", + "PLUGIN", + "LIST", + "APPEND", + "CONCAT", + "APPEND_IN_PLACE", + "CONCAT_IN_PLACE", + "POP_LIST", + "POP_LIST_IN_PLACE", + "POP", + "DUP", + // operators + "ADD", + "SUB", + "MUL", + "DIV", + "GT", + "LT", + "LE", + "GE", + "NEQ", + "EQ", + "LEN", + "EMPTY", + "TAIL", + "HEAD", + "ISNIL", + "ASSERT", + "TO_NUM", + "TO_STR", + "AT", + "MOD", + "TYPE", + "HASFIELD", + "NOT", + // super instructions + "LOAD_CONST_LOAD_CONST", + "LOAD_CONST_STORE", + "LOAD_CONST_SET_VAL", + "STORE_FROM", + "SET_VAL_FROM", + "INCREMENT", + "DECREMENT", + "STORE_TAIL", + "STORE_HEAD", + "SET_VAL_TAIL", + "SET_VAL_HEAD", }; } diff --git a/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp b/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp 
new file mode 100644 index 000000000..4f44f9e03 --- /dev/null +++ b/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp @@ -0,0 +1,75 @@ +/** + * @file Entity.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief An entity in the IR is a bundle of information + * @version 0.1 + * @date 2024-10-05 + * + * @copyright Copyright (c) 2024 + * + */ + +#ifndef ARK_COMPILER_INTERMEDIATEREPRESENTATION_ENTITY_HPP +#define ARK_COMPILER_INTERMEDIATEREPRESENTATION_ENTITY_HPP + +#include +#include + +#include +#include + +namespace Ark::internal::IR +{ + enum class Kind + { + Label, + Goto, + GotoIfTrue, + GotoIfFalse, + Opcode, + Opcode2Args + }; + + using label_t = std::size_t; + + class Entity + { + public: + explicit Entity(Kind kind); + + explicit Entity(Instruction inst, uint16_t arg = 0); + + Entity(Instruction inst, uint16_t primary_arg, uint16_t secondary_arg); + + static Entity Label(); + + static Entity Goto(const Entity& label); + + static Entity GotoIf(const Entity& label, bool cond); + + [[nodiscard]] Word bytecode() const; + + [[nodiscard]] inline label_t label() const { return m_label; } + + [[nodiscard]] inline Kind kind() const { return m_kind; } + + [[nodiscard]] inline Instruction inst() const { return m_inst; } + + [[nodiscard]] inline uint16_t primaryArg() const { return m_primary_arg; } + + [[nodiscard]] inline uint16_t secondaryArg() const { return m_secondary_arg; } + + private: + inline static label_t LabelCounter = 0; + + Kind m_kind; + label_t m_label { 0 }; + Instruction m_inst { NOP }; + uint16_t m_primary_arg { 0 }; + uint16_t m_secondary_arg { 0 }; + }; + + using Block = std::vector; +} + +#endif // ARK_COMPILER_INTERMEDIATEREPRESENTATION_ENTITY_HPP diff --git a/include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp b/include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp new file mode 100644 index 000000000..f57aeb4fa --- /dev/null +++ b/include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp @@ -0,0 
+1,79 @@ +/** + * @file IRCompiler.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief Compile the intermediate representation to bytecode + * @version 0.1 + * @date 2024-10-05 + * + * @copyright Copyright (c) 2024 + * + */ + +#ifndef ARK_COMPILER_INTERMEDIATEREPRESENTATION_IRCOMPILER_HPP +#define ARK_COMPILER_INTERMEDIATEREPRESENTATION_IRCOMPILER_HPP + +#include +#include + +#include +#include +#include +#include +#include + +namespace Ark::internal +{ + class ARK_API IRCompiler final + { + public: + /** + * @brief Create a new IRCompiler + * + * @param debug debug level + */ + explicit IRCompiler(unsigned debug); + + /** + * @brief Turn a given IR into bytecode + * + * @param pages list of lists of IR entities generated by the compiler + * @param symbols symbol table generated by the compiler + * @param values value table generated by the compiler + */ + void process(const std::vector& pages, const std::vector& symbols, const std::vector& values); + + /** + * @brief Return the constructed bytecode object + * + * @return const bytecode_t& + */ + [[nodiscard]] const bytecode_t& bytecode() const noexcept; + + private: + Logger m_logger; + bytecode_t m_bytecode; + std::vector m_ir; + + void compile(); + + /** + * @brief Push a word to the m_bytecode + * @param word + */ + void pushWord(const Word& word); + + /** + * @brief Push the file headers (magic, version used, timestamp) + * + */ + void pushFileHeader() noexcept; + + /** + * @brief Push the symbols and values tables + * + */ + void pushSymAndValTables(const std::vector& symbols, const std::vector& values); + }; +} + +#endif // ARK_COMPILER_INTERMEDIATEREPRESENTATION_IRCOMPILER_HPP diff --git a/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp b/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp new file mode 100644 index 000000000..150da64ed --- /dev/null +++ b/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp @@ -0,0 +1,55 @@ +/** + * @file 
IROptimizer.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief Optimize IR based on IR entity grouped by 2 (or more) + * @version 0.1 + * @date 2024-10-11 + * + * @copyright Copyright (c) 2024 + * + */ +#ifndef ARK_COMPILER_INTERMEDIATEREPRESENTATION_IROPTIMIZER_HPP +#define ARK_COMPILER_INTERMEDIATEREPRESENTATION_IROPTIMIZER_HPP + +#include +#include +#include +#include + +namespace Ark::internal +{ + class ARK_API IROptimizer final + { + public: + /** + * @brief Create a new IROptimizer + * + * @param debug debug level + */ + explicit IROptimizer(unsigned debug); + + /** + * @brief Turn a given IR into bytecode + * + * @param pages list of lists of IR entities generated by the compiler + * @param symbols symbol table generated by the compiler + * @param values value table generated by the compiler + */ + void process(const std::vector& pages, const std::vector& symbols, const std::vector& values); + + /** + * @brief Return the IR blocks (one per scope) + * + * @return const std::vector& + */ + [[nodiscard]] const std::vector& intermediateRepresentation() const noexcept; + + private: + Logger m_logger; + std::vector m_ir; + std::vector m_symbols; + std::vector m_values; + }; +} + +#endif // ARK_COMPILER_INTERMEDIATEREPRESENTATION_IROPTIMIZER_HPP diff --git a/include/Ark/Compiler/ImportSolver.hpp b/include/Ark/Compiler/Package/ImportSolver.hpp similarity index 98% rename from include/Ark/Compiler/ImportSolver.hpp rename to include/Ark/Compiler/Package/ImportSolver.hpp index 23a05b86c..3b8be5526 100644 --- a/include/Ark/Compiler/ImportSolver.hpp +++ b/include/Ark/Compiler/Package/ImportSolver.hpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include namespace Ark::internal { diff --git a/include/Ark/Compiler/AST/Module.hpp b/include/Ark/Compiler/Package/Module.hpp similarity index 100% rename from include/Ark/Compiler/AST/Module.hpp rename to include/Ark/Compiler/Package/Module.hpp diff --git a/include/Ark/Compiler/Welder.hpp 
b/include/Ark/Compiler/Welder.hpp index f8ae9ea94..63581b52e 100644 --- a/include/Ark/Compiler/Welder.hpp +++ b/include/Ark/Compiler/Welder.hpp @@ -20,9 +20,11 @@ #include #include #include +#include +#include #include #include -#include +#include #include #include #include @@ -86,6 +88,7 @@ namespace Ark std::filesystem::path m_root_file; std::vector m_imports; + std::vector m_ir; bytecode_t m_bytecode; internal::Node m_computed_ast; @@ -96,7 +99,11 @@ namespace Ark internal::NameResolutionPass m_name_resolver; internal::Logger m_logger; - Compiler m_compiler; + internal::IROptimizer m_ir_optimizer; + internal::IRCompiler m_ir_compiler; + internal::Compiler m_compiler; + + void dumpIRToFile() const; bool computeAST(const std::string& filename, const std::string& code); }; diff --git a/include/Ark/Compiler/Word.hpp b/include/Ark/Compiler/Word.hpp index 4ec6dabd2..c5e261949 100644 --- a/include/Ark/Compiler/Word.hpp +++ b/include/Ark/Compiler/Word.hpp @@ -14,28 +14,29 @@ namespace Ark::internal { - struct bytes_t - { - uint8_t first {}; - uint8_t second {}; - }; - struct Word { - uint8_t padding = 0; ///< Padding reserved for future use - uint8_t opcode = 0; ///< Instruction opcode - uint16_t data = 0; ///< Immediate data, interpreted differently for different instructions + uint8_t opcode = 0; ///< Instruction opcode + uint8_t byte_1 = 0; + uint8_t byte_2 = 0; + uint8_t byte_3 = 0; explicit Word(const uint8_t inst, const uint16_t arg = 0) : - opcode(inst), data(arg) + opcode(inst), byte_2(static_cast(arg >> 8)), byte_3(static_cast(arg & 0xff)) {} - [[nodiscard]] bytes_t bytes() const + /** + * @brief Construct a word with two arguments, each on 12 bits. 
It's up to the caller to ensure that no data is lost + * @param inst + * @param primary_arg argument on 12 bits, the upper 4 bits are lost + * @param secondary_arg 2nd argument on 12 bits, the upper 4 bits are lost + */ + Word(const uint8_t inst, const uint16_t primary_arg, const uint16_t secondary_arg) : + opcode(inst) { - return bytes_t { - .first = static_cast((data & 0xff00) >> 8), - .second = static_cast(data & 0x00ff) - }; + byte_1 = static_cast((secondary_arg & 0xff0) >> 4); + byte_2 = static_cast((secondary_arg & 0x00f) << 4 | (primary_arg & 0xf00) >> 8); + byte_3 = static_cast(primary_arg & 0x0ff); } }; } diff --git a/include/Ark/Constants.hpp.in b/include/Ark/Constants.hpp.in index 5964cdccb..d693163e0 100644 --- a/include/Ark/Constants.hpp.in +++ b/include/Ark/Constants.hpp.in @@ -49,11 +49,18 @@ namespace Ark constexpr uint16_t FeatureImportSolver = 1 << 0; constexpr uint16_t FeatureMacroProcessor = 1 << 1; constexpr uint16_t FeatureASTOptimizer = 1 << 2; + constexpr uint16_t FeatureIROptimizer = 1 << 3; + + constexpr uint16_t FeatureDumpIR = 1 << 14; /// This feature should only be used in tests, to disable diagnostics generation and enable exceptions to be thrown constexpr uint16_t FeatureTestFailOnException = 1 << 15; // Default features for the VM x Compiler x Parser - constexpr uint16_t DefaultFeatures = FeatureImportSolver | FeatureMacroProcessor | FeatureASTOptimizer; + constexpr uint16_t DefaultFeatures = + FeatureImportSolver + | FeatureMacroProcessor + | FeatureASTOptimizer + | FeatureIROptimizer; constexpr std::size_t MaxMacroProcessingDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::processNode constexpr std::size_t MaxMacroUnificationDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::unify diff --git a/include/Ark/VM/VM.hpp b/include/Ark/VM/VM.hpp index 63789d584..d0e46e409 100644 --- a/include/Ark/VM/VM.hpp +++ b/include/Ark/VM/VM.hpp @@ -181,6 +181,15 @@ namespace Ark */ void init() 
noexcept; + // ================================================ + // instruction helpers + // ================================================ + + inline Value* loadSymbol(uint16_t id, internal::ExecutionContext& context); + inline Value* loadConstAsPtr(uint16_t id) const; + inline void store(uint16_t id, const Value* val, internal::ExecutionContext& context); + inline void setVal(uint16_t id, const Value* val, internal::ExecutionContext& context); + // ================================================ // stack related // ================================================ diff --git a/include/Ark/VM/VM.inl b/include/Ark/VM/VM.inl index ffe424fd6..7620d9a1b 100644 --- a/include/Ark/VM/VM.inl +++ b/include/Ark/VM/VM.inl @@ -123,6 +123,58 @@ inline Value VM::resolve(internal::ExecutionContext* context, std::vector return *popAndResolveAsPtr(*context); } +#pragma region "instruction helpers" + +inline Value* VM::loadSymbol(const uint16_t id, internal::ExecutionContext& context) +{ + context.last_symbol = id; + if (Value* var = findNearestVariable(context.last_symbol, context); var != nullptr) [[likely]] + { + // push internal reference, shouldn't break anything so far, unless it's already a ref + if (var->valueType() == ValueType::Reference) + return var->reference(); + return var; + } + else [[unlikely]] + throwVMError(internal::ErrorKind::Scope, fmt::format("Unbound variable `{}'", m_state.m_symbols[context.last_symbol])); + return nullptr; +} + +inline Value* VM::loadConstAsPtr(const uint16_t id) const +{ + return &m_state.m_constants[id]; +} + +inline void VM::store(const uint16_t id, const Value* val, internal::ExecutionContext& context) +{ + // avoid adding the pair (id, _) multiple times, with different values + Value* local = context.locals.back()[id]; + if (local == nullptr) [[likely]] + context.locals.back().push_back(id, *val); + else + *local = *val; +} + +inline void VM::setVal(const uint16_t id, const Value* val, internal::ExecutionContext& context) +{ + if 
(Value* var = findNearestVariable(id, context); var != nullptr) [[likely]] + { + if (var->valueType() == ValueType::Reference) + *var->reference() = *val; + else [[likely]] + *var = *val; + } + else + throwVMError( + internal::ErrorKind::Scope, + fmt::format( + "Unbound variable `{}', can not change its value to {}", + m_state.m_symbols[id], + val->toString(*this))); +} + +#pragma endregion + #pragma region "stack management" inline Value* VM::pop(internal::ExecutionContext& context) @@ -338,8 +390,7 @@ inline void VM::call(internal::ExecutionContext& context, const uint16_t argc) needed_argc = 0; // every argument is a MUT declaration in the bytecode - // index+1 to skip the padding - while (m_state.m_pages[context.pp][index + 1] == STORE) + while (m_state.m_pages[context.pp][index] == STORE) { needed_argc += 1; index += 4; // instructions are on 4 bytes diff --git a/src/arkreactor/Compiler/BytecodeReader.cpp b/src/arkreactor/Compiler/BytecodeReader.cpp index c51450bac..3b13188ec 100644 --- a/src/arkreactor/Compiler/BytecodeReader.cpp +++ b/src/arkreactor/Compiler/BytecodeReader.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -331,6 +332,28 @@ namespace Ark uint16_t arg; }; + const std::unordered_map arg_kinds = { + { LOAD_SYMBOL, ArgKind::Symbol }, + { LOAD_CONST, ArgKind::Value }, + { POP_JUMP_IF_TRUE, ArgKind::Raw }, + { STORE, ArgKind::Symbol }, + { SET_VAL, ArgKind::Symbol }, + { POP_JUMP_IF_FALSE, ArgKind::Raw }, + { JUMP, ArgKind::Raw }, + { CALL, ArgKind::Raw }, + { CAPTURE, ArgKind::Symbol }, + { BUILTIN, ArgKind::Builtin }, + { DEL, ArgKind::Symbol }, + { MAKE_CLOSURE, ArgKind::Value }, + { GET_FIELD, ArgKind::Symbol }, + { PLUGIN, ArgKind::Value }, + { LIST, ArgKind::Raw }, + { APPEND, ArgKind::Raw }, + { CONCAT, ArgKind::Raw }, + { APPEND_IN_PLACE, ArgKind::Raw }, + { CONCAT_IN_PLACE, ArgKind::Raw } + }; + const auto color_print_inst = [&syms, &vals, &stringify_value](const std::string& name, std::optional arg = 
std::nullopt) { fmt::print("{}", fmt::styled(name, fmt::fg(fmt::color::gold))); if (arg.has_value()) @@ -392,113 +415,24 @@ namespace Ark for (std::size_t j = sStart.value_or(0), end = sEnd.value_or(page.size()); j < end; j += 4) { - const uint8_t padding = page[j]; - const uint8_t inst = page[j + 1]; + const uint8_t inst = page[j]; + // TEMP + const uint8_t padding = page[j + 1]; const auto arg = static_cast((page[j + 2] << 8) + page[j + 3]); // instruction number fmt::print(fmt::fg(fmt::color::cyan), "{:>4}", j / 4); // padding inst arg arg - fmt::print(" {:02x} {:02x} {:02x} {:02x} ", padding, inst, page[j + 2], page[j + 3]); - - if (inst == NOP) - color_print_inst("NOP"); - else if (inst == LOAD_SYMBOL) - color_print_inst("LOAD_SYMBOL", Arg { ArgKind::Symbol, arg }); - else if (inst == LOAD_CONST) - color_print_inst("LOAD_CONST", Arg { ArgKind::Value, arg }); - else if (inst == POP_JUMP_IF_TRUE) - color_print_inst("POP_JUMP_IF_TRUE", Arg { ArgKind::Raw, arg }); - else if (inst == STORE) - color_print_inst("STORE", Arg { ArgKind::Symbol, arg }); - else if (inst == SET_VAL) - color_print_inst("SET_VAL", Arg { ArgKind::Symbol, arg }); - else if (inst == POP_JUMP_IF_FALSE) - color_print_inst("POP_JUMP_IF_FALSE", Arg { ArgKind::Raw, arg }); - else if (inst == JUMP) - color_print_inst("JUMP", Arg { ArgKind::Raw, arg }); - else if (inst == RET) - color_print_inst("RET"); - else if (inst == HALT) - color_print_inst("HALT"); - else if (inst == CALL) - color_print_inst("CALL", Arg { ArgKind::Raw, arg }); - else if (inst == CAPTURE) - color_print_inst("CAPTURE", Arg { ArgKind::Symbol, arg }); - else if (inst == BUILTIN) - color_print_inst("BUILTIN", Arg { ArgKind::Builtin, arg }); - else if (inst == DEL) - color_print_inst("DEL", Arg { ArgKind::Symbol, arg }); - else if (inst == MAKE_CLOSURE) - color_print_inst("MAKE_CLOSURE", Arg { ArgKind::Value, arg }); - else if (inst == GET_FIELD) - color_print_inst("GET_FIELD", Arg { ArgKind::Symbol, arg }); - else if (inst == 
PLUGIN) - color_print_inst("PLUGIN", Arg { ArgKind::Value, arg }); - else if (inst == LIST) - color_print_inst("LIST", Arg { ArgKind::Raw, arg }); - else if (inst == APPEND) - color_print_inst("APPEND", Arg { ArgKind::Raw, arg }); - else if (inst == CONCAT) - color_print_inst("CONCAT", Arg { ArgKind::Raw, arg }); - else if (inst == APPEND_IN_PLACE) - color_print_inst("APPEND_IN_PLACE", Arg { ArgKind::Raw, arg }); - else if (inst == CONCAT_IN_PLACE) - color_print_inst("CONCAT_IN_PLACE", Arg { ArgKind::Raw, arg }); - else if (inst == POP_LIST) - color_print_inst("POP_LIST"); - else if (inst == POP_LIST_IN_PLACE) - color_print_inst("POP_LIST_IN_PLACE"); - else if (inst == POP) - color_print_inst("POP"); - else if (inst == DUP) - color_print_inst("DUP"); - else if (inst == ADD) - color_print_inst("ADD"); - else if (inst == SUB) - color_print_inst("SUB"); - else if (inst == MUL) - color_print_inst("MUL"); - else if (inst == DIV) - color_print_inst("DIV"); - else if (inst == GT) - color_print_inst("GT"); - else if (inst == LT) - color_print_inst("LT"); - else if (inst == LE) - color_print_inst("LE"); - else if (inst == GE) - color_print_inst("GE"); - else if (inst == NEQ) - color_print_inst("NEQ"); - else if (inst == EQ) - color_print_inst("EQ"); - else if (inst == LEN) - color_print_inst("LEN"); - else if (inst == EMPTY) - color_print_inst("EMPTY"); - else if (inst == TAIL) - color_print_inst("TAIL"); - else if (inst == HEAD) - color_print_inst("HEAD"); - else if (inst == ISNIL) - color_print_inst("ISNIL"); - else if (inst == ASSERT) - color_print_inst("ASSERT"); - else if (inst == TO_NUM) - color_print_inst("TO_NUM"); - else if (inst == TO_STR) - color_print_inst("TO_STR"); - else if (inst == AT) - color_print_inst("AT"); - else if (inst == MOD) - color_print_inst("MOD"); - else if (inst == TYPE) - color_print_inst("TYPE"); - else if (inst == HASFIELD) - color_print_inst("HASFIELD"); - else if (inst == NOT) - color_print_inst("NOT"); + fmt::print(" {:02x} {:02x} {:02x} 
{:02x} ", inst, padding, page[j + 2], page[j + 3]); + + if (const auto idx = static_cast(inst); idx < InstructionNames.size()) + { + const auto inst_name = InstructionNames[idx]; + if (const auto iinst = static_cast(inst); arg_kinds.contains(iinst)) + color_print_inst(inst_name, Arg { arg_kinds.at(iinst), arg }); + else + color_print_inst(inst_name); + } else fmt::println("Unknown instruction"); } diff --git a/src/arkreactor/Compiler/Compiler.cpp b/src/arkreactor/Compiler/Compiler.cpp index 5ad2d7ac2..eebf5058f 100644 --- a/src/arkreactor/Compiler/Compiler.cpp +++ b/src/arkreactor/Compiler/Compiler.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -15,9 +14,8 @@ #include #include -namespace Ark +namespace Ark::internal { - using namespace internal; using namespace literals; Compiler::Compiler(const unsigned debug) : @@ -26,8 +24,6 @@ namespace Ark void Compiler::process(const Node& ast) { - pushFileHeader(); - m_code_pages.emplace_back(); // create empty page // gather symbols, values, and start to create code segments @@ -36,163 +32,28 @@ namespace Ark /* current_page */ Page { .index = 0, .is_temp = false }, /* is_result_unused */ false, /* is_terminal */ false); - - pushSymAndValTables(); - - // push the different code segments - for (std::size_t i = 0, end = m_code_pages.size(); i < end; ++i) - { - std::vector& page = m_code_pages[i]; - // just in case we got too far, always add a HALT to be sure the - // VM won't do anything crazy - page.emplace_back(Instruction::HALT); - - // push number of elements - const std::size_t page_size = page.size(); - if (page_size > std::numeric_limits::max()) - throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i)); - - m_bytecode.push_back(Instruction::CODE_SEGMENT_START); - m_bytecode.push_back(static_cast((page_size & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(page_size & 0x00ff)); - - for (auto inst : page) - { - 
m_bytecode.push_back(inst.padding); - m_bytecode.push_back(inst.opcode); - - auto [first, second] = inst.bytes(); - m_bytecode.push_back(first); - m_bytecode.push_back(second); - } - } - - if (m_code_pages.empty()) - { - // code segment with a single instruction - m_bytecode.push_back(Instruction::CODE_SEGMENT_START); - m_bytecode.push_back(0_u8); - m_bytecode.push_back(1_u8); - - m_bytecode.push_back(0_u8); - m_bytecode.push_back(Instruction::HALT); - m_bytecode.push_back(0_u8); - m_bytecode.push_back(0_u8); - } - - constexpr std::size_t header_size = 18; - - // generate a hash of the tables + bytecode - std::vector hash_out(picosha2::k_digest_size); - picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out); - m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end()); } - const bytecode_t& Compiler::bytecode() const noexcept + const std::vector& Compiler::intermediateRepresentation() const noexcept { - return m_bytecode; + return m_code_pages; } - void Compiler::pushFileHeader() noexcept + const std::vector& Compiler::symbols() const noexcept { - /* - Generating headers: - - lang name (to be sure we are executing an ArkScript file) - on 4 bytes (ark + padding) - - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes) - - timestamp (8 bytes, unix format) - */ - - m_bytecode.push_back('a'); - m_bytecode.push_back('r'); - m_bytecode.push_back('k'); - m_bytecode.push_back(0_u8); - - // push version - for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH }) - { - m_bytecode.push_back(static_cast((n & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(n & 0x00ff)); - } - - // push timestamp - const long long timestamp = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - for (long i = 0; i < 8; ++i) - { - const long shift = 8 * (7 - i); - const auto ts_byte = static_cast((timestamp & (0xffLL << shift)) >> shift); - 
m_bytecode.push_back(ts_byte); - } + return m_symbols; } - void Compiler::pushSymAndValTables() + const std::vector& Compiler::values() const noexcept { - const std::size_t symbol_size = m_symbols.size(); - if (symbol_size > std::numeric_limits::max()) - throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size)); - - m_bytecode.push_back(SYM_TABLE_START); - m_bytecode.push_back(static_cast((symbol_size & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(symbol_size & 0x00ff)); - - for (const auto& sym : m_symbols) - { - // push the string, null terminated - std::string s = sym.string(); - std::ranges::transform(s, std::back_inserter(m_bytecode), [](const char i) { - return static_cast(i); - }); - m_bytecode.push_back(0_u8); - } - - const std::size_t value_size = m_values.size(); - if (value_size > std::numeric_limits::max()) - throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size)); - - m_bytecode.push_back(VAL_TABLE_START); - m_bytecode.push_back(static_cast((value_size & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(value_size & 0x00ff)); - - for (const ValTableElem& val : m_values) - { - if (val.type == ValTableElemType::Number) - { - m_bytecode.push_back(NUMBER_TYPE); - const auto n = std::get(val.value); - std::string t = std::to_string(n); - std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) { - return static_cast(i); - }); - } - else if (val.type == ValTableElemType::String) - { - m_bytecode.push_back(STRING_TYPE); - auto t = std::get(val.value); - std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) { - return static_cast(i); - }); - } - else if (val.type == ValTableElemType::PageAddr) - { - m_bytecode.push_back(FUNC_TYPE); - const std::size_t addr = std::get(val.value); - m_bytecode.push_back(static_cast((addr & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(addr & 0x00ff)); - } - else - 
throw Error("The compiler is trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code"); - - m_bytecode.push_back(0_u8); - } + return m_values; } - std::optional Compiler::getOperator(const std::string& name) noexcept + std::optional Compiler::getOperator(const std::string& name) noexcept { - const auto it = std::ranges::find(internal::Language::operators, name); - if (it != internal::Language::operators.end()) - return static_cast(std::distance(internal::Language::operators.begin(), it) + FIRST_OPERATOR); + const auto it = std::ranges::find(Language::operators, name); + if (it != Language::operators.end()) + return static_cast(std::distance(Language::operators.begin(), it) + FIRST_OPERATOR); return std::nullopt; } @@ -209,9 +70,9 @@ namespace Ark std::optional Compiler::getListInstruction(const std::string& name) noexcept { - const auto it = std::ranges::find(internal::Language::listInstructions, name); - if (it != internal::Language::listInstructions.end()) - return static_cast(std::distance(internal::Language::listInstructions.begin(), it) + LIST); + const auto it = std::ranges::find(Language::listInstructions, name); + if (it != Language::listInstructions.end()) + return static_cast(std::distance(Language::listInstructions.begin(), it) + LIST); return std::nullopt; } @@ -244,16 +105,6 @@ namespace Ark } } - bool Compiler::mayBeFromPlugin(const std::string& name) noexcept - { - std::string splitted = Utils::splitString(name, ':')[0]; - const auto it = std::ranges::find_if(m_plugins, - [&splitted](const std::string& plugin) -> bool { - return std::filesystem::path(plugin).stem().string() == splitted; - }); - return it != m_plugins.end(); - } - void Compiler::compilerWarning(const std::string& message, const Node& node) { fmt::println("{} {}", fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)), Diagnostics::makeContextWithNode(message, node)); @@ -432,23 +283,23 @@ namespace Ark 
compileExpression(x.constList()[1], p, false, false); // jump only if needed to the if - const std::size_t jump_to_if_pos = page(p).size(); - page(p).emplace_back(Instruction::POP_JUMP_IF_TRUE); + const auto label_then = IR::Entity::Label(); + page(p).emplace_back(IR::Entity::GotoIf(label_then, true)); // else code if (x.constList().size() == 4) // we have an else clause compileExpression(x.constList()[3], p, is_result_unused, is_terminal, var_name); // when else is finished, jump to end - const std::size_t jump_to_end_pos = page(p).size(); - page(p).emplace_back(Instruction::JUMP); + const auto label_end = IR::Entity::Label(); + page(p).emplace_back(IR::Entity::Goto(label_end)); // absolute address to jump to if condition is true - page(p)[jump_to_if_pos].data = static_cast(page(p).size()); + page(p).emplace_back(label_then); // if code compileExpression(x.constList()[2], p, is_result_unused, is_terminal, var_name); // set jump to end pos - page(p)[jump_to_end_pos].data = static_cast(page(p).size()); + page(p).emplace_back(label_end); } void Compiler::compileFunction(const Node& x, const Page p, const bool is_result_unused, const std::string& var_name) @@ -523,20 +374,21 @@ namespace Ark throwCompilerError("Invalid node ; if it was computed by a macro, check that a node is returned", x); // save current position to jump there at the end of the loop - std::size_t current = page(p).size(); + const auto label_loop = IR::Entity::Label(); + page(p).emplace_back(label_loop); // push condition compileExpression(x.constList()[1], p, false, false); // absolute jump to end of block if condition is false - const std::size_t jump_to_end_pos = page(p).size(); - page(p).emplace_back(POP_JUMP_IF_FALSE); + const auto label_end = IR::Entity::Label(); + page(p).emplace_back(IR::Entity::GotoIf(label_end, false)); // push code to page compileExpression(x.constList()[2], p, true, false); // loop, jump to the condition - page(p).emplace_back(JUMP, current); + 
page(p).emplace_back(IR::Entity::Goto(label_loop)); // absolute address to jump to if condition is false - page(p)[jump_to_end_pos].data = static_cast(page(p).size()); + page(p).emplace_back(label_end); } void Compiler::compilePluginImport(const Node& x, const Page p) @@ -553,8 +405,6 @@ namespace Ark // register plugin path in the constants table uint16_t id = addValue(Node(NodeType::String, path)); - // save plugin name to use it later - m_plugins.push_back(path); // add plugin instruction + id of the constant referring to the plugin path page(p).emplace_back(PLUGIN, id); } @@ -564,7 +414,7 @@ namespace Ark constexpr std::size_t start_index = 1; const auto node = x.constList()[0]; - const auto maybe_operator = node.nodeType() == NodeType::Symbol ? getOperator(node.string()) : std::nullopt; + const std::optional maybe_operator = node.nodeType() == NodeType::Symbol ? getOperator(node.string()) : std::nullopt; enum class ShortcircuitOp { @@ -587,18 +437,16 @@ namespace Ark compileExpression(x.constList()[1], p, false, false); page(p).emplace_back(DUP); - std::vector to_update; + const auto label_shortcircuit = IR::Entity::Label(); for (std::size_t i = 2, end = x.constList().size(); i < end; ++i) { - to_update.push_back(page(p).size()); - switch (maybe_shortcircuit.value()) { case ShortcircuitOp::And: - page(p).emplace_back(POP_JUMP_IF_FALSE); + page(p).emplace_back(IR::Entity::GotoIf(label_shortcircuit, false)); break; case ShortcircuitOp::Or: - page(p).emplace_back(POP_JUMP_IF_TRUE); + page(p).emplace_back(IR::Entity::GotoIf(label_shortcircuit, true)); break; } page(p).emplace_back(POP); @@ -608,8 +456,7 @@ namespace Ark page(p).emplace_back(DUP); } - for (const auto pos : to_update) - page(p)[pos].data = static_cast(page(p).size()); + page(p).emplace_back(label_shortcircuit); } else if (!maybe_operator.has_value()) { @@ -646,8 +493,8 @@ namespace Ark throwCompilerError(fmt::format("Invalid node inside call to `{}'", node.repr()), x); } // push proc from temp page 
- for (const Word& word : m_temp_pages.back()) - page(p).push_back(word); + for (const auto& inst : m_temp_pages.back()) + page(p).push_back(inst); m_temp_pages.pop_back(); // number of arguments @@ -664,9 +511,9 @@ namespace Ark else // operator { // retrieve operator - auto op = Word(maybe_operator.value()); + auto op = maybe_operator.value(); - if (op.opcode == ASSERT) + if (op == ASSERT) is_result_unused = false; // push arguments on current page @@ -684,14 +531,14 @@ namespace Ark // in order to be able to handle things like (op A B C D...) // which should be transformed into A B op C op D op... if (exp_count >= 2) - page(p).emplace_back(op.opcode, 2); // TODO generalize to n arguments (n >= 2) + page(p).emplace_back(op); } - if (isUnaryInst(static_cast(op.opcode))) + if (isUnaryInst(op)) { if (exp_count != 1) throwCompilerError(fmt::format("Operator needs one argument, but was called with {}", exp_count), x.constList()[0]); - page(p).emplace_back(op.opcode); + page(p).emplace_back(op); } else if (exp_count <= 1) { @@ -701,7 +548,7 @@ namespace Ark // need to check we didn't push the (op A B C D...) things for operators not supporting it if (exp_count > 2) { - switch (op.opcode) + switch (op) { // authorized instructions case ADD: [[fallthrough]]; @@ -716,7 +563,7 @@ namespace Ark fmt::format( "can not create a chained expression (of length {}) for operator `{}'. 
You most likely forgot a `)'.", exp_count, - Language::operators[static_cast(op.opcode - FIRST_OPERATOR)]), + Language::operators[static_cast(op - FIRST_OPERATOR)]), x); } } @@ -729,12 +576,10 @@ namespace Ark uint16_t Compiler::addSymbol(const Node& sym) { // otherwise, add the symbol, and return its id in the table - auto it = std::ranges::find_if(m_symbols, [&sym](const Node& sym_node) -> bool { - return sym_node.string() == sym.string(); - }); + auto it = std::ranges::find(m_symbols, sym.string()); if (it == m_symbols.end()) { - m_symbols.push_back(sym); + m_symbols.push_back(sym.string()); it = m_symbols.begin() + static_cast::difference_type>(m_symbols.size() - 1); } diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp new file mode 100644 index 000000000..269ea72d9 --- /dev/null +++ b/src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp @@ -0,0 +1,52 @@ +#include + +namespace Ark::internal::IR +{ + Entity::Entity(const Kind kind) : + m_kind(kind), + m_inst(NOP) + {} + + Entity::Entity(const Instruction inst, const uint16_t arg) : + m_kind(Kind::Opcode), + m_inst(inst), m_primary_arg(arg) + {} + + Entity::Entity(const Instruction inst, const uint16_t primary_arg, const uint16_t secondary_arg) : + m_kind(Kind::Opcode2Args), + m_inst(inst), m_primary_arg(primary_arg), m_secondary_arg(secondary_arg) + {} + + Entity Entity::Label() + { + auto label = Entity(Kind::Label); + label.m_label = Entity::LabelCounter++; + + return label; + } + + Entity Entity::Goto(const Entity& label) + { + auto jump = Entity(Kind::Goto); + jump.m_label = label.m_label; + + return jump; + } + + Entity Entity::GotoIf(const Entity& label, const bool cond) + { + auto jump = Entity(cond ? 
Kind::GotoIfTrue : Kind::GotoIfFalse); + jump.m_label = label.m_label; + + return jump; + } + + Word Entity::bytecode() const + { + if (m_kind == Kind::Opcode) + return Word(m_inst, m_primary_arg); + if (m_kind == Kind::Opcode2Args) + return Word(m_inst, m_primary_arg, m_secondary_arg); + return Word(0, 0); + } +} diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp new file mode 100644 index 000000000..2c1026139 --- /dev/null +++ b/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp @@ -0,0 +1,226 @@ +#include + +#include +#include +#include +#include + +#include +#include + +namespace Ark::internal +{ + using namespace literals; + + IRCompiler::IRCompiler(const unsigned debug) : + m_logger("IRCompiler", debug) + {} + + void IRCompiler::process(const std::vector& pages, const std::vector& symbols, const std::vector& values) + { + pushFileHeader(); + pushSymAndValTables(symbols, values); + + m_ir = pages; + compile(); + + if (m_ir.empty()) + { + // code segment with a single instruction + m_bytecode.push_back(CODE_SEGMENT_START); + m_bytecode.push_back(0_u8); + m_bytecode.push_back(1_u8); + + m_bytecode.push_back(0_u8); + m_bytecode.push_back(HALT); + m_bytecode.push_back(0_u8); + m_bytecode.push_back(0_u8); + } + + constexpr std::size_t header_size = 18; + + // generate a hash of the tables + bytecode + std::vector hash_out(picosha2::k_digest_size); + picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out); + m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end()); + } + + const bytecode_t& IRCompiler::bytecode() const noexcept + { + return m_bytecode; + } + + void IRCompiler::compile() + { + // push the different code segments + for (std::size_t i = 0, end = m_ir.size(); i < end; ++i) + { + IR::Block& page = m_ir[i]; + // just in case we got too far, always add a HALT to be sure the + // VM won't do 
anything crazy + page.emplace_back(HALT); + + // push number of elements + const auto page_size = std::ranges::count_if(page, [](const auto& a) { + return a.kind() != IR::Kind::Label; + }); + if (std::cmp_greater(page_size, std::numeric_limits::max())) + throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i)); + + m_bytecode.push_back(CODE_SEGMENT_START); + m_bytecode.push_back(static_cast((page_size & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(page_size & 0x00ff)); + + // register labels position + uint16_t pos = 0; + std::unordered_map label_to_position; + for (auto inst : page) + { + switch (inst.kind()) + { + case IR::Kind::Label: + label_to_position[inst.label()] = pos; + break; + + default: + ++pos; + } + } + + for (auto inst : page) + { + switch (inst.kind()) + { + case IR::Kind::Goto: + pushWord(Word(JUMP, label_to_position[inst.label()])); + break; + + case IR::Kind::GotoIfTrue: + pushWord(Word(POP_JUMP_IF_TRUE, label_to_position[inst.label()])); + break; + + case IR::Kind::GotoIfFalse: + pushWord(Word(POP_JUMP_IF_FALSE, label_to_position[inst.label()])); + break; + + case IR::Kind::Opcode: + [[fallthrough]]; + case IR::Kind::Opcode2Args: + pushWord(inst.bytecode()); + break; + + default: + break; + } + } + } + } + + void IRCompiler::pushWord(const Word& word) + { + m_bytecode.push_back(word.opcode); + m_bytecode.push_back(word.byte_1); + m_bytecode.push_back(word.byte_2); + m_bytecode.push_back(word.byte_3); + } + + void IRCompiler::pushFileHeader() noexcept + { + /* + Generating headers: + - lang name (to be sure we are executing an ArkScript file) + on 4 bytes (ark + padding) + - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes) + - timestamp (8 bytes, unix format) + */ + + m_bytecode.push_back('a'); + m_bytecode.push_back('r'); + m_bytecode.push_back('k'); + m_bytecode.push_back(0_u8); + + // push version + for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH }) 
+ { + m_bytecode.push_back(static_cast((n & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(n & 0x00ff)); + } + + // push timestamp + const long long timestamp = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + for (long i = 0; i < 8; ++i) + { + const long shift = 8 * (7 - i); + const auto ts_byte = static_cast((timestamp & (0xffLL << shift)) >> shift); + m_bytecode.push_back(ts_byte); + } + } + + void IRCompiler::pushSymAndValTables(const std::vector& symbols, const std::vector& values) + { + const std::size_t symbol_size = symbols.size(); + if (symbol_size > std::numeric_limits::max()) + throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size)); + + m_bytecode.push_back(SYM_TABLE_START); + m_bytecode.push_back(static_cast((symbol_size & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(symbol_size & 0x00ff)); + + for (const auto& sym : symbols) + { + // push the string, null terminated + std::ranges::transform(sym, std::back_inserter(m_bytecode), [](const char i) { + return static_cast(i); + }); + m_bytecode.push_back(0_u8); + } + + const std::size_t value_size = values.size(); + if (value_size > std::numeric_limits::max()) + throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size)); + + m_bytecode.push_back(VAL_TABLE_START); + m_bytecode.push_back(static_cast((value_size & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(value_size & 0x00ff)); + + for (const ValTableElem& val : values) + { + switch (val.type) + { + case ValTableElemType::Number: + { + m_bytecode.push_back(NUMBER_TYPE); + const auto n = std::get(val.value); + std::string t = std::to_string(n); + std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) { + return static_cast(i); + }); + break; + } + + case ValTableElemType::String: + { + m_bytecode.push_back(STRING_TYPE); + auto t = std::get(val.value); + 
std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) { + return static_cast(i); + }); + break; + } + + case ValTableElemType::PageAddr: + { + m_bytecode.push_back(FUNC_TYPE); + const std::size_t addr = std::get(val.value); + m_bytecode.push_back(static_cast((addr & 0xff00) >> 8)); + m_bytecode.push_back(static_cast(addr & 0x00ff)); + break; + } + } + + m_bytecode.push_back(0_u8); + } + } +} diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp new file mode 100644 index 000000000..caf78b79a --- /dev/null +++ b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp @@ -0,0 +1,139 @@ +#include + +namespace Ark::internal +{ + IROptimizer::IROptimizer(const unsigned debug) : + m_logger("IROptimizer", debug) + {} + + void IROptimizer::process(const std::vector& pages, const std::vector& symbols, const std::vector& values) + { + m_symbols = symbols; + m_values = values; + + for (const auto& block : pages) + { + m_ir.emplace_back(); + IR::Block& current_block = m_ir.back(); + + std::size_t i = 0; + const std::size_t end = block.size(); + + while (i < end) + { + const Instruction first = block[i].inst(); + const uint16_t arg_1 = block[i].primaryArg(); + + if (i + 1 < end) + { + const Instruction second = block[i + 1].inst(); + const uint16_t arg_2 = block[i + 1].primaryArg(); + + // LOAD_CONST x + // LOAD_CONST y + // ---> LOAD_CONST_LOAD_CONST x y + if (first == LOAD_CONST && second == LOAD_CONST) + { + current_block.emplace_back(LOAD_CONST_LOAD_CONST, arg_1, arg_2); + i += 2; + } + // LOAD_CONST x + // STORE / SET_VAL a + // ---> LOAD_CONST_STORE x a ; LOAD_CONST_SET_VAL x a + else if (first == LOAD_CONST && second == STORE) + { + current_block.emplace_back(LOAD_CONST_STORE, arg_1, arg_2); + i += 2; + } + else if (first == LOAD_CONST && second == SET_VAL) + { + current_block.emplace_back(LOAD_CONST_SET_VAL, arg_1, arg_2); + i += 2; + } + // 
LOAD_SYMBOL a + // STORE / SET_VAL b + // ---> STORE_FROM a b ; SET_VAL_FROM a b + else if (first == LOAD_SYMBOL && second == STORE) + { + current_block.emplace_back(STORE_FROM, arg_1, arg_2); + i += 2; + } + else if (first == LOAD_SYMBOL && second == SET_VAL) + { + current_block.emplace_back(SET_VAL_FROM, arg_1, arg_2); + i += 2; + } + else if (i + 2 < end) + { + const Instruction third = block[i + 2].inst(); + const uint16_t arg_3 = block[i + 2].primaryArg(); + + // LOAD_SYMBOL a + // LOAD_CONST n (1) + // ADD / SUB + // ---> INCREMENT / DECREMENT a + if (third == ADD && first == LOAD_CONST && second == LOAD_SYMBOL && m_values[arg_1].type == ValTableElemType::Number && std::get(m_values[arg_1].value) == 1) + { + current_block.emplace_back(INCREMENT, arg_2); + i += 3; + } + else if (third == ADD && first == LOAD_SYMBOL && second == LOAD_CONST && m_values[arg_2].type == ValTableElemType::Number && std::get(m_values[arg_2].value) == 1) + { + current_block.emplace_back(INCREMENT, arg_1); + i += 3; + } + else if (third == SUB && first == LOAD_SYMBOL && second == LOAD_CONST && m_values[arg_2].type == ValTableElemType::Number && std::get(m_values[arg_2].value) == 1) + { + current_block.emplace_back(DECREMENT, arg_1); + i += 3; + } + // LOAD_SYMBOL list + // TAIL / HEAD + // STORE / SET_VAL a + // ---> STORE_TAIL list a ; STORE_HEAD ; SET_VAL_TAIL ; SET_VAL_HEAD + else if (first == LOAD_SYMBOL && second == TAIL && third == STORE) + { + current_block.emplace_back(STORE_TAIL, arg_1, arg_3); + i += 3; + } + else if (first == LOAD_SYMBOL && second == TAIL && third == SET_VAL) + { + current_block.emplace_back(SET_VAL_TAIL, arg_1, arg_3); + i += 3; + } + else if (first == LOAD_SYMBOL && second == HEAD && third == STORE) + { + current_block.emplace_back(STORE_HEAD, arg_1, arg_3); + i += 3; + } + else if (first == LOAD_SYMBOL && second == HEAD && third == SET_VAL) + { + current_block.emplace_back(SET_VAL_HEAD, arg_1, arg_3); + i += 3; + } + else + { + 
current_block.emplace_back(block[i]); + ++i; + } + } + else + { + current_block.emplace_back(block[i]); + ++i; + } + } + else + { + current_block.emplace_back(block[i]); + ++i; + } + } + } + } + + const std::vector& IROptimizer::intermediateRepresentation() const noexcept + { + return m_ir; + } +} diff --git a/src/arkreactor/Compiler/ImportSolver.cpp b/src/arkreactor/Compiler/Package/ImportSolver.cpp similarity index 99% rename from src/arkreactor/Compiler/ImportSolver.cpp rename to src/arkreactor/Compiler/Package/ImportSolver.cpp index e4ecf070b..19ca4b8a7 100644 --- a/src/arkreactor/Compiler/ImportSolver.cpp +++ b/src/arkreactor/Compiler/Package/ImportSolver.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/arkreactor/Compiler/Welder.cpp b/src/arkreactor/Compiler/Welder.cpp index 7c0966165..0e6cb2e62 100644 --- a/src/arkreactor/Compiler/Welder.cpp +++ b/src/arkreactor/Compiler/Welder.cpp @@ -1,13 +1,14 @@ #include #include -#include +#include #include #include - +#include #include #include -#include + +#include namespace Ark { @@ -20,6 +21,8 @@ namespace Ark m_ast_optimizer(debug), m_name_resolver(debug), m_logger("Welder", debug), + m_ir_optimizer(debug), + m_ir_compiler(debug), m_compiler(debug) {} @@ -48,7 +51,19 @@ namespace Ark try { m_compiler.process(m_computed_ast); - m_bytecode = m_compiler.bytecode(); + m_ir = m_compiler.intermediateRepresentation(); + + if ((m_features & FeatureIROptimizer) != 0) + { + m_ir_optimizer.process(m_ir, m_compiler.symbols(), m_compiler.values()); + m_ir = m_ir_optimizer.intermediateRepresentation(); + } + + if ((m_features & FeatureDumpIR) != 0) + dumpIRToFile(); + + m_ir_compiler.process(m_ir, m_compiler.symbols(), m_compiler.values()); + m_bytecode = m_ir_compiler.bytecode(); return true; } @@ -87,6 +102,57 @@ namespace Ark return m_bytecode; } + void Welder::dumpIRToFile() const + { + std::filesystem::path path = m_root_file; + if (is_directory(m_root_file)) + path /= "output.ark.ir"; + else + 
path.replace_extension(".ark.ir"); + + std::ofstream output(path); + + std::size_t index = 0; + for (const auto& block : m_ir) + { + fmt::println(output, "page_{}", index); + for (const auto entity : block) + { + switch (entity.kind()) + { + case internal::IR::Kind::Label: + fmt::println(output, ".L{}:", entity.label()); + break; + + case internal::IR::Kind::Goto: + fmt::println(output, "\tGOTO L{}", entity.label()); + break; + + case internal::IR::Kind::GotoIfTrue: + fmt::println(output, "\tGOTO_IF_TRUE L{}", entity.label()); + break; + + case internal::IR::Kind::GotoIfFalse: + fmt::println(output, "\tGOTO_IF_FALSE L{}", entity.label()); + break; + + case internal::IR::Kind::Opcode: + fmt::println(output, "\t{} {}", internal::InstructionNames[entity.inst()], entity.primaryArg()); + break; + + case internal::IR::Kind::Opcode2Args: + fmt::println(output, "\t{} {}, {}", internal::InstructionNames[entity.inst()], entity.primaryArg(), entity.secondaryArg()); + break; + } + } + + fmt::println(output, ""); + ++index; + } + + output.close(); + } + bool Welder::computeAST(const std::string& filename, const std::string& code) { try diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp index 85c95a1c2..ba2b9fe64 100644 --- a/src/arkreactor/VM/VM.cpp +++ b/src/arkreactor/VM/VM.cpp @@ -22,6 +22,60 @@ namespace Ark { using namespace internal; + namespace helper + { + inline Value tail(Value* a) + { + if (a->valueType() == ValueType::List) + { + if (a->constList().size() < 2) + return Value(ValueType::List); + + std::vector tmp(a->constList().size() - 1); + for (std::size_t i = 1, end = a->constList().size(); i < end; ++i) + tmp[i - 1] = a->constList()[i]; + return Value(std::move(tmp)); + } + if (a->valueType() == ValueType::String) + { + if (a->string().size() < 2) + return Value(ValueType::String); + + Value b { *a }; + b.stringRef().erase(b.stringRef().begin()); + return b; + } + + types::generateError( + "tail", + { { types::Contract { { types::Typedef("value", 
ValueType::List) } }, + types::Contract { { types::Typedef("value", ValueType::String) } } } }, + { *a }); + } + + inline Value head(Value* a) + { + if (a->valueType() == ValueType::List) + { + if (a->constList().empty()) + return Builtins::nil; + return a->constList()[0]; + } + if (a->valueType() == ValueType::String) + { + if (a->string().empty()) + return Value(ValueType::String); + return Value(std::string(1, a->stringRef()[0])); + } + + types::generateError( + "head", + { { types::Contract { { types::Typedef("value", ValueType::List) } }, + types::Contract { { types::Typedef("value", ValueType::String) } } } }, + { *a }); + } + } + VM::VM(State& state) noexcept : m_state(state), m_exit_code(0), m_running(false) { @@ -281,8 +335,8 @@ namespace Ark #define NEXTOPARG() \ do \ { \ - padding = m_state.m_pages[context.pp][context.ip]; \ - inst = m_state.m_pages[context.pp][context.ip + 1]; \ + inst = m_state.m_pages[context.pp][context.ip]; \ + padding = m_state.m_pages[context.pp][context.ip + 1]; \ arg = static_cast((m_state.m_pages[context.pp][context.ip + 2] << 8) + \ m_state.m_pages[context.pp][context.ip + 3]); \ context.ip += 4; \ @@ -290,6 +344,12 @@ namespace Ark #define DISPATCH() \ NEXTOPARG(); \ DISPATCH_GOTO(); +#define UNPACK_ARGS() \ + do \ + { \ + secondary_arg = static_cast((padding << 4) | (arg & 0xf000) >> 12); \ + primary_arg = arg & 0x0fff; \ + } while (false) #if ARK_USE_COMPUTED_GOTOS # pragma GCC diagnostic push @@ -344,15 +404,29 @@ namespace Ark &&TARGET_TYPE, &&TARGET_HASFIELD, &&TARGET_NOT, + &&TARGET_LOAD_CONST_LOAD_CONST, + &&TARGET_LOAD_CONST_STORE, + &&TARGET_LOAD_CONST_SET_VAL, + &&TARGET_STORE_FROM, + &&TARGET_SET_VAL_FROM, + &&TARGET_INCREMENT, + &&TARGET_DECREMENT, + &&TARGET_STORE_TAIL, + &&TARGET_STORE_HEAD, + &&TARGET_SET_VAL_TAIL, + &&TARGET_SET_VAL_HEAD }; # pragma GCC diagnostic pop #endif try { - [[maybe_unused]] uint8_t padding = 0; uint8_t inst = 0; + uint8_t padding = 0; uint16_t arg = 0; + uint16_t primary_arg = 0; + 
uint16_t secondary_arg = 0; + m_running = true; DISPATCH(); @@ -370,23 +444,13 @@ namespace Ark TARGET(LOAD_SYMBOL) { - context.last_symbol = arg; - if (Value* var = findNearestVariable(context.last_symbol, context); var != nullptr) [[likely]] - { - // push internal reference, shouldn't break anything so far, unless it's already a ref - if (var->valueType() == ValueType::Reference) - push(var->reference(), context); - else - push(var, context); - } - else [[unlikely]] - throwVMError(ErrorKind::Scope, fmt::format("Unbound variable `{}'", m_state.m_symbols[context.last_symbol])); + push(loadSymbol(arg, context), context); DISPATCH(); } TARGET(LOAD_CONST) { - push(&(m_state.m_constants[arg]), context); + push(loadConstAsPtr(arg), context); DISPATCH(); } @@ -399,33 +463,13 @@ namespace Ark TARGET(STORE) { - { - Value val = *popAndResolveAsPtr(context); - // avoid adding the pair (id, _) multiple times, with different values - Value* local = context.locals.back()[arg]; - if (local == nullptr) [[likely]] - context.locals.back().push_back(arg, val); - else - *local = val; - } - + store(arg, popAndResolveAsPtr(context), context); DISPATCH(); } TARGET(SET_VAL) { - { - Value val = *popAndResolveAsPtr(context); - if (Value* var = findNearestVariable(arg, context); var != nullptr) [[likely]] - { - if (var->valueType() == ValueType::Reference) - *var->reference() = val; - else [[likely]] - *var = val; - } - else - throwVMError(ErrorKind::Scope, fmt::format("Unbound variable `{}', can not change its value to {}", m_state.m_symbols[arg], val.toString(*this))); - } + setVal(arg, popAndResolveAsPtr(context), context); DISPATCH(); } @@ -535,7 +579,6 @@ namespace Ark } throwVMError(ErrorKind::Scope, fmt::format("Can not delete unbound variable `{}'", m_state.m_symbols[arg])); - DISPATCH(); } TARGET(MAKE_CLOSURE) @@ -568,9 +611,8 @@ namespace Ark if (Value* field = var->refClosure().refScope()[arg]; field != nullptr) { - // check for CALL instruction - // doing a +1 on the IP to read 
the instruction because context.ip is already on the next instruction word (the padding) - if (context.ip + 1 < m_state.m_pages[context.pp].size() && m_state.m_pages[context.pp][context.ip + 1] == CALL) + // check for CALL instruction (the instruction because context.ip is already on the next instruction word) + if (m_state.m_pages[context.pp][context.ip] == CALL) push(Value(Closure(var->refClosure().scopePtr(), field->pageAddr())), context); else push(field, context); @@ -896,63 +938,14 @@ namespace Ark TARGET(TAIL) { Value* a = popAndResolveAsPtr(context); - - if (a->valueType() == ValueType::List) - { - if (a->constList().size() < 2) - push(Value(ValueType::List), context); - else - { - std::vector tmp(a->constList().size() - 1); - for (std::size_t i = 1, end = a->constList().size(); i < end; ++i) - tmp[i - 1] = a->constList()[i]; - push(Value(std::move(tmp)), context); - } - } - else if (a->valueType() == ValueType::String) - { - if (a->string().size() < 2) - push(Value(ValueType::String), context); - else - { - Value b { *a }; - b.stringRef().erase(b.stringRef().begin()); - push(std::move(b), context); - } - } - else - types::generateError( - "tail", - { { types::Contract { { types::Typedef("value", ValueType::List) } }, - types::Contract { { types::Typedef("value", ValueType::String) } } } }, - { *a }); + push(helper::tail(a), context); DISPATCH(); } TARGET(HEAD) { Value* a = popAndResolveAsPtr(context); - - if (a->valueType() == ValueType::List) - { - if (a->constList().empty()) - push(Builtins::nil, context); - else - push(a->constList()[0], context); - } - else if (a->valueType() == ValueType::String) - { - if (a->string().empty()) - push(Value(ValueType::String), context); - else - push(Value(std::string(1, a->stringRef()[0])), context); - } - else - types::generateError( - "head", - { { types::Contract { { types::Typedef("value", ValueType::List) } }, - types::Contract { { types::Typedef("value", ValueType::String) } } } }, - { *a }); + 
push(helper::head(a), context); DISPATCH(); } @@ -1101,6 +1094,130 @@ namespace Ark DISPATCH(); } +#pragma endregion + +#pragma region "Super Instructions" + TARGET(LOAD_CONST_LOAD_CONST) + { + UNPACK_ARGS(); + push(loadConstAsPtr(primary_arg), context); + push(loadConstAsPtr(secondary_arg), context); + DISPATCH(); + } + + TARGET(LOAD_CONST_STORE) + { + UNPACK_ARGS(); + store(secondary_arg, loadConstAsPtr(primary_arg), context); + DISPATCH(); + } + + TARGET(LOAD_CONST_SET_VAL) + { + UNPACK_ARGS(); + setVal(secondary_arg, loadConstAsPtr(primary_arg), context); + DISPATCH(); + } + + TARGET(STORE_FROM) + { + UNPACK_ARGS(); + store(secondary_arg, loadSymbol(primary_arg, context), context); + DISPATCH(); + } + + TARGET(SET_VAL_FROM) + { + UNPACK_ARGS(); + setVal(secondary_arg, loadSymbol(primary_arg, context), context); + DISPATCH(); + } + + TARGET(INCREMENT) + { + UNPACK_ARGS(); + { + Value* var = loadSymbol(primary_arg, context); + + // use internal reference, shouldn't break anything so far, unless it's already a ref + if (var->valueType() == ValueType::Reference) + var = var->reference(); + + if (var->valueType() == ValueType::Number) + push(Value(var->number() + 1), context); + else + types::generateError( + "+", + { { types::Contract { { types::Typedef("a", ValueType::Number), types::Typedef("b", ValueType::Number) } } } }, + { *var, Value(1) }); + } + DISPATCH(); + } + + TARGET(DECREMENT) + { + UNPACK_ARGS(); + { + Value* var = loadSymbol(primary_arg, context); + + // use internal reference, shouldn't break anything so far, unless it's already a ref + if (var->valueType() == ValueType::Reference) + var = var->reference(); + + if (var->valueType() == ValueType::Number) + push(Value(var->number() - 1), context); + else + types::generateError( + "-", + { { types::Contract { { types::Typedef("a", ValueType::Number), types::Typedef("b", ValueType::Number) } } } }, + { *var, Value(1) }); + } + DISPATCH(); + } + + TARGET(STORE_TAIL) + { + UNPACK_ARGS(); + { + Value* 
list = loadSymbol(primary_arg, context); + Value tail = helper::tail(list); + store(secondary_arg, &tail, context); + } + DISPATCH(); + } + + TARGET(STORE_HEAD) + { + UNPACK_ARGS(); + { + Value* list = loadSymbol(primary_arg, context); + Value head = helper::head(list); + store(secondary_arg, &head, context); + } + DISPATCH(); + } + + TARGET(SET_VAL_TAIL) + { + UNPACK_ARGS(); + { + Value* list = loadSymbol(primary_arg, context); + Value tail = helper::tail(list); + setVal(secondary_arg, &tail, context); + } + DISPATCH(); + } + + TARGET(SET_VAL_HEAD) + { + UNPACK_ARGS(); + { + Value* list = loadSymbol(primary_arg, context); + Value head = helper::head(list); + setVal(secondary_arg, &head, context); + } + DISPATCH(); + } #pragma endregion } #if ARK_USE_COMPUTED_GOTOS diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index c8812fb8d..e99c48cf1 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -42,7 +42,7 @@ int main(int argc, char** argv) uint16_t bcr_page = max_uint16; uint16_t bcr_start = max_uint16; uint16_t bcr_end = max_uint16; - Ark::BytecodeSegment segment = Ark::BytecodeSegment::All; + auto segment = Ark::BytecodeSegment::All; // Eval / Run / AST dump std::string file, eval_expression; std::string libdir; @@ -72,8 +72,17 @@ int main(int argc, char** argv) option("-foptimizer").call([&] { passes |= Ark::FeatureASTOptimizer; }) | option("-fno-optimizer").call([&] { passes &= ~Ark::FeatureASTOptimizer; }) ).doc("Toggle on and off the optimizer pass"); - // cppcheck-suppress constStatement - const auto compiler_passes_flag = (import_solver_pass_flag, macro_proc_pass_flag, optimizer_pass_flag); + auto ir_optimizer_pass_flag = ( + option("-firoptimizer").call([&] { passes |= Ark::FeatureIROptimizer; }) + | option("-fno-iroptimizer").call([&] { passes &= ~Ark::FeatureIROptimizer; }) + ).doc("Toggle on and off the IR optimizer pass"); + auto ir_dump = option("-fdump-ir").call([&] { passes |= Ark::FeatureDumpIR; }) + .doc("Dump IR to 
file.ark.ir"); + + const auto compiler_passes_flag = ( + // cppcheck-suppress constStatement + import_solver_pass_flag, macro_proc_pass_flag, optimizer_pass_flag, ir_optimizer_pass_flag, ir_dump + ); auto cli = ( option("-h", "--help").set(selected, mode::help).doc("Display this message") diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md new file mode 100644 index 000000000..52568b31c --- /dev/null +++ b/tests/benchmarks/README.md @@ -0,0 +1,20 @@ +# Benchmarks + +## Running the benchmarks and storing the results + +1. Run from the repository root +2. Make sure the target `arkscript` has been compiled in release mode, as well as `bench` +```bash +result="tests/benchmarks/results/$(set -- tests/benchmarks/results/*.csv; echo $#)-$(git rev-parse --short HEAD).csv" +cmake-build-release/bench \ + --benchmark_min_warmup_time=1 \ + --benchmark_format=csv \ + --benchmark_time_unit=ms \ + --v=0 | grep -Ev "(New parser|Welder)" > $result +``` + +## Generate the comparison + +```bash +python3 tests/benchmarks/compare.py tests/benchmarks/results/*.csv +``` diff --git a/tests/benchmarks/results/5-ee9ff764.csv b/tests/benchmarks/results/5-ee9ff764.csv new file mode 100644 index 000000000..22c636dd0 --- /dev/null +++ b/tests/benchmarks/results/5-ee9ff764.csv @@ -0,0 +1,5 @@ +name,iterations,real_time,cpu_time,time_unit,bytes_per_second,items_per_second,label,error_occurred,error_message +"quicksort",4601,0.150416,0.150228,ms,,,,, +"ackermann/iterations:50",50,59.9067,59.8294,ms,,,,, +"fibonacci/iterations:100",100,6.27221,6.26509,ms,,,,, +"man_or_boy",45605,0.0153367,0.015325,ms,,,,, diff --git a/tests/unittests/CompilerSuite.cpp b/tests/unittests/CompilerSuite.cpp new file mode 100644 index 000000000..d177bbbb7 --- /dev/null +++ b/tests/unittests/CompilerSuite.cpp @@ -0,0 +1,37 @@ +#include + +#include + +using namespace boost; + +ut::suite<"Compiler"> compiler_suite = [] { + using namespace ut; + + "Word construction"_test = [] { + should("create a word with
a single argument on 2 bytes") = [] { + const auto word = Ark::internal::Word(12, 0x5678); + expect(that % word.opcode == 12); + expect(that % word.byte_1 == 0); + expect(that % word.byte_2 == 0x56); + expect(that % word.byte_3 == 0x78); + }; + + constexpr uint16_t primary_arg = 0x0567; + constexpr uint16_t secondary_arg = 0x089a; + const auto word = Ark::internal::Word(12, primary_arg, secondary_arg); + should("split arguments evenly between 3 bytes") = [&] { + expect(that % word.opcode == 12); + expect(that % word.byte_1 == 0x89); + expect(that % word.byte_2 == 0xa5); + expect(that % word.byte_3 == 0x67); + }; + + should("be able to unpack both arguments from word") = [&] { + const uint8_t padding = word.byte_1; + const auto arg = static_cast((word.byte_2 << 8) | word.byte_3); + + expect(that % primary_arg == (arg & 0x0fff)); + expect(that % secondary_arg == ((padding << 4) | (arg & 0xf000) >> 12)); + }; + }; +}; diff --git a/tests/unittests/EmbeddingSuite.cpp b/tests/unittests/EmbeddingSuite.cpp index d4041f131..0a277b5a7 100644 --- a/tests/unittests/EmbeddingSuite.cpp +++ b/tests/unittests/EmbeddingSuite.cpp @@ -76,15 +76,20 @@ ut::suite<"Embedding"> embedding_suite = [] { }; should("have symbol foo registered") = [&] { - auto func = mut(vm)["foo"]; + const auto func = mut(vm)["foo"]; expect(func.isFunction()); }; should("(foo 5 6.0) have a value of 13") = [&] { - auto value = mut(vm).call("foo", 5, 6.0); + const auto value = mut(vm).call("foo", 5, 6.0); expect(value.valueType() == Ark::ValueType::Number); expect(value.number() == 13.0_d); }; + + should("get nil when retrieving unbound symbol") = [&] { + const auto value = mut(vm)["unknown"]; + expect(value.valueType() == Ark::ValueType::Nil); + }; }; "[reset the VM and use it to run code again]"_test = [] {