diff --git a/.github/images/diagram.svg b/.github/images/diagram.svg
index 79bc11d69..b39699b05 100644
--- a/.github/images/diagram.svg
+++ b/.github/images/diagram.svg
@@ -1 +1 @@
-
+
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 07c17cf6d..698b3809e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@ __arkscript__/
*.arkc
*.arkm
/*.ark
+/*.ark.ir
!tests/unittests/resources/BytecodeReaderSuite/*.arkc
# Generated files
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c07a149e4..e93f837fb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,8 @@
- the name & scope resolution pass now checks for mutability errors
- compile time checks for mutability errors with `append!`, `concat!` and `pop!`
- new `MAKE_CLOSURE ` instruction, generated in place of a `LOAD_CONST` when a closure is made
+- added `-fdump-ir` to dump the IR entities to a file named `{file}.ark.ir`
+- added 11 super instructions and their implementation to the VM
### Changed
- instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument
@@ -81,6 +83,9 @@
- the `STORE` instruction has been renamed `SET_VAL`
- the `STORE` instruction is emitted in place of the `LET` and `MUT` instructions, without any mutability checking now
- `io:writeFile` no longer takes a mode and has been split into `io:writeFile` and `io:appendToFile`
+- instructions are now positioned like this: `inst byte1 byte2 byte3`
+ - byte1 is 0 if the instruction takes a single argument on 16 bits, split on byte2 and byte3
+ - if the instruction takes two arguments, they each have 12 bits; the second one is on byte1 and the upper half of byte2, the first on the lower half of byte2 and then byte3
### Removed
- removed unused `NodeType::Closure`
diff --git a/include/Ark/Compiler/AST/Predicates.hpp b/include/Ark/Compiler/AST/Predicates.hpp
index 2c60ff03c..7ccc0dac5 100644
--- a/include/Ark/Compiler/AST/Predicates.hpp
+++ b/include/Ark/Compiler/AST/Predicates.hpp
@@ -59,26 +59,6 @@ namespace Ark::internal
}
} IsHex;
- inline struct IsUpper final : CharPred
- {
- IsUpper() :
- CharPred("uppercase") {}
- bool operator()(const utf8_char_t::codepoint_t c) const override
- {
- return 0 <= c && c <= 255 && std::isupper(c) != 0;
- }
- } IsUpper;
-
- inline struct IsLower final : CharPred
- {
- IsLower() :
- CharPred("lowercase") {}
- bool operator()(const utf8_char_t::codepoint_t c) const override
- {
- return 0 <= c && c <= 255 && std::islower(c) != 0;
- }
- } IsLower;
-
inline struct IsAlpha final : CharPred
{
IsAlpha() :
@@ -99,16 +79,6 @@ namespace Ark::internal
}
} IsAlnum;
- inline struct IsPrint final : CharPred
- {
- IsPrint() :
- CharPred("printable") {}
- bool operator()(const utf8_char_t::codepoint_t c) const override
- {
- return 0 <= c && c <= 255 && std::isprint(c) != 0;
- }
- } IsPrint;
-
struct IsChar final : CharPred
{
explicit IsChar(const char c) :
@@ -186,16 +156,6 @@ namespace Ark::internal
}
} IsSymbol;
- inline struct IsAny final : CharPred
- {
- IsAny() :
- CharPred("any") {}
- bool operator()(const utf8_char_t::codepoint_t) const override
- {
- return true;
- }
- } IsAny;
-
const IsChar IsMinus('-');
}
diff --git a/include/Ark/Compiler/Compiler.hpp b/include/Ark/Compiler/Compiler.hpp
index 084fbd42d..65fc7c9fa 100644
--- a/include/Ark/Compiler/Compiler.hpp
+++ b/include/Ark/Compiler/Compiler.hpp
@@ -19,11 +19,11 @@
#include
#include
-#include
+#include
#include
#include
-namespace Ark
+namespace Ark::internal
{
class State;
class Welder;
@@ -32,7 +32,7 @@ namespace Ark
* @brief The ArkScript bytecode compiler
*
*/
- class ARK_API Compiler
+ class ARK_API Compiler final
{
public:
/**
@@ -47,17 +47,28 @@ namespace Ark
*
* @param ast
*/
- void process(const internal::Node& ast);
+ void process(const Node& ast);
/**
- * @brief Return the constructed bytecode object
+ * @brief Return the IR blocks (one per scope)
*
- * @return const bytecode_t&
+ * @return const std::vector&
*/
- [[nodiscard]] const bytecode_t& bytecode() const noexcept;
+ [[nodiscard]] const std::vector& intermediateRepresentation() const noexcept;
- friend class State;
- friend class Welder;
+ /**
+ * @brief Return the pre-computed symbol table
+ *
+ * @return const std::vector&
+ */
+ [[nodiscard]] const std::vector& symbols() const noexcept;
+
+ /**
+ * @brief Return the pre-computed value table
+ *
+ * @return const std::vector&
+ */
+ [[nodiscard]] const std::vector& values() const noexcept;
private:
struct Page
@@ -67,60 +78,33 @@ namespace Ark
};
// tables: symbols, values, plugins and codes
- std::vector m_symbols;
- std::vector m_plugins;
- std::vector m_values;
- std::vector> m_code_pages;
- std::vector> m_temp_pages; ///< we need temporary code pages for some compilations passes
+ std::vector m_symbols;
+ std::vector m_values;
+ std::vector m_code_pages;
+ std::vector m_temp_pages; ///< we need temporary code pages for some compilations passes
- bytecode_t m_bytecode;
unsigned m_debug; ///< the debug level of the compiler
- /**
- * @brief Push the file headers (magic, version used, timestamp)
- *
- */
- void pushFileHeader() noexcept;
-
- /**
- * @brief Push the symbols and values tables
- *
- */
- void pushSymAndValTables();
-
/**
* @brief helper functions to get a temp or finalized code page
*
* @param page page descriptor
- * @return std::vector&
+ * @return std::vector&
*/
- std::vector& page(const Page page) noexcept
+ IR::Block& page(const Page page) noexcept
{
if (!page.is_temp)
return m_code_pages[page.index];
return m_temp_pages[page.index];
}
- /**
- * @brief helper functions to get a temp or finalized code page
- *
- * @param page page descriptor
- * @return std::vector*
- */
- std::vector* page_ptr(const Page page) noexcept
- {
- if (!page.is_temp)
- return &m_code_pages[page.index];
- return &m_temp_pages[page.index];
- }
-
/**
* @brief Checking if a symbol is an operator
*
* @param name symbol name
- * @return std::optional operator instruction
+ * @return std::optional operator instruction
*/
- static std::optional getOperator(const std::string& name) noexcept;
+ static std::optional getOperator(const std::string& name) noexcept;
/**
* @brief Checking if a symbol is a builtin
@@ -134,9 +118,9 @@ namespace Ark
* @brief Checking if a symbol is a list instruction
*
* @param name
- * @return std::optional list instruction
+ * @return std::optional list instruction
*/
- static std::optional getListInstruction(const std::string& name) noexcept;
+ static std::optional getListInstruction(const std::string& name) noexcept;
/**
* Checks if a node is a list and has a keyboard as its first node, indicating if it's producing a value on the stack or not
@@ -144,7 +128,7 @@ namespace Ark
* @return true if the node produces an output on the stack (fun, if, begin)
* @return false otherwise (let, mut, set, while, import, del)
*/
- static bool nodeProducesOutput(const internal::Node& node);
+ static bool nodeProducesOutput(const Node& node);
/**
* @brief Check if a given instruction is unary (takes only one argument)
@@ -153,16 +137,7 @@ namespace Ark
* @return true the instruction is unary
* @return false
*/
- static bool isUnaryInst(internal::Instruction inst) noexcept;
-
- /**
- * @brief Checking if a symbol may be coming from a plugin
- *
- * @param name symbol name
- * @return true the symbol may be from a plugin, loaded at runtime
- * @return false
- */
- bool mayBeFromPlugin(const std::string& name) noexcept;
+ static bool isUnaryInst(Instruction inst) noexcept;
/**
* @brief Display a warning message
@@ -170,7 +145,7 @@ namespace Ark
* @param message
* @param node
*/
- static void compilerWarning(const std::string& message, const internal::Node& node);
+ static void compilerWarning(const std::string& message, const Node& node);
/**
* @brief Throw a nice error message
@@ -178,27 +153,27 @@ namespace Ark
* @param message
* @param node
*/
- [[noreturn]] static void throwCompilerError(const std::string& message, const internal::Node& node);
+ [[noreturn]] static void throwCompilerError(const std::string& message, const Node& node);
/**
* @brief Compile an expression (a node) recursively
*
- * @param x the internal::Node to compile
+ * @param x the Node to compile
* @param p the current page number we're on
* @param is_result_unused
* @param is_terminal
* @param var_name
*/
- void compileExpression(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name = "");
+ void compileExpression(const Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name = "");
- void compileSymbol(const internal::Node& x, Page p, bool is_result_unused);
- void compileListInstruction(const internal::Node& c0, const internal::Node& x, Page p, bool is_result_unused);
- void compileIf(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name);
- void compileFunction(const internal::Node& x, Page p, bool is_result_unused, const std::string& var_name);
- void compileLetMutSet(internal::Keyword n, const internal::Node& x, Page p);
- void compileWhile(const internal::Node& x, Page p);
- void compilePluginImport(const internal::Node& x, Page p);
- void handleCalls(const internal::Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name);
+ void compileSymbol(const Node& x, Page p, bool is_result_unused);
+ void compileListInstruction(const Node& c0, const Node& x, Page p, bool is_result_unused);
+ void compileIf(const Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name);
+ void compileFunction(const Node& x, Page p, bool is_result_unused, const std::string& var_name);
+ void compileLetMutSet(Keyword n, const Node& x, Page p);
+ void compileWhile(const Node& x, Page p);
+ void compilePluginImport(const Node& x, Page p);
+ void handleCalls(const Node& x, Page p, bool is_result_unused, bool is_terminal, const std::string& var_name);
/**
* @brief Register a given node in the symbol table
@@ -207,7 +182,7 @@ namespace Ark
* @param sym
* @return uint16_t
*/
- uint16_t addSymbol(const internal::Node& sym);
+ uint16_t addSymbol(const Node& sym);
/**
* @brief Register a given node in the value table
@@ -216,7 +191,7 @@ namespace Ark
* @param x
* @return uint16_t
*/
- uint16_t addValue(const internal::Node& x);
+ uint16_t addValue(const Node& x);
/**
* @brief Register a page id (function reference) in the value table
@@ -226,7 +201,7 @@ namespace Ark
* @param current A reference to the current node, for context
* @return std::size_t
*/
- uint16_t addValue(std::size_t page_id, const internal::Node& current);
+ uint16_t addValue(std::size_t page_id, const Node& current);
};
}
diff --git a/include/Ark/Compiler/Instructions.hpp b/include/Ark/Compiler/Instructions.hpp
index 1cb05d058..a634309a9 100644
--- a/include/Ark/Compiler/Instructions.hpp
+++ b/include/Ark/Compiler/Instructions.hpp
@@ -12,6 +12,8 @@
#ifndef ARK_COMPILER_INSTRUCTIONS_HPP
#define ARK_COMPILER_INSTRUCTIONS_HPP
+#include
+
namespace Ark::internal
{
/**
@@ -86,7 +88,86 @@ namespace Ark::internal
NOT = 0x30,
LAST_OPERATOR = 0x30,
- LAST_INSTRUCTION = 0x30
+ FIRST_SUPER_INSTRUCTION = 0x31,
+ LOAD_CONST_LOAD_CONST = 0x31,
+ LOAD_CONST_STORE = 0x32,
+ LOAD_CONST_SET_VAL = 0x33,
+ STORE_FROM = 0x34,
+ SET_VAL_FROM = 0x35,
+ INCREMENT = 0x36,
+ DECREMENT = 0x37,
+ STORE_TAIL = 0x38,
+ STORE_HEAD = 0x39,
+ SET_VAL_TAIL = 0x3a,
+ SET_VAL_HEAD = 0x3b,
+ LAST_SUPER_INSTRUCTION = 0x3b,
+
+ LAST_INSTRUCTION = 0x3d
+ };
+
+ constexpr std::array InstructionNames = { // mnemonics indexed by opcode value: keep the order in sync with the Instruction enum
+ "NOP",
+ "LOAD_SYMBOL",
+ "LOAD_CONST",
+ "POP_JUMP_IF_TRUE",
+ "STORE",
+ "SET_VAL",
+ "POP_JUMP_IF_FALSE",
+ "JUMP",
+ "RET",
+ "HALT",
+ "CALL",
+ "CAPTURE",
+ "BUILTIN",
+ "DEL",
+ "MAKE_CLOSURE",
+ "GET_FIELD",
+ "PLUGIN",
+ "LIST",
+ "APPEND",
+ "CONCAT",
+ "APPEND_IN_PLACE",
+ "CONCAT_IN_PLACE",
+ "POP_LIST",
+ "POP_LIST_IN_PLACE",
+ "POP",
+ "DUP",
+ // operators (the range ending at NOT = LAST_OPERATOR = 0x30)
+ "ADD",
+ "SUB",
+ "MUL",
+ "DIV",
+ "GT",
+ "LT",
+ "LE",
+ "GE",
+ "NEQ",
+ "EQ",
+ "LEN",
+ "EMPTY",
+ "TAIL",
+ "HEAD",
+ "ISNIL",
+ "ASSERT",
+ "TO_NUM",
+ "TO_STR",
+ "AT",
+ "MOD",
+ "TYPE",
+ "HASFIELD",
+ "NOT",
+ // super instructions (FIRST_SUPER_INSTRUCTION = 0x31 .. LAST_SUPER_INSTRUCTION = 0x3b)
+ "LOAD_CONST_LOAD_CONST",
+ "LOAD_CONST_STORE",
+ "LOAD_CONST_SET_VAL",
+ "STORE_FROM",
+ "SET_VAL_FROM",
+ "INCREMENT",
+ "DECREMENT",
+ "STORE_TAIL",
+ "STORE_HEAD",
+ "SET_VAL_TAIL",
+ "SET_VAL_HEAD",
+ }; // NOTE(review): 60 names cover opcodes 0x00..0x3b, yet LAST_INSTRUCTION is declared as 0x3d - confirm which is intended
}
diff --git a/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp b/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp
new file mode 100644
index 000000000..4f44f9e03
--- /dev/null
+++ b/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp
@@ -0,0 +1,75 @@
+/**
+ * @file Entity.hpp
+ * @author Alexandre Plateau (lexplt.dev@gmail.com)
+ * @brief An entity in the IR is a bundle of information
+ * @version 0.1
+ * @date 2024-10-05
+ *
+ * @copyright Copyright (c) 2024
+ *
+ */
+
+#ifndef ARK_COMPILER_INTERMEDIATEREPRESENTATION_ENTITY_HPP
+#define ARK_COMPILER_INTERMEDIATEREPRESENTATION_ENTITY_HPP
+
+#include
+#include
+
+#include
+#include
+
+namespace Ark::internal::IR
+{
+ enum class Kind // what an Entity represents; stored in Entity::m_kind and read back through Entity::kind()
+ {
+ Label, // NOTE(review): presumably the kind produced by Entity::Label() - confirm in Entity.cpp
+ Goto, // NOTE(review): presumably the kind produced by Entity::Goto(label) - confirm
+ GotoIfTrue, // NOTE(review): presumably Entity::GotoIf(label, true) - confirm
+ GotoIfFalse, // NOTE(review): presumably Entity::GotoIf(label, false) - confirm
+ Opcode, // NOTE(review): presumably an instruction built with (inst, arg) - confirm
+ Opcode2Args // NOTE(review): presumably an instruction built with (inst, primary_arg, secondary_arg) - confirm
+ };
+
+ using label_t = std::size_t;
+
+ class Entity // one element of an IR::Block: carries a kind, a label id, an instruction and up to two 16-bit arguments
+ {
+ public:
+ explicit Entity(Kind kind); // NOTE(review): presumably leaves label/inst/args at their defaults below - confirm in Entity.cpp
+
+ explicit Entity(Instruction inst, uint16_t arg = 0); // single-argument form; arg defaults to 0
+
+ Entity(Instruction inst, uint16_t primary_arg, uint16_t secondary_arg); // two-argument form
+
+ static Entity Label(); // NOTE(review): presumably draws a fresh id from LabelCounter - confirm
+
+ static Entity Goto(const Entity& label); // builds an entity from an existing label entity
+
+ static Entity GotoIf(const Entity& label, bool cond); // NOTE(review): cond presumably selects GotoIfTrue / GotoIfFalse - confirm
+
+ [[nodiscard]] Word bytecode() const; // NOTE(review): presumably encodes m_inst and its argument(s) as a Word - confirm
+
+ [[nodiscard]] inline label_t label() const { return m_label; } // accessor for m_label
+
+ [[nodiscard]] inline Kind kind() const { return m_kind; } // accessor for m_kind
+
+ [[nodiscard]] inline Instruction inst() const { return m_inst; } // accessor for m_inst
+
+ [[nodiscard]] inline uint16_t primaryArg() const { return m_primary_arg; } // accessor for m_primary_arg
+
+ [[nodiscard]] inline uint16_t secondaryArg() const { return m_secondary_arg; } // accessor for m_secondary_arg
+
+ private:
+ inline static label_t LabelCounter = 0; // one counter shared by every Entity (static), starting at 0
+
+ Kind m_kind; // no default member initializer, unlike the fields below
+ label_t m_label { 0 }; // defaults to 0
+ Instruction m_inst { NOP }; // defaults to NOP
+ uint16_t m_primary_arg { 0 }; // defaults to 0
+ uint16_t m_secondary_arg { 0 }; // defaults to 0
+ };
+
+ using Block = std::vector;
+}
+
+#endif // ARK_COMPILER_INTERMEDIATEREPRESENTATION_ENTITY_HPP
diff --git a/include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp b/include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp
new file mode 100644
index 000000000..f57aeb4fa
--- /dev/null
+++ b/include/Ark/Compiler/IntermediateRepresentation/IRCompiler.hpp
@@ -0,0 +1,79 @@
+/**
+ * @file IRCompiler.hpp
+ * @author Alexandre Plateau (lexplt.dev@gmail.com)
+ * @brief Compile the intermediate representation to bytecode
+ * @version 0.1
+ * @date 2024-10-05
+ *
+ * @copyright Copyright (c) 2024
+ *
+ */
+
+#ifndef ARK_COMPILER_INTERMEDIATEREPRESENTATION_IRCOMPILER_HPP
+#define ARK_COMPILER_INTERMEDIATEREPRESENTATION_IRCOMPILER_HPP
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace Ark::internal
+{
+ class ARK_API IRCompiler final
+ {
+ public:
+ /**
+ * @brief Create a new IRCompiler
+ *
+ * @param debug debug level
+ */
+ explicit IRCompiler(unsigned debug);
+
+ /**
+ * @brief Turn a given IR into bytecode
+ *
+ * @param pages list of lists of IR entities generated by the compiler
+ * @param symbols symbol table generated by the compiler
+ * @param values value table generated by the compiler
+ */
+ void process(const std::vector& pages, const std::vector& symbols, const std::vector& values);
+
+ /**
+ * @brief Return the constructed bytecode object
+ *
+ * @return const bytecode_t&
+ */
+ [[nodiscard]] const bytecode_t& bytecode() const noexcept;
+
+ private:
+ Logger m_logger;
+ bytecode_t m_bytecode; // NOTE(review): presumably the buffer handed back by bytecode() - confirm in IRCompiler.cpp
+ std::vector m_ir; // NOTE(review): presumably a copy of the pages given to process() - confirm
+
+ void compile(); // NOTE(review): undocumented; presumably emits m_ir into m_bytecode - confirm
+
+ /**
+ * @brief Push a word to the m_bytecode
+ * @param word the word to push
+ */
+ void pushWord(const Word& word);
+
+ /**
+ * @brief Push the file headers (magic, version used, timestamp)
+ *
+ */
+ void pushFileHeader() noexcept;
+
+ /**
+ * @brief Push the symbols and values tables
+ *
+ */
+ void pushSymAndValTables(const std::vector& symbols, const std::vector& values);
+ };
+}
+
+#endif // ARK_COMPILER_INTERMEDIATEREPRESENTATION_IRCOMPILER_HPP
diff --git a/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp b/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp
new file mode 100644
index 000000000..150da64ed
--- /dev/null
+++ b/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp
@@ -0,0 +1,55 @@
+/**
+ * @file IROptimizer.hpp
+ * @author Alexandre Plateau (lexplt.dev@gmail.com)
+ * @brief Optimize IR based on IR entity grouped by 2 (or more)
+ * @version 0.1
+ * @date 2024-10-11
+ *
+ * @copyright Copyright (c) 2024
+ *
+ */
+#ifndef ARK_COMPILER_INTERMEDIATEREPRESENTATION_IROPTIMIZER_HPP
+#define ARK_COMPILER_INTERMEDIATEREPRESENTATION_IROPTIMIZER_HPP
+
+#include
+#include
+#include
+#include
+
+namespace Ark::internal
+{
+ class ARK_API IROptimizer final
+ {
+ public:
+ /**
+ * @brief Create a new IROptimizer
+ *
+ * @param debug debug level
+ */
+ explicit IROptimizer(unsigned debug);
+
+ /**
+ * @brief Optimize a given IR; this class exposes IR blocks via intermediateRepresentation(), not bytecode
+ *
+ * @param pages list of lists of IR entities generated by the compiler
+ * @param symbols symbol table generated by the compiler
+ * @param values value table generated by the compiler
+ */
+ void process(const std::vector& pages, const std::vector& symbols, const std::vector& values);
+
+ /**
+ * @brief Return the IR blocks (one per scope)
+ *
+ * @return const std::vector&
+ */
+ [[nodiscard]] const std::vector& intermediateRepresentation() const noexcept;
+
+ private:
+ Logger m_logger;
+ std::vector m_ir; // NOTE(review): presumably what intermediateRepresentation() returns - confirm in IROptimizer.cpp
+ std::vector m_symbols; // NOTE(review): presumably the symbol table received by process() - confirm
+ std::vector m_values; // NOTE(review): presumably the value table received by process() - confirm
+ };
+}
+
+#endif // ARK_COMPILER_INTERMEDIATEREPRESENTATION_IROPTIMIZER_HPP
diff --git a/include/Ark/Compiler/ImportSolver.hpp b/include/Ark/Compiler/Package/ImportSolver.hpp
similarity index 98%
rename from include/Ark/Compiler/ImportSolver.hpp
rename to include/Ark/Compiler/Package/ImportSolver.hpp
index 23a05b86c..3b8be5526 100644
--- a/include/Ark/Compiler/ImportSolver.hpp
+++ b/include/Ark/Compiler/Package/ImportSolver.hpp
@@ -21,7 +21,7 @@
#include
#include
#include
-#include
+#include
namespace Ark::internal
{
diff --git a/include/Ark/Compiler/AST/Module.hpp b/include/Ark/Compiler/Package/Module.hpp
similarity index 100%
rename from include/Ark/Compiler/AST/Module.hpp
rename to include/Ark/Compiler/Package/Module.hpp
diff --git a/include/Ark/Compiler/Welder.hpp b/include/Ark/Compiler/Welder.hpp
index f8ae9ea94..63581b52e 100644
--- a/include/Ark/Compiler/Welder.hpp
+++ b/include/Ark/Compiler/Welder.hpp
@@ -20,9 +20,11 @@
#include
#include
#include
+#include
+#include
#include
#include
-#include
+#include
#include
#include
#include
@@ -86,6 +88,7 @@ namespace Ark
std::filesystem::path m_root_file;
std::vector m_imports;
+ std::vector m_ir;
bytecode_t m_bytecode;
internal::Node m_computed_ast;
@@ -96,7 +99,11 @@ namespace Ark
internal::NameResolutionPass m_name_resolver;
internal::Logger m_logger;
- Compiler m_compiler;
+ internal::IROptimizer m_ir_optimizer;
+ internal::IRCompiler m_ir_compiler;
+ internal::Compiler m_compiler;
+
+ void dumpIRToFile() const;
bool computeAST(const std::string& filename, const std::string& code);
};
diff --git a/include/Ark/Compiler/Word.hpp b/include/Ark/Compiler/Word.hpp
index 4ec6dabd2..c5e261949 100644
--- a/include/Ark/Compiler/Word.hpp
+++ b/include/Ark/Compiler/Word.hpp
@@ -14,28 +14,29 @@
namespace Ark::internal
{
- struct bytes_t
- {
- uint8_t first {};
- uint8_t second {};
- };
-
struct Word
{
- uint8_t padding = 0; ///< Padding reserved for future use
- uint8_t opcode = 0; ///< Instruction opcode
- uint16_t data = 0; ///< Immediate data, interpreted differently for different instructions
+ uint8_t opcode = 0; ///< Instruction opcode
+ uint8_t byte_1 = 0; ///< Two-argument form: bits 11..4 of the secondary argument; left at 0 by the single-argument constructor
+ uint8_t byte_2 = 0; ///< Single-argument form: high byte of arg. Two-argument form: low 4 bits of secondary (upper nibble) | bits 11..8 of primary (lower nibble)
+ uint8_t byte_3 = 0; ///< Single-argument form: low byte of arg. Two-argument form: low 8 bits of the primary argument
explicit Word(const uint8_t inst, const uint16_t arg = 0) :
- opcode(inst), data(arg)
+ opcode(inst), byte_2(static_cast(arg >> 8)), byte_3(static_cast(arg & 0xff))
{}
- [[nodiscard]] bytes_t bytes() const
+ /**
+ * @brief Construct a word with two arguments, each on 12 bits. It's up to the caller to ensure that no data is lost
+ * @param inst instruction opcode, stored in opcode
+ * @param primary_arg argument on 12 bits, the upper 4 bits are lost
+ * @param secondary_arg 2nd argument on 12 bits, the upper 4 bits are lost
+ */
+ Word(const uint8_t inst, const uint16_t primary_arg, const uint16_t secondary_arg) :
+ opcode(inst)
{
- return bytes_t {
- .first = static_cast((data & 0xff00) >> 8),
- .second = static_cast(data & 0x00ff)
- };
+ byte_1 = static_cast((secondary_arg & 0xff0) >> 4); // secondary bits 11..4
+ byte_2 = static_cast((secondary_arg & 0x00f) << 4 | (primary_arg & 0xf00) >> 8); // secondary bits 3..0, then primary bits 11..8
+ byte_3 = static_cast(primary_arg & 0x0ff); // primary bits 7..0
}
};
}
diff --git a/include/Ark/Constants.hpp.in b/include/Ark/Constants.hpp.in
index 5964cdccb..d693163e0 100644
--- a/include/Ark/Constants.hpp.in
+++ b/include/Ark/Constants.hpp.in
@@ -49,11 +49,18 @@ namespace Ark
constexpr uint16_t FeatureImportSolver = 1 << 0;
constexpr uint16_t FeatureMacroProcessor = 1 << 1;
constexpr uint16_t FeatureASTOptimizer = 1 << 2;
+ constexpr uint16_t FeatureIROptimizer = 1 << 3;
+
+ constexpr uint16_t FeatureDumpIR = 1 << 14;
/// This feature should only be used in tests, to disable diagnostics generation and enable exceptions to be thrown
constexpr uint16_t FeatureTestFailOnException = 1 << 15;
// Default features for the VM x Compiler x Parser
- constexpr uint16_t DefaultFeatures = FeatureImportSolver | FeatureMacroProcessor | FeatureASTOptimizer;
+ constexpr uint16_t DefaultFeatures =
+ FeatureImportSolver
+ | FeatureMacroProcessor
+ | FeatureASTOptimizer
+ | FeatureIROptimizer;
constexpr std::size_t MaxMacroProcessingDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::processNode
constexpr std::size_t MaxMacroUnificationDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::unify
diff --git a/include/Ark/VM/VM.hpp b/include/Ark/VM/VM.hpp
index 63789d584..d0e46e409 100644
--- a/include/Ark/VM/VM.hpp
+++ b/include/Ark/VM/VM.hpp
@@ -181,6 +181,15 @@ namespace Ark
*/
void init() noexcept;
+ // ================================================
+ // instruction helpers
+ // ================================================
+
+ inline Value* loadSymbol(uint16_t id, internal::ExecutionContext& context);
+ inline Value* loadConstAsPtr(uint16_t id) const;
+ inline void store(uint16_t id, const Value* val, internal::ExecutionContext& context);
+ inline void setVal(uint16_t id, const Value* val, internal::ExecutionContext& context);
+
// ================================================
// stack related
// ================================================
diff --git a/include/Ark/VM/VM.inl b/include/Ark/VM/VM.inl
index ffe424fd6..7620d9a1b 100644
--- a/include/Ark/VM/VM.inl
+++ b/include/Ark/VM/VM.inl
@@ -123,6 +123,58 @@ inline Value VM::resolve(internal::ExecutionContext* context, std::vector
return *popAndResolveAsPtr(*context);
}
+#pragma region "instruction helpers"
+
+inline Value* VM::loadSymbol(const uint16_t id, internal::ExecutionContext& context) // look up symbol `id`; returns a pointer to its value, or reports a Scope error when it is unbound
+{
+ context.last_symbol = id; // recorded on the context before the lookup, and reused for the error message below
+ if (Value* var = findNearestVariable(context.last_symbol, context); var != nullptr) [[likely]]
+ {
+ // return the variable itself, unless it holds a Reference: in that case return what it refers to
+ if (var->valueType() == ValueType::Reference)
+ return var->reference();
+ return var;
+ }
+ else [[unlikely]]
+ throwVMError(internal::ErrorKind::Scope, fmt::format("Unbound variable `{}'", m_state.m_symbols[context.last_symbol]));
+ return nullptr; // NOTE(review): presumably never reached because throwVMError does not return - confirm
+}
+
+inline Value* VM::loadConstAsPtr(const uint16_t id) const // address of entry `id` in m_state.m_constants
+{
+ return &m_state.m_constants[id]; // indexed directly: no bounds check is performed here
+}
+
+inline void VM::store(const uint16_t id, const Value* val, internal::ExecutionContext& context) // bind `id` to *val in the last scope of context.locals
+{
+ // avoid adding the pair (id, _) multiple times, with different values
+ Value* local = context.locals.back()[id]; // nullptr is treated as "not yet present in this scope"
+ if (local == nullptr) [[likely]]
+ context.locals.back().push_back(id, *val); // first binding of `id` in this scope
+ else
+ *local = *val; // already bound here: overwrite in place
+}
+
+inline void VM::setVal(const uint16_t id, const Value* val, internal::ExecutionContext& context) // overwrite the variable found for `id`; reports a Scope error when none is found
+{
+ if (Value* var = findNearestVariable(id, context); var != nullptr) [[likely]]
+ {
+ if (var->valueType() == ValueType::Reference)
+ *var->reference() = *val; // write through the reference, to the value it points to
+ else [[likely]]
+ *var = *val;
+ }
+ else
+ throwVMError(
+ internal::ErrorKind::Scope,
+ fmt::format(
+ "Unbound variable `{}', can not change its value to {}",
+ m_state.m_symbols[id],
+ val->toString(*this)));
+}
+
+#pragma endregion
+
#pragma region "stack management"
inline Value* VM::pop(internal::ExecutionContext& context)
@@ -338,8 +390,7 @@ inline void VM::call(internal::ExecutionContext& context, const uint16_t argc)
needed_argc = 0;
// every argument is a MUT declaration in the bytecode
- // index+1 to skip the padding
- while (m_state.m_pages[context.pp][index + 1] == STORE)
+ while (m_state.m_pages[context.pp][index] == STORE)
{
needed_argc += 1;
index += 4; // instructions are on 4 bytes
diff --git a/src/arkreactor/Compiler/BytecodeReader.cpp b/src/arkreactor/Compiler/BytecodeReader.cpp
index c51450bac..3b13188ec 100644
--- a/src/arkreactor/Compiler/BytecodeReader.cpp
+++ b/src/arkreactor/Compiler/BytecodeReader.cpp
@@ -4,6 +4,7 @@
#include
#include
+#include
#include
#include
#include
@@ -331,6 +332,28 @@ namespace Ark
uint16_t arg;
};
+ const std::unordered_map arg_kinds = { // how to display each instruction's argument; instructions absent from this map are printed without one
+ { LOAD_SYMBOL, ArgKind::Symbol },
+ { LOAD_CONST, ArgKind::Value },
+ { POP_JUMP_IF_TRUE, ArgKind::Raw },
+ { STORE, ArgKind::Symbol },
+ { SET_VAL, ArgKind::Symbol },
+ { POP_JUMP_IF_FALSE, ArgKind::Raw },
+ { JUMP, ArgKind::Raw },
+ { CALL, ArgKind::Raw },
+ { CAPTURE, ArgKind::Symbol },
+ { BUILTIN, ArgKind::Builtin },
+ { DEL, ArgKind::Symbol },
+ { MAKE_CLOSURE, ArgKind::Value },
+ { GET_FIELD, ArgKind::Symbol },
+ { PLUGIN, ArgKind::Value },
+ { LIST, ArgKind::Raw },
+ { APPEND, ArgKind::Raw },
+ { CONCAT, ArgKind::Raw },
+ { APPEND_IN_PLACE, ArgKind::Raw },
+ { CONCAT_IN_PLACE, ArgKind::Raw }
+ }; // NOTE(review): no super instruction (LOAD_CONST_LOAD_CONST .. SET_VAL_HEAD) is listed, so their arguments are not displayed - confirm this is intended
+
const auto color_print_inst = [&syms, &vals, &stringify_value](const std::string& name, std::optional arg = std::nullopt) {
fmt::print("{}", fmt::styled(name, fmt::fg(fmt::color::gold)));
if (arg.has_value())
@@ -392,113 +415,24 @@ namespace Ark
for (std::size_t j = sStart.value_or(0), end = sEnd.value_or(page.size()); j < end; j += 4)
{
- const uint8_t padding = page[j];
- const uint8_t inst = page[j + 1];
+ const uint8_t inst = page[j];
+ // TEMP
+ const uint8_t padding = page[j + 1];
const auto arg = static_cast((page[j + 2] << 8) + page[j + 3]);
// instruction number
fmt::print(fmt::fg(fmt::color::cyan), "{:>4}", j / 4);
// padding inst arg arg
- fmt::print(" {:02x} {:02x} {:02x} {:02x} ", padding, inst, page[j + 2], page[j + 3]);
-
- if (inst == NOP)
- color_print_inst("NOP");
- else if (inst == LOAD_SYMBOL)
- color_print_inst("LOAD_SYMBOL", Arg { ArgKind::Symbol, arg });
- else if (inst == LOAD_CONST)
- color_print_inst("LOAD_CONST", Arg { ArgKind::Value, arg });
- else if (inst == POP_JUMP_IF_TRUE)
- color_print_inst("POP_JUMP_IF_TRUE", Arg { ArgKind::Raw, arg });
- else if (inst == STORE)
- color_print_inst("STORE", Arg { ArgKind::Symbol, arg });
- else if (inst == SET_VAL)
- color_print_inst("SET_VAL", Arg { ArgKind::Symbol, arg });
- else if (inst == POP_JUMP_IF_FALSE)
- color_print_inst("POP_JUMP_IF_FALSE", Arg { ArgKind::Raw, arg });
- else if (inst == JUMP)
- color_print_inst("JUMP", Arg { ArgKind::Raw, arg });
- else if (inst == RET)
- color_print_inst("RET");
- else if (inst == HALT)
- color_print_inst("HALT");
- else if (inst == CALL)
- color_print_inst("CALL", Arg { ArgKind::Raw, arg });
- else if (inst == CAPTURE)
- color_print_inst("CAPTURE", Arg { ArgKind::Symbol, arg });
- else if (inst == BUILTIN)
- color_print_inst("BUILTIN", Arg { ArgKind::Builtin, arg });
- else if (inst == DEL)
- color_print_inst("DEL", Arg { ArgKind::Symbol, arg });
- else if (inst == MAKE_CLOSURE)
- color_print_inst("MAKE_CLOSURE", Arg { ArgKind::Value, arg });
- else if (inst == GET_FIELD)
- color_print_inst("GET_FIELD", Arg { ArgKind::Symbol, arg });
- else if (inst == PLUGIN)
- color_print_inst("PLUGIN", Arg { ArgKind::Value, arg });
- else if (inst == LIST)
- color_print_inst("LIST", Arg { ArgKind::Raw, arg });
- else if (inst == APPEND)
- color_print_inst("APPEND", Arg { ArgKind::Raw, arg });
- else if (inst == CONCAT)
- color_print_inst("CONCAT", Arg { ArgKind::Raw, arg });
- else if (inst == APPEND_IN_PLACE)
- color_print_inst("APPEND_IN_PLACE", Arg { ArgKind::Raw, arg });
- else if (inst == CONCAT_IN_PLACE)
- color_print_inst("CONCAT_IN_PLACE", Arg { ArgKind::Raw, arg });
- else if (inst == POP_LIST)
- color_print_inst("POP_LIST");
- else if (inst == POP_LIST_IN_PLACE)
- color_print_inst("POP_LIST_IN_PLACE");
- else if (inst == POP)
- color_print_inst("POP");
- else if (inst == DUP)
- color_print_inst("DUP");
- else if (inst == ADD)
- color_print_inst("ADD");
- else if (inst == SUB)
- color_print_inst("SUB");
- else if (inst == MUL)
- color_print_inst("MUL");
- else if (inst == DIV)
- color_print_inst("DIV");
- else if (inst == GT)
- color_print_inst("GT");
- else if (inst == LT)
- color_print_inst("LT");
- else if (inst == LE)
- color_print_inst("LE");
- else if (inst == GE)
- color_print_inst("GE");
- else if (inst == NEQ)
- color_print_inst("NEQ");
- else if (inst == EQ)
- color_print_inst("EQ");
- else if (inst == LEN)
- color_print_inst("LEN");
- else if (inst == EMPTY)
- color_print_inst("EMPTY");
- else if (inst == TAIL)
- color_print_inst("TAIL");
- else if (inst == HEAD)
- color_print_inst("HEAD");
- else if (inst == ISNIL)
- color_print_inst("ISNIL");
- else if (inst == ASSERT)
- color_print_inst("ASSERT");
- else if (inst == TO_NUM)
- color_print_inst("TO_NUM");
- else if (inst == TO_STR)
- color_print_inst("TO_STR");
- else if (inst == AT)
- color_print_inst("AT");
- else if (inst == MOD)
- color_print_inst("MOD");
- else if (inst == TYPE)
- color_print_inst("TYPE");
- else if (inst == HASFIELD)
- color_print_inst("HASFIELD");
- else if (inst == NOT)
- color_print_inst("NOT");
+ fmt::print(" {:02x} {:02x} {:02x} {:02x} ", inst, padding, page[j + 2], page[j + 3]);
+
+ if (const auto idx = static_cast(inst); idx < InstructionNames.size())
+ {
+ const auto inst_name = InstructionNames[idx];
+ if (const auto iinst = static_cast(inst); arg_kinds.contains(iinst))
+ color_print_inst(inst_name, Arg { arg_kinds.at(iinst), arg });
+ else
+ color_print_inst(inst_name);
+ }
else
fmt::println("Unknown instruction");
}
diff --git a/src/arkreactor/Compiler/Compiler.cpp b/src/arkreactor/Compiler/Compiler.cpp
index 5ad2d7ac2..eebf5058f 100644
--- a/src/arkreactor/Compiler/Compiler.cpp
+++ b/src/arkreactor/Compiler/Compiler.cpp
@@ -4,7 +4,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -15,9 +14,8 @@
#include
#include
-namespace Ark
+namespace Ark::internal
{
- using namespace internal;
using namespace literals;
Compiler::Compiler(const unsigned debug) :
@@ -26,8 +24,6 @@ namespace Ark
void Compiler::process(const Node& ast)
{
- pushFileHeader();
-
m_code_pages.emplace_back(); // create empty page
// gather symbols, values, and start to create code segments
@@ -36,163 +32,28 @@ namespace Ark
/* current_page */ Page { .index = 0, .is_temp = false },
/* is_result_unused */ false,
/* is_terminal */ false);
-
- pushSymAndValTables();
-
- // push the different code segments
- for (std::size_t i = 0, end = m_code_pages.size(); i < end; ++i)
- {
- std::vector& page = m_code_pages[i];
- // just in case we got too far, always add a HALT to be sure the
- // VM won't do anything crazy
- page.emplace_back(Instruction::HALT);
-
- // push number of elements
- const std::size_t page_size = page.size();
- if (page_size > std::numeric_limits::max())
- throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i));
-
- m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
- m_bytecode.push_back(static_cast((page_size & 0xff00) >> 8));
- m_bytecode.push_back(static_cast(page_size & 0x00ff));
-
- for (auto inst : page)
- {
- m_bytecode.push_back(inst.padding);
- m_bytecode.push_back(inst.opcode);
-
- auto [first, second] = inst.bytes();
- m_bytecode.push_back(first);
- m_bytecode.push_back(second);
- }
- }
-
- if (m_code_pages.empty())
- {
- // code segment with a single instruction
- m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
- m_bytecode.push_back(0_u8);
- m_bytecode.push_back(1_u8);
-
- m_bytecode.push_back(0_u8);
- m_bytecode.push_back(Instruction::HALT);
- m_bytecode.push_back(0_u8);
- m_bytecode.push_back(0_u8);
- }
-
- constexpr std::size_t header_size = 18;
-
- // generate a hash of the tables + bytecode
- std::vector hash_out(picosha2::k_digest_size);
- picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out);
- m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end());
}
- const bytecode_t& Compiler::bytecode() const noexcept
+ const std::vector& Compiler::intermediateRepresentation() const noexcept
{
- return m_bytecode;
+ return m_code_pages;
}
- void Compiler::pushFileHeader() noexcept
+ const std::vector& Compiler::symbols() const noexcept
{
- /*
- Generating headers:
- - lang name (to be sure we are executing an ArkScript file)
- on 4 bytes (ark + padding)
- - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes)
- - timestamp (8 bytes, unix format)
- */
-
- m_bytecode.push_back('a');
- m_bytecode.push_back('r');
- m_bytecode.push_back('k');
- m_bytecode.push_back(0_u8);
-
- // push version
- for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH })
- {
- m_bytecode.push_back(static_cast((n & 0xff00) >> 8));
- m_bytecode.push_back(static_cast(n & 0x00ff));
- }
-
- // push timestamp
- const long long timestamp = std::chrono::duration_cast(
- std::chrono::system_clock::now().time_since_epoch())
- .count();
- for (long i = 0; i < 8; ++i)
- {
- const long shift = 8 * (7 - i);
- const auto ts_byte = static_cast((timestamp & (0xffLL << shift)) >> shift);
- m_bytecode.push_back(ts_byte);
- }
+ return m_symbols;
}
- void Compiler::pushSymAndValTables()
+ const std::vector& Compiler::values() const noexcept
{
- const std::size_t symbol_size = m_symbols.size();
- if (symbol_size > std::numeric_limits::max())
- throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size));
-
- m_bytecode.push_back(SYM_TABLE_START);
- m_bytecode.push_back(static_cast((symbol_size & 0xff00) >> 8));
- m_bytecode.push_back(static_cast(symbol_size & 0x00ff));
-
- for (const auto& sym : m_symbols)
- {
- // push the string, null terminated
- std::string s = sym.string();
- std::ranges::transform(s, std::back_inserter(m_bytecode), [](const char i) {
- return static_cast(i);
- });
- m_bytecode.push_back(0_u8);
- }
-
- const std::size_t value_size = m_values.size();
- if (value_size > std::numeric_limits::max())
- throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size));
-
- m_bytecode.push_back(VAL_TABLE_START);
- m_bytecode.push_back(static_cast((value_size & 0xff00) >> 8));
- m_bytecode.push_back(static_cast(value_size & 0x00ff));
-
- for (const ValTableElem& val : m_values)
- {
- if (val.type == ValTableElemType::Number)
- {
- m_bytecode.push_back(NUMBER_TYPE);
- const auto n = std::get(val.value);
- std::string t = std::to_string(n);
- std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
- return static_cast(i);
- });
- }
- else if (val.type == ValTableElemType::String)
- {
- m_bytecode.push_back(STRING_TYPE);
- auto t = std::get(val.value);
- std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
- return static_cast(i);
- });
- }
- else if (val.type == ValTableElemType::PageAddr)
- {
- m_bytecode.push_back(FUNC_TYPE);
- const std::size_t addr = std::get(val.value);
- m_bytecode.push_back(static_cast((addr & 0xff00) >> 8));
- m_bytecode.push_back(static_cast(addr & 0x00ff));
- }
- else
- throw Error("The compiler is trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code");
-
- m_bytecode.push_back(0_u8);
- }
+ return m_values;
}
- std::optional Compiler::getOperator(const std::string& name) noexcept
+ std::optional Compiler::getOperator(const std::string& name) noexcept
{
- const auto it = std::ranges::find(internal::Language::operators, name);
- if (it != internal::Language::operators.end())
- return static_cast(std::distance(internal::Language::operators.begin(), it) + FIRST_OPERATOR);
+ const auto it = std::ranges::find(Language::operators, name);
+ if (it != Language::operators.end())
+ return static_cast(std::distance(Language::operators.begin(), it) + FIRST_OPERATOR);
return std::nullopt;
}
@@ -209,9 +70,9 @@ namespace Ark
std::optional Compiler::getListInstruction(const std::string& name) noexcept
{
- const auto it = std::ranges::find(internal::Language::listInstructions, name);
- if (it != internal::Language::listInstructions.end())
- return static_cast(std::distance(internal::Language::listInstructions.begin(), it) + LIST);
+ const auto it = std::ranges::find(Language::listInstructions, name);
+ if (it != Language::listInstructions.end())
+ return static_cast(std::distance(Language::listInstructions.begin(), it) + LIST);
return std::nullopt;
}
@@ -244,16 +105,6 @@ namespace Ark
}
}
- bool Compiler::mayBeFromPlugin(const std::string& name) noexcept
- {
- std::string splitted = Utils::splitString(name, ':')[0];
- const auto it = std::ranges::find_if(m_plugins,
- [&splitted](const std::string& plugin) -> bool {
- return std::filesystem::path(plugin).stem().string() == splitted;
- });
- return it != m_plugins.end();
- }
-
void Compiler::compilerWarning(const std::string& message, const Node& node)
{
fmt::println("{} {}", fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)), Diagnostics::makeContextWithNode(message, node));
@@ -432,23 +283,23 @@ namespace Ark
compileExpression(x.constList()[1], p, false, false);
// jump only if needed to the if
- const std::size_t jump_to_if_pos = page(p).size();
- page(p).emplace_back(Instruction::POP_JUMP_IF_TRUE);
+ const auto label_then = IR::Entity::Label();
+ page(p).emplace_back(IR::Entity::GotoIf(label_then, true));
// else code
if (x.constList().size() == 4) // we have an else clause
compileExpression(x.constList()[3], p, is_result_unused, is_terminal, var_name);
// when else is finished, jump to end
- const std::size_t jump_to_end_pos = page(p).size();
- page(p).emplace_back(Instruction::JUMP);
+ const auto label_end = IR::Entity::Label();
+ page(p).emplace_back(IR::Entity::Goto(label_end));
// absolute address to jump to if condition is true
- page(p)[jump_to_if_pos].data = static_cast(page(p).size());
+ page(p).emplace_back(label_then);
// if code
compileExpression(x.constList()[2], p, is_result_unused, is_terminal, var_name);
// set jump to end pos
- page(p)[jump_to_end_pos].data = static_cast(page(p).size());
+ page(p).emplace_back(label_end);
}
void Compiler::compileFunction(const Node& x, const Page p, const bool is_result_unused, const std::string& var_name)
@@ -523,20 +374,21 @@ namespace Ark
throwCompilerError("Invalid node ; if it was computed by a macro, check that a node is returned", x);
// save current position to jump there at the end of the loop
- std::size_t current = page(p).size();
+ const auto label_loop = IR::Entity::Label();
+ page(p).emplace_back(label_loop);
// push condition
compileExpression(x.constList()[1], p, false, false);
// absolute jump to end of block if condition is false
- const std::size_t jump_to_end_pos = page(p).size();
- page(p).emplace_back(POP_JUMP_IF_FALSE);
+ const auto label_end = IR::Entity::Label();
+ page(p).emplace_back(IR::Entity::GotoIf(label_end, false));
// push code to page
compileExpression(x.constList()[2], p, true, false);
// loop, jump to the condition
- page(p).emplace_back(JUMP, current);
+ page(p).emplace_back(IR::Entity::Goto(label_loop));
// absolute address to jump to if condition is false
- page(p)[jump_to_end_pos].data = static_cast(page(p).size());
+ page(p).emplace_back(label_end);
}
void Compiler::compilePluginImport(const Node& x, const Page p)
@@ -553,8 +405,6 @@ namespace Ark
// register plugin path in the constants table
uint16_t id = addValue(Node(NodeType::String, path));
- // save plugin name to use it later
- m_plugins.push_back(path);
// add plugin instruction + id of the constant referring to the plugin path
page(p).emplace_back(PLUGIN, id);
}
@@ -564,7 +414,7 @@ namespace Ark
constexpr std::size_t start_index = 1;
const auto node = x.constList()[0];
- const auto maybe_operator = node.nodeType() == NodeType::Symbol ? getOperator(node.string()) : std::nullopt;
+ const std::optional maybe_operator = node.nodeType() == NodeType::Symbol ? getOperator(node.string()) : std::nullopt;
enum class ShortcircuitOp
{
@@ -587,18 +437,16 @@ namespace Ark
compileExpression(x.constList()[1], p, false, false);
page(p).emplace_back(DUP);
- std::vector to_update;
+ const auto label_shortcircuit = IR::Entity::Label();
for (std::size_t i = 2, end = x.constList().size(); i < end; ++i)
{
- to_update.push_back(page(p).size());
-
switch (maybe_shortcircuit.value())
{
case ShortcircuitOp::And:
- page(p).emplace_back(POP_JUMP_IF_FALSE);
+ page(p).emplace_back(IR::Entity::GotoIf(label_shortcircuit, false));
break;
case ShortcircuitOp::Or:
- page(p).emplace_back(POP_JUMP_IF_TRUE);
+ page(p).emplace_back(IR::Entity::GotoIf(label_shortcircuit, true));
break;
}
page(p).emplace_back(POP);
@@ -608,8 +456,7 @@ namespace Ark
page(p).emplace_back(DUP);
}
- for (const auto pos : to_update)
- page(p)[pos].data = static_cast(page(p).size());
+ page(p).emplace_back(label_shortcircuit);
}
else if (!maybe_operator.has_value())
{
@@ -646,8 +493,8 @@ namespace Ark
throwCompilerError(fmt::format("Invalid node inside call to `{}'", node.repr()), x);
}
// push proc from temp page
- for (const Word& word : m_temp_pages.back())
- page(p).push_back(word);
+ for (const auto& inst : m_temp_pages.back())
+ page(p).push_back(inst);
m_temp_pages.pop_back();
// number of arguments
@@ -664,9 +511,9 @@ namespace Ark
else // operator
{
// retrieve operator
- auto op = Word(maybe_operator.value());
+ auto op = maybe_operator.value();
- if (op.opcode == ASSERT)
+ if (op == ASSERT)
is_result_unused = false;
// push arguments on current page
@@ -684,14 +531,14 @@ namespace Ark
// in order to be able to handle things like (op A B C D...)
// which should be transformed into A B op C op D op...
if (exp_count >= 2)
- page(p).emplace_back(op.opcode, 2); // TODO generalize to n arguments (n >= 2)
+ page(p).emplace_back(op);
}
- if (isUnaryInst(static_cast(op.opcode)))
+ if (isUnaryInst(op))
{
if (exp_count != 1)
throwCompilerError(fmt::format("Operator needs one argument, but was called with {}", exp_count), x.constList()[0]);
- page(p).emplace_back(op.opcode);
+ page(p).emplace_back(op);
}
else if (exp_count <= 1)
{
@@ -701,7 +548,7 @@ namespace Ark
// need to check we didn't push the (op A B C D...) things for operators not supporting it
if (exp_count > 2)
{
- switch (op.opcode)
+ switch (op)
{
// authorized instructions
case ADD: [[fallthrough]];
@@ -716,7 +563,7 @@ namespace Ark
fmt::format(
"can not create a chained expression (of length {}) for operator `{}'. You most likely forgot a `)'.",
exp_count,
- Language::operators[static_cast(op.opcode - FIRST_OPERATOR)]),
+ Language::operators[static_cast(op - FIRST_OPERATOR)]),
x);
}
}
@@ -729,12 +576,10 @@ namespace Ark
uint16_t Compiler::addSymbol(const Node& sym)
{
// otherwise, add the symbol, and return its id in the table
- auto it = std::ranges::find_if(m_symbols, [&sym](const Node& sym_node) -> bool {
- return sym_node.string() == sym.string();
- });
+ auto it = std::ranges::find(m_symbols, sym.string());
if (it == m_symbols.end())
{
- m_symbols.push_back(sym);
+ m_symbols.push_back(sym.string());
it = m_symbols.begin() + static_cast::difference_type>(m_symbols.size() - 1);
}
diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp
new file mode 100644
index 000000000..269ea72d9
--- /dev/null
+++ b/src/arkreactor/Compiler/IntermediateRepresentation/Entity.cpp
@@ -0,0 +1,52 @@
+#include
+
+namespace Ark::internal::IR
+{
+    // Entity of an arbitrary kind; it carries no real instruction (NOP)
+    Entity::Entity(const Kind kind) :
+        m_kind(kind), m_inst(NOP)
+    {}
+
+    // Instruction with a single argument
+    Entity::Entity(const Instruction inst, const uint16_t arg) :
+        m_kind(Kind::Opcode), m_inst(inst),
+        m_primary_arg(arg)
+    {}
+
+    // Instruction with two arguments
+    Entity::Entity(const Instruction inst, const uint16_t primary_arg, const uint16_t secondary_arg) :
+        m_kind(Kind::Opcode2Args), m_inst(inst),
+        m_primary_arg(primary_arg),
+        m_secondary_arg(secondary_arg)
+    {}
+
+    // Creates a label identified by the current value of the shared counter,
+    // then advances that counter
+    Entity Entity::Label()
+    {
+        Entity fresh(Kind::Label);
+        fresh.m_label = LabelCounter++;
+        return fresh;
+    }
+
+    // Unconditional jump targeting the given label
+    Entity Entity::Goto(const Entity& label)
+    {
+        Entity jump(Kind::Goto);
+        jump.m_label = label.m_label;
+        return jump;
+    }
+
+    // Conditional jump targeting the given label; `cond` selects the
+    // "jump if true" or the "jump if false" flavour
+    Entity Entity::GotoIf(const Entity& label, const bool cond)
+    {
+        const Kind jump_kind = cond ? Kind::GotoIfTrue : Kind::GotoIfFalse;
+
+        Entity jump(jump_kind);
+        jump.m_label = label.m_label;
+        return jump;
+    }
+
+    // Converts the entity to a bytecode word; kinds that are not plain
+    // instructions yield Word(0, 0)
+    Word Entity::bytecode() const
+    {
+        switch (m_kind)
+        {
+            case Kind::Opcode:
+                return Word(m_inst, m_primary_arg);
+
+            case Kind::Opcode2Args:
+                return Word(m_inst, m_primary_arg, m_secondary_arg);
+
+            default:
+                return Word(0, 0);
+        }
+    }
+}
diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp
new file mode 100644
index 000000000..2c1026139
--- /dev/null
+++ b/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp
@@ -0,0 +1,226 @@
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace Ark::internal
+{
+ using namespace literals;
+
+ IRCompiler::IRCompiler(const unsigned debug) : // `debug` is forwarded to the logger named "IRCompiler"
+ m_logger("IRCompiler", debug)
+ {}
+
+    // Builds the final bytecode: file header, symbol and value tables, then one
+    // code segment per IR page, and finally a SHA256 of everything following
+    // the header, inserted right after it.
+    //   pages:   IR blocks to compile (copied into m_ir)
+    //   symbols: symbol table to serialize
+    //   values:  value table to serialize
+    // Throws std::overflow_error if a table or a page is too large (see
+    // pushSymAndValTables and compile).
+    void IRCompiler::process(const std::vector<IR::Block>& pages, const std::vector<std::string>& symbols, const std::vector<ValTableElem>& values)
+    {
+        pushFileHeader();
+        pushSymAndValTables(symbols, values);
+
+        m_ir = pages;
+        compile();
+
+        if (m_ir.empty())
+        {
+            // code segment with a single instruction
+            m_bytecode.push_back(CODE_SEGMENT_START);
+            m_bytecode.push_back(0_u8);
+            m_bytecode.push_back(1_u8);
+
+            // go through pushWord so that the opcode is emitted first, like every
+            // other instruction word (pushing `0, HALT, 0, 0` by hand would put
+            // the opcode in the second byte)
+            pushWord(Word(HALT, 0));
+        }
+
+        // lang name (4 bytes) + version (3 * 2 bytes) + timestamp (8 bytes)
+        constexpr std::size_t header_size = 18;
+
+        // generate a hash of the tables + bytecode
+        std::vector<unsigned char> hash_out(picosha2::k_digest_size);
+        picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out);
+        m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end());
+    }
+
+ const bytecode_t& IRCompiler::bytecode() const noexcept // read-only access to the bytes accumulated in m_bytecode
+ {
+ return m_bytecode;
+ }
+
+ void IRCompiler::compile() // appends one code segment per page of m_ir to m_bytecode; throws std::overflow_error on an oversized page
+ {
+ // push the different code segments
+ for (std::size_t i = 0, end = m_ir.size(); i < end; ++i)
+ {
+ IR::Block& page = m_ir[i];
+ // just in case we got too far, always add a HALT to be sure the
+ // VM won't do anything crazy
+ page.emplace_back(HALT);
+
+ // the segment size counts every entity except labels, since labels
+ // do not produce any word in the output
+ const auto page_size = std::ranges::count_if(page, [](const auto& a) {
+ return a.kind() != IR::Kind::Label;
+ });
+ if (std::cmp_greater(page_size, std::numeric_limits::max()))
+ throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i));
+
+ m_bytecode.push_back(CODE_SEGMENT_START);
+ m_bytecode.push_back(static_cast((page_size & 0xff00) >> 8)); // high byte of the size first
+ m_bytecode.push_back(static_cast(page_size & 0x00ff));
+
+ // first pass: map each label to the index of the next emitted instruction
+ // (pos only advances on non-label entities)
+ uint16_t pos = 0;
+ std::unordered_map label_to_position;
+ for (auto inst : page) // NOTE(review): iterates by value, each entity is copied
+ {
+ switch (inst.kind())
+ {
+ case IR::Kind::Label:
+ label_to_position[inst.label()] = pos;
+ break;
+
+ default:
+ ++pos;
+ }
+ }
+
+ // second pass: emit one word per non-label entity, resolving jump targets
+ // through label_to_position
+ // NOTE(review): operator[] default-inserts 0 for a label that was never
+ // registered in this page -- confirm this cannot happen
+ for (auto inst : page)
+ {
+ switch (inst.kind())
+ {
+ case IR::Kind::Goto:
+ pushWord(Word(JUMP, label_to_position[inst.label()]));
+ break;
+
+ case IR::Kind::GotoIfTrue:
+ pushWord(Word(POP_JUMP_IF_TRUE, label_to_position[inst.label()]));
+ break;
+
+ case IR::Kind::GotoIfFalse:
+ pushWord(Word(POP_JUMP_IF_FALSE, label_to_position[inst.label()]));
+ break;
+
+ case IR::Kind::Opcode:
+ [[fallthrough]];
+ case IR::Kind::Opcode2Args:
+ pushWord(inst.bytecode());
+ break;
+
+ default: // labels: nothing to emit
+ break;
+ }
+ }
+ }
+ }
+
+    // Appends the four bytes of an instruction word to the bytecode,
+    // opcode first, then byte_1, byte_2 and byte_3
+    void IRCompiler::pushWord(const Word& word)
+    {
+        const uint8_t serialized[4] = { word.opcode, word.byte_1, word.byte_2, word.byte_3 };
+        for (const uint8_t byte : serialized)
+            m_bytecode.push_back(byte);
+    }
+
+ void IRCompiler::pushFileHeader() noexcept // appends the 18 bytes file header to m_bytecode
+ {
+ /*
+ Generating headers:
+ - lang name (to be sure we are executing an ArkScript file)
+ on 4 bytes (ark + padding)
+ - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes),
+ each component with its high byte first
+ - timestamp (8 bytes, unix format), most significant byte first
+ */
+
+ m_bytecode.push_back('a');
+ m_bytecode.push_back('r');
+ m_bytecode.push_back('k');
+ m_bytecode.push_back(0_u8);
+
+ // push version
+ for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH })
+ {
+ m_bytecode.push_back(static_cast((n & 0xff00) >> 8));
+ m_bytecode.push_back(static_cast(n & 0x00ff));
+ }
+
+ // push timestamp
+ const long long timestamp = std::chrono::duration_cast(
+ std::chrono::system_clock::now().time_since_epoch())
+ .count();
+ for (long i = 0; i < 8; ++i)
+ {
+ const long shift = 8 * (7 - i); // 56, 48, ..., 0: walks from the top byte down to the lowest one
+ const auto ts_byte = static_cast((timestamp & (0xffLL << shift)) >> shift);
+ m_bytecode.push_back(ts_byte);
+ }
+ }
+
+ void IRCompiler::pushSymAndValTables(const std::vector& symbols, const std::vector& values) // serializes both tables into m_bytecode; throws std::overflow_error if one of them is too large
+ {
+ const std::size_t symbol_size = symbols.size();
+ if (symbol_size > std::numeric_limits::max())
+ throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size));
+
+ m_bytecode.push_back(SYM_TABLE_START);
+ m_bytecode.push_back(static_cast((symbol_size & 0xff00) >> 8)); // symbol count, high byte first
+ m_bytecode.push_back(static_cast(symbol_size & 0x00ff));
+
+ for (const auto& sym : symbols)
+ {
+ // push the string, null terminated
+ std::ranges::transform(sym, std::back_inserter(m_bytecode), [](const char i) {
+ return static_cast(i);
+ });
+ m_bytecode.push_back(0_u8);
+ }
+
+ const std::size_t value_size = values.size();
+ if (value_size > std::numeric_limits::max())
+ throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size));
+
+ m_bytecode.push_back(VAL_TABLE_START);
+ m_bytecode.push_back(static_cast((value_size & 0xff00) >> 8)); // value count, high byte first
+ m_bytecode.push_back(static_cast(value_size & 0x00ff));
+
+ for (const ValTableElem& val : values) // each entry: a type tag, a payload, then a 0 byte
+ {
+ switch (val.type) // NOTE(review): no default case, an unhandled type emits only the trailing 0 byte
+ {
+ case ValTableElemType::Number:
+ {
+ m_bytecode.push_back(NUMBER_TYPE);
+ const auto n = std::get(val.value);
+ std::string t = std::to_string(n); // NOTE(review): if n is a floating point number, std::to_string keeps 6 decimals only -- confirm the precision loss is acceptable
+ std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
+ return static_cast(i);
+ });
+ break;
+ }
+
+ case ValTableElemType::String:
+ {
+ m_bytecode.push_back(STRING_TYPE);
+ auto t = std::get(val.value);
+ std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
+ return static_cast(i);
+ });
+ break;
+ }
+
+ case ValTableElemType::PageAddr:
+ {
+ m_bytecode.push_back(FUNC_TYPE);
+ const std::size_t addr = std::get(val.value);
+ m_bytecode.push_back(static_cast((addr & 0xff00) >> 8)); // only the low 16 bits of addr are written, high byte first
+ m_bytecode.push_back(static_cast(addr & 0x00ff));
+ break;
+ }
+ }
+
+ m_bytecode.push_back(0_u8);
+ }
+ }
+}
diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp
new file mode 100644
index 000000000..caf78b79a
--- /dev/null
+++ b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp
@@ -0,0 +1,139 @@
+#include
+
+namespace Ark::internal
+{
+ IROptimizer::IROptimizer(const unsigned debug) : // `debug` is forwarded to the logger named "IROptimizer"
+ m_logger("IROptimizer", debug)
+ {}
+
+    // Peephole pass: copies every page into m_ir, replacing known sequences of
+    // two or three instructions by a single super instruction.
+    //   pages:   IR blocks to optimize (left untouched)
+    //   symbols: symbol table, stored in m_symbols
+    //   values:  value table, stored in m_values and used to recognize the
+    //            constant 1 for INCREMENT / DECREMENT
+    // A sequence is only fused into a two-operand super instruction when both
+    // operands fit on 12 bits, since that is all the room each of them gets in
+    // the instruction word; otherwise the instructions are copied unchanged.
+    void IROptimizer::process(const std::vector<IR::Block>& pages, const std::vector<std::string>& symbols, const std::vector<ValTableElem>& values)
+    {
+        m_symbols = symbols;
+        m_values = values;
+
+        // true when both operands can be packed in a two-operand instruction
+        const auto fits_packed = [](const uint16_t lhs, const uint16_t rhs) {
+            constexpr uint16_t max_packed_arg = 0x0fff;
+            return lhs <= max_packed_arg && rhs <= max_packed_arg;
+        };
+        // true when the constant `id` exists and is the number 1
+        const auto is_number_one = [this](const uint16_t id) {
+            return id < m_values.size() &&
+                m_values[id].type == ValTableElemType::Number &&
+                std::get<double>(m_values[id].value) == 1;
+        };
+
+        for (const auto& block : pages)
+        {
+            m_ir.emplace_back();
+            IR::Block& current_block = m_ir.back();
+
+            const std::size_t end = block.size();
+            std::size_t i = 0;
+
+            while (i < end)
+            {
+                const Instruction first = block[i].inst();
+                const uint16_t arg_1 = block[i].primaryArg();
+
+                if (i + 1 < end)
+                {
+                    const Instruction second = block[i + 1].inst();
+                    const uint16_t arg_2 = block[i + 1].primaryArg();
+
+                    // two-instruction patterns, NOP meaning "no match":
+                    //   LOAD_CONST x  ; LOAD_CONST y  ---> LOAD_CONST_LOAD_CONST x y
+                    //   LOAD_CONST x  ; STORE a       ---> LOAD_CONST_STORE x a
+                    //   LOAD_CONST x  ; SET_VAL a     ---> LOAD_CONST_SET_VAL x a
+                    //   LOAD_SYMBOL a ; STORE b       ---> STORE_FROM a b
+                    //   LOAD_SYMBOL a ; SET_VAL b     ---> SET_VAL_FROM a b
+                    Instruction fused = NOP;
+                    if (first == LOAD_CONST && second == LOAD_CONST)
+                        fused = LOAD_CONST_LOAD_CONST;
+                    else if (first == LOAD_CONST && second == STORE)
+                        fused = LOAD_CONST_STORE;
+                    else if (first == LOAD_CONST && second == SET_VAL)
+                        fused = LOAD_CONST_SET_VAL;
+                    else if (first == LOAD_SYMBOL && second == STORE)
+                        fused = STORE_FROM;
+                    else if (first == LOAD_SYMBOL && second == SET_VAL)
+                        fused = SET_VAL_FROM;
+
+                    if (fused != NOP && fits_packed(arg_1, arg_2))
+                    {
+                        current_block.emplace_back(fused, arg_1, arg_2);
+                        i += 2;
+                        continue;
+                    }
+
+                    // three-instruction patterns are only tried when no pair matched
+                    if (fused == NOP && i + 2 < end)
+                    {
+                        const Instruction third = block[i + 2].inst();
+                        const uint16_t arg_3 = block[i + 2].primaryArg();
+
+                        // LOAD_CONST 1  ; LOAD_SYMBOL a ; ADD ---> INCREMENT a
+                        if (third == ADD && first == LOAD_CONST && second == LOAD_SYMBOL && is_number_one(arg_1))
+                        {
+                            current_block.emplace_back(INCREMENT, arg_2);
+                            i += 3;
+                            continue;
+                        }
+                        // LOAD_SYMBOL a ; LOAD_CONST 1  ; ADD ---> INCREMENT a
+                        if (third == ADD && first == LOAD_SYMBOL && second == LOAD_CONST && is_number_one(arg_2))
+                        {
+                            current_block.emplace_back(INCREMENT, arg_1);
+                            i += 3;
+                            continue;
+                        }
+                        // LOAD_SYMBOL a ; LOAD_CONST 1  ; SUB ---> DECREMENT a
+                        if (third == SUB && first == LOAD_SYMBOL && second == LOAD_CONST && is_number_one(arg_2))
+                        {
+                            current_block.emplace_back(DECREMENT, arg_1);
+                            i += 3;
+                            continue;
+                        }
+                        // LOAD_SYMBOL list ; TAIL / HEAD ; STORE / SET_VAL a
+                        // ---> STORE_TAIL list a ; STORE_HEAD ; SET_VAL_TAIL ; SET_VAL_HEAD
+                        if (first == LOAD_SYMBOL && (second == TAIL || second == HEAD) &&
+                            (third == STORE || third == SET_VAL) && fits_packed(arg_1, arg_3))
+                        {
+                            const Instruction on_tail = third == STORE ? STORE_TAIL : SET_VAL_TAIL;
+                            const Instruction on_head = third == STORE ? STORE_HEAD : SET_VAL_HEAD;
+                            current_block.emplace_back(second == TAIL ? on_tail : on_head, arg_1, arg_3);
+                            i += 3;
+                            continue;
+                        }
+                    }
+                }
+
+                // nothing to fuse here: keep the entity as it is
+                current_block.emplace_back(block[i]);
+                ++i;
+            }
+        }
+    }
+
+ const std::vector& IROptimizer::intermediateRepresentation() const noexcept // read-only access to the pages built by process()
+ {
+ return m_ir;
+ }
+}
diff --git a/src/arkreactor/Compiler/ImportSolver.cpp b/src/arkreactor/Compiler/Package/ImportSolver.cpp
similarity index 99%
rename from src/arkreactor/Compiler/ImportSolver.cpp
rename to src/arkreactor/Compiler/Package/ImportSolver.cpp
index e4ecf070b..19ca4b8a7 100644
--- a/src/arkreactor/Compiler/ImportSolver.cpp
+++ b/src/arkreactor/Compiler/Package/ImportSolver.cpp
@@ -1,4 +1,4 @@
-#include
+#include
#include
#include
diff --git a/src/arkreactor/Compiler/Welder.cpp b/src/arkreactor/Compiler/Welder.cpp
index 7c0966165..0e6cb2e62 100644
--- a/src/arkreactor/Compiler/Welder.cpp
+++ b/src/arkreactor/Compiler/Welder.cpp
@@ -1,13 +1,14 @@
#include
#include
-#include
+#include
#include
#include
-
+#include
#include
#include
-#include
+
+#include
namespace Ark
{
@@ -20,6 +21,8 @@ namespace Ark
m_ast_optimizer(debug),
m_name_resolver(debug),
m_logger("Welder", debug),
+ m_ir_optimizer(debug),
+ m_ir_compiler(debug),
m_compiler(debug)
{}
@@ -48,7 +51,19 @@ namespace Ark
try
{
m_compiler.process(m_computed_ast);
- m_bytecode = m_compiler.bytecode();
+ m_ir = m_compiler.intermediateRepresentation();
+
+ if ((m_features & FeatureIROptimizer) != 0)
+ {
+ m_ir_optimizer.process(m_ir, m_compiler.symbols(), m_compiler.values());
+ m_ir = m_ir_optimizer.intermediateRepresentation();
+ }
+
+ if ((m_features & FeatureDumpIR) != 0)
+ dumpIRToFile();
+
+ m_ir_compiler.process(m_ir, m_compiler.symbols(), m_compiler.values());
+ m_bytecode = m_ir_compiler.bytecode();
return true;
}
@@ -87,6 +102,57 @@ namespace Ark
return m_bytecode;
}
+    // Writes a textual listing of m_ir next to the root file: `output.ark.ir`
+    // inside m_root_file when it is a directory, otherwise m_root_file with its
+    // extension replaced by `.ark.ir`. Each page is introduced by `page_N` and
+    // followed by an empty line. Nothing is written if the file can't be opened.
+    void Welder::dumpIRToFile() const
+    {
+        std::filesystem::path path = m_root_file;
+        if (is_directory(m_root_file))
+            path /= "output.ark.ir";
+        else
+            path.replace_extension(".ark.ir");
+
+        std::ofstream output(path);
+        if (!output.is_open())
+            return;
+
+        std::size_t index = 0;
+        for (const auto& block : m_ir)
+        {
+            fmt::println(output, "page_{}", index);
+            // entities are only read, no need to copy each of them
+            for (const auto& entity : block)
+            {
+                switch (entity.kind())
+                {
+                    case internal::IR::Kind::Label:
+                        fmt::println(output, ".L{}:", entity.label());
+                        break;
+
+                    case internal::IR::Kind::Goto:
+                        fmt::println(output, "\tGOTO L{}", entity.label());
+                        break;
+
+                    case internal::IR::Kind::GotoIfTrue:
+                        fmt::println(output, "\tGOTO_IF_TRUE L{}", entity.label());
+                        break;
+
+                    case internal::IR::Kind::GotoIfFalse:
+                        fmt::println(output, "\tGOTO_IF_FALSE L{}", entity.label());
+                        break;
+
+                    case internal::IR::Kind::Opcode:
+                        fmt::println(output, "\t{} {}", internal::InstructionNames[entity.inst()], entity.primaryArg());
+                        break;
+
+                    case internal::IR::Kind::Opcode2Args:
+                        fmt::println(output, "\t{} {}, {}", internal::InstructionNames[entity.inst()], entity.primaryArg(), entity.secondaryArg());
+                        break;
+                }
+            }
+
+            fmt::println(output, "");
+            ++index;
+        }
+        // the stream is closed by its destructor
+    }
+
bool Welder::computeAST(const std::string& filename, const std::string& code)
{
try
diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp
index 85c95a1c2..ba2b9fe64 100644
--- a/src/arkreactor/VM/VM.cpp
+++ b/src/arkreactor/VM/VM.cpp
@@ -22,6 +22,60 @@ namespace Ark
{
using namespace internal;
+ namespace helper
+ {
+        // Returns everything but the first element of a list or a string.
+        // A list (resp. string) with fewer than two elements gives an empty
+        // list (resp. string). Any other type is reported through
+        // types::generateError.
+        inline Value tail(Value* a)
+        {
+            if (a->valueType() == ValueType::List)
+            {
+                const auto& items = a->constList();
+                if (items.size() < 2)
+                    return Value(ValueType::List);
+
+                // build the result straight from the range, instead of default
+                // constructing every element and then overwriting it
+                return Value(std::vector<Value>(items.begin() + 1, items.end()));
+            }
+            if (a->valueType() == ValueType::String)
+            {
+                if (a->string().size() < 2)
+                    return Value(ValueType::String);
+
+                // work on a copy of the value, then drop its first character
+                Value b { *a };
+                b.stringRef().erase(b.stringRef().begin());
+                return b;
+            }
+
+            types::generateError(
+                "tail",
+                { { types::Contract { { types::Typedef("value", ValueType::List) } },
+                    types::Contract { { types::Typedef("value", ValueType::String) } } } },
+                { *a });
+        }
+
+        // Returns the first element of a list (Builtins::nil when it is empty)
+        // or the first character of a string as a one-character string (an
+        // empty string when it is empty). Any other type is reported through
+        // types::generateError.
+        inline Value head(Value* a)
+        {
+            switch (a->valueType())
+            {
+                case ValueType::List:
+                {
+                    if (!a->constList().empty())
+                        return a->constList()[0];
+                    return Builtins::nil;
+                }
+
+                case ValueType::String:
+                {
+                    if (!a->string().empty())
+                        return Value(std::string(1, a->stringRef()[0]));
+                    return Value(ValueType::String);
+                }
+
+                default:
+                    break;
+            }
+
+            types::generateError(
+                "head",
+                { { types::Contract { { types::Typedef("value", ValueType::List) } },
+                    types::Contract { { types::Typedef("value", ValueType::String) } } } },
+                { *a });
+        }
+ }
+
VM::VM(State& state) noexcept :
m_state(state), m_exit_code(0), m_running(false)
{
@@ -281,8 +335,8 @@ namespace Ark
#define NEXTOPARG() \
do \
{ \
- padding = m_state.m_pages[context.pp][context.ip]; \
- inst = m_state.m_pages[context.pp][context.ip + 1]; \
+ inst = m_state.m_pages[context.pp][context.ip]; \
+ padding = m_state.m_pages[context.pp][context.ip + 1]; \
arg = static_cast((m_state.m_pages[context.pp][context.ip + 2] << 8) + \
m_state.m_pages[context.pp][context.ip + 3]); \
context.ip += 4; \
@@ -290,6 +344,12 @@ namespace Ark
#define DISPATCH() \
NEXTOPARG(); \
DISPATCH_GOTO();
+#define UNPACK_ARGS() \
+ do \
+ { \
+ secondary_arg = static_cast((padding << 4) | (arg & 0xf000) >> 12); \
+ primary_arg = arg & 0x0fff; \
+ } while (false)
#if ARK_USE_COMPUTED_GOTOS
# pragma GCC diagnostic push
@@ -344,15 +404,29 @@ namespace Ark
&&TARGET_TYPE,
&&TARGET_HASFIELD,
&&TARGET_NOT,
+ &&TARGET_LOAD_CONST_LOAD_CONST,
+ &&TARGET_LOAD_CONST_STORE,
+ &&TARGET_LOAD_CONST_SET_VAL,
+ &&TARGET_STORE_FROM,
+ &&TARGET_SET_VAL_FROM,
+ &&TARGET_INCREMENT,
+ &&TARGET_DECREMENT,
+ &&TARGET_STORE_TAIL,
+ &&TARGET_STORE_HEAD,
+ &&TARGET_SET_VAL_TAIL,
+ &&TARGET_SET_VAL_HEAD
};
# pragma GCC diagnostic pop
#endif
try
{
- [[maybe_unused]] uint8_t padding = 0;
uint8_t inst = 0;
+ uint8_t padding = 0;
uint16_t arg = 0;
+ uint16_t primary_arg = 0;
+ uint16_t secondary_arg = 0;
+
m_running = true;
DISPATCH();
@@ -370,23 +444,13 @@ namespace Ark
TARGET(LOAD_SYMBOL)
{
- context.last_symbol = arg;
- if (Value* var = findNearestVariable(context.last_symbol, context); var != nullptr) [[likely]]
- {
- // push internal reference, shouldn't break anything so far, unless it's already a ref
- if (var->valueType() == ValueType::Reference)
- push(var->reference(), context);
- else
- push(var, context);
- }
- else [[unlikely]]
- throwVMError(ErrorKind::Scope, fmt::format("Unbound variable `{}'", m_state.m_symbols[context.last_symbol]));
+ push(loadSymbol(arg, context), context);
DISPATCH();
}
TARGET(LOAD_CONST)
{
- push(&(m_state.m_constants[arg]), context);
+ push(loadConstAsPtr(arg), context);
DISPATCH();
}
@@ -399,33 +463,13 @@ namespace Ark
TARGET(STORE)
{
- {
- Value val = *popAndResolveAsPtr(context);
- // avoid adding the pair (id, _) multiple times, with different values
- Value* local = context.locals.back()[arg];
- if (local == nullptr) [[likely]]
- context.locals.back().push_back(arg, val);
- else
- *local = val;
- }
-
+ store(arg, popAndResolveAsPtr(context), context);
DISPATCH();
}
TARGET(SET_VAL)
{
- {
- Value val = *popAndResolveAsPtr(context);
- if (Value* var = findNearestVariable(arg, context); var != nullptr) [[likely]]
- {
- if (var->valueType() == ValueType::Reference)
- *var->reference() = val;
- else [[likely]]
- *var = val;
- }
- else
- throwVMError(ErrorKind::Scope, fmt::format("Unbound variable `{}', can not change its value to {}", m_state.m_symbols[arg], val.toString(*this)));
- }
+ setVal(arg, popAndResolveAsPtr(context), context);
DISPATCH();
}
@@ -535,7 +579,6 @@ namespace Ark
}
throwVMError(ErrorKind::Scope, fmt::format("Can not delete unbound variable `{}'", m_state.m_symbols[arg]));
- DISPATCH();
}
TARGET(MAKE_CLOSURE)
@@ -568,9 +611,8 @@ namespace Ark
if (Value* field = var->refClosure().refScope()[arg]; field != nullptr)
{
- // check for CALL instruction
- // doing a +1 on the IP to read the instruction because context.ip is already on the next instruction word (the padding)
- if (context.ip + 1 < m_state.m_pages[context.pp].size() && m_state.m_pages[context.pp][context.ip + 1] == CALL)
+ // check for CALL instruction (no +1 on the IP is needed: context.ip is already on the next instruction word, which starts with its opcode)
+ if (m_state.m_pages[context.pp][context.ip] == CALL)
push(Value(Closure(var->refClosure().scopePtr(), field->pageAddr())), context);
else
push(field, context);
@@ -896,63 +938,14 @@ namespace Ark
TARGET(TAIL)
{
Value* a = popAndResolveAsPtr(context);
-
- if (a->valueType() == ValueType::List)
- {
- if (a->constList().size() < 2)
- push(Value(ValueType::List), context);
- else
- {
- std::vector tmp(a->constList().size() - 1);
- for (std::size_t i = 1, end = a->constList().size(); i < end; ++i)
- tmp[i - 1] = a->constList()[i];
- push(Value(std::move(tmp)), context);
- }
- }
- else if (a->valueType() == ValueType::String)
- {
- if (a->string().size() < 2)
- push(Value(ValueType::String), context);
- else
- {
- Value b { *a };
- b.stringRef().erase(b.stringRef().begin());
- push(std::move(b), context);
- }
- }
- else
- types::generateError(
- "tail",
- { { types::Contract { { types::Typedef("value", ValueType::List) } },
- types::Contract { { types::Typedef("value", ValueType::String) } } } },
- { *a });
+ push(helper::tail(a), context);
DISPATCH();
}
TARGET(HEAD)
{
Value* a = popAndResolveAsPtr(context);
-
- if (a->valueType() == ValueType::List)
- {
- if (a->constList().empty())
- push(Builtins::nil, context);
- else
- push(a->constList()[0], context);
- }
- else if (a->valueType() == ValueType::String)
- {
- if (a->string().empty())
- push(Value(ValueType::String), context);
- else
- push(Value(std::string(1, a->stringRef()[0])), context);
- }
- else
- types::generateError(
- "head",
- { { types::Contract { { types::Typedef("value", ValueType::List) } },
- types::Contract { { types::Typedef("value", ValueType::String) } } } },
- { *a });
+ push(helper::head(a), context);
DISPATCH();
}
@@ -1101,6 +1094,130 @@ namespace Ark
DISPATCH();
}
+#pragma endregion
+
+#pragma region "Super Instructions"
+ TARGET(LOAD_CONST_LOAD_CONST)
+ {
+ UNPACK_ARGS();
+ push(loadConstAsPtr(primary_arg), context);
+ push(loadConstAsPtr(secondary_arg), context);
+ DISPATCH();
+ }
+
+ TARGET(LOAD_CONST_STORE)
+ {
+ UNPACK_ARGS();
+ store(secondary_arg, loadConstAsPtr(primary_arg), context);
+ DISPATCH();
+ }
+
+ TARGET(LOAD_CONST_SET_VAL)
+ {
+ UNPACK_ARGS();
+ setVal(secondary_arg, loadConstAsPtr(primary_arg), context);
+ DISPATCH();
+ }
+
+ TARGET(STORE_FROM)
+ {
+ UNPACK_ARGS();
+ store(secondary_arg, loadSymbol(primary_arg, context), context);
+ DISPATCH();
+ }
+
+ TARGET(SET_VAL_FROM)
+ {
+ UNPACK_ARGS();
+ setVal(secondary_arg, loadSymbol(primary_arg, context), context);
+ DISPATCH();
+ }
+
+ TARGET(INCREMENT)
+ {
+ UNPACK_ARGS();
+ {
+ Value* var = loadSymbol(primary_arg, context);
+
+ // use internal reference, shouldn't break anything so far, unless it's already a ref
+ if (var->valueType() == ValueType::Reference)
+ var = var->reference();
+
+ if (var->valueType() == ValueType::Number)
+ push(Value(var->number() + 1), context);
+ else
+ types::generateError(
+ "+",
+ { { types::Contract { { types::Typedef("a", ValueType::Number), types::Typedef("b", ValueType::Number) } } } },
+ { *var, Value(1) });
+ }
+ DISPATCH();
+ }
+
+ TARGET(DECREMENT)
+ {
+ UNPACK_ARGS();
+ {
+ Value* var = loadSymbol(primary_arg, context);
+
+ // use internal reference, shouldn't break anything so far, unless it's already a ref
+ if (var->valueType() == ValueType::Reference)
+ var = var->reference();
+
+ if (var->valueType() == ValueType::Number)
+ push(Value(var->number() - 1), context);
+ else
+ types::generateError(
+ "-",
+ { { types::Contract { { types::Typedef("a", ValueType::Number), types::Typedef("b", ValueType::Number) } } } },
+ { *var, Value(1) });
+ }
+ DISPATCH();
+ }
+
+ TARGET(STORE_TAIL)
+ {
+ UNPACK_ARGS();
+ {
+ Value* list = loadSymbol(primary_arg, context);
+ Value tail = helper::tail(list);
+ store(secondary_arg, &tail, context);
+ }
+ DISPATCH();
+ }
+
+ TARGET(STORE_HEAD)
+ {
+ UNPACK_ARGS();
+ {
+ Value* list = loadSymbol(primary_arg, context);
+ Value head = helper::head(list);
+ store(secondary_arg, &head, context);
+ }
+ DISPATCH();
+ }
+
+ TARGET(SET_VAL_TAIL)
+ {
+ UNPACK_ARGS();
+ {
+ Value* list = loadSymbol(primary_arg, context);
+ Value tail = helper::tail(list);
+ setVal(secondary_arg, &tail, context);
+ }
+ DISPATCH();
+ }
+
+ TARGET(SET_VAL_HEAD)
+ {
+ UNPACK_ARGS();
+ {
+ Value* list = loadSymbol(primary_arg, context);
+ Value head = helper::head(list);
+ setVal(secondary_arg, &head, context);
+ }
+ DISPATCH();
+ }
#pragma endregion
}
#if ARK_USE_COMPUTED_GOTOS
diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp
index c8812fb8d..e99c48cf1 100644
--- a/src/arkscript/main.cpp
+++ b/src/arkscript/main.cpp
@@ -42,7 +42,7 @@ int main(int argc, char** argv)
uint16_t bcr_page = max_uint16;
uint16_t bcr_start = max_uint16;
uint16_t bcr_end = max_uint16;
- Ark::BytecodeSegment segment = Ark::BytecodeSegment::All;
+ auto segment = Ark::BytecodeSegment::All;
// Eval / Run / AST dump
std::string file, eval_expression;
std::string libdir;
@@ -72,8 +72,17 @@ int main(int argc, char** argv)
option("-foptimizer").call([&] { passes |= Ark::FeatureASTOptimizer; })
| option("-fno-optimizer").call([&] { passes &= ~Ark::FeatureASTOptimizer; })
).doc("Toggle on and off the optimizer pass");
- // cppcheck-suppress constStatement
- const auto compiler_passes_flag = (import_solver_pass_flag, macro_proc_pass_flag, optimizer_pass_flag);
+ auto ir_optimizer_pass_flag = (
+ option("-firoptimizer").call([&] { passes |= Ark::FeatureIROptimizer; })
+ | option("-fno-iroptimizer").call([&] { passes &= ~Ark::FeatureIROptimizer; })
+ ).doc("Toggle on and off the IR optimizer pass");
+ auto ir_dump = option("-fdump-ir").call([&] { passes |= Ark::FeatureDumpIR; })
+ .doc("Dump IR to file.ark.ir");
+
+ const auto compiler_passes_flag = (
+ // cppcheck-suppress constStatement
+ import_solver_pass_flag, macro_proc_pass_flag, optimizer_pass_flag, ir_optimizer_pass_flag, ir_dump
+ );
auto cli = (
option("-h", "--help").set(selected, mode::help).doc("Display this message")
diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md
new file mode 100644
index 000000000..52568b31c
--- /dev/null
+++ b/tests/benchmarks/README.md
@@ -0,0 +1,20 @@
+# Benchmarks
+
+## Running the benchmarks and storing the results
+
+1. Run from the repository root
+2. Make sure the target `arkscript` has been compiled in release mode, as well as `bench`
+```bash
+result="tests/benchmarks/results/$(set -- tests/benchmarks/results/*.csv; echo $#)-$(git rev-parse --short HEAD).csv"
+cmake-build-release/bench \
+ --benchmark_min_warmup_time=1 \
+ --benchmark_format=csv \
+ --benchmark_time_unit=ms \
+ --v=0 | grep -Ev "(New parser|Welder)" > $result
+```
+
+## Generate the comparison
+
+```bash
+python3 tests/benchmarks/compare.py tests/benchmarks/results/*.csv
+```
diff --git a/tests/benchmarks/results/5-ee9ff764.csv b/tests/benchmarks/results/5-ee9ff764.csv
new file mode 100644
index 000000000..22c636dd0
--- /dev/null
+++ b/tests/benchmarks/results/5-ee9ff764.csv
@@ -0,0 +1,5 @@
+name,iterations,real_time,cpu_time,time_unit,bytes_per_second,items_per_second,label,error_occurred,error_message
+"quicksort",4601,0.150416,0.150228,ms,,,,,
+"ackermann/iterations:50",50,59.9067,59.8294,ms,,,,,
+"fibonacci/iterations:100",100,6.27221,6.26509,ms,,,,,
+"man_or_boy",45605,0.0153367,0.015325,ms,,,,,
diff --git a/tests/unittests/CompilerSuite.cpp b/tests/unittests/CompilerSuite.cpp
new file mode 100644
index 000000000..d177bbbb7
--- /dev/null
+++ b/tests/unittests/CompilerSuite.cpp
@@ -0,0 +1,37 @@
+#include
+
+#include
+
+using namespace boost;
+
+ut::suite<"Compiler"> compiler_suite = [] {
+ using namespace ut;
+
+ "Word construction"_test = [] {
+ should("create a word with a single argument on 2 bytes") = [] {
+ const auto word = Ark::internal::Word(12, 0x5678);
+ expect(that % word.opcode == 12);
+ expect(that % word.byte_1 == 0);
+ expect(that % word.byte_2 == 0x56);
+ expect(that % word.byte_3 == 0x78);
+ };
+
+ constexpr uint16_t primary_arg = 0x0567;
+ constexpr uint16_t secondary_arg = 0x089a;
+ const auto word = Ark::internal::Word(12, primary_arg, secondary_arg);
+ should("split arguments evenly between 3 bytes") = [&] {
+ expect(that % word.opcode == 12);
+ expect(that % word.byte_1 == 0x89);
+ expect(that % word.byte_2 == 0xa5);
+ expect(that % word.byte_3 == 0x67);
+ };
+
+ should("be able to unpack both arguments from word") = [&] {
+ const uint8_t padding = word.byte_1;
+ const auto arg = static_cast((word.byte_2 << 8) | word.byte_3);
+
+ expect(that % primary_arg == (arg & 0x0fff));
+ expect(that % secondary_arg == ((padding << 4) | (arg & 0xf000) >> 12));
+ };
+ };
+};
diff --git a/tests/unittests/EmbeddingSuite.cpp b/tests/unittests/EmbeddingSuite.cpp
index d4041f131..0a277b5a7 100644
--- a/tests/unittests/EmbeddingSuite.cpp
+++ b/tests/unittests/EmbeddingSuite.cpp
@@ -76,15 +76,20 @@ ut::suite<"Embedding"> embedding_suite = [] {
};
should("have symbol foo registered") = [&] {
- auto func = mut(vm)["foo"];
+ const auto func = mut(vm)["foo"];
expect(func.isFunction());
};
should("(foo 5 6.0) have a value of 13") = [&] {
- auto value = mut(vm).call("foo", 5, 6.0);
+ const auto value = mut(vm).call("foo", 5, 6.0);
expect(value.valueType() == Ark::ValueType::Number);
expect(value.number() == 13.0_d);
};
+
+ should("get nil when retrieving unbound symbol") = [&] {
+ const auto value = mut(vm)["unknown"];
+ expect(value.valueType() == Ark::ValueType::Nil);
+ };
};
"[reset the VM and use it to run code again]"_test = [] {