8#include <termcolor/termcolor.hpp>
18 using namespace internal;
19 using namespace literals;
22 m_parser(debug, options, libenv), m_optimizer(options),
23 m_options(options), m_debug(debug)
51 m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
56 for (
auto inst :
page)
65 m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
70 constexpr std::size_t header_size = 18;
73 std::vector<unsigned char> hash_out(picosha2::k_digest_size);
81 std::cout <<
"Final bytecode size: " <<
m_bytecode.size() *
sizeof(uint8_t) <<
"B\n";
83 std::ofstream output(file, std::ofstream::binary);
114 unsigned long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
115 std::chrono::system_clock::now().time_since_epoch())
117 for (
char c = 0; c < 8; c++)
119 unsigned shift = 8 * (7 - c);
120 uint8_t ts_byte = (timestamp & (0xffULL << shift)) >> shift;
134 m_bytecode.push_back(Instruction::SYM_TABLE_START);
141 std::string s = sym.string();
142 for (std::size_t i = 0, size = s.size(); i < size; ++i)
148 m_bytecode.push_back(Instruction::VAL_TABLE_START);
154 if (val.type == ValTableElemType::Number)
156 m_bytecode.push_back(Instruction::NUMBER_TYPE);
157 auto n = std::get<double>(val.value);
158 std::string t = std::to_string(n);
159 for (std::size_t i = 0, size = t.size(); i < size; ++i)
162 else if (val.type == ValTableElemType::String)
164 m_bytecode.push_back(Instruction::STRING_TYPE);
165 std::string t = std::get<std::string>(val.value);
166 for (std::size_t i = 0, size = t.size(); i < size; ++i)
169 else if (val.type == ValTableElemType::PageAddr)
172 pushNumber(
static_cast<uint16_t
>(std::get<std::size_t>(val.value)));
175 throw CompilationError(
"trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code");
184 for (
const Node& node : lst)
186 if (node.nodeType() != NodeType::GetField)
203 [&name](
const std::pair<std::string, Value>& element) ->
bool {
204 return name == element.first;
215 case Instruction::NOT: [[fallthrough]];
216 case Instruction::LEN: [[fallthrough]];
217 case Instruction::EMPTY: [[fallthrough]];
218 case Instruction::TAIL: [[fallthrough]];
219 case Instruction::HEAD: [[fallthrough]];
220 case Instruction::ISNIL: [[fallthrough]];
221 case Instruction::TO_NUM: [[fallthrough]];
222 case Instruction::TO_STR: [[fallthrough]];
223 case Instruction::TYPE: [[fallthrough]];
224 case Instruction::HASFIELD:
234 if (inst == Instruction::LIST)
235 pushNumber(previous, page_ptr(p));
236 else if (inst == Instruction::APPEND || inst == Instruction::APPEND_IN_PLACE ||
237 inst == Instruction::CONCAT || inst == Instruction::CONCAT_IN_PLACE)
238 pushNumber(previous - 1, page_ptr(p));
244 auto it = std::find_if(m_plugins.begin(), m_plugins.end(),
245 [&splitted](
const std::string& plugin) ->
bool {
246 return std::filesystem::path(plugin).stem().string() == splitted;
248 return it != m_plugins.end();
259 std::cerr << termcolor::yellow <<
"Warning " << termcolor::reset <<
makeNodeBasedErrorCtx(message, node) <<
"\n";
265 if (x.
nodeType() == NodeType::Symbol)
267 else if (x.
nodeType() == NodeType::GetField)
269 std::string name = x.
string();
273 page(p).emplace_back(Instruction::GET_FIELD);
277 else if (x.
nodeType() == NodeType::String || x.
nodeType() == NodeType::Number)
281 if (!produces_result)
283 page(p).emplace_back(Instruction::LOAD_CONST);
290 if (!produces_result)
293 page(p).emplace_back(Instruction::BUILTIN);
298 else if (
auto c0 = x.
constList()[0]; c0.nodeType() == NodeType::Symbol &&
isSpecific(c0.string()).has_value())
301 else if (x.
constList()[0].nodeType() == NodeType::Keyword)
308 compileIf(x, p, produces_result, is_terminal, var_name);
325 for (std::size_t i = 1, size = x.
constList().size(); i < size; ++i)
330 (i != size - 1) ?
true : produces_result,
332 is_terminal ? (i == size - 1) :
false,
341 case Keyword::Import:
346 compileQuote(x, p, produces_result, is_terminal, var_name);
358 handleCalls(x, p, produces_result, is_terminal, var_name);
364 std::string name = x.
string();
368 page(p).emplace_back(Instruction::BUILTIN);
372 page(p).emplace_back(
static_cast<uint8_t
>(Instruction::FIRST_OPERATOR + it_operator.value()));
377 page(p).emplace_back(Instruction::LOAD_SYMBOL);
384 page(p).push_back(Instruction::POP);
390 std::string name = c0.
string();
396 if (argc < 2 && inst != Instruction::LIST)
397 throw CompilationError(
"can not use " + name +
" with less than 2 arguments");
400 for (uint16_t i = x.
constList().size() - 1; i > 0; --i)
403 while (x.
constList()[j].nodeType() == NodeType::GetField)
405 uint16_t diff = i - j;
416 page(p).emplace_back(inst);
419 if (produces_result && name.back() !=
'!')
422 page(p).push_back(Instruction::POP);
432 page(p).push_back(Instruction::POP_JUMP_IF_TRUE);
433 std::size_t jump_to_if_pos =
page(p).size();
442 page(p).push_back(Instruction::JUMP);
443 std::size_t jump_to_end_pos =
page(p).size();
457 for (
auto it = x.
constList()[1].constList().begin(), it_end = x.
constList()[1].constList().end(); it != it_end; ++it)
459 if (it->nodeType() == NodeType::Capture)
465 throwCompilerError(
"Can not capture " + it->string() +
" because it is referencing an unbound variable.", *it);
467 page(p).emplace_back(Instruction::CAPTURE);
478 page(p).emplace_back(Instruction::LOAD_CONST);
483 for (
auto it = x.
constList()[1].constList().begin(), it_end = x.
constList()[1].constList().end(); it != it_end; ++it)
485 if (it->nodeType() == NodeType::Symbol)
487 page(page_id).emplace_back(Instruction::MUT);
498 page(page_id).emplace_back(Instruction::RET);
503 page(p).push_back(Instruction::POP);
510 if (n != Keyword::Set)
516 if (n == Keyword::Let)
517 page(p).push_back(Instruction::LET);
518 else if (n == Keyword::Mut)
519 page(p).push_back(Instruction::MUT);
521 page(p).push_back(Instruction::STORE);
528 std::size_t current =
page(p).size();
532 page(p).push_back(Instruction::POP_JUMP_IF_FALSE);
533 std::size_t jump_to_end_pos =
page(p).size();
539 page(p).push_back(Instruction::JUMP);
552 page(page_id).emplace_back(Instruction::RET);
556 page(p).emplace_back(Instruction::LOAD_CONST);
562 page(p).push_back(Instruction::POP);
573 page(p).emplace_back(Instruction::PLUGIN);
582 page(p).emplace_back(Instruction::DEL);
594 const std::size_t end = x.
constList().size();
597 if (x.
constList()[n].nodeType() == NodeType::GetField)
609 if (proc_page_len > 1)
611 if (is_terminal && x.
constList()[0].nodeType() == NodeType::Symbol && var_name == x.
constList()[0].string())
617 for (std::size_t i = x.
constList().size() - 1; i >= n; --i)
621 page(p).push_back(Instruction::JUMP);
629 for (
auto exp = x.
constList().begin() + n, exp_end = x.
constList().end(); exp != exp_end; ++exp)
633 page(p).push_back(inst);
637 page(p).push_back(Instruction::CALL);
639 std::size_t args_count = 0;
640 for (
auto it = x.
constList().begin() + 1, it_end = x.
constList().end(); it != it_end; ++it)
642 if (it->nodeType() != NodeType::GetField &&
643 it->nodeType() != NodeType::Capture)
655 if (op_inst == Instruction::ASSERT)
656 produces_result =
false;
659 std::size_t exp_count = 0;
660 for (std::size_t index = n, size = x.
constList().size(); index < size; ++index)
664 if ((index + 1 < size &&
665 x.
constList()[index + 1].nodeType() != NodeType::GetField &&
666 x.
constList()[index + 1].nodeType() != NodeType::Capture) ||
673 page(p).push_back(op_inst);
679 page(p).push_back(op_inst);
690 case Instruction::ADD: [[fallthrough]];
691 case Instruction::SUB: [[fallthrough]];
692 case Instruction::MUL: [[fallthrough]];
693 case Instruction::DIV: [[fallthrough]];
694 case Instruction::AND_: [[fallthrough]];
695 case Instruction::OR_: [[fallthrough]];
696 case Instruction::MOD:
701 "can not create a chained expression (of length " + std::to_string(exp_count) +
702 ") for operator `" + std::string(
internal::operators[
static_cast<std::size_t
>(op_inst - Instruction::FIRST_OPERATOR)]) +
703 "'. You most likely forgot a `)'.",
710 page(p).push_back(Instruction::POP);
715 std::string name = x.
constList()[1].string();
718 for (std::size_t idx = 2, end = x.
constList().size(); idx < end; ++idx)
726 return sym_node.string() == sym.string();
734 auto distance = std::distance(
m_symbols.begin(), it);
735 if (distance < std::numeric_limits<uint16_t>::max())
736 return static_cast<uint16_t
>(distance);
751 auto distance = std::distance(
m_values.begin(), it);
752 if (distance < std::numeric_limits<uint16_t>::max())
753 return static_cast<uint16_t
>(distance);
768 auto distance = std::distance(
m_values.begin(), it);
769 if (distance < std::numeric_limits<uint16_t>::max())
770 return static_cast<uint16_t
>(distance);
772 throwCompilerError(
"Too many values (exceeds 65'536), aborting compilation.", current);
787 const std::string& str = sym.string();
794 if (suggestion.empty())
795 throwCompilerError(
"Unbound variable error \"" + str +
"\" (variable is used but not defined)", sym);
797 throwCompilerError(
"Unbound variable error \"" + str +
"\" (did you mean \"" + suggestion +
"\"?)", sym);
804 std::string suggestion;
806 std::size_t suggestion_distance = str.size() / 2;
811 if (current_distance <= suggestion_distance)
813 suggestion_distance = current_distance;
825 m_bytecode.push_back((n & 0xff00) >> 8);
826 m_bytecode.push_back(n & 0x00ff);
830 page->emplace_back((n & 0xff00) >> 8);
831 page->emplace_back(n & 0x00ff);
Host the declaration of all the ArkScript builtins.
ArkScript compiler is in charge of transforming the AST into bytecode.
constexpr int ARK_VERSION_MAJOR
constexpr int ARK_VERSION_PATCH
constexpr int ARK_VERSION_MINOR
User defined literals for Ark internals.
Handles the macros and their expansion in ArkScript source code.
Lots of utilities about string, filesystem and more.
CompilationError thrown by the compiler.
void compilerWarning(const std::string &message, const internal::Node &node)
Display a warning message.
void _compile(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name="")
Compile a single node recursively.
void putValue(const internal::Node &x, int p, bool produces_result)
Put a value in the bytecode, handling the closures chains.
void pushNumber(uint16_t n, std::vector< uint8_t > *page=nullptr) noexcept
Push a number on stack (needs 2 bytes)
void pushSpecificInstArgc(internal::Instruction inst, uint16_t previous, int p) noexcept
Compute specific instruction argument count.
void compileIf(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name)
std::optional< internal::Instruction > isSpecific(const std::string &name) noexcept
Check if a symbol needs to be compiled to a specific instruction.
std::vector< std::vector< uint8_t > > m_temp_pages
we need temporary code pages for some compilation passes
void compileLetMutSet(internal::Keyword n, const internal::Node &x, int p)
std::vector< std::string > m_defined_symbols
Compiler(unsigned debug, const std::vector< std::string > &libenv, uint16_t options=DefaultFeatures)
Construct a new Compiler object.
void checkForUndefinedSymbol()
Checks for undefined symbols, not present in the defined symbols table.
internal::Optimizer m_optimizer
const bytecode_t & bytecode() noexcept
Return the constructed bytecode object.
bool mayBeFromPlugin(const std::string &name) noexcept
Checking if a symbol may be coming from a plugin.
void compileDel(const internal::Node &x, int p)
void compileFunction(const internal::Node &x, int p, bool produces_result, const std::string &var_name)
std::vector< std::string > m_plugins
std::vector< uint8_t > & page(int i) noexcept
helper functions to get a temp or finalized code page
void compile()
Start the compilation.
void compilePluginImport(const internal::Node &x, int p)
void addDefinedSymbol(const std::string &sym)
Register a symbol as defined, so that later we can throw errors on undefined symbols.
std::string offerSuggestion(const std::string &str)
Suggest a symbol of what the user may have meant to input.
unsigned m_debug
the debug level of the compiler
std::vector< internal::ValTableElem > m_values
void compileWhile(const internal::Node &x, int p)
std::optional< std::size_t > isBuiltin(const std::string &name) noexcept
Checking if a symbol is a builtin.
internal::Parser m_parser
uint16_t addSymbol(const internal::Node &sym)
Register a given node in the symbol table.
void compileSpecific(const internal::Node &c0, const internal::Node &x, int p, bool produces_result)
uint16_t addValue(const internal::Node &x)
Register a given node in the value table.
std::vector< uint8_t > * page_ptr(int i) noexcept
helper functions to get a temp or finalized code page
void handleCalls(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name)
void feed(const std::string &code, const std::string &filename=ARK_NO_NAME_FILE)
Feed the different variables with information taken from the given source code file.
void compileSymbol(const internal::Node &x, int p, bool produces_result)
void setNumberAt(int p, std::size_t at_inst, std::size_t number)
std::vector< internal::Node > m_symbols
void throwCompilerError(const std::string &message, const internal::Node &node)
Throw a nice error message.
std::optional< std::size_t > isOperator(const std::string &name) noexcept
Checking if a symbol is an operator.
void compileQuote(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name)
void saveTo(const std::string &file)
Save generated bytecode to a file.
std::vector< std::vector< uint8_t > > m_code_pages
bool isUnaryInst(internal::Instruction inst) noexcept
Check if a given instruction is unary (takes only one argument)
std::size_t countArkObjects(const std::vector< internal::Node > &lst) noexcept
Count the number of "valid" ark objects in a node.
void pushFileHeader() noexcept
Push the file headers (magic, version used, timestamp)
void pushSymAndValTables()
Push the symbols and values tables.
The class handling the macros definitions and calls, given an AST.
const Node & ast() const noexcept
Return the modified AST.
void feed(const Node &ast)
Send the complete AST (after the inclusions and stuff), and work on it.
A node of an Abstract Syntax Tree for ArkScript.
NodeType nodeType() const noexcept
Return the node type.
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
void feed(const Node &ast)
Send the AST to the optimizer, then run the different optimization strategies on it.
const Node & ast() const noexcept
Returns the modified AST.
const Node & ast() const noexcept
Return the generated AST.
void feed(const std::string &code, const std::string &filename=ARK_NO_NAME_FILE)
Give the code to parse.
std::vector< std::string > splitString(const std::string &source, char sep)
Cut a string into pieces, given a character separator.
int levenshteinDistance(const std::string &str1, const std::string &str2)
Calculate the Levenshtein distance between two strings.
const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 25 > operators
std::string makeNodeBasedErrorCtx(const std::string &message, const Node &node)
Construct an error message based on a given node.
Keyword
The different keywords available.
Instruction
The different bytecodes are stored here.
std::vector< uint8_t > bytecode_t
constexpr uint16_t FeatureShowWarnings
A Compiler Value class helper to handle multiple types.