18 using namespace internal;
19 using namespace literals;
39 for (std::size_t i = 0, end =
m_code_pages.size(); i < end; ++i)
44 page.emplace_back(Instruction::HALT);
47 const std::size_t page_size =
page.size();
48 if (page_size > std::numeric_limits<uint16_t>::max())
49 throw std::overflow_error(
"Size of page " + std::to_string(i) +
" exceeds the maximum size of 2^16 - 1");
51 m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
52 m_bytecode.push_back(
static_cast<uint16_t
>((page_size & 0xff00) >> 8));
53 m_bytecode.push_back(
static_cast<uint16_t
>(page_size & 0x00ff));
55 for (
auto inst :
page)
67 m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
77 constexpr std::size_t header_size = 18;
80 std::vector<unsigned char> hash_out(picosha2::k_digest_size);
108 m_bytecode.push_back(
static_cast<uint16_t
>((n & 0xff00) >> 8));
109 m_bytecode.push_back(
static_cast<uint16_t
>(n & 0x00ff));
113 const unsigned long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
114 std::chrono::system_clock::now().time_since_epoch())
116 for (std::size_t i = 0; i < 8; ++i)
118 const unsigned shift = 8 * (7 - i);
119 uint8_t ts_byte = (timestamp & (0xffULL << shift)) >> shift;
126 const std::size_t symbol_size =
m_symbols.size();
127 if (symbol_size > std::numeric_limits<uint16_t>::max())
128 throw std::overflow_error(
"Too many symbols: " + std::to_string(symbol_size) +
", exceeds the maximum size of 2^16 - 1");
131 m_bytecode.push_back(
static_cast<uint16_t
>((symbol_size & 0xff00) >> 8));
132 m_bytecode.push_back(
static_cast<uint16_t
>(symbol_size & 0x00ff));
137 std::string s = sym.string();
138 for (
const char i : s)
143 const std::size_t value_size =
m_values.size();
144 if (value_size > std::numeric_limits<uint16_t>::max())
145 throw std::overflow_error(
"Too many values: " + std::to_string(value_size) +
", exceeds the maximum size of 2^16 - 1");
148 m_bytecode.push_back(
static_cast<uint16_t
>((value_size & 0xff00) >> 8));
149 m_bytecode.push_back(
static_cast<uint16_t
>(value_size & 0x00ff));
153 if (val.type == ValTableElemType::Number)
156 const auto n = std::get<double>(val.value);
157 std::string t = std::to_string(n);
158 for (
const char i : t)
161 else if (val.type == ValTableElemType::String)
164 auto t = std::get<std::string>(val.value);
165 for (
const char i : t)
168 else if (val.type == ValTableElemType::PageAddr)
171 const std::size_t addr = std::get<std::size_t>(val.value);
172 m_bytecode.push_back(
static_cast<uint16_t
>((addr & 0xff00) >> 8));
173 m_bytecode.push_back(
static_cast<uint16_t
>(addr & 0x00ff));
176 throw Error(
"The compiler is trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code");
193 [&name](
const std::pair<std::string, Value>& element) ->
bool {
194 return name == element.first;
205 case NOT: [[fallthrough]];
206 case LEN: [[fallthrough]];
207 case EMPTY: [[fallthrough]];
208 case TAIL: [[fallthrough]];
209 case HEAD: [[fallthrough]];
210 case ISNIL: [[fallthrough]];
211 case TO_NUM: [[fallthrough]];
212 case TO_STR: [[fallthrough]];
213 case TYPE: [[fallthrough]];
243 const auto it = std::ranges::find_if(m_plugins,
244 [&splitted](
const std::string& plugin) ->
bool {
245 return std::filesystem::path(plugin).stem().string() == splitted;
247 return it != m_plugins.end();
263 if (x.
nodeType() == NodeType::Symbol)
265 else if (x.
nodeType() == NodeType::Field)
276 else if (x.
nodeType() == NodeType::String || x.
nodeType() == NodeType::Number)
280 if (!is_result_unused)
286 if (!is_result_unused)
288 static const std::optional<std::size_t> nil =
getBuiltin(
"nil");
289 page(p).emplace_back(
BUILTIN,
static_cast<uint16_t
>(nil.value()));
293 else if (
const auto c0 = x.
constList()[0]; c0.nodeType() == NodeType::Symbol &&
getSpecific(c0.string()).has_value())
296 else if (x.
constList()[0].nodeType() == NodeType::Keyword)
301 compileIf(x, p, is_result_unused, is_terminal, var_name);
318 for (std::size_t i = 1, size = x.
constList().size(); i < size; ++i)
323 (i != size - 1) || is_result_unused,
325 is_terminal && (i == size - 1),
334 case Keyword::Import:
347 handleCalls(x, p, is_result_unused, is_terminal, var_name);
353 const std::string& name = x.
string();
356 page(p).emplace_back(Instruction::BUILTIN,
static_cast<uint16_t
>(it_builtin.value()));
357 else if (
const auto it_operator =
getOperator(name))
362 if (is_result_unused)
371 std::string name = c0.
string();
375 const uint16_t argc = x.
constList().size() - 1;
377 if (argc < 2 && inst !=
LIST)
378 throwCompilerError(fmt::format(
"Can not use {} with less than 2 arguments", name), c0);
381 for (uint16_t i = x.
constList().size() - 1; i > 0; --i)
387 if (is_result_unused && name.back() !=
'!')
394 void Compiler::compileIf(
const Node& x,
const int p,
const bool is_result_unused,
const bool is_terminal,
const std::string& var_name)
400 const std::size_t jump_to_if_pos =
page(p).size();
401 page(p).emplace_back(Instruction::POP_JUMP_IF_TRUE);
408 const std::size_t jump_to_end_pos =
page(p).size();
409 page(p).emplace_back(Instruction::JUMP);
412 page(p)[jump_to_if_pos].data =
static_cast<uint16_t
>(
page(p).size());
416 page(p)[jump_to_end_pos].data =
static_cast<uint16_t
>(
page(p).size());
422 for (
const auto& node : x.
constList()[1].constList())
424 if (node.nodeType() == NodeType::Capture)
430 throwCompilerError(
"Can not capture " + node.string() +
" because node is referencing an unbound variable.", node);
445 for (
const auto& node : x.
constList()[1].constList())
447 if (node.nodeType() == NodeType::Symbol)
458 page(page_id).emplace_back(
RET);
461 if (is_result_unused)
470 const std::string name = x.
constList()[1].string();
472 if (n != Keyword::Set)
477 for (std::size_t idx = 2, end = x.
constList().size(); idx < end; ++idx)
480 if (n == Keyword::Let)
482 else if (n == Keyword::Mut)
491 std::size_t current =
page(p).size();
495 const std::size_t jump_to_end_pos =
page(p).size();
501 page(p).emplace_back(
JUMP, current);
504 page(p)[jump_to_end_pos].data =
static_cast<uint16_t
>(
page(p).size());
511 for (std::size_t i = 0, end = package_node.
constList().size(); i < end; ++i)
513 path += package_node.
constList()[i].string();
530 const int proc_page = -
static_cast<int>(
m_temp_pages.size());
531 constexpr std::size_t start_index = 1;
539 if (is_terminal && x.
constList()[0].nodeType() == NodeType::Symbol && var_name == x.
constList()[0].string())
545 for (std::size_t i = x.
constList().size() - 1; i >= start_index; --i)
555 for (
auto exp = x.
constList().begin() + start_index, exp_end = x.
constList().end(); exp != exp_end; ++exp)
559 page(p).push_back(word);
563 std::size_t args_count = 0;
564 for (
auto it = x.
constList().begin() + 1, it_end = x.
constList().end(); it != it_end; ++it)
566 if (it->nodeType() != NodeType::Capture)
570 page(p).emplace_back(
CALL, args_count);
580 is_result_unused =
false;
583 std::size_t exp_count = 0;
584 for (std::size_t index = start_index, size = x.
constList().size(); index < size; ++index)
588 if ((index + 1 < size && x.
constList()[index + 1].nodeType() != NodeType::Capture) || index + 1 == size)
594 page(p).emplace_back(op.opcode, 2);
600 page(p).emplace_back(op.opcode);
611 case ADD: [[fallthrough]];
612 case SUB: [[fallthrough]];
613 case MUL: [[fallthrough]];
614 case DIV: [[fallthrough]];
615 case AND_: [[fallthrough]];
616 case OR_: [[fallthrough]];
622 "can not create a chained expression (of length " + std::to_string(exp_count) +
624 "'. You most likely forgot a `)'.",
630 if (is_result_unused)
637 auto it = std::ranges::find_if(
m_symbols, [&sym](
const Node& sym_node) ->
bool {
646 const auto distance = std::distance(
m_symbols.begin(), it);
647 if (distance < std::numeric_limits<uint16_t>::max())
648 return static_cast<uint16_t
>(distance);
655 auto it = std::ranges::find(
m_values, v);
662 const auto distance = std::distance(
m_values.begin(), it);
663 if (distance < std::numeric_limits<uint16_t>::max())
664 return static_cast<uint16_t
>(distance);
671 auto it = std::ranges::find(
m_values, v);
678 const auto distance = std::distance(
m_values.begin(), it);
679 if (distance < std::numeric_limits<uint16_t>::max())
680 return static_cast<uint16_t
>(distance);
681 throwCompilerError(
"Too many values (exceeds 65'536), aborting compilation.", current);
695 const std::string& str = sym.string();
701 if (suggestion.empty())
702 throwCompilerError(
"Unbound variable error \"" + str +
"\" (variable is used but not defined)", sym);
704 throwCompilerError(
"Unbound variable error \"" + str +
"\" (did you mean \"" + suggestion +
"\"?)", sym);
711 std::string suggestion;
713 std::size_t suggestion_distance = str.size() / 2;
718 if (current_distance <= suggestion_distance)
720 suggestion_distance = current_distance;
Assorted utilities for strings, the filesystem, and more.
Hosts the declarations of all the ArkScript builtins.
ArkScript compiler is in charge of transforming the AST into bytecode.
Constants used by ArkScript.
constexpr int ARK_VERSION_MAJOR
constexpr int ARK_VERSION_PATCH
constexpr int ARK_VERSION_MINOR
User defined literals for Ark internals.
Handles the macros and their expansion in ArkScript source code.
static void compilerWarning(const std::string &message, const internal::Node &node)
Display a warning message.
void compileSymbol(const internal::Node &x, int p, bool is_result_unused)
std::vector< std::vector< internal::Word > > m_code_pages
void handleCalls(const internal::Node &x, int p, bool is_result_unused, bool is_terminal, const std::string &var_name)
void compileExpression(const internal::Node &x, int p, bool is_result_unused, bool is_terminal, const std::string &var_name="")
Compile an expression (a node) recursively.
void compileLetMutSet(internal::Keyword n, const internal::Node &x, int p)
static std::optional< std::size_t > getBuiltin(const std::string &name) noexcept
Checking if a symbol is a builtin.
std::vector< std::string > m_defined_symbols
std::string offerSuggestion(const std::string &str) const
Suggest a symbol of what the user may have meant to input.
void checkForUndefinedSymbol()
Checks for undefined symbols, not present in the defined symbols table.
bool mayBeFromPlugin(const std::string &name) noexcept
Checking if a symbol may be coming from a plugin.
void compileSpecific(const internal::Node &c0, const internal::Node &x, int p, bool is_result_unused)
std::vector< std::string > m_plugins
void process(const internal::Node &ast)
Start the compilation.
void compilePluginImport(const internal::Node &x, int p)
void addDefinedSymbol(const std::string &sym)
Register a symbol as defined, so that later we can throw errors on undefined symbols.
std::vector< internal::ValTableElem > m_values
void compileWhile(const internal::Node &x, int p)
std::vector< std::vector< internal::Word > > m_temp_pages
we need temporary code pages for some compilation passes
void compileFunction(const internal::Node &x, int p, bool is_result_unused, const std::string &var_name)
static std::optional< internal::Instruction > getSpecific(const std::string &name) noexcept
Check if a symbol needs to be compiled to a specific instruction.
uint16_t addSymbol(const internal::Node &sym)
Register a given node in the symbol table.
void compileIf(const internal::Node &x, int p, bool is_result_unused, bool is_terminal, const std::string &var_name)
static std::optional< std::size_t > getOperator(const std::string &name) noexcept
Checking if a symbol is an operator.
static uint16_t computeSpecificInstArgc(internal::Instruction inst, uint16_t previous) noexcept
Compute specific instruction argument count.
const bytecode_t & bytecode() const noexcept
Return the constructed bytecode object.
uint16_t addValue(const internal::Node &x)
Register a given node in the value table.
std::vector< internal::Node > m_symbols
static void throwCompilerError(const std::string &message, const internal::Node &node)
Throw a nice error message.
std::vector< internal::Word > & page(const int i) noexcept
helper functions to get a temp or finalized code page
static bool isUnaryInst(internal::Instruction inst) noexcept
Check if a given instruction is unary (takes only one argument)
void pushFileHeader() noexcept
Push the file headers (magic, version used, timestamp)
Compiler(unsigned debug)
Construct a new Compiler object.
void pushSymAndValTables()
Push the symbols and values tables.
A node of an Abstract Syntax Tree for ArkScript.
NodeType nodeType() const noexcept
Return the node type.
const std::string & filename() const noexcept
Return the filename in which this node was created.
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
std::string repr() const noexcept
Compute a representation of the node without any comments or additional sugar, colors, etc.
std::size_t col() const noexcept
Get the column at which this node was created.
std::size_t line() const noexcept
Get the line at which this node was created.
ARK_API std::string makeContextWithNode(const std::string &message, const internal::Node &node)
Helper used by the compiler to generate a colorized context from a node.
std::vector< std::string > splitString(const std::string &source, const char sep)
Cut a string into pieces, given a character separator.
int levenshteinDistance(const std::string &str1, const std::string &str2)
Calculate the Levenshtein distance between two strings.
const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 25 > operators
Keyword
The different keywords available.
Instruction
The different bytecodes are stored here.
std::vector< uint8_t > bytecode_t
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
A Compiler Value class helper to handle multiple types.