ArkScript
A small, fast, functional and scripting language for video games
Compiler.cpp
Go to the documentation of this file.
2
3#include <chrono>
4#include <limits>
5#include <filesystem>
6#include <picosha2.h>
7#include <Ark/Constants.hpp>
8#include <termcolor/proxy.hpp>
9#include <fmt/core.h>
10
11#include <Ark/Literals.hpp>
12#include <Ark/Utils.hpp>
15
16namespace Ark
17{
18 using namespace internal;
19 using namespace literals;
20
21 Compiler::Compiler(const unsigned debug) :
22 m_debug(debug)
23 {}
24
25 void Compiler::process(const Node& ast)
26 {
28
29 m_code_pages.emplace_back(); // create empty page
30
31 // gather symbols, values, and start to create code segments
32 compileExpression(ast, /* current_page */ 0, /* is_result_unused */ false, /* is_terminal */ false);
33 // throw an error on undefined symbol uses
35
37
38 // push the different code segments
39 for (std::size_t i = 0, end = m_code_pages.size(); i < end; ++i)
40 {
41 std::vector<Word>& page = m_code_pages[i];
42 // just in case we got too far, always add a HALT to be sure the
43 // VM won't do anything crazy
44 page.emplace_back(Instruction::HALT);
45
46 // push number of elements
47 const std::size_t page_size = page.size();
48 if (page_size > std::numeric_limits<uint16_t>::max())
49 throw std::overflow_error("Size of page " + std::to_string(i) + " exceeds the maximum size of 2^16 - 1");
50
51 m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
52 m_bytecode.push_back(static_cast<uint16_t>((page_size & 0xff00) >> 8));
53 m_bytecode.push_back(static_cast<uint16_t>(page_size & 0x00ff));
54
55 for (auto inst : page)
56 {
57 m_bytecode.push_back(inst.padding);
58 m_bytecode.push_back(inst.opcode);
59 m_bytecode.push_back(inst.bytes.first);
60 m_bytecode.push_back(inst.bytes.second);
61 }
62 }
63
64 if (m_code_pages.empty())
65 {
66 // code segment with a single instruction
67 m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
68 m_bytecode.push_back(0_u8);
69 m_bytecode.push_back(1_u8);
70
71 m_bytecode.push_back(0_u8);
72 m_bytecode.push_back(Instruction::HALT);
73 m_bytecode.push_back(0_u8);
74 m_bytecode.push_back(0_u8);
75 }
76
77 constexpr std::size_t header_size = 18;
78
79 // generate a hash of the tables + bytecode
80 std::vector<unsigned char> hash_out(picosha2::k_digest_size);
81 picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out);
82 m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end());
83 }
84
85 const bytecode_t& Compiler::bytecode() const noexcept
86 {
87 return m_bytecode;
88 }
89
91 {
92 /*
93 Generating headers:
94 - lang name (to be sure we are executing an ArkScript file)
95 on 4 bytes (ark + padding)
96 - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes)
97 - timestamp (8 bytes, unix format)
98 */
99
100 m_bytecode.push_back('a');
101 m_bytecode.push_back('r');
102 m_bytecode.push_back('k');
103 m_bytecode.push_back(0_u8);
104
105 // push version
106 for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH })
107 {
108 m_bytecode.push_back(static_cast<uint16_t>((n & 0xff00) >> 8));
109 m_bytecode.push_back(static_cast<uint16_t>(n & 0x00ff));
110 }
111
112 // push timestamp
113 const unsigned long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
114 std::chrono::system_clock::now().time_since_epoch())
115 .count();
116 for (std::size_t i = 0; i < 8; ++i)
117 {
118 const unsigned shift = 8 * (7 - i);
119 uint8_t ts_byte = (timestamp & (0xffULL << shift)) >> shift;
120 m_bytecode.push_back(ts_byte);
121 }
122 }
123
125 {
126 const std::size_t symbol_size = m_symbols.size();
127 if (symbol_size > std::numeric_limits<uint16_t>::max())
128 throw std::overflow_error("Too many symbols: " + std::to_string(symbol_size) + ", exceeds the maximum size of 2^16 - 1");
129
130 m_bytecode.push_back(SYM_TABLE_START);
131 m_bytecode.push_back(static_cast<uint16_t>((symbol_size & 0xff00) >> 8));
132 m_bytecode.push_back(static_cast<uint16_t>(symbol_size & 0x00ff));
133
134 for (const auto& sym : m_symbols)
135 {
136 // push the string, null terminated
137 std::string s = sym.string();
138 for (const char i : s)
139 m_bytecode.push_back(i);
140 m_bytecode.push_back(0_u8);
141 }
142
143 const std::size_t value_size = m_values.size();
144 if (value_size > std::numeric_limits<uint16_t>::max())
145 throw std::overflow_error("Too many values: " + std::to_string(value_size) + ", exceeds the maximum size of 2^16 - 1");
146
147 m_bytecode.push_back(VAL_TABLE_START);
148 m_bytecode.push_back(static_cast<uint16_t>((value_size & 0xff00) >> 8));
149 m_bytecode.push_back(static_cast<uint16_t>(value_size & 0x00ff));
150
151 for (const ValTableElem& val : m_values)
152 {
153 if (val.type == ValTableElemType::Number)
154 {
155 m_bytecode.push_back(NUMBER_TYPE);
156 const auto n = std::get<double>(val.value);
157 std::string t = std::to_string(n);
158 for (const char i : t)
159 m_bytecode.push_back(i);
160 }
161 else if (val.type == ValTableElemType::String)
162 {
163 m_bytecode.push_back(STRING_TYPE);
164 auto t = std::get<std::string>(val.value);
165 for (const char i : t)
166 m_bytecode.push_back(i);
167 }
168 else if (val.type == ValTableElemType::PageAddr)
169 {
170 m_bytecode.push_back(FUNC_TYPE);
171 const std::size_t addr = std::get<std::size_t>(val.value);
172 m_bytecode.push_back(static_cast<uint16_t>((addr & 0xff00) >> 8));
173 m_bytecode.push_back(static_cast<uint16_t>(addr & 0x00ff));
174 }
175 else
176 throw Error("The compiler is trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code");
177
178 m_bytecode.push_back(0_u8);
179 }
180 }
181
182 std::optional<std::size_t> Compiler::getOperator(const std::string& name) noexcept
183 {
184 const auto it = std::ranges::find(internal::operators, name);
185 if (it != internal::operators.end())
186 return std::distance(internal::operators.begin(), it);
187 return std::nullopt;
188 }
189
190 std::optional<std::size_t> Compiler::getBuiltin(const std::string& name) noexcept
191 {
192 const auto it = std::ranges::find_if(Builtins::builtins,
193 [&name](const std::pair<std::string, Value>& element) -> bool {
194 return name == element.first;
195 });
196 if (it != Builtins::builtins.end())
197 return std::distance(Builtins::builtins.begin(), it);
198 return std::nullopt;
199 }
200
201 bool Compiler::isUnaryInst(const Instruction inst) noexcept
202 {
203 switch (inst)
204 {
205 case NOT: [[fallthrough]];
206 case LEN: [[fallthrough]];
207 case EMPTY: [[fallthrough]];
208 case TAIL: [[fallthrough]];
209 case HEAD: [[fallthrough]];
210 case ISNIL: [[fallthrough]];
211 case TO_NUM: [[fallthrough]];
212 case TO_STR: [[fallthrough]];
213 case TYPE: [[fallthrough]];
214 case HASFIELD:
215 return true;
216
217 default:
218 return false;
219 }
220 }
221
222 uint16_t Compiler::computeSpecificInstArgc(const Instruction inst, const uint16_t previous) noexcept
223 {
224 switch (inst)
225 {
226 case LIST:
227 return previous;
228
229 case APPEND:
230 case APPEND_IN_PLACE:
231 case CONCAT:
232 case CONCAT_IN_PLACE:
233 return previous - 1;
234
235 default:
236 return 0;
237 }
238 }
239
240 bool Compiler::mayBeFromPlugin(const std::string& name) noexcept
241 {
242 std::string splitted = Utils::splitString(name, ':')[0];
243 const auto it = std::ranges::find_if(m_plugins,
244 [&splitted](const std::string& plugin) -> bool {
245 return std::filesystem::path(plugin).stem().string() == splitted;
246 });
247 return it != m_plugins.end();
248 }
249
250 void Compiler::compilerWarning(const std::string& message, const Node& node)
251 {
252 std::cout << termcolor::yellow << "Warning " << termcolor::reset << Diagnostics::makeContextWithNode(message, node) << "\n";
253 }
254
255 void Compiler::throwCompilerError(const std::string& message, const Node& node)
256 {
257 throw CodeError(message, node.filename(), node.line(), node.col(), node.repr());
258 }
259
260 void Compiler::compileExpression(const Node& x, const int p, const bool is_result_unused, const bool is_terminal, const std::string& var_name)
261 {
262 // register symbols
263 if (x.nodeType() == NodeType::Symbol)
264 compileSymbol(x, p, is_result_unused);
265 else if (x.nodeType() == NodeType::Field)
266 {
267 // the parser guarantees us that there is at least 2 elements (eg: a.b)
268 compileSymbol(x.constList()[0], p, is_result_unused);
269 for (auto it = x.constList().begin() + 1, end = x.constList().end(); it != end; ++it)
270 {
271 uint16_t i = addSymbol(*it);
272 page(p).emplace_back(GET_FIELD, i);
273 }
274 }
275 // register values
276 else if (x.nodeType() == NodeType::String || x.nodeType() == NodeType::Number)
277 {
278 uint16_t i = addValue(x);
279
280 if (!is_result_unused)
281 page(p).emplace_back(LOAD_CONST, i);
282 }
283 // empty code block should be nil
284 else if (x.constList().empty())
285 {
286 if (!is_result_unused)
287 {
288 static const std::optional<std::size_t> nil = getBuiltin("nil");
289 page(p).emplace_back(BUILTIN, static_cast<uint16_t>(nil.value()));
290 }
291 }
292 // specific instructions
293 else if (const auto c0 = x.constList()[0]; c0.nodeType() == NodeType::Symbol && getSpecific(c0.string()).has_value())
294 compileSpecific(c0, x, p, is_result_unused);
295 // registering structures
296 else if (x.constList()[0].nodeType() == NodeType::Keyword)
297 {
298 switch (const Keyword keyword = x.constList()[0].keyword())
299 {
300 case Keyword::If:
301 compileIf(x, p, is_result_unused, is_terminal, var_name);
302 break;
303
304 case Keyword::Set:
305 [[fallthrough]];
306 case Keyword::Let:
307 [[fallthrough]];
308 case Keyword::Mut:
309 compileLetMutSet(keyword, x, p);
310 break;
311
312 case Keyword::Fun:
313 compileFunction(x, p, is_result_unused, var_name);
314 break;
315
316 case Keyword::Begin:
317 {
318 for (std::size_t i = 1, size = x.constList().size(); i < size; ++i)
320 x.constList()[i],
321 p,
322 // All the nodes in a begin (except for the last one) are producing a result that we want to drop.
323 (i != size - 1) || is_result_unused,
324 // If the begin is a terminal node, only its last node is terminal.
325 is_terminal && (i == size - 1),
326 var_name);
327 break;
328 }
329
330 case Keyword::While:
331 compileWhile(x, p);
332 break;
333
334 case Keyword::Import:
336 break;
337
338 case Keyword::Del:
339 page(p).emplace_back(DEL, addSymbol(x.constList()[1]));
340 break;
341 }
342 }
343 else
344 {
345 // if we are here, we should have a function name
346 // push arguments first, then function name, then call it
347 handleCalls(x, p, is_result_unused, is_terminal, var_name);
348 }
349 }
350
351 void Compiler::compileSymbol(const Node& x, const int p, const bool is_result_unused)
352 {
353 const std::string& name = x.string();
354
355 if (const auto it_builtin = getBuiltin(name))
356 page(p).emplace_back(Instruction::BUILTIN, static_cast<uint16_t>(it_builtin.value()));
357 else if (const auto it_operator = getOperator(name))
358 page(p).emplace_back(static_cast<uint8_t>(FIRST_OPERATOR + it_operator.value()));
359 else
360 page(p).emplace_back(LOAD_SYMBOL, addSymbol(x)); // using the variable
361
362 if (is_result_unused)
363 {
364 compilerWarning("Statement has no effect", x);
365 page(p).emplace_back(POP);
366 }
367 }
368
369 void Compiler::compileSpecific(const Node& c0, const Node& x, const int p, const bool is_result_unused)
370 {
371 std::string name = c0.string();
372 Instruction inst = getSpecific(name).value();
373
374 // length of at least 1 since we got a symbol name
375 const uint16_t argc = x.constList().size() - 1;
376 // error, can not use append/concat/pop (and their in place versions) with a <2 length argument list
377 if (argc < 2 && inst != LIST)
378 throwCompilerError(fmt::format("Can not use {} with less than 2 arguments", name), c0);
379
380 // compile arguments in reverse order
381 for (uint16_t i = x.constList().size() - 1; i > 0; --i)
382 compileExpression(x.constList()[i], p, false, false);
383
384 // put inst and number of arguments
385 page(p).emplace_back(inst, computeSpecificInstArgc(inst, argc));
386
387 if (is_result_unused && name.back() != '!') // in-place functions never push a value
388 {
389 compilerWarning("Ignoring return value of function", x);
390 page(p).emplace_back(POP);
391 }
392 }
393
394 void Compiler::compileIf(const Node& x, const int p, const bool is_result_unused, const bool is_terminal, const std::string& var_name)
395 {
396 // compile condition
397 compileExpression(x.constList()[1], p, false, false);
398
399 // jump only if needed to the if
400 const std::size_t jump_to_if_pos = page(p).size();
401 page(p).emplace_back(Instruction::POP_JUMP_IF_TRUE);
402
403 // else code
404 if (x.constList().size() == 4) // we have an else clause
405 compileExpression(x.constList()[3], p, is_result_unused, is_terminal, var_name);
406
407 // when else is finished, jump to end
408 const std::size_t jump_to_end_pos = page(p).size();
409 page(p).emplace_back(Instruction::JUMP);
410
411 // absolute address to jump to if condition is true
412 page(p)[jump_to_if_pos].data = static_cast<uint16_t>(page(p).size());
413 // if code
414 compileExpression(x.constList()[2], p, is_result_unused, is_terminal, var_name);
415 // set jump to end pos
416 page(p)[jump_to_end_pos].data = static_cast<uint16_t>(page(p).size());
417 }
418
419 void Compiler::compileFunction(const Node& x, const int p, const bool is_result_unused, const std::string& var_name)
420 {
421 // capture, if needed
422 for (const auto& node : x.constList()[1].constList())
423 {
424 if (node.nodeType() == NodeType::Capture)
425 {
426 // first check that the capture is a defined symbol
427 if (std::ranges::find(m_defined_symbols, node.string()) == m_defined_symbols.end())
428 {
429 // we didn't find node in the defined symbol list, thus we can't capture node
430 throwCompilerError("Can not capture " + node.string() + " because node is referencing an unbound variable.", node);
431 }
432
433 addDefinedSymbol(node.string());
434 page(p).emplace_back(CAPTURE, addSymbol(node));
435 }
436 }
437
438 // create new page for function body
439 m_code_pages.emplace_back();
440 const std::size_t page_id = m_code_pages.size() - 1;
441 // save page_id into the constants table as PageAddr and load the const
442 page(p).emplace_back(LOAD_CONST, addValue(page_id, x));
443
444 // pushing arguments from the stack into variables in the new scope
445 for (const auto& node : x.constList()[1].constList())
446 {
447 if (node.nodeType() == NodeType::Symbol)
448 {
449 addDefinedSymbol(node.string());
450 page(page_id).emplace_back(MUT, addSymbol(node));
451 }
452 }
453
454 // push body of the function
455 compileExpression(x.constList()[2], page_id, false, true, var_name);
456
457 // return last value on the stack
458 page(page_id).emplace_back(RET);
459
460 // if the computed function is unused, pop it
461 if (is_result_unused)
462 {
463 compilerWarning("Unused declared function", x);
464 page(p).emplace_back(POP);
465 }
466 }
467
468 void Compiler::compileLetMutSet(const Keyword n, const Node& x, const int p)
469 {
470 const std::string name = x.constList()[1].string();
471 uint16_t i = addSymbol(x.constList()[1]);
472 if (n != Keyword::Set)
473 addDefinedSymbol(name);
474
475 // put value before symbol id
476 // starting at index = 2 because x is a (let|mut|set variable ...) node
477 for (std::size_t idx = 2, end = x.constList().size(); idx < end; ++idx)
478 compileExpression(x.constList()[idx], p, false, false, name);
479
480 if (n == Keyword::Let)
481 page(p).emplace_back(LET, i);
482 else if (n == Keyword::Mut)
483 page(p).emplace_back(MUT, i);
484 else
485 page(p).emplace_back(STORE, i);
486 }
487
488 void Compiler::compileWhile(const Node& x, const int p)
489 {
490 // save current position to jump there at the end of the loop
491 std::size_t current = page(p).size();
492 // push condition
493 compileExpression(x.constList()[1], p, false, false);
494 // absolute jump to end of block if condition is false
495 const std::size_t jump_to_end_pos = page(p).size();
496 page(p).emplace_back(POP_JUMP_IF_FALSE);
497 // push code to page
498 compileExpression(x.constList()[2], p, true, false);
499
500 // loop, jump to the condition
501 page(p).emplace_back(JUMP, current);
502
503 // absolute address to jump to if condition is false
504 page(p)[jump_to_end_pos].data = static_cast<uint16_t>(page(p).size());
505 }
506
507 void Compiler::compilePluginImport(const Node& x, const int p)
508 {
509 std::string path;
510 const Node package_node = x.constList()[1];
511 for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i)
512 {
513 path += package_node.constList()[i].string();
514 if (i + 1 != end)
515 path += "/";
516 }
517 path += ".arkm";
518
519 // register plugin path in the constants table
520 uint16_t id = addValue(Node(NodeType::String, path));
521 // save plugin name to use it later
522 m_plugins.push_back(path);
523 // add plugin instruction + id of the constant referring to the plugin path
524 page(p).emplace_back(PLUGIN, id);
525 }
526
527 void Compiler::handleCalls(const Node& x, const int p, bool is_result_unused, const bool is_terminal, const std::string& var_name)
528 {
529 m_temp_pages.emplace_back();
530 const int proc_page = -static_cast<int>(m_temp_pages.size());
531 constexpr std::size_t start_index = 1;
532
533 compileExpression(x.constList()[0], proc_page, false, false); // storing proc
534 // closure chains have been handled: closure.field.field.function
535
536 // it's a builtin/function
537 if (m_temp_pages.back()[0].opcode < FIRST_OPERATOR)
538 {
539 if (is_terminal && x.constList()[0].nodeType() == NodeType::Symbol && var_name == x.constList()[0].string())
540 {
541 // we can drop the temp page as we won't be using it
542 m_temp_pages.pop_back();
543
544 // push the arguments in reverse order
545 for (std::size_t i = x.constList().size() - 1; i >= start_index; --i)
546 compileExpression(x.constList()[i], p, false, false);
547
548 // jump to the top of the function
549 page(p).emplace_back(JUMP, 0_u16);
550 return; // skip the possible Instruction::POP at the end
551 }
552 else
553 {
554 // push arguments on current page
555 for (auto exp = x.constList().begin() + start_index, exp_end = x.constList().end(); exp != exp_end; ++exp)
556 compileExpression(*exp, p, false, false);
557 // push proc from temp page
558 for (const Word& word : m_temp_pages.back())
559 page(p).push_back(word);
560 m_temp_pages.pop_back();
561
562 // number of arguments
563 std::size_t args_count = 0;
564 for (auto it = x.constList().begin() + 1, it_end = x.constList().end(); it != it_end; ++it)
565 {
566 if (it->nodeType() != NodeType::Capture)
567 args_count++;
568 }
569 // call the procedure
570 page(p).emplace_back(CALL, args_count);
571 }
572 }
573 else // operator
574 {
575 // retrieve operator
576 auto op = m_temp_pages.back()[0];
577 m_temp_pages.pop_back();
578
579 if (op.opcode == ASSERT)
580 is_result_unused = false;
581
582 // push arguments on current page
583 std::size_t exp_count = 0;
584 for (std::size_t index = start_index, size = x.constList().size(); index < size; ++index)
585 {
586 compileExpression(x.constList()[index], p, false, false);
587
588 if ((index + 1 < size && x.constList()[index + 1].nodeType() != NodeType::Capture) || index + 1 == size)
589 exp_count++;
590
591 // in order to be able to handle things like (op A B C D...)
592 // which should be transformed into A B op C op D op...
593 if (exp_count >= 2)
594 page(p).emplace_back(op.opcode, 2); // TODO generalize to n arguments (n >= 2)
595 }
596
597 if (exp_count == 1)
598 {
599 if (isUnaryInst(static_cast<Instruction>(op.opcode)))
600 page(p).emplace_back(op.opcode);
601 else
602 throwCompilerError("Operator needs two arguments, but was called with only one", x.constList()[0]);
603 }
604
605 // need to check we didn't push the (op A B C D...) things for operators not supporting it
606 if (exp_count > 2)
607 {
608 switch (op.opcode)
609 {
610 // authorized instructions
611 case ADD: [[fallthrough]];
612 case SUB: [[fallthrough]];
613 case MUL: [[fallthrough]];
614 case DIV: [[fallthrough]];
615 case AND_: [[fallthrough]];
616 case OR_: [[fallthrough]];
617 case MOD:
618 break;
619
620 default:
622 "can not create a chained expression (of length " + std::to_string(exp_count) +
623 ") for operator `" + std::string(operators[static_cast<std::size_t>(op.opcode - FIRST_OPERATOR)]) +
624 "'. You most likely forgot a `)'.",
625 x);
626 }
627 }
628 }
629
630 if (is_result_unused)
631 page(p).emplace_back(POP);
632 }
633
634 uint16_t Compiler::addSymbol(const Node& sym)
635 {
636 // otherwise, add the symbol, and return its id in the table
637 auto it = std::ranges::find_if(m_symbols, [&sym](const Node& sym_node) -> bool {
638 return sym_node.string() == sym.string();
639 });
640 if (it == m_symbols.end())
641 {
642 m_symbols.push_back(sym);
643 it = m_symbols.begin() + m_symbols.size() - 1;
644 }
645
646 const auto distance = std::distance(m_symbols.begin(), it);
647 if (distance < std::numeric_limits<uint16_t>::max())
648 return static_cast<uint16_t>(distance);
649 throwCompilerError("Too many symbols (exceeds 65'536), aborting compilation.", sym);
650 }
651
652 uint16_t Compiler::addValue(const Node& x)
653 {
654 const ValTableElem v(x);
655 auto it = std::ranges::find(m_values, v);
656 if (it == m_values.end())
657 {
658 m_values.push_back(v);
659 it = m_values.begin() + m_values.size() - 1;
660 }
661
662 const auto distance = std::distance(m_values.begin(), it);
663 if (distance < std::numeric_limits<uint16_t>::max())
664 return static_cast<uint16_t>(distance);
665 throwCompilerError("Too many values (exceeds 65'536), aborting compilation.", x);
666 }
667
668 uint16_t Compiler::addValue(const std::size_t page_id, const Node& current)
669 {
670 const ValTableElem v(page_id);
671 auto it = std::ranges::find(m_values, v);
672 if (it == m_values.end())
673 {
674 m_values.push_back(v);
675 it = m_values.begin() + m_values.size() - 1;
676 }
677
678 const auto distance = std::distance(m_values.begin(), it);
679 if (distance < std::numeric_limits<uint16_t>::max())
680 return static_cast<uint16_t>(distance);
681 throwCompilerError("Too many values (exceeds 65'536), aborting compilation.", current);
682 }
683
684 void Compiler::addDefinedSymbol(const std::string& sym)
685 {
686 // otherwise, add the symbol, and return its id in the table
687 if (std::ranges::find(m_defined_symbols, sym) == m_defined_symbols.end())
688 m_defined_symbols.push_back(sym);
689 }
690
692 {
693 for (const Node& sym : m_symbols)
694 {
695 const std::string& str = sym.string();
696 const bool is_plugin = mayBeFromPlugin(str);
697
698 if (auto it = std::ranges::find(m_defined_symbols, str); it == m_defined_symbols.end() && !is_plugin)
699 {
700 const std::string suggestion = offerSuggestion(str);
701 if (suggestion.empty())
702 throwCompilerError("Unbound variable error \"" + str + "\" (variable is used but not defined)", sym);
703
704 throwCompilerError("Unbound variable error \"" + str + "\" (did you mean \"" + suggestion + "\"?)", sym);
705 }
706 }
707 }
708
709 std::string Compiler::offerSuggestion(const std::string& str) const
710 {
711 std::string suggestion;
712 // our suggestion shouldn't require more than half the string to change
713 std::size_t suggestion_distance = str.size() / 2;
714
715 for (const std::string& symbol : m_defined_symbols)
716 {
717 const std::size_t current_distance = Utils::levenshteinDistance(str, symbol);
718 if (current_distance <= suggestion_distance)
719 {
720 suggestion_distance = current_distance;
721 suggestion = symbol;
722 }
723 }
724
725 return suggestion;
726 }
727}
Lots of utilities about string, filesystem and more.
Hosts the declarations of all the ArkScript builtins.
ArkScript compiler is in charge of transforming the AST into bytecode.
Constants used by ArkScript.
constexpr int ARK_VERSION_MAJOR
Definition: Constants.hpp:18
constexpr int ARK_VERSION_PATCH
Definition: Constants.hpp:20
constexpr int ARK_VERSION_MINOR
Definition: Constants.hpp:19
User defined literals for Ark internals.
Handles the macros and their expansion in ArkScript source code.
static void compilerWarning(const std::string &message, const internal::Node &node)
Display a warning message.
Definition: Compiler.cpp:250
void compileSymbol(const internal::Node &x, int p, bool is_result_unused)
Definition: Compiler.cpp:351
std::vector< std::vector< internal::Word > > m_code_pages
Definition: Compiler.hpp:68
void handleCalls(const internal::Node &x, int p, bool is_result_unused, bool is_terminal, const std::string &var_name)
Definition: Compiler.cpp:527
void compileExpression(const internal::Node &x, int p, bool is_result_unused, bool is_terminal, const std::string &var_name="")
Compile an expression (a node) recursively.
Definition: Compiler.cpp:260
void compileLetMutSet(internal::Keyword n, const internal::Node &x, int p)
Definition: Compiler.cpp:468
static std::optional< std::size_t > getBuiltin(const std::string &name) noexcept
Checking if a symbol is a builtin.
Definition: Compiler.cpp:190
std::vector< std::string > m_defined_symbols
Definition: Compiler.hpp:65
std::string offerSuggestion(const std::string &str) const
Suggest a symbol of what the user may have meant to input.
Definition: Compiler.cpp:709
void checkForUndefinedSymbol()
Checks for undefined symbols, not present in the defined symbols table.
Definition: Compiler.cpp:691
bool mayBeFromPlugin(const std::string &name) noexcept
Checking if a symbol may be coming from a plugin.
Definition: Compiler.cpp:240
void compileSpecific(const internal::Node &c0, const internal::Node &x, int p, bool is_result_unused)
Definition: Compiler.cpp:369
std::vector< std::string > m_plugins
Definition: Compiler.hpp:66
void process(const internal::Node &ast)
Start the compilation.
Definition: Compiler.cpp:25
void compilePluginImport(const internal::Node &x, int p)
Definition: Compiler.cpp:507
void addDefinedSymbol(const std::string &sym)
Register a symbol as defined, so that later we can throw errors on undefined symbols.
Definition: Compiler.cpp:684
std::vector< internal::ValTableElem > m_values
Definition: Compiler.hpp:67
void compileWhile(const internal::Node &x, int p)
Definition: Compiler.cpp:488
std::vector< std::vector< internal::Word > > m_temp_pages
we need temporary code pages for some compilations passes
Definition: Compiler.hpp:69
void compileFunction(const internal::Node &x, int p, bool is_result_unused, const std::string &var_name)
Definition: Compiler.cpp:419
static std::optional< internal::Instruction > getSpecific(const std::string &name) noexcept
Check if a symbol needs to be compiled to a specific instruction.
Definition: Compiler.hpp:134
uint16_t addSymbol(const internal::Node &sym)
Register a given node in the symbol table.
Definition: Compiler.cpp:634
void compileIf(const internal::Node &x, int p, bool is_result_unused, bool is_terminal, const std::string &var_name)
Definition: Compiler.cpp:394
static std::optional< std::size_t > getOperator(const std::string &name) noexcept
Checking if a symbol is an operator.
Definition: Compiler.cpp:182
static uint16_t computeSpecificInstArgc(internal::Instruction inst, uint16_t previous) noexcept
Compute specific instruction argument count.
Definition: Compiler.cpp:222
const bytecode_t & bytecode() const noexcept
Return the constructed bytecode object.
Definition: Compiler.cpp:85
uint16_t addValue(const internal::Node &x)
Register a given node in the value table.
Definition: Compiler.cpp:652
std::vector< internal::Node > m_symbols
Definition: Compiler.hpp:64
static void throwCompilerError(const std::string &message, const internal::Node &node)
Throw a nice error message.
Definition: Compiler.cpp:255
std::vector< internal::Word > & page(const int i) noexcept
helper functions to get a temp or finalized code page
Definition: Compiler.hpp:92
static bool isUnaryInst(internal::Instruction inst) noexcept
Check if a given instruction is unary (takes only one argument)
Definition: Compiler.cpp:201
bytecode_t m_bytecode
Definition: Compiler.hpp:71
void pushFileHeader() noexcept
Push the file headers (magic, version used, timestamp)
Definition: Compiler.cpp:90
Compiler(unsigned debug)
Construct a new Compiler object.
Definition: Compiler.cpp:21
void pushSymAndValTables()
Push the symbols and values tables.
Definition: Compiler.cpp:124
A node of an Abstract Syntax Tree for ArkScript.
Definition: Node.hpp:30
NodeType nodeType() const noexcept
Return the node type.
Definition: Node.cpp:71
const std::string & filename() const noexcept
Return the filename in which this node was created.
Definition: Node.cpp:128
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition: Node.cpp:41
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition: Node.cpp:66
std::string repr() const noexcept
Compute a representation of the node without any comments or additional sugar, colors,...
Definition: Node.cpp:143
std::size_t col() const noexcept
Get the column at which this node was created.
Definition: Node.cpp:123
std::size_t line() const noexcept
Get the line at which this node was created.
Definition: Node.cpp:118
ARK_API std::string makeContextWithNode(const std::string &message, const internal::Node &node)
Helper used by the compiler to generate a colorized context from a node.
Definition: Exceptions.cpp:135
std::vector< std::string > splitString(const std::string &source, const char sep)
Cut a string into pieces, given a character separator.
Definition: Utils.hpp:30
int levenshteinDistance(const std::string &str1, const std::string &str2)
Calculate the Levenshtein distance between two strings.
Definition: Utils.cpp:5
const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 25 > operators
Definition: Common.hpp:84
Keyword
The different keywords available.
Definition: Common.hpp:56
Instruction
The different bytecodes are stored here.
Definition: Builtins.hpp:21
std::vector< uint8_t > bytecode_t
Definition: Common.hpp:21
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
Definition: Exceptions.hpp:85
A Compiler Value class helper to handle multiple types.