ArkScript
A small, fast, functional and scripting language for video games
Compiler.cpp
Go to the documentation of this file.
2
3#include <fstream>
4#include <chrono>
5#include <limits>
6#include <filesystem>
7#include <picosha2.h>
8#include <termcolor/termcolor.hpp>
9#undef max
10
11#include <Ark/Literals.hpp>
12#include <Ark/Utils.hpp>
15
16namespace Ark
17{
18 using namespace internal;
19 using namespace literals;
20
21 Compiler::Compiler(unsigned debug, const std::vector<std::string>& libenv, uint16_t options) :
22 m_parser(debug, options, libenv), m_optimizer(options),
23 m_options(options), m_debug(debug)
24 {}
25
// Feed source code to the compiler front-end.
// The code is given to the parser, the parser's AST is given to `mp`, and the AST
// returned by `mp` is given to the optimizer.
// @param code the source code to parse
// @param filename forwarded as-is to the parser together with the code
// NOTE(review): the declaration of `mp` is not visible in this listing (original line 30 is
// absent) - confirm its type and constructor arguments against the real source file.
26 void Compiler::feed(const std::string& code, const std::string& filename)
27 {
28 m_parser.feed(code, filename);
29
// chain the stages: parser AST -> mp -> optimizer
31 mp.feed(m_parser.ast());
32 m_optimizer.feed(mp.ast());
33 }
34
// Body of the function defined at Compiler.cpp:35 (listed as `void compile()` in the
// documentation index of this file); its signature line is not part of this listing.
// It compiles the optimized AST into code pages, serializes every page into m_bytecode,
// then inserts a SHA256 digest right after the file header.
// NOTE(review): original lines 37, 44 and 46 are absent from this listing, so some calls
// made by this function are not visible here - confirm against the real source file.
36 {
38
39 m_code_pages.emplace_back(); // create empty page
40
41 // gather symbols, values, and start to create code segments
42 _compile(m_optimizer.ast(), /* current_page */ 0, /* produces_result */ false, /* is_terminal */ false);
43 // throw an error on undefined symbol uses
45
47
48 // push the different code segments
49 for (const bytecode_t& page : m_code_pages)
50 {
51 m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
52
53 // push number of elements
// (+1 accounts for the HALT appended after the page content below)
54 pushNumber(static_cast<uint16_t>(page.size() + 1));
55
56 for (auto inst : page)
57 m_bytecode.push_back(inst);
58 // just in case we got too far, always add a HALT to be sure the
59 // VM won't do anything crazy
60 m_bytecode.push_back(Instruction::HALT);
61 }
62
// fallback: emit a single segment containing only HALT when there is no code page
// NOTE(review): a page is always added at the top of this function, so this branch
// looks unreachable from here - confirm.
63 if (!m_code_pages.size())
64 {
65 m_bytecode.push_back(Instruction::CODE_SEGMENT_START);
66 pushNumber(1_u16);
67 m_bytecode.push_back(Instruction::HALT);
68 }
69
// 18 = 4 (magic) + 6 (version) + 8 (timestamp), as described by the
// "Generating headers" comment found further down in this file
70 constexpr std::size_t header_size = 18;
71
72 // generate a hash of the tables + bytecode
// the digest covers everything located after the header, and is then inserted
// between the header and the data it was computed from
73 std::vector<unsigned char> hash_out(picosha2::k_digest_size);
74 picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out);
75 m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end());
76 }
77
78 void Compiler::saveTo(const std::string& file)
79 {
80 if (m_debug >= 1)
81 std::cout << "Final bytecode size: " << m_bytecode.size() * sizeof(uint8_t) << "B\n";
82
83 std::ofstream output(file, std::ofstream::binary);
84 output.write(reinterpret_cast<char*>(&m_bytecode[0]), m_bytecode.size() * sizeof(uint8_t));
85 output.close();
86 }
87
// Body of the function defined at Compiler.cpp:88 (listed as
// `const bytecode_t& bytecode() noexcept` in the documentation index of this file);
// its signature line is not part of this listing.
// Returns the m_bytecode member.
89 {
90 return m_bytecode;
91 }
92
// Body of the function defined at Compiler.cpp:93 (listed as
// `void pushFileHeader() noexcept` in the documentation index of this file);
// its signature line is not part of this listing.
// Appends the file header to m_bytecode: the magic bytes, then the version, then the timestamp.
94 {
95 /*
96 Generating headers:
97 - lang name (to be sure we are executing an ArkScript file)
98 on 4 bytes (ark + padding)
99 - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes)
100 - timestamp (8 bytes, unix format)
101 */
102
// magic: 'a' 'r' 'k' followed by a zero byte
103 m_bytecode.push_back('a');
104 m_bytecode.push_back('r');
105 m_bytecode.push_back('k');
106 m_bytecode.push_back(0_u8);
107
108 // push version
// NOTE(review): the statements pushing the version (original lines 109-111) are absent
// from this listing - confirm against the real source file.
112
113 // push timestamp
// number of seconds elapsed since the epoch of std::chrono::system_clock
114 unsigned long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
115 std::chrono::system_clock::now().time_since_epoch())
116 .count();
// emit the 8 bytes of the timestamp, most significant byte first
// (the shift goes from 56 down to 0)
117 for (char c = 0; c < 8; c++)
118 {
119 unsigned shift = 8 * (7 - c);
120 uint8_t ts_byte = (timestamp & (0xffULL << shift)) >> shift;
121 m_bytecode.push_back(ts_byte);
122 }
123 }
124
// Body of the function defined at Compiler.cpp:125 (listed as
// `void pushSymAndValTables()` in the documentation index of this file);
// its signature line is not part of this listing.
// Serializes the symbols table, then the values table, at the end of m_bytecode.
// Throws a CompilationError when a value has a type which isn't Number, String or PageAddr.
126 {
127 /*
128 - symbols table
129 + elements
130 - values table header
131 + elements
132 */
133
134 m_bytecode.push_back(Instruction::SYM_TABLE_START);
135 // push size
136 pushNumber(static_cast<uint16_t>(m_symbols.size()));
137 // push elements
// note: the loop variable is taken by value, so each symbol node is copied
138 for (auto sym : m_symbols)
139 {
140 // push the string, null terminated
141 std::string s = sym.string();
142 for (std::size_t i = 0, size = s.size(); i < size; ++i)
143 m_bytecode.push_back(s[i]);
144 m_bytecode.push_back(0_u8);
145 }
146
147 // values table
148 m_bytecode.push_back(Instruction::VAL_TABLE_START);
149 // push size
150 pushNumber(static_cast<uint16_t>(m_values.size()));
151 // push elements (separated with 0x00)
// each element is written as: a type tag, its payload, then a 0 byte
152 for (auto val : m_values)
153 {
154 if (val.type == ValTableElemType::Number)
155 {
156 m_bytecode.push_back(Instruction::NUMBER_TYPE);
// numbers are stored as text, using the output of std::to_string
157 auto n = std::get<double>(val.value);
158 std::string t = std::to_string(n);
159 for (std::size_t i = 0, size = t.size(); i < size; ++i)
160 m_bytecode.push_back(t[i]);
161 }
162 else if (val.type == ValTableElemType::String)
163 {
164 m_bytecode.push_back(Instruction::STRING_TYPE);
165 std::string t = std::get<std::string>(val.value);
166 for (std::size_t i = 0, size = t.size(); i < size; ++i)
167 m_bytecode.push_back(t[i]);
168 }
169 else if (val.type == ValTableElemType::PageAddr)
170 {
171 m_bytecode.push_back(Instruction::FUNC_TYPE);
// the page index is written on 2 bytes
172 pushNumber(static_cast<uint16_t>(std::get<std::size_t>(val.value)));
173 }
174 else
175 throw CompilationError("trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code");
176
177 m_bytecode.push_back(0_u8);
178 }
179 }
180
181 std::size_t Compiler::countArkObjects(const std::vector<Node>& lst) noexcept
182 {
183 std::size_t n = 0;
184 for (const Node& node : lst)
185 {
186 if (node.nodeType() != NodeType::GetField)
187 n++;
188 }
189 return n;
190 }
191
192 std::optional<std::size_t> Compiler::isOperator(const std::string& name) noexcept
193 {
194 auto it = std::find(internal::operators.begin(), internal::operators.end(), name);
195 if (it != internal::operators.end())
196 return std::distance(internal::operators.begin(), it);
197 return std::nullopt;
198 }
199
200 std::optional<std::size_t> Compiler::isBuiltin(const std::string& name) noexcept
201 {
202 auto it = std::find_if(Builtins::builtins.begin(), Builtins::builtins.end(),
203 [&name](const std::pair<std::string, Value>& element) -> bool {
204 return name == element.first;
205 });
206 if (it != Builtins::builtins.end())
207 return std::distance(Builtins::builtins.begin(), it);
208 return std::nullopt;
209 }
210
// Body of the function defined at Compiler.cpp:211 (listed as
// `bool isUnaryInst(internal::Instruction inst) noexcept` in the documentation index of
// this file); its signature line is not part of this listing.
// Returns true when `inst` is one of the instructions enumerated below, false otherwise.
212 {
213 switch (inst)
214 {
// every case falls through to the single `return true` below
215 case Instruction::NOT: [[fallthrough]];
216 case Instruction::LEN: [[fallthrough]];
217 case Instruction::EMPTY: [[fallthrough]];
218 case Instruction::TAIL: [[fallthrough]];
219 case Instruction::HEAD: [[fallthrough]];
220 case Instruction::ISNIL: [[fallthrough]];
221 case Instruction::TO_NUM: [[fallthrough]];
222 case Instruction::TO_STR: [[fallthrough]];
223 case Instruction::TYPE: [[fallthrough]];
224 case Instruction::HASFIELD:
225 return true;
226
227 default:
228 return false;
229 }
230 }
231
232 void Compiler::pushSpecificInstArgc(Instruction inst, uint16_t previous, int p) noexcept
233 {
234 if (inst == Instruction::LIST)
235 pushNumber(previous, page_ptr(p));
236 else if (inst == Instruction::APPEND || inst == Instruction::APPEND_IN_PLACE ||
237 inst == Instruction::CONCAT || inst == Instruction::CONCAT_IN_PLACE)
238 pushNumber(previous - 1, page_ptr(p));
239 }
240
241 bool Compiler::mayBeFromPlugin(const std::string& name) noexcept
242 {
243 std::string splitted = Utils::splitString(name, ':')[0];
244 auto it = std::find_if(m_plugins.begin(), m_plugins.end(),
245 [&splitted](const std::string& plugin) -> bool {
246 return std::filesystem::path(plugin).stem().string() == splitted;
247 });
248 return it != m_plugins.end();
249 }
250
251 void Compiler::throwCompilerError(const std::string& message, const Node& node)
252 {
253 throw CompilationError(makeNodeBasedErrorCtx(message, node));
254 }
255
// Print a warning on the standard error output: a yellow "Warning " prefix followed by
// the text built by makeNodeBasedErrorCtx from the message and the node.
// @param message the warning message
// @param node the node the warning relates to
// NOTE(review): original line 258 is absent from this listing; a condition guarding the
// print may be missing here - confirm against the real source file.
256 void Compiler::compilerWarning(const std::string& message, const Node& node)
257 {
259 std::cerr << termcolor::yellow << "Warning " << termcolor::reset << makeNodeBasedErrorCtx(message, node) << "\n";
260 }
261
// Compile a single node, recursively, dispatching on the kind of node.
// @param x the node to compile
// @param p index of the page receiving the generated instructions
// @param produces_result when true, the value computed by the node is not wanted: the
//        branches visible here either skip the load or forward the flag to helpers
// @param is_terminal forwarded to the if/begin/quote/call handlers
// @param var_name forwarded to the if/fun/begin/quote/call handlers
262 void Compiler::_compile(const Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name)
263 {
264 // register symbols
265 if (x.nodeType() == NodeType::Symbol)
266 compileSymbol(x, p, produces_result);
267 else if (x.nodeType() == NodeType::GetField)
268 {
// note: `name` is not read afterwards in this branch
269 std::string name = x.string();
270 // 'name' shouldn't be a builtin/operator, we can use it as-is
271 uint16_t i = addSymbol(x);
272
273 page(p).emplace_back(Instruction::GET_FIELD);
274 pushNumber(i, page_ptr(p));
275 }
276 // register values
277 else if (x.nodeType() == NodeType::String || x.nodeType() == NodeType::Number)
278 {
// the value is always registered, but only loaded when its result is wanted
279 uint16_t i = addValue(x);
280
281 if (!produces_result)
282 {
283 page(p).emplace_back(Instruction::LOAD_CONST);
284 pushNumber(i, page_ptr(p));
285 }
286 }
287 // empty code block should be nil
288 else if (x.constList().empty())
289 {
290 if (!produces_result)
291 {
292 auto it_builtin = isBuiltin("nil");
293 page(p).emplace_back(Instruction::BUILTIN);
294 pushNumber(static_cast<uint16_t>(it_builtin.value()), page_ptr(p));
295 }
296 }
297 // specific instructions
// note: `auto c0` takes a copy of the first child node
298 else if (auto c0 = x.constList()[0]; c0.nodeType() == NodeType::Symbol && isSpecific(c0.string()).has_value())
299 compileSpecific(c0, x, p, produces_result);
300 // registering structures
301 else if (x.constList()[0].nodeType() == NodeType::Keyword)
302 {
303 Keyword n = x.constList()[0].keyword();
304
305 switch (n)
306 {
307 case Keyword::If:
308 compileIf(x, p, produces_result, is_terminal, var_name);
309 break;
310
// set, let and mut share the same handler, which receives the keyword
311 case Keyword::Set:
312 [[fallthrough]];
313 case Keyword::Let:
314 [[fallthrough]];
315 case Keyword::Mut:
316 compileLetMutSet(n, x, p);
317 break;
318
319 case Keyword::Fun:
320 compileFunction(x, p, produces_result, var_name);
321 break;
322
323 case Keyword::Begin:
324 {
// index 0 holds the keyword itself, hence the start at 1
325 for (std::size_t i = 1, size = x.constList().size(); i < size; ++i)
326 _compile(
327 x.constList()[i],
328 p,
329 // All the nodes in a begin (except for the last one) are producing a result that we want to drop.
330 (i != size - 1) ? true : produces_result,
331 // If the begin is a terminal node, only its last node is terminal.
332 is_terminal ? (i == size - 1) : false,
333 var_name);
334 break;
335 }
336
337 case Keyword::While:
338 compileWhile(x, p);
339 break;
340
// NOTE(review): original line 342 is absent from this listing, so the statement
// handling imports is not visible here - confirm against the real source file.
341 case Keyword::Import:
343 break;
344
345 case Keyword::Quote:
346 compileQuote(x, p, produces_result, is_terminal, var_name);
347 break;
348
349 case Keyword::Del:
350 compileDel(x, p);
351 break;
352 }
353 }
354 else
355 {
356 // if we are here, we should have a function name
357 // push arguments first, then function name, then call it
358 handleCalls(x, p, produces_result, is_terminal, var_name);
359 }
360 }
361
362 void Compiler::compileSymbol(const Node& x, int p, bool produces_result)
363 {
364 std::string name = x.string();
365
366 if (auto it_builtin = isBuiltin(name))
367 {
368 page(p).emplace_back(Instruction::BUILTIN);
369 pushNumber(static_cast<uint16_t>(it_builtin.value()), page_ptr(p));
370 }
371 else if (auto it_operator = isOperator(name))
372 page(p).emplace_back(static_cast<uint8_t>(Instruction::FIRST_OPERATOR + it_operator.value()));
373 else // var-use
374 {
375 uint16_t i = addSymbol(x);
376
377 page(p).emplace_back(Instruction::LOAD_SYMBOL);
378 pushNumber(i, page_ptr(p));
379 }
380
381 if (produces_result)
382 {
383 compilerWarning("Statement has no effect", x);
384 page(p).push_back(Instruction::POP);
385 }
386 }
387
388 void Compiler::compileSpecific(const Node& c0, const Node& x, int p, bool produces_result)
389 {
390 std::string name = c0.string();
391 Instruction inst = isSpecific(name).value();
392
393 // length of at least 1 since we got a symbol name
394 uint16_t argc = countArkObjects(x.constList()) - 1;
395 // error, can not use append/concat/pop (and their in place versions) with a <2 length argument list
396 if (argc < 2 && inst != Instruction::LIST)
397 throw CompilationError("can not use " + name + " with less than 2 arguments");
398
399 // compile arguments in reverse order
400 for (uint16_t i = x.constList().size() - 1; i > 0; --i)
401 {
402 uint16_t j = i;
403 while (x.constList()[j].nodeType() == NodeType::GetField)
404 --j;
405 uint16_t diff = i - j;
406 while (j < i)
407 {
408 _compile(x.constList()[j], p, false, false);
409 ++j;
410 }
411 _compile(x.constList()[i], p, false, false);
412 i -= diff;
413 }
414
415 // put inst and number of arguments
416 page(p).emplace_back(inst);
417 pushSpecificInstArgc(inst, argc, p);
418
419 if (produces_result && name.back() != '!') // in-place functions never push a value
420 {
421 compilerWarning("Ignoring return value of function", x);
422 page(p).push_back(Instruction::POP);
423 }
424 }
425
426 void Compiler::compileIf(const Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name)
427 {
428 // compile condition
429 _compile(x.constList()[1], p, false, false);
430
431 // jump only if needed to the if
432 page(p).push_back(Instruction::POP_JUMP_IF_TRUE);
433 std::size_t jump_to_if_pos = page(p).size();
434 // absolute address to jump to if condition is true
435 pushNumber(0_u16, page_ptr(p));
436
437 // else code
438 if (x.constList().size() == 4) // we have an else clause
439 _compile(x.constList()[3], p, produces_result, is_terminal, var_name);
440
441 // when else is finished, jump to end
442 page(p).push_back(Instruction::JUMP);
443 std::size_t jump_to_end_pos = page(p).size();
444 pushNumber(0_u16, page_ptr(p));
445
446 // set jump to if pos
447 setNumberAt(p, jump_to_if_pos, page(p).size());
448 // if code
449 _compile(x.constList()[2], p, produces_result, is_terminal, var_name);
450 // set jump to end pos
451 setNumberAt(p, jump_to_end_pos, page(p).size());
452 }
453
454 void Compiler::compileFunction(const Node& x, int p, bool produces_result, const std::string& var_name)
455 {
456 // capture, if needed
457 for (auto it = x.constList()[1].constList().begin(), it_end = x.constList()[1].constList().end(); it != it_end; ++it)
458 {
459 if (it->nodeType() == NodeType::Capture)
460 {
461 // first check that the capture is a defined symbol
462 if (std::find(m_defined_symbols.begin(), m_defined_symbols.end(), it->string()) == m_defined_symbols.end())
463 {
464 // we didn't find it in the defined symbol list, thus we can't capture it
465 throwCompilerError("Can not capture " + it->string() + " because it is referencing an unbound variable.", *it);
466 }
467 page(p).emplace_back(Instruction::CAPTURE);
468 addDefinedSymbol(it->string());
469 uint16_t var_id = addSymbol(*it);
470 pushNumber(var_id, page_ptr(p));
471 }
472 }
473
474 // create new page for function body
475 m_code_pages.emplace_back();
476 std::size_t page_id = m_code_pages.size() - 1;
477 // load value on the stack
478 page(p).emplace_back(Instruction::LOAD_CONST);
479 // save page_id into the constants table as PageAddr
480 pushNumber(addValue(page_id, x), page_ptr(p));
481
482 // pushing arguments from the stack into variables in the new scope
483 for (auto it = x.constList()[1].constList().begin(), it_end = x.constList()[1].constList().end(); it != it_end; ++it)
484 {
485 if (it->nodeType() == NodeType::Symbol)
486 {
487 page(page_id).emplace_back(Instruction::MUT);
488 uint16_t var_id = addSymbol(*it);
489 addDefinedSymbol(it->string());
490 pushNumber(var_id, page_ptr(page_id));
491 }
492 }
493
494 // push body of the function
495 _compile(x.constList()[2], page_id, false, true, var_name);
496
497 // return last value on the stack
498 page(page_id).emplace_back(Instruction::RET);
499
500 if (produces_result)
501 {
502 compilerWarning("Unused declared function", x);
503 page(p).push_back(Instruction::POP);
504 }
505 }
506
507 void Compiler::compileLetMutSet(Keyword n, const Node& x, int p)
508 {
509 uint16_t i = addSymbol(x.constList()[1]);
510 if (n != Keyword::Set)
511 addDefinedSymbol(x.constList()[1].string());
512
513 // put value before symbol id
514 putValue(x, p, false);
515
516 if (n == Keyword::Let)
517 page(p).push_back(Instruction::LET);
518 else if (n == Keyword::Mut)
519 page(p).push_back(Instruction::MUT);
520 else
521 page(p).push_back(Instruction::STORE);
522 pushNumber(i, page_ptr(p));
523 }
524
525 void Compiler::compileWhile(const Node& x, int p)
526 {
527 // save current position to jump there at the end of the loop
528 std::size_t current = page(p).size();
529 // push condition
530 _compile(x.constList()[1], p, false, false);
531 // absolute jump to end of block if condition is false
532 page(p).push_back(Instruction::POP_JUMP_IF_FALSE);
533 std::size_t jump_to_end_pos = page(p).size();
534 // absolute address to jump to if condition is false
535 pushNumber(0_u16, page_ptr(p));
536 // push code to page
537 _compile(x.constList()[2], p, true, false);
538 // loop, jump to the condition
539 page(p).push_back(Instruction::JUMP);
540 // abosolute address
541 pushNumber(static_cast<uint16_t>(current), page_ptr(p));
542 // set jump to end pos
543 setNumberAt(p, jump_to_end_pos, page(p).size());
544 }
545
546 void Compiler::compileQuote(const Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name)
547 {
548 // create new page for quoted code
549 m_code_pages.emplace_back();
550 std::size_t page_id = m_code_pages.size() - 1;
551 _compile(x.constList()[1], page_id, false, is_terminal, var_name);
552 page(page_id).emplace_back(Instruction::RET); // return to the last frame
553
554 // call it
555 uint16_t id = addValue(page_id, x); // save page_id into the constants table as PageAddr
556 page(p).emplace_back(Instruction::LOAD_CONST);
557 pushNumber(id, page_ptr(p));
558
559 if (produces_result)
560 {
561 compilerWarning("Unused quote expression", x);
562 page(p).push_back(Instruction::POP);
563 }
564 }
565
// Body of the function defined at Compiler.cpp:566 (listed as
// `void compilePluginImport(const internal::Node& x, int p)` in the documentation index
// of this file); its signature line is not part of this listing.
// Registers the node at index 1 of `x` as a constant, remembers its string in m_plugins,
// and emits a PLUGIN instruction followed by the id of that constant on page `p`.
567 {
568 // register plugin path in the constants table
569 uint16_t id = addValue(x.constList()[1]);
570 // save plugin name to use it later
571 m_plugins.push_back(x.constList()[1].string());
572 // add plugin instruction + id of the constant referring to the plugin path
573 page(p).emplace_back(Instruction::PLUGIN);
574 pushNumber(id, page_ptr(p));
575 }
576
577 void Compiler::compileDel(const Node& x, int p)
578 {
579 // get id of symbol to delete
580 uint16_t i = addSymbol(x.constList()[1]);
581
582 page(p).emplace_back(Instruction::DEL);
583 pushNumber(i, page_ptr(p));
584 }
585
// Compile a call node: a function/builtin call, a self tail call, or an operator.
// The callee (node 0 and the GetField nodes following it) is first compiled on a
// temporary page; the size of that page tells operators (1 instruction) apart from
// the other callees.
// @param x the call node
// @param p the page to write the instructions to
// @param produces_result when true, a POP is emitted after the call
// @param is_terminal when true, and the callee is a symbol equal to var_name, the call
//        is replaced by a JUMP to address 0 of the page
// @param var_name the name compared to the callee to detect such a call
586 void Compiler::handleCalls(const Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name)
587 {
// presumably negative page indexes designate temporary pages - verify against page()
588 m_temp_pages.emplace_back();
589 int proc_page = -static_cast<int>(m_temp_pages.size());
590 _compile(x.constList()[0], proc_page, false, false); // storing proc
591
592 // trying to handle chained closure.field.field.field...
// after this loop, n is the index of the first argument of the call
593 std::size_t n = 1; // we need it later
594 const std::size_t end = x.constList().size();
595 while (n < end)
596 {
597 if (x.constList()[n].nodeType() == NodeType::GetField)
598 {
599 _compile(x.constList()[n], proc_page, false, false);
600 n++;
601 }
602 else
603 break;
604 }
605 std::size_t proc_page_len = m_temp_pages.back().size();
606
607 // we know that operators take only 1 instruction, so if there are more
608 // it's a builtin/function
609 if (proc_page_len > 1)
610 {
// the function calls itself in terminal position: jump instead of calling
611 if (is_terminal && x.constList()[0].nodeType() == NodeType::Symbol && var_name == x.constList()[0].string())
612 {
613 // we can drop the temp page as we won't be using it
614 m_temp_pages.pop_back();
615
616 // push the arguments in reverse order
// n is at least 1, so the unsigned index drops below n before it could wrap around
617 for (std::size_t i = x.constList().size() - 1; i >= n; --i)
618 _compile(x.constList()[i], p, false, false);
619
620 // jump to the top of the function
621 page(p).push_back(Instruction::JUMP);
622 pushNumber(0_u16, page_ptr(p));
623
624 return; // skip the possible Instruction::POP at the end
625 }
626 else
627 {
628 // push arguments on current page
629 for (auto exp = x.constList().begin() + n, exp_end = x.constList().end(); exp != exp_end; ++exp)
630 _compile(*exp, p, false, false);
631 // push proc from temp page
632 for (auto const& inst : m_temp_pages.back())
633 page(p).push_back(inst);
634 m_temp_pages.pop_back();
635
636 // call the procedure
637 page(p).push_back(Instruction::CALL);
638 // number of arguments
// GetField and Capture nodes aren't counted as arguments
639 std::size_t args_count = 0;
640 for (auto it = x.constList().begin() + 1, it_end = x.constList().end(); it != it_end; ++it)
641 {
642 if (it->nodeType() != NodeType::GetField &&
643 it->nodeType() != NodeType::Capture)
644 args_count++;
645 }
646 pushNumber(static_cast<uint16_t>(args_count), page_ptr(p));
647 }
648 }
649 else // operator
650 {
651 // retrieve operator
652 auto op_inst = m_temp_pages.back()[0];
653 m_temp_pages.pop_back();
654
// no POP is emitted after an ASSERT
655 if (op_inst == Instruction::ASSERT)
656 produces_result = false;
657
658 // push arguments on current page
659 std::size_t exp_count = 0;
660 for (std::size_t index = n, size = x.constList().size(); index < size; ++index)
661 {
662 _compile(x.constList()[index], p, false, false);
663
// an expression is complete when the next node is neither a GetField nor a
// Capture, or when this node is the last one
664 if ((index + 1 < size &&
665 x.constList()[index + 1].nodeType() != NodeType::GetField &&
666 x.constList()[index + 1].nodeType() != NodeType::Capture) ||
667 index + 1 == size)
668 exp_count++;
669
670 // in order to be able to handle things like (op A B C D...)
671 // which should be transformed into A B op C op D op...
// NOTE(review): this test runs on every iteration, including the ones where
// exp_count wasn't incremented (next node is a GetField/Capture) - confirm intended
672 if (exp_count >= 2)
673 page(p).push_back(op_inst);
674 }
675
676 if (exp_count == 1)
677 {
678 if (isUnaryInst(static_cast<Instruction>(op_inst)))
679 page(p).push_back(op_inst);
680 else
681 throwCompilerError("Operator needs two arguments, but was called with only one", x.constList()[0]);
682 }
683
684 // need to check we didn't push the (op A B C D...) things for operators not supporting it
685 if (exp_count > 2)
686 {
687 switch (op_inst)
688 {
689 // authorized instructions
690 case Instruction::ADD: [[fallthrough]];
691 case Instruction::SUB: [[fallthrough]];
692 case Instruction::MUL: [[fallthrough]];
693 case Instruction::DIV: [[fallthrough]];
694 case Instruction::AND_: [[fallthrough]];
695 case Instruction::OR_: [[fallthrough]];
696 case Instruction::MOD:
697 break;
698
// NOTE(review): original line 700 is absent from this listing; the call receiving
// the arguments below is not visible - confirm against the real source file.
699 default:
701 "can not create a chained expression (of length " + std::to_string(exp_count) +
702 ") for operator `" + std::string(internal::operators[static_cast<std::size_t>(op_inst - Instruction::FIRST_OPERATOR)]) +
703 "'. You most likely forgot a `)'.",
704 x);
705 }
706 }
707 }
708
709 if (produces_result)
710 page(p).push_back(Instruction::POP);
711 }
712
713 void Compiler::putValue(const Node& x, int p, bool produces_result)
714 {
715 std::string name = x.constList()[1].string();
716
717 // starting at index = 2 because x is a (let|mut|set variable ...) node
718 for (std::size_t idx = 2, end = x.constList().size(); idx < end; ++idx)
719 _compile(x.constList()[idx], p, produces_result, false, name);
720 }
721
722 uint16_t Compiler::addSymbol(const Node& sym)
723 {
724 // otherwise, add the symbol, and return its id in the table
725 auto it = std::find_if(m_symbols.begin(), m_symbols.end(), [&sym](const Node& sym_node) -> bool {
726 return sym_node.string() == sym.string();
727 });
728 if (it == m_symbols.end())
729 {
730 m_symbols.push_back(sym);
731 it = m_symbols.begin() + m_symbols.size() - 1;
732 }
733
734 auto distance = std::distance(m_symbols.begin(), it);
735 if (distance < std::numeric_limits<uint16_t>::max())
736 return static_cast<uint16_t>(distance);
737 else
738 throwCompilerError("Too many symbols (exceeds 65'536), aborting compilation.", sym);
739 }
740
741 uint16_t Compiler::addValue(const Node& x)
742 {
743 ValTableElem v(x);
744 auto it = std::find(m_values.begin(), m_values.end(), v);
745 if (it == m_values.end())
746 {
747 m_values.push_back(v);
748 it = m_values.begin() + m_values.size() - 1;
749 }
750
751 auto distance = std::distance(m_values.begin(), it);
752 if (distance < std::numeric_limits<uint16_t>::max())
753 return static_cast<uint16_t>(distance);
754 else
755 throwCompilerError("Too many values (exceeds 65'536), aborting compilation.", x);
756 }
757
758 uint16_t Compiler::addValue(std::size_t page_id, const Node& current)
759 {
760 ValTableElem v(page_id);
761 auto it = std::find(m_values.begin(), m_values.end(), v);
762 if (it == m_values.end())
763 {
764 m_values.push_back(v);
765 it = m_values.begin() + m_values.size() - 1;
766 }
767
768 auto distance = std::distance(m_values.begin(), it);
769 if (distance < std::numeric_limits<uint16_t>::max())
770 return static_cast<uint16_t>(distance);
771 else
772 throwCompilerError("Too many values (exceeds 65'536), aborting compilation.", current);
773 }
774
775 void Compiler::addDefinedSymbol(const std::string& sym)
776 {
777 // otherwise, add the symbol, and return its id in the table
778 auto it = std::find(m_defined_symbols.begin(), m_defined_symbols.end(), sym);
779 if (it == m_defined_symbols.end())
780 m_defined_symbols.push_back(sym);
781 }
782
// Body of the function defined at Compiler.cpp:783 (listed as
// `void checkForUndefinedSymbol()` in the documentation index of this file);
// its signature line is not part of this listing.
// Goes through the symbol table and throws a compilation error for the first symbol
// which is neither in the defined symbols list nor possibly coming from a plugin.
784 {
785 for (const Node& sym : m_symbols)
786 {
787 const std::string& str = sym.string();
788 bool is_plugin = mayBeFromPlugin(str);
789
790 auto it = std::find(m_defined_symbols.begin(), m_defined_symbols.end(), str);
791 if (it == m_defined_symbols.end() && !is_plugin)
792 {
// try to propose a close defined name to the user
793 std::string suggestion = offerSuggestion(str);
794 if (suggestion.empty())
795 throwCompilerError("Unbound variable error \"" + str + "\" (variable is used but not defined)", sym);
796
797 throwCompilerError("Unbound variable error \"" + str + "\" (did you mean \"" + suggestion + "\"?)", sym);
798 }
799 }
800 }
801
802 std::string Compiler::offerSuggestion(const std::string& str)
803 {
804 std::string suggestion;
805 // our suggestion shouldn't require more than half the string to change
806 std::size_t suggestion_distance = str.size() / 2;
807
808 for (const std::string& symbol : m_defined_symbols)
809 {
810 std::size_t current_distance = Utils::levenshteinDistance(str, symbol);
811 if (current_distance <= suggestion_distance)
812 {
813 suggestion_distance = current_distance;
814 suggestion = symbol;
815 }
816 }
817
818 return suggestion;
819 }
820
821 void Compiler::pushNumber(uint16_t n, std::vector<uint8_t>* page) noexcept
822 {
823 if (page == nullptr)
824 {
825 m_bytecode.push_back((n & 0xff00) >> 8);
826 m_bytecode.push_back(n & 0x00ff);
827 }
828 else
829 {
830 page->emplace_back((n & 0xff00) >> 8);
831 page->emplace_back(n & 0x00ff);
832 }
833 }
834}
Host the declaration of all the ArkScript builtins.
ArkScript compiler is in charge of transforming the AST into bytecode.
constexpr int ARK_VERSION_MAJOR
Definition: Constants.hpp:16
constexpr int ARK_VERSION_PATCH
Definition: Constants.hpp:18
constexpr int ARK_VERSION_MINOR
Definition: Constants.hpp:17
User defined literals for Ark internals.
Handles the macros and their expansion in ArkScript source code.
Lots of utilities about string, filesystem and more.
CompilationError thrown by the compiler.
Definition: Exceptions.hpp:131
void compilerWarning(const std::string &message, const internal::Node &node)
Display a warning message.
Definition: Compiler.cpp:256
void _compile(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name="")
Compile a single node recursively.
Definition: Compiler.cpp:262
void putValue(const internal::Node &x, int p, bool produces_result)
Put a value in the bytecode, handling the closures chains.
Definition: Compiler.cpp:713
void pushNumber(uint16_t n, std::vector< uint8_t > *page=nullptr) noexcept
Push a number on stack (need 2 bytes)
Definition: Compiler.cpp:821
void pushSpecificInstArgc(internal::Instruction inst, uint16_t previous, int p) noexcept
Compute specific instruction argument count.
Definition: Compiler.cpp:232
void compileIf(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name)
Definition: Compiler.cpp:426
std::optional< internal::Instruction > isSpecific(const std::string &name) noexcept
Check if a symbol needs to be compiled to a specific instruction.
Definition: Compiler.hpp:167
std::vector< std::vector< uint8_t > > m_temp_pages
We need temporary code pages for some compilation passes.
Definition: Compiler.hpp:86
void compileLetMutSet(internal::Keyword n, const internal::Node &x, int p)
Definition: Compiler.cpp:507
std::vector< std::string > m_defined_symbols
Definition: Compiler.hpp:82
Compiler(unsigned debug, const std::vector< std::string > &libenv, uint16_t options=DefaultFeatures)
Construct a new Compiler object.
Definition: Compiler.cpp:21
void checkForUndefinedSymbol()
Checks for undefined symbols, not present in the defined symbols table.
Definition: Compiler.cpp:783
internal::Optimizer m_optimizer
Definition: Compiler.hpp:78
const bytecode_t & bytecode() noexcept
Return the constructed bytecode object.
Definition: Compiler.cpp:88
bool mayBeFromPlugin(const std::string &name) noexcept
Checking if a symbol may be coming from a plugin.
Definition: Compiler.cpp:241
void compileDel(const internal::Node &x, int p)
Definition: Compiler.cpp:577
void compileFunction(const internal::Node &x, int p, bool produces_result, const std::string &var_name)
Definition: Compiler.cpp:454
std::vector< std::string > m_plugins
Definition: Compiler.hpp:83
std::vector< uint8_t > & page(int i) noexcept
helper functions to get a temp or finalized code page
Definition: Compiler.hpp:109
void compile()
Start the compilation.
Definition: Compiler.cpp:35
void compilePluginImport(const internal::Node &x, int p)
Definition: Compiler.cpp:566
void addDefinedSymbol(const std::string &sym)
Register a symbol as defined, so that later we can throw errors on undefined symbols.
Definition: Compiler.cpp:775
std::string offerSuggestion(const std::string &str)
Suggest a symbol of what the user may have meant to input.
Definition: Compiler.cpp:802
unsigned m_debug
the debug level of the compiler
Definition: Compiler.hpp:89
std::vector< internal::ValTableElem > m_values
Definition: Compiler.hpp:84
void compileWhile(const internal::Node &x, int p)
Definition: Compiler.cpp:525
std::optional< std::size_t > isBuiltin(const std::string &name) noexcept
Checking if a symbol is a builtin.
Definition: Compiler.cpp:200
uint16_t m_options
Definition: Compiler.hpp:79
internal::Parser m_parser
Definition: Compiler.hpp:77
uint16_t addSymbol(const internal::Node &sym)
Register a given node in the symbol table.
Definition: Compiler.cpp:722
void compileSpecific(const internal::Node &c0, const internal::Node &x, int p, bool produces_result)
Definition: Compiler.cpp:388
uint16_t addValue(const internal::Node &x)
Register a given node in the value table.
Definition: Compiler.cpp:741
std::vector< uint8_t > * page_ptr(int i) noexcept
helper functions to get a temp or finalized code page
Definition: Compiler.hpp:122
void handleCalls(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name)
Definition: Compiler.cpp:586
void feed(const std::string &code, const std::string &filename=ARK_NO_NAME_FILE)
Feed the different variables with information taken from the given source code file.
Definition: Compiler.cpp:26
void compileSymbol(const internal::Node &x, int p, bool produces_result)
Definition: Compiler.cpp:362
void setNumberAt(int p, std::size_t at_inst, std::size_t number)
Definition: Compiler.hpp:129
std::vector< internal::Node > m_symbols
Definition: Compiler.hpp:81
void throwCompilerError(const std::string &message, const internal::Node &node)
Throw a nice error message.
Definition: Compiler.cpp:251
std::optional< std::size_t > isOperator(const std::string &name) noexcept
Checking if a symbol is an operator.
Definition: Compiler.cpp:192
void compileQuote(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name)
Definition: Compiler.cpp:546
void saveTo(const std::string &file)
Save generated bytecode to a file.
Definition: Compiler.cpp:78
std::vector< std::vector< uint8_t > > m_code_pages
Definition: Compiler.hpp:85
bool isUnaryInst(internal::Instruction inst) noexcept
Check if a given instruction is unary (takes only one argument)
Definition: Compiler.cpp:211
bytecode_t m_bytecode
Definition: Compiler.hpp:88
std::size_t countArkObjects(const std::vector< internal::Node > &lst) noexcept
Count the number of "valid" ark objects in a node.
Definition: Compiler.cpp:181
void pushFileHeader() noexcept
Push the file headers (magic, version used, timestamp)
Definition: Compiler.cpp:93
void pushSymAndValTables()
Push the symbols and values tables.
Definition: Compiler.cpp:125
The class handling the macros definitions and calls, given an AST.
Definition: Processor.hpp:31
const Node & ast() const noexcept
Return the modified AST.
Definition: Processor.cpp:46
void feed(const Node &ast)
Send the complete AST (after the inclusions and stuff), and work on it.
Definition: Processor.cpp:30
A node of an Abstract Syntax Tree for ArkScript.
Definition: Node.hpp:29
NodeType nodeType() const noexcept
Return the node type.
Definition: Node.cpp:126
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition: Node.cpp:92
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition: Node.cpp:119
void feed(const Node &ast)
Send the AST to the optimizer, then run the different optimization strategies on it.
Definition: Optimizer.cpp:9
const Node & ast() const noexcept
Returns the modified AST.
Definition: Optimizer.cpp:17
const Node & ast() const noexcept
Return the generated AST.
Definition: Parser.cpp:59
void feed(const std::string &code, const std::string &filename=ARK_NO_NAME_FILE)
Give the code to parse.
Definition: Parser.cpp:21
std::vector< std::string > splitString(const std::string &source, char sep)
Cut a string into pieces, given a character separator.
Definition: Utils.hpp:34
int levenshteinDistance(const std::string &str1, const std::string &str2)
Calculate the Levenshtein distance between two strings.
Definition: Utils.cpp:33
const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 25 > operators
Definition: Common.hpp:89
std::string makeNodeBasedErrorCtx(const std::string &message, const Node &node)
Construct an error message based on a given node.
Keyword
The different keywords available.
Definition: Common.hpp:59
Instruction
The different bytecodes are stored here.
Definition: Builtins.hpp:21
std::vector< uint8_t > bytecode_t
Definition: Common.hpp:22
constexpr uint16_t FeatureShowWarnings
Definition: Constants.hpp:49
A Compiler Value class helper to handle multiple types.