ArkScript
A small, lisp-inspired, functional scripting language
ASTLowerer.cpp
Go to the documentation of this file.
2
3#include <cassert>
4#include <ranges>
5#include <utility>
6#include <algorithm>
7#include <fmt/core.h>
8
13
14namespace Ark::internal
15{
16 using namespace literals;
17
18 enum class CallType
19 {
20 Classic,
22 Symbol,
25 };
26
27 ASTLowerer::ASTLowerer(const unsigned debug) :
28 Pass("ASTLowerer", debug)
29 {}
30
31 void ASTLowerer::addToTables(const std::vector<std::string>& symbols, const std::vector<ValTableElem>& constants)
32 {
33 std::ranges::copy(symbols, std::back_inserter(m_symbols));
34 std::ranges::copy(constants, std::back_inserter(m_values));
35 }
36
37 void ASTLowerer::offsetPagesBy(const std::size_t offset)
38 {
40 }
41
43 {
44 m_logger.traceStart("process");
45 const Page global = createNewCodePage();
46
47 // gather symbols, values, and start to create code segments
49 ast,
50 /* current_page */ global,
51 /* is_result_unused= */ true,
52 /* is_terminal= */ false);
54 }
55
56 const std::vector<IR::Block>& ASTLowerer::intermediateRepresentation() const noexcept
57 {
58 return m_code_pages;
59 }
60
61 const std::vector<std::string>& ASTLowerer::symbols() const noexcept
62 {
63 return m_symbols;
64 }
65
66 const std::vector<ValTableElem>& ASTLowerer::values() const noexcept
67 {
68 return m_values;
69 }
70
71 std::optional<Instruction> ASTLowerer::getOperator(const std::string& name) noexcept
72 {
73 const auto it = std::ranges::find(Language::operators, name);
74 if (it != Language::operators.end())
75 return static_cast<Instruction>(std::distance(Language::operators.begin(), it) + FIRST_OPERATOR);
76 return std::nullopt;
77 }
78
79 std::optional<uint16_t> ASTLowerer::getBuiltin(const std::string& name) noexcept
80 {
81 const auto it = std::ranges::find_if(Builtins::builtins,
82 [&name](const std::pair<std::string, Value>& element) -> bool {
83 return name == element.first;
84 });
85 if (it != Builtins::builtins.end())
86 return static_cast<uint16_t>(std::distance(Builtins::builtins.begin(), it));
87 return std::nullopt;
88 }
89
90 std::optional<Instruction> ASTLowerer::getListInstruction(const std::string& name) noexcept
91 {
92 const auto it = std::ranges::find(Language::listInstructions, name);
93 if (it != Language::listInstructions.end())
94 return static_cast<Instruction>(std::distance(Language::listInstructions.begin(), it) + LIST);
95 return std::nullopt;
96 }
97
99 {
100 if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol)
101 return node.constList().front().string() == "breakpoint";
102 return false;
103 }
104
106 {
107 if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword)
108 // a 'begin' node produces a value if the last node in it produces a value
109 return (node.constList()[0].keyword() == Keyword::Begin && node.constList().size() > 1 && nodeProducesOutput(node.constList().back())) ||
110 // a function always produces a value ; even if it ends with a node not producing one, the VM returns nil
111 node.constList()[0].keyword() == Keyword::Fun ||
112 // a let/mut/set pushes the value that was assigned
113 node.constList()[0].keyword() == Keyword::Let ||
114 node.constList()[0].keyword() == Keyword::Mut ||
115 node.constList()[0].keyword() == Keyword::Set ||
116 // a condition produces a value if all its branches produce a value
117 (node.constList()[0].keyword() == Keyword::If &&
118 nodeProducesOutput(node.constList()[2]) &&
119 (node.constList().size() == 3 || nodeProducesOutput(node.constList()[3])));
120 // breakpoint do not produce values
121 if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol)
122 {
123 const std::string& name = node.constList().front().string();
124 return name != "breakpoint";
125 }
126 return true; // any other node, function call, symbol, number...
127 }
128
129 bool ASTLowerer::isUnaryInst(const Instruction inst) noexcept
130 {
131 switch (inst)
132 {
133 case NOT: [[fallthrough]];
134 case LEN: [[fallthrough]];
135 case IS_EMPTY: [[fallthrough]];
136 case TAIL: [[fallthrough]];
137 case HEAD: [[fallthrough]];
138 case IS_NIL: [[fallthrough]];
139 case TO_NUM: [[fallthrough]];
140 case TO_STR: [[fallthrough]];
141 case TYPE:
142 return true;
143
144 default:
145 return false;
146 }
147 }
148
149 bool ASTLowerer::isTernaryInst(const Instruction inst) noexcept
150 {
151 switch (inst)
152 {
153 case AT_AT:
154 return true;
155
156 default:
157 return false;
158 }
159 }
160
162 {
163 switch (inst)
164 {
165 case ADD: [[fallthrough]];
166 case SUB: [[fallthrough]];
167 case MUL: [[fallthrough]];
168 case DIV:
169 return true;
170
171 default:
172 return false;
173 }
174 }
175
176 void ASTLowerer::warning(const std::string& message, const Node& node)
177 {
179 }
180
181 void ASTLowerer::buildAndThrowError(const std::string& message, const Node& node)
182 {
183 throw CodeError(message, CodeErrorContext(node.filename(), node.position()));
184 }
185
186 void ASTLowerer::makeError(const ErrorKind kind, const Node& node, const std::string& additional_ctx)
187 {
188 const std::string invalid_node_msg = "The given node doesn't return a value, and thus can't be used as an expression.";
189
190 switch (kind)
191 {
193 buildAndThrowError(fmt::format("Invalid node ; if it was computed by a macro, check that a node is returned"), node);
194 break;
195
197 buildAndThrowError(fmt::format("Invalid node inside call to `{}'. {}", additional_ctx, invalid_node_msg), node);
198 break;
199
201 buildAndThrowError(fmt::format("Invalid node inside tail call to `{}'. {}", additional_ctx, invalid_node_msg), node);
202 break;
203
205 buildAndThrowError(fmt::format("Invalid node inside call to operator `{}'. {}", additional_ctx, invalid_node_msg), node);
206 break;
207 }
208 }
209
210 void ASTLowerer::compileExpression(Node& x, const Page p, const bool is_result_unused, const bool is_terminal)
211 {
212 // register symbols
213 if (x.nodeType() == NodeType::Symbol)
214 compileSymbol(x, p, is_result_unused, /* can_use_ref= */ true);
215 else if (x.nodeType() == NodeType::Field)
216 {
217 // the parser guarantees us that there is at least 2 elements (eg: a.b)
218 compileSymbol(x.list()[0], p, is_result_unused, /* can_use_ref= */ true);
219 for (auto it = x.constList().begin() + 1, end = x.constList().end(); it != end; ++it)
220 {
221 uint16_t i = addSymbol(*it);
222 page(p).emplace_back(GET_FIELD, i);
223 }
224 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
225 }
226 // register values
227 else if (x.nodeType() == NodeType::String || x.nodeType() == NodeType::Number)
228 {
229 uint16_t i = addValue(x);
230
231 if (!is_result_unused)
232 page(p).emplace_back(LOAD_CONST, i);
233 }
234 // namespace nodes
235 else if (x.nodeType() == NodeType::Namespace)
236 compileExpression(*x.constArkNamespace().ast, p, is_result_unused, is_terminal);
237 else if (x.nodeType() == NodeType::List)
238 {
239 // empty code block should be nil
240 if (x.constList().empty())
241 {
242 if (!is_result_unused)
243 {
244 static const std::optional<uint16_t> nil = getBuiltin("nil");
245 page(p).emplace_back(BUILTIN, nil.value());
246 }
247 }
248 // list instructions
249 else if (const auto head = x.constList()[0]; head.nodeType() == NodeType::Symbol && getListInstruction(head.string()).has_value())
250 compileListInstruction(x, p, is_result_unused);
251 else if (head.nodeType() == NodeType::Symbol && head.string() == Language::Apply)
252 compileApplyInstruction(x, p, is_result_unused);
253 // registering structures
254 else if (head.nodeType() == NodeType::Keyword)
255 {
256 switch (const Keyword keyword = head.keyword())
257 {
258 case Keyword::If:
259 compileIf(x, p, is_result_unused, is_terminal);
260 break;
261
262 case Keyword::Set:
263 [[fallthrough]];
264 case Keyword::Let:
265 [[fallthrough]];
266 case Keyword::Mut:
267 compileLetMutSet(keyword, x, p, is_result_unused);
268 break;
269
270 case Keyword::Fun:
271 compileFunction(x, p, is_result_unused);
272 break;
273
274 case Keyword::Begin:
275 {
276 for (std::size_t i = 1, size = x.list().size(); i < size; ++i)
278 x.list()[i],
279 p,
280 // All the nodes in a 'begin' (except for the last one) are producing a result that we want to drop.
281 /* is_result_unused= */ (i != size - 1) || is_result_unused,
282 // If the 'begin' is a terminal node, only its last node is terminal.
283 /* is_terminal= */ is_terminal && (i == size - 1));
284 break;
285 }
286
287 case Keyword::While:
288 compileWhile(x, p);
289 break;
290
291 case Keyword::Import:
293 break;
294
295 case Keyword::Del:
296 page(p).emplace_back(DEL, addSymbol(x.constList()[1]));
297 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
298 break;
299 }
300 }
301 else
302 {
303 // If we are here, we should have a function name via the m_opened_vars.
304 // Push arguments first, then function name, then call it.
305 handleCalls(x, p, is_result_unused, is_terminal);
306 }
307 }
308 else if (x.nodeType() != NodeType::Unused)
310 fmt::format(
311 "NodeType `{}' not handled in ASTLowerer::compileExpression. Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark",
312 typeToString(x)),
313 x);
314 }
315
316 void ASTLowerer::compileSymbol(const Node& x, const Page p, const bool is_result_unused, const bool can_use_ref)
317 {
318 const std::string& name = x.string();
319
320 if (const auto it_builtin = getBuiltin(name))
321 page(p).emplace_back(Instruction::BUILTIN, it_builtin.value());
322 else if (getOperator(name).has_value())
323 buildAndThrowError(fmt::format("Found a freestanding operator: `{}`. It can not be used as value like `+', where (let add +) (add 1 2) would be valid", name), x);
324 else
325 {
326 if (can_use_ref)
327 {
328 const std::optional<std::size_t> maybe_local_idx = m_locals_locator.lookupLastScopeByName(name);
329 if (maybe_local_idx.has_value())
330 page(p).emplace_back(LOAD_FAST_BY_INDEX, static_cast<uint16_t>(maybe_local_idx.value()));
331 else
332 page(p).emplace_back(LOAD_FAST, addSymbol(x));
333 }
334 else
335 page(p).emplace_back(LOAD_SYMBOL, addSymbol(x));
336 }
337
338 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
339
340 if (is_result_unused)
341 {
342 warning("Statement has no effect", x);
343 page(p).emplace_back(POP);
344 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
345 }
346 }
347
348 void ASTLowerer::compileListInstruction(Node& x, const Page p, const bool is_result_unused)
349 {
350 const Node head = x.constList()[0];
351 const std::string& name = head.string();
352 const Instruction inst = getListInstruction(name).value();
353
354 // length of at least 1 since we got a symbol name
355 const auto argc = x.constList().size() - 1u;
356 // error, can not use append/concat/pop (and their in place versions) with a <2 length argument list
357 if (argc < 2 && APPEND <= inst && inst <= SET_AT_2_INDEX)
358 buildAndThrowError(fmt::format("Can not use {} with less than 2 arguments", name), head);
359 if (std::cmp_greater(argc, MaxValue16Bits))
360 buildAndThrowError(fmt::format("Too many arguments ({}), exceeds {}", argc, MaxValue16Bits), x);
361 if (argc != 3 && inst == SET_AT_INDEX)
362 buildAndThrowError(fmt::format("Expected 3 arguments (list, index, value) for {}, got {}", name, argc), head);
363 if (argc != 4 && inst == SET_AT_2_INDEX)
364 buildAndThrowError(fmt::format("Expected 4 arguments (list, y, x, value) for {}, got {}", name, argc), head);
365
366 // compile arguments in reverse order
367 for (std::size_t i = x.constList().size() - 1u; i > 0; --i)
368 {
369 Node& node = x.list()[i];
370 if (nodeProducesOutput(node))
371 compileExpression(node, p, false, false);
372 else
374 }
375
376 // put inst and number of arguments
377 std::size_t inst_argc = 0;
378 switch (inst)
379 {
380 case LIST:
381 inst_argc = argc;
382 break;
383
384 case APPEND:
385 [[fallthrough]];
386 case APPEND_IN_PLACE:
387 [[fallthrough]];
388 case CONCAT:
389 [[fallthrough]];
390 case CONCAT_IN_PLACE:
391 inst_argc = argc - 1;
392 break;
393
394 case POP_LIST:
395 inst_argc = 0;
396 break;
397
398 case SET_AT_INDEX:
399 [[fallthrough]];
400 case SET_AT_2_INDEX:
401 [[fallthrough]];
403 inst_argc = is_result_unused ? 0 : 1;
404 break;
405
406 default:
407 break;
408 }
409 page(p).emplace_back(inst, static_cast<uint16_t>(inst_argc));
410 page(p).back().setSourceLocation(head.filename(), head.position().start.line);
411
412 if (!is_result_unused && (inst == APPEND_IN_PLACE || inst == CONCAT_IN_PLACE))
413 {
414 // Load the first argument which should be a symbol (or field),
415 // that append!/concat! write to, so that we have its new value available.
416 compileExpression(x.list()[1], p, false, false);
417 }
418
419 // append!, concat!, pop!, @= and @@= can push to the stack, but not using its returned value isn't an error
420 if (is_result_unused && (inst == LIST || inst == APPEND || inst == CONCAT || inst == POP_LIST))
421 {
422 warning("Ignoring return value of function", x);
423 page(p).emplace_back(POP);
424 }
425 }
426
427 void ASTLowerer::compileApplyInstruction(Node& x, const Page p, const bool is_result_unused)
428 {
429 const Node head = x.constList()[0];
430 const auto argc = x.constList().size() - 1u;
431
432 if (argc != 2)
433 buildAndThrowError(fmt::format("Expected 2 arguments (function, arguments) for apply, got {}", argc), head);
434
435 const auto label_return = IR::Entity::Label(m_current_label++);
436 page(p).emplace_back(IR::Entity::Goto(label_return, PUSH_RETURN_ADDRESS));
437
438 for (Node& node : x.list() | std::ranges::views::drop(1))
439 {
440 if (nodeProducesOutput(node))
441 compileExpression(node, p, false, false);
442 else
444 }
445 page(p).emplace_back(APPLY);
446 // patch the PUSH_RETURN_ADDRESS instruction with the return location (IP=CALL instruction IP)
447 page(p).emplace_back(label_return);
448
449 if (is_result_unused)
450 page(p).emplace_back(POP);
451 }
452
453 void ASTLowerer::compileIf(Node& x, const Page p, const bool is_result_unused, const bool is_terminal)
454 {
455 if (x.constList().size() == 1)
456 buildAndThrowError("Invalid condition: missing 'cond' and 'then' nodes, expected (if cond then)", x);
457 if (x.constList().size() == 2)
458 buildAndThrowError(fmt::format("Invalid condition: missing 'then' node, expected (if {} then)", x.constList()[1].repr()), x);
459
460 // compile condition
461 compileExpression(x.list()[1], p, false, false);
462 page(p).back().setSourceLocation(x.constList()[1].filename(), x.constList()[1].position().start.line);
463
464 // jump only if needed to the "true" branch
465 const auto label_then = IR::Entity::Label(m_current_label++);
466 page(p).emplace_back(IR::Entity::GotoIf(label_then, true));
467
468 // "false" branch code
469 if (x.constList().size() == 4) // we have an else clause
470 {
472 compileExpression(x.list()[3], p, is_result_unused, is_terminal);
473 page(p).back().setSourceLocation(x.constList()[3].filename(), x.constList()[3].position().start.line);
475 }
476
477 // when else is finished, jump to end
478 const auto label_end = IR::Entity::Label(m_current_label++);
479 page(p).emplace_back(IR::Entity::Goto(label_end));
480
481 // absolute address to jump to if condition is true
482 page(p).emplace_back(label_then);
483 // if code
485 compileExpression(x.list()[2], p, is_result_unused, is_terminal);
486 page(p).back().setSourceLocation(x.constList()[2].filename(), x.constList()[2].position().start.line);
488 // set jump to end pos
489 page(p).emplace_back(label_end);
490 }
491
492 void ASTLowerer::compileFunction(Node& x, const Page p, const bool is_result_unused)
493 {
494 if (const auto args = x.constList()[1]; args.nodeType() != NodeType::List)
495 buildAndThrowError(fmt::format("Expected a well formed argument(s) list, got a {}", typeToString(args)), args);
496 if (x.constList().size() != 3)
498
499 // capture, if needed
500 std::size_t capture_inst_count = 0;
501 for (const auto& node : x.constList()[1].constList())
502 {
503 if (node.nodeType() == NodeType::Capture)
504 {
505 const uint16_t symbol_id = addSymbol(node);
506
507 // We have an unqualified name that isn't the captured name
508 // This means we need to rename the captured value
509 if (const auto& maybe_nqn = node.getUnqualifiedName(); maybe_nqn.has_value() && maybe_nqn.value() != node.string())
510 {
511 const uint16_t nqn_id = addSymbol(Node(NodeType::Symbol, maybe_nqn.value()));
512
513 page(p).emplace_back(RENAME_NEXT_CAPTURE, nqn_id);
514 page(p).emplace_back(CAPTURE, symbol_id);
515 }
516 else
517 page(p).emplace_back(CAPTURE, symbol_id);
518
519 ++capture_inst_count;
520 }
521 }
522 const bool is_closure = capture_inst_count > 0;
523
525 is_closure
528
529 // create new page for function body
530 const auto function_body_page = createNewCodePage();
531 // save page_id into the constants table as PageAddr and load the const
532 page(p).emplace_back(is_closure ? MAKE_CLOSURE : LOAD_CONST, addValue(function_body_page.index, x));
533
534 // pushing arguments from the stack into variables in the new scope
535 for (const auto& node : x.constList()[1].constList() | std::ranges::views::reverse)
536 {
537 if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::MutArg)
538 {
539 page(function_body_page).emplace_back(STORE, addSymbol(node));
540 m_locals_locator.addLocal(node.string());
541 }
542 else if (node.nodeType() == NodeType::RefArg)
543 {
544 page(function_body_page).emplace_back(STORE_REF, addSymbol(node));
545 m_locals_locator.addLocal(node.string());
546 }
547 }
548
549 // Register an opened variable as "#anonymous", which won't match any valid names inside ASTLowerer::handleCalls.
550 // This way we can continue to safely apply optimisations on
551 // (let name (fun (e) (map lst (fun (e) (name e)))))
552 // Otherwise, `name` would have been optimized to a CALL_CURRENT_PAGE, which would have returned the wrong page.
553 if (x.isAnonymousFunction())
554 m_opened_vars.emplace("#anonymous");
555 // push body of the function
556 compileExpression(x.list()[2], function_body_page, false, true);
557 if (x.isAnonymousFunction())
558 m_opened_vars.pop();
559
560 // return last value on the stack
561 page(function_body_page).emplace_back(RET);
563
564 // if the computed function is unused, pop it
565 if (is_result_unused)
566 {
567 warning("Unused declared function", x);
568 page(p).emplace_back(POP);
569 }
570 }
571
572 void ASTLowerer::compileLetMutSet(const Keyword n, Node& x, const Page p, const bool is_result_unused)
573 {
574 if (const auto sym = x.constList()[1]; sym.nodeType() != NodeType::Symbol)
575 buildAndThrowError(fmt::format("Expected a symbol, got a {}", typeToString(sym)), sym);
576 if (x.constList().size() != 3)
578
579 const std::string name = x.constList()[1].string();
580 uint16_t i = addSymbol(x.constList()[1]);
581
582 if (!m_opened_vars.empty() && m_opened_vars.top() == name)
583 buildAndThrowError("Can not define a variable using the same name as the function it is defined inside. You need to rename the function or the variable", x);
584
585 const bool is_function = x.constList()[2].isFunction();
586 if (is_function)
587 {
588 m_opened_vars.push(name);
589 x.list()[2].setFunctionKind(/* anonymous= */ false);
590 }
591
592 // put value before symbol id
593 // starting at index = 2 because x is a (let|mut|set variable ...) node
594 compileExpression(x.list()[2], p, false, false);
595
596 if (n == Keyword::Let || n == Keyword::Mut)
597 {
598 page(p).emplace_back(STORE, i);
600 }
601 else
602 page(p).emplace_back(SET_VAL, i);
603
604 if (!is_result_unused)
605 page(p).emplace_back(LOAD_SYMBOL, i);
606
607 if (is_function)
608 m_opened_vars.pop();
609 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
610 }
611
613 {
614 if (x.constList().size() != 3)
616
618 page(p).emplace_back(CREATE_SCOPE);
619 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
620
621 // save current position to jump there at the end of the loop
622 const auto label_loop = IR::Entity::Label(m_current_label++);
623 page(p).emplace_back(label_loop);
624 // push condition
625 compileExpression(x.list()[1], p, false, false);
626 // absolute jump to end of block if condition is false
627 const auto label_end = IR::Entity::Label(m_current_label++);
628 page(p).emplace_back(IR::Entity::GotoIf(label_end, false));
629 // push code to page
630 compileExpression(x.list()[2], p, true, false);
631
632 // reset the scope at the end of the loop so that indices are still valid
633 // otherwise, (while true { (let a 5) (print a) (let b 6) (print b) })
634 // would print 5, 6, then only 6 as we emit LOAD_SYMBOL_FROM_INDEX 0 and b is the last in the scope
635 // loop, jump to the condition
636 page(p).emplace_back(IR::Entity::Goto(label_loop, RESET_SCOPE_JUMP));
637
638 // absolute address to jump to if condition is false
639 page(p).emplace_back(label_end);
640
641 page(p).emplace_back(POP_SCOPE);
643 }
644
646 {
647 std::string path;
648 const Node package_node = x.constList()[1];
649 for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i)
650 {
651 path += package_node.constList()[i].string();
652 if (i + 1 != end)
653 path += "/";
654 }
655 path += ".arkm";
656
657 // register plugin path in the constants table
658 uint16_t id = addValue(Node(NodeType::String, path));
659 // add plugin instruction + id of the constant referring to the plugin path
660 page(p).emplace_back(PLUGIN, id);
661 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
662 }
663
664 void ASTLowerer::pushFunctionCallArguments(Node& call, const Page p, const bool is_tail_call)
665 {
666 const auto node = call.constList()[0];
667
668 for (Node& value : std::ranges::drop_view(call.list(), 1))
669 {
670 if (nodeProducesOutput(value) || isBreakpoint(value))
671 {
672 // we have to disallow usage of references in tail calls, because if we shuffle arguments around while using refs, they will end up with the same value
673 if (value.nodeType() == NodeType::Symbol && is_tail_call)
674 compileSymbol(value, p, /* is_result_unused= */ false, /* can_use_ref= */ false);
675 else
676 compileExpression(value, p, /* is_result_unused= */ false, /* is_terminal= */ false);
677 }
678 else
680 }
681 }
682
683 void ASTLowerer::handleCalls(Node& x, const Page p, bool is_result_unused, const bool is_terminal)
684 {
685 const Node& node = x.constList()[0];
686 bool matched = false;
687
688 if (node.nodeType() == NodeType::Symbol)
689 {
690 if (node.string() == Language::And || node.string() == Language::Or)
691 {
692 matched = true;
693 handleShortcircuit(x, p);
694 }
695 if (const auto maybe_operator = getOperator(node.string()); maybe_operator.has_value())
696 {
697 matched = true;
698 if (maybe_operator.value() == BREAKPOINT)
699 is_result_unused = false;
700 handleOperator(x, p, maybe_operator.value());
701 }
702 }
703
704 if (!matched)
705 {
706 // if nothing else matched, then compile a function call
707 if (handleFunctionCall(x, p, is_terminal))
708 // if it returned true, we compiled a tail call, skip the POP at the end
709 return;
710 }
711
712 if (is_result_unused)
713 page(p).emplace_back(POP);
714 }
715
717 {
718 const Node& node = x.constList()[0];
719 const auto name = node.string(); // and / or
721
722 // short circuit implementation
723 if (x.constList().size() < 3)
725 fmt::format(
726 "Expected at least 2 arguments while compiling '{}', got {}",
727 name,
728 x.constList().size() - 1),
729 x);
730
731 if (!nodeProducesOutput(x.list()[1]))
733 fmt::format(
734 "Can not use `{}' inside a `{}' expression, as it doesn't return a value",
735 x.list()[1].repr(), name),
736 x.list()[1]);
737 compileExpression(x.list()[1], p, false, false);
738
739 const auto label_shortcircuit = IR::Entity::Label(m_current_label++);
740 auto shortcircuit_entity = IR::Entity::Goto(label_shortcircuit, inst);
741 page(p).emplace_back(shortcircuit_entity);
742
743 for (std::size_t i = 2, end = x.constList().size(); i < end; ++i)
744 {
745 if (!nodeProducesOutput(x.list()[i]))
747 fmt::format(
748 "Can not use `{}' inside a `{}' expression, as it doesn't return a value",
749 x.list()[i].repr(), name),
750 x.list()[i]);
751 compileExpression(x.list()[i], p, false, false);
752 if (i + 1 != end)
753 page(p).emplace_back(shortcircuit_entity);
754 }
755
756 page(p).emplace_back(label_shortcircuit);
757 }
758
760 {
761 constexpr std::size_t start_index = 1;
762 const Node& node = x.constList()[0];
763 const auto op_name = Language::operators[static_cast<std::size_t>(op - FIRST_OPERATOR)];
764
765
766 // push arguments on current page
767 std::size_t exp_count = 0;
768 for (std::size_t index = start_index, size = x.constList().size(); index < size; ++index)
769 {
770 const bool is_breakpoint = isBreakpoint(x.constList()[index]);
771 if (nodeProducesOutput(x.constList()[index]) || is_breakpoint)
772 compileExpression(x.list()[index], p, false, false);
773 else
775
776 if (!is_breakpoint)
777 exp_count++;
778
779 // in order to be able to handle things like (op A B C D...)
780 // which should be transformed into A B op C op D op...
781 if (exp_count >= 2 && !isTernaryInst(op) && !is_breakpoint)
782 page(p).emplace_back(op);
783 }
784
785 if (isBreakpoint(x))
786 {
787 if (exp_count > 1)
788 buildAndThrowError(fmt::format("`{}' expected at most one argument, but was called with {}", op_name, exp_count), x.constList()[0]);
789 page(p).emplace_back(op, exp_count);
790 }
791 else if (isUnaryInst(op))
792 {
793 if (exp_count != 1)
794 buildAndThrowError(fmt::format("`{}' expected one argument, but was called with {}", op_name, exp_count), x.constList()[0]);
795 page(p).emplace_back(op);
796 }
797 else if (isTernaryInst(op))
798 {
799 if (exp_count != 3)
800 buildAndThrowError(fmt::format("`{}' expected three arguments, but was called with {}", op_name, exp_count), x.constList()[0]);
801 page(p).emplace_back(op);
802 }
803 else if (exp_count <= 1)
804 buildAndThrowError(fmt::format("`{}' expected two arguments, but was called with {}", op_name, exp_count), x.constList()[0]);
805
806 // need to check we didn't push the (op A B C D...) things for operators not supporting it
807 if (exp_count > 2 && !isRepeatableOperation(op) && !isTernaryInst(op))
808 buildAndThrowError(fmt::format("`{}' requires 2 arguments, but got {}.", op_name, exp_count), x);
809
810 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
811 }
812
813 bool ASTLowerer::handleFunctionCall(Node& x, const Page p, const bool is_terminal)
814 {
815 constexpr std::size_t start_index = 1;
816 Node& node = x.list()[0];
817
818 if (is_terminal && node.nodeType() == NodeType::Symbol && isFunctionCallingItself(node.string()))
819 {
820 pushFunctionCallArguments(x, p, /* is_tail_call= */ true);
821
822 // jump to the top of the function
823 page(p).emplace_back(TAIL_CALL_SELF);
824 page(p).back().setSourceLocation(node.filename(), node.position().start.line);
825 return true; // skip the potential Instruction::POP at the end
826 }
827
828 if (!nodeProducesOutput(node))
829 buildAndThrowError(fmt::format("Can not call `{}', as it doesn't return a value", node.repr()), node);
830
831 const IR::Entity label_return = IR::Entity::Label(m_current_label++);
832 page(p).emplace_back(IR::Entity::Goto(label_return, PUSH_RETURN_ADDRESS));
833 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
834
835 const Page proc_page = createNewCodePage(/* temp= */ true);
836 CallType call_type = CallType::Classic;
837 std::optional<uint16_t> call_arg = std::nullopt;
838
839 // compile the function resolution to a separate page
841 {
842 // The function is trying to call itself, but this isn't a tail call.
843 // We can skip the LOAD_FAST function_name and directly push the current
844 // function page, which will be quicker than a local variable resolution.
845 // We set its argument to the symbol id of the function we are calling,
846 // so that the VM knows the name of the last called function.
847 call_type = CallType::SelfNotRecursive;
848 }
849 else
850 {
851 // closure chains have been handled (eg: closure.field.field.function)
852 compileExpression(node, proc_page, false, false);
853
854 if (page(proc_page).empty())
855 buildAndThrowError(fmt::format("Can not call {}", x.constList()[0].repr()), x);
856 else if (page(proc_page).back().inst() == GET_FIELD)
857 // the last GET_FIELD instruction should push the closure environment with it
858 page(proc_page).back().replaceInstruction(GET_FIELD_AS_CLOSURE);
859 else if (page(proc_page).size() == 1)
860 {
861 const Instruction inst = page(proc_page).back().inst();
862 const uint16_t arg = page(proc_page).back().primaryArg();
863
864 if (inst == LOAD_FAST)
865 {
866 call_type = CallType::Symbol;
867 call_arg = arg;
868 // we don't want to push any instruction, as we'll use an optimised instruction instead of CALL
869 page(proc_page).clear();
870 }
871 else if (inst == LOAD_FAST_BY_INDEX)
872 {
873 call_type = CallType::SymbolByIndex;
874 page(proc_page).clear();
875 }
876 else if (inst == BUILTIN && Builtins::builtins[arg].second.isFunction())
877 {
878 call_type = CallType::Builtin;
879 call_arg = arg;
880 page(proc_page).clear();
881 }
882 }
883 }
884
885 // push proc from temp page
886 for (const auto& inst : page(proc_page))
887 page(p).push_back(inst);
888 m_temp_pages.pop_back();
889
890 pushFunctionCallArguments(x, p, /* is_tail_call= */ false);
891
892 // number of arguments
893 std::size_t args_count = 0;
894 for (auto it = x.constList().begin() + start_index, it_end = x.constList().end(); it != it_end; ++it)
895 {
896 if (it->nodeType() != NodeType::Capture && !isBreakpoint(*it))
897 args_count++;
898 }
899
900 // call the procedure
901 switch (call_type)
902 {
904 page(p).emplace_back(CALL, args_count);
905 break;
906
908 page(p).emplace_back(CALL_CURRENT_PAGE, addSymbol(node), args_count);
909 break;
910
911 case CallType::Symbol:
912 assert(call_arg.has_value() && "Expected a value for call_arg with CallType::Symbol");
913 page(p).emplace_back(CALL_SYMBOL, call_arg.value(), args_count);
914 break;
915
917 {
918 const Page temp_page = createNewCodePage(/* temp= */ true);
919 compileExpression(node, temp_page, false, false);
920 assert(page(temp_page).size() == 1 && page(temp_page).back().inst() == LOAD_FAST_BY_INDEX);
921 page(p).emplace_back(CALL_SYMBOL_BY_INDEX, page(temp_page).back().primaryArg(), args_count);
922 m_temp_pages.pop_back();
923 break;
924 }
925
927 assert(call_arg.has_value() && "Expected a value for call_arg with CallType::Builtin");
928 page(p).emplace_back(CALL_BUILTIN, call_arg.value(), args_count);
929 break;
930 }
931 page(p).back().setSourceLocation(node.filename(), node.position().start.line);
932
933 // patch the PUSH_RETURN_ADDRESS instruction with the return location (IP=CALL instruction IP)
934 page(p).emplace_back(label_return);
935 return false; // we didn't compile a tail call
936 }
937
938 uint16_t ASTLowerer::addSymbol(const Node& sym)
939 {
940 // otherwise, add the symbol, and return its id in the table
941 auto it = std::ranges::find(m_symbols, sym.string());
942 if (it == m_symbols.end())
943 {
944 m_symbols.push_back(sym.string());
945 it = m_symbols.begin() + static_cast<std::vector<std::string>::difference_type>(m_symbols.size() - 1);
946 }
947
948 const auto distance = std::distance(m_symbols.begin(), it);
949 if (std::cmp_less(distance, MaxValue16Bits))
950 return static_cast<uint16_t>(distance);
951 buildAndThrowError(fmt::format("Too many symbols (exceeds {}), aborting compilation.", MaxValue16Bits), sym);
952 }
953
954 uint16_t ASTLowerer::addValue(const Node& x)
955 {
956 const ValTableElem v(x);
957 auto it = std::ranges::find(m_values, v);
958 if (it == m_values.end())
959 {
960 m_values.push_back(v);
961 it = m_values.begin() + static_cast<std::vector<ValTableElem>::difference_type>(m_values.size() - 1);
962 }
963
964 const auto distance = std::distance(m_values.begin(), it);
965 if (std::cmp_less(distance, MaxValue16Bits))
966 return static_cast<uint16_t>(distance);
967 buildAndThrowError(fmt::format("Too many values (exceeds {}), aborting compilation.", MaxValue16Bits), x);
968 }
969
970 uint16_t ASTLowerer::addValue(const std::size_t page_id, const Node& current)
971 {
972 const ValTableElem v(page_id);
973 auto it = std::ranges::find(m_values, v);
974 if (it == m_values.end())
975 {
976 m_values.push_back(v);
977 it = m_values.begin() + static_cast<std::vector<ValTableElem>::difference_type>(m_values.size() - 1);
978 }
979
980 const auto distance = std::distance(m_values.begin(), it);
981 if (std::cmp_less(distance, MaxValue16Bits))
982 return static_cast<uint16_t>(distance);
983 buildAndThrowError(fmt::format("Too many values (exceeds {}), aborting compilation.", MaxValue16Bits), current);
984 }
985}
Host the declaration of all the ArkScript builtins.
Tools to report code errors nicely to the user.
ArkScript homemade exceptions.
User defined literals for Ark internals.
const String_t & string() const
Definition Value.hpp:166
uint16_t addValue(const Node &x)
Register a given node in the value table.
void pushFunctionCallArguments(Node &call, Page p, bool is_tail_call)
bool handleFunctionCall(Node &x, Page p, bool is_terminal)
uint16_t addSymbol(const Node &sym)
Register a given node in the symbol table.
static std::optional< Instruction > getListInstruction(const std::string &name) noexcept
Checking if a symbol is a list instruction.
std::vector< ValTableElem > m_values
void compileListInstruction(Node &x, Page p, bool is_result_unused)
static bool nodeProducesOutput(const Node &node)
std::vector< IR::Block > m_temp_pages
we need temporary code pages for some compilations passes
static bool isRepeatableOperation(Instruction inst) noexcept
Check if an operator can be repeated.
void handleCalls(Node &x, Page p, bool is_result_unused, bool is_terminal)
Page createNewCodePage(const bool temp=false) noexcept
const std::vector< ValTableElem > & values() const noexcept
Return the value table pre-computed.
void compileIf(Node &x, Page p, bool is_result_unused, bool is_terminal)
void process(Node &ast)
Start the compilation.
void offsetPagesBy(std::size_t offset)
Start bytecode pages at a given offset (by default, 0)
const std::vector< IR::Block > & intermediateRepresentation() const noexcept
Return the IR blocks (one per scope)
static bool isBreakpoint(const Node &node)
static bool isUnaryInst(Instruction inst) noexcept
Check if a given instruction is unary (takes only one argument)
void compileLetMutSet(Keyword n, Node &x, Page p, bool is_result_unused)
std::vector< IR::Block > m_code_pages
ASTLowerer(unsigned debug)
Construct a new ASTLowerer object.
static void makeError(ErrorKind kind, const Node &node, const std::string &additional_ctx)
Throw a nice error message, using a message builder.
void compileWhile(Node &x, Page p)
void handleOperator(Node &x, Page p, Instruction op)
void compileApplyInstruction(Node &x, Page p, bool is_result_unused)
std::vector< std::string > m_symbols
static void buildAndThrowError(const std::string &message, const Node &node)
Throw a nice error message.
static bool isTernaryInst(Instruction inst) noexcept
Check if a given instruction is ternary (takes three arguments)
void compileExpression(Node &x, Page p, bool is_result_unused, bool is_terminal)
Compile an expression (a node) recursively.
LocalsLocator m_locals_locator
std::stack< std::string > m_opened_vars
stack of vars we are currently declaring
void compileSymbol(const Node &x, Page p, bool is_result_unused, bool can_use_ref)
void warning(const std::string &message, const Node &node)
Display a warning message.
bool isFunctionCallingItself(const std::string &name) noexcept
Check if we are in a recursive self call.
void compileFunction(Node &x, Page p, bool is_result_unused)
static std::optional< uint16_t > getBuiltin(const std::string &name) noexcept
Checking if a symbol is a builtin.
void compilePluginImport(const Node &x, Page p)
std::size_t m_start_page_at_offset
Used to offset the page numbers when compiling code in the debugger.
static std::optional< Instruction > getOperator(const std::string &name) noexcept
Checking if a symbol is an operator.
void handleShortcircuit(Node &x, Page p)
IR::Block & page(const Page page) noexcept
helper functions to get a temp or finalized code page
const std::vector< std::string > & symbols() const noexcept
Return the symbol table pre-computed.
void addToTables(const std::vector< std::string > &symbols, const std::vector< ValTableElem > &constants)
Pre-fill tables (used by the debugger)
static Entity Goto(const Entity &label, Instruction inst=Instruction::JUMP)
Definition Entity.cpp:38
static Entity Label(label_t value)
Definition Entity.cpp:30
static Entity GotoIf(const Entity &label, bool cond)
Definition Entity.cpp:57
void saveScopeLengthForBranch()
Save the current scope length before entering a branch, so that we can ignore variable definitions in...
std::optional< std::size_t > lookupLastScopeByName(const std::string &name)
Search for a local in the current scope. Returns std::nullopt in case of closure scopes or if the var...
void dropVarsForBranch()
Drop potentially defined variables in the last saved branch.
void deleteScope()
Delete the last scope.
void addLocal(const std::string &name)
Register a local in the current scope, triggered by a STORE instruction. If the local already exists,...
void createScope(ScopeType type=ScopeType::Default)
Create a new scope.
bool colorize() const noexcept
Check if logs can be colorized.
Definition Logger.hpp:157
void warn(const char *fmt, Args &&... args)
Write a warn level log using fmtlib.
Definition Logger.hpp:80
void traceStart(std::string &&trace_name)
Definition Logger.hpp:109
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
bool isAnonymousFunction() const noexcept
Check if a node is an anonymous function.
Definition Node.cpp:154
const std::string & filename() const noexcept
Return the filename in which this node was created.
Definition Node.cpp:164
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
std::string repr() const noexcept
Compute a representation of the node without any comments or additional sugar, colors,...
Definition Node.cpp:179
FileSpan position() const noexcept
Get the span of the node (start and end)
Definition Node.cpp:159
const Namespace & constArkNamespace() const noexcept
Return the namespace held by the value (if the node type allows it)
Definition Node.cpp:58
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
An interface to describe compiler passes.
Definition Pass.hpp:24
std::string makeContextWithNode(const std::string &message, const internal::Node &node, bool colorize=true)
Helper used by the compiler to generate a colorized context from a node.
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 9 > listInstructions
Definition Common.hpp:119
constexpr std::string_view Apply
Definition Common.hpp:137
constexpr std::array< std::string_view, 24 > operators
Definition Common.hpp:158
constexpr std::string_view And
Definition Common.hpp:134
constexpr std::string_view Or
Definition Common.hpp:135
std::string typeToString(const Node &node) noexcept
Definition Node.hpp:264
Keyword
The different keywords available.
Definition Common.hpp:79
Instruction
The different bytecodes are stored here.
constexpr uint16_t MaxValue16Bits
Definition Constants.hpp:73
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
std::size_t line
0-indexed line number
Definition Position.hpp:22
std::shared_ptr< Node > ast
Definition Namespace.hpp:18
A Compiler Value class helper to handle multiple types.