ArkScript
A small, lisp-inspired, functional scripting language
ASTLowerer.cpp
Go to the documentation of this file.
2
3#include <ranges>
4#include <utility>
5#include <algorithm>
6#include <fmt/core.h>
7#include <fmt/color.h>
8
13
14namespace Ark::internal
15{
16 using namespace literals;
17
18 ASTLowerer::ASTLowerer(const unsigned debug) :
19 m_logger("ASTLowerer", debug)
20 {}
21
23 {
24 m_logger.traceStart("process");
25 m_code_pages.emplace_back(); // create empty page
26
27 // gather symbols, values, and start to create code segments
29 ast,
30 /* current_page */ Page { .index = 0, .is_temp = false },
31 /* is_result_unused */ false,
32 /* is_terminal */ false);
34 }
35
36 const std::vector<IR::Block>& ASTLowerer::intermediateRepresentation() const noexcept
37 {
38 return m_code_pages;
39 }
40
41 const std::vector<std::string>& ASTLowerer::symbols() const noexcept
42 {
43 return m_symbols;
44 }
45
46 const std::vector<ValTableElem>& ASTLowerer::values() const noexcept
47 {
48 return m_values;
49 }
50
51 std::optional<Instruction> ASTLowerer::getOperator(const std::string& name) noexcept
52 {
53 const auto it = std::ranges::find(Language::operators, name);
54 if (it != Language::operators.end())
55 return static_cast<Instruction>(std::distance(Language::operators.begin(), it) + FIRST_OPERATOR);
56 return std::nullopt;
57 }
58
59 std::optional<uint16_t> ASTLowerer::getBuiltin(const std::string& name) noexcept
60 {
61 const auto it = std::ranges::find_if(Builtins::builtins,
62 [&name](const std::pair<std::string, Value>& element) -> bool {
63 return name == element.first;
64 });
65 if (it != Builtins::builtins.end())
66 return static_cast<uint16_t>(std::distance(Builtins::builtins.begin(), it));
67 return std::nullopt;
68 }
69
70 std::optional<Instruction> ASTLowerer::getListInstruction(const std::string& name) noexcept
71 {
72 const auto it = std::ranges::find(Language::listInstructions, name);
73 if (it != Language::listInstructions.end())
74 return static_cast<Instruction>(std::distance(Language::listInstructions.begin(), it) + LIST);
75 return std::nullopt;
76 }
77
79 {
80 if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword)
81 // a begin node produces a value if the last node in it produces a value
82 return (node.constList()[0].keyword() == Keyword::Begin && node.constList().size() > 1 && nodeProducesOutput(node.constList().back())) ||
83 // a function always produces a value ; even if it ends with a node not producing one, the VM returns nil
84 node.constList()[0].keyword() == Keyword::Fun ||
85 // a condition produces a value if all its branches produce a value
86 (node.constList()[0].keyword() == Keyword::If &&
87 nodeProducesOutput(node.constList()[2]) &&
88 (node.constList().size() == 3 || nodeProducesOutput(node.constList()[3])));
89 // in place list instruction, as well as assert, do not produce values
90 if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol)
91 return std::ranges::find(Language::UpdateRef, node.constList().front().string()) == Language::UpdateRef.end() &&
92 node.constList().front().string() != "assert";
93 return true; // any other node, function call, symbol, number...
94 }
95
96 bool ASTLowerer::isUnaryInst(const Instruction inst) noexcept
97 {
98 switch (inst)
99 {
100 case NOT: [[fallthrough]];
101 case LEN: [[fallthrough]];
102 case EMPTY: [[fallthrough]];
103 case TAIL: [[fallthrough]];
104 case HEAD: [[fallthrough]];
105 case ISNIL: [[fallthrough]];
106 case TO_NUM: [[fallthrough]];
107 case TO_STR: [[fallthrough]];
108 case TYPE:
109 return true;
110
111 default:
112 return false;
113 }
114 }
115
116 bool ASTLowerer::isTernaryInst(const Instruction inst) noexcept
117 {
118 switch (inst)
119 {
120 case AT_AT:
121 return true;
122
123 default:
124 return false;
125 }
126 }
127
128 void ASTLowerer::warning(const std::string& message, const Node& node)
129 {
130 fmt::println("{} {}", fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)), Diagnostics::makeContextWithNode(message, node));
131 }
132
133 void ASTLowerer::buildAndThrowError(const std::string& message, const Node& node)
134 {
135 throw CodeError(message, CodeErrorContext(node.filename(), node.position()));
136 }
137
138 void ASTLowerer::compileExpression(Node& x, const Page p, const bool is_result_unused, const bool is_terminal)
139 {
140 // register symbols
141 if (x.nodeType() == NodeType::Symbol)
142 compileSymbol(x, p, is_result_unused);
143 else if (x.nodeType() == NodeType::Field)
144 {
145 // the parser guarantees us that there is at least 2 elements (eg: a.b)
146 compileSymbol(x.list()[0], p, is_result_unused);
147 for (auto it = x.constList().begin() + 1, end = x.constList().end(); it != end; ++it)
148 {
149 uint16_t i = addSymbol(*it);
150 page(p).emplace_back(GET_FIELD, i);
151 }
152 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
153 }
154 // register values
155 else if (x.nodeType() == NodeType::String || x.nodeType() == NodeType::Number)
156 {
157 uint16_t i = addValue(x);
158
159 if (!is_result_unused)
160 page(p).emplace_back(LOAD_CONST, i);
161 }
162 // namespace nodes
163 else if (x.nodeType() == NodeType::Namespace)
164 compileExpression(*x.constArkNamespace().ast, p, is_result_unused, is_terminal);
165 else if (x.nodeType() == NodeType::Unused)
166 {
167 // do nothing, explicitly
168 }
169 // empty code block should be nil
170 else if (x.constList().empty())
171 {
172 if (!is_result_unused)
173 {
174 static const std::optional<uint16_t> nil = getBuiltin("nil");
175 page(p).emplace_back(BUILTIN, nil.value());
176 }
177 }
178 // list instructions
179 else if (const auto head = x.constList()[0]; head.nodeType() == NodeType::Symbol && getListInstruction(head.string()).has_value())
180 compileListInstruction(x, p, is_result_unused);
181 // registering structures
182 else if (x.constList()[0].nodeType() == NodeType::Keyword)
183 {
184 switch (const Keyword keyword = x.constList()[0].keyword())
185 {
186 case Keyword::If:
187 compileIf(x, p, is_result_unused, is_terminal);
188 break;
189
190 case Keyword::Set:
191 [[fallthrough]];
192 case Keyword::Let:
193 [[fallthrough]];
194 case Keyword::Mut:
195 compileLetMutSet(keyword, x, p);
196 break;
197
198 case Keyword::Fun:
199 compileFunction(x, p, is_result_unused);
200 break;
201
202 case Keyword::Begin:
203 {
204 for (std::size_t i = 1, size = x.list().size(); i < size; ++i)
206 x.list()[i],
207 p,
208 // All the nodes in a begin (except for the last one) are producing a result that we want to drop.
209 (i != size - 1) || is_result_unused,
210 // If the begin is a terminal node, only its last node is terminal.
211 is_terminal && (i == size - 1));
212 break;
213 }
214
215 case Keyword::While:
216 compileWhile(x, p);
217 break;
218
219 case Keyword::Import:
221 break;
222
223 case Keyword::Del:
224 page(p).emplace_back(DEL, addSymbol(x.constList()[1]));
225 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
226 break;
227 }
228 }
229 else if (x.nodeType() == NodeType::List)
230 {
231 // If we are here, we should have a function name via the m_opened_vars.
232 // Push arguments first, then function name, then call it.
233 handleCalls(x, p, is_result_unused, is_terminal);
234 }
235 else
237 fmt::format(
238 "NodeType `{}' not handled in ASTLowerer::compileExpression. Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark",
239 typeToString(x)),
240 x);
241 }
242
243 void ASTLowerer::compileSymbol(const Node& x, const Page p, const bool is_result_unused)
244 {
245 const std::string& name = x.string();
246
247 if (const auto it_builtin = getBuiltin(name))
248 page(p).emplace_back(Instruction::BUILTIN, it_builtin.value());
249 else if (getOperator(name).has_value())
250 buildAndThrowError(fmt::format("Found a free standing operator: `{}`", name), x);
251 else
252 {
253 const std::optional<std::size_t> maybe_local_idx = m_locals_locator.lookupLastScopeByName(name);
254 if (maybe_local_idx.has_value())
255 page(p).emplace_back(LOAD_SYMBOL_BY_INDEX, static_cast<uint16_t>(maybe_local_idx.value()));
256 else
257 page(p).emplace_back(LOAD_SYMBOL, addSymbol(x));
258 }
259
260 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
261
262 if (is_result_unused)
263 {
264 warning("Statement has no effect", x);
265 page(p).emplace_back(POP);
266 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
267 }
268 }
269
270 void ASTLowerer::compileListInstruction(Node& x, const Page p, const bool is_result_unused)
271 {
272 const Node head = x.constList()[0];
273 std::string name = x.constList()[0].string();
274 Instruction inst = getListInstruction(name).value();
275
276 // length of at least 1 since we got a symbol name
277 const auto argc = x.constList().size() - 1u;
278 // error, can not use append/concat/pop (and their in place versions) with a <2 length argument list
279 if (argc < 2 && APPEND <= inst && inst <= POP)
280 buildAndThrowError(fmt::format("Can not use {} with less than 2 arguments", name), head);
281 if (inst <= POP && std::cmp_greater(argc, MaxValue16Bits))
282 buildAndThrowError(fmt::format("Too many arguments ({}), exceeds {}", argc, MaxValue16Bits), x);
283 if (argc != 3 && inst == SET_AT_INDEX)
284 buildAndThrowError(fmt::format("Expected 3 arguments (list, index, value) for {}, got {}", name, argc), head);
285 if (argc != 4 && inst == SET_AT_2_INDEX)
286 buildAndThrowError(fmt::format("Expected 4 arguments (list, y, x, value) for {}, got {}", name, argc), head);
287
288 // compile arguments in reverse order
289 for (std::size_t i = x.constList().size() - 1u; i > 0; --i)
290 {
291 Node& node = x.list()[i];
292 if (nodeProducesOutput(node))
293 compileExpression(node, p, false, false);
294 else
295 buildAndThrowError(fmt::format("Invalid node inside call to {}", name), node);
296 }
297
298 // put inst and number of arguments
299 std::size_t inst_argc = 0;
300 switch (inst)
301 {
302 case LIST:
303 inst_argc = argc;
304 break;
305
306 case APPEND:
307 case APPEND_IN_PLACE:
308 case CONCAT:
309 case CONCAT_IN_PLACE:
310 inst_argc = argc - 1;
311 break;
312
313 case POP_LIST:
315 inst_argc = 0;
316 break;
317
318 default:
319 break;
320 }
321 page(p).emplace_back(inst, static_cast<uint16_t>(inst_argc));
322 page(p).back().setSourceLocation(head.filename(), head.position().start.line);
323
324 if (is_result_unused && name.back() != '!' && inst <= POP_LIST_IN_PLACE) // in-place functions never push a value
325 {
326 warning("Ignoring return value of function", x);
327 page(p).emplace_back(POP);
328 }
329 }
330
331 void ASTLowerer::compileIf(Node& x, const Page p, const bool is_result_unused, const bool is_terminal)
332 {
333 if (x.constList().size() == 1)
334 buildAndThrowError("Invalid condition: missing 'cond' and 'then' nodes, expected (if cond then)", x);
335 if (x.constList().size() == 2)
336 buildAndThrowError(fmt::format("Invalid condition: missing 'then' node, expected (if {} then)", x.constList()[1].repr()), x);
337
338 // compile condition
339 compileExpression(x.list()[1], p, false, false);
340 page(p).back().setSourceLocation(x.constList()[1].filename(), x.constList()[1].position().start.line);
341
342 // jump only if needed to the "true" branch
343 const auto label_then = IR::Entity::Label(m_current_label++);
344 page(p).emplace_back(IR::Entity::GotoIf(label_then, true));
345
346 // "false" branch code
347 if (x.constList().size() == 4) // we have an else clause
348 {
350 compileExpression(x.list()[3], p, is_result_unused, is_terminal);
351 page(p).back().setSourceLocation(x.constList()[3].filename(), x.constList()[3].position().start.line);
353 }
354
355 // when else is finished, jump to end
356 const auto label_end = IR::Entity::Label(m_current_label++);
357 page(p).emplace_back(IR::Entity::Goto(label_end));
358
359 // absolute address to jump to if condition is true
360 page(p).emplace_back(label_then);
361 // if code
363 compileExpression(x.list()[2], p, is_result_unused, is_terminal);
364 page(p).back().setSourceLocation(x.constList()[2].filename(), x.constList()[2].position().start.line);
366 // set jump to end pos
367 page(p).emplace_back(label_end);
368 }
369
370 void ASTLowerer::compileFunction(Node& x, const Page p, const bool is_result_unused)
371 {
372 if (const auto args = x.constList()[1]; args.nodeType() != NodeType::List)
373 buildAndThrowError(fmt::format("Expected a well formed argument(s) list, got a {}", typeToString(args)), args);
374 if (x.constList().size() != 3)
375 buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x);
376
377 // capture, if needed
378 std::size_t capture_inst_count = 0;
379 for (const auto& node : x.constList()[1].constList())
380 {
381 if (node.nodeType() == NodeType::Capture)
382 {
383 const uint16_t symbol_id = addSymbol(node);
384
385 // We have an unqualified name that isn't the captured name
386 // This means we need to rename the captured value
387 if (const auto& maybe_nqn = node.getUnqualifiedName(); maybe_nqn.has_value() && maybe_nqn.value() != node.string())
388 {
389 const uint16_t nqn_id = addSymbol(Node(NodeType::Symbol, maybe_nqn.value()));
390
391 page(p).emplace_back(RENAME_NEXT_CAPTURE, nqn_id);
392 page(p).emplace_back(CAPTURE, symbol_id);
393 }
394 else
395 page(p).emplace_back(CAPTURE, symbol_id);
396
397 ++capture_inst_count;
398 }
399 }
400 const bool is_closure = capture_inst_count > 0;
401
403 is_closure
406
407 // create new page for function body
408 m_code_pages.emplace_back();
409 const auto function_body_page = Page { .index = m_code_pages.size() - 1, .is_temp = false };
410 // save page_id into the constants table as PageAddr and load the const
411 page(p).emplace_back(is_closure ? MAKE_CLOSURE : LOAD_CONST, addValue(function_body_page.index, x));
412
413 // pushing arguments from the stack into variables in the new scope
414 for (const auto& node : x.constList()[1].constList())
415 {
416 if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::MutArg)
417 {
418 page(function_body_page).emplace_back(STORE, addSymbol(node));
419 m_locals_locator.addLocal(node.string());
420 }
421 else if (node.nodeType() == NodeType::RefArg)
422 {
423 page(function_body_page).emplace_back(STORE_REF, addSymbol(node));
424 m_locals_locator.addLocal(node.string());
425 }
426 }
427
428 // Register an opened variable as "#anonymous", which won't match any valid names inside ASTLowerer::handleCalls.
429 // This way we can continue to safely apply optimisations on
430 // (let name (fun (e) (map lst (fun (e) (name e)))))
431 // Otherwise, `name` would have been optimized to a GET_CURRENT_PAGE_ADDRESS, which would have returned the wrong page.
432 if (x.isAnonymousFunction())
433 m_opened_vars.emplace("#anonymous");
434 // push body of the function
435 compileExpression(x.list()[2], function_body_page, false, true);
436 if (x.isAnonymousFunction())
437 m_opened_vars.pop();
438
439 // return last value on the stack
440 page(function_body_page).emplace_back(RET);
442
443 // if the computed function is unused, pop it
444 if (is_result_unused)
445 {
446 warning("Unused declared function", x);
447 page(p).emplace_back(POP);
448 }
449 }
450
452 {
453 if (const auto sym = x.constList()[1]; sym.nodeType() != NodeType::Symbol)
454 buildAndThrowError(fmt::format("Expected a symbol, got a {}", typeToString(sym)), sym);
455 if (x.constList().size() != 3)
456 buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x);
457
458 const std::string name = x.constList()[1].string();
459 uint16_t i = addSymbol(x.constList()[1]);
460
461 if (!m_opened_vars.empty() && m_opened_vars.top() == name)
462 buildAndThrowError("Can not define a variable using the same name as the function it is defined inside", x);
463
464 const bool is_function = x.constList()[2].isFunction();
465 if (is_function)
466 {
467 m_opened_vars.push(name);
468 x.list()[2].setFunctionKind(/* anonymous= */ false);
469 }
470
471 // put value before symbol id
472 // starting at index = 2 because x is a (let|mut|set variable ...) node
473 for (std::size_t idx = 2, end = x.constList().size(); idx < end; ++idx)
474 compileExpression(x.list()[idx], p, false, false);
475
476 if (n == Keyword::Let || n == Keyword::Mut)
477 {
478 page(p).emplace_back(STORE, i);
480 }
481 else
482 page(p).emplace_back(SET_VAL, i);
483
484 if (is_function)
485 m_opened_vars.pop();
486 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
487 }
488
490 {
491 if (x.constList().size() != 3)
492 buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x);
493
495 page(p).emplace_back(CREATE_SCOPE);
496 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
497
498 // save current position to jump there at the end of the loop
499 const auto label_loop = IR::Entity::Label(m_current_label++);
500 page(p).emplace_back(label_loop);
501 // push condition
502 compileExpression(x.list()[1], p, false, false);
503 // absolute jump to end of block if condition is false
504 const auto label_end = IR::Entity::Label(m_current_label++);
505 page(p).emplace_back(IR::Entity::GotoIf(label_end, false));
506 // push code to page
507 compileExpression(x.list()[2], p, true, false);
508
509 // reset the scope at the end of the loop so that indices are still valid
510 // otherwise, (while true { (let a 5) (print a) (let b 6) (print b) })
511 // would print 5, 6, then only 6 as we emit LOAD_SYMBOL_FROM_INDEX 0 and b is the last in the scope
512 // loop, jump to the condition
513 page(p).emplace_back(IR::Entity::Goto(label_loop, RESET_SCOPE_JUMP));
514
515 // absolute address to jump to if condition is false
516 page(p).emplace_back(label_end);
517
518 page(p).emplace_back(POP_SCOPE);
520 }
521
523 {
524 std::string path;
525 const Node package_node = x.constList()[1];
526 for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i)
527 {
528 path += package_node.constList()[i].string();
529 if (i + 1 != end)
530 path += "/";
531 }
532 path += ".arkm";
533
534 // register plugin path in the constants table
535 uint16_t id = addValue(Node(NodeType::String, path));
536 // add plugin instruction + id of the constant referring to the plugin path
537 page(p).emplace_back(PLUGIN, id);
538 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
539 }
540
541 void ASTLowerer::pushFunctionCallArguments(Node& call, const Page p, const bool is_tail_call)
542 {
543 const auto node = call.constList()[0];
544
545 // push the arguments in reverse order because the function loads its arguments in the order they are defined:
546 // (fun (a b c) ...) -> load 'a', then 'b', then 'c'
547 // We have to push arguments in this order and load them in reverse, because we are using references internally,
548 // which can cause problems for recursive functions that swap their arguments around.
549 // Eg (let foo (fun (a b c) (if (> a 0) (foo (- a 1) c (+ b c)) 1))) (foo 12 0 1)
550 // On the second self-call, b and c would have the same value, since we set c to (+ b c), and we pushed c as the
551 // value for argument b, but loaded it as a reference.
552 for (Node& value : std::ranges::drop_view(call.list(), 1) | std::views::reverse)
553 {
554 if (nodeProducesOutput(value))
555 compileExpression(value, p, false, false);
556 else
557 {
558 std::string message;
559 if (is_tail_call)
560 message = fmt::format("Invalid node inside tail call to `{}'", node.repr());
561 else
562 message = fmt::format("Invalid node inside call to `{}'", node.repr());
563 buildAndThrowError(message, value);
564 }
565 }
566 }
567
568 void ASTLowerer::handleCalls(Node& x, const Page p, bool is_result_unused, const bool is_terminal)
569 {
570 constexpr std::size_t start_index = 1;
571
572 Node& node = x.list()[0];
573 const std::optional<Instruction> maybe_operator = node.nodeType() == NodeType::Symbol ? getOperator(node.string()) : std::nullopt;
574
575 const std::optional<Instruction> maybe_shortcircuit =
576 node.nodeType() == NodeType::Symbol
577 ? (node.string() == Language::And
578 ? std::make_optional(Instruction::SHORTCIRCUIT_AND)
579 : (node.string() == Language::Or
580 ? std::make_optional(Instruction::SHORTCIRCUIT_OR)
581 : std::nullopt))
582 : std::nullopt;
583
584 if (maybe_shortcircuit.has_value())
585 {
586 // short circuit implementation
587 if (x.constList().size() < 3)
589 fmt::format(
590 "Expected at least 2 arguments while compiling '{}', got {}",
591 node.string(),
592 x.constList().size() - 1),
593 x);
594
595 compileExpression(x.list()[1], p, false, false);
596
597 const auto label_shortcircuit = IR::Entity::Label(m_current_label++);
598 auto shortcircuit_entity = IR::Entity::Goto(label_shortcircuit, maybe_shortcircuit.value());
599 page(p).emplace_back(shortcircuit_entity);
600
601 for (std::size_t i = 2, end = x.constList().size(); i < end; ++i)
602 {
603 compileExpression(x.list()[i], p, false, false);
604 if (i + 1 != end)
605 page(p).emplace_back(shortcircuit_entity);
606 }
607
608 page(p).emplace_back(label_shortcircuit);
609 }
610 else if (!maybe_operator.has_value())
611 {
612 if (is_terminal && node.nodeType() == NodeType::Symbol && !m_opened_vars.empty() && m_opened_vars.top() == node.string())
613 {
614 pushFunctionCallArguments(x, p, /* is_tail_call= */ true);
615
616 // jump to the top of the function
617 page(p).emplace_back(JUMP, 0_u16);
618 page(p).back().setSourceLocation(node.filename(), node.position().start.line);
619 return; // skip the potential Instruction::POP at the end
620 }
621 else
622 {
623 if (!nodeProducesOutput(node))
624 buildAndThrowError(fmt::format("Can not call `{}', as it doesn't return a value", node.repr()), node);
625
626 m_temp_pages.emplace_back();
627 const auto proc_page = Page { .index = m_temp_pages.size() - 1u, .is_temp = true };
628
629 // compile the function resolution to a separate page
630 if (node.nodeType() == NodeType::Symbol && !m_opened_vars.empty() && m_opened_vars.top() == node.string())
631 {
632 // The function is trying to call itself, but this isn't a tail call.
633 // We can skip the LOAD_SYMBOL function_name and directly push the current
634 // function page, which will be quicker than a local variable resolution.
635 // We set its argument to the symbol id of the function we are calling,
636 // so that the VM knows the name of the last called function.
637 page(proc_page).emplace_back(GET_CURRENT_PAGE_ADDR, addSymbol(node));
638 }
639 else
640 {
641 // closure chains have been handled (eg: closure.field.field.function)
642 compileExpression(node, proc_page, false, false); // storing proc
643 }
644
645 if (m_temp_pages.back().empty())
646 buildAndThrowError(fmt::format("Can not call {}", x.constList()[0].repr()), x);
647
648 const auto label_return = IR::Entity::Label(m_current_label++);
649 page(p).emplace_back(IR::Entity::Goto(label_return, PUSH_RETURN_ADDRESS));
650 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
651
652 pushFunctionCallArguments(x, p, /* is_tail_call= */ false);
653 // push proc from temp page
654 for (const auto& inst : m_temp_pages.back())
655 page(p).push_back(inst);
656 m_temp_pages.pop_back();
657
658 // number of arguments
659 std::size_t args_count = 0;
660 for (auto it = x.constList().begin() + start_index, it_end = x.constList().end(); it != it_end; ++it)
661 {
662 if (it->nodeType() != NodeType::Capture)
663 args_count++;
664 }
665 // call the procedure
666 page(p).emplace_back(CALL, args_count);
667 page(p).back().setSourceLocation(node.filename(), node.position().start.line);
668
669 // patch the PUSH_RETURN_ADDRESS instruction with the return location (IP=CALL instruction IP)
670 page(p).emplace_back(label_return);
671 }
672 }
673 else // operator
674 {
675 // retrieve operator
676 auto op = maybe_operator.value();
677
678 if (op == ASSERT)
679 is_result_unused = false;
680
681 // push arguments on current page
682 std::size_t exp_count = 0;
683 for (std::size_t index = start_index, size = x.constList().size(); index < size; ++index)
684 {
685 if (nodeProducesOutput(x.constList()[index]))
686 compileExpression(x.list()[index], p, false, false);
687 else
688 buildAndThrowError(fmt::format("Invalid node inside call to operator `{}'", node.repr()), x.constList()[index]);
689
690 if ((index + 1 < size && x.constList()[index + 1].nodeType() != NodeType::Capture) || index + 1 == size)
691 exp_count++;
692
693 // in order to be able to handle things like (op A B C D...)
694 // which should be transformed into A B op C op D op...
695 if (exp_count >= 2 && !isTernaryInst(op))
696 page(p).emplace_back(op);
697 }
698
699 if (isUnaryInst(op))
700 {
701 if (exp_count != 1)
702 buildAndThrowError(fmt::format("Operator needs one argument, but was called with {}", exp_count), x.constList()[0]);
703 page(p).emplace_back(op);
704 }
705 else if (isTernaryInst(op))
706 {
707 if (exp_count != 3)
708 buildAndThrowError(fmt::format("Operator needs three arguments, but was called with {}", exp_count), x.constList()[0]);
709 page(p).emplace_back(op);
710 }
711 else if (exp_count <= 1)
712 buildAndThrowError(fmt::format("Operator needs two arguments, but was called with {}", exp_count), x.constList()[0]);
713
714 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
715
716 // need to check we didn't push the (op A B C D...) things for operators not supporting it
717 if (exp_count > 2)
718 {
719 switch (op)
720 {
721 // authorized instructions
722 case ADD: [[fallthrough]];
723 case SUB: [[fallthrough]];
724 case MUL: [[fallthrough]];
725 case DIV: [[fallthrough]];
726 case MOD: [[fallthrough]];
727 case AT_AT:
728 break;
729
730 default:
732 fmt::format(
733 "`{}' requires 2 arguments, but got {}.",
734 Language::operators[static_cast<std::size_t>(op - FIRST_OPERATOR)],
735 exp_count),
736 x);
737 }
738 }
739 }
740
741 if (is_result_unused)
742 page(p).emplace_back(POP);
743 }
744
745 uint16_t ASTLowerer::addSymbol(const Node& sym)
746 {
747 // otherwise, add the symbol, and return its id in the table
748 auto it = std::ranges::find(m_symbols, sym.string());
749 if (it == m_symbols.end())
750 {
751 m_symbols.push_back(sym.string());
752 it = m_symbols.begin() + static_cast<std::vector<std::string>::difference_type>(m_symbols.size() - 1);
753 }
754
755 const auto distance = std::distance(m_symbols.begin(), it);
756 if (std::cmp_less(distance, MaxValue16Bits))
757 return static_cast<uint16_t>(distance);
758 buildAndThrowError(fmt::format("Too many symbols (exceeds {}), aborting compilation.", MaxValue16Bits), sym);
759 }
760
761 uint16_t ASTLowerer::addValue(const Node& x)
762 {
763 const ValTableElem v(x);
764 auto it = std::ranges::find(m_values, v);
765 if (it == m_values.end())
766 {
767 m_values.push_back(v);
768 it = m_values.begin() + static_cast<std::vector<ValTableElem>::difference_type>(m_values.size() - 1);
769 }
770
771 const auto distance = std::distance(m_values.begin(), it);
772 if (std::cmp_less(distance, MaxValue16Bits))
773 return static_cast<uint16_t>(distance);
774 buildAndThrowError(fmt::format("Too many values (exceeds {}), aborting compilation.", MaxValue16Bits), x);
775 }
776
777 uint16_t ASTLowerer::addValue(const std::size_t page_id, const Node& current)
778 {
779 const ValTableElem v(page_id);
780 auto it = std::ranges::find(m_values, v);
781 if (it == m_values.end())
782 {
783 m_values.push_back(v);
784 it = m_values.begin() + static_cast<std::vector<ValTableElem>::difference_type>(m_values.size() - 1);
785 }
786
787 const auto distance = std::distance(m_values.begin(), it);
788 if (std::cmp_less(distance, MaxValue16Bits))
789 return static_cast<uint16_t>(distance);
790 buildAndThrowError(fmt::format("Too many values (exceeds {}), aborting compilation.", MaxValue16Bits), current);
791 }
792}
Host the declaration of all the ArkScript builtins.
Tools to report code errors nicely to the user.
ArkScript homemade exceptions.
User defined literals for Ark internals.
const String_t & string() const
Definition Value.hpp:164
uint16_t addValue(const Node &x)
Register a given node in the value table.
void pushFunctionCallArguments(Node &call, Page p, bool is_tail_call)
uint16_t addSymbol(const Node &sym)
Register a given node in the symbol table.
static std::optional< Instruction > getListInstruction(const std::string &name) noexcept
Checking if a symbol is a list instruction.
std::vector< ValTableElem > m_values
void compileListInstruction(Node &x, Page p, bool is_result_unused)
static bool nodeProducesOutput(const Node &node)
std::vector< IR::Block > m_temp_pages
we need temporary code pages for some compilations passes
void compileLetMutSet(Keyword n, Node &x, Page p)
void handleCalls(Node &x, Page p, bool is_result_unused, bool is_terminal)
const std::vector< ValTableElem > & values() const noexcept
Return the value table pre-computed.
void compileIf(Node &x, Page p, bool is_result_unused, bool is_terminal)
void process(Node &ast)
Start the compilation.
const std::vector< IR::Block > & intermediateRepresentation() const noexcept
Return the IR blocks (one per scope)
static bool isUnaryInst(Instruction inst) noexcept
Check if a given instruction is unary (takes only one argument)
std::vector< IR::Block > m_code_pages
ASTLowerer(unsigned debug)
Construct a new ASTLowerer object.
void compileWhile(Node &x, Page p)
std::vector< std::string > m_symbols
static void buildAndThrowError(const std::string &message, const Node &node)
Throw a nice error message.
static bool isTernaryInst(Instruction inst) noexcept
Check if a given instruction is ternary (takes three arguments)
void compileExpression(Node &x, Page p, bool is_result_unused, bool is_terminal)
Compile an expression (a node) recursively.
LocalsLocator m_locals_locator
std::stack< std::string > m_opened_vars
stack of vars we are currently declaring
void compileSymbol(const Node &x, Page p, bool is_result_unused)
static void warning(const std::string &message, const Node &node)
Display a warning message.
void compileFunction(Node &x, Page p, bool is_result_unused)
static std::optional< uint16_t > getBuiltin(const std::string &name) noexcept
Checking if a symbol is a builtin.
void compilePluginImport(const Node &x, Page p)
static std::optional< Instruction > getOperator(const std::string &name) noexcept
Checking if a symbol is an operator.
IR::Block & page(const Page page) noexcept
helper functions to get a temp or finalized code page
const std::vector< std::string > & symbols() const noexcept
Return the symbol table pre-computed.
static Entity Goto(const Entity &label, Instruction inst=Instruction::JUMP)
Definition Entity.cpp:33
static Entity Label(label_t value)
Definition Entity.cpp:25
static Entity GotoIf(const Entity &label, bool cond)
Definition Entity.cpp:52
void saveScopeLengthForBranch()
Save the current scope length before entering a branch, so that we can ignore variable definitions in...
std::optional< std::size_t > lookupLastScopeByName(const std::string &name)
Search for a local in the current scope. Returns std::nullopt in case of closure scopes or if the var...
void dropVarsForBranch()
Drop potentially defined variables in the last saved branch.
void deleteScope()
Delete the last scope.
void addLocal(const std::string &name)
Register a local in the current scope, triggered by a STORE instruction. If the local already exists,...
void createScope(ScopeType type=ScopeType::Default)
Create a new scope.
void traceStart(std::string &&trace_name)
Definition Logger.hpp:90
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
bool isAnonymousFunction() const noexcept
Check if a node is an anonymous function.
Definition Node.cpp:154
const std::string & filename() const noexcept
Return the filename in which this node was created.
Definition Node.cpp:164
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
std::string repr() const noexcept
Compute a representation of the node without any comments or additional sugar, colors,...
Definition Node.cpp:179
FileSpan position() const noexcept
Get the span of the node (start and end)
Definition Node.cpp:159
const Namespace & constArkNamespace() const noexcept
Return the namespace held by the value (if the node type allows it)
Definition Node.cpp:58
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
std::string makeContextWithNode(const std::string &message, const internal::Node &node)
Helper used by the compiler to generate a colorized context from a node.
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 9 > listInstructions
Definition Common.hpp:119
constexpr std::array< std::string_view, 24 > operators
Definition Common.hpp:156
constexpr std::string_view And
Definition Common.hpp:134
constexpr std::array UpdateRef
All the builtins that modify in place a variable.
Definition Common.hpp:112
constexpr std::string_view Or
Definition Common.hpp:135
std::string typeToString(const Node &node) noexcept
Definition Node.hpp:264
Keyword
The different keywords available.
Definition Common.hpp:79
Instruction
The different bytecodes are stored here.
constexpr uint16_t MaxValue16Bits
Definition Constants.hpp:71
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
std::size_t line
0-indexed line number
Definition Position.hpp:22
std::shared_ptr< Node > ast
Definition Namespace.hpp:18
A Compiler Value class helper to handle multiple types.