ArkScript
A small, lisp-inspired, functional scripting language
ASTLowerer.cpp
Go to the documentation of this file.
2
3#include <ranges>
4#include <utility>
5#include <algorithm>
6#include <fmt/core.h>
7#include <fmt/color.h>
8
13
14namespace Ark::internal
15{
16 using namespace literals;
17
18 ASTLowerer::ASTLowerer(const unsigned debug) :
19 m_logger("ASTLowerer", debug)
20 {}
21
23 {
24 m_logger.traceStart("process");
25 m_code_pages.emplace_back(); // create empty page
26
27 // gather symbols, values, and start to create code segments
29 ast,
30 /* current_page */ Page { .index = 0, .is_temp = false },
31 /* is_result_unused */ false,
32 /* is_terminal */ false);
34 }
35
36 const std::vector<IR::Block>& ASTLowerer::intermediateRepresentation() const noexcept
37 {
38 return m_code_pages;
39 }
40
41 const std::vector<std::string>& ASTLowerer::symbols() const noexcept
42 {
43 return m_symbols;
44 }
45
46 const std::vector<ValTableElem>& ASTLowerer::values() const noexcept
47 {
48 return m_values;
49 }
50
51 std::optional<Instruction> ASTLowerer::getOperator(const std::string& name) noexcept
52 {
53 const auto it = std::ranges::find(Language::operators, name);
54 if (it != Language::operators.end())
55 return static_cast<Instruction>(std::distance(Language::operators.begin(), it) + FIRST_OPERATOR);
56 return std::nullopt;
57 }
58
59 std::optional<uint16_t> ASTLowerer::getBuiltin(const std::string& name) noexcept
60 {
61 const auto it = std::ranges::find_if(Builtins::builtins,
62 [&name](const std::pair<std::string, Value>& element) -> bool {
63 return name == element.first;
64 });
65 if (it != Builtins::builtins.end())
66 return static_cast<uint16_t>(std::distance(Builtins::builtins.begin(), it));
67 return std::nullopt;
68 }
69
70 std::optional<Instruction> ASTLowerer::getListInstruction(const std::string& name) noexcept
71 {
72 const auto it = std::ranges::find(Language::listInstructions, name);
73 if (it != Language::listInstructions.end())
74 return static_cast<Instruction>(std::distance(Language::listInstructions.begin(), it) + LIST);
75 return std::nullopt;
76 }
77
79 {
80 if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword)
81 // a begin node produces a value if the last node in it produces a value
82 return (node.constList()[0].keyword() == Keyword::Begin && node.constList().size() > 1 && nodeProducesOutput(node.constList().back())) ||
83 // a function always produces a value ; even if it ends with a node not producing one, the VM returns nil
84 node.constList()[0].keyword() == Keyword::Fun ||
85 // a condition produces a value if all its branches produce a value
86 (node.constList()[0].keyword() == Keyword::If &&
87 nodeProducesOutput(node.constList()[2]) &&
88 (node.constList().size() == 3 || nodeProducesOutput(node.constList()[3])));
89 // in place list instruction, as well as assert, do not produce values
90 if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol)
91 return std::ranges::find(Language::UpdateRef, node.constList().front().string()) == Language::UpdateRef.end() &&
92 node.constList().front().string() != "assert";
93 return true; // any other node, function call, symbol, number...
94 }
95
96 bool ASTLowerer::isUnaryInst(const Instruction inst) noexcept
97 {
98 switch (inst)
99 {
100 case NOT: [[fallthrough]];
101 case LEN: [[fallthrough]];
102 case EMPTY: [[fallthrough]];
103 case TAIL: [[fallthrough]];
104 case HEAD: [[fallthrough]];
105 case ISNIL: [[fallthrough]];
106 case TO_NUM: [[fallthrough]];
107 case TO_STR: [[fallthrough]];
108 case TYPE:
109 return true;
110
111 default:
112 return false;
113 }
114 }
115
116 bool ASTLowerer::isTernaryInst(const Instruction inst) noexcept
117 {
118 switch (inst)
119 {
120 case AT_AT:
121 return true;
122
123 default:
124 return false;
125 }
126 }
127
128 void ASTLowerer::warning(const std::string& message, const Node& node)
129 {
130 fmt::println("{} {}", fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)), Diagnostics::makeContextWithNode(message, node));
131 }
132
133 void ASTLowerer::buildAndThrowError(const std::string& message, const Node& node)
134 {
135 throw CodeError(message, CodeErrorContext(node.filename(), node.position()));
136 }
137
138 void ASTLowerer::compileExpression(Node& x, const Page p, const bool is_result_unused, const bool is_terminal)
139 {
140 // register symbols
141 if (x.nodeType() == NodeType::Symbol)
142 compileSymbol(x, p, is_result_unused);
143 else if (x.nodeType() == NodeType::Field)
144 {
145 // the parser guarantees us that there is at least 2 elements (eg: a.b)
146 compileSymbol(x.list()[0], p, is_result_unused);
147 for (auto it = x.constList().begin() + 1, end = x.constList().end(); it != end; ++it)
148 {
149 uint16_t i = addSymbol(*it);
150 page(p).emplace_back(GET_FIELD, i);
151 }
152 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
153 }
154 // register values
155 else if (x.nodeType() == NodeType::String || x.nodeType() == NodeType::Number)
156 {
157 uint16_t i = addValue(x);
158
159 if (!is_result_unused)
160 page(p).emplace_back(LOAD_CONST, i);
161 }
162 // namespace nodes
163 else if (x.nodeType() == NodeType::Namespace)
164 compileExpression(*x.constArkNamespace().ast, p, is_result_unused, is_terminal);
165 else if (x.nodeType() == NodeType::Unused)
166 {
167 // do nothing, explicitly
168 }
169 // empty code block should be nil
170 else if (x.constList().empty())
171 {
172 if (!is_result_unused)
173 {
174 static const std::optional<uint16_t> nil = getBuiltin("nil");
175 page(p).emplace_back(BUILTIN, nil.value());
176 }
177 }
178 // list instructions
179 else if (const auto head = x.constList()[0]; head.nodeType() == NodeType::Symbol && getListInstruction(head.string()).has_value())
180 compileListInstruction(x, p, is_result_unused);
181 // registering structures
182 else if (x.constList()[0].nodeType() == NodeType::Keyword)
183 {
184 switch (const Keyword keyword = x.constList()[0].keyword())
185 {
186 case Keyword::If:
187 compileIf(x, p, is_result_unused, is_terminal);
188 break;
189
190 case Keyword::Set:
191 [[fallthrough]];
192 case Keyword::Let:
193 [[fallthrough]];
194 case Keyword::Mut:
195 compileLetMutSet(keyword, x, p);
196 break;
197
198 case Keyword::Fun:
199 compileFunction(x, p, is_result_unused);
200 break;
201
202 case Keyword::Begin:
203 {
204 for (std::size_t i = 1, size = x.list().size(); i < size; ++i)
206 x.list()[i],
207 p,
208 // All the nodes in a begin (except for the last one) are producing a result that we want to drop.
209 (i != size - 1) || is_result_unused,
210 // If the begin is a terminal node, only its last node is terminal.
211 is_terminal && (i == size - 1));
212 break;
213 }
214
215 case Keyword::While:
216 compileWhile(x, p);
217 break;
218
219 case Keyword::Import:
221 break;
222
223 case Keyword::Del:
224 page(p).emplace_back(DEL, addSymbol(x.constList()[1]));
225 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
226 break;
227 }
228 }
229 else if (x.nodeType() == NodeType::List)
230 {
231 // If we are here, we should have a function name via the m_opened_vars.
232 // Push arguments first, then function name, then call it.
233 handleCalls(x, p, is_result_unused, is_terminal);
234 }
235 else
237 fmt::format(
238 "NodeType `{}' not handled in ASTLowerer::compileExpression. Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark",
239 typeToString(x)),
240 x);
241 }
242
243 void ASTLowerer::compileSymbol(const Node& x, const Page p, const bool is_result_unused)
244 {
245 const std::string& name = x.string();
246
247 if (const auto it_builtin = getBuiltin(name))
248 page(p).emplace_back(Instruction::BUILTIN, it_builtin.value());
249 else if (getOperator(name).has_value())
250 buildAndThrowError(fmt::format("Found a free standing operator: `{}`", name), x);
251 else
252 {
253 const std::optional<std::size_t> maybe_local_idx = m_locals_locator.lookupLastScopeByName(name);
254 if (maybe_local_idx.has_value())
255 page(p).emplace_back(LOAD_SYMBOL_BY_INDEX, static_cast<uint16_t>(maybe_local_idx.value()));
256 else
257 page(p).emplace_back(LOAD_SYMBOL, addSymbol(x));
258 }
259
260 if (is_result_unused)
261 {
262 warning("Statement has no effect", x);
263 page(p).emplace_back(POP);
264 }
265 }
266
267 void ASTLowerer::compileListInstruction(Node& x, const Page p, const bool is_result_unused)
268 {
269 const Node head = x.constList()[0];
270 std::string name = x.constList()[0].string();
271 Instruction inst = getListInstruction(name).value();
272
273 // length of at least 1 since we got a symbol name
274 const auto argc = x.constList().size() - 1u;
275 // error, can not use append/concat/pop (and their in place versions) with a <2 length argument list
276 if (argc < 2 && APPEND <= inst && inst <= POP)
277 buildAndThrowError(fmt::format("Can not use {} with less than 2 arguments", name), head);
278 if (inst <= POP && std::cmp_greater(argc, MaxValue16Bits))
279 buildAndThrowError(fmt::format("Too many arguments ({}), exceeds {}", argc, MaxValue16Bits), x);
280 if (argc != 3 && inst == SET_AT_INDEX)
281 buildAndThrowError(fmt::format("Expected 3 arguments (list, index, value) for {}, got {}", name, argc), head);
282 if (argc != 4 && inst == SET_AT_2_INDEX)
283 buildAndThrowError(fmt::format("Expected 4 arguments (list, y, x, value) for {}, got {}", name, argc), head);
284
285 // compile arguments in reverse order
286 for (std::size_t i = x.constList().size() - 1u; i > 0; --i)
287 {
288 Node& node = x.list()[i];
289 if (nodeProducesOutput(node))
290 compileExpression(node, p, false, false);
291 else
292 buildAndThrowError(fmt::format("Invalid node inside call to {}", name), node);
293 }
294
295 // put inst and number of arguments
296 std::size_t inst_argc = 0;
297 switch (inst)
298 {
299 case LIST:
300 inst_argc = argc;
301 break;
302
303 case APPEND:
304 case APPEND_IN_PLACE:
305 case CONCAT:
306 case CONCAT_IN_PLACE:
307 inst_argc = argc - 1;
308 break;
309
310 case POP_LIST:
312 inst_argc = 0;
313 break;
314
315 default:
316 break;
317 }
318 page(p).emplace_back(inst, static_cast<uint16_t>(inst_argc));
319 page(p).back().setSourceLocation(head.filename(), head.position().start.line);
320
321 if (is_result_unused && name.back() != '!' && inst <= POP_LIST_IN_PLACE) // in-place functions never push a value
322 {
323 warning("Ignoring return value of function", x);
324 page(p).emplace_back(POP);
325 }
326 }
327
328 void ASTLowerer::compileIf(Node& x, const Page p, const bool is_result_unused, const bool is_terminal)
329 {
330 if (x.constList().size() == 1)
331 buildAndThrowError("Invalid condition: missing 'cond' and 'then' nodes, expected (if cond then)", x);
332 if (x.constList().size() == 2)
333 buildAndThrowError(fmt::format("Invalid condition: missing 'then' node, expected (if {} then)", x.constList()[1].repr()), x);
334
335 // compile condition
336 compileExpression(x.list()[1], p, false, false);
337 page(p).back().setSourceLocation(x.constList()[1].filename(), x.constList()[1].position().start.line);
338
339 // jump only if needed to the "true" branch
340 const auto label_then = IR::Entity::Label(m_current_label++);
341 page(p).emplace_back(IR::Entity::GotoIf(label_then, true));
342
343 // "false" branch code
344 if (x.constList().size() == 4) // we have an else clause
345 {
347 compileExpression(x.list()[3], p, is_result_unused, is_terminal);
348 page(p).back().setSourceLocation(x.constList()[3].filename(), x.constList()[3].position().start.line);
350 }
351
352 // when else is finished, jump to end
353 const auto label_end = IR::Entity::Label(m_current_label++);
354 page(p).emplace_back(IR::Entity::Goto(label_end));
355
356 // absolute address to jump to if condition is true
357 page(p).emplace_back(label_then);
358 // if code
360 compileExpression(x.list()[2], p, is_result_unused, is_terminal);
361 page(p).back().setSourceLocation(x.constList()[2].filename(), x.constList()[2].position().start.line);
363 // set jump to end pos
364 page(p).emplace_back(label_end);
365 }
366
367 void ASTLowerer::compileFunction(Node& x, const Page p, const bool is_result_unused)
368 {
369 if (const auto args = x.constList()[1]; args.nodeType() != NodeType::List)
370 buildAndThrowError(fmt::format("Expected a well formed argument(s) list, got a {}", typeToString(args)), args);
371 if (x.constList().size() != 3)
372 buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x);
373
374 // capture, if needed
375 std::size_t capture_inst_count = 0;
376 for (const auto& node : x.constList()[1].constList())
377 {
378 if (node.nodeType() == NodeType::Capture)
379 {
380 const uint16_t symbol_id = addSymbol(node);
381
382 // We have an unqualified name that isn't the captured name
383 // This means we need to rename the captured value
384 if (const auto& maybe_nqn = node.getUnqualifiedName(); maybe_nqn.has_value() && maybe_nqn.value() != node.string())
385 {
386 const uint16_t nqn_id = addSymbol(Node(NodeType::Symbol, maybe_nqn.value()));
387
388 page(p).emplace_back(RENAME_NEXT_CAPTURE, nqn_id);
389 page(p).emplace_back(CAPTURE, symbol_id);
390 }
391 else
392 page(p).emplace_back(CAPTURE, symbol_id);
393
394 ++capture_inst_count;
395 }
396 }
397 const bool is_closure = capture_inst_count > 0;
398
400 is_closure
403
404 // create new page for function body
405 m_code_pages.emplace_back();
406 const auto function_body_page = Page { .index = m_code_pages.size() - 1, .is_temp = false };
407 // save page_id into the constants table as PageAddr and load the const
408 page(p).emplace_back(is_closure ? MAKE_CLOSURE : LOAD_CONST, addValue(function_body_page.index, x));
409
410 // pushing arguments from the stack into variables in the new scope
411 for (const auto& node : x.constList()[1].constList())
412 {
413 if (node.nodeType() == NodeType::Symbol)
414 {
415 page(function_body_page).emplace_back(STORE, addSymbol(node));
416 m_locals_locator.addLocal(node.string());
417 }
418 }
419
420 // Register an opened variable as "#anonymous", which won't match any valid names inside ASTLowerer::handleCalls.
421 // This way we can continue to safely apply optimisations on
422 // (let name (fun (e) (map lst (fun (e) (name e)))))
423 // Otherwise, `name` would have been optimized to a GET_CURRENT_PAGE_ADDRESS, which would have returned the wrong page.
424 if (x.isAnonymousFunction())
425 m_opened_vars.push("#anonymous");
426 // push body of the function
427 compileExpression(x.list()[2], function_body_page, false, true);
428 if (x.isAnonymousFunction())
429 m_opened_vars.pop();
430
431 // return last value on the stack
432 page(function_body_page).emplace_back(RET);
434
435 // if the computed function is unused, pop it
436 if (is_result_unused)
437 {
438 warning("Unused declared function", x);
439 page(p).emplace_back(POP);
440 }
441 }
442
444 {
445 if (const auto sym = x.constList()[1]; sym.nodeType() != NodeType::Symbol)
446 buildAndThrowError(fmt::format("Expected a symbol, got a {}", typeToString(sym)), sym);
447 if (x.constList().size() != 3)
448 buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x);
449
450 const std::string name = x.constList()[1].string();
451 uint16_t i = addSymbol(x.constList()[1]);
452
453 if (!m_opened_vars.empty() && m_opened_vars.top() == name)
454 buildAndThrowError("Can not define a variable using the same name as the function it is defined inside", x);
455
456 const bool is_function = x.constList()[2].isFunction();
457 if (is_function)
458 {
459 m_opened_vars.push(name);
460 x.list()[2].setFunctionKind(/* anonymous= */ false);
461 }
462
463 // put value before symbol id
464 // starting at index = 2 because x is a (let|mut|set variable ...) node
465 for (std::size_t idx = 2, end = x.constList().size(); idx < end; ++idx)
466 compileExpression(x.list()[idx], p, false, false);
467
468 if (n == Keyword::Let || n == Keyword::Mut)
469 {
470 page(p).emplace_back(STORE, i);
472 }
473 else
474 page(p).emplace_back(SET_VAL, i);
475
476 if (is_function)
477 m_opened_vars.pop();
478 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
479 }
480
482 {
483 if (x.constList().size() != 3)
484 buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x);
485
487 page(p).emplace_back(CREATE_SCOPE);
488 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
489
490 // save current position to jump there at the end of the loop
491 const auto label_loop = IR::Entity::Label(m_current_label++);
492 page(p).emplace_back(label_loop);
493 // push condition
494 compileExpression(x.list()[1], p, false, false);
495 // absolute jump to end of block if condition is false
496 const auto label_end = IR::Entity::Label(m_current_label++);
497 page(p).emplace_back(IR::Entity::GotoIf(label_end, false));
498 // push code to page
499 compileExpression(x.list()[2], p, true, false);
500
501 // reset the scope at the end of the loop so that indices are still valid
502 // otherwise, (while true { (let a 5) (print a) (let b 6) (print b) })
503 // would print 5, 6, then only 6 as we emit LOAD_SYMBOL_FROM_INDEX 0 and b is the last in the scope
504 // loop, jump to the condition
505 page(p).emplace_back(IR::Entity::Goto(label_loop, RESET_SCOPE_JUMP));
506
507 // absolute address to jump to if condition is false
508 page(p).emplace_back(label_end);
509
510 page(p).emplace_back(POP_SCOPE);
512 }
513
515 {
516 std::string path;
517 const Node package_node = x.constList()[1];
518 for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i)
519 {
520 path += package_node.constList()[i].string();
521 if (i + 1 != end)
522 path += "/";
523 }
524 path += ".arkm";
525
526 // register plugin path in the constants table
527 uint16_t id = addValue(Node(NodeType::String, path));
528 // add plugin instruction + id of the constant referring to the plugin path
529 page(p).emplace_back(PLUGIN, id);
530 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
531 }
532
533 void ASTLowerer::pushFunctionCallArguments(Node& call, const Page p, const bool is_tail_call)
534 {
535 const auto node = call.constList()[0];
536
537 // push the arguments in reverse order because the function loads its arguments in the order they are defined:
538 // (fun (a b c) ...) -> load 'a', then 'b', then 'c'
539 // We have to push arguments in this order and load them in reverse, because we are using references internally,
540 // which can cause problems for recursive functions that swap their arguments around.
541 // Eg (let foo (fun (a b c) (if (> a 0) (foo (- a 1) c (+ b c)) 1))) (foo 12 0 1)
542 // On the second self-call, b and c would have the same value, since we set c to (+ b c), and we pushed c as the
543 // value for argument b, but loaded it as a reference.
544 for (Node& value : std::ranges::drop_view(call.list(), 1) | std::views::reverse)
545 {
546 if (nodeProducesOutput(value))
547 compileExpression(value, p, false, false);
548 else
549 {
550 std::string message;
551 if (is_tail_call)
552 message = fmt::format("Invalid node inside tail call to `{}'", node.repr());
553 else
554 message = fmt::format("Invalid node inside call to `{}'", node.repr());
555 buildAndThrowError(message, value);
556 }
557 }
558 }
559
560 void ASTLowerer::handleCalls(Node& x, const Page p, bool is_result_unused, const bool is_terminal)
561 {
562 constexpr std::size_t start_index = 1;
563
564 Node& node = x.list()[0];
565 const std::optional<Instruction> maybe_operator = node.nodeType() == NodeType::Symbol ? getOperator(node.string()) : std::nullopt;
566
567 const std::optional<Instruction> maybe_shortcircuit =
568 node.nodeType() == NodeType::Symbol
569 ? (node.string() == Language::And
570 ? std::make_optional(Instruction::SHORTCIRCUIT_AND)
571 : (node.string() == Language::Or
572 ? std::make_optional(Instruction::SHORTCIRCUIT_OR)
573 : std::nullopt))
574 : std::nullopt;
575
576 if (maybe_shortcircuit.has_value())
577 {
578 // short circuit implementation
579 if (x.constList().size() < 3)
581 fmt::format(
582 "Expected at least 2 arguments while compiling '{}', got {}",
583 node.string(),
584 x.constList().size() - 1),
585 x);
586
587 compileExpression(x.list()[1], p, false, false);
588
589 const auto label_shortcircuit = IR::Entity::Label(m_current_label++);
590 auto shortcircuit_entity = IR::Entity::Goto(label_shortcircuit, maybe_shortcircuit.value());
591 page(p).emplace_back(shortcircuit_entity);
592
593 for (std::size_t i = 2, end = x.constList().size(); i < end; ++i)
594 {
595 compileExpression(x.list()[i], p, false, false);
596 if (i + 1 != end)
597 page(p).emplace_back(shortcircuit_entity);
598 }
599
600 page(p).emplace_back(label_shortcircuit);
601 }
602 else if (!maybe_operator.has_value())
603 {
604 if (is_terminal && node.nodeType() == NodeType::Symbol && !m_opened_vars.empty() && m_opened_vars.top() == node.string())
605 {
606 pushFunctionCallArguments(x, p, /* is_tail_call= */ true);
607
608 // jump to the top of the function
609 page(p).emplace_back(JUMP, 0_u16);
610 page(p).back().setSourceLocation(node.filename(), node.position().start.line);
611 return; // skip the potential Instruction::POP at the end
612 }
613 else
614 {
615 if (!nodeProducesOutput(node))
616 buildAndThrowError(fmt::format("Can not call `{}', as it doesn't return a value", node.repr()), node);
617
618 m_temp_pages.emplace_back();
619 const auto proc_page = Page { .index = m_temp_pages.size() - 1u, .is_temp = true };
620
621 // compile the function resolution to a separate page
622 if (node.nodeType() == NodeType::Symbol && !m_opened_vars.empty() && m_opened_vars.top() == node.string())
623 {
624 // The function is trying to call itself, but this isn't a tail call.
625 // We can skip the LOAD_SYMBOL function_name and directly push the current
626 // function page, which will be quicker than a local variable resolution.
627 // We set its argument to the symbol id of the function we are calling,
628 // so that the VM knows the name of the last called function.
629 page(proc_page).emplace_back(GET_CURRENT_PAGE_ADDR, addSymbol(node));
630 }
631 else
632 {
633 // closure chains have been handled (eg: closure.field.field.function)
634 compileExpression(node, proc_page, false, false); // storing proc
635 }
636
637 if (m_temp_pages.back().empty())
638 buildAndThrowError(fmt::format("Can not call {}", x.constList()[0].repr()), x);
639
640 const auto label_return = IR::Entity::Label(m_current_label++);
641 page(p).emplace_back(IR::Entity::Goto(label_return, PUSH_RETURN_ADDRESS));
642
643 pushFunctionCallArguments(x, p, /* is_tail_call= */ false);
644 // push proc from temp page
645 for (const auto& inst : m_temp_pages.back())
646 page(p).push_back(inst);
647 m_temp_pages.pop_back();
648
649 // number of arguments
650 std::size_t args_count = 0;
651 for (auto it = x.constList().begin() + start_index, it_end = x.constList().end(); it != it_end; ++it)
652 {
653 if (it->nodeType() != NodeType::Capture)
654 args_count++;
655 }
656 // call the procedure
657 page(p).emplace_back(CALL, args_count);
658 page(p).back().setSourceLocation(node.filename(), node.position().start.line);
659
660 // patch the PUSH_RETURN_ADDRESS instruction with the return location (IP=CALL instruction IP)
661 page(p).emplace_back(label_return);
662 }
663 }
664 else // operator
665 {
666 // retrieve operator
667 auto op = maybe_operator.value();
668
669 if (op == ASSERT)
670 is_result_unused = false;
671
672 // push arguments on current page
673 std::size_t exp_count = 0;
674 for (std::size_t index = start_index, size = x.constList().size(); index < size; ++index)
675 {
676 if (nodeProducesOutput(x.constList()[index]))
677 compileExpression(x.list()[index], p, false, false);
678 else
679 buildAndThrowError(fmt::format("Invalid node inside call to operator `{}'", node.repr()), x.constList()[index]);
680
681 if ((index + 1 < size && x.constList()[index + 1].nodeType() != NodeType::Capture) || index + 1 == size)
682 exp_count++;
683
684 // in order to be able to handle things like (op A B C D...)
685 // which should be transformed into A B op C op D op...
686 if (exp_count >= 2 && !isTernaryInst(op))
687 page(p).emplace_back(op);
688 }
689
690 if (isUnaryInst(op))
691 {
692 if (exp_count != 1)
693 buildAndThrowError(fmt::format("Operator needs one argument, but was called with {}", exp_count), x.constList()[0]);
694 page(p).emplace_back(op);
695 }
696 else if (isTernaryInst(op))
697 {
698 if (exp_count != 3)
699 buildAndThrowError(fmt::format("Operator needs three arguments, but was called with {}", exp_count), x.constList()[0]);
700 page(p).emplace_back(op);
701 }
702 else if (exp_count <= 1)
703 buildAndThrowError(fmt::format("Operator needs two arguments, but was called with {}", exp_count), x.constList()[0]);
704
705 page(p).back().setSourceLocation(x.filename(), x.position().start.line);
706
707 // need to check we didn't push the (op A B C D...) things for operators not supporting it
708 if (exp_count > 2)
709 {
710 switch (op)
711 {
712 // authorized instructions
713 case ADD: [[fallthrough]];
714 case SUB: [[fallthrough]];
715 case MUL: [[fallthrough]];
716 case DIV: [[fallthrough]];
717 case MOD: [[fallthrough]];
718 case AT_AT:
719 break;
720
721 default:
723 fmt::format(
724 "`{}' requires 2 arguments, but got {}.",
725 Language::operators[static_cast<std::size_t>(op - FIRST_OPERATOR)],
726 exp_count),
727 x);
728 }
729 }
730 }
731
732 if (is_result_unused)
733 page(p).emplace_back(POP);
734 }
735
736 uint16_t ASTLowerer::addSymbol(const Node& sym)
737 {
738 // otherwise, add the symbol, and return its id in the table
739 auto it = std::ranges::find(m_symbols, sym.string());
740 if (it == m_symbols.end())
741 {
742 m_symbols.push_back(sym.string());
743 it = m_symbols.begin() + static_cast<std::vector<std::string>::difference_type>(m_symbols.size() - 1);
744 }
745
746 const auto distance = std::distance(m_symbols.begin(), it);
747 if (std::cmp_less(distance, MaxValue16Bits))
748 return static_cast<uint16_t>(distance);
749 buildAndThrowError(fmt::format("Too many symbols (exceeds {}), aborting compilation.", MaxValue16Bits), sym);
750 }
751
752 uint16_t ASTLowerer::addValue(const Node& x)
753 {
754 const ValTableElem v(x);
755 auto it = std::ranges::find(m_values, v);
756 if (it == m_values.end())
757 {
758 m_values.push_back(v);
759 it = m_values.begin() + static_cast<std::vector<ValTableElem>::difference_type>(m_values.size() - 1);
760 }
761
762 const auto distance = std::distance(m_values.begin(), it);
763 if (std::cmp_less(distance, MaxValue16Bits))
764 return static_cast<uint16_t>(distance);
765 buildAndThrowError(fmt::format("Too many values (exceeds {}), aborting compilation.", MaxValue16Bits), x);
766 }
767
768 uint16_t ASTLowerer::addValue(const std::size_t page_id, const Node& current)
769 {
770 const ValTableElem v(page_id);
771 auto it = std::ranges::find(m_values, v);
772 if (it == m_values.end())
773 {
774 m_values.push_back(v);
775 it = m_values.begin() + static_cast<std::vector<ValTableElem>::difference_type>(m_values.size() - 1);
776 }
777
778 const auto distance = std::distance(m_values.begin(), it);
779 if (std::cmp_less(distance, MaxValue16Bits))
780 return static_cast<uint16_t>(distance);
781 buildAndThrowError(fmt::format("Too many values (exceeds {}), aborting compilation.", MaxValue16Bits), current);
782 }
783}
Host the declaration of all the ArkScript builtins.
Tools to report code errors nicely to the user.
ArkScript homemade exceptions.
User defined literals for Ark internals.
const String_t & string() const
Definition Value.hpp:164
uint16_t addValue(const Node &x)
Register a given node in the value table.
void pushFunctionCallArguments(Node &call, Page p, bool is_tail_call)
uint16_t addSymbol(const Node &sym)
Register a given node in the symbol table.
static std::optional< Instruction > getListInstruction(const std::string &name) noexcept
Checking if a symbol is a list instruction.
std::vector< ValTableElem > m_values
void compileListInstruction(Node &x, Page p, bool is_result_unused)
static bool nodeProducesOutput(const Node &node)
std::vector< IR::Block > m_temp_pages
we need temporary code pages for some compilations passes
void compileLetMutSet(Keyword n, Node &x, Page p)
void handleCalls(Node &x, Page p, bool is_result_unused, bool is_terminal)
const std::vector< ValTableElem > & values() const noexcept
Return the value table pre-computed.
void compileIf(Node &x, Page p, bool is_result_unused, bool is_terminal)
void process(Node &ast)
Start the compilation.
const std::vector< IR::Block > & intermediateRepresentation() const noexcept
Return the IR blocks (one per scope)
static bool isUnaryInst(Instruction inst) noexcept
Check if a given instruction is unary (takes only one argument)
std::vector< IR::Block > m_code_pages
ASTLowerer(unsigned debug)
Construct a new ASTLowerer object.
void compileWhile(Node &x, Page p)
std::vector< std::string > m_symbols
static void buildAndThrowError(const std::string &message, const Node &node)
Throw a nice error message.
static bool isTernaryInst(Instruction inst) noexcept
Check if a given instruction is ternary (takes three arguments)
void compileExpression(Node &x, Page p, bool is_result_unused, bool is_terminal)
Compile an expression (a node) recursively.
LocalsLocator m_locals_locator
std::stack< std::string > m_opened_vars
stack of vars we are currently declaring
void compileSymbol(const Node &x, Page p, bool is_result_unused)
static void warning(const std::string &message, const Node &node)
Display a warning message.
void compileFunction(Node &x, Page p, bool is_result_unused)
static std::optional< uint16_t > getBuiltin(const std::string &name) noexcept
Checking if a symbol is a builtin.
void compilePluginImport(const Node &x, Page p)
static std::optional< Instruction > getOperator(const std::string &name) noexcept
Checking if a symbol is an operator.
IR::Block & page(const Page page) noexcept
helper functions to get a temp or finalized code page
const std::vector< std::string > & symbols() const noexcept
Return the symbol table pre-computed.
static Entity Goto(const Entity &label, Instruction inst=Instruction::JUMP)
Definition Entity.cpp:28
static Entity Label(label_t value)
Definition Entity.cpp:20
static Entity GotoIf(const Entity &label, bool cond)
Definition Entity.cpp:47
void saveScopeLengthForBranch()
Save the current scope length before entering a branch, so that we can ignore variable definitions in...
std::optional< std::size_t > lookupLastScopeByName(const std::string &name)
Search for a local in the current scope. Returns std::nullopt in case of closure scopes or if the var...
void dropVarsForBranch()
Drop potentially defined variables in the last saved branch.
void deleteScope()
Delete the last scope.
void addLocal(const std::string &name)
Register a local in the current scope, triggered by a STORE instruction. If the local already exists,...
void createScope(ScopeType type=ScopeType::Default)
Create a new scope.
void traceStart(std::string &&trace_name)
Definition Logger.hpp:90
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
bool isAnonymousFunction() const noexcept
Check if a node is an anonymous function.
Definition Node.cpp:154
const std::string & filename() const noexcept
Return the filename in which this node was created.
Definition Node.cpp:164
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
std::string repr() const noexcept
Compute a representation of the node without any comments or additional sugar, colors,...
Definition Node.cpp:179
FileSpan position() const noexcept
Get the span of the node (start and end)
Definition Node.cpp:159
const Namespace & constArkNamespace() const noexcept
Return the namespace held by the value (if the node type allows it)
Definition Node.cpp:58
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
std::string makeContextWithNode(const std::string &message, const internal::Node &node)
Helper used by the compiler to generate a colorized context from a node.
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 9 > listInstructions
Definition Common.hpp:115
constexpr std::array< std::string_view, 24 > operators
Definition Common.hpp:152
constexpr std::string_view And
Definition Common.hpp:130
constexpr std::array UpdateRef
All the builtins that modify in place a variable.
Definition Common.hpp:108
constexpr std::string_view Or
Definition Common.hpp:131
std::string typeToString(const Node &node) noexcept
Definition Node.hpp:264
Keyword
The different keywords available.
Definition Common.hpp:75
Instruction
The different bytecodes are stored here.
constexpr uint16_t MaxValue16Bits
Definition Constants.hpp:70
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
std::size_t line
0-indexed line number
Definition Position.hpp:22
std::shared_ptr< Node > ast
Definition Namespace.hpp:18
A Compiler Value class helper to handle multiple types.