ArkScript
A small, lisp-inspired, functional scripting language
Parser.cpp
Go to the documentation of this file.
2
3#include <fmt/core.h>
4
5namespace Ark::internal
6{
7 Parser::Parser(const unsigned debug, const ParserMode mode) :
8 BaseParser(), m_mode(mode), m_logger("Parser", debug),
9 m_ast(NodeType::List), m_imports({}), m_allow_macro_behavior(0),
10 m_nested_nodes(0)
11 {
12 m_ast.push_back(Node(Keyword::Begin));
13
14 m_parsers = {
15 [this](FilePosition) {
16 return wrapped(&Parser::letMutSet, "variable assignment or declaration");
17 },
18 [this](FilePosition) {
19 return wrapped(&Parser::function, "function");
20 },
21 [this](FilePosition) {
22 return wrapped(&Parser::condition, "condition");
23 },
24 [this](FilePosition) {
25 return wrapped(&Parser::loop, "loop");
26 },
27 [this](const FilePosition filepos) {
28 return import_(filepos);
29 },
30 [this](const FilePosition filepos) {
31 return block(filepos);
32 },
33 [this](FilePosition) {
34 return wrapped(&Parser::macroCondition, "$if");
35 },
36 [this](const FilePosition filepos) {
37 return macro(filepos);
38 },
39 [this](FilePosition) {
40 return wrapped(&Parser::del, "del");
41 },
42 [this](const FilePosition filepos) {
43 return functionCall(filepos);
44 },
45 [this](const FilePosition filepos) {
46 return list(filepos);
47 }
48 };
49 }
50
51 void Parser::process(const std::string& filename, const std::string& code)
52 {
53 m_logger.traceStart("process");
54 initParser(filename, code);
55
56 while (!isEOF())
57 {
58 std::string comment = newlineOrComment();
59 if (isEOF())
60 {
61 if (!comment.empty())
62 m_ast.list().back().attachCommentAfter(comment);
63 break;
64 }
65
66 const auto pos = getCount();
67 if (auto n = node())
68 {
69 m_ast.push_back(n->attachNearestCommentBefore(n->comment() + comment));
70 m_ast.list().back().attachCommentAfter(spaceComment());
71 }
72 else
73 {
74 backtrack(pos);
75 std::string out = peek();
76 std::string message;
77 if (out == ")")
78 message = "Unexpected closing paren";
79 else if (out == "}")
80 message = "Unexpected closing bracket";
81 else if (out == "]")
82 message = "Unexpected closing square bracket";
83 else
84 errorWithNextToken("invalid syntax, expected node");
85 errorWithNextToken(message);
86 }
87 }
88
90 }
91
92 const Node& Parser::ast() const noexcept
93 {
94 return m_ast;
95 }
96
97 const std::vector<Import>& Parser::imports() const
98 {
99 return m_imports;
100 }
101
102 Node Parser::positioned(Node node, const FilePosition cursor) const
103 {
104 const auto [row, col] = cursor;
105 const auto [end_row, end_col] = getCursor();
106
107 node.m_filename = m_filename;
108 node.m_pos = FileSpan {
109 .start = FilePos { .line = row, .column = col },
110 .end = FilePos { .line = end_row, .column = end_col }
111 };
112 return node;
113 }
114
115 std::optional<Node>& Parser::positioned(std::optional<Node>& node, const FilePosition cursor) const
116 {
117 if (!node)
118 return node;
119
120 const auto [row, col] = cursor;
121 const auto [end_row, end_col] = getCursor();
122
123 node->m_filename = m_filename;
124 node->m_pos = FileSpan {
125 .start = FilePos { .line = row, .column = col },
126 .end = FilePos { .line = end_row, .column = end_col }
127 };
128 return node;
129 }
130
131 std::optional<Node> Parser::node()
132 {
134
136 errorWithNextToken(fmt::format("Too many nested node while parsing, exceeds limit of {}. Consider rewriting your code by breaking it in functions and macros.", MaxNestedNodes));
137
138 // save current position in buffer to be able to go back if needed
139 const auto position = getCount();
140 const auto filepos = getCursor();
141 std::optional<Node> result = std::nullopt;
142
143 for (auto&& parser : m_parsers)
144 {
145 result = parser(filepos);
146
147 if (result)
148 break;
149 backtrack(position);
150 }
151
152 // return std::nullopt only on parsing error, nothing matched, the user provided terrible code
154 return result;
155 }
156
157 std::optional<Node> Parser::letMutSet(const FilePosition filepos)
158 {
159 std::optional<Node> leaf { NodeType::List };
160
161 std::string token;
162 if (!oneOf({ "let", "mut", "set" }, &token))
163 return std::nullopt;
164
165 std::string comment = newlineOrComment();
166 leaf->attachNearestCommentBefore(comment);
167
168 if (token == "let")
169 leaf->push_back(Node(Keyword::Let));
170 else if (token == "mut")
171 leaf->push_back(Node(Keyword::Mut));
172 else // "set"
173 leaf->push_back(Node(Keyword::Set));
174
176 {
177 const auto position = getCount();
178 const auto value_pos = getCursor();
179 if (const auto value = nodeOrValue(); value.has_value())
180 {
181 const Node& sym = value.value();
182 if (sym.nodeType() == NodeType::List || sym.nodeType() == NodeType::Symbol || sym.nodeType() == NodeType::Macro || sym.nodeType() == NodeType::Spread)
183 leaf->push_back(sym);
184 else
185 error(fmt::format("Can not use a {} as a symbol name, even in a macro", nodeTypes[static_cast<std::size_t>(sym.nodeType())]), value_pos);
186 }
187 else
188 backtrack(position);
189 }
190
191 if (leaf->constList().size() == 1)
192 {
193 // we haven't parsed anything while in "macro state"
194 std::string symbol_name;
195 if (!name(&symbol_name))
196 errorWithNextToken(token + " needs a symbol");
197
198 leaf->push_back(Node(NodeType::Symbol, symbol_name));
199 }
200
202 if (auto value = nodeOrValue(); value.has_value())
203 leaf->push_back(value.value().attachNearestCommentBefore(comment));
204 else
205 errorWithNextToken("Expected a value");
206
207 return positioned(leaf, filepos);
208 }
209
210 std::optional<Node> Parser::del(const FilePosition filepos)
211 {
212 std::optional<Node> leaf { NodeType::List };
213
214 if (!oneOf({ "del" }))
215 return std::nullopt;
216 leaf->push_back(Node(Keyword::Del));
217
218 const std::string comment = newlineOrComment();
219
220 std::string symbol_name;
221 if (!name(&symbol_name))
222 errorWithNextToken("del needs a symbol");
223
224 leaf->push_back(Node(NodeType::Symbol, symbol_name));
225 leaf->list().back().attachNearestCommentBefore(comment);
226
227 return positioned(leaf, filepos);
228 }
229
230 std::optional<Node> Parser::condition(const FilePosition filepos)
231 {
232 std::optional<Node> leaf { NodeType::List };
233
234 if (!oneOf({ "if" }))
235 return std::nullopt;
236
237 std::string comment = newlineOrComment();
238
239 leaf->push_back(Node(Keyword::If));
240
241 if (auto cond_expr = nodeOrValue(); cond_expr.has_value())
242 leaf->push_back(cond_expr.value().attachNearestCommentBefore(comment));
243 else
244 errorWithNextToken("`if' needs a valid condition");
245
247 if (auto value_if_true = nodeOrValue(); value_if_true.has_value())
248 leaf->push_back(value_if_true.value().attachNearestCommentBefore(comment));
249 else
250 errorWithNextToken("Expected a node or value after condition");
251
253 if (auto value_if_false = nodeOrValue(); value_if_false.has_value())
254 {
255 leaf->push_back(value_if_false.value().attachNearestCommentBefore(comment));
256 leaf->list().back().attachCommentAfter(newlineOrComment());
257 }
258 else if (!comment.empty())
259 leaf->attachCommentAfter(comment);
260
261 return positioned(leaf, filepos);
262 }
263
264 std::optional<Node> Parser::loop(const FilePosition filepos)
265 {
266 std::optional<Node> leaf { NodeType::List };
267
268 if (!oneOf({ "while" }))
269 return std::nullopt;
270
271 std::string comment = newlineOrComment();
272 leaf->push_back(Node(Keyword::While));
273
274 if (auto cond_expr = nodeOrValue(); cond_expr.has_value())
275 leaf->push_back(cond_expr.value().attachNearestCommentBefore(comment));
276 else
277 errorWithNextToken("`while' needs a valid condition");
278
280 if (auto body = nodeOrValue(); body.has_value())
281 leaf->push_back(body.value().attachNearestCommentBefore(comment));
282 else
283 errorWithNextToken("Expected a node or value after loop condition");
284
285 return positioned(leaf, filepos);
286 }
287
288 std::optional<Node> Parser::import_(const FilePosition filepos)
289 {
290 std::optional<Node> leaf { NodeType::List };
291
293 if (!accept(IsChar('(')))
294 return std::nullopt;
295
296 std::string comment = newlineOrComment();
297 leaf->attachNearestCommentBefore(comment);
298
299 if (!oneOf({ "import" }))
300 return std::nullopt;
301
303 leaf->push_back(Node(Keyword::Import));
304
305 Import import_data;
306 import_data.col = filepos.col;
307 import_data.line = filepos.row;
308
309 const auto pos = getCount();
310 if (!packageName(&import_data.prefix))
311 errorWithNextToken("Import expected a package name");
312
313 if (import_data.prefix.size() > 255)
314 {
315 backtrack(pos);
316 errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", import_data.prefix.size()));
317 }
318 import_data.package.push_back(import_data.prefix);
319
321 packageNode.push_back(Node(NodeType::Symbol, import_data.prefix));
322
323 // first, parse the package name
324 while (!isEOF())
325 {
326 const auto item_pos = getCursor();
327
328 // parsing package folder.foo.bar.yes
329 if (accept(IsChar('.')))
330 {
331 const auto package_pos = getCursor();
332 std::string path;
333 if (!packageName(&path))
334 errorWithNextToken("Package name expected after '.'");
335 else
336 {
337 packageNode.push_back(positioned(Node(NodeType::Symbol, path), package_pos));
338
339 import_data.package.push_back(path);
340 import_data.prefix = path; // in the end we will store the last element of the package, which is what we want
341
342 if (path.size() > 255)
343 {
344 backtrack(pos);
345 errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", path.size()));
346 }
347 }
348 }
349 else if (accept(IsChar(':')) && accept(IsChar('*'))) // parsing :*, terminal in imports
350 {
351 leaf->push_back(packageNode);
352 leaf->push_back(positioned(Node(NodeType::Symbol, "*"), item_pos));
353
354 space();
355 expectSuffixOrError(')', fmt::format("in import `{}'", import_data.toPackageString()), context);
356
357 // save the import data structure to know we encounter an import node, and retrieve its data more easily later on
358 import_data.with_prefix = false;
359 import_data.is_glob = true;
360 m_imports.push_back(import_data);
361
362 return positioned(leaf, filepos);
363 }
364 else
365 break;
366 }
367
369 // then parse the symbols to import, if any
370 if (space())
371 {
373
374 while (!isEOF())
375 {
376 if (accept(IsChar(':'))) // parsing potential :a :b :c
377 {
378 const auto symbol_pos = getCursor();
379 std::string symbol_name;
380 if (!name(&symbol_name))
381 errorWithNextToken("Expected a valid symbol to import");
382 if (symbol_name == "*")
383 error(fmt::format("Glob patterns can not be separated from the package, use (import {}:*) instead", import_data.toPackageString()), symbol_pos);
384
385 if (symbol_name.size() >= 2 && symbol_name[symbol_name.size() - 2] == ':' && symbol_name.back() == '*')
386 error("Glob pattern can not follow a symbol to import", FilePosition { .row = symbol_pos.row, .col = symbol_pos.col + symbol_name.size() - 2 });
387
388 symbols.push_back(positioned(Node(NodeType::Symbol, symbol_name).attachNearestCommentBefore(comment), symbol_pos));
389 comment.clear();
390
391 import_data.symbols.push_back(symbol_name);
392 // we do not need the prefix when importing specific symbols
393 import_data.with_prefix = false;
394 }
395
396 if (!space())
397 break;
399 }
400
401 if (!comment.empty() && !symbols.list().empty())
402 symbols.list().back().attachCommentAfter(comment);
403 }
404
405 leaf->push_back(packageNode);
406 leaf->push_back(symbols);
407 // save the import data
408 m_imports.push_back(import_data);
409
411 if (!comment.empty())
412 leaf->list().back().attachCommentAfter(comment);
413
414 expectSuffixOrError(')', fmt::format("in import `{}'", import_data.toPackageString()), context);
415 return positioned(leaf, filepos);
416 }
417
418 std::optional<Node> Parser::block(const FilePosition filepos)
419 {
420 std::optional<Node> leaf { NodeType::List };
421
423 bool alt_syntax = false;
424 std::string comment;
425 if (accept(IsChar('(')))
426 {
428 if (!oneOf({ "begin" }))
429 return std::nullopt;
430 }
431 else if (accept(IsChar('{')))
432 alt_syntax = true;
433 else
434 return std::nullopt;
435
436 leaf->setAltSyntax(alt_syntax);
437 leaf->push_back(Node(Keyword::Begin).attachNearestCommentBefore(comment));
438
440
441 while (!isEOF())
442 {
443 if (auto value = nodeOrValue(); value.has_value())
444 {
445 leaf->push_back(value.value().attachNearestCommentBefore(comment));
447 }
448 else
449 break;
450 }
451
453 expectSuffixOrError(alt_syntax ? '}' : ')', "to close block", context);
454 leaf->list().back().attachCommentAfter(comment);
455 return positioned(leaf, filepos);
456 }
457
458 std::optional<Node> Parser::functionArgs(const FilePosition filepos)
459 {
460 expect(IsChar('('));
461 std::optional<Node> args { NodeType::List };
462
463 std::string comment = newlineOrComment();
464 args->attachNearestCommentBefore(comment);
465
466 bool has_captures = false;
467
468 while (!isEOF())
469 {
470 const auto pos = getCursor();
471 if (accept(IsChar('&'))) // captures
472 {
473 has_captures = true;
474 std::string capture;
475 if (!name(&capture))
476 error("No symbol provided to capture", pos);
477
478 args->push_back(positioned(Node(NodeType::Capture, capture), pos));
479 }
480 else if (accept(IsChar('(')))
481 {
482 // attribute modifiers: mut, ref
483 std::string modifier;
484 std::ignore = newlineOrComment();
485 if (!oneOf({ "mut", "ref" }, &modifier))
486 // We cannot return an error like this:
487 // error("Expected an attribute modifier, either `mut' or `ref'", pos);
488 // Because it would break on macro instantiations like (fun ((suffix-dup a 3)) ())
489 return std::nullopt;
490
492 if (modifier == "mut")
493 type = NodeType::MutArg;
494 else if (modifier == "ref")
495 type = NodeType::RefArg;
496
497 Node arg_with_attr = Node(type);
498 std::string comment2 = newlineOrComment();
499 arg_with_attr.attachCommentAfter(comment2);
500
501 std::string symbol_name;
502 if (!name(&symbol_name))
503 error(fmt::format("Expected a symbol name for the attribute with modifier `{}'", modifier), pos);
504 arg_with_attr.setString(symbol_name);
505
506 args->push_back(positioned(arg_with_attr, pos));
507 std::ignore = newlineOrComment();
508 expect(IsChar(')'));
509 }
510 else
511 {
512 std::string symbol_name;
513 if (!name(&symbol_name))
514 break;
515 if (has_captures)
516 error("Captured variables should be at the end of the argument list", pos);
517
518 args->push_back(positioned(Node(NodeType::Symbol, symbol_name), pos));
519 }
520
521 if (!comment.empty())
522 args->list().back().attachNearestCommentBefore(comment);
524 }
525
526 if (accept(IsChar(')')))
527 return positioned(args, filepos);
528 return std::nullopt;
529 }
530
531 std::optional<Node> Parser::function(const FilePosition filepos)
532 {
533 std::optional<Node> leaf { NodeType::List };
534
535 if (!oneOf({ "fun" }))
536 return std::nullopt;
537 leaf->push_back(Node(Keyword::Fun));
538
539 const std::string comment_before_args = newlineOrComment();
540
541 while (m_allow_macro_behavior > 0)
542 {
543 const auto position = getCount();
544
545 // args
546 if (const auto value = nodeOrValue(); value.has_value())
547 {
548 // if value is nil, just add an empty argument bloc to prevent bugs when
549 // declaring functions inside macros
550 const Node& args = value.value();
551 if (args.nodeType() == NodeType::Symbol && args.string() == "nil")
553 else
554 leaf->push_back(args);
555 }
556 else
557 {
558 backtrack(position);
559 break;
560 }
561
562 const std::string comment = newlineOrComment();
563 // body
564 if (auto value = nodeOrValue(); value.has_value())
565 leaf->push_back(value.value().attachNearestCommentBefore(comment));
566 else
567 errorWithNextToken("Expected a body for the function");
568 return positioned(leaf, filepos);
569 }
570
571 const auto position = getCount();
572 const auto args_file_pos = getCursor();
573 if (auto args = functionArgs(args_file_pos); args.has_value())
574 leaf->push_back(args.value().attachNearestCommentBefore(comment_before_args));
575 else
576 {
577 backtrack(position);
578
579 if (auto value = nodeOrValue(); value.has_value())
580 leaf->push_back(value.value().attachNearestCommentBefore(comment_before_args));
581 else
582 errorWithNextToken("Expected an argument list");
583 }
584
585 const std::string comment = newlineOrComment();
586
587 if (auto value = nodeOrValue(); value.has_value())
588 leaf->push_back(value.value().attachNearestCommentBefore(comment));
589 else
590 errorWithNextToken("Expected a body for the function");
591
592 return positioned(leaf, filepos);
593 }
594
595 std::optional<Node> Parser::macroCondition(const FilePosition filepos)
596 {
597 std::optional<Node> leaf { NodeType::Macro };
598
599 if (!oneOf({ "$if" }))
600 return std::nullopt;
601 leaf->push_back(Node(Keyword::If));
602
603 std::string comment = newlineOrComment();
604 leaf->attachNearestCommentBefore(comment);
605
606 if (const auto cond_expr = nodeOrValue(); cond_expr.has_value())
607 leaf->push_back(cond_expr.value());
608 else
609 errorWithNextToken("$if need a valid condition");
610
612 if (auto value_if_true = nodeOrValue(); value_if_true.has_value())
613 leaf->push_back(value_if_true.value().attachNearestCommentBefore(comment));
614 else
615 errorWithNextToken("Expected a node or value after condition");
616
618 if (auto value_if_false = nodeOrValue(); value_if_false.has_value())
619 {
620 leaf->push_back(value_if_false.value().attachNearestCommentBefore(comment));
622 leaf->list().back().attachCommentAfter(comment);
623 }
624
625 return positioned(leaf, filepos);
626 }
627
628 std::optional<Node> Parser::macroArgs(const FilePosition filepos)
629 {
630 if (!accept(IsChar('(')))
631 return std::nullopt;
632
633 std::optional<Node> args { NodeType::List };
634
635 std::string comment = newlineOrComment();
636 args->attachNearestCommentBefore(comment);
637
638 std::vector<std::string> names;
639 while (!isEOF())
640 {
641 const auto pos = getCount();
642
643 std::string arg_name;
644 if (!name(&arg_name))
645 break;
646
648 args->push_back(Node(NodeType::Symbol, arg_name).attachNearestCommentBefore(comment));
649
650 if (std::ranges::find(names, arg_name) != names.end())
651 {
652 backtrack(pos);
653 errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", arg_name));
654 }
655 names.push_back(arg_name);
656 }
657
658 const auto pos = getCount();
659 if (sequence("..."))
660 {
661 std::string spread_name;
662 if (!name(&spread_name))
663 errorWithNextToken("Expected a name for the variadic arguments list");
664
665 args->push_back(Node(NodeType::Spread, spread_name));
666 args->list().back().attachCommentAfter(newlineOrComment());
667
668 if (std::ranges::find(names, spread_name) != names.end())
669 {
670 backtrack(pos);
671 errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", spread_name));
672 }
673 }
674
675 if (!accept(IsChar(')')))
676 return std::nullopt;
677
679 if (!comment.empty())
680 args->attachCommentAfter(comment);
681
682 return positioned(args, filepos);
683 }
684
685 std::optional<Node> Parser::macro(const FilePosition filepos)
686 {
687 std::optional<Node> leaf { NodeType::Macro };
688
690 if (!accept(IsChar('(')))
691 return std::nullopt;
692
693 if (!oneOf({ "macro" }))
694 return std::nullopt;
695 std::string comment = newlineOrComment();
696 leaf->attachNearestCommentBefore(comment);
697
698 std::string symbol_name;
699 if (!name(&symbol_name))
700 errorWithNextToken("Expected a symbol to declare a macro");
701
703 leaf->push_back(Node(NodeType::Symbol, symbol_name).attachNearestCommentBefore(comment));
704
705 const auto position = getCount();
706 const auto args_file_pos = getCursor();
707 if (const auto args = macroArgs(args_file_pos); args.has_value())
708 leaf->push_back(args.value());
709 else
710 {
711 // if we couldn't parse arguments, then we have a value
712 backtrack(position);
713
715 const auto value = nodeOrValue();
717
718 if (value.has_value())
719 leaf->push_back(value.value());
720 else
721 errorWithNextToken(fmt::format("Expected an argument list, atom or node while defining macro `{}'", symbol_name));
722
723 leaf->list().back().attachCommentAfter(newlineOrComment());
724 expectSuffixOrError(')', fmt::format("to close macro `{}'", symbol_name), context);
725 return positioned(leaf, filepos);
726 }
727
729 const auto value = nodeOrValue();
731
732 if (value.has_value())
733 leaf->push_back(value.value());
734 else if (leaf->list().size() == 2) // the argument list is actually a function call and it's okay
735 {
736 leaf->list().back().attachCommentAfter(newlineOrComment());
737
738 expectSuffixOrError(')', fmt::format("to close macro `{}'", symbol_name), context);
739 return positioned(leaf, filepos);
740 }
741 else
742 {
743 backtrack(position);
744 errorWithNextToken(fmt::format("Expected a value while defining macro `{}'", symbol_name), context);
745 }
746
747 leaf->list().back().attachCommentAfter(newlineOrComment());
748
749 expectSuffixOrError(')', fmt::format("to close macro `{}'", symbol_name), context);
750 return positioned(leaf, filepos);
751 }
752
753 std::optional<Node> Parser::functionCall(const FilePosition filepos)
754 {
756 if (!accept(IsChar('(')))
757 return std::nullopt;
758 std::string comment = newlineOrComment();
759
760 const auto func_name_pos = getCursor();
761 std::optional<Node> func;
762 if (auto sym_or_field = anyAtomOf({ NodeType::Symbol, NodeType::Field }); sym_or_field.has_value())
763 func = sym_or_field->attachNearestCommentBefore(comment);
764 else if (auto nested = node(); nested.has_value())
765 func = nested->attachNearestCommentBefore(comment);
766 else
767 return std::nullopt;
768
769 if (func.value().nodeType() == NodeType::Symbol && func.value().string() == "ref")
770 error("`ref' can not be used outside a function's arguments list.", func_name_pos);
771
772 std::optional<Node> leaf { NodeType::List };
773 leaf->push_back(positioned(func.value(), func_name_pos));
774
776
777 while (!isEOF())
778 {
779 if (auto arg = nodeOrValue(); arg.has_value())
780 {
781 leaf->push_back(arg.value().attachNearestCommentBefore(comment));
783 }
784 else
785 break;
786 }
787
788 leaf->list().back().attachCommentAfter(comment);
790 if (!comment.empty())
791 leaf->list().back().attachCommentAfter(comment);
792
793 expectSuffixOrError(')', fmt::format("in function call to `{}'", func.value().repr()), context);
794 return positioned(leaf, filepos);
795 }
796
797 std::optional<Node> Parser::list(const FilePosition filepos)
798 {
799 std::optional<Node> leaf { NodeType::List };
800
802 if (!accept(IsChar('[')))
803 return std::nullopt;
804 leaf->setAltSyntax(true);
805 leaf->push_back(Node(NodeType::Symbol, "list"));
806
807 std::string comment = newlineOrComment();
808 leaf->attachNearestCommentBefore(comment);
809
810 while (!isEOF())
811 {
812 if (auto value = nodeOrValue(); value.has_value())
813 {
814 leaf->push_back(value.value().attachNearestCommentBefore(comment));
816 }
817 else
818 break;
819 }
820 leaf->list().back().attachCommentAfter(comment);
821
822 expectSuffixOrError(']', "to end list definition", context);
823 return positioned(leaf, filepos);
824 }
825
826 std::optional<Node> Parser::number(const FilePosition filepos)
827 {
828 std::string res;
829 if (signedNumber(&res))
830 {
831 double output;
832 if (Utils::isDouble(res, &output))
833 return positioned(Node(output), filepos);
834
835 error("Is not a valid number", filepos);
836 }
837 return std::nullopt;
838 }
839
840 std::optional<Node> Parser::string(const FilePosition filepos)
841 {
842 std::string res;
843 if (accept(IsChar('"')))
844 {
845 while (true)
846 {
847 const auto pos = getCursor();
848
849 if (accept(IsChar('\\')))
850 {
852 res += '\\';
853
854 if (accept(IsChar('"')))
855 res += '"';
856 else if (accept(IsChar('\\')))
857 res += '\\';
858 else if (accept(IsChar('n')))
859 res += m_mode == ParserMode::Interpret ? '\n' : 'n';
860 else if (accept(IsChar('t')))
861 res += m_mode == ParserMode::Interpret ? '\t' : 't';
862 else if (accept(IsChar('v')))
863 res += m_mode == ParserMode::Interpret ? '\v' : 'v';
864 else if (accept(IsChar('r')))
865 res += m_mode == ParserMode::Interpret ? '\r' : 'r';
866 else if (accept(IsChar('a')))
867 res += m_mode == ParserMode::Interpret ? '\a' : 'a';
868 else if (accept(IsChar('b')))
869 res += m_mode == ParserMode::Interpret ? '\b' : 'b';
870 else if (accept(IsChar('f')))
871 res += m_mode == ParserMode::Interpret ? '\f' : 'f';
872 else if (accept(IsChar('u')))
873 {
874 std::string seq;
875 if (hexNumber(4, &seq))
876 {
878 {
879 char utf8_str[5];
880 utf8::decode(seq.c_str(), utf8_str);
881 if (*utf8_str == '\0')
882 error("Invalid escape sequence", pos);
883 res += utf8_str;
884 }
885 else
886 res += "u" + seq;
887 }
888 else
889 error("Invalid escape sequence, expected 4 hex digits: \\uabcd", pos);
890 }
891 else if (accept(IsChar('U')))
892 {
893 std::string seq;
894 if (hexNumber(8, &seq))
895 {
897 {
898 std::size_t begin = 0;
899 for (; seq[begin] == '0'; ++begin)
900 ;
901 char utf8_str[5];
902 utf8::decode(seq.c_str() + begin, utf8_str);
903 if (*utf8_str == '\0')
904 error("Invalid escape sequence", pos);
905 res += utf8_str;
906 }
907 else
908 res += "U" + seq;
909 }
910 else
911 error("Invalid escape sequence, expected 8 hex digits: \\UABCDEF78", pos);
912 }
913 else
914 {
915 backtrack(getCount() - 1);
916 error("Unknown escape sequence", pos);
917 }
918 }
919 else
920 accept(IsNot(IsEither(IsChar('\\'), IsChar('"'))), &res);
921
922 if (accept(IsChar('"')))
923 break;
924 if (isEOF())
925 expectSuffixOrError('"', "after string");
926 }
927
928 return positioned(Node(NodeType::String, res), filepos);
929 }
930 return std::nullopt;
931 }
932
933 std::optional<Node> Parser::field(const FilePosition filepos)
934 {
935 std::string sym;
936 if (!name(&sym))
937 return std::nullopt;
938
939 std::optional<Node> leaf { Node(NodeType::Field) };
940 leaf->push_back(Node(NodeType::Symbol, sym));
941
942 while (true)
943 {
944 if (leaf->list().size() == 1 && !accept(IsChar('.'))) // Symbol:abc
945 return std::nullopt;
946
947 if (leaf->list().size() > 1 && !accept(IsChar('.')))
948 break;
949
950 const auto filepos_inner = getCursor();
951 std::string res;
952 if (!name(&res))
953 errorWithNextToken("Expected a field name: <symbol>.<field>");
954 leaf->push_back(positioned(Node(NodeType::Symbol, res), filepos_inner));
955 }
956
957 return positioned(leaf, filepos);
958 }
959
960 std::optional<Node> Parser::symbol(const FilePosition filepos)
961 {
962 std::string res;
963 if (!name(&res))
964 return std::nullopt;
965 return positioned(Node(NodeType::Symbol, res), filepos);
966 }
967
968 std::optional<Node> Parser::spread(const FilePosition filepos)
969 {
970 std::string res;
971 if (sequence("..."))
972 {
973 if (!name(&res))
974 errorWithNextToken("Expected a name for the variadic");
975 return positioned(Node(NodeType::Spread, res), filepos);
976 }
977 return std::nullopt;
978 }
979
980 std::optional<Node> Parser::nil(const FilePosition filepos)
981 {
982 if (!accept(IsChar('(')))
983 return std::nullopt;
984
985 const std::string comment = newlineOrComment();
986 if (!accept(IsChar(')')))
987 return std::nullopt;
988
990 return positioned(Node(NodeType::Symbol, "nil").attachNearestCommentBefore(comment), filepos);
991 return positioned(Node(NodeType::List).attachNearestCommentBefore(comment), filepos);
992 }
993
994 std::optional<Node> Parser::atom()
995 {
996 const auto pos = getCount();
997 const auto filepos = getCursor();
998
999 if (auto res = Parser::number(filepos); res.has_value())
1000 return res;
1001 backtrack(pos);
1002
1003 if (auto res = Parser::string(filepos); res.has_value())
1004 return res;
1005 backtrack(pos);
1006
1007 if (auto res = Parser::spread(filepos); m_allow_macro_behavior > 0 && res.has_value())
1008 return res;
1009 backtrack(pos);
1010
1011 if (auto res = Parser::field(filepos); res.has_value())
1012 return res;
1013 backtrack(pos);
1014
1015 if (auto res = Parser::symbol(filepos); res.has_value())
1016 return res;
1017 backtrack(pos);
1018
1019 if (auto res = Parser::nil(filepos); res.has_value())
1020 return res;
1021 backtrack(pos);
1022
1023 return std::nullopt;
1024 }
1025
1026 std::optional<Node> Parser::anyAtomOf(const std::initializer_list<NodeType> types)
1027 {
1028 if (auto value = atom(); value.has_value())
1029 {
1030 for (const auto type : types)
1031 {
1032 if (value->nodeType() == type)
1033 return value;
1034 }
1035 }
1036 return std::nullopt;
1037 }
1038
1039 std::optional<Node> Parser::nodeOrValue()
1040 {
1041 if (auto value = atom(); value.has_value())
1042 return value;
1043 if (auto sub_node = node(); sub_node.has_value())
1044 return sub_node;
1045
1046 return std::nullopt;
1047 }
1048
1049 std::optional<Node> Parser::wrapped(std::optional<Node> (Parser::*parser)(FilePosition), const std::string& name)
1050 {
1051 const auto cursor = getCursor();
1053 if (!prefix('('))
1054 return std::nullopt;
1055
1056 const std::string comment = newlineOrComment();
1057
1058 if (auto result = (this->*parser)(cursor); result.has_value())
1059 {
1060 result->attachNearestCommentBefore(result->comment() + comment);
1061 result.value().attachCommentAfter(newlineOrComment());
1062
1063 if (name == "function")
1064 expectSuffixOrError(')', "after function body. Did you forget to wrap the body with `{}'?", context);
1065 else
1066 expectSuffixOrError(')', "after " + name, context);
1067
1068 result.value().attachCommentAfter(spaceComment());
1069 return result;
1070 }
1071
1072 return std::nullopt;
1073 }
1074}
Parse ArkScript code, but do not handle any import declarations.
bool sequence(const std::string &s)
void initParser(const std::string &filename, const std::string &code)
bool expect(const CharPred &t, std::string *s=nullptr)
Check if a Character Predicate was able to parse, call next() if matching; throw a CodeError if it do...
void error(const std::string &error, FilePosition start_at, const std::optional< CodeErrorContext > &additional_context=std::nullopt) const
Create an error context and throw an error containing said context.
bool accept(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching
std::string newlineOrComment()
bool hexNumber(unsigned length, std::string *s=nullptr)
void backtrack(long n)
Backtrack to a given position (this is NOT an offset!)
std::string peek() const
bool oneOf(std::initializer_list< std::string > words, std::string *s=nullptr)
Fetch a token and try to match one of the given words.
bool space(std::string *s=nullptr)
bool name(std::string *s=nullptr)
bool comment(std::string *s=nullptr)
bool packageName(std::string *s=nullptr)
void errorWithNextToken(const std::string &message, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Fetch the next token (space and paren delimited) to generate an error.
CodeErrorContext generateErrorContextAtCurrentPosition() const
void expectSuffixOrError(char suffix, const std::string &context, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Check for a closing char or generate an error.
bool signedNumber(std::string *s=nullptr)
FilePosition getCursor() const
void traceStart(std::string &&trace_name)
Definition Logger.hpp:90
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
Node & attachNearestCommentBefore(const std::string &comment)
Set the comment field with the nearest comment before this node.
Definition Node.cpp:128
void push_back(const Node &node) noexcept
Every node has a list as well as a value so we can push_back on all node no matter their type.
Definition Node.cpp:63
void setString(const std::string &value) noexcept
Set the String object.
Definition Node.cpp:117
Node & attachCommentAfter(const std::string &comment)
Set the comment_after field with the nearest comment after this node.
Definition Node.cpp:134
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
std::optional< Node > string(FilePosition filepos)
Definition Parser.cpp:840
std::optional< Node > letMutSet(FilePosition filepos)
Definition Parser.cpp:157
std::optional< Node > loop(FilePosition filepos)
Definition Parser.cpp:264
Parser(unsigned debug, ParserMode mode=ParserMode::Interpret)
Constructs a new Parser object.
Definition Parser.cpp:7
std::optional< Node > atom()
Try to parse an atom (number, string, spread, field, symbol, nil)
Definition Parser.cpp:994
std::optional< Node > spread(FilePosition filepos)
Definition Parser.cpp:968
std::optional< Node > number(FilePosition filepos)
Definition Parser.cpp:826
void process(const std::string &filename, const std::string &code)
Parse the given code.
Definition Parser.cpp:51
std::optional< Node > block(FilePosition filepos)
Definition Parser.cpp:418
std::optional< Node > macro(FilePosition filepos)
Definition Parser.cpp:685
std::optional< Node > field(FilePosition filepos)
Definition Parser.cpp:933
std::optional< Node > functionCall(FilePosition filepos)
Definition Parser.cpp:753
const Node & ast() const noexcept
Definition Parser.cpp:92
std::optional< Node > nodeOrValue()
Try to parse an atom first, if it fails try to parse a node.
Definition Parser.cpp:1039
std::vector< std::function< std::optional< Node >(FilePosition)> > m_parsers
Definition Parser.hpp:72
const std::vector< Import > & imports() const
Definition Parser.cpp:97
ParserMode m_mode
Definition Parser.hpp:66
std::optional< Node > condition(FilePosition filepos)
Definition Parser.cpp:230
std::optional< Node > del(FilePosition filepos)
Definition Parser.cpp:210
std::optional< Node > wrapped(std::optional< Node >(Parser::*parser)(FilePosition), const std::string &name)
Try to parse using a given parser, prefixing and suffixing it with (...), handling comments around the parsed node.
Definition Parser.cpp:1049
std::optional< Node > node()
Definition Parser.cpp:131
unsigned m_allow_macro_behavior
Toggled on when inside a macro definition, off afterward.
Definition Parser.hpp:70
std::optional< Node > function(FilePosition filepos)
Definition Parser.cpp:531
Node positioned(Node node, FilePosition cursor) const
Definition Parser.cpp:102
std::optional< Node > functionArgs(FilePosition filepos)
Definition Parser.cpp:458
std::optional< Node > anyAtomOf(std::initializer_list< NodeType > types)
Try to parse an atom, if any, match its type against the given list.
Definition Parser.cpp:1026
std::vector< Import > m_imports
Definition Parser.hpp:69
std::size_t m_nested_nodes
Nested node counter.
Definition Parser.hpp:71
std::optional< Node > list(FilePosition filepos)
Definition Parser.cpp:797
std::optional< Node > macroArgs(FilePosition filepos)
Definition Parser.cpp:628
std::optional< Node > symbol(FilePosition filepos)
Definition Parser.cpp:960
std::optional< Node > import_(FilePosition filepos)
Definition Parser.cpp:288
std::optional< Node > nil(FilePosition filepos)
Definition Parser.cpp:980
std::optional< Node > macroCondition(FilePosition filepos)
Definition Parser.cpp:595
bool isDouble(const std::string &s, double *output=nullptr)
Checks if a string is a valid double.
Definition Utils.hpp:85
NodeType
The different node types available.
Definition Common.hpp:44
@ Interpret
Escape sequences and () will be replaced by their UTF8 representation and nil, respectively.
constexpr std::array< std::string_view, 13 > nodeTypes
Node types as string, in the same order as the enum NodeType.
Definition Common.hpp:61
constexpr std::size_t MaxNestedNodes
Maximum number of nodes that can be nested while parsing code.
Definition Constants.hpp:73
void decode(const char *input, char *dest)
Convert hex string to utf8 string.
Definition utf8.hpp:67
Describes a position in a given file; handled by the BaseParser.
Describes a span for a node/atom in a file, its start position and end position.
Definition Position.hpp:35
std::size_t line
Definition Import.hpp:14
std::vector< std::string > symbols
List of symbols to import, can be empty if none provided. (import package :a :b)
Definition Import.hpp:48
std::size_t col
Position in the source file.
Definition Import.hpp:14
std::string prefix
The filename without the extension.
Definition Import.hpp:23
bool is_glob
Import as glob (import package:*)
Definition Import.hpp:42
std::string toPackageString() const
Definition Import.hpp:54
std::vector< std::string > package
Package with all the segments.
Definition Import.hpp:31
bool with_prefix
Import with prefix (import package)
Definition Import.hpp:37