ArkScript
A small, lisp-inspired, functional scripting language
Parser.cpp
Go to the documentation of this file.
2
3#include <fmt/core.h>
4
5namespace Ark::internal
6{
7 Parser::Parser(const unsigned debug, const ParserMode mode) :
8 BaseParser(), m_mode(mode), m_logger("Parser", debug),
9 m_ast(NodeType::List), m_imports({}), m_allow_macro_behavior(0),
10 m_nested_nodes(0)
11 {
12 m_ast.push_back(Node(Keyword::Begin));
13
14 m_parsers = {
15 [this](FilePosition) {
16 return wrapped(&Parser::letMutSet, "variable assignment or declaration");
17 },
18 [this](FilePosition) {
19 return wrapped(&Parser::function, "function");
20 },
21 [this](FilePosition) {
22 return wrapped(&Parser::condition, "condition");
23 },
24 [this](FilePosition) {
25 return wrapped(&Parser::loop, "loop");
26 },
27 [this](const FilePosition filepos) {
28 return import_(filepos);
29 },
30 [this](const FilePosition filepos) {
31 return block(filepos);
32 },
33 [this](FilePosition) {
34 return wrapped(&Parser::macroCondition, "$if");
35 },
36 [this](const FilePosition filepos) {
37 return macro(filepos);
38 },
39 [this](FilePosition) {
40 return wrapped(&Parser::del, "del");
41 },
42 [this](const FilePosition filepos) {
43 return functionCall(filepos);
44 },
45 [this](const FilePosition filepos) {
46 return list(filepos);
47 }
48 };
49 }
50
// Parse `code` and append every top-level node to m_ast.
// `filename` and `code` are handed to initParser() before parsing starts.
// Parsing stops at end of input; a node that cannot be parsed raises an error
// through errorWithNextToken().
// NOTE(review): one line of the listing (between the loop and the closing brace)
// is not visible here — confirm against the original file.
51 void Parser::process(const std::string& filename, const std::string& code)
52 {
53 m_logger.traceStart("process");
54 initParser(filename, code);
55
56 while (!isEOF())
57 {
58 std::string comment = newlineOrComment();
59 if (isEOF())
60 {
// input ended right after a comment: keep it as a trailing comment of the last node
61 if (!comment.empty())
62 m_ast.list().back().attachCommentAfter(comment);
63 break;
64 }
65
66 const auto pos = getCount();
67 if (auto n = node())
68 {
// merge the comment already held by the node with the one read just before it
69 m_ast.push_back(n->attachNearestCommentBefore(n->comment() + comment));
70 m_ast.list().back().attachCommentAfter(spaceComment());
71 }
72 else
73 {
// nothing matched: rewind, then report with a dedicated message for a stray closing delimiter
74 backtrack(pos);
75 std::string out = peek();
76 std::string message;
77 if (out == ")")
78 message = "Unexpected closing paren";
79 else if (out == "}")
80 message = "Unexpected closing bracket";
81 else if (out == "]")
82 message = "Unexpected closing square bracket";
83 else
84 errorWithNextToken("invalid syntax, expected node");
85 errorWithNextToken(message);
86 }
87 }
88
90 }
91
92 const Node& Parser::ast() const noexcept
93 {
94 return m_ast;
95 }
96
97 const std::vector<Import>& Parser::imports() const
98 {
99 return m_imports;
100 }
101
102 Node Parser::positioned(Node node, const FilePosition cursor) const
103 {
104 const auto [row, col] = cursor;
105 const auto [end_row, end_col] = getCursor();
106
107 node.m_filename = m_filename;
108 node.m_pos = FileSpan {
109 .start = FilePos { .line = row, .column = col },
110 .end = FilePos { .line = end_row, .column = end_col }
111 };
112 return node;
113 }
114
115 std::optional<Node>& Parser::positioned(std::optional<Node>& node, const FilePosition cursor) const
116 {
117 if (!node)
118 return node;
119
120 const auto [row, col] = cursor;
121 const auto [end_row, end_col] = getCursor();
122
123 node->m_filename = m_filename;
124 node->m_pos = FileSpan {
125 .start = FilePos { .line = row, .column = col },
126 .end = FilePos { .line = end_row, .column = end_col }
127 };
128 return node;
129 }
130
// Try every parser registered in m_parsers, in order, from the current position.
// Returns the first successful result; the buffer is rewound to the starting
// position after each failed attempt. Returns std::nullopt when nothing matched.
// NOTE(review): the statements guarding the nesting-limit error below (and any
// bookkeeping of m_nested_nodes) are not visible in this listing — presumably the
// error is only raised when the nesting depth exceeds MaxNestedNodes; confirm
// against the original file.
131 std::optional<Node> Parser::node()
132 {
134
136 errorWithNextToken(fmt::format("Too many nested node while parsing, exceeds limit of {}. Consider rewriting your code by breaking it in functions and macros.", MaxNestedNodes));
137
138 // save current position in buffer to be able to go back if needed
139 const auto position = getCount();
140 const auto filepos = getCursor();
141 std::optional<Node> result = std::nullopt;
142
143 for (auto&& parser : m_parsers)
144 {
145 result = parser(filepos);
146
// first match wins; otherwise rewind so the next parser starts from the same spot
147 if (result)
148 break;
149 backtrack(position);
150 }
151
152 // return std::nullopt only on parsing error, nothing matched, the user provided terrible code
154 return result;
155 }
156
// Parse the content of a `let`, `mut` or `set` form: the keyword, the target
// symbol, then the value. Returns std::nullopt when the next token is none of the
// three keywords; raises an error when the symbol or the value is missing.
// The resulting list node is positioned from `filepos` to the current cursor.
157 std::optional<Node> Parser::letMutSet(const FilePosition filepos)
158 {
159 std::optional<Node> leaf { NodeType::List };
160
161 std::string token;
162 if (!oneOf({ "let", "mut", "set" }, &token))
163 return std::nullopt;
164
165 std::string comment = newlineOrComment();
166 leaf->attachNearestCommentBefore(comment);
167
168 if (token == "let")
169 leaf->push_back(Node(Keyword::Let));
170 else if (token == "mut")
171 leaf->push_back(Node(Keyword::Mut));
172 else // "set"
173 leaf->push_back(Node(Keyword::Set));
174
// NOTE(review): the condition opening this scope is not visible in this listing —
// judging by the "macro state" comment further down it is presumably guarded by
// m_allow_macro_behavior; confirm against the original file.
176 {
177 const auto position = getCount();
178 const auto value_pos = getCursor();
179 if (const auto value = nodeOrValue(); value.has_value())
180 {
// only lists, symbols, macros and spreads are accepted in place of the symbol name
181 const Node& sym = value.value();
182 if (sym.nodeType() == NodeType::List || sym.nodeType() == NodeType::Symbol || sym.nodeType() == NodeType::Macro || sym.nodeType() == NodeType::Spread)
183 leaf->push_back(sym);
184 else
185 error(fmt::format("Can not use a {} as a symbol name, even in a macro", nodeTypes[static_cast<std::size_t>(sym.nodeType())]), value_pos);
186 }
187 else
188 backtrack(position);
189 }
190
191 if (leaf->constList().size() == 1)
192 {
193 // we haven't parsed anything while in "macro state"
194 std::string symbol_name;
195 const auto value_pos = getCursor();
196 if (!name(&symbol_name))
197 errorWithNextToken(token + " needs a symbol");
198
199 leaf->push_back(positioned(Node(NodeType::Symbol, symbol_name), value_pos));
200 }
201
// NOTE(review): a listing line is missing here; `comment` is presumably refreshed
// before the value is parsed — confirm.
203 if (auto value = nodeOrValue(); value.has_value())
204 leaf->push_back(value.value().attachNearestCommentBefore(comment));
205 else
206 errorWithNextToken("Expected a value");
207
208 return positioned(leaf, filepos);
209 }
210
211 std::optional<Node> Parser::del(const FilePosition filepos)
212 {
213 std::optional<Node> leaf { NodeType::List };
214
215 if (!oneOf({ "del" }))
216 return std::nullopt;
217 leaf->push_back(Node(Keyword::Del));
218
219 const std::string comment = newlineOrComment();
220
221 std::string symbol_name;
222 if (!name(&symbol_name))
223 errorWithNextToken("del needs a symbol");
224
225 leaf->push_back(Node(NodeType::Symbol, symbol_name));
226 leaf->list().back().attachNearestCommentBefore(comment);
227
228 return positioned(leaf, filepos);
229 }
230
// Parse the content of an `if` form: keyword, condition, value if true and an
// optional value if false. Returns std::nullopt when the next token is not `if`;
// raises an error when the condition or the "true" branch is missing.
// NOTE(review): two listing lines are missing (before each branch); `comment` is
// presumably refreshed there with newlineOrComment() — confirm.
231 std::optional<Node> Parser::condition(const FilePosition filepos)
232 {
233 std::optional<Node> leaf { NodeType::List };
234
235 if (!oneOf({ "if" }))
236 return std::nullopt;
237
238 std::string comment = newlineOrComment();
239
240 leaf->push_back(Node(Keyword::If));
241
242 if (auto cond_expr = nodeOrValue(); cond_expr.has_value())
243 leaf->push_back(cond_expr.value().attachNearestCommentBefore(comment));
244 else
245 errorWithNextToken("`if' needs a valid condition");
246
248 if (auto value_if_true = nodeOrValue(); value_if_true.has_value())
249 leaf->push_back(value_if_true.value().attachNearestCommentBefore(comment));
250 else
251 errorWithNextToken("Expected a node or value after condition");
252
// the "else" branch is optional: without it, a pending comment is attached after the whole form
254 if (auto value_if_false = nodeOrValue(); value_if_false.has_value())
255 {
256 leaf->push_back(value_if_false.value().attachNearestCommentBefore(comment));
257 leaf->list().back().attachCommentAfter(newlineOrComment());
258 }
259 else if (!comment.empty())
260 leaf->attachCommentAfter(comment);
261
262 return positioned(leaf, filepos);
263 }
264
// Parse the content of a `while` form: keyword, condition, then body.
// Returns std::nullopt when the next token is not `while`; raises an error when
// the condition or the body is missing.
// NOTE(review): one listing line is missing before the body; `comment` is
// presumably refreshed there with newlineOrComment() — confirm.
265 std::optional<Node> Parser::loop(const FilePosition filepos)
266 {
267 std::optional<Node> leaf { NodeType::List };
268
269 if (!oneOf({ "while" }))
270 return std::nullopt;
271
272 std::string comment = newlineOrComment();
273 leaf->push_back(Node(Keyword::While));
274
275 if (auto cond_expr = nodeOrValue(); cond_expr.has_value())
276 leaf->push_back(cond_expr.value().attachNearestCommentBefore(comment));
277 else
278 errorWithNextToken("`while' needs a valid condition");
279
281 if (auto body = nodeOrValue(); body.has_value())
282 leaf->push_back(body.value().attachNearestCommentBefore(comment));
283 else
284 errorWithNextToken("Expected a node or value after loop condition");
285
286 return positioned(leaf, filepos);
287 }
288
// Parse a complete `(import ...)` form, parentheses included.
// Accepted shapes, as handled below: a dotted package name (`a.b.c`), a package
// immediately followed by `:*` (glob import), or a package followed by a
// space-separated list of `:symbol` entries.
// On success the node is [Import keyword, package node, symbols node] and an
// Import record describing it is appended to m_imports.
// Returns std::nullopt when the input does not start with `(` followed by `import`.
// NOTE(review): the declarations of `context`, `packageNode` and `symbols`, and a
// few `comment` updates, are on listing lines that are not visible here — confirm
// against the original file.
289 std::optional<Node> Parser::import_(const FilePosition filepos)
290 {
291 std::optional<Node> leaf { NodeType::List };
292
294 if (!accept(IsChar('(')))
295 return std::nullopt;
296
297 std::string comment = newlineOrComment();
298 leaf->attachNearestCommentBefore(comment);
299
300 if (!oneOf({ "import" }))
301 return std::nullopt;
302
304 leaf->push_back(Node(Keyword::Import));
305
306 Import import_data;
307 import_data.col = filepos.col;
308 import_data.line = filepos.row;
309
310 const auto pos = getCount();
311 if (!packageName(&import_data.prefix))
312 errorWithNextToken("Import expected a package name");
313
// each package component is limited to 255 characters; rewind so the error points at the name
314 if (import_data.prefix.size() > 255)
315 {
316 backtrack(pos);
317 errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", import_data.prefix.size()));
318 }
319 import_data.package.push_back(import_data.prefix);
320
322 packageNode.push_back(Node(NodeType::Symbol, import_data.prefix));
323
324 // first, parse the package name
325 while (!isEOF())
326 {
327 const auto item_pos = getCursor();
328
329 // parsing package folder.foo.bar.yes
330 if (accept(IsChar('.')))
331 {
332 const auto package_pos = getCursor();
333 std::string path;
334 if (!packageName(&path))
335 errorWithNextToken("Package name expected after '.'");
336 else
337 {
338 packageNode.push_back(positioned(Node(NodeType::Symbol, path), package_pos));
339
340 import_data.package.push_back(path);
341 import_data.prefix = path; // in the end we will store the last element of the package, which is what we want
342
343 if (path.size() > 255)
344 {
345 backtrack(pos);
346 errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", path.size()));
347 }
348 }
349 }
350 else if (accept(IsChar(':')) && accept(IsChar('*'))) // parsing :*, terminal in imports
351 {
// glob import: the form ends here, so close it and return immediately
352 leaf->push_back(packageNode);
353 leaf->push_back(positioned(Node(NodeType::Symbol, "*"), item_pos));
354
355 space();
356 expectSuffixOrError(')', fmt::format("in import `{}'", import_data.toPackageString()), context);
357
358 // save the import data structure to know we encounter an import node, and retrieve its data more easily later on
359 import_data.with_prefix = false;
360 import_data.is_glob = true;
361 m_imports.push_back(import_data);
362
363 return positioned(leaf, filepos);
364 }
365 else
366 break;
367 }
368
370 // then parse the symbols to import, if any
371 if (space())
372 {
374
375 while (!isEOF())
376 {
377 if (accept(IsChar(':'))) // parsing potential :a :b :c
378 {
379 const auto symbol_pos = getCursor();
380 std::string symbol_name;
381 if (!name(&symbol_name))
382 errorWithNextToken("Expected a valid symbol to import");
// a glob is only valid glued to the package name, never as a separate symbol...
383 if (symbol_name == "*")
384 error(fmt::format("Glob patterns can not be separated from the package, use (import {}:*) instead", import_data.toPackageString()), symbol_pos);
385
// ...nor as a `:*` suffix on an imported symbol; the error position points at that suffix
386 if (symbol_name.size() >= 2 && symbol_name[symbol_name.size() - 2] == ':' && symbol_name.back() == '*')
387 error("Glob pattern can not follow a symbol to import", FilePosition { .row = symbol_pos.row, .col = symbol_pos.col + symbol_name.size() - 2 });
388
389 symbols.push_back(positioned(Node(NodeType::Symbol, symbol_name).attachNearestCommentBefore(comment), symbol_pos));
390 comment.clear();
391
392 import_data.symbols.push_back(symbol_name);
393 // we do not need the prefix when importing specific symbols
394 import_data.with_prefix = false;
395 }
396
397 if (!space())
398 break;
400 }
401
402 if (!comment.empty() && !symbols.list().empty())
403 symbols.list().back().attachCommentAfter(comment);
404 }
405
406 leaf->push_back(packageNode);
407 leaf->push_back(symbols);
408 // save the import data
409 m_imports.push_back(import_data);
410
412 if (!comment.empty())
413 leaf->list().back().attachCommentAfter(comment);
414
415 expectSuffixOrError(')', fmt::format("in import `{}'", import_data.toPackageString()), context);
416 return positioned(leaf, filepos);
417 }
418
// Parse a block of nodes, delimiters included: either `(begin ...)` or the
// alternative `{ ... }` syntax (recorded on the node through setAltSyntax()).
// Returns std::nullopt when the input starts with neither form; raises an error
// when the closing delimiter is missing.
// NOTE(review): the declaration of `context` and several `comment` updates are on
// listing lines that are not visible here — confirm against the original file.
419 std::optional<Node> Parser::block(const FilePosition filepos)
420 {
421 std::optional<Node> leaf { NodeType::List };
422
424 bool alt_syntax = false;
425 std::string comment;
426 if (accept(IsChar('(')))
427 {
429 if (!oneOf({ "begin" }))
430 return std::nullopt;
431 }
432 else if (accept(IsChar('{')))
433 alt_syntax = true;
434 else
435 return std::nullopt;
436
437 leaf->setAltSyntax(alt_syntax);
438 leaf->push_back(Node(Keyword::Begin).attachNearestCommentBefore(comment));
439
441
// collect nodes and values until something that is neither is met
442 while (!isEOF())
443 {
444 if (auto value = nodeOrValue(); value.has_value())
445 {
446 leaf->push_back(value.value().attachNearestCommentBefore(comment));
448 }
449 else
450 break;
451 }
452
// the closing delimiter must match the opening one
454 expectSuffixOrError(alt_syntax ? '}' : ')', "to close block", context);
455 leaf->list().back().attachCommentAfter(comment);
456 return positioned(leaf, filepos);
457 }
458
// Parse a parenthesised function argument list.
// Three kinds of entries are handled: plain symbols, captures (`&name`) and
// attributed arguments (`(mut name)` / `(ref name)`). A plain symbol after a
// capture is an error. Returns std::nullopt when an attribute modifier is not
// recognised or when the closing `)` is not found.
// NOTE(review): the declaration of `type` and a `comment` update at the end of
// the loop are on listing lines that are not visible here — confirm.
459 std::optional<Node> Parser::functionArgs(const FilePosition filepos)
460 {
461 expect(IsChar('('));
462 std::optional<Node> args { NodeType::List };
463
464 std::string comment = newlineOrComment();
465 args->attachNearestCommentBefore(comment);
466
467 bool has_captures = false;
468
469 while (!isEOF())
470 {
471 const auto pos = getCursor();
472 if (accept(IsChar('&'))) // captures
473 {
474 has_captures = true;
475 std::string capture;
476 if (!name(&capture))
477 error("No symbol provided to capture", pos);
478
479 args->push_back(positioned(Node(NodeType::Capture, capture), pos));
480 }
481 else if (accept(IsChar('(')))
482 {
483 // attribute modifiers: mut, ref
484 std::string modifier;
485 std::ignore = newlineOrComment();
486 if (!oneOf({ "mut", "ref" }, &modifier))
487 // We cannot return an error like this:
488 // error("Expected an attribute modifier, either `mut' or `ref'", pos);
489 // Because it would break on macro instantiations like (fun ((suffix-dup a 3)) ())
490 return std::nullopt;
491
493 if (modifier == "mut")
494 type = NodeType::MutArg;
495 else if (modifier == "ref")
496 type = NodeType::RefArg;
497
498 Node arg_with_attr = Node(type);
499 std::string comment2 = newlineOrComment();
500 arg_with_attr.attachCommentAfter(comment2);
501
502 std::string symbol_name;
503 if (!name(&symbol_name))
504 error(fmt::format("Expected a symbol name for the attribute with modifier `{}'", modifier), pos);
505 arg_with_attr.setString(symbol_name);
506
507 args->push_back(positioned(arg_with_attr, pos));
508 std::ignore = newlineOrComment();
509 expect(IsChar(')'));
510 }
511 else
512 {
// plain argument: stop at the first thing that is not a name
513 std::string symbol_name;
514 if (!name(&symbol_name))
515 break;
516 if (has_captures)
517 error("Captured variables should be at the end of the argument list", pos);
518
519 args->push_back(positioned(Node(NodeType::Symbol, symbol_name), pos));
520 }
521
522 if (!comment.empty())
523 args->list().back().attachNearestCommentBefore(comment);
525 }
526
527 if (accept(IsChar(')')))
528 return positioned(args, filepos);
529 return std::nullopt;
530 }
531
// Parse the content of a `fun` form: keyword, argument list, then body.
// When m_allow_macro_behavior is positive, the argument list may be any node or
// value; otherwise functionArgs() is tried first, with a fallback to
// nodeOrValue(). Returns std::nullopt when the next token is not `fun`; raises an
// error when the argument list or the body is missing.
533 std::optional<Node> Parser::function(const FilePosition filepos)
534 {
535 std::optional<Node> leaf { NodeType::List };
536
537 if (!oneOf({ "fun" }))
538 return std::nullopt;
539 leaf->push_back(Node(Keyword::Fun));
540
541 const std::string comment_before_args = newlineOrComment();
542
// this loop runs at most once: every path through its body either returns or breaks
543 while (m_allow_macro_behavior > 0)
544 {
545 const auto position = getCount();
546
547 // args
548 if (const auto value = nodeOrValue(); value.has_value())
549 {
550 // if value is nil, just add an empty argument bloc to prevent bugs when
551 // declaring functions inside macros
552 const Node& args = value.value();
// NOTE(review): the statement executed for the `nil` case is on a listing line
// that is not visible here — per the comment above it presumably pushes an empty
// argument list; confirm.
553 if (args.nodeType() == NodeType::Symbol && args.string() == "nil")
555 else
556 leaf->push_back(args);
557 }
558 else
559 {
560 backtrack(position);
561 break;
562 }
563
564 const std::string comment = newlineOrComment();
565 // body
566 if (auto value = nodeOrValue(); value.has_value())
567 leaf->push_back(value.value().attachNearestCommentBefore(comment));
568 else
569 errorWithNextToken("Expected a body for the function");
570 return positioned(leaf, filepos);
571 }
572
573 const auto position = getCount();
574 const auto args_file_pos = getCursor();
575 if (auto args = functionArgs(args_file_pos); args.has_value())
576 leaf->push_back(args.value().attachNearestCommentBefore(comment_before_args));
577 else
578 {
// not a regular argument list: rewind and accept any node or value instead
579 backtrack(position);
580
581 if (auto value = nodeOrValue(); value.has_value())
582 leaf->push_back(value.value().attachNearestCommentBefore(comment_before_args));
583 else
584 errorWithNextToken("Expected an argument list");
585 }
586
587 const std::string comment = newlineOrComment();
588
589 if (auto value = nodeOrValue(); value.has_value())
590 leaf->push_back(value.value().attachNearestCommentBefore(comment));
591 else
592 errorWithNextToken("Expected a body for the function");
593
594 return positioned(leaf, filepos);
595 }
595
// Parse the content of a `$if` form into a Macro node: If keyword, condition,
// value if true and an optional value if false.
// Returns std::nullopt when the next token is not `$if`; raises an error when the
// condition or the "true" branch is missing.
// NOTE(review): three listing lines are missing (before each branch and inside
// the "false" branch); `comment` is presumably refreshed there — confirm.
597 std::optional<Node> Parser::macroCondition(const FilePosition filepos)
598 {
599 std::optional<Node> leaf { NodeType::Macro };
600
601 if (!oneOf({ "$if" }))
602 return std::nullopt;
603 leaf->push_back(Node(Keyword::If));
604
605 std::string comment = newlineOrComment();
606 leaf->attachNearestCommentBefore(comment);
607
608 if (const auto cond_expr = nodeOrValue(); cond_expr.has_value())
609 leaf->push_back(cond_expr.value());
610 else
611 errorWithNextToken("$if need a valid condition");
612
614 if (auto value_if_true = nodeOrValue(); value_if_true.has_value())
615 leaf->push_back(value_if_true.value().attachNearestCommentBefore(comment));
616 else
617 errorWithNextToken("Expected a node or value after condition");
618
// the "else" branch is optional
620 if (auto value_if_false = nodeOrValue(); value_if_false.has_value())
621 {
622 leaf->push_back(value_if_false.value().attachNearestCommentBefore(comment));
624 leaf->list().back().attachCommentAfter(comment);
625 }
626
627 return positioned(leaf, filepos);
628 }
628
// Parse a parenthesised macro argument list: zero or more names, optionally
// followed by one variadic `...name`. Argument names must be unique.
// Returns std::nullopt when the list does not start with `(` or does not end
// with `)`.
// NOTE(review): two listing lines are missing (inside the loop and before the
// final comment check); `comment` is presumably refreshed there — confirm.
630 std::optional<Node> Parser::macroArgs(const FilePosition filepos)
631 {
632 if (!accept(IsChar('(')))
633 return std::nullopt;
634
635 std::optional<Node> args { NodeType::List };
636
637 std::string comment = newlineOrComment();
638 args->attachNearestCommentBefore(comment);
639
640 std::vector<std::string> names;
641 while (!isEOF())
642 {
643 const auto pos = getCount();
644
645 std::string arg_name;
646 if (!name(&arg_name))
647 break;
648
650 args->push_back(Node(NodeType::Symbol, arg_name).attachNearestCommentBefore(comment));
651
// duplicate name: rewind to the start of the argument so the error points at it
652 if (std::ranges::find(names, arg_name) != names.end())
653 {
654 backtrack(pos);
655 errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", arg_name));
656 }
657 names.push_back(arg_name);
658 }
659
660 const auto pos = getCount();
661 if (sequence("..."))
662 {
663 std::string spread_name;
664 if (!name(&spread_name))
665 errorWithNextToken("Expected a name for the variadic arguments list");
666
667 args->push_back(Node(NodeType::Spread, spread_name));
668 args->list().back().attachCommentAfter(newlineOrComment());
669
670 if (std::ranges::find(names, spread_name) != names.end())
671 {
672 backtrack(pos);
673 errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", spread_name));
674 }
675 }
676
677 if (!accept(IsChar(')')))
678 return std::nullopt;
679
681 if (!comment.empty())
682 args->attachCommentAfter(comment);
683
684 return positioned(args, filepos);
685 }
685
// Parse a complete `(macro name ...)` form, parentheses included, into a Macro node.
// Two shapes are handled: `(macro name value)` and `(macro name (args) body)`.
// When macroArgs() succeeds but no body follows, the "argument list" is kept as
// the macro value (see the in-body comment). Returns std::nullopt when the input
// does not start with `(` followed by `macro`.
// NOTE(review): the declaration of `context` and the statements surrounding both
// nodeOrValue() calls are on listing lines that are not visible here — confirm
// against the original file.
687 std::optional<Node> Parser::macro(const FilePosition filepos)
688 {
689 std::optional<Node> leaf { NodeType::Macro };
690
692 if (!accept(IsChar('(')))
693 return std::nullopt;
694
695 if (!oneOf({ "macro" }))
696 return std::nullopt;
697 std::string comment = newlineOrComment();
698 leaf->attachNearestCommentBefore(comment);
699
700 std::string symbol_name;
701 if (!name(&symbol_name))
702 errorWithNextToken("Expected a symbol to declare a macro");
703
705 leaf->push_back(Node(NodeType::Symbol, symbol_name).attachNearestCommentBefore(comment));
706
707 const auto position = getCount();
708 const auto args_file_pos = getCursor();
709 if (const auto args = macroArgs(args_file_pos); args.has_value())
710 leaf->push_back(args.value());
711 else
712 {
713 // if we couldn't parse arguments, then we have a value
714 backtrack(position);
715
717 const auto value = nodeOrValue();
719
720 if (value.has_value())
721 leaf->push_back(value.value());
722 else
723 errorWithNextToken(fmt::format("Expected an argument list, atom or node while defining macro `{}'", symbol_name));
724
725 leaf->list().back().attachCommentAfter(newlineOrComment());
726 expectSuffixOrError(')', fmt::format("to close macro `{}'", symbol_name), context);
727 return positioned(leaf, filepos);
728 }
729
731 const auto value = nodeOrValue();
733
734 if (value.has_value())
735 leaf->push_back(value.value());
736 else if (leaf->list().size() == 2) // the argument list is actually a function call and it's okay
737 {
738 leaf->list().back().attachCommentAfter(newlineOrComment());
739
740 expectSuffixOrError(')', fmt::format("to close macro `{}'", symbol_name), context);
741 return positioned(leaf, filepos);
742 }
743 else
744 {
745 backtrack(position);
746 errorWithNextToken(fmt::format("Expected a value while defining macro `{}'", symbol_name), context);
747 }
748
749 leaf->list().back().attachCommentAfter(newlineOrComment());
750
751 expectSuffixOrError(')', fmt::format("to close macro `{}'", symbol_name), context);
752 return positioned(leaf, filepos);
753 }
753
// Parse a complete function call, parentheses included: `(callee arg...)`.
// The callee is either a symbol/field atom or a nested node. Using `ref` as the
// callee is an error. Returns std::nullopt when the input does not start with `(`
// or when no callee can be parsed.
// NOTE(review): the declaration of `context` and several `comment` updates are on
// listing lines that are not visible here — confirm against the original file.
755 std::optional<Node> Parser::functionCall(const FilePosition filepos)
756 {
758 if (!accept(IsChar('(')))
759 return std::nullopt;
760 std::string comment = newlineOrComment();
761
762 const auto func_name_pos = getCursor();
763 std::optional<Node> func;
764 if (auto sym_or_field = anyAtomOf({ NodeType::Symbol, NodeType::Field }); sym_or_field.has_value())
765 func = sym_or_field->attachNearestCommentBefore(comment);
766 else if (auto nested = node(); nested.has_value())
767 func = nested->attachNearestCommentBefore(comment);
768 else
769 return std::nullopt;
770
771 if (func.value().nodeType() == NodeType::Symbol && func.value().string() == "ref")
772 error("`ref' can not be used outside a function's arguments list.", func_name_pos);
773
774 std::optional<Node> leaf { NodeType::List };
775 leaf->push_back(positioned(func.value(), func_name_pos));
776
778
// collect arguments until something that is neither a node nor a value is met
779 while (!isEOF())
780 {
781 if (auto arg = nodeOrValue(); arg.has_value())
782 {
783 leaf->push_back(arg.value().attachNearestCommentBefore(comment));
785 }
786 else
787 break;
788 }
789
790 leaf->list().back().attachCommentAfter(comment);
792 if (!comment.empty())
793 leaf->list().back().attachCommentAfter(comment);
794
795 expectSuffixOrError(')', fmt::format("in function call to `{}'", func.value().repr()), context);
796 return positioned(leaf, filepos);
797 }
797
// Parse a list literal `[a b c]`, brackets included. The result is a list node
// whose first element is the symbol `list`, flagged with setAltSyntax(true).
// Returns std::nullopt when the input does not start with `[`; raises an error
// when the closing `]` is missing.
// NOTE(review): the declaration of `context` and a `comment` update inside the
// loop are on listing lines that are not visible here — confirm.
799 std::optional<Node> Parser::list(const FilePosition filepos)
800 {
801 std::optional<Node> leaf { NodeType::List };
802
804 if (!accept(IsChar('[')))
805 return std::nullopt;
806 leaf->setAltSyntax(true);
807 leaf->push_back(Node(NodeType::Symbol, "list"));
808
809 std::string comment = newlineOrComment();
810 leaf->attachNearestCommentBefore(comment);
811
812 while (!isEOF())
813 {
814 if (auto value = nodeOrValue(); value.has_value())
815 {
816 leaf->push_back(value.value().attachNearestCommentBefore(comment));
818 }
819 else
820 break;
821 }
822 leaf->list().back().attachCommentAfter(comment);
823
824 expectSuffixOrError(']', "to end list definition", context);
825 return positioned(leaf, filepos);
826 }
826
827 std::optional<Node> Parser::number(const FilePosition filepos)
828 {
829 std::string res;
830 if (signedNumber(&res))
831 {
832 double output;
833 if (Utils::isDouble(res, &output))
834 return positioned(Node(output), filepos);
835
836 error("Is not a valid number", filepos);
837 }
838 return std::nullopt;
839 }
840
// Parse a double-quoted string literal into a String node.
// Handled escapes: \" \\ \n \t \v \r \a \b \f, \uXXXX (4 hex digits) and
// \UXXXXXXXX (8 hex digits). For the single-character escapes, the real control
// character is stored only when m_mode is ParserMode::Interpret; otherwise the
// escape letter is kept as-is after the backslash.
// Returns std::nullopt when the input does not start with `"`; raises an error on
// an unknown or malformed escape, or when the closing quote is missing.
// NOTE(review): three listing lines are not visible here (one right after the
// backslash is accepted, and the conditions guarding the two UTF-8 decoding
// blocks) — confirm against the original file.
842 std::optional<Node> Parser::string(const FilePosition filepos)
843 {
844 std::string res;
845 if (accept(IsChar('"')))
846 {
847 while (true)
848 {
849 const auto pos = getCursor();
850
851 if (accept(IsChar('\\')))
852 {
854 res += '\\';
855
856 if (accept(IsChar('"')))
857 res += '"';
858 else if (accept(IsChar('\\')))
859 res += '\\';
860 else if (accept(IsChar('n')))
861 res += m_mode == ParserMode::Interpret ? '\n' : 'n';
862 else if (accept(IsChar('t')))
863 res += m_mode == ParserMode::Interpret ? '\t' : 't';
864 else if (accept(IsChar('v')))
865 res += m_mode == ParserMode::Interpret ? '\v' : 'v';
866 else if (accept(IsChar('r')))
867 res += m_mode == ParserMode::Interpret ? '\r' : 'r';
868 else if (accept(IsChar('a')))
869 res += m_mode == ParserMode::Interpret ? '\a' : 'a';
870 else if (accept(IsChar('b')))
871 res += m_mode == ParserMode::Interpret ? '\b' : 'b';
872 else if (accept(IsChar('f')))
873 res += m_mode == ParserMode::Interpret ? '\f' : 'f';
874 else if (accept(IsChar('u')))
875 {
876 std::string seq;
877 if (hexNumber(4, &seq))
878 {
880 {
881 char utf8_str[5];
882 utf8::decode(seq.c_str(), utf8_str);
// an empty decoding result is treated as an invalid code point
883 if (*utf8_str == '\0')
884 error("Invalid escape sequence", pos);
885 res += utf8_str;
886 }
887 else
888 res += "u" + seq;
889 }
890 else
891 error("Invalid escape sequence, expected 4 hex digits: \\uabcd", pos);
892 }
893 else if (accept(IsChar('U')))
894 {
895 std::string seq;
896 if (hexNumber(8, &seq))
897 {
899 {
// skip leading zeros before handing the hex digits to utf8::decode
900 std::size_t begin = 0;
901 for (; seq[begin] == '0'; ++begin)
902 ;
903 char utf8_str[5];
904 utf8::decode(seq.c_str() + begin, utf8_str);
905 if (*utf8_str == '\0')
906 error("Invalid escape sequence", pos);
907 res += utf8_str;
908 }
909 else
910 res += "U" + seq;
911 }
912 else
913 error("Invalid escape sequence, expected 8 hex digits: \\UABCDEF78", pos);
914 }
915 else
916 {
// step back one position before reporting the unknown escape
917 backtrack(getCount() - 1);
918 error("Unknown escape sequence", pos);
919 }
920 }
921 else
922 accept(IsNot(IsEither(IsChar('\\'), IsChar('"'))), &res);
923
924 if (accept(IsChar('"')))
925 break;
926 if (isEOF())
927 expectSuffixOrError('"', "after string");
928 }
929
930 return positioned(Node(NodeType::String, res), filepos);
931 }
932 return std::nullopt;
933 }
933
934 std::optional<Node> Parser::field(const FilePosition filepos)
935 {
936 std::string sym;
937 if (!name(&sym))
938 return std::nullopt;
939
940 std::optional<Node> leaf { Node(NodeType::Field) };
941 leaf->push_back(Node(NodeType::Symbol, sym));
942
943 while (true)
944 {
945 if (leaf->list().size() == 1 && !accept(IsChar('.'))) // Symbol:abc
946 return std::nullopt;
947
948 if (leaf->list().size() > 1 && !accept(IsChar('.')))
949 break;
950
951 const auto filepos_inner = getCursor();
952 std::string res;
953 if (!name(&res))
954 errorWithNextToken("Expected a field name: <symbol>.<field>");
955 leaf->push_back(positioned(Node(NodeType::Symbol, res), filepos_inner));
956 }
957
958 return positioned(leaf, filepos);
959 }
960
961 std::optional<Node> Parser::symbol(const FilePosition filepos)
962 {
963 std::string res;
964 if (!name(&res))
965 return std::nullopt;
966 return positioned(Node(NodeType::Symbol, res), filepos);
967 }
968
969 std::optional<Node> Parser::spread(const FilePosition filepos)
970 {
971 std::string res;
972 if (sequence("..."))
973 {
974 if (!name(&res))
975 errorWithNextToken("Expected a name for the variadic");
976 return positioned(Node(NodeType::Spread, res), filepos);
977 }
978 return std::nullopt;
979 }
980
// Parse an empty pair of parentheses `()`, optionally containing only
// newlines/comments. Returns std::nullopt when the input is not exactly that.
// NOTE(review): the condition choosing between the two returns below (a `nil`
// symbol versus an empty list) is on a listing line that is not visible here —
// confirm against the original file.
982 std::optional<Node> Parser::nil(const FilePosition filepos)
983 {
984 if (!accept(IsChar('(')))
985 return std::nullopt;
986
987 const std::string comment = newlineOrComment();
988 if (!accept(IsChar(')')))
989 return std::nullopt;
990
992 return positioned(Node(NodeType::Symbol, "nil").attachNearestCommentBefore(comment), filepos);
993 return positioned(Node(NodeType::List).attachNearestCommentBefore(comment), filepos);
994 }
994
995 std::optional<Node> Parser::atom()
996 {
997 const auto pos = getCount();
998 const auto filepos = getCursor();
999
1000 if (auto res = Parser::number(filepos); res.has_value())
1001 return res;
1002 backtrack(pos);
1003
1004 if (auto res = Parser::string(filepos); res.has_value())
1005 return res;
1006 backtrack(pos);
1007
1008 if (auto res = Parser::spread(filepos); m_allow_macro_behavior > 0 && res.has_value())
1009 return res;
1010 backtrack(pos);
1011
1012 if (auto res = Parser::field(filepos); res.has_value())
1013 return res;
1014 backtrack(pos);
1015
1016 if (auto res = Parser::symbol(filepos); res.has_value())
1017 return res;
1018 backtrack(pos);
1019
1020 if (auto res = Parser::nil(filepos); res.has_value())
1021 return res;
1022 backtrack(pos);
1023
1024 return std::nullopt;
1025 }
1026
1027 std::optional<Node> Parser::anyAtomOf(const std::initializer_list<NodeType> types)
1028 {
1029 if (auto value = atom(); value.has_value())
1030 {
1031 for (const auto type : types)
1032 {
1033 if (value->nodeType() == type)
1034 return value;
1035 }
1036 }
1037 return std::nullopt;
1038 }
1039
1040 std::optional<Node> Parser::nodeOrValue()
1041 {
1042 if (auto value = atom(); value.has_value())
1043 return value;
1044 if (auto sub_node = node(); sub_node.has_value())
1045 return sub_node;
1046
1047 return std::nullopt;
1048 }
1049
// Run `parser` on the content of a parenthesised form and take care of the
// surrounding `(` and `)` plus the comments around the form.
// `name` is only used to build the error message when the closing `)` is
// missing; the value "function" selects a dedicated hint.
// Returns std::nullopt when the opening prefix is missing or `parser` fails.
// NOTE(review): the declaration of `context` is on a listing line that is not
// visible here — confirm against the original file.
1051 std::optional<Node> Parser::wrapped(std::optional<Node> (Parser::*parser)(FilePosition), const std::string& name)
1052 {
1053 const auto cursor = getCursor();
1055 if (!prefix('('))
1056 return std::nullopt;
1057
1058 const std::string comment = newlineOrComment();
1059
1060 if (auto result = (this->*parser)(cursor); result.has_value())
1061 {
// keep the comment the sub-parser already attached, then append the one read after `(`
1062 result->attachNearestCommentBefore(result->comment() + comment);
1063 result.value().attachCommentAfter(newlineOrComment());
1064
1065 if (name == "function")
1066 expectSuffixOrError(')', "after function body. Did you forget to wrap the body with `{}'?", context);
1067 else
1068 expectSuffixOrError(')', "after " + name, context);
1069
1070 result.value().attachCommentAfter(spaceComment());
1071 return result;
1072 }
1073
1074 return std::nullopt;
1075 }
1075}
Parse ArkScript code, but do not handle any import declarations.
bool sequence(const std::string &s)
void initParser(const std::string &filename, const std::string &code)
bool expect(const CharPred &t, std::string *s=nullptr)
Check if a Character Predicate was able to parse, call next() if matching; throw a CodeError if it doesn't match.
void error(const std::string &error, FilePosition start_at, const std::optional< CodeErrorContext > &additional_context=std::nullopt) const
Create an error context and throw an error containing said context.
bool accept(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching
std::string newlineOrComment()
bool hexNumber(unsigned length, std::string *s=nullptr)
void backtrack(long n)
Backtrack to a given position (this is NOT an offset!)
std::string peek() const
bool oneOf(std::initializer_list< std::string > words, std::string *s=nullptr)
Fetch a token and try to match one of the given words.
bool space(std::string *s=nullptr)
bool name(std::string *s=nullptr)
bool comment(std::string *s=nullptr)
bool packageName(std::string *s=nullptr)
void errorWithNextToken(const std::string &message, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Fetch the next token (space and paren delimited) to generate an error.
CodeErrorContext generateErrorContextAtCurrentPosition() const
void expectSuffixOrError(char suffix, const std::string &context, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Check for a closing char or generate an error.
bool signedNumber(std::string *s=nullptr)
FilePosition getCursor() const
void traceStart(std::string &&trace_name)
Definition Logger.hpp:109
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
Node & attachNearestCommentBefore(const std::string &comment)
Set the comment field with the nearest comment before this node.
Definition Node.cpp:128
void push_back(const Node &node) noexcept
Every node has a list as well as a value, so we can push_back on any node, no matter its type.
Definition Node.cpp:63
void setString(const std::string &value) noexcept
Set the String object.
Definition Node.cpp:117
Node & attachCommentAfter(const std::string &comment)
Set the comment_after field with the nearest comment after this node.
Definition Node.cpp:134
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
std::optional< Node > string(FilePosition filepos)
Definition Parser.cpp:841
std::optional< Node > letMutSet(FilePosition filepos)
Definition Parser.cpp:157
std::optional< Node > loop(FilePosition filepos)
Definition Parser.cpp:265
Parser(unsigned debug, ParserMode mode=ParserMode::Interpret)
Constructs a new Parser object.
Definition Parser.cpp:7
std::optional< Node > atom()
Try to parse an atom (number, string, spread, field, symbol, nil)
Definition Parser.cpp:995
std::optional< Node > spread(FilePosition filepos)
Definition Parser.cpp:969
std::optional< Node > number(FilePosition filepos)
Definition Parser.cpp:827
void process(const std::string &filename, const std::string &code)
Parse the given code.
Definition Parser.cpp:51
std::optional< Node > block(FilePosition filepos)
Definition Parser.cpp:419
std::optional< Node > macro(FilePosition filepos)
Definition Parser.cpp:686
std::optional< Node > field(FilePosition filepos)
Definition Parser.cpp:934
std::optional< Node > functionCall(FilePosition filepos)
Definition Parser.cpp:754
const Node & ast() const noexcept
Definition Parser.cpp:92
std::optional< Node > nodeOrValue()
Try to parse an atom first, if it fails try to parse a node.
Definition Parser.cpp:1040
std::vector< std::function< std::optional< Node >(FilePosition)> > m_parsers
Definition Parser.hpp:72
const std::vector< Import > & imports() const
Definition Parser.cpp:97
ParserMode m_mode
Definition Parser.hpp:66
std::optional< Node > condition(FilePosition filepos)
Definition Parser.cpp:231
std::optional< Node > del(FilePosition filepos)
Definition Parser.cpp:211
std::optional< Node > wrapped(std::optional< Node >(Parser::*parser)(FilePosition), const std::string &name)
Try to parse using a given parser, prefixing and suffixing it with (...), handling comments around th...
Definition Parser.cpp:1050
std::optional< Node > node()
Definition Parser.cpp:131
unsigned m_allow_macro_behavior
Toggled on when inside a macro definition, off afterward.
Definition Parser.hpp:70
std::optional< Node > function(FilePosition filepos)
Definition Parser.cpp:532
Node positioned(Node node, FilePosition cursor) const
Definition Parser.cpp:102
std::optional< Node > functionArgs(FilePosition filepos)
Definition Parser.cpp:459
std::optional< Node > anyAtomOf(std::initializer_list< NodeType > types)
Try to parse an atom, if any, match its type against the given list.
Definition Parser.cpp:1027
std::vector< Import > m_imports
Definition Parser.hpp:69
std::size_t m_nested_nodes
Nested node counter.
Definition Parser.hpp:71
std::optional< Node > list(FilePosition filepos)
Definition Parser.cpp:798
std::optional< Node > macroArgs(FilePosition filepos)
Definition Parser.cpp:629
std::optional< Node > symbol(FilePosition filepos)
Definition Parser.cpp:961
std::optional< Node > import_(FilePosition filepos)
Definition Parser.cpp:289
std::optional< Node > nil(FilePosition filepos)
Definition Parser.cpp:981
std::optional< Node > macroCondition(FilePosition filepos)
Definition Parser.cpp:596
bool isDouble(const std::string &s, double *output=nullptr)
Checks if a string is a valid double.
Definition Utils.hpp:85
NodeType
The different node types available.
Definition Common.hpp:44
@ Interpret
Escape sequences and () will be replaced by their UTF8 representation and nil, respectively.
constexpr std::array< std::string_view, 13 > nodeTypes
Node types as string, in the same order as the enum NodeType.
Definition Common.hpp:61
constexpr std::size_t MaxNestedNodes
Maximum number of nodes that can be nested while parsing code.
Definition Constants.hpp:75
void decode(const char *input, char *dest)
Convert hex string to utf8 string.
Definition utf8.hpp:67
Describes a position in a given file; handled by the BaseParser.
Describes a span for a node/atom in a file, its start position and end position.
Definition Position.hpp:35
std::size_t line
Definition Import.hpp:14
std::vector< std::string > symbols
List of symbols to import, can be empty if none provided. (import package :a :b)
Definition Import.hpp:48
std::size_t col
Position in the source file.
Definition Import.hpp:14
std::string prefix
The filename without the extension.
Definition Import.hpp:23
bool is_glob
Import as glob (import package:*)
Definition Import.hpp:42
std::string toPackageString() const
Definition Import.hpp:54
std::vector< std::string > package
Package with all the segments.
Definition Import.hpp:31
bool with_prefix
Import with prefix (import package)
Definition Import.hpp:37