ArkScript
A small, lisp-inspired, functional scripting language
Parser.cpp
Go to the documentation of this file.
2
3#include <fmt/core.h>
4
5namespace Ark::internal
6{
7 Parser::Parser(const unsigned debug, const ParserMode mode) :
8 BaseParser(), m_mode(mode), m_logger("Parser", debug),
9 m_ast(NodeType::List), m_imports({}), m_allow_macro_behavior(0),
10 m_nested_nodes(0)
11 {
12 m_ast.push_back(Node(Keyword::Begin));
13
14 m_parsers = {
15 [this](FilePosition) {
16 return wrapped(&Parser::letMutSet, "variable assignment or declaration");
17 },
18 [this](FilePosition) {
19 return wrapped(&Parser::function, "function");
20 },
21 [this](FilePosition) {
22 return wrapped(&Parser::condition, "condition");
23 },
24 [this](FilePosition) {
25 return wrapped(&Parser::loop, "loop");
26 },
27 [this](const FilePosition filepos) {
28 return import_(filepos);
29 },
30 [this](const FilePosition filepos) {
31 return block(filepos);
32 },
33 [this](FilePosition) {
34 return wrapped(&Parser::macroCondition, "$if");
35 },
36 [this](const FilePosition filepos) {
37 return macro(filepos);
38 },
39 [this](FilePosition) {
40 return wrapped(&Parser::del, "del");
41 },
42 [this](const FilePosition filepos) {
43 return functionCall(filepos);
44 },
45 [this](const FilePosition filepos) {
46 return list(filepos);
47 }
48 };
49 }
50
// Parse `code` (coming from `filename`) and append every top-level node to m_ast.
// NOTE(review): numbered line 89 of the original listing is absent from this view.
51 void Parser::process(const std::string& filename, const std::string& code)
52 {
53 m_logger.traceStart("process");
54 initParser(filename, code);
55
56 while (!isEOF())
57 {
58 std::string comment = newlineOrComment();
59 if (isEOF())
60 {
// input ended right after a comment: attach it after the last node pushed to the AST
61 if (!comment.empty())
62 m_ast.list().back().attachCommentAfter(comment);
63 break;
64 }
65
// remember the buffer position so it can be restored if no node can be parsed
66 const auto pos = getCount();
67 if (auto n = node())
68 {
69 m_ast.push_back(n->attachNearestCommentBefore(n->comment() + comment));
70 m_ast.list().back().attachCommentAfter(spaceComment());
71 }
72 else
73 {
// nothing matched: go back and pick an error message based on the next token
74 backtrack(pos);
75 std::string out = peek();
76 std::string message;
77 if (out == ")")
78 message = "Unexpected closing paren";
79 else if (out == "}")
80 message = "Unexpected closing bracket";
81 else if (out == "]")
82 message = "Unexpected closing square bracket";
83 else
84 errorWithNextToken("invalid syntax, expected node");
// NOTE(review): presumably errorWithNextToken throws, so this call is only reached
// with one of the three messages set above - confirm
85 errorWithNextToken(message);
86 }
87 }
88
90 }
91
92 const Node& Parser::ast() const noexcept
93 {
94 return m_ast;
95 }
96
97 const std::vector<Import>& Parser::imports() const
98 {
99 return m_imports;
100 }
101
102 Node Parser::positioned(Node node, const FilePosition cursor) const
103 {
104 const auto [row, col] = cursor;
105 const auto [end_row, end_col] = getCursor();
106
107 node.m_filename = m_filename;
108 node.m_pos = FileSpan {
109 .start = FilePos { .line = row, .column = col },
110 .end = FilePos { .line = end_row, .column = end_col }
111 };
112 return node;
113 }
114
115 std::optional<Node>& Parser::positioned(std::optional<Node>& node, const FilePosition cursor) const
116 {
117 if (!node)
118 return node;
119
120 const auto [row, col] = cursor;
121 const auto [end_row, end_col] = getCursor();
122
123 node->m_filename = m_filename;
124 node->m_pos = FileSpan {
125 .start = FilePos { .line = row, .column = col },
126 .end = FilePos { .line = end_row, .column = end_col }
127 };
128 return node;
129 }
130
// Try every registered sub-parser of m_parsers in order, from the same starting position,
// and return the first result that holds a value.
// NOTE(review): numbered lines 133, 135 and 153 of the original listing are absent from this
// view. The condition guarding the "too many nested node" error below is therefore not visible;
// presumably it compares m_nested_nodes against MaxNestedNodes - confirm against the real file.
131 std::optional<Node> Parser::node()
132 {
134
136 errorWithNextToken(fmt::format("Too many nested node while parsing, exceeds limit of {}. Consider rewriting your code by breaking it in functions and macros.", MaxNestedNodes));
137
138 // save current position in buffer to be able to go back if needed
139 const auto position = getCount();
140 const auto filepos = getCursor();
141 std::optional<Node> result = std::nullopt;
142
143 for (auto&& parser : m_parsers)
144 {
145 result = parser(filepos);
146
147 if (result)
148 break;
// this sub-parser did not match: restore the buffer before trying the next one
149 backtrack(position);
150 }
151
152 // return std::nullopt only on parsing error, nothing matched, the user provided terrible code
154 return result;
155 }
156
// Parse the inside of a `let`, `mut` or `set` form: keyword, symbol, then value.
// Returns std::nullopt when the next token is none of the three keywords.
// NOTE(review): numbered lines 175 and 201 of the original listing are absent from this view.
157 std::optional<Node> Parser::letMutSet(const FilePosition filepos)
158 {
159 std::optional<Node> leaf { NodeType::List };
160
161 std::string token;
162 if (!oneOf({ "let", "mut", "set" }, &token))
163 return std::nullopt;
164
165 std::string comment = newlineOrComment();
166 leaf->attachNearestCommentBefore(comment);
167
168 if (token == "let")
169 leaf->push_back(Node(Keyword::Let));
170 else if (token == "mut")
171 leaf->push_back(Node(Keyword::Mut));
172 else // "set"
173 leaf->push_back(Node(Keyword::Set));
174
// NOTE(review): the statement introducing this scope (line 175) is not visible; the comment
// further down mentions a "macro state", so presumably this is conditional - confirm
176 {
177 const auto position = getCount();
178 const auto value_pos = getCursor();
179 if (const auto value = nodeOrValue(); value.has_value())
180 {
// only lists, symbols, macros and spreads are accepted in place of the symbol name
181 const Node& sym = value.value();
182 if (sym.nodeType() == NodeType::List || sym.nodeType() == NodeType::Symbol || sym.nodeType() == NodeType::Macro || sym.nodeType() == NodeType::Spread)
183 leaf->push_back(sym);
184 else
185 error(fmt::format("Can not use a {} as a symbol name, even in a macro", nodeTypes[static_cast<std::size_t>(sym.nodeType())]), value_pos);
186 }
187 else
188 backtrack(position);
189 }
190
// only the keyword has been pushed so far: a plain symbol name is required
191 if (leaf->constList().size() == 1)
192 {
193 // we haven't parsed anything while in "macro state"
194 std::string symbol_name;
195 if (!name(&symbol_name))
196 errorWithNextToken(token + " needs a symbol");
197
198 leaf->push_back(Node(NodeType::Symbol, symbol_name));
199 }
200
202 if (auto value = nodeOrValue(); value.has_value())
203 leaf->push_back(value.value().attachNearestCommentBefore(comment));
204 else
205 errorWithNextToken("Expected a value");
206
207 return positioned(leaf, filepos);
208 }
209
210 std::optional<Node> Parser::del(const FilePosition filepos)
211 {
212 std::optional<Node> leaf { NodeType::List };
213
214 if (!oneOf({ "del" }))
215 return std::nullopt;
216 leaf->push_back(Node(Keyword::Del));
217
218 const std::string comment = newlineOrComment();
219
220 std::string symbol_name;
221 if (!name(&symbol_name))
222 errorWithNextToken("del needs a symbol");
223
224 leaf->push_back(Node(NodeType::Symbol, symbol_name));
225 leaf->list().back().attachNearestCommentBefore(comment);
226
227 return positioned(leaf, filepos);
228 }
229
// Parse the inside of an `if` form: keyword, condition, value if true, optional value if false.
// Returns std::nullopt when the next token is not `if`.
// NOTE(review): numbered lines 246 and 252 of the original listing are absent from this view.
230 std::optional<Node> Parser::condition(const FilePosition filepos)
231 {
232 std::optional<Node> leaf { NodeType::List };
233
234 if (!oneOf({ "if" }))
235 return std::nullopt;
236
237 std::string comment = newlineOrComment();
238
239 leaf->push_back(Node(Keyword::If));
240
241 if (auto cond_expr = nodeOrValue(); cond_expr.has_value())
242 leaf->push_back(cond_expr.value().attachNearestCommentBefore(comment));
243 else
244 errorWithNextToken("`if' needs a valid condition");
245
247 if (auto value_if_true = nodeOrValue(); value_if_true.has_value())
248 leaf->push_back(value_if_true.value().attachNearestCommentBefore(comment));
249 else
250 errorWithNextToken("Expected a node or value after condition");
251
// the else branch is optional: no error is raised when it is missing
253 if (auto value_if_false = nodeOrValue(); value_if_false.has_value())
254 {
255 leaf->push_back(value_if_false.value().attachNearestCommentBefore(comment));
256 leaf->list().back().attachCommentAfter(newlineOrComment());
257 }
258 else if (!comment.empty())
259 leaf->attachCommentAfter(comment);
260
261 return positioned(leaf, filepos);
262 }
263
// Parse the inside of a `while` form: keyword, condition, body.
// Returns std::nullopt when the next token is not `while`.
// NOTE(review): numbered line 279 of the original listing is absent from this view.
264 std::optional<Node> Parser::loop(const FilePosition filepos)
265 {
266 std::optional<Node> leaf { NodeType::List };
267
268 if (!oneOf({ "while" }))
269 return std::nullopt;
270
271 std::string comment = newlineOrComment();
272 leaf->push_back(Node(Keyword::While));
273
274 if (auto cond_expr = nodeOrValue(); cond_expr.has_value())
275 leaf->push_back(cond_expr.value().attachNearestCommentBefore(comment));
276 else
277 errorWithNextToken("`while' needs a valid condition");
278
280 if (auto body = nodeOrValue(); body.has_value())
281 leaf->push_back(body.value().attachNearestCommentBefore(comment));
282 else
283 errorWithNextToken("Expected a node or value after loop condition");
284
285 return positioned(leaf, filepos);
286 }
287
// Parse an import form: `(import a.b.c)`, `(import a.b:*)` or `(import a.b :x :y)`.
// On success the resulting list node holds the Import keyword, the package node and either a
// `*` symbol or the symbols node; the collected Import data is also appended to m_imports.
// Returns std::nullopt when the input does not start with `(` followed by `import`.
// NOTE(review): numbered lines 292, 302, 320, 368, 372, 398 and 410 of the original listing are
// absent from this view; `context`, `packageNode` and `symbols` are presumably declared on some
// of them - confirm against the real file.
288 std::optional<Node> Parser::import_(const FilePosition filepos)
289 {
290 std::optional<Node> leaf { NodeType::List };
291
293 if (!accept(IsChar('(')))
294 return std::nullopt;
295
296 std::string comment = newlineOrComment();
297 leaf->attachNearestCommentBefore(comment);
298
299 if (!oneOf({ "import" }))
300 return std::nullopt;
301
303 leaf->push_back(Node(Keyword::Import));
304
305 Import import_data;
306 import_data.col = filepos.col;
307 import_data.line = filepos.row;
308
// position kept so that a "name too long" error can be reported from the start of the package name
309 const auto pos = getCount();
310 if (!packageName(&import_data.prefix))
311 errorWithNextToken("Import expected a package name");
312
313 if (import_data.prefix.size() > 255)
314 {
315 backtrack(pos);
316 errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", import_data.prefix.size()));
317 }
318 import_data.package.push_back(import_data.prefix);
319
321 packageNode.push_back(Node(NodeType::Symbol, import_data.prefix));
322
323 // first, parse the package name
324 while (!isEOF())
325 {
326 const auto item_pos = getCursor();
327
328 // parsing package folder.foo.bar.yes
329 if (accept(IsChar('.')))
330 {
331 const auto package_pos = getCursor();
332 std::string path;
333 if (!packageName(&path))
334 errorWithNextToken("Package name expected after '.'");
335 else
336 {
337 packageNode.push_back(positioned(Node(NodeType::Symbol, path), package_pos));
338
339 import_data.package.push_back(path);
340 import_data.prefix = path; // in the end we will store the last element of the package, which is what we want
341
342 if (path.size() > 255)
343 {
344 backtrack(pos);
345 errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", path.size()));
346 }
347 }
348 }
349 else if (accept(IsChar(':')) && accept(IsChar('*'))) // parsing :*, terminal in imports
350 {
351 leaf->push_back(packageNode);
352 leaf->push_back(positioned(Node(NodeType::Symbol, "*"), item_pos));
353
354 space();
355 expectSuffixOrError(')', fmt::format("in import `{}'", import_data.toPackageString()), context);
356
357 // save the import data structure to know we encounter an import node, and retrieve its data more easily later on
358 import_data.with_prefix = false;
359 import_data.is_glob = true;
360 m_imports.push_back(import_data);
361
362 return positioned(leaf, filepos);
363 }
364 else
365 break;
366 }
367
369 // then parse the symbols to import, if any
370 if (space())
371 {
373
374 while (!isEOF())
375 {
376 if (accept(IsChar(':'))) // parsing potential :a :b :c
377 {
378 const auto symbol_pos = getCursor();
379 std::string symbol_name;
380 if (!name(&symbol_name))
381 errorWithNextToken("Expected a valid symbol to import");
382 if (symbol_name == "*")
383 error(fmt::format("Glob patterns can not be separated from the package, use (import {}:*) instead", import_data.toPackageString()), symbol_pos);
384
// reject a symbol whose last two characters are `:*`
385 if (symbol_name.size() >= 2 && symbol_name[symbol_name.size() - 2] == ':' && symbol_name.back() == '*')
386 error("Glob pattern can not follow a symbol to import", FilePosition { .row = symbol_pos.row, .col = symbol_pos.col + symbol_name.size() - 2 });
387
388 symbols.push_back(positioned(Node(NodeType::Symbol, symbol_name).attachNearestCommentBefore(comment), symbol_pos));
389 comment.clear();
390
391 import_data.symbols.push_back(symbol_name);
392 // we do not need the prefix when importing specific symbols
393 import_data.with_prefix = false;
394 }
395
396 if (!space())
397 break;
399 }
400
401 if (!comment.empty() && !symbols.list().empty())
402 symbols.list().back().attachCommentAfter(comment);
403 }
404
405 leaf->push_back(packageNode);
406 leaf->push_back(symbols);
407 // save the import data
408 m_imports.push_back(import_data);
409
411 if (!comment.empty())
412 leaf->list().back().attachCommentAfter(comment);
413
414 expectSuffixOrError(')', fmt::format("in import `{}'", import_data.toPackageString()), context);
415 return positioned(leaf, filepos);
416 }
417
// Parse a block written either as `(begin ...)` or with the alternative `{ ... }` syntax.
// The resulting list node starts with a Begin keyword followed by every parsed node or value.
// Returns std::nullopt when the input starts with neither `(begin` nor `{`.
// NOTE(review): numbered lines 422, 427, 439, 446 and 452 of the original listing are absent
// from this view; `context` is presumably declared on one of them - confirm.
418 std::optional<Node> Parser::block(const FilePosition filepos)
419 {
420 std::optional<Node> leaf { NodeType::List };
421
423 bool alt_syntax = false;
424 std::string comment;
425 if (accept(IsChar('(')))
426 {
428 if (!oneOf({ "begin" }))
429 return std::nullopt;
430 }
431 else if (accept(IsChar('{')))
432 alt_syntax = true;
433 else
434 return std::nullopt;
435
436 leaf->setAltSyntax(alt_syntax);
437 leaf->push_back(Node(Keyword::Begin).attachNearestCommentBefore(comment));
438
440
441 while (!isEOF())
442 {
443 if (auto value = nodeOrValue(); value.has_value())
444 {
445 leaf->push_back(value.value().attachNearestCommentBefore(comment));
447 }
448 else
449 break;
450 }
451
// the closing character must match the opening syntax
453 expectSuffixOrError(alt_syntax ? '}' : ')', "to close block", context);
454 leaf->list().back().attachCommentAfter(comment);
455 return positioned(leaf, filepos);
456 }
457
// Parse a function argument list: `(a b &c &d)`, symbols first, then captures prefixed by `&`.
// Returns the list node on success, std::nullopt when the closing `)` is not found.
// NOTE(review): numbered line 493 of the original listing is absent from this view.
458 std::optional<Node> Parser::functionArgs(const FilePosition filepos)
459 {
460 expect(IsChar('('));
461 std::optional<Node> args { NodeType::List };
462
463 std::string comment = newlineOrComment();
464 args->attachNearestCommentBefore(comment);
465
466 bool has_captures = false;
467
468 while (!isEOF())
469 {
470 const auto pos = getCursor();
471 if (accept(IsChar('&'))) // captures
472 {
473 has_captures = true;
474 std::string capture;
475 if (!name(&capture))
476 error("No symbol provided to capture", pos);
477
478 args->push_back(positioned(Node(NodeType::Capture, capture), pos));
479 }
480 else
481 {
482 std::string symbol_name;
483 if (!name(&symbol_name))
484 break;
// a plain argument is not allowed once a capture has been seen
485 if (has_captures)
486 error("Captured variables should be at the end of the argument list", pos);
487
488 args->push_back(positioned(Node(NodeType::Symbol, symbol_name), pos));
489 }
490
491 if (!comment.empty())
492 args->list().back().attachNearestCommentBefore(comment);
494 }
495
496 if (accept(IsChar(')')))
497 return positioned(args, filepos);
498 return std::nullopt;
499 }
500
// Parse the inside of a `fun` form: keyword, argument list, body.
// Returns std::nullopt when the next token is not `fun`.
// NOTE(review): numbered line 522 of the original listing is absent from this view, so the
// statement executed when the arguments are the symbol `nil` is not visible.
501 std::optional<Node> Parser::function(const FilePosition filepos)
502 {
503 std::optional<Node> leaf { NodeType::List };
504
505 if (!oneOf({ "fun" }))
506 return std::nullopt;
507 leaf->push_back(Node(Keyword::Fun));
508
509 const std::string comment_before_args = newlineOrComment();
510
// this loop runs at most once: it either returns, or breaks after backtracking
511 while (m_allow_macro_behavior > 0)
512 {
513 const auto position = getCount();
514
515 // args
516 if (const auto value = nodeOrValue(); value.has_value())
517 {
518 // if value is nil, just add an empty argument bloc to prevent bugs when
519 // declaring functions inside macros
520 const Node& args = value.value();
521 if (args.nodeType() == NodeType::Symbol && args.string() == "nil")
523 else
524 leaf->push_back(args);
525 }
526 else
527 {
528 backtrack(position);
529 break;
530 }
531
532 const std::string comment = newlineOrComment();
533 // body
534 if (auto value = nodeOrValue(); value.has_value())
535 leaf->push_back(value.value().attachNearestCommentBefore(comment));
536 else
537 errorWithNextToken("Expected a body for the function");
538 return positioned(leaf, filepos);
539 }
540
// regular path: try a proper argument list first, then fall back on any node or value
541 const auto position = getCount();
542 const auto args_file_pos = getCursor();
543 if (auto args = functionArgs(args_file_pos); args.has_value())
544 leaf->push_back(args.value().attachNearestCommentBefore(comment_before_args));
545 else
546 {
547 backtrack(position);
548
549 if (auto value = nodeOrValue(); value.has_value())
550 leaf->push_back(value.value().attachNearestCommentBefore(comment_before_args));
551 else
552 errorWithNextToken("Expected an argument list");
553 }
554
555 const std::string comment = newlineOrComment();
556
557 if (auto value = nodeOrValue(); value.has_value())
558 leaf->push_back(value.value().attachNearestCommentBefore(comment));
559 else
560 errorWithNextToken("Expected a body for the function");
561
562 return positioned(leaf, filepos);
563 }
564
// Parse the inside of a `$if` form into a Macro node: If keyword, condition, value if true,
// optional value if false. Returns std::nullopt when the next token is not `$if`.
// NOTE(review): numbered lines 581, 587 and 591 of the original listing are absent from this view.
565 std::optional<Node> Parser::macroCondition(const FilePosition filepos)
566 {
567 std::optional<Node> leaf { NodeType::Macro };
568
569 if (!oneOf({ "$if" }))
570 return std::nullopt;
571 leaf->push_back(Node(Keyword::If));
572
573 std::string comment = newlineOrComment();
574 leaf->attachNearestCommentBefore(comment);
575
576 if (const auto cond_expr = nodeOrValue(); cond_expr.has_value())
577 leaf->push_back(cond_expr.value());
578 else
579 errorWithNextToken("$if need a valid condition");
580
582 if (auto value_if_true = nodeOrValue(); value_if_true.has_value())
583 leaf->push_back(value_if_true.value().attachNearestCommentBefore(comment));
584 else
585 errorWithNextToken("Expected a node or value after condition");
586
// the else branch is optional: no error is raised when it is missing
588 if (auto value_if_false = nodeOrValue(); value_if_false.has_value())
589 {
590 leaf->push_back(value_if_false.value().attachNearestCommentBefore(comment));
592 leaf->list().back().attachCommentAfter(comment);
593 }
594
595 return positioned(leaf, filepos);
596 }
597
// Parse a macro argument list: `(a b ...rest)`, names followed by an optional spread.
// Argument names must be unique. Returns std::nullopt when the list is not opened by `(`
// or not closed by `)`.
// NOTE(review): numbered lines 617 and 648 of the original listing are absent from this view.
598 std::optional<Node> Parser::macroArgs(const FilePosition filepos)
599 {
600 if (!accept(IsChar('(')))
601 return std::nullopt;
602
603 std::optional<Node> args { NodeType::List };
604
605 std::string comment = newlineOrComment();
606 args->attachNearestCommentBefore(comment);
607
// names seen so far, used to detect duplicates
608 std::vector<std::string> names;
609 while (!isEOF())
610 {
611 const auto pos = getCount();
612
613 std::string arg_name;
614 if (!name(&arg_name))
615 break;
616
618 args->push_back(Node(NodeType::Symbol, arg_name).attachNearestCommentBefore(comment));
619
620 if (std::ranges::find(names, arg_name) != names.end())
621 {
// go back to the start of the duplicated name before reporting the error
622 backtrack(pos);
623 errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", arg_name));
624 }
625 names.push_back(arg_name);
626 }
627
628 const auto pos = getCount();
629 if (sequence("..."))
630 {
631 std::string spread_name;
632 if (!name(&spread_name))
633 errorWithNextToken("Expected a name for the variadic arguments list");
634
635 args->push_back(Node(NodeType::Spread, spread_name));
636 args->list().back().attachCommentAfter(newlineOrComment());
637
638 if (std::ranges::find(names, spread_name) != names.end())
639 {
640 backtrack(pos);
641 errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", spread_name));
642 }
643 }
644
645 if (!accept(IsChar(')')))
646 return std::nullopt;
647
649 if (!comment.empty())
650 args->attachCommentAfter(comment);
651
652 return positioned(args, filepos);
653 }
654
// Parse a macro definition: `(macro name value)` or `(macro name (args...) body)`.
// Returns std::nullopt when the input does not start with `(` followed by `macro`.
// NOTE(review): numbered lines 659, 672, 684, 686, 698 and 700 of the original listing are
// absent from this view; `context` is presumably declared on one of them, and the lines
// surrounding both nodeOrValue() calls are not visible - confirm against the real file.
655 std::optional<Node> Parser::macro(const FilePosition filepos)
656 {
657 std::optional<Node> leaf { NodeType::Macro };
658
660 if (!accept(IsChar('(')))
661 return std::nullopt;
662
663 if (!oneOf({ "macro" }))
664 return std::nullopt;
665 std::string comment = newlineOrComment();
666 leaf->attachNearestCommentBefore(comment);
667
668 std::string symbol_name;
669 if (!name(&symbol_name))
670 errorWithNextToken("Expected a symbol to declare a macro");
671
673 leaf->push_back(Node(NodeType::Symbol, symbol_name).attachNearestCommentBefore(comment));
674
675 const auto position = getCount();
676 const auto args_file_pos = getCursor();
677 if (const auto args = macroArgs(args_file_pos); args.has_value())
678 leaf->push_back(args.value());
679 else
680 {
681 // if we couldn't parse arguments, then we have a value
682 backtrack(position);
683
685 const auto value = nodeOrValue();
687
688 if (value.has_value())
689 leaf->push_back(value.value());
690 else
691 errorWithNextToken(fmt::format("Expected an argument list, atom or node while defining macro `{}'", symbol_name));
692
693 leaf->list().back().attachCommentAfter(newlineOrComment());
694 expectSuffixOrError(')', fmt::format("to close macro `{}'", symbol_name), context);
695 return positioned(leaf, filepos);
696 }
697
// an argument list was parsed: what follows is the macro body
699 const auto value = nodeOrValue();
701
702 if (value.has_value())
703 leaf->push_back(value.value());
704 else if (leaf->list().size() == 2) // the argument list is actually a function call and it's okay
705 {
706 leaf->list().back().attachCommentAfter(newlineOrComment());
707
708 expectSuffixOrError(')', fmt::format("to close macro `{}'", symbol_name), context);
709 return positioned(leaf, filepos);
710 }
711 else
712 {
713 backtrack(position);
714 errorWithNextToken(fmt::format("Expected a value while defining macro `{}'", symbol_name), context);
715 }
716
717 leaf->list().back().attachCommentAfter(newlineOrComment());
718
719 expectSuffixOrError(')', fmt::format("to close macro `{}'", symbol_name), context);
720 return positioned(leaf, filepos);
721 }
722
// Parse a function call: `(callee arg...)`, where the callee is a symbol, a field or a nested node.
// Returns std::nullopt when the input does not start with `(` or no callee can be parsed.
// NOTE(review): numbered lines 725, 742, 749 and 756 of the original listing are absent from
// this view; `context` is presumably declared on one of them. As shown, attachCommentAfter(comment)
// is called twice in a row near the end; a reassignment of `comment` probably sits on the
// missing line 756 - confirm against the real file.
723 std::optional<Node> Parser::functionCall(const FilePosition filepos)
724 {
726 if (!accept(IsChar('(')))
727 return std::nullopt;
728 std::string comment = newlineOrComment();
729
730 const auto func_name_pos = getCursor();
731 std::optional<Node> func;
// the callee: a symbol or a field first, otherwise any nested node
732 if (auto sym_or_field = anyAtomOf({ NodeType::Symbol, NodeType::Field }); sym_or_field.has_value())
733 func = sym_or_field->attachNearestCommentBefore(comment);
734 else if (auto nested = node(); nested.has_value())
735 func = nested->attachNearestCommentBefore(comment);
736 else
737 return std::nullopt;
738
739 std::optional<Node> leaf { NodeType::List };
740 leaf->push_back(positioned(func.value(), func_name_pos));
741
743
// arguments: every node or value up to the first thing that can not be parsed
744 while (!isEOF())
745 {
746 if (auto arg = nodeOrValue(); arg.has_value())
747 {
748 leaf->push_back(arg.value().attachNearestCommentBefore(comment));
750 }
751 else
752 break;
753 }
754
755 leaf->list().back().attachCommentAfter(comment);
757 if (!comment.empty())
758 leaf->list().back().attachCommentAfter(comment);
759
760 expectSuffixOrError(')', fmt::format("in function call to `{}'", func.value().repr()), context);
761 return positioned(leaf, filepos);
762 }
763
// Parse a list literal `[a b c]` into a list node starting with the symbol `list`,
// flagged as using the alternative syntax. Returns std::nullopt when the input does not start with `[`.
// NOTE(review): numbered lines 768 and 782 of the original listing are absent from this view;
// `context` is presumably declared on the first one - confirm.
764 std::optional<Node> Parser::list(const FilePosition filepos)
765 {
766 std::optional<Node> leaf { NodeType::List };
767
769 if (!accept(IsChar('[')))
770 return std::nullopt;
771 leaf->setAltSyntax(true);
772 leaf->push_back(Node(NodeType::Symbol, "list"));
773
774 std::string comment = newlineOrComment();
775 leaf->attachNearestCommentBefore(comment);
776
777 while (!isEOF())
778 {
779 if (auto value = nodeOrValue(); value.has_value())
780 {
781 leaf->push_back(value.value().attachNearestCommentBefore(comment));
783 }
784 else
785 break;
786 }
787 leaf->list().back().attachCommentAfter(comment);
788
789 expectSuffixOrError(']', "to end list definition", context);
790 return positioned(leaf, filepos);
791 }
792
793 std::optional<Node> Parser::number(const FilePosition filepos)
794 {
795 std::string res;
796 if (signedNumber(&res))
797 {
798 double output;
799 if (Utils::isDouble(res, &output))
800 return positioned(Node(output), filepos);
801
802 error("Is not a valid number", filepos);
803 }
804 return std::nullopt;
805 }
806
// Try to parse a double-quoted string literal, handling backslash escape sequences.
// Returns a String node, or std::nullopt when the input does not start with `"`.
// For the single-letter escapes, the control character is stored when m_mode is
// ParserMode::Interpret, otherwise the letter itself is stored.
// NOTE(review): numbered lines 818, 844 and 863 of the original listing are absent from this
// view. The statement guarding `res += '\\';` and the `if` matching each `else` in the \u and \U
// branches are therefore not visible; presumably they test m_mode - confirm against the real file.
807 std::optional<Node> Parser::string(const FilePosition filepos)
808 {
809 std::string res;
810 if (accept(IsChar('"')))
811 {
812 while (true)
813 {
814 const auto pos = getCursor();
815
816 if (accept(IsChar('\\')))
817 {
819 res += '\\';
820
821 if (accept(IsChar('"')))
822 res += '"';
823 else if (accept(IsChar('\\')))
824 res += '\\';
825 else if (accept(IsChar('n')))
826 res += m_mode == ParserMode::Interpret ? '\n' : 'n';
827 else if (accept(IsChar('t')))
828 res += m_mode == ParserMode::Interpret ? '\t' : 't';
829 else if (accept(IsChar('v')))
830 res += m_mode == ParserMode::Interpret ? '\v' : 'v';
831 else if (accept(IsChar('r')))
832 res += m_mode == ParserMode::Interpret ? '\r' : 'r';
833 else if (accept(IsChar('a')))
834 res += m_mode == ParserMode::Interpret ? '\a' : 'a';
835 else if (accept(IsChar('b')))
836 res += m_mode == ParserMode::Interpret ? '\b' : 'b';
837 else if (accept(IsChar('f')))
838 res += m_mode == ParserMode::Interpret ? '\f' : 'f';
839 else if (accept(IsChar('u')))
840 {
// \u followed by exactly 4 hex digits
841 std::string seq;
842 if (hexNumber(4, &seq))
843 {
845 {
846 char utf8_str[5];
847 utf8::decode(seq.c_str(), utf8_str);
848 if (*utf8_str == '\0')
849 error("Invalid escape sequence", pos);
850 res += utf8_str;
851 }
852 else
853 res += "u" + seq;
854 }
855 else
856 error("Invalid escape sequence, expected 4 hex digits: \\uabcd", pos);
857 }
858 else if (accept(IsChar('U')))
859 {
// \U followed by exactly 8 hex digits
860 std::string seq;
861 if (hexNumber(8, &seq))
862 {
864 {
// skip the leading zeros before decoding
865 std::size_t begin = 0;
866 for (; seq[begin] == '0'; ++begin)
867 ;
868 char utf8_str[5];
869 utf8::decode(seq.c_str() + begin, utf8_str);
870 if (*utf8_str == '\0')
871 error("Invalid escape sequence", pos);
872 res += utf8_str;
873 }
874 else
875 res += "U" + seq;
876 }
877 else
878 error("Invalid escape sequence, expected 8 hex digits: \\UABCDEF78", pos);
879 }
880 else
881 {
// step back onto the backslash before reporting the unknown escape
882 backtrack(getCount() - 1);
883 error("Unknown escape sequence", pos);
884 }
885 }
886 else
887 accept(IsNot(IsEither(IsChar('\\'), IsChar('"'))), &res);
888
889 if (accept(IsChar('"')))
890 break;
891 if (isEOF())
892 expectSuffixOrError('"', "after string");
893 }
894
895 return positioned(Node(NodeType::String, res), filepos);
896 }
897 return std::nullopt;
898 }
899
900 std::optional<Node> Parser::field(const FilePosition filepos)
901 {
902 std::string sym;
903 if (!name(&sym))
904 return std::nullopt;
905
906 std::optional<Node> leaf { Node(NodeType::Field) };
907 leaf->push_back(Node(NodeType::Symbol, sym));
908
909 while (true)
910 {
911 if (leaf->list().size() == 1 && !accept(IsChar('.'))) // Symbol:abc
912 return std::nullopt;
913
914 if (leaf->list().size() > 1 && !accept(IsChar('.')))
915 break;
916
917 const auto filepos_inner = getCursor();
918 std::string res;
919 if (!name(&res))
920 errorWithNextToken("Expected a field name: <symbol>.<field>");
921 leaf->push_back(positioned(Node(NodeType::Symbol, res), filepos_inner));
922 }
923
924 return positioned(leaf, filepos);
925 }
926
927 std::optional<Node> Parser::symbol(const FilePosition filepos)
928 {
929 std::string res;
930 if (!name(&res))
931 return std::nullopt;
932 return positioned(Node(NodeType::Symbol, res), filepos);
933 }
934
935 std::optional<Node> Parser::spread(const FilePosition filepos)
936 {
937 std::string res;
938 if (sequence("..."))
939 {
940 if (!name(&res))
941 errorWithNextToken("Expected a name for the variadic");
942 return positioned(Node(NodeType::Spread, res), filepos);
943 }
944 return std::nullopt;
945 }
946
// Try to parse an empty pair of parentheses `()`, with an optional comment in between.
// Returns std::nullopt when the input is not `(` followed by `)`.
// NOTE(review): numbered line 956 of the original listing is absent from this view. As shown,
// the second return is unreachable; presumably a condition on m_mode selects between the `nil`
// symbol and an empty list - confirm against the real file.
947 std::optional<Node> Parser::nil(const FilePosition filepos)
948 {
949 if (!accept(IsChar('(')))
950 return std::nullopt;
951
952 const std::string comment = newlineOrComment();
953 if (!accept(IsChar(')')))
954 return std::nullopt;
955
957 return positioned(Node(NodeType::Symbol, "nil").attachNearestCommentBefore(comment), filepos);
958 return positioned(Node(NodeType::List).attachNearestCommentBefore(comment), filepos);
959 }
960
961 std::optional<Node> Parser::atom()
962 {
963 const auto pos = getCount();
964 const auto filepos = getCursor();
965
966 if (auto res = Parser::number(filepos); res.has_value())
967 return res;
968 backtrack(pos);
969
970 if (auto res = Parser::string(filepos); res.has_value())
971 return res;
972 backtrack(pos);
973
974 if (auto res = Parser::spread(filepos); m_allow_macro_behavior > 0 && res.has_value())
975 return res;
976 backtrack(pos);
977
978 if (auto res = Parser::field(filepos); res.has_value())
979 return res;
980 backtrack(pos);
981
982 if (auto res = Parser::symbol(filepos); res.has_value())
983 return res;
984 backtrack(pos);
985
986 if (auto res = Parser::nil(filepos); res.has_value())
987 return res;
988 backtrack(pos);
989
990 return std::nullopt;
991 }
992
993 std::optional<Node> Parser::anyAtomOf(const std::initializer_list<NodeType> types)
994 {
995 if (auto value = atom(); value.has_value())
996 {
997 for (const auto type : types)
998 {
999 if (value->nodeType() == type)
1000 return value;
1001 }
1002 }
1003 return std::nullopt;
1004 }
1005
1006 std::optional<Node> Parser::nodeOrValue()
1007 {
1008 if (auto value = atom(); value.has_value())
1009 return value;
1010 if (auto sub_node = node(); sub_node.has_value())
1011 return sub_node;
1012
1013 return std::nullopt;
1014 }
1015
// Run the given member parser between an opening `(` and a closing `)`, attaching the
// comments found around it to the resulting node.
// `parser` is the member function to run; `name` is only used to build the error message
// raised when the closing `)` is missing.
// Returns std::nullopt when there is no opening `(` or when `parser` yields no value.
// NOTE(review): numbered line 1019 of the original listing is absent from this view;
// `context` is presumably declared there - confirm.
1016 std::optional<Node> Parser::wrapped(std::optional<Node> (Parser::*parser)(FilePosition), const std::string& name)
1017 {
// the position captured before the opening paren is the one handed to the wrapped parser
1018 const auto cursor = getCursor();
1020 if (!prefix('('))
1021 return std::nullopt;
1022
1023 const std::string comment = newlineOrComment();
1024
1025 if (auto result = (this->*parser)(cursor); result.has_value())
1026 {
1027 result->attachNearestCommentBefore(result->comment() + comment);
1028 result.value().attachCommentAfter(newlineOrComment());
1029
1030 expectSuffixOrError(')', "after " + name, context);
1031
1032 result.value().attachCommentAfter(spaceComment());
1033 return result;
1034 }
1035
1036 return std::nullopt;
1037 }
1038}
Parse ArkScript code, but do not handle any import declarations.
bool sequence(const std::string &s)
void initParser(const std::string &filename, const std::string &code)
bool expect(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching ; throw a CodeError if it do...
void error(const std::string &error, FilePosition start_at, const std::optional< CodeErrorContext > &additional_context=std::nullopt) const
Create an error context and throw an error containing said context.
bool accept(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching
std::string newlineOrComment()
bool hexNumber(unsigned length, std::string *s=nullptr)
void backtrack(long n)
Backtrack to a given position (this is NOT an offset!)
std::string peek() const
bool oneOf(std::initializer_list< std::string > words, std::string *s=nullptr)
Fetch a token and try to match one of the given words.
bool space(std::string *s=nullptr)
bool name(std::string *s=nullptr)
bool comment(std::string *s=nullptr)
bool packageName(std::string *s=nullptr)
void errorWithNextToken(const std::string &message, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Fetch the next token (space and paren delimited) to generate an error.
CodeErrorContext generateErrorContextAtCurrentPosition() const
void expectSuffixOrError(char suffix, const std::string &context, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Check for a closing char or generate an error.
bool signedNumber(std::string *s=nullptr)
FilePosition getCursor() const
void traceStart(std::string &&trace_name)
Definition Logger.hpp:90
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
Node & attachNearestCommentBefore(const std::string &comment)
Set the comment field with the nearest comment before this node.
Definition Node.cpp:128
void push_back(const Node &node) noexcept
Every node has a list as well as a value so we can push_back on all node no matter their type.
Definition Node.cpp:63
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
std::optional< Node > string(FilePosition filepos)
Definition Parser.cpp:807
std::optional< Node > letMutSet(FilePosition filepos)
Definition Parser.cpp:157
std::optional< Node > loop(FilePosition filepos)
Definition Parser.cpp:264
Parser(unsigned debug, ParserMode mode=ParserMode::Interpret)
Constructs a new Parser object.
Definition Parser.cpp:7
std::optional< Node > atom()
Try to parse an atom (number, string, spread, field, symbol, nil)
Definition Parser.cpp:961
std::optional< Node > spread(FilePosition filepos)
Definition Parser.cpp:935
std::optional< Node > number(FilePosition filepos)
Definition Parser.cpp:793
void process(const std::string &filename, const std::string &code)
Parse the given code.
Definition Parser.cpp:51
std::optional< Node > block(FilePosition filepos)
Definition Parser.cpp:418
std::optional< Node > macro(FilePosition filepos)
Definition Parser.cpp:655
std::optional< Node > field(FilePosition filepos)
Definition Parser.cpp:900
std::optional< Node > functionCall(FilePosition filepos)
Definition Parser.cpp:723
const Node & ast() const noexcept
Definition Parser.cpp:92
std::optional< Node > nodeOrValue()
Try to parse an atom first, if it fails try to parse a node.
Definition Parser.cpp:1006
std::vector< std::function< std::optional< Node >(FilePosition)> > m_parsers
Definition Parser.hpp:72
const std::vector< Import > & imports() const
Definition Parser.cpp:97
ParserMode m_mode
Definition Parser.hpp:66
std::optional< Node > condition(FilePosition filepos)
Definition Parser.cpp:230
std::optional< Node > del(FilePosition filepos)
Definition Parser.cpp:210
std::optional< Node > wrapped(std::optional< Node >(Parser::*parser)(FilePosition), const std::string &name)
Try to parse using a given parser, prefixing and suffixing it with (...), handling comments around th...
Definition Parser.cpp:1016
std::optional< Node > node()
Definition Parser.cpp:131
unsigned m_allow_macro_behavior
Toggled on when inside a macro definition, off afterward.
Definition Parser.hpp:70
std::optional< Node > function(FilePosition filepos)
Definition Parser.cpp:501
Node positioned(Node node, FilePosition cursor) const
Definition Parser.cpp:102
std::optional< Node > functionArgs(FilePosition filepos)
Definition Parser.cpp:458
std::optional< Node > anyAtomOf(std::initializer_list< NodeType > types)
Try to parse an atom, if any, match its type against the given list.
Definition Parser.cpp:993
std::vector< Import > m_imports
Definition Parser.hpp:69
std::size_t m_nested_nodes
Nested node counter.
Definition Parser.hpp:71
std::optional< Node > list(FilePosition filepos)
Definition Parser.cpp:764
std::optional< Node > macroArgs(FilePosition filepos)
Definition Parser.cpp:598
std::optional< Node > symbol(FilePosition filepos)
Definition Parser.cpp:927
std::optional< Node > import_(FilePosition filepos)
Definition Parser.cpp:288
std::optional< Node > nil(FilePosition filepos)
Definition Parser.cpp:947
std::optional< Node > macroCondition(FilePosition filepos)
Definition Parser.cpp:565
bool isDouble(const std::string &s, double *output=nullptr)
Checks if a string is a valid double.
Definition Utils.hpp:85
constexpr std::array< std::string_view, 11 > nodeTypes
Node types as string, in the same order as the enum NodeType.
Definition Common.hpp:59
NodeType
The different node types available.
Definition Common.hpp:44
@ Interpret
Escape sequences and () will be replaced by their UTF8 representation and nil, respectively.
constexpr std::size_t MaxNestedNodes
Maximum number of nodes that can be nested while parsing code.
Definition Constants.hpp:72
void decode(const char *input, char *dest)
Convert hex string to utf8 string.
Definition utf8.hpp:67
Describe a position in a given file ; handled by the BaseParser.
Describes a span for a node/atom in a file, its start position and end position.
Definition Position.hpp:35
std::size_t line
Definition Import.hpp:14
std::vector< std::string > symbols
List of symbols to import, can be empty if none provided. (import package :a :b)
Definition Import.hpp:48
std::size_t col
Position in the source file.
Definition Import.hpp:14
std::string prefix
The filename without the extension.
Definition Import.hpp:23
bool is_glob
Import as glob (import package:*)
Definition Import.hpp:42
std::string toPackageString() const
Definition Import.hpp:54
std::vector< std::string > package
Package with all the segments.
Definition Import.hpp:31
bool with_prefix
Import with prefix (import package)
Definition Import.hpp:37