8 BaseParser(), m_mode(mode), m_logger(
"Parser", debug),
9 m_ast(
NodeType::
List), m_imports({}), m_allow_macro_behavior(0),
21 [
this](FilePosition) {
24 [
this](FilePosition) {
27 [
this](
const FilePosition filepos) {
28 return import_(filepos);
30 [
this](
const FilePosition filepos) {
31 return block(filepos);
33 [
this](FilePosition) {
36 [
this](
const FilePosition filepos) {
37 return macro(filepos);
39 [
this](FilePosition) {
42 [
this](
const FilePosition filepos) {
43 return functionCall(filepos);
45 [
this](
const FilePosition filepos) {
75 std::string out =
peek();
78 message =
"Unexpected closing paren";
80 message =
"Unexpected closing bracket";
82 message =
"Unexpected closing square bracket";
104 const auto [row, col] = cursor;
105 const auto [end_row, end_col] =
getCursor();
109 .start =
FilePos { .line = row, .column = col },
110 .end =
FilePos { .line = end_row, .column = end_col }
120 const auto [row, col] = cursor;
121 const auto [end_row, end_col] =
getCursor();
125 .start =
FilePos { .line = row, .column = col },
126 .end =
FilePos { .line = end_row, .column = end_col }
136 errorWithNextToken(fmt::format(
"Too many nested node while parsing, exceeds limit of {}. Consider rewriting your code by breaking it in functions and macros.",
MaxNestedNodes));
141 std::optional<Node> result = std::nullopt;
145 result = parser(filepos);
162 if (!
oneOf({
"let",
"mut",
"set" }, &token))
166 leaf->attachNearestCommentBefore(
comment);
170 else if (token ==
"mut")
179 if (
const auto value =
nodeOrValue(); value.has_value())
181 const Node& sym = value.value();
185 error(fmt::format(
"Can not use a {} as a symbol name, even in a macro",
nodeTypes[
static_cast<std::size_t
>(sym.
nodeType())]), value_pos);
191 if (leaf->constList().size() == 1)
194 std::string symbol_name;
196 if (!
name(&symbol_name))
204 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
215 if (!
oneOf({
"del" }))
221 std::string symbol_name;
222 if (!
name(&symbol_name))
226 leaf->list().back().attachNearestCommentBefore(
comment);
235 if (!
oneOf({
"if" }))
242 if (
auto cond_expr =
nodeOrValue(); cond_expr.has_value())
243 leaf->push_back(cond_expr.value().attachNearestCommentBefore(
comment));
248 if (
auto value_if_true =
nodeOrValue(); value_if_true.has_value())
249 leaf->push_back(value_if_true.value().attachNearestCommentBefore(
comment));
254 if (
auto value_if_false =
nodeOrValue(); value_if_false.has_value())
256 leaf->push_back(value_if_false.value().attachNearestCommentBefore(
comment));
260 leaf->attachCommentAfter(
comment);
269 if (!
oneOf({
"while" }))
275 if (
auto cond_expr =
nodeOrValue(); cond_expr.has_value())
276 leaf->push_back(cond_expr.value().attachNearestCommentBefore(
comment));
282 leaf->push_back(body.value().attachNearestCommentBefore(
comment));
298 leaf->attachNearestCommentBefore(
comment);
300 if (!
oneOf({
"import" }))
307 import_data.
col = filepos.
col;
308 import_data.
line = filepos.
row;
314 if (import_data.
prefix.size() > 255)
317 errorWithNextToken(fmt::format(
"Import name too long, expected at most 255 characters, got {}", import_data.
prefix.size()));
340 import_data.
package.push_back(path);
341 import_data.
prefix = path;
343 if (path.size() > 255)
346 errorWithNextToken(fmt::format(
"Import name too long, expected at most 255 characters, got {}", path.size()));
352 leaf->push_back(packageNode);
380 std::string symbol_name;
381 if (!
name(&symbol_name))
383 if (symbol_name ==
"*")
384 error(fmt::format(
"Glob patterns can not be separated from the package, use (import {}:*) instead", import_data.
toPackageString()), symbol_pos);
386 if (symbol_name.size() >= 2 && symbol_name[symbol_name.size() - 2] ==
':' && symbol_name.back() ==
'*')
387 error(
"Glob pattern can not follow a symbol to import",
FilePosition { .row = symbol_pos.row, .col = symbol_pos.col + symbol_name.size() - 2 });
392 import_data.
symbols.push_back(symbol_name);
406 leaf->push_back(packageNode);
407 leaf->push_back(symbols);
413 leaf->list().back().attachCommentAfter(
comment);
424 bool alt_syntax =
false;
429 if (!
oneOf({
"begin" }))
437 leaf->setAltSyntax(alt_syntax);
446 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
455 leaf->list().back().attachCommentAfter(
comment);
465 args->attachNearestCommentBefore(
comment);
467 bool has_captures =
false;
477 error(
"No symbol provided to capture", pos);
484 std::string modifier;
486 if (!
oneOf({
"mut",
"ref" }, &modifier))
493 if (modifier ==
"mut")
495 else if (modifier ==
"ref")
502 std::string symbol_name;
503 if (!
name(&symbol_name))
504 error(fmt::format(
"Expected a symbol name for the attribute with modifier `{}'", modifier), pos);
507 args->push_back(
positioned(arg_with_attr, pos));
513 std::string symbol_name;
514 if (!
name(&symbol_name))
517 error(
"Captured variables should be at the end of the argument list", pos);
523 args->list().back().attachNearestCommentBefore(
comment);
536 if (!
oneOf({
"fun" }))
547 if (
const auto value =
nodeOrValue(); value.has_value())
551 const Node& args = value.value();
555 leaf->push_back(args);
566 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
574 if (
auto args =
functionArgs(args_file_pos); args.has_value())
575 leaf->push_back(args.value().attachNearestCommentBefore(comment_before_args));
581 leaf->push_back(value.value().attachNearestCommentBefore(comment_before_args));
589 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
600 if (!
oneOf({
"$if" }))
605 leaf->attachNearestCommentBefore(
comment);
607 if (
const auto cond_expr =
nodeOrValue(); cond_expr.has_value())
608 leaf->push_back(cond_expr.value());
613 if (
auto value_if_true =
nodeOrValue(); value_if_true.has_value())
614 leaf->push_back(value_if_true.value().attachNearestCommentBefore(
comment));
619 if (
auto value_if_false =
nodeOrValue(); value_if_false.has_value())
621 leaf->push_back(value_if_false.value().attachNearestCommentBefore(
comment));
623 leaf->list().back().attachCommentAfter(
comment);
637 args->attachNearestCommentBefore(
comment);
639 std::vector<std::string> names;
644 std::string arg_name;
645 if (!
name(&arg_name))
651 if (std::ranges::find(names, arg_name) != names.end())
654 errorWithNextToken(fmt::format(
"Argument names must be unique, can not reuse `{}'", arg_name));
656 names.push_back(arg_name);
662 std::string spread_name;
663 if (!
name(&spread_name))
669 if (std::ranges::find(names, spread_name) != names.end())
672 errorWithNextToken(fmt::format(
"Argument names must be unique, can not reuse `{}'", spread_name));
681 args->attachCommentAfter(
comment);
694 if (!
oneOf({
"macro" }))
697 leaf->attachNearestCommentBefore(
comment);
699 std::string symbol_name;
700 if (!
name(&symbol_name))
708 if (
const auto args =
macroArgs(args_file_pos); args.has_value())
709 leaf->push_back(args.value());
719 if (value.has_value())
720 leaf->push_back(value.value());
722 errorWithNextToken(fmt::format(
"Expected an argument list, atom or node while defining macro `{}'", symbol_name));
733 if (value.has_value())
734 leaf->push_back(value.value());
735 else if (leaf->list().size() == 2)
745 errorWithNextToken(fmt::format(
"Expected a value while defining macro `{}'", symbol_name), context);
762 std::optional<Node> func;
764 func = sym_or_field->attachNearestCommentBefore(
comment);
765 else if (
auto nested =
node(); nested.has_value())
766 func = nested->attachNearestCommentBefore(
comment);
770 if (func.value().nodeType() ==
NodeType::Symbol && func.value().string() ==
"ref")
771 error(
"`ref' can not be used outside a function's arguments list.", func_name_pos);
774 leaf->push_back(
positioned(func.value(), func_name_pos));
782 leaf->push_back(arg.value().attachNearestCommentBefore(
comment));
789 leaf->list().back().attachCommentAfter(
comment);
792 leaf->list().back().attachCommentAfter(
comment);
794 expectSuffixOrError(
')', fmt::format(
"in function call to `{}'", func.value().repr()), context);
805 leaf->setAltSyntax(
true);
809 leaf->attachNearestCommentBefore(
comment);
815 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
821 leaf->list().back().attachCommentAfter(
comment);
836 error(
"Is not a valid number", filepos);
882 if (*utf8_str ==
'\0')
883 error(
"Invalid escape sequence", pos);
890 error(
"Invalid escape sequence, expected 4 hex digits: \\uabcd", pos);
899 std::size_t begin = 0;
900 for (; seq[begin] ==
'0'; ++begin)
904 if (*utf8_str ==
'\0')
905 error(
"Invalid escape sequence", pos);
912 error(
"Invalid escape sequence, expected 8 hex digits: \\UABCDEF78", pos);
917 error(
"Unknown escape sequence", pos);
1020 if (
auto res =
Parser::nil(filepos); res.has_value())
1024 return std::nullopt;
1029 if (
auto value =
atom(); value.has_value())
1031 for (
const auto type : types)
1033 if (value->nodeType() == type)
1037 return std::nullopt;
1042 if (
auto value =
atom(); value.has_value())
1044 if (
auto sub_node =
node(); sub_node.has_value())
1047 return std::nullopt;
1055 return std::nullopt;
1059 if (
auto result = (this->*parser)(cursor); result.has_value())
1061 result->attachNearestCommentBefore(result->comment() +
comment);
1064 if (
name ==
"function")
1065 expectSuffixOrError(
')',
"after function body. Did you forget to wrap the body with `{}'?", context);
1073 return std::nullopt;
Parse ArkScript code, but do not handle any import declarations.
bool sequence(const std::string &s)
void initParser(const std::string &filename, const std::string &code)
bool expect(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching ; throw a CodeError if it do...
void error(const std::string &error, FilePosition start_at, const std::optional< CodeErrorContext > &additional_context=std::nullopt) const
Create an error context and throw an error containing said context.
std::string spaceComment()
bool accept(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching
std::string newlineOrComment()
bool hexNumber(unsigned length, std::string *s=nullptr)
void backtrack(long n)
Backtrack to a given position (this is NOT an offset!)
bool oneOf(std::initializer_list< std::string > words, std::string *s=nullptr)
Fetch a token and try to match one of the given words.
bool space(std::string *s=nullptr)
bool name(std::string *s=nullptr)
bool comment(std::string *s=nullptr)
bool packageName(std::string *s=nullptr)
void errorWithNextToken(const std::string &message, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Fetch the next token (space and paren delimited) to generate an error.
CodeErrorContext generateErrorContextAtCurrentPosition() const
void expectSuffixOrError(char suffix, const std::string &context, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Check for a closing char or generate an error.
bool signedNumber(std::string *s=nullptr)
FilePosition getCursor() const
void traceStart(std::string &&trace_name)
A node of an Abstract Syntax Tree for ArkScript.
NodeType nodeType() const noexcept
Return the node type.
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Node & attachNearestCommentBefore(const std::string &comment)
Set the comment field with the nearest comment before this node.
void push_back(const Node &node) noexcept
Every node has a list as well as a value so we can push_back on all nodes no matter their type.
void setString(const std::string &value) noexcept
Set the String object.
Node & attachCommentAfter(const std::string &comment)
Set the comment_after field with the nearest comment after this node.
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
std::optional< Node > string(FilePosition filepos)
std::optional< Node > letMutSet(FilePosition filepos)
std::optional< Node > loop(FilePosition filepos)
Parser(unsigned debug, ParserMode mode=ParserMode::Interpret)
Constructs a new Parser object.
std::optional< Node > atom()
Try to parse an atom (number, string, spread, field, symbol, nil)
std::optional< Node > spread(FilePosition filepos)
std::optional< Node > number(FilePosition filepos)
void process(const std::string &filename, const std::string &code)
Parse the given code.
std::optional< Node > block(FilePosition filepos)
std::optional< Node > macro(FilePosition filepos)
std::optional< Node > field(FilePosition filepos)
std::optional< Node > functionCall(FilePosition filepos)
const Node & ast() const noexcept
std::optional< Node > nodeOrValue()
Try to parse an atom first, if it fails try to parse a node.
std::vector< std::function< std::optional< Node >(FilePosition)> > m_parsers
const std::vector< Import > & imports() const
std::optional< Node > condition(FilePosition filepos)
std::optional< Node > del(FilePosition filepos)
std::optional< Node > wrapped(std::optional< Node >(Parser::*parser)(FilePosition), const std::string &name)
Try to parse using a given parser, prefixing and suffixing it with (...), handling comments around th...
std::optional< Node > node()
unsigned m_allow_macro_behavior
Toggled on when inside a macro definition, off afterward.
std::optional< Node > function(FilePosition filepos)
Node positioned(Node node, FilePosition cursor) const
std::optional< Node > functionArgs(FilePosition filepos)
std::optional< Node > anyAtomOf(std::initializer_list< NodeType > types)
Try to parse an atom, if any, match its type against the given list.
std::vector< Import > m_imports
std::size_t m_nested_nodes
Nested node counter.
std::optional< Node > list(FilePosition filepos)
std::optional< Node > macroArgs(FilePosition filepos)
std::optional< Node > symbol(FilePosition filepos)
std::optional< Node > import_(FilePosition filepos)
std::optional< Node > nil(FilePosition filepos)
std::optional< Node > macroCondition(FilePosition filepos)
bool isDouble(const std::string &s, double *output=nullptr)
Checks if a string is a valid double.
NodeType
The different node types available.
@ Interpret
Escape sequences and () will be replaced by their UTF8 representation and nil, respectively.
constexpr std::array< std::string_view, 13 > nodeTypes
Node types as string, in the same order as the enum NodeType.
constexpr std::size_t MaxNestedNodes
Maximum number of nodes that can be nested while parsing code.
void decode(const char *input, char *dest)
Convert hex string to utf8 string.
Describe a position in a given file ; handled by the BaseParser.
Describes a span for a node/atom in a file, its start position and end position.
std::vector< std::string > symbols
List of symbols to import, can be empty if none provided. (import package :a :b)
std::size_t col
Position in the source file.
std::string prefix
The filename without the extension.
bool is_glob
Import as glob (import package:*)
std::string toPackageString() const
std::vector< std::string > package
Package with all the segments.
bool with_prefix
Import with prefix (import package)