8 BaseParser(), m_mode(mode), m_logger(
"Parser", debug),
9 m_ast(
NodeType::
List), m_imports({}), m_allow_macro_behavior(0),
21 [
this](FilePosition) {
24 [
this](FilePosition) {
27 [
this](
const FilePosition filepos) {
28 return import_(filepos);
30 [
this](
const FilePosition filepos) {
31 return block(filepos);
33 [
this](FilePosition) {
36 [
this](
const FilePosition filepos) {
37 return macro(filepos);
39 [
this](FilePosition) {
42 [
this](
const FilePosition filepos) {
43 return functionCall(filepos);
45 [
this](
const FilePosition filepos) {
75 std::string out =
peek();
78 message =
"Unexpected closing paren";
80 message =
"Unexpected closing bracket";
82 message =
"Unexpected closing square bracket";
104 const auto [row, col] = cursor;
105 const auto [end_row, end_col] =
getCursor();
109 .start =
FilePos { .line = row, .column = col },
110 .end =
FilePos { .line = end_row, .column = end_col }
120 const auto [row, col] = cursor;
121 const auto [end_row, end_col] =
getCursor();
125 .start =
FilePos { .line = row, .column = col },
126 .end =
FilePos { .line = end_row, .column = end_col }
136 errorWithNextToken(fmt::format(
"Too many nested node while parsing, exceeds limit of {}. Consider rewriting your code by breaking it in functions and macros.",
MaxNestedNodes));
141 std::optional<Node> result = std::nullopt;
145 result = parser(filepos);
162 if (!
oneOf({
"let",
"mut",
"set" }, &token))
166 leaf->attachNearestCommentBefore(
comment);
170 else if (token ==
"mut")
179 if (
const auto value =
nodeOrValue(); value.has_value())
181 const Node& sym = value.value();
185 error(fmt::format(
"Can not use a {} as a symbol name, even in a macro",
nodeTypes[
static_cast<std::size_t
>(sym.
nodeType())]), value_pos);
191 if (leaf->constList().size() == 1)
194 std::string symbol_name;
195 if (!
name(&symbol_name))
203 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
214 if (!
oneOf({
"del" }))
220 std::string symbol_name;
221 if (!
name(&symbol_name))
225 leaf->list().back().attachNearestCommentBefore(
comment);
234 if (!
oneOf({
"if" }))
241 if (
auto cond_expr =
nodeOrValue(); cond_expr.has_value())
242 leaf->push_back(cond_expr.value().attachNearestCommentBefore(
comment));
247 if (
auto value_if_true =
nodeOrValue(); value_if_true.has_value())
248 leaf->push_back(value_if_true.value().attachNearestCommentBefore(
comment));
253 if (
auto value_if_false =
nodeOrValue(); value_if_false.has_value())
255 leaf->push_back(value_if_false.value().attachNearestCommentBefore(
comment));
259 leaf->attachCommentAfter(
comment);
268 if (!
oneOf({
"while" }))
274 if (
auto cond_expr =
nodeOrValue(); cond_expr.has_value())
275 leaf->push_back(cond_expr.value().attachNearestCommentBefore(
comment));
281 leaf->push_back(body.value().attachNearestCommentBefore(
comment));
297 leaf->attachNearestCommentBefore(
comment);
299 if (!
oneOf({
"import" }))
306 import_data.
col = filepos.
col;
307 import_data.
line = filepos.
row;
313 if (import_data.
prefix.size() > 255)
316 errorWithNextToken(fmt::format(
"Import name too long, expected at most 255 characters, got {}", import_data.
prefix.size()));
339 import_data.
package.push_back(path);
340 import_data.
prefix = path;
342 if (path.size() > 255)
345 errorWithNextToken(fmt::format(
"Import name too long, expected at most 255 characters, got {}", path.size()));
351 leaf->push_back(packageNode);
379 std::string symbol_name;
380 if (!
name(&symbol_name))
382 if (symbol_name ==
"*")
383 error(fmt::format(
"Glob patterns can not be separated from the package, use (import {}:*) instead", import_data.
toPackageString()), symbol_pos);
385 if (symbol_name.size() >= 2 && symbol_name[symbol_name.size() - 2] ==
':' && symbol_name.back() ==
'*')
386 error(
"Glob pattern can not follow a symbol to import",
FilePosition { .row = symbol_pos.row, .col = symbol_pos.col + symbol_name.size() - 2 });
391 import_data.
symbols.push_back(symbol_name);
405 leaf->push_back(packageNode);
406 leaf->push_back(symbols);
412 leaf->list().back().attachCommentAfter(
comment);
423 bool alt_syntax =
false;
428 if (!
oneOf({
"begin" }))
436 leaf->setAltSyntax(alt_syntax);
445 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
454 leaf->list().back().attachCommentAfter(
comment);
464 args->attachNearestCommentBefore(
comment);
466 bool has_captures =
false;
476 error(
"No symbol provided to capture", pos);
482 std::string symbol_name;
483 if (!
name(&symbol_name))
486 error(
"Captured variables should be at the end of the argument list", pos);
492 args->list().back().attachNearestCommentBefore(
comment);
505 if (!
oneOf({
"fun" }))
516 if (
const auto value =
nodeOrValue(); value.has_value())
520 const Node& args = value.value();
524 leaf->push_back(args);
535 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
543 if (
auto args =
functionArgs(args_file_pos); args.has_value())
544 leaf->push_back(args.value().attachNearestCommentBefore(comment_before_args));
550 leaf->push_back(value.value().attachNearestCommentBefore(comment_before_args));
558 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
569 if (!
oneOf({
"$if" }))
574 leaf->attachNearestCommentBefore(
comment);
576 if (
const auto cond_expr =
nodeOrValue(); cond_expr.has_value())
577 leaf->push_back(cond_expr.value());
582 if (
auto value_if_true =
nodeOrValue(); value_if_true.has_value())
583 leaf->push_back(value_if_true.value().attachNearestCommentBefore(
comment));
588 if (
auto value_if_false =
nodeOrValue(); value_if_false.has_value())
590 leaf->push_back(value_if_false.value().attachNearestCommentBefore(
comment));
592 leaf->list().back().attachCommentAfter(
comment);
606 args->attachNearestCommentBefore(
comment);
608 std::vector<std::string> names;
613 std::string arg_name;
614 if (!
name(&arg_name))
620 if (std::ranges::find(names, arg_name) != names.end())
623 errorWithNextToken(fmt::format(
"Argument names must be unique, can not reuse `{}'", arg_name));
625 names.push_back(arg_name);
631 std::string spread_name;
632 if (!
name(&spread_name))
638 if (std::ranges::find(names, spread_name) != names.end())
641 errorWithNextToken(fmt::format(
"Argument names must be unique, can not reuse `{}'", spread_name));
650 args->attachCommentAfter(
comment);
663 if (!
oneOf({
"macro" }))
666 leaf->attachNearestCommentBefore(
comment);
668 std::string symbol_name;
669 if (!
name(&symbol_name))
677 if (
const auto args =
macroArgs(args_file_pos); args.has_value())
678 leaf->push_back(args.value());
688 if (value.has_value())
689 leaf->push_back(value.value());
691 errorWithNextToken(fmt::format(
"Expected an argument list, atom or node while defining macro `{}'", symbol_name));
702 if (value.has_value())
703 leaf->push_back(value.value());
704 else if (leaf->list().size() == 2)
714 errorWithNextToken(fmt::format(
"Expected a value while defining macro `{}'", symbol_name), context);
731 std::optional<Node> func;
733 func = sym_or_field->attachNearestCommentBefore(
comment);
734 else if (
auto nested =
node(); nested.has_value())
735 func = nested->attachNearestCommentBefore(
comment);
740 leaf->push_back(
positioned(func.value(), func_name_pos));
748 leaf->push_back(arg.value().attachNearestCommentBefore(
comment));
755 leaf->list().back().attachCommentAfter(
comment);
758 leaf->list().back().attachCommentAfter(
comment);
760 expectSuffixOrError(
')', fmt::format(
"in function call to `{}'", func.value().repr()), context);
771 leaf->setAltSyntax(
true);
775 leaf->attachNearestCommentBefore(
comment);
781 leaf->push_back(value.value().attachNearestCommentBefore(
comment));
787 leaf->list().back().attachCommentAfter(
comment);
802 error(
"Is not a valid number", filepos);
848 if (*utf8_str ==
'\0')
849 error(
"Invalid escape sequence", pos);
856 error(
"Invalid escape sequence, expected 4 hex digits: \\uabcd", pos);
865 std::size_t begin = 0;
866 for (; seq[begin] ==
'0'; ++begin)
870 if (*utf8_str ==
'\0')
871 error(
"Invalid escape sequence", pos);
878 error(
"Invalid escape sequence, expected 8 hex digits: \\UABCDEF78", pos);
883 error(
"Unknown escape sequence", pos);
986 if (
auto res =
Parser::nil(filepos); res.has_value())
995 if (
auto value =
atom(); value.has_value())
997 for (
const auto type : types)
999 if (value->nodeType() == type)
1003 return std::nullopt;
1008 if (
auto value =
atom(); value.has_value())
1010 if (
auto sub_node =
node(); sub_node.has_value())
1013 return std::nullopt;
1021 return std::nullopt;
1025 if (
auto result = (this->*parser)(cursor); result.has_value())
1027 result->attachNearestCommentBefore(result->comment() +
comment);
1036 return std::nullopt;
Parse ArkScript code, but do not handle any import declarations.
bool sequence(const std::string &s)
void initParser(const std::string &filename, const std::string &code)
bool expect(const CharPred &t, std::string *s=nullptr)
Check if a Character Predicate was able to parse, call next() if matching; throw a CodeError if it doesn't.
void error(const std::string &error, FilePosition start_at, const std::optional< CodeErrorContext > &additional_context=std::nullopt) const
Create an error context and throw an error containing said context.
std::string spaceComment()
bool accept(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching
std::string newlineOrComment()
bool hexNumber(unsigned length, std::string *s=nullptr)
void backtrack(long n)
Backtrack to a given position (this is NOT an offset!)
bool oneOf(std::initializer_list< std::string > words, std::string *s=nullptr)
Fetch a token and try to match one of the given words.
bool space(std::string *s=nullptr)
bool name(std::string *s=nullptr)
bool comment(std::string *s=nullptr)
bool packageName(std::string *s=nullptr)
void errorWithNextToken(const std::string &message, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Fetch the next token (space and paren delimited) to generate an error.
CodeErrorContext generateErrorContextAtCurrentPosition() const
void expectSuffixOrError(char suffix, const std::string &context, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Check for a closing char or generate an error.
bool signedNumber(std::string *s=nullptr)
FilePosition getCursor() const
void traceStart(std::string &&trace_name)
A node of an Abstract Syntax Tree for ArkScript.
NodeType nodeType() const noexcept
Return the node type.
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Node & attachNearestCommentBefore(const std::string &comment)
Set the comment field with the nearest comment before this node.
void push_back(const Node &node) noexcept
Every node has a list as well as a value, so we can push_back on all nodes no matter their type.
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
std::optional< Node > string(FilePosition filepos)
std::optional< Node > letMutSet(FilePosition filepos)
std::optional< Node > loop(FilePosition filepos)
Parser(unsigned debug, ParserMode mode=ParserMode::Interpret)
Constructs a new Parser object.
std::optional< Node > atom()
Try to parse an atom (number, string, spread, field, symbol, nil)
std::optional< Node > spread(FilePosition filepos)
std::optional< Node > number(FilePosition filepos)
void process(const std::string &filename, const std::string &code)
Parse the given code.
std::optional< Node > block(FilePosition filepos)
std::optional< Node > macro(FilePosition filepos)
std::optional< Node > field(FilePosition filepos)
std::optional< Node > functionCall(FilePosition filepos)
const Node & ast() const noexcept
std::optional< Node > nodeOrValue()
Try to parse an atom first, if it fails try to parse a node.
std::vector< std::function< std::optional< Node >(FilePosition)> > m_parsers
const std::vector< Import > & imports() const
std::optional< Node > condition(FilePosition filepos)
std::optional< Node > del(FilePosition filepos)
std::optional< Node > wrapped(std::optional< Node >(Parser::*parser)(FilePosition), const std::string &name)
Try to parse using a given parser, prefixing and suffixing it with (...), handling comments around th...
std::optional< Node > node()
unsigned m_allow_macro_behavior
Toggled on when inside a macro definition, off afterward.
std::optional< Node > function(FilePosition filepos)
Node positioned(Node node, FilePosition cursor) const
std::optional< Node > functionArgs(FilePosition filepos)
std::optional< Node > anyAtomOf(std::initializer_list< NodeType > types)
Try to parse an atom, if any, match its type against the given list.
std::vector< Import > m_imports
std::size_t m_nested_nodes
Nested node counter.
std::optional< Node > list(FilePosition filepos)
std::optional< Node > macroArgs(FilePosition filepos)
std::optional< Node > symbol(FilePosition filepos)
std::optional< Node > import_(FilePosition filepos)
std::optional< Node > nil(FilePosition filepos)
std::optional< Node > macroCondition(FilePosition filepos)
bool isDouble(const std::string &s, double *output=nullptr)
Checks if a string is a valid double.
constexpr std::array< std::string_view, 11 > nodeTypes
Node types as string, in the same order as the enum NodeType.
NodeType
The different node types available.
@ Interpret
Escape sequences and () will be replaced by their UTF8 representation and nil, respectively.
constexpr std::size_t MaxNestedNodes
Maximum number of nodes that can be nested while parsing code.
void decode(const char *input, char *dest)
Convert hex string to utf8 string.
Describe a position in a given file ; handled by the BaseParser.
Describes a span for a node/atom in a file, its start position and end position.
std::vector< std::string > symbols
List of symbols to import, can be empty if none provided. (import package :a :b)
std::size_t col
Position in the source file.
std::string prefix
The filename without the extension.
bool is_glob
Import as glob (import package:*)
std::string toPackageString() const
std::vector< std::string > package
Package with all the segments.
bool with_prefix
Import with prefix (import package)