ArkScript
A small, lisp-inspired, functional scripting language
Parser.hpp
Go to the documentation of this file.
1/**
2 * @file Parser.hpp
3 * @author Alexandre Plateau (lexplt.dev@gmail.com)
4 * @brief Parse ArkScript code, but do not handle any import declarations
5 * @date 2024-05-12
6 *
7 * @copyright Copyright (c) 2024-2025
8 *
9 */
10
11#ifndef COMPILER_AST_PARSER_HPP
12#define COMPILER_AST_PARSER_HPP
13
17#include <Ark/Logger.hpp>
18#include <Ark/Utils.hpp>
19#include <Ark/Platform.hpp>
20
21#include <string>
22#include <vector>
23#include <optional>
24#include <functional>
25
26#include <utf8.hpp>
27
28namespace Ark::internal
29{
30 class ARK_API Parser final : public BaseParser
31 {
32 public:
33 /**
34 * @brief Constructs a new Parser object
35 * @param debug debug level
36 * @param interpret interpret escape codes in strings
37 */
38 explicit Parser(unsigned debug, bool interpret = true);
39
40 /**
41 * @brief Parse the given code
42 * @param filename can be left empty, used for error generation
43 * @param code content of the file
44 */
45 void process(const std::string& filename, const std::string& code);
46
47 /**
48 *
49 * @return const Node& resulting AST after processing the given code
50 */
51 [[nodiscard]] const Node& ast() const noexcept;
52
53 /**
54 *
55 * @return const std::vector<Import>& list of imports detected by the parser
56 */
57 [[nodiscard]] const std::vector<Import>& imports() const;
58
59 private:
60 bool m_interpret; ///< interpret escape codes in strings
63 std::vector<Import> m_imports;
64 unsigned m_allow_macro_behavior; ///< Toggled on when inside a macro definition, off afterward
65 std::size_t m_nested_nodes; ///< Nested node counter
66 std::vector<std::function<std::optional<Node>()>> m_parsers;
67
68 /**
69 * @brief Update a node given a file position
70 * @param node node to update
71 * @param cursor the node position in file
72 * @return Node& the modified node
73 */
74 Node& setNodePosAndFilename(Node& node, const std::optional<FilePosition>& cursor = std::nullopt) const;
75
76 std::optional<Node> node();
77 std::optional<Node> letMutSet();
78 std::optional<Node> del();
79 std::optional<Node> condition();
80 std::optional<Node> loop();
81 std::optional<Node> import_();
82 std::optional<Node> block();
83 std::optional<Node> functionArgs();
84 std::optional<Node> function();
85 std::optional<Node> macroCondition();
86 std::optional<Node> macroArgs();
87 std::optional<Node> macro();
88 std::optional<Node> functionCall();
89 std::optional<Node> list();
90
91 std::optional<Node> number()
92 {
93 auto pos = getCount();
94
95 std::string res;
96 if (signedNumber(&res))
97 {
98 double output;
99 if (Utils::isDouble(res, &output))
100 return std::optional<Node>(output);
101 backtrack(pos);
102 error("Is not a valid number", res);
103 }
104 return std::nullopt;
105 }
106
107 std::optional<Node> string()
108 {
109 std::string res;
110 if (accept(IsChar('"')))
111 {
112 while (true)
113 {
114 if (accept(IsChar('\\')))
115 {
116 if (!m_interpret)
117 res += '\\';
118
119 if (accept(IsChar('"')))
120 res += '"';
121 else if (accept(IsChar('\\')))
122 res += '\\';
123 else if (accept(IsChar('n')))
124 res += m_interpret ? '\n' : 'n';
125 else if (accept(IsChar('t')))
126 res += m_interpret ? '\t' : 't';
127 else if (accept(IsChar('v')))
128 res += m_interpret ? '\v' : 'v';
129 else if (accept(IsChar('r')))
130 res += m_interpret ? '\r' : 'r';
131 else if (accept(IsChar('a')))
132 res += m_interpret ? '\a' : 'a';
133 else if (accept(IsChar('b')))
134 res += m_interpret ? '\b' : 'b';
135 else if (accept(IsChar('f')))
136 res += m_interpret ? '\f' : 'f';
137 else if (accept(IsChar('u')))
138 {
139 std::string seq;
140 if (hexNumber(4, &seq))
141 {
142 if (m_interpret)
143 {
144 char utf8_str[5];
145 utf8::decode(seq.c_str(), utf8_str);
146 if (*utf8_str == '\0')
147 error("Invalid escape sequence", "\\u" + seq);
148 res += utf8_str;
149 }
150 else
151 res += "u" + seq;
152 }
153 else
154 error("Invalid escape sequence", "\\u");
155 }
156 else if (accept(IsChar('U')))
157 {
158 std::string seq;
159 if (hexNumber(8, &seq))
160 {
161 if (m_interpret)
162 {
163 std::size_t begin = 0;
164 for (; seq[begin] == '0'; ++begin)
165 ;
166 char utf8_str[5];
167 utf8::decode(seq.c_str() + begin, utf8_str);
168 if (*utf8_str == '\0')
169 error("Invalid escape sequence", "\\U" + seq);
170 res += utf8_str;
171 }
172 else
173 res += "U" + seq;
174 }
175 else
176 error("Invalid escape sequence", "\\U");
177 }
178 else
179 {
180 backtrack(getCount() - 1);
181 error("Unknown escape sequence", "\\");
182 }
183 }
184 else
185 accept(IsNot(IsEither(IsChar('\\'), IsChar('"'))), &res);
186
187 if (accept(IsChar('"')))
188 break;
189 if (isEOF())
190 expectSuffixOrError('"', "after string");
191 }
192
193 return { Node(NodeType::String, res) };
194 }
195 return std::nullopt;
196 }
197
198 std::optional<Node> field()
199 {
200 std::string sym;
201 if (!name(&sym))
202 return std::nullopt;
203
204 std::optional<Node> leaf { Node(NodeType::Field) };
205 setNodePosAndFilename(leaf.value());
206 leaf->push_back(Node(NodeType::Symbol, sym));
207
208 while (true)
209 {
210 if (leaf->list().size() == 1 && !accept(IsChar('.'))) // Symbol:abc
211 return std::nullopt;
212
213 if (leaf->list().size() > 1 && !accept(IsChar('.')))
214 break;
215 std::string res;
216 if (!name(&res))
217 errorWithNextToken("Expected a field name: <symbol>.<field>");
218 leaf->push_back(Node(NodeType::Symbol, res));
219 }
220
221 return leaf;
222 }
223
224 std::optional<Node> symbol()
225 {
226 std::string res;
227 if (!name(&res))
228 return std::nullopt;
229 return { Node(NodeType::Symbol, res) };
230 }
231
232 std::optional<Node> spread()
233 {
234 std::string res;
235 if (sequence("..."))
236 {
237 if (!name(&res))
238 errorWithNextToken("Expected a name for the variadic");
239 return { Node(NodeType::Spread, res) };
240 }
241 return std::nullopt;
242 }
243
244 std::optional<Node> nil()
245 {
246 if (!accept(IsChar('(')))
247 return std::nullopt;
248
249 std::string comment;
250 newlineOrComment(&comment);
251 if (!accept(IsChar(')')))
252 return std::nullopt;
253
254 if (m_interpret)
255 return { Node(NodeType::Symbol, "nil").attachNearestCommentBefore(comment) };
256 return { Node(NodeType::List).attachNearestCommentBefore(comment) };
257 }
258
259 /**
260 * @brief Try to parse an atom (number, string, spread, field, symbol, nil)
261 * @return std::optional<Node> std::nullopt if no atom could be parsed
262 */
263 std::optional<Node> atom();
264
265 /**
266 * @brief Try to parse an atom, if any, match its type against the given list
267 * @param types autorized types
268 * @return std::optional<Node> std::nullopt if the parsed atom didn't match the given types
269 */
270 std::optional<Node> anyAtomOf(std::initializer_list<NodeType> types);
271
272 /**
273 * @brief Try to parse an atom first, if it fails try to parse a node
274 * @return std::optional<Node> std::nullopt if no atom or node could be parsed
275 */
276 std::optional<Node> nodeOrValue();
277
278 /**
279 * @brief Try to parse using a given parser, prefixing and suffixing it with (...), handling comments around the parsed node
280 * @param parser parser method returning a std::optional<Node>
281 * @param name construction name, eg "let", "condition"
282 * @return std::optional<Node> std::nullopt if the parser didn't match
283 */
284 std::optional<Node> wrapped(std::optional<Node> (Parser::*parser)(), const std::string& name);
285 };
286}
287
288#endif
Lots of utilities about string, filesystem and more.
Internal logger.
#define ARK_API
Definition Module.hpp:28
AST node used by the parser, optimizer and compiler.
ArkScript configuration macros.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:30
Node & attachNearestCommentBefore(const std::string &comment)
Set the comment field with the nearest comment before this node.
Definition Node.cpp:128
std::vector< std::function< std::optional< Node >()> > m_parsers
Definition Parser.hpp:66
bool m_interpret
interpret escape codes in strings
Definition Parser.hpp:60
std::optional< Node > nil()
Definition Parser.hpp:244
std::optional< Node > symbol()
Definition Parser.hpp:224
std::optional< Node > number()
Definition Parser.hpp:91
std::optional< Node > string()
Definition Parser.hpp:107
std::optional< Node > spread()
Definition Parser.hpp:232
std::optional< Node > field()
Definition Parser.hpp:198
unsigned m_allow_macro_behavior
Toggled on when inside a macro definition, off afterward.
Definition Parser.hpp:64
std::vector< Import > m_imports
Definition Parser.hpp:63
std::size_t m_nested_nodes
Nested node counter.
Definition Parser.hpp:65
void decode(const char *input, char *dest)
Convert hex string to utf8 string.
Definition utf8.hpp:67