ArkScript
A small, fast, functional and scripting language for video games
Parser.hpp
Go to the documentation of this file.
1/**
2 * @file Parser.hpp
3 * @author Alexandre Plateau ([email protected])
4 * @brief Parses ArkScript code, but does not handle any import declarations
5 * @version 0.2
6 * @date 2024-05-12
7 *
8 * @copyright Copyright (c) 2024
9 *
10 */
11
12#ifndef COMPILER_AST_PARSER_HPP
13#define COMPILER_AST_PARSER_HPP
14
18#include <Ark/Logger.hpp>
19#include <Ark/Utils.hpp>
20#include <Ark/Platform.hpp>
21
22#include <string>
23#include <optional>
24#include <vector>
25
26#include <utf8.hpp>
27
28namespace Ark::internal
29{
30 class ARK_API Parser final : public BaseParser
31 {
32 public:
33 /**
34 * @brief Constructs a new Parser object
35 * @param debug debug level
36 * @param interpret interpret escape codes in strings
37 */
38 explicit Parser(unsigned debug, bool interpret = true);
39
40 /**
41 * @brief Parse the given code
42 * @param filename can be left empty, used for error generation
43 * @param code content of the file
44 */
45 void process(const std::string& filename, const std::string& code);
46
47 /**
48 * @brief Get the AST resulting from processing the given code
49 * @return const Node& resulting AST after processing the given code
50 */
51 [[nodiscard]] const Node& ast() const noexcept;
52
53 /**
54 * @brief Get the list of imports detected by the parser
55 * @return const std::vector<Import>& list of imports detected by the parser
56 */
57 [[nodiscard]] const std::vector<Import>& imports() const;
58
59 private:
60 bool m_interpret; ///< interpret escape codes in strings
63 std::vector<Import> m_imports;
64 unsigned m_allow_macro_behavior; ///< Toggled on when inside a macro definition, off afterward
65
66 /**
67 * @brief Update a node given a file position
68 * @param node node to update
69 * @param cursor the node position in file
70 * @return Node& the modified node
71 */
72 Node& setNodePosAndFilename(Node& node, const std::optional<FilePosition>& cursor = std::nullopt) const;
73
74 std::optional<Node> node();
75 std::optional<Node> letMutSet();
76 std::optional<Node> del();
77 std::optional<Node> condition();
78 std::optional<Node> loop();
79 std::optional<Node> import_();
80 std::optional<Node> block();
81 std::optional<Node> functionArgs();
82 std::optional<Node> function();
83 std::optional<Node> macroCondition();
84 std::optional<Node> macroArgs();
85 std::optional<Node> macro();
86 std::optional<Node> functionCall();
87 std::optional<Node> list();
88
89 std::optional<Node> number()
90 {
91 auto pos = getCount();
92
93 std::string res;
94 if (signedNumber(&res))
95 {
96 double output;
97 if (Utils::isDouble(res, &output))
98 return std::optional<Node>(output);
99 backtrack(pos);
100 error("Is not a valid number", res);
101 }
102 return std::nullopt;
103 }
104
105 std::optional<Node> string()
106 {
107 std::string res;
108 if (accept(IsChar('"')))
109 {
110 while (true)
111 {
112 if (accept(IsChar('\\')))
113 {
114 if (!m_interpret)
115 res += '\\';
116
117 if (accept(IsChar('"')))
118 res += '"';
119 else if (accept(IsChar('\\')))
120 res += '\\';
121 else if (accept(IsChar('n')))
122 res += m_interpret ? '\n' : 'n';
123 else if (accept(IsChar('t')))
124 res += m_interpret ? '\t' : 't';
125 else if (accept(IsChar('v')))
126 res += m_interpret ? '\v' : 'v';
127 else if (accept(IsChar('r')))
128 res += m_interpret ? '\r' : 'r';
129 else if (accept(IsChar('a')))
130 res += m_interpret ? '\a' : 'a';
131 else if (accept(IsChar('b')))
132 res += m_interpret ? '\b' : 'b';
133 else if (accept(IsChar('f')))
134 res += m_interpret ? '\f' : 'f';
135 else if (accept(IsChar('u')))
136 {
137 std::string seq;
138 if (hexNumber(4, &seq))
139 {
140 if (m_interpret)
141 {
142 char utf8_str[5];
143 utf8::decode(seq.c_str(), utf8_str);
144 if (*utf8_str == '\0')
145 error("Invalid escape sequence", "\\u" + seq);
146 res += utf8_str;
147 }
148 else
149 res += "u" + seq;
150 }
151 else
152 error("Invalid escape sequence", "\\u");
153 }
154 else if (accept(IsChar('U')))
155 {
156 std::string seq;
157 if (hexNumber(8, &seq))
158 {
159 if (m_interpret)
160 {
161 std::size_t begin = 0;
162 for (; seq[begin] == '0'; ++begin)
163 ;
164 char utf8_str[5];
165 utf8::decode(seq.c_str() + begin, utf8_str);
166 if (*utf8_str == '\0')
167 error("Invalid escape sequence", "\\U" + seq);
168 res += utf8_str;
169 }
170 else
171 res += "U" + seq;
172 }
173 else
174 error("Invalid escape sequence", "\\U");
175 }
176 else
177 {
178 backtrack(getCount() - 1);
179 error("Unknown escape sequence", "\\");
180 }
181 }
182 else
183 accept(IsNot(IsEither(IsChar('\\'), IsChar('"'))), &res);
184
185 if (accept(IsChar('"')))
186 break;
187 if (isEOF())
188 errorMissingSuffix('"', "string");
189 }
190
191 return { Node(NodeType::String, res) };
192 }
193 return std::nullopt;
194 }
195
196 std::optional<Node> field()
197 {
198 std::string sym;
199 if (!name(&sym))
200 return std::nullopt;
201
202 std::optional<Node> leaf { Node(NodeType::Field) };
203 setNodePosAndFilename(leaf.value());
204 leaf->push_back(Node(NodeType::Symbol, sym));
205
206 while (true)
207 {
208 if (leaf->list().size() == 1 && !accept(IsChar('.'))) // Symbol:abc
209 return std::nullopt;
210
211 if (leaf->list().size() > 1 && !accept(IsChar('.')))
212 break;
213 std::string res;
214 if (!name(&res))
215 errorWithNextToken("Expected a field name: <symbol>.<field>");
216 leaf->push_back(Node(NodeType::Symbol, res));
217 }
218
219 return leaf;
220 }
221
222 std::optional<Node> symbol()
223 {
224 std::string res;
225 if (!name(&res))
226 return std::nullopt;
227 return { Node(NodeType::Symbol, res) };
228 }
229
230 std::optional<Node> spread()
231 {
232 std::string res;
233 if (sequence("..."))
234 {
235 if (!name(&res))
236 errorWithNextToken("Expected a name for the variadic");
237 return { Node(NodeType::Spread, res) };
238 }
239 return std::nullopt;
240 }
241
242 std::optional<Node> nil()
243 {
244 if (!accept(IsChar('(')))
245 return std::nullopt;
246
247 std::string comment;
248 newlineOrComment(&comment);
249 if (!accept(IsChar(')')))
250 return std::nullopt;
251
252 if (m_interpret)
253 return { Node(NodeType::Symbol, "nil").attachNearestCommentBefore(comment) };
254 return { Node(NodeType::List).attachNearestCommentBefore(comment) };
255 }
256
257 /**
258 * @brief Try to parse an atom (number, string, spread, field, symbol, nil)
259 * @return std::optional<Node> std::nullopt if no atom could be parsed
260 */
261 std::optional<Node> atom();
262
263 /**
264 * @brief Try to parse an atom, if any, match its type against the given list
265 * @param types authorized types
266 * @return std::optional<Node> std::nullopt if the parsed atom didn't match the given types
267 */
268 std::optional<Node> anyAtomOf(std::initializer_list<NodeType> types);
269
270 /**
271 * @brief Try to parse an atom first, if it fails try to parse a node
272 * @return std::optional<Node> std::nullopt if no atom or node could be parsed
273 */
274 std::optional<Node> nodeOrValue();
275
276 /**
277 * @brief Try to parse using a given parser, prefixing and suffixing it with (...), handling comments around the parsed node
278 * @param parser parser method returning a std::optional<Node>
279 * @param name construction name, eg "let", "condition"
280 * @return std::optional<Node> std::nullopt if the parser didn't match
281 */
282 std::optional<Node> wrapped(std::optional<Node> (Parser::*parser)(), const std::string& name);
283 };
284}
285
286#endif
Lots of utilities about string, filesystem and more.
Internal logger.
#define ARK_API
Definition Module.hpp:28
AST node used by the parser, optimizer and compiler.
ArkScript configuration macros.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:31
Node & attachNearestCommentBefore(const std::string &comment)
Set the comment field with the nearest comment before this node.
Definition Node.cpp:119
bool m_interpret
interpret escape codes in strings
Definition Parser.hpp:60
std::optional< Node > nil()
Definition Parser.hpp:242
std::optional< Node > symbol()
Definition Parser.hpp:222
std::optional< Node > number()
Definition Parser.hpp:89
std::optional< Node > string()
Definition Parser.hpp:105
std::optional< Node > spread()
Definition Parser.hpp:230
std::optional< Node > field()
Definition Parser.hpp:196
unsigned m_allow_macro_behavior
Toggled on when inside a macro definition, off afterward.
Definition Parser.hpp:64
std::vector< Import > m_imports
Definition Parser.hpp:63
void decode(const char *input, char *dest)
Convert hex string to utf8 string.
Definition utf8.hpp:67