ArkScript
A small, fast, functional and scripting language for video games
BaseParser.cpp
Go to the documentation of this file.
2#include <Ark/Exceptions.hpp>
3
4#include <utility>
5#include <algorithm>
6
7#include <fmt/core.h>
8
9namespace Ark::internal
10{
11 void BaseParser::registerNewLine(std::string::iterator it, std::size_t row)
12 {
13 // search for an existing new line position
14 if (std::ranges::find_if(m_it_to_row, [it](const auto& pair) {
15 return pair.first == it;
16 }) != m_it_to_row.end())
17 return;
18
19 // if the mapping is empty, the loop while never hit and we'll never insert anything
20 if (m_it_to_row.empty())
21 {
22 m_it_to_row.emplace_back(it, row);
23 return;
24 }
25
26 for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i)
27 {
28 auto current = m_it_to_row[i].first;
29 auto next = i + 1 < end ? m_it_to_row[i + 1].first : m_str.end();
30 if (current < it && it < next)
31 {
32 m_it_to_row.insert(
33 m_it_to_row.begin() + static_cast<decltype(m_it_to_row)::difference_type>(i) + 1,
34 std::make_pair(it, row));
35 break;
36 }
37 }
38 }
39
41 {
43 if (isEOF())
44 {
45 m_sym = utf8_char_t(); // reset sym to EOF
46 return;
47 }
48
49 // getting a character from the stream
50 auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
51 m_next_it = it;
52 m_sym = sym;
53
54 if (*m_it == '\n')
55 {
56 ++m_filepos.row;
57 m_filepos.col = 0;
59 }
60 else if (m_sym.isPrintable())
62 }
63
64 void BaseParser::initParser(const std::string& filename, const std::string& code)
65 {
66 m_filename = filename;
67
68 // if the input string is empty, raise an error
69 if (code.empty())
70 {
72 error("Expected symbol, got empty string", "");
73 }
74
75 m_str = code;
76 m_it = m_next_it = m_str.begin();
77
78 // otherwise, get the first symbol
79 next();
80 }
81
82 void BaseParser::backtrack(const long n)
83 {
84 if (std::cmp_greater_equal(n, m_str.size()))
85 return;
86
87 m_it = m_str.begin() + n;
88 auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
89 m_next_it = it;
90 m_sym = sym;
91
92 // search for the nearest it < m_it in the map to know the line number
93 for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i)
94 {
95 auto [at, line] = m_it_to_row[i];
96 if (it < at)
97 {
98 m_filepos.row = line - 1;
99 break;
100 }
101 }
102 // compute the position in the line
103 std::string_view view = m_str;
104 const auto it_pos = static_cast<std::size_t>(std::distance(m_str.begin(), m_it));
105 view = view.substr(0, it_pos);
106 const auto nearest_newline_index = view.find_last_of('\n');
107 if (nearest_newline_index != std::string_view::npos)
108 m_filepos.col = it_pos - nearest_newline_index;
109 else
110 m_filepos.col = it_pos + 1;
111 }
112
114 {
115 return m_filepos;
116 }
117
118 void BaseParser::error(const std::string& error, std::string exp)
119 {
120 const auto [row, col] = getCursor();
121 throw CodeError(error, m_filename, row, col, std::move(exp), m_sym);
122 }
123
124 void BaseParser::errorWithNextToken(const std::string& message)
125 {
126 const auto pos = getCount();
127 std::string next_token;
128
129 anyUntil(IsEither(IsInlineSpace, IsEither(IsChar('('), IsChar(')'))), &next_token);
130 backtrack(pos);
131
132 error(message, next_token);
133 }
134
135 void BaseParser::errorMissingSuffix(const char suffix, const std::string& node_name)
136 {
137 errorWithNextToken(fmt::format("Missing '{}' after {}", suffix, node_name));
138 }
139
140 bool BaseParser::accept(const CharPred& t, std::string* s)
141 {
142 if (isEOF())
143 return false;
144
145 // return false if the predicate couldn't consume the symbol
146 if (!t(m_sym.codepoint()))
147 return false;
148 // otherwise, add it to the string and go to the next symbol
149 if (s != nullptr)
150 *s += m_sym.c_str();
151
152 next();
153 return true;
154 }
155
156 bool BaseParser::expect(const CharPred& t, std::string* s)
157 {
158 // throw an error if the predicate couldn't consume the symbol
159 if (!t(m_sym.codepoint()))
160 error("Expected " + t.name, m_sym.c_str());
161 // otherwise, add it to the string and go to the next symbol
162 if (s != nullptr)
163 *s += m_sym.c_str();
164 next();
165 return true;
166 }
167
168 bool BaseParser::space(std::string* s)
169 {
170 if (accept(IsSpace))
171 {
172 if (s != nullptr)
173 s->push_back(' ');
174 // loop while there are still ' ' to consume
175 while (accept(IsSpace))
176 ;
177 return true;
178 }
179 return false;
180 }
181
182 bool BaseParser::inlineSpace(std::string* s)
183 {
185 {
186 if (s != nullptr)
187 s->push_back(' ');
188 // loop while there are still ' ' to consume
189 while (accept(IsInlineSpace))
190 ;
191 return true;
192 }
193 return false;
194 }
195
196 bool BaseParser::comment(std::string* s)
197 {
198 if (accept(IsChar('#'), s))
199 {
200 while (accept(IsNot(IsChar('\n')), s))
201 ;
202 accept(IsChar('\n'), s);
203 return true;
204 }
205 return false;
206 }
207
208 bool BaseParser::spaceComment(std::string* s)
209 {
210 bool matched = false;
211
212 inlineSpace();
213 while (!isEOF() && comment(s))
214 {
215 inlineSpace();
216 matched = true;
217 }
218
219 return matched;
220 }
221
222 bool BaseParser::newlineOrComment(std::string* s)
223 {
224 bool matched = false;
225
226 space();
227 while (!isEOF() && comment(s))
228 {
229 space();
230 matched = true;
231 }
232
233 return matched;
234 }
235
236 bool BaseParser::prefix(const char c)
237 {
238 if (!accept(IsChar(c)))
239 return false;
240 return true;
241 }
242
243 bool BaseParser::suffix(const char c)
244 {
245 return accept(IsChar(c));
246 }
247
248 bool BaseParser::number(std::string* s)
249 {
250 if (accept(IsDigit, s))
251 {
252 // consume all the digits available,
253 // stop when the symbol isn't a digit anymore
254 while (accept(IsDigit, s))
255 ;
256 return true;
257 }
258 return false;
259 }
260
261 bool BaseParser::signedNumber(std::string* s)
262 {
263 accept(IsMinus, s);
264 if (!number(s))
265 return false;
266
267 // (optional) floating part
268 accept(IsChar('.'), s) && number(s);
269 // (optional) scientific part
270 if (accept(IsEither(IsChar('e'), IsChar('E')), s))
271 {
272 accept(IsEither(IsMinus, IsChar('+')), s);
273 number(s);
274 }
275
276 return true;
277 }
278
279 bool BaseParser::hexNumber(unsigned int length, std::string* s)
280 {
281 while (length != 0)
282 {
283 if (!accept(IsHex, s))
284 return false;
285 --length;
286 }
287 return true;
288 }
289
290 bool BaseParser::name(std::string* s)
291 {
292 const auto alpha_symbols = IsEither(IsAlpha, IsSymbol);
293 const auto alnum_symbols = IsEither(IsAlnum, IsSymbol);
294
295 if (accept(alpha_symbols, s))
296 {
297 while (accept(alnum_symbols, s))
298 ;
299 return true;
300 }
301 return false;
302 }
303
304 bool BaseParser::sequence(const std::string& s)
305 {
306 return std::ranges::all_of(s, [this](const char c) {
307 return accept(IsChar(c));
308 });
309 }
310
311 bool BaseParser::packageName(std::string* s)
312 {
313 if (accept(IsAlnum, s))
314 {
315 while (accept(IsEither(IsAlnum, IsEither(IsChar('_'), IsChar('-'))), s))
316 ;
317 return true;
318 }
319 return false;
320 }
321
322 bool BaseParser::anyUntil(const CharPred& delim, std::string* s)
323 {
324 if (accept(IsNot(delim), s))
325 {
326 while (accept(IsNot(delim), s))
327 ;
328 return true;
329 }
330 return false;
331 }
332
333 bool BaseParser::oneOf(const std::initializer_list<std::string> words, std::string* s)
334 {
335 std::string buffer;
336 if (!name(&buffer))
337 return false;
338
339 if (s)
340 *s = buffer;
341
342 return std::ranges::any_of(words, [&buffer](const std::string& word) {
343 return word == buffer;
344 });
345 }
346}
ArkScript homemade exceptions.
bool sequence(const std::string &s)
FilePosition m_filepos
The position of the cursor in the file.
bool spaceComment(std::string *s=nullptr)
std::string::iterator m_next_it
void initParser(const std::string &filename, const std::string &code)
bool expect(const CharPred &t, std::string *s=nullptr)
Check if a Character Predicate was able to parse, call next() if matching; throw a CodeError if it doesn't match.
bool number(std::string *s=nullptr)
std::string::iterator m_it
bool accept(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching
bool hexNumber(unsigned length, std::string *s=nullptr)
bool newlineOrComment(std::string *s=nullptr)
bool anyUntil(const CharPred &delim, std::string *s=nullptr)
Match any char that does not match the predicate.
bool oneOf(std::initializer_list< std::string > words, std::string *s=nullptr)
Fetch a token and try to match one of the given words.
void error(const std::string &error, std::string exp)
bool space(std::string *s=nullptr)
void registerNewLine(std::string::iterator it, std::size_t row)
Register the position of a new line, with an iterator pointing to the new line and the row number.
void next()
getting next character and changing the values of count/row/col/sym
bool name(std::string *s=nullptr)
bool comment(std::string *s=nullptr)
bool packageName(std::string *s=nullptr)
void errorWithNextToken(const std::string &message)
Fetch the next token (space and paren delimited) to generate an error.
void errorMissingSuffix(char suffix, const std::string &node_name)
Generate an error for a given node when a suffix is missing.
bool inlineSpace(std::string *s=nullptr)
bool signedNumber(std::string *s=nullptr)
std::vector< std::pair< std::string::iterator, std::size_t > > m_it_to_row
A crude map of position to line number to speed up line number computing.
utf8_char_t m_sym
The current utf8 character we're on.
FilePosition getCursor() const
codepoint_t codepoint() const
Definition utf8_char.hpp:94
const char * c_str() const
Definition utf8_char.hpp:92
static std::pair< std::string::iterator, utf8_char_t > at(const std::string::iterator it, const std::string::iterator end)
Parse a codepoint and compute its length and representation.
Definition utf8_char.hpp:32
std::size_t size() const
Definition utf8_char.hpp:93
const IsChar IsMinus('-')
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
const std::string name
Describe a position in a given file ; handled by the BaseParser.