ArkScript
A small, fast, functional and scripting language for video games
BaseParser.cpp
Go to the documentation of this file.
2#include <Ark/Exceptions.hpp>
3
4#include <utility>
5#include <algorithm>
6
7#include <fmt/core.h>
8
9namespace Ark::internal
10{
11 void BaseParser::registerNewLine(std::string::iterator it, std::size_t row)
12 {
13 // search for an existing new line position
14 if (std::ranges::find_if(m_it_to_row, [it](const auto& pair) {
15 return pair.first == it;
16 }) != m_it_to_row.end())
17 return;
18
19 // if the mapping is empty, the loop while never hit and we'll never insert anything
20 if (m_it_to_row.empty())
21 {
22 m_it_to_row.emplace_back(it, row);
23 return;
24 }
25
26 for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i)
27 {
28 auto current_it = m_it_to_row[i].first;
29 auto next_it = i + 1 < end ? m_it_to_row[i + 1].first : m_str.end();
30 if (current_it < it && it < next_it)
31 {
32 m_it_to_row.insert(
33 m_it_to_row.begin() + static_cast<decltype(m_it_to_row)::difference_type>(i) + 1,
34 std::make_pair(it, row));
35 break;
36 }
37 }
38 }
39
41 {
43 if (isEOF())
44 {
45 m_sym = utf8_char_t(); // reset sym to EOF
46 return;
47 }
48
49 // getting a character from the stream
50 auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
51 m_next_it = it;
52 m_sym = sym;
53
54 if (*m_it == '\n')
55 {
56 ++m_filepos.row;
57 m_filepos.col = 0;
59 }
60 else if (m_sym.isPrintable())
62 }
63
64 void BaseParser::initParser(const std::string& filename, const std::string& code)
65 {
66 m_filename = filename;
67
68 // if the input string is empty, raise an error
69 if (code.empty())
70 {
72 error("Expected symbol, got empty string", "");
73 }
74
75 m_str = code;
76 m_it = m_next_it = m_str.begin();
77
78 // otherwise, get the first symbol
79 next();
80 }
81
82 void BaseParser::backtrack(const long n)
83 {
84 if (std::cmp_greater_equal(n, m_str.size()))
85 return;
86
87 m_it = m_str.begin() + n;
88 auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
89 m_next_it = it;
90 m_sym = sym;
91
92 // search for the nearest it < m_it in the map to know the line number
93 for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i)
94 {
95 auto [at, line] = m_it_to_row[i];
96 if (it <= at)
97 {
98 m_filepos.row = line - 1;
99 break;
100 }
101 }
102 // compute the position in the line
103 std::string_view view = m_str;
104 const auto it_pos = static_cast<std::size_t>(std::distance(m_str.begin(), m_it));
105 view = view.substr(0, it_pos);
106 const auto nearest_newline_index = view.find_last_of('\n');
107 if (nearest_newline_index != std::string_view::npos)
108 m_filepos.col = it_pos - nearest_newline_index;
109 else
110 m_filepos.col = it_pos + 1;
111 }
112
114 {
115 return m_filepos;
116 }
117
119 {
120 const auto [row, col] = getCursor();
121
122 return CodeErrorContext(
124 row,
125 col,
126 expr,
127 m_sym);
128 }
129
130 void BaseParser::error(const std::string& error, std::string exp, const std::optional<CodeErrorContext>& additional_context)
131 {
132 const auto [row, col] = getCursor();
133 throw CodeError(
134 error,
135 CodeErrorContext(m_filename, row, col, std::move(exp), m_sym),
136 additional_context);
137 }
138
139 void BaseParser::errorWithNextToken(const std::string& message, const std::optional<CodeErrorContext>& additional_context)
140 {
141 const auto pos = getCount();
142 std::string next_token;
143
144 anyUntil(IsEither(IsInlineSpace, IsEither(IsChar('('), IsChar(')'))), &next_token);
145 backtrack(pos);
146
147 error(message, next_token, additional_context);
148 }
149
150 void BaseParser::expectSuffixOrError(const char suffix, const std::string& context, const std::optional<CodeErrorContext>& additional_context)
151 {
152 if (!accept(IsChar(suffix)))
153 errorWithNextToken(fmt::format("Missing '{}' {}", suffix, context), additional_context);
154 }
155
156 bool BaseParser::accept(const CharPred& t, std::string* s)
157 {
158 if (isEOF())
159 return false;
160
161 // return false if the predicate couldn't consume the symbol
162 if (!t(m_sym.codepoint()))
163 return false;
164 // otherwise, add it to the string and go to the next symbol
165 if (s != nullptr)
166 *s += m_sym.c_str();
167
168 next();
169 return true;
170 }
171
172 bool BaseParser::expect(const CharPred& t, std::string* s)
173 {
174 // throw an error if the predicate couldn't consume the symbol
175 if (!t(m_sym.codepoint()))
176 error("Expected " + t.name, m_sym.c_str());
177 // otherwise, add it to the string and go to the next symbol
178 if (s != nullptr)
179 *s += m_sym.c_str();
180 next();
181 return true;
182 }
183
184 std::string BaseParser::peek() const
185 {
186 return m_sym.c_str();
187 }
188
189 bool BaseParser::space(std::string* s)
190 {
191 if (accept(IsSpace))
192 {
193 if (s != nullptr)
194 s->push_back(' ');
195 // loop while there are still ' ' to consume
196 while (accept(IsSpace))
197 ;
198 return true;
199 }
200 return false;
201 }
202
203 bool BaseParser::inlineSpace(std::string* s)
204 {
206 {
207 if (s != nullptr)
208 s->push_back(' ');
209 // loop while there are still ' ' to consume
210 while (accept(IsInlineSpace))
211 ;
212 return true;
213 }
214 return false;
215 }
216
217 bool BaseParser::comment(std::string* s)
218 {
219 if (accept(IsChar('#'), s))
220 {
221 while (accept(IsNot(IsChar('\n')), s))
222 ;
223 accept(IsChar('\n'), s);
224 return true;
225 }
226 return false;
227 }
228
229 bool BaseParser::spaceComment(std::string* s)
230 {
231 bool matched = false;
232
233 inlineSpace();
234 while (!isEOF() && comment(s))
235 {
236 inlineSpace();
237 matched = true;
238 }
239
240 return matched;
241 }
242
243 bool BaseParser::newlineOrComment(std::string* s)
244 {
245 bool matched = false;
246
247 space();
248 while (!isEOF() && comment(s))
249 {
250 space();
251 matched = true;
252 }
253
254 return matched;
255 }
256
257 bool BaseParser::prefix(const char c)
258 {
259 if (!accept(IsChar(c)))
260 return false;
261 return true;
262 }
263
264 bool BaseParser::number(std::string* s)
265 {
266 if (accept(IsDigit, s))
267 {
268 // consume all the digits available,
269 // stop when the symbol isn't a digit anymore
270 while (accept(IsDigit, s))
271 ;
272 return true;
273 }
274 return false;
275 }
276
277 bool BaseParser::signedNumber(std::string* s)
278 {
279 accept(IsMinus, s);
280 if (!number(s))
281 return false;
282
283 // (optional) floating part
284 accept(IsChar('.'), s) && number(s);
285 // (optional) scientific part
286 if (accept(IsEither(IsChar('e'), IsChar('E')), s))
287 {
288 accept(IsEither(IsMinus, IsChar('+')), s);
289 number(s);
290 }
291
292 return true;
293 }
294
295 bool BaseParser::hexNumber(unsigned int length, std::string* s)
296 {
297 while (length != 0)
298 {
299 if (!accept(IsHex, s))
300 return false;
301 --length;
302 }
303 return true;
304 }
305
306 bool BaseParser::name(std::string* s)
307 {
308 const auto alpha_symbols = IsEither(IsAlpha, IsSymbol);
309 const auto alnum_symbols = IsEither(IsAlnum, IsSymbol);
310
311 if (accept(alpha_symbols, s))
312 {
313 while (accept(alnum_symbols, s))
314 ;
315 return true;
316 }
317 return false;
318 }
319
320 bool BaseParser::sequence(const std::string& s)
321 {
322 return std::ranges::all_of(s, [this](const char c) {
323 return accept(IsChar(c));
324 });
325 }
326
327 bool BaseParser::packageName(std::string* s)
328 {
329 if (accept(IsAlnum, s))
330 {
331 while (accept(IsEither(IsAlnum, IsEither(IsChar('_'), IsChar('-'))), s))
332 ;
333 return true;
334 }
335 return false;
336 }
337
338 bool BaseParser::anyUntil(const CharPred& delim, std::string* s)
339 {
340 if (accept(IsNot(delim), s))
341 {
342 while (accept(IsNot(delim), s))
343 ;
344 return true;
345 }
346 return false;
347 }
348
349 bool BaseParser::oneOf(const std::initializer_list<std::string> words, std::string* s)
350 {
351 std::string buffer;
352 if (!name(&buffer))
353 return false;
354
355 if (s)
356 *s = buffer;
357
358 return std::ranges::any_of(words, [&buffer](const std::string& word) {
359 return word == buffer;
360 });
361 }
362}
ArkScript homemade exceptions.
bool sequence(const std::string &s)
FilePosition m_filepos
The position of the cursor in the file.
bool spaceComment(std::string *s=nullptr)
std::string::iterator m_next_it
void initParser(const std::string &filename, const std::string &code)
bool expect(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching; throw a CodeError if it doesn't
bool number(std::string *s=nullptr)
std::string::iterator m_it
bool accept(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching
bool hexNumber(unsigned length, std::string *s=nullptr)
bool newlineOrComment(std::string *s=nullptr)
void backtrack(long n)
Backtrack to a given position (this is NOT an offset!)
CodeErrorContext generateErrorContext(const std::string &expr)
bool anyUntil(const CharPred &delim, std::string *s=nullptr)
Match any char that does not match the predicate.
std::string peek() const
bool oneOf(std::initializer_list< std::string > words, std::string *s=nullptr)
Fetch a token and try to match one of the given words.
bool space(std::string *s=nullptr)
void registerNewLine(std::string::iterator it, std::size_t row)
Register the position of a new line, with an iterator pointing to the new line and the row number.
void next()
getting next character and changing the values of count/row/col/sym
bool name(std::string *s=nullptr)
bool comment(std::string *s=nullptr)
bool packageName(std::string *s=nullptr)
void errorWithNextToken(const std::string &message, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Fetch the next token (space and paren delimited) to generate an error.
void error(const std::string &error, std::string exp, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
bool inlineSpace(std::string *s=nullptr)
void expectSuffixOrError(char suffix, const std::string &context, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Check for a closing char or generate an error.
bool signedNumber(std::string *s=nullptr)
std::vector< std::pair< std::string::iterator, std::size_t > > m_it_to_row
A crude map of position to line number to speed up line number computing.
utf8_char_t m_sym
The current utf8 character we're on.
FilePosition getCursor() const
codepoint_t codepoint() const
Definition utf8_char.hpp:94
const char * c_str() const
Definition utf8_char.hpp:92
static std::pair< std::string::iterator, utf8_char_t > at(const std::string::iterator it, const std::string::iterator end)
Parse a codepoint and compute its length and representation.
Definition utf8_char.hpp:32
std::size_t size() const
Definition utf8_char.hpp:93
const IsChar IsMinus('-')
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
const std::string name
Describe a position in a given file ; handled by the BaseParser.