ArkScript
A small, lisp-inspired, functional scripting language
BaseParser.cpp
Go to the documentation of this file.
2#include <Ark/Exceptions.hpp>
3
4#include <utility>
5#include <algorithm>
6
7#include <fmt/core.h>
8
9namespace Ark::internal
10{
11 void BaseParser::registerNewLine(std::string::iterator it, std::size_t row)
12 {
13 // search for an existing new line position
14 if (std::ranges::find_if(m_it_to_row, [it](const auto& pair) {
15 return pair.first == it;
16 }) != m_it_to_row.end())
17 return;
18
19 // if the mapping is empty, the loop while never hit and we'll never insert anything
20 if (m_it_to_row.empty())
21 {
22 m_it_to_row.emplace_back(it, row);
23 return;
24 }
25
26 for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i)
27 {
28 auto current_it = m_it_to_row[i].first;
29 auto next_it = i + 1 < end ? m_it_to_row[i + 1].first : m_str.end();
30 if (current_it < it && it < next_it)
31 {
32 m_it_to_row.insert(
33 m_it_to_row.begin() + static_cast<decltype(m_it_to_row)::difference_type>(i) + 1,
34 std::make_pair(it, row));
35 break;
36 }
37 }
38 }
39
// NOTE(review): this listing lacks the signature line and a few statements of this function
// (the original numbering skips 40, 42, 58 and 61). Per the member index it is presumably
// `void BaseParser::next()` — confirm against the real source before relying on these notes.
// Visible behaviour: loads the symbol found at m_it into m_sym and updates m_filepos.
41 {
// at end of input there is no symbol to load
43 if (isEOF())
44 {
45 m_sym = utf8_char_t(); // reset sym to EOF
46 return;
47 }
48
49 // getting a character from the stream
// utf8_char_t::at returns an iterator along with the parsed symbol; the iterator is kept in
// m_next_it (presumably the start of the following symbol — TODO confirm)
50 auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
51 m_next_it = it;
52 m_sym = sym;
53
// a new line moves the cursor to the next row and resets the column
54 if (*m_it == '\n')
55 {
56 ++m_filepos.row;
57 m_filepos.col = 0;
59 }
// NOTE(review): the statement guarded by this condition is not part of the listing
60 else if (m_sym.isPrintable())
62 }
63
/**
 * @brief Store the filename and the code to parse, then load the first symbol
 * @details error() is called when the code is empty; it throws a CodeError.
 *
 * @param filename name kept in m_filename (used when building error contexts)
 * @param code source code copied into m_str
 */
64 void BaseParser::initParser(const std::string& filename, const std::string& code)
65 {
66 m_filename = filename;
67
68 // if the input string is empty, raise an error
69 if (code.empty())
70 {
// NOTE(review): the original numbering skips 71 here, a statement may be missing from this listing
72 error("Expected symbol, got empty string", "");
73 }
74
75 m_str = code;
// both iterators start at the beginning of the freshly copied code
76 m_it = m_next_it = m_str.begin();
77
78 // otherwise, get the first symbol
79 next();
80 }
81
82 void BaseParser::backtrack(const long n)
83 {
84 if (std::cmp_greater_equal(n, m_str.size()))
85 return;
86
87 if (std::cmp_less(n, m_str.size()))
88 m_it = m_str.begin() + n;
89 else
90 m_it = m_str.begin();
91
92 auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
93 m_next_it = it;
94 m_sym = sym;
95
96 // search for the nearest it < m_it in the map to know the line number
97 for (const auto& [at, line] : m_it_to_row)
98 {
99 if (it <= at)
100 {
101 m_filepos.row = line - 1;
102 break;
103 }
104 }
105 // compute the position in the line
106 std::string_view view = m_str;
107 const auto it_pos = static_cast<std::size_t>(std::distance(m_str.begin(), m_it));
108 view = view.substr(0, it_pos);
109 const auto nearest_newline_index = view.find_last_of('\n');
110 if (nearest_newline_index != std::string_view::npos)
111 m_filepos.col = it_pos - nearest_newline_index;
112 else
113 m_filepos.col = it_pos + 1;
114 }
115
// NOTE(review): the signature line is missing from this listing; per the member index it is
// presumably `FilePosition BaseParser::getCursor() const` — confirm against the real source.
// Returns the position of the cursor in the file (m_filepos).
117 {
118 return m_filepos;
119 }
120
// NOTE(review): the signature line is missing from this listing; per the member index it is
// presumably `CodeErrorContext BaseParser::generateErrorContext(const std::string& expr)`.
// Builds an error context from the current cursor position, `expr` and the current symbol.
122 {
123 const auto [row, col] = getCursor();
124
// NOTE(review): the original numbering skips 126, so the first constructor argument is not
// visible here (error() passes m_filename in that slot — confirm)
125 return CodeErrorContext(
127 row,
128 col,
129 expr,
130 m_sym);
131 }
132
133 void BaseParser::error(const std::string& error, std::string exp, const std::optional<CodeErrorContext>& additional_context)
134 {
135 const auto [row, col] = getCursor();
136 throw CodeError(
137 error,
138 CodeErrorContext(m_filename, row, col, std::move(exp), m_sym),
139 additional_context);
140 }
141
142 void BaseParser::errorWithNextToken(const std::string& message, const std::optional<CodeErrorContext>& additional_context)
143 {
144 const auto pos = getCount();
145 std::string next_token;
146
147 anyUntil(IsEither(IsInlineSpace, IsEither(IsChar('('), IsChar(')'))), &next_token);
148 backtrack(pos);
149
150 error(message, next_token, additional_context);
151 }
152
153 void BaseParser::expectSuffixOrError(const char suffix, const std::string& context, const std::optional<CodeErrorContext>& additional_context)
154 {
155 if (!accept(IsChar(suffix)))
156 errorWithNextToken(fmt::format("Missing '{}' {}", suffix, context), additional_context);
157 }
158
159 bool BaseParser::accept(const CharPred& t, std::string* s)
160 {
161 if (isEOF())
162 return false;
163
164 // return false if the predicate couldn't consume the symbol
165 if (!t(m_sym.codepoint()))
166 return false;
167 // otherwise, add it to the string and go to the next symbol
168 if (s != nullptr)
169 *s += m_sym.c_str();
170
171 next();
172 return true;
173 }
174
175 bool BaseParser::expect(const CharPred& t, std::string* s)
176 {
177 // throw an error if the predicate couldn't consume the symbol
178 if (!t(m_sym.codepoint()))
179 error("Expected " + t.name, m_sym.c_str());
180 // otherwise, add it to the string and go to the next symbol
181 if (s != nullptr)
182 *s += m_sym.c_str();
183 next();
184 return true;
185 }
186
187 std::string BaseParser::peek() const
188 {
189 return m_sym.c_str();
190 }
191
192 bool BaseParser::space(std::string* s)
193 {
194 if (accept(IsSpace))
195 {
196 if (s != nullptr)
197 s->push_back(' ');
198 // loop while there are still ' ' to consume
199 while (accept(IsSpace))
200 ;
201 return true;
202 }
203 return false;
204 }
205
/**
 * Consume a run of symbols matching IsInlineSpace.
 *
 * s: optional output string; a single ' ' is appended to it, however long the run is.
 */
206 bool BaseParser::inlineSpace(std::string* s)
207 {
// NOTE(review): the original numbering skips 208 here; the condition opening this block is
// missing from the listing (presumably `if (accept(IsInlineSpace))`, as in space() — confirm)
209 {
210 if (s != nullptr)
211 s->push_back(' ');
212 // loop while there are still ' ' to consume
213 while (accept(IsInlineSpace))
214 ;
215 return true;
216 }
217 return false;
218 }
219
220 bool BaseParser::comment(std::string* s)
221 {
222 if (accept(IsChar('#'), s))
223 {
224 while (accept(IsNot(IsChar('\n')), s))
225 ;
226 accept(IsChar('\n'), s);
227 return true;
228 }
229 return false;
230 }
231
232 bool BaseParser::spaceComment(std::string* s)
233 {
234 bool matched = false;
235
236 inlineSpace();
237 while (!isEOF() && comment(s))
238 {
239 inlineSpace();
240 matched = true;
241 }
242
243 return matched;
244 }
245
246 bool BaseParser::newlineOrComment(std::string* s)
247 {
248 bool matched = false;
249
250 space();
251 while (!isEOF() && comment(s))
252 {
253 space();
254 matched = true;
255 }
256
257 return matched;
258 }
259
260 bool BaseParser::prefix(const char c)
261 {
262 if (!accept(IsChar(c)))
263 return false;
264 return true;
265 }
266
267 bool BaseParser::number(std::string* s)
268 {
269 if (accept(IsDigit, s))
270 {
271 // consume all the digits available,
272 // stop when the symbol isn't a digit anymore
273 while (accept(IsDigit, s))
274 ;
275 return true;
276 }
277 return false;
278 }
279
280 bool BaseParser::signedNumber(std::string* s)
281 {
282 accept(IsMinus, s);
283 if (!number(s))
284 return false;
285
286 // (optional) floating part
287 accept(IsChar('.'), s) && number(s);
288 // (optional) scientific part
289 if (accept(IsEither(IsChar('e'), IsChar('E')), s))
290 {
291 accept(IsEither(IsMinus, IsChar('+')), s);
292 number(s);
293 }
294
295 return true;
296 }
297
298 bool BaseParser::hexNumber(unsigned int length, std::string* s)
299 {
300 while (length != 0)
301 {
302 if (!accept(IsHex, s))
303 return false;
304 --length;
305 }
306 return true;
307 }
308
309 bool BaseParser::name(std::string* s)
310 {
311 const auto alpha_symbols = IsEither(IsAlpha, IsSymbol);
312 const auto alnum_symbols = IsEither(IsAlnum, IsSymbol);
313
314 if (accept(alpha_symbols, s))
315 {
316 while (accept(alnum_symbols, s))
317 ;
318 return true;
319 }
320 return false;
321 }
322
323 bool BaseParser::sequence(const std::string& s)
324 {
325 return std::ranges::all_of(s, [this](const char c) {
326 return accept(IsChar(c));
327 });
328 }
329
330 bool BaseParser::packageName(std::string* s)
331 {
332 if (accept(IsAlnum, s))
333 {
334 while (accept(IsEither(IsAlnum, IsEither(IsChar('_'), IsChar('-'))), s))
335 ;
336 return true;
337 }
338 return false;
339 }
340
341 bool BaseParser::anyUntil(const CharPred& delim, std::string* s)
342 {
343 if (accept(IsNot(delim), s))
344 {
345 while (accept(IsNot(delim), s))
346 ;
347 return true;
348 }
349 return false;
350 }
351
352 bool BaseParser::oneOf(const std::initializer_list<std::string> words, std::string* s)
353 {
354 std::string buffer;
355 if (!name(&buffer))
356 return false;
357
358 if (s)
359 *s = buffer;
360
361 return std::ranges::any_of(words, [&buffer](const std::string& word) {
362 return word == buffer;
363 });
364 }
365}
ArkScript homemade exceptions.
bool sequence(const std::string &s)
FilePosition m_filepos
The position of the cursor in the file.
bool spaceComment(std::string *s=nullptr)
std::string::iterator m_next_it
void initParser(const std::string &filename, const std::string &code)
bool expect(const CharPred &t, std::string *s=nullptr)
Check if a Character Predicate was able to parse, call next() if matching; throw a CodeError if it doesn't.
bool number(std::string *s=nullptr)
std::string::iterator m_it
bool accept(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching
bool hexNumber(unsigned length, std::string *s=nullptr)
bool newlineOrComment(std::string *s=nullptr)
void backtrack(long n)
Backtrack to a given position (this is NOT an offset!)
CodeErrorContext generateErrorContext(const std::string &expr)
bool anyUntil(const CharPred &delim, std::string *s=nullptr)
Match any char that does not match the predicate.
std::string peek() const
bool oneOf(std::initializer_list< std::string > words, std::string *s=nullptr)
Fetch a token and try to match one of the given words.
bool space(std::string *s=nullptr)
void registerNewLine(std::string::iterator it, std::size_t row)
Register the position of a new line, with an iterator pointing to the new line and the row number.
void next()
getting next character and changing the values of count/row/col/sym
bool name(std::string *s=nullptr)
bool comment(std::string *s=nullptr)
bool packageName(std::string *s=nullptr)
void errorWithNextToken(const std::string &message, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Fetch the next token (space and paren delimited) to generate an error.
void error(const std::string &error, std::string exp, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
bool inlineSpace(std::string *s=nullptr)
void expectSuffixOrError(char suffix, const std::string &context, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Check for a closing char or generate an error.
bool signedNumber(std::string *s=nullptr)
std::vector< std::pair< std::string::iterator, std::size_t > > m_it_to_row
A crude map of position to line number to speed up line number computing.
utf8_char_t m_sym
The current utf8 character we're on.
FilePosition getCursor() const
codepoint_t codepoint() const
Definition utf8_char.hpp:94
const char * c_str() const
Definition utf8_char.hpp:92
static std::pair< std::string::iterator, utf8_char_t > at(const std::string::iterator it, const std::string::iterator end)
Parse a codepoint and compute its length and representation.
Definition utf8_char.hpp:32
std::size_t size() const
Definition utf8_char.hpp:93
const IsChar IsMinus('-')
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
const std::string name
Describe a position in a given file ; handled by the BaseParser.