ArkScript
A small, lisp-inspired, functional scripting language
BaseParser.cpp
Go to the documentation of this file.
3
4#include <utility>
5#include <algorithm>
6
7#include <fmt/core.h>
8
9namespace Ark::internal
10{
11 void BaseParser::registerNewLine(std::string::iterator it, std::size_t row)
12 {
13 // search for an existing new line position
14 if (std::ranges::find_if(m_it_to_row, [it](const auto& pair) {
15 return pair.first == it;
16 }) != m_it_to_row.end())
17 return;
18
19 // if the mapping is empty, the loop while never hit, and we'll never insert anything
20 if (m_it_to_row.empty())
21 {
22 m_it_to_row.emplace_back(it, row);
23 return;
24 }
25
26 for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i)
27 {
28 auto current_it = m_it_to_row[i].first;
29 auto next_it = i + 1 < end ? m_it_to_row[i + 1].first : m_str.end();
30 if (current_it < it && it < next_it)
31 {
32 m_it_to_row.insert(
33 m_it_to_row.begin() + static_cast<decltype(m_it_to_row)::difference_type>(i) + 1,
34 std::make_pair(it, row));
35 break;
36 }
37 }
38 }
39
41 {
43 if (isEOF())
44 {
45 m_sym = utf8_char_t(); // reset sym to EOF
46 return;
47 }
48
49 // getting a character from the stream
50 auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
51 m_next_it = it;
52 m_sym = sym;
53
54 if (*m_it == '\n')
55 {
56 ++m_filepos.row;
57 m_filepos.col = 0;
59 }
60 else if (m_sym.isPrintable())
62 }
63
64 void BaseParser::initParser(const std::string& filename, const std::string& code)
65 {
66 m_filename = filename;
67
68 // if the input string is empty, raise an error
69 if (code.empty())
70 {
72 error("Expected symbol, got empty string", m_filepos);
73 }
74
75 m_str = code;
76 m_it = m_next_it = m_str.begin();
77
78 // otherwise, get the first symbol
79 next();
80 }
81
82 void BaseParser::backtrack(const long n)
83 {
84 if (std::cmp_less(n, m_str.size()))
85 m_it = m_str.begin() + n;
86 else
87 return;
88
89 auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
90 m_next_it = it;
91 m_sym = sym;
92
93 // search for the nearest it < m_it in the map to know the line number
94 for (const auto& [at, line] : m_it_to_row)
95 {
96 if (it <= at)
97 {
98 m_filepos.row = line - 1;
99 break;
100 }
101 }
102 // compute the position in the line
103 const auto it_pos = static_cast<std::size_t>(std::distance(m_str.begin(), m_it));
104 const std::string_view view { m_str.begin(), m_it };
105 const auto nearest_newline_index = view.find_last_of('\n');
106 if (nearest_newline_index != std::string_view::npos)
107 m_filepos.col = it_pos - nearest_newline_index;
108 else
109 m_filepos.col = it_pos + 1;
110 }
111
113 {
114 return m_filepos;
115 }
116
118 {
119 const auto [row, col] = getCursor();
120
121 return CodeErrorContext(
123 // for additional contexts, the end position is useless
124 FileSpan { .start = FilePos { .line = row, .column = col }, .end = std::nullopt });
125 }
126
127 void BaseParser::error(const std::string& error, const FilePosition start_at, const std::optional<CodeErrorContext>& additional_context) const
128 {
129 const auto [row, col] = getCursor();
130 throw CodeError(
131 error,
134 FileSpan {
135 .start = FilePos { .line = start_at.row, .column = start_at.col },
136 .end = FilePos { .line = row, .column = col } }),
137 additional_context);
138 }
139
140 void BaseParser::errorWithNextToken(const std::string& message, const std::optional<CodeErrorContext>& additional_context)
141 {
142 const auto filepos = getCursor();
143
145 error(message, filepos, additional_context);
146 }
147
148 void BaseParser::expectSuffixOrError(const char suffix, const std::string& context, const std::optional<CodeErrorContext>& additional_context)
149 {
150 if (!accept(IsChar(suffix)))
151 errorWithNextToken(fmt::format("Missing '{}' {}", suffix, context), additional_context);
152 }
153
154 bool BaseParser::accept(const CharPred& t, std::string* s)
155 {
156 if (isEOF())
157 return false;
158
159 // return false if the predicate couldn't consume the symbol
160 if (!t(m_sym.codepoint()))
161 return false;
162 // otherwise, add it to the string and go to the next symbol
163 if (s != nullptr)
164 *s += m_sym.c_str();
165
166 next();
167 return true;
168 }
169
170 bool BaseParser::expect(const CharPred& t, std::string* s)
171 {
172 // throw an error if the predicate couldn't consume the symbol
173 if (!t(m_sym.codepoint()))
174 error("Expected " + t.name, getCursor());
175 // otherwise, add it to the string and go to the next symbol
176 if (s != nullptr)
177 *s += m_sym.c_str();
178 next();
179 return true;
180 }
181
182 std::string BaseParser::peek() const
183 {
184 return m_sym.c_str();
185 }
186
187 bool BaseParser::space(std::string* s)
188 {
189 if (accept(IsSpace))
190 {
191 if (s != nullptr)
192 s->push_back(' ');
193 // loop while there are still ' ' to consume
194 while (accept(IsSpace))
195 ;
196 return true;
197 }
198 return false;
199 }
200
201 bool BaseParser::inlineSpace(std::string* s)
202 {
204 {
205 if (s != nullptr)
206 s->push_back(' ');
207 // loop while there are still ' ' to consume
208 while (accept(IsInlineSpace))
209 ;
210 return true;
211 }
212 return false;
213 }
214
215 bool BaseParser::comment(std::string* s)
216 {
217 if (accept(IsChar('#'), s))
218 {
219 while (accept(IsNot(IsChar('\n')), s))
220 ;
221 accept(IsChar('\n'), s);
222 return true;
223 }
224 return false;
225 }
226
228 {
229 std::string s;
230
231 inlineSpace();
232 while (!isEOF() && comment(&s))
233 inlineSpace();
234
235 return s;
236 }
237
239 {
240 std::string s;
241
242 space();
243 while (!isEOF() && comment(&s))
244 space();
245
246 return s;
247 }
248
249 bool BaseParser::prefix(const char c)
250 {
251 if (!accept(IsChar(c)))
252 return false;
253 return true;
254 }
255
256 bool BaseParser::number(std::string* s)
257 {
258 if (accept(IsDigit, s))
259 {
260 // consume all the digits available,
261 // stop when the symbol isn't a digit anymore
262 while (accept(IsDigit, s))
263 ;
264 return true;
265 }
266 return false;
267 }
268
269 bool BaseParser::signedNumber(std::string* s)
270 {
271 accept(IsMinus, s);
272 if (!number(s))
273 return false;
274
275 // (optional) floating part
276 accept(IsChar('.'), s) && number(s);
277 // (optional) scientific part
278 if (accept(IsEither(IsChar('e'), IsChar('E')), s))
279 {
280 accept(IsEither(IsMinus, IsChar('+')), s);
281 number(s);
282 }
283
284 return true;
285 }
286
287 bool BaseParser::hexNumber(unsigned int length, std::string* s)
288 {
289 while (length != 0)
290 {
291 if (!accept(IsHex, s))
292 return false;
293 --length;
294 }
295 return true;
296 }
297
298 bool BaseParser::name(std::string* s)
299 {
300 const auto alpha_symbols = IsEither(IsAlpha, IsSymbol);
301 const auto alnum_symbols = IsEither(IsAlnum, IsSymbol);
302
303 if (accept(alpha_symbols, s))
304 {
305 while (accept(alnum_symbols, s))
306 ;
307 return true;
308 }
309 return false;
310 }
311
312 bool BaseParser::sequence(const std::string& s)
313 {
314 return std::ranges::all_of(s, [this](const char c) {
315 return accept(IsChar(c));
316 });
317 }
318
319 bool BaseParser::packageName(std::string* s)
320 {
321 if (accept(IsAlnum, s))
322 {
323 while (accept(IsEither(IsAlnum, IsEither(IsChar('_'), IsChar('-'))), s))
324 ;
325 return true;
326 }
327 return false;
328 }
329
330 bool BaseParser::anyUntil(const CharPred& delim, std::string* s)
331 {
332 if (accept(IsNot(delim), s))
333 {
334 while (accept(IsNot(delim), s))
335 ;
336 return true;
337 }
338 return false;
339 }
340
341 bool BaseParser::oneOf(const std::initializer_list<std::string> words, std::string* s)
342 {
343 std::string buffer;
344 if (!name(&buffer))
345 return false;
346
347 if (s)
348 *s = buffer;
349
350 return std::ranges::any_of(words, [&buffer](const std::string& word) {
351 return word == buffer;
352 });
353 }
354}
ArkScript homemade exceptions.
bool sequence(const std::string &s)
FilePosition m_filepos
The position of the cursor in the file.
std::string::iterator m_next_it
void initParser(const std::string &filename, const std::string &code)
bool expect(const CharPred &t, std::string *s=nullptr)
Check if a Character Predicate was able to parse, call next() if matching; throw a CodeError if it doesn't match
bool number(std::string *s=nullptr)
void error(const std::string &error, FilePosition start_at, const std::optional< CodeErrorContext > &additional_context=std::nullopt) const
Create an error context and throw an error containing said context.
std::string::iterator m_it
bool accept(const CharPred &t, std::string *s=nullptr)
check if a Character Predicate was able to parse, call next() if matching
std::string newlineOrComment()
bool hexNumber(unsigned length, std::string *s=nullptr)
void backtrack(long n)
Backtrack to a given position (this is NOT an offset!)
bool anyUntil(const CharPred &delim, std::string *s=nullptr)
Match any char that does not match the predicate.
std::string peek() const
bool oneOf(std::initializer_list< std::string > words, std::string *s=nullptr)
Fetch a token and try to match one of the given words.
bool space(std::string *s=nullptr)
void registerNewLine(std::string::iterator it, std::size_t row)
Register the position of a new line, with an iterator pointing to the new line and the row number.
void next()
getting next character and changing the values of count/row/col/sym
bool name(std::string *s=nullptr)
bool comment(std::string *s=nullptr)
bool packageName(std::string *s=nullptr)
void errorWithNextToken(const std::string &message, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Fetch the next token (space and paren delimited) to generate an error.
CodeErrorContext generateErrorContextAtCurrentPosition() const
bool inlineSpace(std::string *s=nullptr)
void expectSuffixOrError(char suffix, const std::string &context, const std::optional< CodeErrorContext > &additional_context=std::nullopt)
Check for a closing char or generate an error.
bool signedNumber(std::string *s=nullptr)
std::vector< std::pair< std::string::iterator, std::size_t > > m_it_to_row
A crude map of position to line number to speed up line number computing.
utf8_char_t m_sym
The current utf8 character we're on.
FilePosition getCursor() const
codepoint_t codepoint() const
Definition utf8_char.hpp:92
const char * c_str() const
Definition utf8_char.hpp:90
static std::pair< std::string::iterator, utf8_char_t > at(const std::string::iterator it, const std::string::iterator end)
Parse a codepoint and compute its length and representation.
Definition utf8_char.hpp:30
std::size_t size() const
Definition utf8_char.hpp:91
const IsChar IsMinus('-')
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
const std::string name
Describe a position in a given file ; handled by the BaseParser.
Describes a span for a node/atom in a file, its start position and end position.
Definition Position.hpp:35