ArkScript
A small, fast, functional and scripting language for video games
Compiler.cpp
Go to the documentation of this file.
2 
3 #include <fstream>
4 #include <chrono>
5 #include <limits>
6 #include <filesystem>
7 #include <picosha2.h>
8 
9 #include <Ark/Literals.hpp>
10 #include <Ark/Utils.hpp>
13 
14 namespace Ark
15 {
16  using namespace internal;
17  using namespace literals;
18 
19  Compiler::Compiler(unsigned debug, const std::vector<std::string>& libenv, uint16_t options) :
20  m_parser(debug, options, libenv), m_optimizer(options),
21  m_options(options), m_debug(debug)
22  {}
23 
/**
 * @brief Feed the compiler with source code.
 *
 * The code is given to the parser; the parser's AST is passed to `mp`, and
 * the AST returned by `mp` is finally handed to the optimizer.
 *
 * @param code source code to compile
 * @param filename name forwarded to the parser together with the code
 */
24  void Compiler::feed(const std::string& code, const std::string& filename)
25  {
// build the AST from the raw source
26  m_parser.feed(code, filename);
27 
// NOTE(review): `mp` is not declared in the lines visible here (listing line 28
// is absent) — presumably the macro processor; confirm against the full file.
29  mp.feed(m_parser.ast());
// the optimizer works on the AST produced by `mp`, not on the parser's one
30  m_optimizer.feed(mp.ast());
31  }
32 
/**
 * Body of the compilation entry point: compiles the optimized AST into code
 * pages, appends every page to m_bytecode, then inserts a SHA-256 digest
 * right after the file header.
 *
 * NOTE(review): the signature line and several statements (listing lines 33,
 * 35, 42, 44, 49, 63) are absent from this listing; the symbol index at the
 * end of the page lists `void compile()` at Compiler.cpp:33 — confirm against
 * the full file.
 */
34  {
36 
37  m_code_pages.emplace_back(); // create empty page
38 
39  // gather symbols, values, and start to create code segments
40  _compile(m_optimizer.ast(), /* current_page */ 0, /* produces_result */ false, /* is_terminal */ false);
41  // throw an error on undefined symbol uses
43 
45 
46  // push the different code segments
47  for (const bytecode_t& page : m_code_pages)
48  {
50 
51  // push number of elements
// (+ 1 accounts for the HALT appended after the page's own instructions)
52  pushNumber(static_cast<uint16_t>(page.size() + 1));
53 
54  for (auto inst : page)
55  m_bytecode.push_back(inst);
56  // just in case we got too far, always add a HALT to be sure the
57  // VM won't do anything crazy
58  m_bytecode.push_back(Instruction::HALT);
59  }
60 
// NOTE(review): a page is always emplaced at the top of this function, so
// m_code_pages is never empty here and this branch looks unreachable.
61  if (!m_code_pages.size())
62  {
64  pushNumber(1_u16);
65  m_bytecode.push_back(Instruction::HALT);
66  }
67 
// 18 matches the header layout described in pushFileHeader:
// 4 bytes (name) + 3 * 2 bytes (version) + 8 bytes (timestamp)
68  constexpr std::size_t header_size = 18;
69 
70  // generate a hash of the tables + bytecode
// the digest covers everything after the header and is then inserted between
// the header and the hashed content
71  std::vector<unsigned char> hash_out(picosha2::k_digest_size);
72  picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out);
73  m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end());
74  }
75 
76  void Compiler::saveTo(const std::string& file)
77  {
78  if (m_debug >= 1)
79  std::cout << "Final bytecode size: " << m_bytecode.size() * sizeof(uint8_t) << "B\n";
80 
81  std::ofstream output(file, std::ofstream::binary);
82  output.write(reinterpret_cast<char*>(&m_bytecode[0]), m_bytecode.size() * sizeof(uint8_t));
83  output.close();
84  }
85 
86  const bytecode_t& Compiler::bytecode() noexcept
87  {
88  return m_bytecode;
89  }
90 
/**
 * @brief Push the file headers (magic, version used, timestamp) to m_bytecode.
 *
 * NOTE(review): the statements pushing the version (listing lines 107-109)
 * are absent from this listing — confirm against the full file.
 */
91  void Compiler::pushFileHeader() noexcept
92  {
93  /*
94  Generating headers:
95  - lang name (to be sure we are executing an ArkScript file)
96  on 4 bytes (ark + padding)
97  - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes)
98  - timestamp (8 bytes, unix format)
99  */
100 
// magic: the three letters followed by a zero byte of padding
101  m_bytecode.push_back('a');
102  m_bytecode.push_back('r');
103  m_bytecode.push_back('k');
104  m_bytecode.push_back(0_u8);
105 
106  // push version
110 
111  // push timestamp
// number of seconds elapsed since the system clock's epoch
112  unsigned long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
113  std::chrono::system_clock::now().time_since_epoch())
114  .count();
// emit the 8 bytes from the most significant one (shift 56) down to the
// least significant one (shift 0)
115  for (char c = 0; c < 8; c++)
116  {
117  unsigned shift = 8 * (7 - c);
118  uint8_t ts_byte = (timestamp & (0xffULL << shift)) >> shift;
119  m_bytecode.push_back(ts_byte);
120  }
121  }
122 
/**
 * Body of the function pushing the symbols and values tables to m_bytecode.
 *
 * NOTE(review): the signature line and the statement that opens each table /
 * each value (listing lines 123, 132, 146, 154, 162, 169) are absent from
 * this listing; the symbol index at the end of the page lists
 * `void pushSymAndValTables()` at Compiler.cpp:123 — confirm against the
 * full file.
 *
 * Throws a CompilationError when a value has a type other than Number,
 * String or PageAddr.
 */
124  {
125  /*
126  - symbols table
127  + elements
128  - values table header
129  + elements
130  */
131 
133  // push size
// the count is truncated to 16 bits by the cast
134  pushNumber(static_cast<uint16_t>(m_symbols.size()));
135  // push elements
// NOTE(review): `auto sym` copies each element of m_symbols; a const
// reference would avoid the copy.
136  for (auto sym : m_symbols)
137  {
138  // push the string, null terminated
139  std::string s = sym.string();
140  for (std::size_t i = 0, size = s.size(); i < size; ++i)
141  m_bytecode.push_back(s[i]);
142  m_bytecode.push_back(0_u8);
143  }
144 
145  // values table
147  // push size
148  pushNumber(static_cast<uint16_t>(m_values.size()));
149  // push elements (separated with 0x00)
150  for (auto val : m_values)
151  {
152  if (val.type == ValTableElemType::Number)
153  {
// numbers are stored as their textual representation
// NOTE(review): std::to_string(double) formats with a fixed 6 digits after
// the decimal point, so very small or very precise values lose information.
155  auto n = std::get<double>(val.value);
156  std::string t = std::to_string(n);
157  for (std::size_t i = 0, size = t.size(); i < size; ++i)
158  m_bytecode.push_back(t[i]);
159  }
160  else if (val.type == ValTableElemType::String)
161  {
// strings are copied byte by byte
163  std::string t = std::get<std::string>(val.value);
164  for (std::size_t i = 0, size = t.size(); i < size; ++i)
165  m_bytecode.push_back(t[i]);
166  }
167  else if (val.type == ValTableElemType::PageAddr)
168  {
// page addresses are written as a 16 bits number
170  pushNumber(static_cast<uint16_t>(std::get<std::size_t>(val.value)));
171  }
172  else
173  throw CompilationError("trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code");
174 
// every value, whatever its type, is followed by a zero byte
175  m_bytecode.push_back(0_u8);
176  }
177  }
178 
179  std::size_t Compiler::countArkObjects(const std::vector<Node>& lst) noexcept
180  {
181  std::size_t n = 0;
182  for (const Node& node : lst)
183  {
184  if (node.nodeType() != NodeType::GetField)
185  n++;
186  }
187  return n;
188  }
189 
190  std::optional<std::size_t> Compiler::isOperator(const std::string& name) noexcept
191  {
192  auto it = std::find(internal::operators.begin(), internal::operators.end(), name);
193  if (it != internal::operators.end())
194  return std::distance(internal::operators.begin(), it);
195  return std::nullopt;
196  }
197 
198  std::optional<std::size_t> Compiler::isBuiltin(const std::string& name) noexcept
199  {
200  auto it = std::find_if(Builtins::builtins.begin(), Builtins::builtins.end(),
201  [&name](const std::pair<std::string, Value>& element) -> bool {
202  return name == element.first;
203  });
204  if (it != Builtins::builtins.end())
205  return std::distance(Builtins::builtins.begin(), it);
206  return std::nullopt;
207  }
208 
/**
 * @brief Compute specific instruction argument count and push it on a page.
 *
 * LIST receives `previous` unchanged; the instructions of the second branch
 * receive `previous - 1`. Any other instruction gets nothing pushed.
 *
 * NOTE(review): the end of the `else if` condition (listing line 214) is
 * absent from this listing, so the complete set of instructions handled by
 * the second branch cannot be read from here — confirm against the full file.
 *
 * @param inst instruction that was just emitted
 * @param previous argument count computed by the caller
 * @param p page to write to
 */
209  void Compiler::pushSpecificInstArgc(Instruction inst, uint16_t previous, int p) noexcept
210  {
211  if (inst == Instruction::LIST)
212  pushNumber(previous, page_ptr(p));
213  else if (inst == Instruction::APPEND || inst == Instruction::APPEND_IN_PLACE ||
215  pushNumber(previous - 1, page_ptr(p));
216  }
217 
218  bool Compiler::mayBeFromPlugin(const std::string& name) noexcept
219  {
220  std::string splitted = Utils::splitString(name, ':')[0];
221  auto it = std::find_if(m_plugins.begin(), m_plugins.end(),
222  [&splitted](const std::string& plugin) -> bool {
223  return std::filesystem::path(plugin).stem().string() == splitted;
224  });
225  return it != m_plugins.end();
226  }
227 
228  void Compiler::throwCompilerError(const std::string& message, const Node& node)
229  {
230  throw CompilationError(makeNodeBasedErrorCtx(message, node));
231  }
232 
233  void Compiler::_compile(const Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name)
234  {
235  // register symbols
236  if (x.nodeType() == NodeType::Symbol)
237  compileSymbol(x, p, produces_result);
238  else if (x.nodeType() == NodeType::GetField)
239  {
240  std::string name = x.string();
241  // 'name' shouldn't be a builtin/operator, we can use it as-is
242  uint16_t i = addSymbol(x);
243 
244  page(p).emplace_back(Instruction::GET_FIELD);
245  pushNumber(i, page_ptr(p));
246  }
247  // register values
248  else if (x.nodeType() == NodeType::String || x.nodeType() == NodeType::Number)
249  {
250  uint16_t i = addValue(x);
251 
252  if (!produces_result)
253  {
254  page(p).emplace_back(Instruction::LOAD_CONST);
255  pushNumber(i, page_ptr(p));
256  }
257  }
258  // empty code block should be nil
259  else if (x.constList().empty())
260  {
261  if (!produces_result)
262  {
263  auto it_builtin = isBuiltin("nil");
264  page(p).emplace_back(Instruction::BUILTIN);
265  pushNumber(static_cast<uint16_t>(it_builtin.value()), page_ptr(p));
266  }
267  }
268  // specific instructions
269  else if (auto c0 = x.constList()[0]; c0.nodeType() == NodeType::Symbol && isSpecific(c0.string()).has_value())
270  compileSpecific(c0, x, p, produces_result);
271  // registering structures
272  else if (x.constList()[0].nodeType() == NodeType::Keyword)
273  {
274  Keyword n = x.constList()[0].keyword();
275 
276  switch (n)
277  {
278  case Keyword::If:
279  compileIf(x, p, produces_result, is_terminal, var_name);
280  break;
281 
282  case Keyword::Set:
283  [[fallthrough]];
284  case Keyword::Let:
285  [[fallthrough]];
286  case Keyword::Mut:
287  compileLetMutSet(n, x, p);
288  break;
289 
290  case Keyword::Fun:
291  compileFunction(x, p, produces_result, var_name);
292  break;
293 
294  case Keyword::Begin:
295  {
296  for (std::size_t i = 1, size = x.constList().size(); i < size; ++i)
297  _compile(
298  x.constList()[i],
299  p,
300  // All the nodes in a begin (except for the last one) are producing a result that we want to drop.
301  (i != size - 1) ? true : produces_result,
302  // If the begin is a terminal node, only its last node is terminal.
303  is_terminal ? (i == size - 1) : false,
304  var_name);
305  break;
306  }
307 
308  case Keyword::While:
309  compileWhile(x, p);
310  break;
311 
312  case Keyword::Import:
313  compilePluginImport(x, p);
314  break;
315 
316  case Keyword::Quote:
317  compileQuote(x, p, produces_result, is_terminal, var_name);
318  break;
319 
320  case Keyword::Del:
321  compileDel(x, p);
322  break;
323  }
324  }
325  else
326  {
327  // if we are here, we should have a function name
328  // push arguments first, then function name, then call it
329  handleCalls(x, p, produces_result, is_terminal, var_name);
330  }
331  }
332 
333  void Compiler::compileSymbol(const Node& x, int p, bool produces_result)
334  {
335  std::string name = x.string();
336 
337  if (auto it_builtin = isBuiltin(name))
338  {
339  page(p).emplace_back(Instruction::BUILTIN);
340  pushNumber(static_cast<uint16_t>(it_builtin.value()), page_ptr(p));
341  }
342  else if (auto it_operator = isOperator(name))
343  page(p).emplace_back(static_cast<uint8_t>(Instruction::FIRST_OPERATOR + it_operator.value()));
344  else // var-use
345  {
346  uint16_t i = addSymbol(x);
347 
348  page(p).emplace_back(Instruction::LOAD_SYMBOL);
349  pushNumber(i, page_ptr(p));
350  }
351 
352  if (produces_result)
353  page(p).push_back(Instruction::POP);
354  }
355 
356  void Compiler::compileSpecific(const Node& c0, const Node& x, int p, bool produces_result)
357  {
358  std::string name = c0.string();
359  Instruction inst = isSpecific(name).value();
360 
361  // length of at least 1 since we got a symbol name
362  uint16_t argc = countArkObjects(x.constList()) - 1;
363  // error, can not use append/concat/pop (and their in place versions) with a <2 length argument list
364  if (argc < 2 && inst != Instruction::LIST)
365  throw CompilationError("can not use " + name + " with less than 2 arguments");
366 
367  // compile arguments in reverse order
368  for (uint16_t i = x.constList().size() - 1; i > 0; --i)
369  {
370  uint16_t j = i;
371  while (x.constList()[j].nodeType() == NodeType::GetField)
372  --j;
373  uint16_t diff = i - j;
374  while (j < i)
375  {
376  _compile(x.constList()[j], p, false, false);
377  ++j;
378  }
379  _compile(x.constList()[i], p, false, false);
380  i -= diff;
381  }
382 
383  // put inst and number of arguments
384  page(p).emplace_back(inst);
385  pushSpecificInstArgc(inst, argc, p);
386 
387  if (produces_result && name != "pop!") // pop! never pushes a value
388  page(p).push_back(Instruction::POP);
389  }
390 
391  void Compiler::compileIf(const Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name)
392  {
393  // compile condition
394  _compile(x.constList()[1], p, false, false);
395 
396  // jump only if needed to the if
397  page(p).push_back(Instruction::POP_JUMP_IF_TRUE);
398  std::size_t jump_to_if_pos = page(p).size();
399  // absolute address to jump to if condition is true
400  pushNumber(0_u16, page_ptr(p));
401 
402  // else code
403  if (x.constList().size() == 4) // we have an else clause
404  _compile(x.constList()[3], p, produces_result, is_terminal, var_name);
405 
406  // when else is finished, jump to end
407  page(p).push_back(Instruction::JUMP);
408  std::size_t jump_to_end_pos = page(p).size();
409  pushNumber(0_u16, page_ptr(p));
410 
411  // set jump to if pos
412  setNumberAt(p, jump_to_if_pos, page(p).size());
413  // if code
414  _compile(x.constList()[2], p, produces_result, is_terminal, var_name);
415  // set jump to end pos
416  setNumberAt(p, jump_to_end_pos, page(p).size());
417  }
418 
419  void Compiler::compileFunction(const Node& x, int p, bool produces_result, const std::string& var_name)
420  {
421  // capture, if needed
422  for (auto it = x.constList()[1].constList().begin(), it_end = x.constList()[1].constList().end(); it != it_end; ++it)
423  {
424  if (it->nodeType() == NodeType::Capture)
425  {
426  // first check that the capture is a defined symbol
427  if (std::find(m_defined_symbols.begin(), m_defined_symbols.end(), it->string()) == m_defined_symbols.end())
428  {
429  // we didn't find it in the defined symbol list, thus we can't capture it
430  throwCompilerError("Can not capture " + it->string() + " because it is referencing an unbound variable.", *it);
431  }
432  page(p).emplace_back(Instruction::CAPTURE);
433  addDefinedSymbol(it->string());
434  uint16_t var_id = addSymbol(*it);
435  pushNumber(var_id, page_ptr(p));
436  }
437  }
438 
439  // create new page for function body
440  m_code_pages.emplace_back();
441  std::size_t page_id = m_code_pages.size() - 1;
442  // load value on the stack
443  page(p).emplace_back(Instruction::LOAD_CONST);
444  // save page_id into the constants table as PageAddr
445  pushNumber(addValue(page_id, x), page_ptr(p));
446 
447  // pushing arguments from the stack into variables in the new scope
448  for (auto it = x.constList()[1].constList().begin(), it_end = x.constList()[1].constList().end(); it != it_end; ++it)
449  {
450  if (it->nodeType() == NodeType::Symbol)
451  {
452  page(page_id).emplace_back(Instruction::MUT);
453  uint16_t var_id = addSymbol(*it);
454  addDefinedSymbol(it->string());
455  pushNumber(var_id, page_ptr(page_id));
456  }
457  }
458 
459  // push body of the function
460  _compile(x.constList()[2], page_id, false, true, var_name);
461 
462  // return last value on the stack
463  page(page_id).emplace_back(Instruction::RET);
464 
465  if (produces_result)
466  page(p).push_back(Instruction::POP);
467  }
468 
469  void Compiler::compileLetMutSet(Keyword n, const Node& x, int p)
470  {
471  uint16_t i = addSymbol(x.constList()[1]);
472  if (n != Keyword::Set)
473  addDefinedSymbol(x.constList()[1].string());
474 
475  // put value before symbol id
476  putValue(x, p, false);
477 
478  if (n == Keyword::Let)
479  page(p).push_back(Instruction::LET);
480  else if (n == Keyword::Mut)
481  page(p).push_back(Instruction::MUT);
482  else
483  page(p).push_back(Instruction::STORE);
484  pushNumber(i, page_ptr(p));
485  }
486 
487  void Compiler::compileWhile(const Node& x, int p)
488  {
489  // save current position to jump there at the end of the loop
490  std::size_t current = page(p).size();
491  // push condition
492  _compile(x.constList()[1], p, false, false);
493  // absolute jump to end of block if condition is false
494  page(p).push_back(Instruction::POP_JUMP_IF_FALSE);
495  std::size_t jump_to_end_pos = page(p).size();
496  // absolute address to jump to if condition is false
497  pushNumber(0_u16, page_ptr(p));
498  // push code to page
499  _compile(x.constList()[2], p, true, false);
500  // loop, jump to the condition
501  page(p).push_back(Instruction::JUMP);
502  // abosolute address
503  pushNumber(static_cast<uint16_t>(current), page_ptr(p));
504  // set jump to end pos
505  setNumberAt(p, jump_to_end_pos, page(p).size());
506  }
507 
508  void Compiler::compileQuote(const Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name)
509  {
510  // create new page for quoted code
511  m_code_pages.emplace_back();
512  std::size_t page_id = m_code_pages.size() - 1;
513  _compile(x.constList()[1], page_id, false, is_terminal, var_name);
514  page(page_id).emplace_back(Instruction::RET); // return to the last frame
515 
516  // call it
517  uint16_t id = addValue(page_id, x); // save page_id into the constants table as PageAddr
518  page(p).emplace_back(Instruction::LOAD_CONST);
519  pushNumber(id, page_ptr(p));
520 
521  if (produces_result)
522  page(p).push_back(Instruction::POP);
523  }
524 
525  void Compiler::compilePluginImport(const Node& x, int p)
526  {
527  // register plugin path in the constants table
528  uint16_t id = addValue(x.constList()[1]);
529  // save plugin name to use it later
530  m_plugins.push_back(x.constList()[1].string());
531  // add plugin instruction + id of the constant refering to the plugin path
532  page(p).emplace_back(Instruction::PLUGIN);
533  pushNumber(id, page_ptr(p));
534  }
535 
536  void Compiler::compileDel(const Node& x, int p)
537  {
538  // get id of symbol to delete
539  uint16_t i = addSymbol(x.constList()[1]);
540 
541  page(p).emplace_back(Instruction::DEL);
542  pushNumber(i, page_ptr(p));
543  }
544 
545  void Compiler::handleCalls(const Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name)
546  {
547  m_temp_pages.emplace_back();
548  int proc_page = -static_cast<int>(m_temp_pages.size());
549  _compile(x.constList()[0], proc_page, false, false); // storing proc
550 
551  // trying to handle chained closure.field.field.field...
552  std::size_t n = 1; // we need it later
553  const std::size_t end = x.constList().size();
554  while (n < end)
555  {
556  if (x.constList()[n].nodeType() == NodeType::GetField)
557  {
558  _compile(x.constList()[n], proc_page, false, false);
559  n++;
560  }
561  else
562  break;
563  }
564  std::size_t proc_page_len = m_temp_pages.back().size();
565 
566  // we know that operators take only 1 instruction, so if there are more
567  // it's a builtin/function
568  if (proc_page_len > 1)
569  {
570  if (is_terminal && x.constList()[0].nodeType() == NodeType::Symbol && var_name == x.constList()[0].string())
571  {
572  // we can drop the temp page as we won't be using it
573  m_temp_pages.pop_back();
574 
575  // push the arguments in reverse order
576  for (std::size_t i = x.constList().size() - 1; i >= n; --i)
577  _compile(x.constList()[i], p, false, false);
578 
579  // jump to the top of the function
580  page(p).push_back(Instruction::JUMP);
581  pushNumber(0_u16, page_ptr(p));
582 
583  return; // skip the possible Instruction::POP at the end
584  }
585  else
586  {
587  // push arguments on current page
588  for (auto exp = x.constList().begin() + n, exp_end = x.constList().end(); exp != exp_end; ++exp)
589  _compile(*exp, p, false, false);
590  // push proc from temp page
591  for (auto const& inst : m_temp_pages.back())
592  page(p).push_back(inst);
593  m_temp_pages.pop_back();
594 
595  // call the procedure
596  page(p).push_back(Instruction::CALL);
597  // number of arguments
598  std::size_t args_count = 0;
599  for (auto it = x.constList().begin() + 1, it_end = x.constList().end(); it != it_end; ++it)
600  {
601  if (it->nodeType() != NodeType::GetField &&
602  it->nodeType() != NodeType::Capture)
603  args_count++;
604  }
605  pushNumber(static_cast<uint16_t>(args_count), page_ptr(p));
606  }
607  }
608  else // operator
609  {
610  // retrieve operator
611  auto op_inst = m_temp_pages.back()[0];
612  m_temp_pages.pop_back();
613 
614  if (op_inst == Instruction::ASSERT)
615  produces_result = false;
616 
617  // push arguments on current page
618  std::size_t exp_count = 0;
619  for (std::size_t index = n, size = x.constList().size(); index < size; ++index)
620  {
621  _compile(x.constList()[index], p, false, false);
622 
623  if ((index + 1 < size &&
624  x.constList()[index + 1].nodeType() != NodeType::GetField &&
625  x.constList()[index + 1].nodeType() != NodeType::Capture) ||
626  index + 1 == size)
627  exp_count++;
628 
629  // in order to be able to handle things like (op A B C D...)
630  // which should be transformed into A B op C op D op...
631  if (exp_count >= 2)
632  page(p).push_back(op_inst);
633  }
634 
635  if (exp_count == 1)
636  page(p).push_back(op_inst);
637 
638  // need to check we didn't push the (op A B C D...) things for operators not supporting it
639  if (exp_count > 2)
640  {
641  switch (op_inst)
642  {
643  // authorized instructions
644  case Instruction::ADD: [[fallthrough]];
645  case Instruction::SUB: [[fallthrough]];
646  case Instruction::MUL: [[fallthrough]];
647  case Instruction::DIV: [[fallthrough]];
648  case Instruction::AND_: [[fallthrough]];
649  case Instruction::OR_: [[fallthrough]];
650  case Instruction::MOD:
651  break;
652 
653  default:
655  "can not create a chained expression (of length " + std::to_string(exp_count) +
656  ") for operator `" + std::string(internal::operators[static_cast<std::size_t>(op_inst - Instruction::FIRST_OPERATOR)]) +
657  "'. You most likely forgot a `)'.",
658  x);
659  }
660  }
661  }
662 
663  if (produces_result)
664  page(p).push_back(Instruction::POP);
665  }
666 
667  void Compiler::putValue(const Node& x, int p, bool produces_result)
668  {
669  std::string name = x.constList()[1].string();
670 
671  // starting at index = 2 because x is a (let|mut|set variable ...) node
672  for (std::size_t idx = 2, end = x.constList().size(); idx < end; ++idx)
673  _compile(x.constList()[idx], p, produces_result, false, name);
674  }
675 
676  uint16_t Compiler::addSymbol(const Node& sym)
677  {
678  // otherwise, add the symbol, and return its id in the table
679  auto it = std::find_if(m_symbols.begin(), m_symbols.end(), [&sym](const Node& sym_node) -> bool {
680  return sym_node.string() == sym.string();
681  });
682  if (it == m_symbols.end())
683  {
684  m_symbols.push_back(sym);
685  it = m_symbols.begin() + m_symbols.size() - 1;
686  }
687 
688  auto distance = std::distance(m_symbols.begin(), it);
689  if (distance < std::numeric_limits<uint16_t>::max())
690  return static_cast<uint16_t>(distance);
691  else
692  throwCompilerError("Too many symbols (exceeds 65'536), aborting compilation.", sym);
693  }
694 
695  uint16_t Compiler::addValue(const Node& x)
696  {
697  ValTableElem v(x);
698  auto it = std::find(m_values.begin(), m_values.end(), v);
699  if (it == m_values.end())
700  {
701  m_values.push_back(v);
702  it = m_values.begin() + m_values.size() - 1;
703  }
704 
705  auto distance = std::distance(m_values.begin(), it);
706  if (distance < std::numeric_limits<uint16_t>::max())
707  return static_cast<uint16_t>(distance);
708  else
709  throwCompilerError("Too many values (exceeds 65'536), aborting compilation.", x);
710  }
711 
712  uint16_t Compiler::addValue(std::size_t page_id, const Node& current)
713  {
714  ValTableElem v(page_id);
715  auto it = std::find(m_values.begin(), m_values.end(), v);
716  if (it == m_values.end())
717  {
718  m_values.push_back(v);
719  it = m_values.begin() + m_values.size() - 1;
720  }
721 
722  auto distance = std::distance(m_values.begin(), it);
723  if (distance < std::numeric_limits<uint16_t>::max())
724  return static_cast<uint16_t>(distance);
725  else
726  throwCompilerError("Too many values (exceeds 65'536), aborting compilation.", current);
727  }
728 
729  void Compiler::addDefinedSymbol(const std::string& sym)
730  {
731  // otherwise, add the symbol, and return its id in the table
732  auto it = std::find(m_defined_symbols.begin(), m_defined_symbols.end(), sym);
733  if (it == m_defined_symbols.end())
734  m_defined_symbols.push_back(sym);
735  }
736 
738  {
739  for (const Node& sym : m_symbols)
740  {
741  const std::string& str = sym.string();
742  bool is_plugin = mayBeFromPlugin(str);
743 
744  auto it = std::find(m_defined_symbols.begin(), m_defined_symbols.end(), str);
745  if (it == m_defined_symbols.end() && !is_plugin)
746  {
747  std::string suggestion = offerSuggestion(str);
748  if (suggestion.empty())
749  throwCompilerError("Unbound variable error \"" + str + "\" (variable is used but not defined)", sym);
750 
751  throwCompilerError("Unbound variable error \"" + str + "\" (did you mean \"" + suggestion + "\"?)", sym);
752  }
753  }
754  }
755 
756  std::string Compiler::offerSuggestion(const std::string& str)
757  {
758  std::string suggestion;
759  // our suggestion shouldn't require more than half the string to change
760  std::size_t suggestion_distance = str.size() / 2;
761 
762  for (const std::string& symbol : m_defined_symbols)
763  {
764  std::size_t current_distance = Utils::levenshteinDistance(str, symbol);
765  if (current_distance <= suggestion_distance)
766  {
767  suggestion_distance = current_distance;
768  suggestion = symbol;
769  }
770  }
771 
772  return suggestion;
773  }
774 
775  void Compiler::pushNumber(uint16_t n, std::vector<uint8_t>* page) noexcept
776  {
777  if (page == nullptr)
778  {
779  m_bytecode.push_back((n & 0xff00) >> 8);
780  m_bytecode.push_back(n & 0x00ff);
781  }
782  else
783  {
784  page->emplace_back((n & 0xff00) >> 8);
785  page->emplace_back(n & 0x00ff);
786  }
787  }
788 }
Host the declaration of all the ArkScript builtins.
ArkScript compiler is in charge of transforming the AST into bytecode.
constexpr int ARK_VERSION_MAJOR
Definition: Constants.hpp:16
constexpr int ARK_VERSION_PATCH
Definition: Constants.hpp:18
constexpr int ARK_VERSION_MINOR
Definition: Constants.hpp:17
User defined literals for Ark internals.
Handles the macros and their expansion in ArkScript source code.
Lots of utilities about string, filesystem and more.
CompilationError thrown by the compiler.
Definition: Exceptions.hpp:131
void _compile(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name="")
Compile a single node recursively.
Definition: Compiler.cpp:233
std::vector< uint8_t > & page(int i) noexcept
helper functions to get a temp or finalized code page
Definition: Compiler.hpp:109
void putValue(const internal::Node &x, int p, bool produces_result)
Put a value in the bytecode, handling the closures chains.
Definition: Compiler.cpp:667
void pushNumber(uint16_t n, std::vector< uint8_t > *page=nullptr) noexcept
Push a number on stack (need 2 bytes)
Definition: Compiler.cpp:775
void pushSpecificInstArgc(internal::Instruction inst, uint16_t previous, int p) noexcept
Compute specific instruction argument count.
Definition: Compiler.cpp:209
void compileIf(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name)
Definition: Compiler.cpp:391
std::vector< std::vector< uint8_t > > m_temp_pages
we need temporary code pages for some compilations passes
Definition: Compiler.hpp:86
void compileLetMutSet(internal::Keyword n, const internal::Node &x, int p)
Definition: Compiler.cpp:469
std::vector< std::string > m_defined_symbols
Definition: Compiler.hpp:82
std::optional< internal::Instruction > isSpecific(const std::string &name) noexcept
Check if a symbol needs to be compiled to a specific instruction.
Definition: Compiler.hpp:167
Compiler(unsigned debug, const std::vector< std::string > &libenv, uint16_t options=DefaultFeatures)
Construct a new Compiler object.
Definition: Compiler.cpp:19
void checkForUndefinedSymbol()
Checks for undefined symbols, not present in the defined symbols table.
Definition: Compiler.cpp:737
internal::Optimizer m_optimizer
Definition: Compiler.hpp:78
const bytecode_t & bytecode() noexcept
Return the constructed bytecode object.
Definition: Compiler.cpp:86
bool mayBeFromPlugin(const std::string &name) noexcept
Checking if a symbol may be coming from a plugin.
Definition: Compiler.cpp:218
void compileDel(const internal::Node &x, int p)
Definition: Compiler.cpp:536
void compileFunction(const internal::Node &x, int p, bool produces_result, const std::string &var_name)
Definition: Compiler.cpp:419
std::vector< std::string > m_plugins
Definition: Compiler.hpp:83
void compile()
Start the compilation.
Definition: Compiler.cpp:33
void compilePluginImport(const internal::Node &x, int p)
Definition: Compiler.cpp:525
void addDefinedSymbol(const std::string &sym)
Register a symbol as defined, so that later we can throw errors on undefined symbols.
Definition: Compiler.cpp:729
std::string offerSuggestion(const std::string &str)
Suggest a symbol of what the user may have meant to input.
Definition: Compiler.cpp:756
unsigned m_debug
the debug level of the compiler
Definition: Compiler.hpp:89
std::vector< internal::ValTableElem > m_values
Definition: Compiler.hpp:84
void compileWhile(const internal::Node &x, int p)
Definition: Compiler.cpp:487
std::optional< std::size_t > isBuiltin(const std::string &name) noexcept
Checking if a symbol is a builtin.
Definition: Compiler.cpp:198
uint16_t m_options
Definition: Compiler.hpp:79
internal::Parser m_parser
Definition: Compiler.hpp:77
uint16_t addSymbol(const internal::Node &sym)
Register a given node in the symbol table.
Definition: Compiler.cpp:676
void compileSpecific(const internal::Node &c0, const internal::Node &x, int p, bool produces_result)
Definition: Compiler.cpp:356
uint16_t addValue(const internal::Node &x)
Register a given node in the value table.
Definition: Compiler.cpp:695
std::vector< uint8_t > * page_ptr(int i) noexcept
helper functions to get a temp or finalized code page
Definition: Compiler.hpp:122
void handleCalls(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name)
Definition: Compiler.cpp:545
void feed(const std::string &code, const std::string &filename=ARK_NO_NAME_FILE)
Feed the different variables with information taken from the given source code file.
Definition: Compiler.cpp:24
void compileSymbol(const internal::Node &x, int p, bool produces_result)
Definition: Compiler.cpp:333
void setNumberAt(int p, std::size_t at_inst, std::size_t number)
Definition: Compiler.hpp:129
std::vector< internal::Node > m_symbols
Definition: Compiler.hpp:81
void throwCompilerError(const std::string &message, const internal::Node &node)
Throw a nice error message.
Definition: Compiler.cpp:228
std::optional< std::size_t > isOperator(const std::string &name) noexcept
Checking if a symbol is an operator.
Definition: Compiler.cpp:190
void compileQuote(const internal::Node &x, int p, bool produces_result, bool is_terminal, const std::string &var_name)
Definition: Compiler.cpp:508
void saveTo(const std::string &file)
Save generated bytecode to a file.
Definition: Compiler.cpp:76
std::vector< std::vector< uint8_t > > m_code_pages
Definition: Compiler.hpp:85
bytecode_t m_bytecode
Definition: Compiler.hpp:88
std::size_t countArkObjects(const std::vector< internal::Node > &lst) noexcept
Count the number of "valid" ark objects in a node.
Definition: Compiler.cpp:179
void pushFileHeader() noexcept
Push the file headers (magic, version used, timestamp)
Definition: Compiler.cpp:91
void pushSymAndValTables()
Push the symbols and values tables.
Definition: Compiler.cpp:123
The class handling the macros definitions and calls, given an AST.
Definition: Processor.hpp:30
const Node & ast() const noexcept
Return the modified AST.
Definition: Processor.cpp:46
void feed(const Node &ast)
Send the complete AST (after the inclusions and stuff), and work on it.
Definition: Processor.cpp:30
A node of an Abstract Syntax Tree for ArkScript.
Definition: Node.hpp:29
NodeType nodeType() const noexcept
Return the node type.
Definition: Node.cpp:126
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition: Node.cpp:92
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition: Node.cpp:119
void feed(const Node &ast)
Send the AST to the optimizer, then run the different optimization strategies on it.
Definition: Optimizer.cpp:9
const Node & ast() const noexcept
Returns the modified AST.
Definition: Optimizer.cpp:17
const Node & ast() const noexcept
Return the generated AST.
Definition: Parser.cpp:59
void feed(const std::string &code, const std::string &filename=ARK_NO_NAME_FILE)
Give the code to parse.
Definition: Parser.cpp:21
std::vector< std::string > splitString(const std::string &source, char sep)
Cut a string into pieces, given a character separator.
Definition: Utils.hpp:34
int levenshteinDistance(const std::string &str1, const std::string &str2)
Calculate the Levenshtein distance between two strings.
Definition: Utils.cpp:33
const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 25 > operators
Definition: Common.hpp:89
std::string makeNodeBasedErrorCtx(const std::string &message, const Node &node)
Construct an error message based on a given node.
Keyword
The different keywords available.
Definition: Common.hpp:59
Instruction
The different bytecodes are stored here.
Definition: Builtins.hpp:21
std::vector< uint8_t > bytecode_t
Definition: Common.hpp:22
A Compiler Value class helper to handle multiple types.