ArkScript
A small, fast, functional and scripting language for video games
BytecodeReader.cpp
Go to the documentation of this file.
2
5#undef abs
6#include <Ark/Utils.hpp>
7
8#include <termcolor/proxy.hpp>
9#include <picosha2.h>
10
11namespace Ark
12{
13 using namespace Ark::internal;
14
15 void BytecodeReader::feed(const std::string& file)
16 {
17 std::ifstream ifs(file, std::ios::binary | std::ios::ate);
18 if (!ifs.good())
19 throw std::runtime_error("[BytecodeReader] Couldn't open file '" + file + "'");
20 std::size_t pos = ifs.tellg();
21 // reserve appropriate number of bytes
22 std::vector<char> temp(pos);
23 ifs.seekg(0, std::ios::beg);
24 ifs.read(&temp[0], pos);
25 ifs.close();
26
28 for (std::size_t i = 0; i < pos; ++i)
29 m_bytecode[i] = static_cast<uint8_t>(temp[i]);
30 }
31
33 {
34 return m_bytecode;
35 }
36
37 unsigned long long BytecodeReader::timestamp()
38 {
39 bytecode_t b = bytecode();
40 std::size_t i = 0;
41
42 // we want to see a 'ark\0' header
43 if (!(b.size() > 4 && b[i++] == 'a' && b[i++] == 'r' && b[i++] == 'k' && b[i++] == Instruction::NOP))
44 return 0;
45
46 // read major, minor and patch
47 readNumber(i);
48 i++;
49 readNumber(i);
50 i++;
51 readNumber(i);
52 i++;
53
54 // reading the timestamp in big endian
55 using timestamp_t = unsigned long long;
56 timestamp_t timestamp = 0;
57 auto aa = (static_cast<timestamp_t>(m_bytecode[i]) << 56),
58 ba = (static_cast<timestamp_t>(m_bytecode[++i]) << 48),
59 ca = (static_cast<timestamp_t>(m_bytecode[++i]) << 40),
60 da = (static_cast<timestamp_t>(m_bytecode[++i]) << 32),
61 ea = (static_cast<timestamp_t>(m_bytecode[++i]) << 24),
62 fa = (static_cast<timestamp_t>(m_bytecode[++i]) << 16),
63 ga = (static_cast<timestamp_t>(m_bytecode[++i]) << 8),
64 ha = (static_cast<timestamp_t>(m_bytecode[++i]));
65 i++;
66 timestamp = aa + ba + ca + da + ea + fa + ga + ha;
67
68 return timestamp;
69 }
70
72 const std::optional<uint16_t> sStart,
73 const std::optional<uint16_t> sEnd,
74 const std::optional<uint16_t> cPage)
75 {
76 bytecode_t b = bytecode();
77 std::size_t i = 0;
78
79 std::ostream& os = std::cout;
80
81 if (!(b.size() > 4 && b[i++] == 'a' && b[i++] == 'r' && b[i++] == 'k' && b[i++] == Instruction::NOP))
82 {
83 os << "Invalid format";
84 return;
85 }
86
87 uint16_t major = readNumber(i);
88 i++;
89 uint16_t minor = readNumber(i);
90 i++;
91 uint16_t patch = readNumber(i);
92 i++;
93 os << "Version: " << major << "." << minor << "." << patch << "\n";
94
95 using timestamp_t = unsigned long long;
96 timestamp_t timestamp = 0;
97 auto aa = (static_cast<timestamp_t>(m_bytecode[i]) << 56),
98 ba = (static_cast<timestamp_t>(m_bytecode[++i]) << 48),
99 ca = (static_cast<timestamp_t>(m_bytecode[++i]) << 40),
100 da = (static_cast<timestamp_t>(m_bytecode[++i]) << 32),
101 ea = (static_cast<timestamp_t>(m_bytecode[++i]) << 24),
102 fa = (static_cast<timestamp_t>(m_bytecode[++i]) << 16),
103 ga = (static_cast<timestamp_t>(m_bytecode[++i]) << 8),
104 ha = (static_cast<timestamp_t>(m_bytecode[++i]));
105 i++;
106 timestamp = aa + ba + ca + da + ea + fa + ga + ha;
107 os << "Timestamp: " << timestamp << "\n";
108
109 os << "SHA256: ";
110 for (std::size_t j = 0; j < picosha2::k_digest_size; ++j)
111 {
112 os << std::hex << static_cast<int>(m_bytecode[i]) << std::dec;
113 ++i;
114 }
115 os << "\n\n";
116
117 std::vector<std::string> symbols;
118 std::vector<std::string> values;
119
120 // reading the different tables, one after another
121
122 if ((sStart.has_value() && !sEnd.has_value()) || (!sStart.has_value() && sEnd.has_value()))
123 {
124 os << termcolor::red << "Both start and end parameter need to be provided together\n"
125 << termcolor::reset;
126 return;
127 }
128 else if (sStart.has_value() && sEnd.has_value() && sStart.value() >= sEnd.value())
129 {
130 os << termcolor::red << "Invalid slice start and end arguments\n"
131 << termcolor::reset;
132 return;
133 }
134
135 if (b[i] == Instruction::SYM_TABLE_START)
136 {
137 i++;
138 uint16_t size = readNumber(i);
139 i++;
140 uint16_t sliceSize = size;
141 bool showSym = (segment == BytecodeSegment::All || segment == BytecodeSegment::Symbols);
142
143 if (showSym && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
144 os << termcolor::red << "Slice start or end can't be greater than the segment size: " << size << "\n";
145 else if (showSym && sStart.has_value() && sEnd.has_value())
146 sliceSize = sEnd.value() - sStart.value() + 1;
147
148 if (showSym || segment == BytecodeSegment::HeadersOnly)
149 os << termcolor::cyan << "Symbols table" << termcolor::reset << " (length: " << sliceSize << ")\n";
150
151 for (uint16_t j = 0; j < size; ++j)
152 {
153 if (auto start = sStart; auto end = sEnd)
154 showSym = showSym && (j >= start.value() && j <= end.value());
155
156 std::string content;
157 while (b[i] != 0)
158 content += b[i++];
159 i++;
160
161 if (showSym)
162 {
163 os << static_cast<int>(j) << ") ";
164 os << content << "\n";
165 }
166
167 symbols.push_back(content);
168 }
169 if (showSym)
170 os << "\n";
171 }
172 else
173 {
174 os << termcolor::red << "Missing symbole table entry point\n"
175 << termcolor::reset;
176 return;
177 }
178
179 if (segment == BytecodeSegment::Symbols)
180 return;
181
182 if (b[i] == Instruction::VAL_TABLE_START)
183 {
184 i++;
185 uint16_t size = readNumber(i);
186 i++;
187 uint16_t sliceSize = size;
188
189 bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::Values);
190 if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
191 os << termcolor::red << "Slice start or end can't be greater than the segment size: " << size << "\n";
192 else if (showVal && sStart.has_value() && sEnd.has_value())
193 sliceSize = sEnd.value() - sStart.value() + 1;
194
195 if (showVal || segment == BytecodeSegment::HeadersOnly)
196 os << termcolor::green << "Constants table" << termcolor::reset << " (length: " << sliceSize << ")\n";
197
198 for (uint16_t j = 0; j < size; ++j)
199 {
200 if (auto start = sStart; auto end = sEnd)
201 showVal = showVal && (j >= start.value() && j <= end.value());
202
203 if (showVal)
204 os << static_cast<int>(j) << ") ";
205 uint8_t type = b[i];
206 i++;
207
208 if (type == Instruction::NUMBER_TYPE)
209 {
210 std::string val;
211 while (b[i] != 0)
212 val.push_back(b[i++]);
213 i++;
214 if (showVal)
215 os << "(Number) " << val;
216 values.push_back("(Number) " + val);
217 }
218 else if (type == Instruction::STRING_TYPE)
219 {
220 std::string val;
221 while (b[i] != 0)
222 val.push_back(b[i++]);
223 i++;
224 if (showVal)
225 os << "(String) " << val;
226 values.push_back("(String) " + val);
227 }
228 else if (type == Instruction::FUNC_TYPE)
229 {
230 uint16_t addr = readNumber(i);
231 i++;
232 if (showVal)
233 os << "(PageAddr) " << addr;
234 values.push_back("(PageAddr) " + std::to_string(addr));
235 i++;
236 }
237 else
238 {
239 os << termcolor::red << "Unknown value type: " << static_cast<int>(type) << '\n'
240 << termcolor::reset;
241 return;
242 }
243
244 if (showVal)
245 os << "\n";
246 }
247
248 if (showVal)
249 os << "\n";
250 }
251 else
252 {
253 os << termcolor::red << "Missing constant table entry point\n"
254 << termcolor::reset;
255 return;
256 }
257
258 if (segment == BytecodeSegment::Values)
259 return;
260
261 uint16_t pp = 0;
262
263 while (b[i] == Instruction::CODE_SEGMENT_START && (segment == BytecodeSegment::All || segment == BytecodeSegment::Code || segment == BytecodeSegment::HeadersOnly))
264 {
265 i++;
266 uint16_t size = readNumber(i);
267 i++;
268 uint16_t sliceSize = size;
269
270 if (sStart.has_value() && sEnd.has_value())
271 sliceSize = sEnd.value() - sStart.value() + 1;
272
273 bool displayCode = true;
274
275 if (auto page = cPage)
276 displayCode = pp == page.value();
277
278 if (displayCode)
279 os << termcolor::magenta << "Code segment " << pp << termcolor::reset << " (length: " << sliceSize << ")\n";
280
281 if (size == 0)
282 {
283 if (displayCode)
284 os << "NOP";
285 }
286 else
287 {
288 uint16_t j = i;
289
290 bool displayLine = segment == BytecodeSegment::HeadersOnly ? false : displayCode;
291 while (true)
292 {
293 uint16_t line_number = i - j;
294 if (sStart.has_value() && sEnd.has_value() && ((sStart.value() > size) || (sEnd.value() > size)))
295 {
296 os << termcolor::red << "Slice start or end can't be greater than the segment size: " << size << termcolor::reset << "\n";
297 return;
298 }
299 else if (sStart.has_value() && sEnd.has_value() && cPage.has_value())
300 displayLine = displayCode && (line_number >= sStart.value() && line_number <= sEnd.value());
301
302 if (displayLine)
303 os << termcolor::cyan << line_number << termcolor::reset << " " << termcolor::yellow;
304 uint8_t inst = b[i];
305 i++;
306
307 if (inst == Instruction::NOP)
308 {
309 if (displayLine)
310 os << "NOP\n";
311 }
312 else if (inst == Instruction::LOAD_SYMBOL)
313 {
314 uint16_t index = readNumber(i);
315 if (displayLine)
316 os << "LOAD_SYMBOL " << termcolor::green << symbols[index] << "\n";
317 i++;
318 }
319 else if (inst == Instruction::LOAD_CONST)
320 {
321 uint16_t index = readNumber(i);
322 if (displayLine)
323 os << "LOAD_CONST " << termcolor::magenta << values[index] << "\n";
324 i++;
325 }
326 else if (inst == Instruction::POP_JUMP_IF_TRUE)
327 {
328 uint16_t value = readNumber(i);
329 if (displayLine)
330 os << "POP_JUMP_IF_TRUE " << termcolor::red << "(" << value << ")\n";
331 i++;
332 }
333 else if (inst == Instruction::STORE)
334 {
335 uint16_t index = readNumber(i);
336 if (displayLine)
337 os << "STORE " << termcolor::green << symbols[index] << "\n";
338 i++;
339 }
340 else if (inst == Instruction::LET)
341 {
342 uint16_t index = readNumber(i);
343 if (displayLine)
344 os << "LET " << termcolor::green << symbols[index] << "\n";
345 i++;
346 }
347 else if (inst == Instruction::POP_JUMP_IF_FALSE)
348 {
349 uint16_t value = readNumber(i);
350 if (displayLine)
351 os << "POP_JUMP_IF_FALSE " << termcolor::red << "(" << value << ")\n";
352 i++;
353 }
354 else if (inst == Instruction::JUMP)
355 {
356 uint16_t value = readNumber(i);
357 if (displayLine)
358 os << "JUMP " << termcolor::red << "(" << value << ")\n";
359 i++;
360 }
361 else if (inst == Instruction::RET)
362 {
363 if (displayLine)
364 os << "RET\n";
365 }
366 else if (inst == Instruction::HALT)
367 {
368 if (displayLine)
369 os << "HALT\n";
370 }
371 else if (inst == Instruction::CALL)
372 {
373 uint16_t value = readNumber(i);
374 if (displayLine)
375 os << "CALL " << termcolor::reset << "(" << value << ")\n";
376 i++;
377 }
378 else if (inst == Instruction::CAPTURE)
379 {
380 uint16_t index = readNumber(i);
381 if (displayLine)
382 os << "CAPTURE " << termcolor::reset << symbols[index] << "\n";
383 i++;
384 }
385 else if (inst == Instruction::BUILTIN)
386 {
387 uint16_t index = readNumber(i);
388 if (displayLine)
389 os << "BUILTIN " << termcolor::reset << Builtins::builtins[index].first << "\n";
390 i++;
391 }
392 else if (inst == Instruction::MUT)
393 {
394 uint16_t index = readNumber(i);
395 if (displayLine)
396 os << "MUT " << termcolor::green << symbols[index] << "\n";
397 i++;
398 }
399 else if (inst == Instruction::DEL)
400 {
401 uint16_t index = readNumber(i);
402 if (displayLine)
403 os << "DEL " << termcolor::green << symbols[index] << "\n";
404 i++;
405 }
406 else if (inst == Instruction::SAVE_ENV)
407 {
408 if (displayLine)
409 os << "SAVE_ENV\n";
410 }
411 else if (inst == Instruction::GET_FIELD)
412 {
413 uint16_t index = readNumber(i);
414 if (displayLine)
415 os << "GET_FIELD " << termcolor::green << symbols[index] << "\n";
416 i++;
417 }
418 else if (inst == Instruction::PLUGIN)
419 {
420 uint16_t index = readNumber(i);
421 if (displayLine)
422 os << "PLUGIN " << termcolor::magenta << values[index] << "\n";
423 i++;
424 }
425 else if (inst == Instruction::LIST)
426 {
427 uint16_t value = readNumber(i);
428 if (displayLine)
429 os << "LIST " << termcolor::reset << "(" << value << ")\n";
430 i++;
431 }
432 else if (inst == Instruction::APPEND)
433 {
434 uint16_t value = readNumber(i);
435 if (displayLine)
436 os << "APPEND " << termcolor::reset << "(" << value << ")\n";
437 i++;
438 }
439 else if (inst == Instruction::CONCAT)
440 {
441 uint16_t value = readNumber(i);
442 if (displayLine)
443 os << "CONCAT " << termcolor::reset << "(" << value << ")\n";
444 i++;
445 }
446 else if (inst == Instruction::APPEND_IN_PLACE)
447 {
448 uint16_t value = readNumber(i);
449 if (displayLine)
450 os << "APPEND_IN_PLACE " << termcolor::reset << "(" << value << ")\n";
451 i++;
452 }
453 else if (inst == Instruction::CONCAT_IN_PLACE)
454 {
455 uint16_t value = readNumber(i);
456 if (displayLine)
457 os << "CONCAT_IN_PLACE " << termcolor::reset << "(" << value << ")\n";
458 i++;
459 }
460 else if (inst == Instruction::POP_LIST)
461 {
462 if (displayLine)
463 os << "POP_LIST " << termcolor::reset << "\n";
464 i++;
465 }
466 else if (inst == Instruction::POP_LIST_IN_PLACE)
467 {
468 if (displayLine)
469 os << "POP_LIST_IN_PLACE " << termcolor::reset << "\n";
470 i++;
471 }
472 else if (inst == Instruction::POP)
473 {
474 if (displayLine)
475 os << "POP\n";
476 }
477 else if (inst == Instruction::ADD)
478 {
479 if (displayLine)
480 os << "ADD\n";
481 }
482 else if (inst == Instruction::SUB)
483 {
484 if (displayLine)
485 os << "SUB\n";
486 }
487 else if (inst == Instruction::MUL)
488 {
489 if (displayLine)
490 os << "MUL\n";
491 }
492 else if (inst == Instruction::DIV)
493 {
494 if (displayLine)
495 os << "DIV\n";
496 }
497 else if (inst == Instruction::GT)
498 {
499 if (displayLine)
500 os << "GT\n";
501 }
502 else if (inst == Instruction::LT)
503 {
504 if (displayLine)
505 os << "LT\n";
506 }
507 else if (inst == Instruction::LE)
508 {
509 if (displayLine)
510 os << "LE\n";
511 }
512 else if (inst == Instruction::GE)
513 {
514 if (displayLine)
515 os << "GE\n";
516 }
517 else if (inst == Instruction::NEQ)
518 {
519 if (displayLine)
520 os << "NEQ\n";
521 }
522 else if (inst == Instruction::EQ)
523 {
524 if (displayLine)
525 os << "EQ\n";
526 }
527 else if (inst == Instruction::LEN)
528 {
529 if (displayLine)
530 os << "LEN\n";
531 }
532 else if (inst == Instruction::EMPTY)
533 {
534 if (displayLine)
535 os << "EMPTY\n";
536 }
537 else if (inst == Instruction::TAIL)
538 {
539 if (displayLine)
540 os << "TAIL\n";
541 }
542 else if (inst == Instruction::HEAD)
543 {
544 if (displayLine)
545 os << "HEAD\n";
546 }
547 else if (inst == Instruction::ISNIL)
548 {
549 if (displayLine)
550 os << "ISNIL\n";
551 }
552 else if (inst == Instruction::ASSERT)
553 {
554 if (displayLine)
555 os << "ASSERT\n";
556 }
557 else if (inst == Instruction::TO_NUM)
558 {
559 if (displayLine)
560 os << "TO_NUM\n";
561 }
562 else if (inst == Instruction::TO_STR)
563 {
564 if (displayLine)
565 os << "TO_STR\n";
566 }
567 else if (inst == Instruction::AT)
568 {
569 if (displayLine)
570 os << "AT\n";
571 }
572 else if (inst == Instruction::AND_)
573 {
574 if (displayLine)
575 os << "AND_\n";
576 }
577 else if (inst == Instruction::OR_)
578 {
579 if (displayLine)
580 os << "OR_\n";
581 }
582 else if (inst == Instruction::MOD)
583 {
584 if (displayLine)
585 os << "MOD\n";
586 }
587 else if (inst == Instruction::TYPE)
588 {
589 if (displayLine)
590 os << "TYPE\n";
591 }
592 else if (inst == Instruction::HASFIELD)
593 {
594 if (displayLine)
595 os << "HASFIELD\n";
596 }
597 else if (inst == Instruction::NOT)
598 {
599 if (displayLine)
600 os << "NOT\n";
601 }
602 else
603 {
604 if (displayLine)
605 os << termcolor::reset << "Unknown instruction: " << static_cast<int>(inst) << '\n'
606 << termcolor::reset;
607 return;
608 }
609
610 if (i - j == size)
611 break;
612 }
613 }
614 if (displayCode && segment != BytecodeSegment::HeadersOnly)
615 os << "\n"
616 << termcolor::reset;
617
618 if (cPage.has_value() && pp == cPage)
619 return;
620
621 ++pp;
622
623 if (i == b.size())
624 break;
625 }
626 }
627
628 uint16_t BytecodeReader::readNumber(std::size_t& i)
629 {
630 uint16_t x = (static_cast<uint16_t>(m_bytecode[i]) << 8),
631 y = static_cast<uint16_t>(m_bytecode[++i]);
632 return x + y;
633 }
634}
Host the declaration of all the ArkScript builtins.
A bytecode disassembler for ArkScript.
The different instructions used by the compiler and virtual machine.
Lots of utilities about string, filesystem and more.
const bytecode_t & bytecode() noexcept
Return the bytecode object constructed.
unsigned long long timestamp()
Return the read timestamp from the bytecode file.
void display(BytecodeSegment segment=BytecodeSegment::All, std::optional< uint16_t > sStart=std::nullopt, std::optional< uint16_t > sEnd=std::nullopt, std::optional< uint16_t > cPage=std::nullopt)
Display the bytecode opcode in a human friendly way.
void feed(const std::string &file)
Construct needed data before displaying information about a given file.
uint16_t readNumber(std::size_t &i)
Read a number from the bytecode, under the instruction pointer i.
const std::vector< std::pair< std::string, Value > > builtins
Definition: Builtins.hpp:21
std::vector< uint8_t > bytecode_t
Definition: Common.hpp:22