ArkScript
A small, fast, functional and scripting language for video games
BytecodeReader.cpp
Go to the documentation of this file.
2
5
6#include <unordered_map>
7#include <Proxy/Picosha2.hpp>
10#include <fmt/core.h>
11#include <fmt/color.h>
12
13namespace Ark
14{
15 using namespace Ark::internal;
16
17 void BytecodeReader::feed(const bytecode_t& bytecode)
18 {
19 m_bytecode = bytecode;
20 }
21
22 void BytecodeReader::feed(const std::string& file)
23 {
24 std::ifstream ifs(file, std::ios::binary | std::ios::ate);
25 if (!ifs.good())
26 throw std::runtime_error(fmt::format("[BytecodeReader] Couldn't open file '{}'", file));
27
28 const auto pos = ifs.tellg();
29 // reserve appropriate number of bytes
30 std::vector<char> temp(static_cast<std::size_t>(pos));
31 ifs.seekg(0, std::ios::beg);
32 ifs.read(&temp[0], pos);
33 ifs.close();
34
35 m_bytecode = bytecode_t(static_cast<std::size_t>(pos));
36 for (std::size_t i = 0; i < static_cast<std::size_t>(pos); ++i)
37 m_bytecode[i] = static_cast<uint8_t>(temp[i]);
38 }
39
41 {
42 return m_bytecode.size() >= bytecode::Magic.size() &&
43 m_bytecode[0] == bytecode::Magic[0] &&
44 m_bytecode[1] == bytecode::Magic[1] &&
45 m_bytecode[2] == bytecode::Magic[2] &&
47 }
48
50 {
51 if (!checkMagic() || m_bytecode.size() < bytecode::Magic.size() + bytecode::Version.size())
52 return Version { 0, 0, 0 };
53
54 return Version {
55 .major = static_cast<uint16_t>((m_bytecode[4] << 8) + m_bytecode[5]),
56 .minor = static_cast<uint16_t>((m_bytecode[6] << 8) + m_bytecode[7]),
57 .patch = static_cast<uint16_t>((m_bytecode[8] << 8) + m_bytecode[9])
58 };
59 }
60
61 unsigned long long BytecodeReader::timestamp() const
62 {
63 // 4 (ark\0) + version (2 bytes / number) + timestamp = 18 bytes
65 return 0;
66
67 // reading the timestamp in big endian
68 using timestamp_t = unsigned long long;
69 return (static_cast<timestamp_t>(m_bytecode[10]) << 56) +
70 (static_cast<timestamp_t>(m_bytecode[11]) << 48) +
71 (static_cast<timestamp_t>(m_bytecode[12]) << 40) +
72 (static_cast<timestamp_t>(m_bytecode[13]) << 32) +
73 (static_cast<timestamp_t>(m_bytecode[14]) << 24) +
74 (static_cast<timestamp_t>(m_bytecode[15]) << 16) +
75 (static_cast<timestamp_t>(m_bytecode[16]) << 8) +
76 static_cast<timestamp_t>(m_bytecode[17]);
77 }
78
79 std::vector<unsigned char> BytecodeReader::sha256() const
80 {
81 if (!checkMagic() || m_bytecode.size() < bytecode::HeaderSize + picosha2::k_digest_size)
82 return {};
83
84 std::vector<unsigned char> sha(picosha2::k_digest_size);
85 for (std::size_t i = 0; i < picosha2::k_digest_size; ++i)
86 sha[i] = m_bytecode[bytecode::HeaderSize + i];
87 return sha;
88 }
89
91 {
92 if (!checkMagic() || m_bytecode.size() < bytecode::HeaderSize + picosha2::k_digest_size ||
93 m_bytecode[bytecode::HeaderSize + picosha2::k_digest_size] != SYM_TABLE_START)
94 return {};
95
96 std::size_t i = bytecode::HeaderSize + picosha2::k_digest_size + 1;
97 const uint16_t size = readNumber(i);
98 i++;
99
100 Symbols block;
101 block.start = bytecode::HeaderSize + picosha2::k_digest_size;
102 block.symbols.reserve(size);
103
104 for (uint16_t j = 0; j < size; ++j)
105 {
106 std::string content;
107 while (m_bytecode[i] != 0)
108 content.push_back(static_cast<char>(m_bytecode[i++]));
109 i++;
110
111 block.symbols.push_back(content);
112 }
113
114 block.end = i;
115 return block;
116 }
117
119 {
120 if (!checkMagic())
121 return {};
122
123 std::size_t i = symbols.end;
124 if (m_bytecode[i] != VAL_TABLE_START)
125 return {};
126 i++;
127
128 const uint16_t size = readNumber(i);
129 i++;
130 Values block;
131 block.start = symbols.end;
132 block.values.reserve(size);
133
134 for (uint16_t j = 0; j < size; ++j)
135 {
136 const uint8_t type = m_bytecode[i];
137 i++;
138
139 if (type == NUMBER_TYPE)
140 {
142 m_bytecode.begin() + static_cast<std::vector<uint8_t>::difference_type>(i), m_bytecode.end());
143 i += sizeof(decltype(exp));
145 m_bytecode.begin() + static_cast<std::vector<uint8_t>::difference_type>(i), m_bytecode.end());
146 i += sizeof(decltype(mant));
147
148 const ieee754::DecomposedDouble d { exp, mant };
149 double val = ieee754::deserialize(d);
150 block.values.emplace_back(val);
151 }
152 else if (type == STRING_TYPE)
153 {
154 std::string val;
155 while (m_bytecode[i] != 0)
156 val.push_back(static_cast<char>(m_bytecode[i++]));
157 block.values.emplace_back(val);
158 }
159 else if (type == FUNC_TYPE)
160 {
161 const uint16_t addr = readNumber(i);
162 i++;
163 block.values.emplace_back(addr);
164 }
165 else
166 throw std::runtime_error(fmt::format("Unknown value type: {:x}", type));
167 i++;
168 }
169
170 block.end = i;
171 return block;
172 }
173
175 {
176 if (!checkMagic())
177 return {};
178
179 std::size_t i = values.end;
181 return {};
182 i++;
183
184 const uint16_t size = readNumber(i);
185 i++;
186
187 Filenames block;
188 block.start = values.end;
189 block.filenames.reserve(size);
190
191 for (uint16_t j = 0; j < size; ++j)
192 {
193 std::string val;
194 while (m_bytecode[i] != 0)
195 val.push_back(static_cast<char>(m_bytecode[i++]));
196 block.filenames.emplace_back(val);
197 i++;
198 }
199
200 block.end = i;
201 return block;
202 }
203
205 {
206 if (!checkMagic())
207 return {};
208
209 std::size_t i = filenames.end;
211 return {};
212 i++;
213
214 const uint16_t size = readNumber(i);
215 i++;
216
217 InstLocations block;
218 block.start = filenames.end;
219 block.locations.reserve(size);
220
221 for (uint16_t j = 0; j < size; ++j)
222 {
223 auto pp = readNumber(i);
224 i++;
225
226 auto ip = readNumber(i);
227 i++;
228
229 auto file_id = readNumber(i);
230 i++;
231
232 auto line = deserializeBE<uint32_t>(
233 m_bytecode.begin() + static_cast<std::vector<uint8_t>::difference_type>(i), m_bytecode.end());
234 i += 4;
235
236 block.locations.push_back(
237 { .page_pointer = pp,
238 .inst_pointer = ip,
239 .filename_id = file_id,
240 .line = line });
241 }
242
243 block.end = i;
244 return block;
245 }
246
247 Code BytecodeReader::code(const InstLocations& instLocations) const
248 {
249 if (!checkMagic())
250 return {};
251
252 std::size_t i = instLocations.end;
253
254 Code block;
255 block.start = i;
256
257 while (m_bytecode[i] == CODE_SEGMENT_START)
258 {
259 i++;
260 const std::size_t size = readNumber(i) * 4;
261 i++;
262
263 block.pages.emplace_back().reserve(size);
264 for (std::size_t j = 0; j < size; ++j)
265 block.pages.back().push_back(m_bytecode[i++]);
266
267 if (i == m_bytecode.size())
268 break;
269 }
270
271 return block;
272 }
273
275 const std::optional<uint16_t> sStart,
276 const std::optional<uint16_t> sEnd,
277 const std::optional<uint16_t> cPage) const
278 {
279 if (!checkMagic())
280 {
281 fmt::println("Invalid format");
282 return;
283 }
284
285 auto [major, minor, patch] = version();
286 fmt::println("Version: {}.{}.{}", major, minor, patch);
287 fmt::println("Timestamp: {}", timestamp());
288 fmt::print("SHA256: ");
289 for (const auto sha = sha256(); unsigned char h : sha)
290 fmt::print("{:02x}", h);
291 fmt::print("\n\n");
292
293 // reading the different tables, one after another
294
295 if ((sStart.has_value() && !sEnd.has_value()) || (!sStart.has_value() && sEnd.has_value()))
296 {
297 fmt::print(fmt::fg(fmt::color::red), "Both start and end parameter need to be provided together\n");
298 return;
299 }
300 if (sStart.has_value() && sEnd.has_value() && sStart.value() >= sEnd.value())
301 {
302 fmt::print(fmt::fg(fmt::color::red), "Invalid slice start and end arguments\n");
303 return;
304 }
305
306 const auto syms = symbols();
307 const auto vals = values(syms);
308 const auto files = filenames(vals);
309 const auto inst_locs = instLocations(files);
310 const auto code_block = code(inst_locs);
311
312 // symbols table
313 {
314 std::size_t size = syms.symbols.size();
315 std::size_t sliceSize = size;
316 bool showSym = (segment == BytecodeSegment::All || segment == BytecodeSegment::Symbols);
317
318 if (showSym && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
319 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
320 else if (showSym && sStart.has_value() && sEnd.has_value())
321 sliceSize = sEnd.value() - sStart.value() + 1;
322
323 if (showSym || segment == BytecodeSegment::HeadersOnly)
324 fmt::println("{} (length: {})", fmt::styled("Symbols table", fmt::fg(fmt::color::cyan)), sliceSize);
325
326 for (std::size_t j = 0; j < size; ++j)
327 {
328 if (auto start = sStart; auto end = sEnd)
329 showSym = showSym && (j >= start.value() && j <= end.value());
330
331 if (showSym)
332 fmt::println("{}) {}", j, syms.symbols[j]);
333 }
334
335 if (showSym)
336 fmt::print("\n");
337 if (segment == BytecodeSegment::Symbols)
338 return;
339 }
340
341 // values table
342 {
343 std::size_t size = vals.values.size();
344 std::size_t sliceSize = size;
345
346 bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::Values);
347 if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
348 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
349 else if (showVal && sStart.has_value() && sEnd.has_value())
350 sliceSize = sEnd.value() - sStart.value() + 1;
351
352 if (showVal || segment == BytecodeSegment::HeadersOnly)
353 fmt::println("{} (length: {})", fmt::styled("Constants table", fmt::fg(fmt::color::cyan)), sliceSize);
354
355 for (std::size_t j = 0; j < size; ++j)
356 {
357 if (auto start = sStart; auto end = sEnd)
358 showVal = showVal && (j >= start.value() && j <= end.value());
359
360 if (showVal)
361 {
362 switch (const auto val = vals.values[j]; val.valueType())
363 {
365 fmt::println("{}) (Number) {}", j, val.number());
366 break;
368 fmt::println("{}) (String) {}", j, val.string());
369 break;
371 fmt::println("{}) (PageAddr) {}", j, val.pageAddr());
372 break;
373 default:
374 fmt::print(fmt::fg(fmt::color::red), "Value type not handled: {}\n", types_to_str[static_cast<std::size_t>(val.valueType())]);
375 break;
376 }
377 }
378 }
379
380 if (showVal)
381 fmt::print("\n");
382 if (segment == BytecodeSegment::Values)
383 return;
384 }
385
386 // inst locs + file
387 {
388 std::size_t size = inst_locs.locations.size();
389 std::size_t sliceSize = size;
390
391 bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::InstructionLocation);
392 if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
393 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
394 else if (showVal && sStart.has_value() && sEnd.has_value())
395 sliceSize = sEnd.value() - sStart.value() + 1;
396
397 if (showVal || segment == BytecodeSegment::HeadersOnly)
398 fmt::println("{} (length: {})", fmt::styled("Instruction locations table", fmt::fg(fmt::color::cyan)), sliceSize);
399 if (showVal && size > 0)
400 fmt::println(" PP, IP");
401
402 for (std::size_t j = 0; j < size; ++j)
403 {
404 if (auto start = sStart; auto end = sEnd)
405 showVal = showVal && (j >= start.value() && j <= end.value());
406
407 const auto& location = inst_locs.locations[j];
408 if (showVal)
409 fmt::println("{:>3},{:>3} -> {}:{}", location.page_pointer, location.inst_pointer, files.filenames[location.filename_id], location.line);
410 }
411
412 if (showVal)
413 fmt::print("\n");
414 }
415
416 const auto stringify_value = [](const Value& val) -> std::string {
417 switch (val.valueType())
418 {
420 return fmt::format("{} (Number)", val.number());
422 return fmt::format("{} (String)", val.string());
424 return fmt::format("{} (PageAddr)", val.pageAddr());
425 default:
426 return "";
427 }
428 };
429
430 enum class ArgKind
431 {
432 Symbol,
433 Value,
434 Builtin,
435 Raw
436 };
437
438 struct Arg
439 {
440 ArgKind kind;
441 uint16_t arg;
442 };
443
444 const std::unordered_map<Instruction, ArgKind> arg_kinds = {
445 { LOAD_SYMBOL, ArgKind::Symbol },
446 { LOAD_SYMBOL_BY_INDEX, ArgKind::Raw },
447 { LOAD_CONST, ArgKind::Value },
448 { POP_JUMP_IF_TRUE, ArgKind::Raw },
449 { STORE, ArgKind::Symbol },
450 { SET_VAL, ArgKind::Symbol },
451 { POP_JUMP_IF_FALSE, ArgKind::Raw },
452 { JUMP, ArgKind::Raw },
453 { CALL, ArgKind::Raw },
454 { CALL_BUILTIN, ArgKind::Raw },
455 { CAPTURE, ArgKind::Symbol },
456 { BUILTIN, ArgKind::Builtin },
457 { DEL, ArgKind::Symbol },
458 { MAKE_CLOSURE, ArgKind::Value },
459 { GET_FIELD, ArgKind::Symbol },
460 { PLUGIN, ArgKind::Value },
461 { LIST, ArgKind::Raw },
462 { APPEND, ArgKind::Raw },
463 { CONCAT, ArgKind::Raw },
464 { APPEND_IN_PLACE, ArgKind::Raw },
465 { CONCAT_IN_PLACE, ArgKind::Raw }
466 };
467
468 const auto color_print_inst = [&syms, &vals, &stringify_value](const std::string& name, std::optional<Arg> arg = std::nullopt) {
469 fmt::print("{}", fmt::styled(name, fmt::fg(fmt::color::gold)));
470 if (arg.has_value())
471 {
472 switch (auto [kind, idx] = arg.value(); kind)
473 {
474 case ArgKind::Symbol:
475 fmt::print(fmt::fg(fmt::color::green), " {}\n", syms.symbols[idx]);
476 break;
477 case ArgKind::Value:
478 fmt::print(fmt::fg(fmt::color::magenta), " {}\n", stringify_value(vals.values[idx]));
479 break;
480 case ArgKind::Builtin:
481 fmt::print(" {}\n", Builtins::builtins[idx].first);
482 break;
483 case ArgKind::Raw:
484 fmt::print(fmt::fg(fmt::color::red), " ({})\n", idx);
485 break;
486 }
487 }
488 else
489 fmt::print("\n");
490 };
491
492 if (segment == BytecodeSegment::All || segment == BytecodeSegment::Code || segment == BytecodeSegment::HeadersOnly)
493 {
494 uint16_t pp = 0;
495
496 for (const auto& page : code_block.pages)
497 {
498 bool displayCode = true;
499
500 if (auto wanted_page = cPage)
501 displayCode = pp == wanted_page.value();
502
503 if (displayCode)
504 fmt::println(
505 "{} {} (length: {})",
506 fmt::styled("Code segment", fmt::fg(fmt::color::magenta)),
507 fmt::styled(pp, fmt::fg(fmt::color::magenta)),
508 page.size());
509
510 if (page.empty())
511 {
512 if (displayCode)
513 fmt::print("NOP");
514 }
515 else
516 {
517 if (cPage.value_or(pp) != pp)
518 continue;
519 if (segment == BytecodeSegment::HeadersOnly)
520 continue;
521 if (sStart.has_value() && sEnd.has_value() && ((sStart.value() > page.size()) || (sEnd.value() > page.size())))
522 {
523 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", page.size());
524 return;
525 }
526
527 for (std::size_t j = sStart.value_or(0), end = sEnd.value_or(page.size()); j < end; j += 4)
528 {
529 const uint8_t inst = page[j];
530 // TEMP
531 const uint8_t padding = page[j + 1];
532 const auto arg = static_cast<uint16_t>((page[j + 2] << 8) + page[j + 3]);
533
534 // instruction number
535 fmt::print(fmt::fg(fmt::color::cyan), "{:>4}", j / 4);
536 // padding inst arg arg
537 fmt::print(" {:02x} {:02x} {:02x} {:02x} ", inst, padding, page[j + 2], page[j + 3]);
538
539 if (const auto idx = static_cast<std::size_t>(inst); idx < InstructionNames.size())
540 {
541 const auto inst_name = InstructionNames[idx];
542 if (const auto iinst = static_cast<Instruction>(inst); arg_kinds.contains(iinst))
543 color_print_inst(inst_name, Arg { arg_kinds.at(iinst), arg });
544 else
545 color_print_inst(inst_name);
546 }
547 else
548 fmt::println("Unknown instruction");
549 }
550 }
551 if (displayCode && segment != BytecodeSegment::HeadersOnly)
552 fmt::print("\n");
553
554 ++pp;
555 }
556 }
557 }
558
559 uint16_t BytecodeReader::readNumber(std::size_t& i) const
560 {
561 const auto x = static_cast<uint16_t>(m_bytecode[i] << 8);
562 const uint16_t y = m_bytecode[++i];
563 return x + y;
564 }
565}
Host the declaration of all the ArkScript builtins.
A bytecode disassembler for ArkScript.
The different instructions used by the compiler and virtual machine.
Symbols symbols() const
unsigned long long timestamp() const
Return the read timestamp from the bytecode file.
uint16_t readNumber(std::size_t &i) const
Read a number from the bytecode, under the instruction pointer i.
Filenames filenames(const Values &values) const
InstLocations instLocations(const Filenames &filenames) const
Version version() const
Code code(const InstLocations &instLocations) const
Values values(const Symbols &symbols) const
void display(BytecodeSegment segment=BytecodeSegment::All, std::optional< uint16_t > sStart=std::nullopt, std::optional< uint16_t > sEnd=std::nullopt, std::optional< uint16_t > cPage=std::nullopt) const
Display the bytecode opcodes in a human-friendly way.
std::vector< unsigned char > sha256() const
void feed(const std::string &file)
Construct needed data before displaying information about a given file.
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array Magic
Definition Common.hpp:29
constexpr std::array Version
Definition Common.hpp:30
constexpr std::size_t HeaderSize
Definition Common.hpp:39
double deserialize(const DecomposedDouble d)
T deserializeBE(std::vector< uint8_t >::const_iterator begin, std::vector< uint8_t >::const_iterator end)
Instruction
The different bytecodes are stored here.
T deserializeLE(std::vector< uint8_t >::const_iterator begin, std::vector< uint8_t >::const_iterator end)
constexpr std::array InstructionNames
constexpr std::array types_to_str
Definition Value.hpp:51
std::vector< uint8_t > bytecode_t
Definition Common.hpp:22
std::vector< bytecode_t > pages
std::size_t start
Point to the CODE_SEGMENT_START byte in the bytecode.
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::vector< std::string > filenames
std::size_t start
Point to the FILENAMES_TABLE_START byte in the bytecode.
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the INST_LOC_TABLE_START byte in the bytecode.
std::vector< internal::InstLoc > locations
std::vector< std::string > symbols
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the SYM_TABLE_START byte in the bytecode.
std::vector< Value > values
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the VAL_TABLE_START byte in the bytecode.