ArkScript
A small, lisp-inspired, functional scripting language
BytecodeReader.cpp
Go to the documentation of this file.
2
5
6#include <unordered_map>
7#include <Proxy/Picosha2.hpp>
10#include <fmt/core.h>
11#include <fmt/color.h>
12
13namespace Ark
14{
15 using namespace Ark::internal;
16
17 void BytecodeReader::feed(const bytecode_t& bytecode)
18 {
19 m_bytecode = bytecode;
20 }
21
22 void BytecodeReader::feed(const std::string& file)
23 {
24 std::ifstream ifs(file, std::ios::binary | std::ios::ate);
25 if (!ifs.good())
26 throw std::runtime_error(fmt::format("[BytecodeReader] Couldn't open file '{}'", file));
27
28 const auto pos = ifs.tellg();
29 // reserve appropriate number of bytes
30 std::vector<char> temp(static_cast<std::size_t>(pos));
31 ifs.seekg(0, std::ios::beg);
32 ifs.read(&temp[0], pos);
33 ifs.close();
34
35 m_bytecode = bytecode_t(static_cast<std::size_t>(pos));
36 for (std::size_t i = 0; i < static_cast<std::size_t>(pos); ++i)
37 m_bytecode[i] = static_cast<uint8_t>(temp[i]);
38 }
39
41 {
42 return m_bytecode.size() >= bytecode::Magic.size() &&
43 m_bytecode[0] == bytecode::Magic[0] &&
44 m_bytecode[1] == bytecode::Magic[1] &&
45 m_bytecode[2] == bytecode::Magic[2] &&
47 }
48
50 {
51 if (!checkMagic() || m_bytecode.size() < bytecode::Magic.size() + bytecode::Version.size())
52 return Version { 0, 0, 0 };
53
54 return Version {
55 .major = static_cast<uint16_t>((m_bytecode[4] << 8) + m_bytecode[5]),
56 .minor = static_cast<uint16_t>((m_bytecode[6] << 8) + m_bytecode[7]),
57 .patch = static_cast<uint16_t>((m_bytecode[8] << 8) + m_bytecode[9])
58 };
59 }
60
61 unsigned long long BytecodeReader::timestamp() const
62 {
63 // 4 (ark\0) + version (2 bytes / number) + timestamp = 18 bytes
65 return 0;
66
67 // reading the timestamp in big endian
68 using timestamp_t = unsigned long long;
69 return (static_cast<timestamp_t>(m_bytecode[10]) << 56) +
70 (static_cast<timestamp_t>(m_bytecode[11]) << 48) +
71 (static_cast<timestamp_t>(m_bytecode[12]) << 40) +
72 (static_cast<timestamp_t>(m_bytecode[13]) << 32) +
73 (static_cast<timestamp_t>(m_bytecode[14]) << 24) +
74 (static_cast<timestamp_t>(m_bytecode[15]) << 16) +
75 (static_cast<timestamp_t>(m_bytecode[16]) << 8) +
76 static_cast<timestamp_t>(m_bytecode[17]);
77 }
78
79 std::vector<unsigned char> BytecodeReader::sha256() const
80 {
81 if (!checkMagic() || m_bytecode.size() < bytecode::HeaderSize + picosha2::k_digest_size)
82 return {};
83
84 std::vector<unsigned char> sha(picosha2::k_digest_size);
85 for (std::size_t i = 0; i < picosha2::k_digest_size; ++i)
86 sha[i] = m_bytecode[bytecode::HeaderSize + i];
87 return sha;
88 }
89
91 {
92 if (!checkMagic() || m_bytecode.size() < bytecode::HeaderSize + picosha2::k_digest_size ||
93 m_bytecode[bytecode::HeaderSize + picosha2::k_digest_size] != SYM_TABLE_START)
94 return {};
95
96 std::size_t i = bytecode::HeaderSize + picosha2::k_digest_size + 1;
97 const uint16_t size = readNumber(i);
98 i++;
99
100 Symbols block;
101 block.start = bytecode::HeaderSize + picosha2::k_digest_size;
102 block.symbols.reserve(size);
103
104 for (uint16_t j = 0; j < size; ++j)
105 {
106 std::string content;
107 while (m_bytecode[i] != 0)
108 content.push_back(static_cast<char>(m_bytecode[i++]));
109 i++;
110
111 block.symbols.push_back(content);
112 }
113
114 block.end = i;
115 return block;
116 }
117
119 {
120 if (!checkMagic())
121 return {};
122
123 std::size_t i = symbols.end;
124 if (m_bytecode[i] != VAL_TABLE_START)
125 return {};
126 i++;
127
128 const uint16_t size = readNumber(i);
129 i++;
130 Values block;
131 block.start = symbols.end;
132 block.values.reserve(size);
133
134 for (uint16_t j = 0; j < size; ++j)
135 {
136 const uint8_t type = m_bytecode[i];
137 i++;
138
139 if (type == NUMBER_TYPE)
140 {
142 m_bytecode.begin() + static_cast<std::vector<uint8_t>::difference_type>(i), m_bytecode.end());
143 i += sizeof(decltype(exp));
145 m_bytecode.begin() + static_cast<std::vector<uint8_t>::difference_type>(i), m_bytecode.end());
146 i += sizeof(decltype(mant));
147
148 const ieee754::DecomposedDouble d { exp, mant };
149 double val = ieee754::deserialize(d);
150 block.values.emplace_back(val);
151 }
152 else if (type == STRING_TYPE)
153 {
154 std::string val;
155 while (m_bytecode[i] != 0)
156 val.push_back(static_cast<char>(m_bytecode[i++]));
157 block.values.emplace_back(val);
158 }
159 else if (type == FUNC_TYPE)
160 {
161 const uint16_t addr = readNumber(i);
162 i++;
163 block.values.emplace_back(addr);
164 }
165 else
166 throw std::runtime_error(fmt::format("Unknown value type: {:x}", type));
167 i++;
168 }
169
170 block.end = i;
171 return block;
172 }
173
175 {
176 if (!checkMagic())
177 return {};
178
179 std::size_t i = values.end;
181 return {};
182 i++;
183
184 const uint16_t size = readNumber(i);
185 i++;
186
187 Filenames block;
188 block.start = values.end;
189 block.filenames.reserve(size);
190
191 for (uint16_t j = 0; j < size; ++j)
192 {
193 std::string val;
194 while (m_bytecode[i] != 0)
195 val.push_back(static_cast<char>(m_bytecode[i++]));
196 block.filenames.emplace_back(val);
197 i++;
198 }
199
200 block.end = i;
201 return block;
202 }
203
205 {
206 if (!checkMagic())
207 return {};
208
209 std::size_t i = filenames.end;
211 return {};
212 i++;
213
214 const uint16_t size = readNumber(i);
215 i++;
216
217 InstLocations block;
218 block.start = filenames.end;
219 block.locations.reserve(size);
220
221 for (uint16_t j = 0; j < size; ++j)
222 {
223 auto pp = readNumber(i);
224 i++;
225
226 auto ip = readNumber(i);
227 i++;
228
229 auto file_id = readNumber(i);
230 i++;
231
232 auto line = deserializeBE<uint32_t>(
233 m_bytecode.begin() + static_cast<std::vector<uint8_t>::difference_type>(i), m_bytecode.end());
234 i += 4;
235
236 block.locations.push_back(
237 { .page_pointer = pp,
238 .inst_pointer = ip,
239 .filename_id = file_id,
240 .line = line });
241 }
242
243 block.end = i;
244 return block;
245 }
246
247 Code BytecodeReader::code(const InstLocations& instLocations) const
248 {
249 if (!checkMagic())
250 return {};
251
252 std::size_t i = instLocations.end;
253
254 Code block;
255 block.start = i;
256
257 while (m_bytecode[i] == CODE_SEGMENT_START)
258 {
259 i++;
260 const std::size_t size = readNumber(i) * 4;
261 i++;
262
263 block.pages.emplace_back().reserve(size);
264 for (std::size_t j = 0; j < size; ++j)
265 block.pages.back().push_back(m_bytecode[i++]);
266
267 if (i == m_bytecode.size())
268 break;
269 }
270
271 return block;
272 }
273
274 std::optional<InstLoc> BytecodeReader::findSourceLocation(const std::vector<InstLoc>& inst_locations, const std::size_t ip, const std::size_t pp) const
275 {
276 std::optional<InstLoc> match = std::nullopt;
277
278 for (const auto location : inst_locations)
279 {
280 if (location.page_pointer == pp && !match)
281 match = location;
282
283 // select the best match: we want to find the location that's nearest our instruction pointer,
284 // but not equal to it as the IP will always be pointing to the next instruction,
285 // not yet executed. Thus, the erroneous instruction is the previous one.
286 if (location.page_pointer == pp && match && location.inst_pointer < ip / 4)
287 match = location;
288
289 // early exit because we won't find anything better, as inst locations are ordered by ascending (pp, ip)
290 if (location.page_pointer > pp || (location.page_pointer == pp && location.inst_pointer >= ip / 4))
291 break;
292 }
293
294 return match;
295 }
296
298 const std::optional<uint16_t> sStart,
299 const std::optional<uint16_t> sEnd,
300 const std::optional<uint16_t> cPage) const
301 {
302 if (!checkMagic())
303 {
304 fmt::println("Invalid format");
305 return;
306 }
307
308 if (segment == BytecodeSegment::All || segment == BytecodeSegment::HeadersOnly)
309 {
310 auto [major, minor, patch] = version();
311 fmt::println("Version: {}.{}.{}", major, minor, patch);
312 fmt::println("Timestamp: {}", timestamp());
313 fmt::print("SHA256: ");
314 for (const auto sha = sha256(); unsigned char h : sha)
315 fmt::print("{:02x}", h);
316 fmt::print("\n\n");
317 }
318
319 // reading the different tables, one after another
320
321 if ((sStart.has_value() && !sEnd.has_value()) || (!sStart.has_value() && sEnd.has_value()))
322 {
323 fmt::print(fmt::fg(fmt::color::red), "Both start and end parameter need to be provided together\n");
324 return;
325 }
326 if (sStart.has_value() && sEnd.has_value() && sStart.value() >= sEnd.value())
327 {
328 fmt::print(fmt::fg(fmt::color::red), "Invalid slice start and end arguments\n");
329 return;
330 }
331
332 const auto syms = symbols();
333 const auto vals = values(syms);
334 const auto files = filenames(vals);
335 const auto inst_locs = instLocations(files);
336 const auto code_block = code(inst_locs);
337
338 // symbols table
339 {
340 std::size_t size = syms.symbols.size();
341 std::size_t sliceSize = size;
342 bool showSym = (segment == BytecodeSegment::All || segment == BytecodeSegment::Symbols);
343
344 if (showSym && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
345 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
346 else if (showSym && sStart.has_value() && sEnd.has_value())
347 sliceSize = sEnd.value() - sStart.value() + 1;
348
349 if (showSym || segment == BytecodeSegment::HeadersOnly)
350 fmt::println("{} (length: {})", fmt::styled("Symbols table", fmt::fg(fmt::color::cyan)), sliceSize);
351
352 for (std::size_t j = 0; j < size; ++j)
353 {
354 if (auto start = sStart; auto end = sEnd)
355 showSym = showSym && (j >= start.value() && j <= end.value());
356
357 if (showSym)
358 fmt::println("{}) {}", j, syms.symbols[j]);
359 }
360
361 if (showSym)
362 fmt::print("\n");
363 if (segment == BytecodeSegment::Symbols)
364 return;
365 }
366
367 // values table
368 {
369 std::size_t size = vals.values.size();
370 std::size_t sliceSize = size;
371
372 bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::Values);
373 if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
374 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
375 else if (showVal && sStart.has_value() && sEnd.has_value())
376 sliceSize = sEnd.value() - sStart.value() + 1;
377
378 if (showVal || segment == BytecodeSegment::HeadersOnly)
379 fmt::println("{} (length: {})", fmt::styled("Constants table", fmt::fg(fmt::color::cyan)), sliceSize);
380
381 for (std::size_t j = 0; j < size; ++j)
382 {
383 if (auto start = sStart; auto end = sEnd)
384 showVal = showVal && (j >= start.value() && j <= end.value());
385
386 if (showVal)
387 {
388 switch (const auto val = vals.values[j]; val.valueType())
389 {
391 fmt::println("{}) (Number) {}", j, val.number());
392 break;
394 fmt::println("{}) (String) {}", j, val.string());
395 break;
397 fmt::println("{}) (PageAddr) {}", j, val.pageAddr());
398 break;
399 default:
400 fmt::print(fmt::fg(fmt::color::red), "Value type not handled: {}\n", std::to_string(val.valueType()));
401 break;
402 }
403 }
404 }
405
406 if (showVal)
407 fmt::print("\n");
408 if (segment == BytecodeSegment::Values)
409 return;
410 }
411
412 // inst locs + file
413 {
414 std::size_t size = inst_locs.locations.size();
415 std::size_t sliceSize = size;
416
417 bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::InstructionLocation);
418 if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
419 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
420 else if (showVal && sStart.has_value() && sEnd.has_value())
421 sliceSize = sEnd.value() - sStart.value() + 1;
422
423 if (showVal || segment == BytecodeSegment::HeadersOnly)
424 fmt::println("{} (length: {})", fmt::styled("Instruction locations table", fmt::fg(fmt::color::cyan)), sliceSize);
425 if (showVal && size > 0)
426 fmt::println(" PP, IP");
427
428 for (std::size_t j = 0; j < size; ++j)
429 {
430 if (auto start = sStart; auto end = sEnd)
431 showVal = showVal && (j >= start.value() && j <= end.value());
432
433 const auto& location = inst_locs.locations[j];
434 if (showVal)
435 fmt::println("{:>3},{:>3} -> {}:{}", location.page_pointer, location.inst_pointer, files.filenames[location.filename_id], location.line);
436 }
437
438 if (showVal)
439 fmt::print("\n");
440 }
441
442 const auto stringify_value = [](const Value& val) -> std::string {
443 switch (val.valueType())
444 {
446 return fmt::format("{} (Number)", val.number());
448 return fmt::format("{} (String)", val.string());
450 return fmt::format("{} (PageAddr)", val.pageAddr());
451 default:
452 return "";
453 }
454 };
455
456 enum class ArgKind
457 {
458 Symbol,
459 Constant,
460 Builtin,
461 Raw, ///< eg: Stack index, jump address, number
462 RawHex,
463 ConstConst,
464 ConstSym,
465 SymConst,
466 SymSym,
467 BuiltinRaw, ///< Builtin, number
468 ConstRaw, ///< Constant, number
469 SymRaw, ///< Symbol, number
470 RawSym, ///< Symbol index, symbol
471 RawConst, ///< Symbol index, constant
472 RawRaw, ///< Symbol index, symbol index
473 RawRawRaw
474 };
475
476 struct Arg
477 {
478 ArgKind kind;
479 uint8_t padding;
480 uint16_t arg;
481
482 [[nodiscard]] uint16_t primary() const
483 {
484 return arg & 0x0fff;
485 }
486
487 [[nodiscard]] uint16_t secondary() const
488 {
489 return static_cast<uint16_t>((padding << 4) | (arg & 0xf000) >> 12);
490 }
491 };
492
493 const std::unordered_map<Instruction, ArgKind> arg_kinds = {
494 { LOAD_FAST, ArgKind::Symbol },
495 { LOAD_FAST_BY_INDEX, ArgKind::Raw },
496 { LOAD_SYMBOL, ArgKind::Symbol },
497 { LOAD_CONST, ArgKind::Constant },
498 { POP_JUMP_IF_TRUE, ArgKind::Raw },
499 { STORE, ArgKind::Symbol },
500 { STORE_REF, ArgKind::Symbol },
501 { SET_VAL, ArgKind::Symbol },
502 { POP_JUMP_IF_FALSE, ArgKind::Raw },
503 { JUMP, ArgKind::Raw },
504 { PUSH_RETURN_ADDRESS, ArgKind::RawHex },
505 { CALL, ArgKind::Raw },
506 { CAPTURE, ArgKind::Symbol },
507 { RENAME_NEXT_CAPTURE, ArgKind::Symbol },
508 { BUILTIN, ArgKind::Builtin },
509 { DEL, ArgKind::Symbol },
510 { MAKE_CLOSURE, ArgKind::Constant },
511 { GET_FIELD, ArgKind::Symbol },
512 { PLUGIN, ArgKind::Constant },
513 { LIST, ArgKind::Raw },
514 { APPEND, ArgKind::Raw },
515 { CONCAT, ArgKind::Raw },
516 { APPEND_IN_PLACE, ArgKind::Raw },
517 { CONCAT_IN_PLACE, ArgKind::Raw },
518 { POP_LIST, ArgKind::Raw },
519 { POP_LIST_IN_PLACE, ArgKind::Raw },
520 { SET_AT_INDEX, ArgKind::Raw },
521 { SET_AT_2_INDEX, ArgKind::Raw },
522 { RESET_SCOPE_JUMP, ArgKind::Raw },
523 { LOAD_CONST_LOAD_CONST, ArgKind::ConstConst },
524 { LOAD_CONST_STORE, ArgKind::ConstSym },
525 { LOAD_CONST_SET_VAL, ArgKind::ConstSym },
526 { STORE_FROM, ArgKind::SymSym },
527 { STORE_FROM_INDEX, ArgKind::RawSym },
528 { SET_VAL_FROM, ArgKind::SymSym },
529 { SET_VAL_FROM_INDEX, ArgKind::RawSym },
530 { INCREMENT, ArgKind::SymRaw },
531 { INCREMENT_BY_INDEX, ArgKind::RawRaw },
532 { INCREMENT_STORE, ArgKind::RawRaw },
533 { DECREMENT, ArgKind::SymRaw },
534 { DECREMENT_BY_INDEX, ArgKind::RawRaw },
535 { DECREMENT_STORE, ArgKind::SymRaw },
536 { STORE_TAIL, ArgKind::SymSym },
537 { STORE_TAIL_BY_INDEX, ArgKind::RawSym },
538 { STORE_HEAD, ArgKind::SymSym },
539 { STORE_HEAD_BY_INDEX, ArgKind::RawSym },
540 { STORE_LIST, ArgKind::RawSym },
541 { SET_VAL_TAIL, ArgKind::SymSym },
542 { SET_VAL_TAIL_BY_INDEX, ArgKind::RawSym },
543 { SET_VAL_HEAD, ArgKind::SymSym },
544 { SET_VAL_HEAD_BY_INDEX, ArgKind::RawSym },
545 { CALL_BUILTIN, ArgKind::BuiltinRaw },
546 { CALL_BUILTIN_WITHOUT_RETURN_ADDRESS, ArgKind::BuiltinRaw },
547 { LT_CONST_JUMP_IF_FALSE, ArgKind::ConstRaw },
548 { LT_CONST_JUMP_IF_TRUE, ArgKind::ConstRaw },
549 { LT_SYM_JUMP_IF_FALSE, ArgKind::SymRaw },
550 { GT_CONST_JUMP_IF_TRUE, ArgKind::ConstRaw },
551 { GT_CONST_JUMP_IF_FALSE, ArgKind::ConstRaw },
552 { GT_SYM_JUMP_IF_FALSE, ArgKind::SymRaw },
553 { EQ_CONST_JUMP_IF_TRUE, ArgKind::ConstRaw },
554 { EQ_SYM_INDEX_JUMP_IF_TRUE, ArgKind::SymRaw },
555 { NEQ_CONST_JUMP_IF_TRUE, ArgKind::ConstRaw },
556 { NEQ_SYM_JUMP_IF_FALSE, ArgKind::SymRaw },
557 { CALL_SYMBOL, ArgKind::SymRaw },
558 { CALL_SYMBOL_BY_INDEX, ArgKind::RawRaw },
559 { CALL_CURRENT_PAGE, ArgKind::SymRaw },
560 { GET_FIELD_FROM_SYMBOL, ArgKind::SymSym },
561 { GET_FIELD_FROM_SYMBOL_INDEX, ArgKind::RawSym },
562 { AT_SYM_SYM, ArgKind::SymSym },
563 { AT_SYM_INDEX_SYM_INDEX, ArgKind::RawRaw },
564 { AT_SYM_INDEX_CONST, ArgKind::RawConst },
565 { CHECK_TYPE_OF, ArgKind::SymConst },
566 { CHECK_TYPE_OF_BY_INDEX, ArgKind::RawConst },
567 { APPEND_IN_PLACE_SYM, ArgKind::SymRaw },
568 { APPEND_IN_PLACE_SYM_INDEX, ArgKind::RawRaw },
569 { STORE_LEN, ArgKind::RawSym },
570 { LT_LEN_SYM_JUMP_IF_FALSE, ArgKind::SymRaw },
571 { MUL_BY, ArgKind::RawRaw },
572 { MUL_BY_INDEX, ArgKind::RawRaw },
573 { MUL_SET_VAL, ArgKind::RawRaw },
574 { FUSED_MATH, ArgKind::RawRawRaw }
575 };
576
577 const auto builtin_name = [](const uint16_t idx) {
578 return Builtins::builtins[idx].first;
579 };
580 const auto value_str = [&stringify_value, &vals](const uint16_t idx) {
581 return stringify_value(vals.values[idx]);
582 };
583 const auto symbol_name = [&syms](const uint16_t idx) {
584 return syms.symbols[idx];
585 };
586
587 const auto color_print_inst = [=](const std::string& name, std::optional<Arg> arg = std::nullopt) {
588 fmt::print("{}", fmt::styled(name, fmt::fg(fmt::color::gold)));
589 if (arg.has_value())
590 {
591 constexpr auto sym_color = fmt::fg(fmt::color::green);
592 constexpr auto const_color = fmt::fg(fmt::color::magenta);
593 constexpr auto raw_color = fmt::fg(fmt::color::red);
594
595 switch (auto [kind, _, idx] = arg.value(); kind)
596 {
597 case ArgKind::Symbol:
598 fmt::print(sym_color, " {}\n", symbol_name(idx));
599 break;
600 case ArgKind::Constant:
601 fmt::print(const_color, " {}\n", value_str(idx));
602 break;
603 case ArgKind::Builtin:
604 fmt::print(" {}\n", builtin_name(idx));
605 break;
606 case ArgKind::Raw:
607 fmt::print(raw_color, " ({})\n", idx);
608 break;
609 case ArgKind::RawHex:
610 fmt::print(raw_color, " ({:#x})\n", idx);
611 break;
612 case ArgKind::ConstConst:
613 fmt::print(" {}, {}\n", fmt::styled(value_str(arg->primary()), const_color), fmt::styled(value_str(arg->secondary()), const_color));
614 break;
615 case ArgKind::ConstSym:
616 fmt::print(" {}, {}\n", fmt::styled(value_str(arg->primary()), const_color), fmt::styled(symbol_name(arg->secondary()), sym_color));
617 break;
618 case ArgKind::SymConst:
619 fmt::print(" {}, {}\n", fmt::styled(symbol_name(arg->primary()), sym_color), fmt::styled(value_str(arg->secondary()), const_color));
620 break;
621 case ArgKind::SymSym:
622 fmt::print(" {}, {}\n", fmt::styled(symbol_name(arg->primary()), sym_color), fmt::styled(symbol_name(arg->secondary()), sym_color));
623 break;
624 case ArgKind::BuiltinRaw:
625 fmt::print(" {}, {}\n", builtin_name(arg->primary()), fmt::styled(arg->secondary(), raw_color));
626 break;
627 case ArgKind::ConstRaw:
628 fmt::print(" {}, {}\n", fmt::styled(value_str(arg->primary()), const_color), fmt::styled(arg->secondary(), raw_color));
629 break;
630 case ArgKind::SymRaw:
631 fmt::print(" {}, {}\n", fmt::styled(symbol_name(arg->primary()), sym_color), fmt::styled(arg->secondary(), raw_color));
632 break;
633 case ArgKind::RawSym:
634 fmt::print(" {}, {}\n", fmt::styled(arg->primary(), raw_color), fmt::styled(symbol_name(arg->secondary()), sym_color));
635 break;
636 case ArgKind::RawConst:
637 fmt::print(" {}, {}\n", fmt::styled(arg->primary(), raw_color), fmt::styled(value_str(arg->secondary()), const_color));
638 break;
639 case ArgKind::RawRaw:
640 fmt::print(" {}, {}\n", fmt::styled(arg->primary(), raw_color), fmt::styled(arg->secondary(), raw_color));
641 break;
642 case ArgKind::RawRawRaw:
643 fmt::print(" {}, {}, {}\n", fmt::styled(arg->padding, raw_color), fmt::styled((arg->arg & 0xff00) >> 8, raw_color), fmt::styled(arg->arg & 0x00ff, raw_color));
644 break;
645 }
646 }
647 else
648 fmt::print("\n");
649 };
650
651 if (segment == BytecodeSegment::All || segment == BytecodeSegment::Code || segment == BytecodeSegment::HeadersOnly)
652 {
653 uint16_t pp = 0;
654
655 for (const auto& page : code_block.pages)
656 {
657 bool displayCode = true;
658
659 if (auto wanted_page = cPage)
660 displayCode = pp == wanted_page.value();
661
662 if (displayCode)
663 fmt::println(
664 "{} {} (length: {})",
665 fmt::styled("Code segment", fmt::fg(fmt::color::magenta)),
666 fmt::styled(pp, fmt::fg(fmt::color::magenta)),
667 page.size());
668
669 if (page.empty())
670 {
671 if (displayCode)
672 fmt::print("NOP");
673 }
674 else if (cPage.value_or(pp) == pp && segment != BytecodeSegment::HeadersOnly)
675 {
676 if (sStart.has_value() && sEnd.has_value() && ((sStart.value() > page.size()) || (sEnd.value() > page.size())))
677 {
678 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", page.size());
679 return;
680 }
681
682 std::optional<InstLoc> previous_loc = std::nullopt;
683
684 for (std::size_t j = sStart.value_or(0), end = sEnd.value_or(page.size()); j < end; j += 4)
685 {
686 const uint8_t inst = page[j];
687 const uint8_t padding = page[j + 1];
688 const auto arg = static_cast<uint16_t>((page[j + 2] << 8) + page[j + 3]);
689
690 auto maybe_loc = findSourceLocation(inst_locs.locations, j, pp);
691
692 // location
693 // we want to print it only when it changed, either the file, the line, or both
694 if (maybe_loc && (!previous_loc || maybe_loc != previous_loc))
695 {
696 if (!previous_loc || previous_loc->filename_id != maybe_loc->filename_id)
697 fmt::println("{}", files.filenames[maybe_loc->filename_id]);
698 fmt::print("{:>4}", maybe_loc->line + 1);
699 previous_loc = maybe_loc;
700 }
701 else
702 fmt::print(" ");
703 // instruction number
704 fmt::print(fmt::fg(fmt::color::cyan), "{:>4x}", j / 4);
705 // padding inst arg arg
706 fmt::print(" {:02x} {:02x} {:02x} {:02x} ", inst, padding, page[j + 2], page[j + 3]);
707
708 if (const auto idx = static_cast<std::size_t>(inst); idx < InstructionNames.size())
709 {
710 const auto inst_name = InstructionNames[idx];
711 if (const auto iinst = static_cast<Instruction>(inst); arg_kinds.contains(iinst))
712 color_print_inst(inst_name, Arg { arg_kinds.at(iinst), padding, arg });
713 else
714 color_print_inst(inst_name);
715 }
716 else
717 fmt::println("Unknown instruction");
718 }
719 }
720 if (displayCode && segment != BytecodeSegment::HeadersOnly)
721 fmt::print("\n");
722
723 ++pp;
724 }
725 }
726 }
727
728 uint16_t BytecodeReader::readNumber(std::size_t& i) const
729 {
730 const auto x = static_cast<uint16_t>(m_bytecode[i] << 8);
731 const uint16_t y = m_bytecode[++i];
732 return x + y;
733 }
734}
Host the declaration of all the ArkScript builtins.
A bytecode disassembler for ArkScript.
The different instructions used by the compiler and virtual machine.
std::optional< internal::InstLoc > findSourceLocation(const std::vector< internal::InstLoc > &inst_locations, std::size_t ip, std::size_t pp) const
Find the location of an instruction.
Symbols symbols() const
unsigned long long timestamp() const
Return the read timestamp from the bytecode file.
uint16_t readNumber(std::size_t &i) const
Read a number from the bytecode, under the instruction pointer i.
Filenames filenames(const Values &values) const
InstLocations instLocations(const Filenames &filenames) const
Version version() const
Code code(const InstLocations &instLocations) const
Values values(const Symbols &symbols) const
void display(BytecodeSegment segment=BytecodeSegment::All, std::optional< uint16_t > sStart=std::nullopt, std::optional< uint16_t > sEnd=std::nullopt, std::optional< uint16_t > cPage=std::nullopt) const
Display the bytecode opcode in a human friendly way.
std::vector< unsigned char > sha256() const
void feed(const std::string &file)
Construct needed data before displaying information about a given file.
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array Magic
Definition Common.hpp:29
constexpr std::array Version
Definition Common.hpp:30
constexpr std::size_t HeaderSize
Definition Common.hpp:39
double deserialize(const DecomposedDouble d)
@ Raw
Keep all text as is without modifying it (useful for the code formatter)
T deserializeBE(std::vector< uint8_t >::const_iterator begin, std::vector< uint8_t >::const_iterator end)
Instruction
The different bytecodes are stored here.
@ CALL_BUILTIN_WITHOUT_RETURN_ADDRESS
T deserializeLE(std::vector< uint8_t >::const_iterator begin, std::vector< uint8_t >::const_iterator end)
constexpr std::array InstructionNames
std::vector< uint8_t > bytecode_t
Definition Common.hpp:22
std::string to_string(const Ark::ValueType type) noexcept
Definition Value.hpp:233
std::vector< bytecode_t > pages
std::size_t start
Point to the CODE_SEGMENT_START byte in the bytecode.
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::vector< std::string > filenames
std::size_t start
Point to the FILENAMES_TABLE_START byte in the bytecode.
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the INST_LOC_TABLE_START byte in the bytecode.
std::vector< internal::InstLoc > locations
std::vector< std::string > symbols
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the SYM_TABLE_START byte in the bytecode.
std::vector< Value > values
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the VAL_TABLE_START byte in the bytecode.