ArkScript
A small, lisp-inspired, functional scripting language
BytecodeReader.cpp
Go to the documentation of this file.
2
5
6#include <unordered_map>
7#include <Proxy/Picosha2.hpp>
10#include <fmt/core.h>
11#include <fmt/color.h>
12
13namespace Ark
14{
15 using namespace Ark::internal;
16
17 void BytecodeReader::feed(const bytecode_t& bytecode)
18 {
19 m_bytecode = bytecode;
20 }
21
22 void BytecodeReader::feed(const std::string& file)
23 {
24 std::ifstream ifs(file, std::ios::binary | std::ios::ate);
25 if (!ifs.good())
26 throw std::runtime_error(fmt::format("[BytecodeReader] Couldn't open file '{}'", file));
27
28 const auto pos = ifs.tellg();
29 // reserve appropriate number of bytes
30 std::vector<char> temp(static_cast<std::size_t>(pos));
31 ifs.seekg(0, std::ios::beg);
32 ifs.read(&temp[0], pos);
33 ifs.close();
34
35 m_bytecode = bytecode_t(static_cast<std::size_t>(pos));
36 for (std::size_t i = 0; i < static_cast<std::size_t>(pos); ++i)
37 m_bytecode[i] = static_cast<uint8_t>(temp[i]);
38 }
39
41 {
42 return m_bytecode.size() >= bytecode::Magic.size() &&
43 m_bytecode[0] == bytecode::Magic[0] &&
44 m_bytecode[1] == bytecode::Magic[1] &&
45 m_bytecode[2] == bytecode::Magic[2] &&
47 }
48
50 {
51 if (!checkMagic() || m_bytecode.size() < bytecode::Magic.size() + bytecode::Version.size())
52 return Version { 0, 0, 0 };
53
54 return Version {
55 .major = static_cast<uint16_t>((m_bytecode[4] << 8) + m_bytecode[5]),
56 .minor = static_cast<uint16_t>((m_bytecode[6] << 8) + m_bytecode[7]),
57 .patch = static_cast<uint16_t>((m_bytecode[8] << 8) + m_bytecode[9])
58 };
59 }
60
61 unsigned long long BytecodeReader::timestamp() const
62 {
63 // 4 (ark\0) + version (2 bytes / number) + timestamp = 18 bytes
65 return 0;
66
67 // reading the timestamp in big endian
68 using timestamp_t = unsigned long long;
69 return (static_cast<timestamp_t>(m_bytecode[10]) << 56) +
70 (static_cast<timestamp_t>(m_bytecode[11]) << 48) +
71 (static_cast<timestamp_t>(m_bytecode[12]) << 40) +
72 (static_cast<timestamp_t>(m_bytecode[13]) << 32) +
73 (static_cast<timestamp_t>(m_bytecode[14]) << 24) +
74 (static_cast<timestamp_t>(m_bytecode[15]) << 16) +
75 (static_cast<timestamp_t>(m_bytecode[16]) << 8) +
76 static_cast<timestamp_t>(m_bytecode[17]);
77 }
78
79 std::vector<unsigned char> BytecodeReader::sha256() const
80 {
81 if (!checkMagic() || m_bytecode.size() < bytecode::HeaderSize + picosha2::k_digest_size)
82 return {};
83
84 std::vector<unsigned char> sha(picosha2::k_digest_size);
85 for (std::size_t i = 0; i < picosha2::k_digest_size; ++i)
86 sha[i] = m_bytecode[bytecode::HeaderSize + i];
87 return sha;
88 }
89
91 {
92 if (!checkMagic() || m_bytecode.size() < bytecode::HeaderSize + picosha2::k_digest_size ||
93 m_bytecode[bytecode::HeaderSize + picosha2::k_digest_size] != SYM_TABLE_START)
94 return {};
95
96 std::size_t i = bytecode::HeaderSize + picosha2::k_digest_size + 1;
97 const uint16_t size = readNumber(i);
98 i++;
99
100 Symbols block;
101 block.start = bytecode::HeaderSize + picosha2::k_digest_size;
102 block.symbols.reserve(size);
103
104 for (uint16_t j = 0; j < size; ++j)
105 {
106 std::string content;
107 while (m_bytecode[i] != 0)
108 content.push_back(static_cast<char>(m_bytecode[i++]));
109 i++;
110
111 block.symbols.push_back(content);
112 }
113
114 block.end = i;
115 return block;
116 }
117
119 {
120 if (!checkMagic())
121 return {};
122
123 std::size_t i = symbols.end;
124 if (m_bytecode[i] != VAL_TABLE_START)
125 return {};
126 i++;
127
128 const uint16_t size = readNumber(i);
129 i++;
130 Values block;
131 block.start = symbols.end;
132 block.values.reserve(size);
133
134 for (uint16_t j = 0; j < size; ++j)
135 {
136 const uint8_t type = m_bytecode[i];
137 i++;
138
139 if (type == NUMBER_TYPE)
140 {
142 m_bytecode.begin() + static_cast<std::vector<uint8_t>::difference_type>(i), m_bytecode.end());
143 i += sizeof(decltype(exp));
145 m_bytecode.begin() + static_cast<std::vector<uint8_t>::difference_type>(i), m_bytecode.end());
146 i += sizeof(decltype(mant));
147
148 const ieee754::DecomposedDouble d { exp, mant };
149 double val = ieee754::deserialize(d);
150 block.values.emplace_back(val);
151 }
152 else if (type == STRING_TYPE)
153 {
154 std::string val;
155 while (m_bytecode[i] != 0)
156 val.push_back(static_cast<char>(m_bytecode[i++]));
157 block.values.emplace_back(val);
158 }
159 else if (type == FUNC_TYPE)
160 {
161 const uint16_t addr = readNumber(i);
162 i++;
163 block.values.emplace_back(addr);
164 }
165 else
166 throw std::runtime_error(fmt::format("Unknown value type: {:x}", type));
167 i++;
168 }
169
170 block.end = i;
171 return block;
172 }
173
175 {
176 if (!checkMagic())
177 return {};
178
179 std::size_t i = values.end;
181 return {};
182 i++;
183
184 const uint16_t size = readNumber(i);
185 i++;
186
187 Filenames block;
188 block.start = values.end;
189 block.filenames.reserve(size);
190
191 for (uint16_t j = 0; j < size; ++j)
192 {
193 std::string val;
194 while (m_bytecode[i] != 0)
195 val.push_back(static_cast<char>(m_bytecode[i++]));
196 block.filenames.emplace_back(val);
197 i++;
198 }
199
200 block.end = i;
201 return block;
202 }
203
205 {
206 if (!checkMagic())
207 return {};
208
209 std::size_t i = filenames.end;
211 return {};
212 i++;
213
214 const uint16_t size = readNumber(i);
215 i++;
216
217 InstLocations block;
218 block.start = filenames.end;
219 block.locations.reserve(size);
220
221 for (uint16_t j = 0; j < size; ++j)
222 {
223 auto pp = readNumber(i);
224 i++;
225
226 auto ip = readNumber(i);
227 i++;
228
229 auto file_id = readNumber(i);
230 i++;
231
232 auto line = deserializeBE<uint32_t>(
233 m_bytecode.begin() + static_cast<std::vector<uint8_t>::difference_type>(i), m_bytecode.end());
234 i += 4;
235
236 block.locations.push_back(
237 { .page_pointer = pp,
238 .inst_pointer = ip,
239 .filename_id = file_id,
240 .line = line });
241 }
242
243 block.end = i;
244 return block;
245 }
246
247 Code BytecodeReader::code(const InstLocations& instLocations) const
248 {
249 if (!checkMagic())
250 return {};
251
252 std::size_t i = instLocations.end;
253
254 Code block;
255 block.start = i;
256
257 while (m_bytecode[i] == CODE_SEGMENT_START)
258 {
259 i++;
260 const std::size_t size = readNumber(i) * 4;
261 i++;
262
263 block.pages.emplace_back().reserve(size);
264 for (std::size_t j = 0; j < size; ++j)
265 block.pages.back().push_back(m_bytecode[i++]);
266
267 if (i == m_bytecode.size())
268 break;
269 }
270
271 return block;
272 }
273
274 std::optional<InstLoc> BytecodeReader::findSourceLocation(const std::vector<InstLoc>& inst_locations, const std::size_t ip, const std::size_t pp) const
275 {
276 std::optional<InstLoc> match = std::nullopt;
277
278 for (const auto location : inst_locations)
279 {
280 if (location.page_pointer == pp && !match)
281 match = location;
282
283 // select the best match: we want to find the location that's nearest our instruction pointer,
284 // but not equal to it as the IP will always be pointing to the next instruction,
285 // not yet executed. Thus, the erroneous instruction is the previous one.
286 if (location.page_pointer == pp && match && location.inst_pointer < ip / 4)
287 match = location;
288
289 // early exit because we won't find anything better, as inst locations are ordered by ascending (pp, ip)
290 if (location.page_pointer > pp || (location.page_pointer == pp && location.inst_pointer >= ip / 4))
291 break;
292 }
293
294 return match;
295 }
296
298 const std::optional<uint16_t> sStart,
299 const std::optional<uint16_t> sEnd,
300 const std::optional<uint16_t> cPage) const
301 {
302 if (!checkMagic())
303 {
304 fmt::println("Invalid format");
305 return;
306 }
307
308 if (segment == BytecodeSegment::All || segment == BytecodeSegment::HeadersOnly)
309 {
310 auto [major, minor, patch] = version();
311 fmt::println("Version: {}.{}.{}", major, minor, patch);
312 fmt::println("Timestamp: {}", timestamp());
313 fmt::print("SHA256: ");
314 for (const auto sha = sha256(); unsigned char h : sha)
315 fmt::print("{:02x}", h);
316 fmt::print("\n\n");
317 }
318
319 // reading the different tables, one after another
320
321 if ((sStart.has_value() && !sEnd.has_value()) || (!sStart.has_value() && sEnd.has_value()))
322 {
323 fmt::print(fmt::fg(fmt::color::red), "Both start and end parameter need to be provided together\n");
324 return;
325 }
326 if (sStart.has_value() && sEnd.has_value() && sStart.value() >= sEnd.value())
327 {
328 fmt::print(fmt::fg(fmt::color::red), "Invalid slice start and end arguments\n");
329 return;
330 }
331
332 const auto syms = symbols();
333 const auto vals = values(syms);
334 const auto files = filenames(vals);
335 const auto inst_locs = instLocations(files);
336 const auto code_block = code(inst_locs);
337
338 // symbols table
339 {
340 std::size_t size = syms.symbols.size();
341 std::size_t sliceSize = size;
342 bool showSym = (segment == BytecodeSegment::All || segment == BytecodeSegment::Symbols);
343
344 if (showSym && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
345 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
346 else if (showSym && sStart.has_value() && sEnd.has_value())
347 sliceSize = sEnd.value() - sStart.value() + 1;
348
349 if (showSym || segment == BytecodeSegment::HeadersOnly)
350 fmt::println("{} (length: {})", fmt::styled("Symbols table", fmt::fg(fmt::color::cyan)), sliceSize);
351
352 for (std::size_t j = 0; j < size; ++j)
353 {
354 if (auto start = sStart; auto end = sEnd)
355 showSym = showSym && (j >= start.value() && j <= end.value());
356
357 if (showSym)
358 fmt::println("{}) {}", j, syms.symbols[j]);
359 }
360
361 if (showSym)
362 fmt::print("\n");
363 if (segment == BytecodeSegment::Symbols)
364 return;
365 }
366
367 // values table
368 {
369 std::size_t size = vals.values.size();
370 std::size_t sliceSize = size;
371
372 bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::Values);
373 if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
374 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
375 else if (showVal && sStart.has_value() && sEnd.has_value())
376 sliceSize = sEnd.value() - sStart.value() + 1;
377
378 if (showVal || segment == BytecodeSegment::HeadersOnly)
379 fmt::println("{} (length: {})", fmt::styled("Constants table", fmt::fg(fmt::color::cyan)), sliceSize);
380
381 for (std::size_t j = 0; j < size; ++j)
382 {
383 if (auto start = sStart; auto end = sEnd)
384 showVal = showVal && (j >= start.value() && j <= end.value());
385
386 if (showVal)
387 {
388 switch (const auto val = vals.values[j]; val.valueType())
389 {
391 fmt::println("{}) (Number) {}", j, val.number());
392 break;
394 fmt::println("{}) (String) {}", j, val.string());
395 break;
397 fmt::println("{}) (PageAddr) {}", j, val.pageAddr());
398 break;
399 default:
400 fmt::print(fmt::fg(fmt::color::red), "Value type not handled: {}\n", std::to_string(val.valueType()));
401 break;
402 }
403 }
404 }
405
406 if (showVal)
407 fmt::print("\n");
408 if (segment == BytecodeSegment::Values)
409 return;
410 }
411
412 // inst locs + file
413 {
414 std::size_t size = inst_locs.locations.size();
415 std::size_t sliceSize = size;
416
417 bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::InstructionLocation);
418 if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
419 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
420 else if (showVal && sStart.has_value() && sEnd.has_value())
421 sliceSize = sEnd.value() - sStart.value() + 1;
422
423 if (showVal || segment == BytecodeSegment::HeadersOnly)
424 fmt::println("{} (length: {})", fmt::styled("Instruction locations table", fmt::fg(fmt::color::cyan)), sliceSize);
425 if (showVal && size > 0)
426 fmt::println(" PP, IP");
427
428 for (std::size_t j = 0; j < size; ++j)
429 {
430 if (auto start = sStart; auto end = sEnd)
431 showVal = showVal && (j >= start.value() && j <= end.value());
432
433 const auto& location = inst_locs.locations[j];
434 if (showVal)
435 fmt::println("{:>3},{:>3} -> {}:{}", location.page_pointer, location.inst_pointer, files.filenames[location.filename_id], location.line);
436 }
437
438 if (showVal)
439 fmt::print("\n");
440 }
441
442 const auto stringify_value = [](const Value& val) -> std::string {
443 switch (val.valueType())
444 {
446 return fmt::format("{} (Number)", val.number());
448 return fmt::format("{} (String)", val.string());
450 return fmt::format("{} (PageAddr)", val.pageAddr());
451 default:
452 return "";
453 }
454 };
455
456 enum class ArgKind
457 {
458 Symbol,
459 Constant,
460 Builtin,
461 Raw, ///< eg: Stack index, jump address, number
462 ConstConst,
463 ConstSym,
464 SymConst,
465 SymSym,
466 BuiltinRaw, ///< Builtin, number
467 ConstRaw, ///< Constant, number
468 SymRaw, ///< Symbol, number
469 RawSym, ///< Symbol index, symbol
470 RawConst, ///< Symbol index, constant
471 RawRaw, ///< Symbol index, symbol index
472 RawRawRaw
473 };
474
475 struct Arg
476 {
477 ArgKind kind;
478 uint8_t padding;
479 uint16_t arg;
480
481 [[nodiscard]] uint16_t primary() const
482 {
483 return arg & 0x0fff;
484 }
485
486 [[nodiscard]] uint16_t secondary() const
487 {
488 return static_cast<uint16_t>((padding << 4) | (arg & 0xf000) >> 12);
489 }
490 };
491
492 const std::unordered_map<Instruction, ArgKind> arg_kinds = {
493 { LOAD_SYMBOL, ArgKind::Symbol },
494 { LOAD_SYMBOL_BY_INDEX, ArgKind::Raw },
495 { LOAD_CONST, ArgKind::Constant },
496 { POP_JUMP_IF_TRUE, ArgKind::Raw },
497 { STORE, ArgKind::Symbol },
498 { STORE_REF, ArgKind::Symbol },
499 { SET_VAL, ArgKind::Symbol },
500 { POP_JUMP_IF_FALSE, ArgKind::Raw },
501 { JUMP, ArgKind::Raw },
502 { CALL, ArgKind::Raw },
503 { CAPTURE, ArgKind::Symbol },
504 { RENAME_NEXT_CAPTURE, ArgKind::Symbol },
505 { BUILTIN, ArgKind::Builtin },
506 { DEL, ArgKind::Symbol },
507 { MAKE_CLOSURE, ArgKind::Constant },
508 { GET_FIELD, ArgKind::Symbol },
509 { PLUGIN, ArgKind::Constant },
510 { LIST, ArgKind::Raw },
511 { APPEND, ArgKind::Raw },
512 { CONCAT, ArgKind::Raw },
513 { APPEND_IN_PLACE, ArgKind::Raw },
514 { CONCAT_IN_PLACE, ArgKind::Raw },
515 { RESET_SCOPE_JUMP, ArgKind::Raw },
516 { GET_CURRENT_PAGE_ADDR, ArgKind::Symbol },
517 { LOAD_CONST_LOAD_CONST, ArgKind::ConstConst },
518 { LOAD_CONST_STORE, ArgKind::ConstSym },
519 { LOAD_CONST_SET_VAL, ArgKind::ConstSym },
520 { STORE_FROM, ArgKind::SymSym },
521 { STORE_FROM_INDEX, ArgKind::RawSym },
522 { SET_VAL_FROM, ArgKind::SymSym },
523 { SET_VAL_FROM_INDEX, ArgKind::RawSym },
524 { INCREMENT, ArgKind::SymRaw },
525 { INCREMENT_BY_INDEX, ArgKind::RawRaw },
526 { INCREMENT_STORE, ArgKind::RawRaw },
527 { DECREMENT, ArgKind::SymRaw },
528 { DECREMENT_BY_INDEX, ArgKind::RawRaw },
529 { DECREMENT_STORE, ArgKind::SymRaw },
530 { STORE_TAIL, ArgKind::SymSym },
531 { STORE_TAIL_BY_INDEX, ArgKind::RawSym },
532 { STORE_HEAD, ArgKind::SymSym },
533 { STORE_HEAD_BY_INDEX, ArgKind::RawSym },
534 { STORE_LIST, ArgKind::RawSym },
535 { SET_VAL_TAIL, ArgKind::SymSym },
536 { SET_VAL_TAIL_BY_INDEX, ArgKind::RawSym },
537 { SET_VAL_HEAD, ArgKind::SymSym },
538 { SET_VAL_HEAD_BY_INDEX, ArgKind::RawSym },
539 { CALL_BUILTIN, ArgKind::BuiltinRaw },
540 { CALL_BUILTIN_WITHOUT_RETURN_ADDRESS, ArgKind::BuiltinRaw },
541 { LT_CONST_JUMP_IF_FALSE, ArgKind::ConstRaw },
542 { LT_CONST_JUMP_IF_TRUE, ArgKind::ConstRaw },
543 { LT_SYM_JUMP_IF_FALSE, ArgKind::SymRaw },
544 { GT_CONST_JUMP_IF_TRUE, ArgKind::ConstRaw },
545 { GT_CONST_JUMP_IF_FALSE, ArgKind::ConstRaw },
546 { GT_SYM_JUMP_IF_FALSE, ArgKind::SymRaw },
547 { EQ_CONST_JUMP_IF_TRUE, ArgKind::ConstRaw },
548 { EQ_SYM_INDEX_JUMP_IF_TRUE, ArgKind::SymRaw },
549 { NEQ_CONST_JUMP_IF_TRUE, ArgKind::ConstRaw },
550 { NEQ_SYM_JUMP_IF_FALSE, ArgKind::SymRaw },
551 { CALL_SYMBOL, ArgKind::SymRaw },
552 { CALL_CURRENT_PAGE, ArgKind::SymRaw },
553 { GET_FIELD_FROM_SYMBOL, ArgKind::SymSym },
554 { GET_FIELD_FROM_SYMBOL_INDEX, ArgKind::RawSym },
555 { AT_SYM_SYM, ArgKind::SymSym },
556 { AT_SYM_INDEX_SYM_INDEX, ArgKind::RawRaw },
557 { AT_SYM_INDEX_CONST, ArgKind::RawConst },
558 { CHECK_TYPE_OF, ArgKind::SymConst },
559 { CHECK_TYPE_OF_BY_INDEX, ArgKind::RawConst },
560 { APPEND_IN_PLACE_SYM, ArgKind::SymRaw },
561 { APPEND_IN_PLACE_SYM_INDEX, ArgKind::RawRaw },
562 { STORE_LEN, ArgKind::RawSym },
563 { LT_LEN_SYM_JUMP_IF_FALSE, ArgKind::SymRaw },
564 { MUL_BY, ArgKind::RawRaw },
565 { MUL_BY_INDEX, ArgKind::RawRaw },
566 { MUL_SET_VAL, ArgKind::RawRaw },
567 { FUSED_MATH, ArgKind::RawRawRaw }
568 };
569
570 const auto builtin_name = [](const uint16_t idx) {
571 return Builtins::builtins[idx].first;
572 };
573 const auto value_str = [&stringify_value, &vals](const uint16_t idx) {
574 return stringify_value(vals.values[idx]);
575 };
576 const auto symbol_name = [&syms](const uint16_t idx) {
577 return syms.symbols[idx];
578 };
579
580 const auto color_print_inst = [=](const std::string& name, std::optional<Arg> arg = std::nullopt) {
581 fmt::print("{}", fmt::styled(name, fmt::fg(fmt::color::gold)));
582 if (arg.has_value())
583 {
584 constexpr auto sym_color = fmt::fg(fmt::color::green);
585 constexpr auto const_color = fmt::fg(fmt::color::magenta);
586 constexpr auto raw_color = fmt::fg(fmt::color::red);
587
588 switch (auto [kind, _, idx] = arg.value(); kind)
589 {
590 case ArgKind::Symbol:
591 fmt::print(sym_color, " {}\n", symbol_name(idx));
592 break;
593 case ArgKind::Constant:
594 fmt::print(const_color, " {}\n", value_str(idx));
595 break;
596 case ArgKind::Builtin:
597 fmt::print(" {}\n", builtin_name(idx));
598 break;
599 case ArgKind::Raw:
600 fmt::print(raw_color, " ({})\n", idx);
601 break;
602 case ArgKind::ConstConst:
603 fmt::print(" {}, {}\n", fmt::styled(value_str(arg->primary()), const_color), fmt::styled(value_str(arg->secondary()), const_color));
604 break;
605 case ArgKind::ConstSym:
606 fmt::print(" {}, {}\n", fmt::styled(value_str(arg->primary()), const_color), fmt::styled(symbol_name(arg->secondary()), sym_color));
607 break;
608 case ArgKind::SymConst:
609 fmt::print(" {}, {}\n", fmt::styled(symbol_name(arg->primary()), sym_color), fmt::styled(value_str(arg->secondary()), const_color));
610 break;
611 case ArgKind::SymSym:
612 fmt::print(" {}, {}\n", fmt::styled(symbol_name(arg->primary()), sym_color), fmt::styled(symbol_name(arg->secondary()), sym_color));
613 break;
614 case ArgKind::BuiltinRaw:
615 fmt::print(" {}, {}\n", builtin_name(arg->primary()), fmt::styled(arg->secondary(), raw_color));
616 break;
617 case ArgKind::ConstRaw:
618 fmt::print(" {}, {}\n", fmt::styled(value_str(arg->primary()), const_color), fmt::styled(arg->secondary(), raw_color));
619 break;
620 case ArgKind::SymRaw:
621 fmt::print(" {}, {}\n", fmt::styled(symbol_name(arg->primary()), sym_color), fmt::styled(arg->secondary(), raw_color));
622 break;
623 case ArgKind::RawSym:
624 fmt::print(" {}, {}\n", fmt::styled(arg->primary(), raw_color), fmt::styled(symbol_name(arg->secondary()), sym_color));
625 break;
626 case ArgKind::RawConst:
627 fmt::print(" {}, {}\n", fmt::styled(arg->primary(), raw_color), fmt::styled(value_str(arg->secondary()), const_color));
628 break;
629 case ArgKind::RawRaw:
630 fmt::print(" {}, {}\n", fmt::styled(arg->primary(), raw_color), fmt::styled(arg->secondary(), raw_color));
631 break;
632 case ArgKind::RawRawRaw:
633 fmt::print(" {}, {}, {}\n", fmt::styled(arg->padding, raw_color), fmt::styled((arg->arg & 0xff00) >> 8, raw_color), fmt::styled(arg->arg & 0x00ff, raw_color));
634 break;
635 }
636 }
637 else
638 fmt::print("\n");
639 };
640
641 if (segment == BytecodeSegment::All || segment == BytecodeSegment::Code || segment == BytecodeSegment::HeadersOnly)
642 {
643 uint16_t pp = 0;
644
645 for (const auto& page : code_block.pages)
646 {
647 bool displayCode = true;
648
649 if (auto wanted_page = cPage)
650 displayCode = pp == wanted_page.value();
651
652 if (displayCode)
653 fmt::println(
654 "{} {} (length: {})",
655 fmt::styled("Code segment", fmt::fg(fmt::color::magenta)),
656 fmt::styled(pp, fmt::fg(fmt::color::magenta)),
657 page.size());
658
659 if (page.empty())
660 {
661 if (displayCode)
662 fmt::print("NOP");
663 }
664 else if (cPage.value_or(pp) == pp && segment != BytecodeSegment::HeadersOnly)
665 {
666 if (sStart.has_value() && sEnd.has_value() && ((sStart.value() > page.size()) || (sEnd.value() > page.size())))
667 {
668 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", page.size());
669 return;
670 }
671
672 std::optional<InstLoc> previous_loc = std::nullopt;
673
674 for (std::size_t j = sStart.value_or(0), end = sEnd.value_or(page.size()); j < end; j += 4)
675 {
676 const uint8_t inst = page[j];
677 const uint8_t padding = page[j + 1];
678 const auto arg = static_cast<uint16_t>((page[j + 2] << 8) + page[j + 3]);
679
680 auto maybe_loc = findSourceLocation(inst_locs.locations, j, pp);
681
682 // location
683 // we want to print it only when it changed, either the file, the line, or both
684 if (maybe_loc && (!previous_loc || maybe_loc != previous_loc))
685 {
686 if (!previous_loc || previous_loc->filename_id != maybe_loc->filename_id)
687 fmt::println("{}", files.filenames[maybe_loc->filename_id]);
688 fmt::print("{:>4}", maybe_loc->line + 1);
689 previous_loc = maybe_loc;
690 }
691 else
692 fmt::print(" ");
693 // instruction number
694 fmt::print(fmt::fg(fmt::color::cyan), "{:>4}", j / 4);
695 // padding inst arg arg
696 fmt::print(" {:02x} {:02x} {:02x} {:02x} ", inst, padding, page[j + 2], page[j + 3]);
697
698 if (const auto idx = static_cast<std::size_t>(inst); idx < InstructionNames.size())
699 {
700 const auto inst_name = InstructionNames[idx];
701 if (const auto iinst = static_cast<Instruction>(inst); arg_kinds.contains(iinst))
702 color_print_inst(inst_name, Arg { arg_kinds.at(iinst), padding, arg });
703 else
704 color_print_inst(inst_name);
705 }
706 else
707 fmt::println("Unknown instruction");
708 }
709 }
710 if (displayCode && segment != BytecodeSegment::HeadersOnly)
711 fmt::print("\n");
712
713 ++pp;
714 }
715 }
716 }
717
718 uint16_t BytecodeReader::readNumber(std::size_t& i) const
719 {
720 const auto x = static_cast<uint16_t>(m_bytecode[i] << 8);
721 const uint16_t y = m_bytecode[++i];
722 return x + y;
723 }
724}
Host the declaration of all the ArkScript builtins.
A bytecode disassembler for ArkScript.
The different instructions used by the compiler and virtual machine.
std::optional< internal::InstLoc > findSourceLocation(const std::vector< internal::InstLoc > &inst_locations, std::size_t ip, std::size_t pp) const
Find the location of an instruction.
Symbols symbols() const
unsigned long long timestamp() const
Return the read timestamp from the bytecode file.
uint16_t readNumber(std::size_t &i) const
Read a number from the bytecode, under the instruction pointer i.
Filenames filenames(const Values &values) const
InstLocations instLocations(const Filenames &filenames) const
Version version() const
Code code(const InstLocations &instLocations) const
Values values(const Symbols &symbols) const
void display(BytecodeSegment segment=BytecodeSegment::All, std::optional< uint16_t > sStart=std::nullopt, std::optional< uint16_t > sEnd=std::nullopt, std::optional< uint16_t > cPage=std::nullopt) const
Display the bytecode opcode in a human friendly way.
std::vector< unsigned char > sha256() const
void feed(const std::string &file)
Construct needed data before displaying information about a given file.
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array Magic
Definition Common.hpp:29
constexpr std::array Version
Definition Common.hpp:30
constexpr std::size_t HeaderSize
Definition Common.hpp:39
double deserialize(const DecomposedDouble d)
@ Raw
Keep all text as is without modifying it (useful for the code formatter)
T deserializeBE(std::vector< uint8_t >::const_iterator begin, std::vector< uint8_t >::const_iterator end)
Instruction
The different bytecodes are stored here.
@ CALL_BUILTIN_WITHOUT_RETURN_ADDRESS
T deserializeLE(std::vector< uint8_t >::const_iterator begin, std::vector< uint8_t >::const_iterator end)
constexpr std::array InstructionNames
std::vector< uint8_t > bytecode_t
Definition Common.hpp:22
std::string to_string(const Ark::ValueType type) noexcept
Definition Value.hpp:258
std::vector< bytecode_t > pages
std::size_t start
Point to the CODE_SEGMENT_START byte in the bytecode.
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::vector< std::string > filenames
std::size_t start
Point to the FILENAMES_TABLE_START byte in the bytecode.
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the INST_LOC_TABLE_START byte in the bytecode.
std::vector< internal::InstLoc > locations
std::vector< std::string > symbols
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the SYM_TABLE_START byte in the bytecode.
std::vector< Value > values
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the VAL_TABLE_START byte in the bytecode.