ArkScript
A small, fast, functional and scripting language for video games
BytecodeReader.cpp
Go to the documentation of this file.
2
5
6#include <iomanip>
7#include <unordered_map>
8#include <picosha2.h>
9#include <fmt/core.h>
10#include <fmt/color.h>
11
12namespace Ark
13{
14 using namespace Ark::internal;
15
16 void BytecodeReader::feed(const bytecode_t& bytecode)
17 {
19 }
20
21 void BytecodeReader::feed(const std::string& file)
22 {
23 std::ifstream ifs(file, std::ios::binary | std::ios::ate);
24 if (!ifs.good())
25 throw std::runtime_error(fmt::format("[BytecodeReader] Couldn't open file '{}'", file));
26
27 const auto pos = ifs.tellg();
28 // reserve appropriate number of bytes
29 std::vector<char> temp(static_cast<std::size_t>(pos));
30 ifs.seekg(0, std::ios::beg);
31 ifs.read(&temp[0], pos);
32 ifs.close();
33
34 m_bytecode = bytecode_t(static_cast<std::size_t>(pos));
35 for (std::size_t i = 0; i < static_cast<std::size_t>(pos); ++i)
36 m_bytecode[i] = static_cast<uint8_t>(temp[i]);
37 }
38
40 {
41 return m_bytecode.size() >= 4 && m_bytecode[0] == 'a' &&
42 m_bytecode[1] == 'r' && m_bytecode[2] == 'k' &&
44 }
45
47 {
48 return m_bytecode;
49 }
50
52 {
53 if (!checkMagic() || m_bytecode.size() < 10)
54 return Version { 0, 0, 0 };
55
56 return Version {
57 .major = static_cast<uint16_t>((m_bytecode[4] << 8) + m_bytecode[5]),
58 .minor = static_cast<uint16_t>((m_bytecode[6] << 8) + m_bytecode[7]),
59 .patch = static_cast<uint16_t>((m_bytecode[8] << 8) + m_bytecode[9])
60 };
61 }
62
63 unsigned long long BytecodeReader::timestamp() const
64 {
65 // 4 (ark\0) + version (2 bytes / number) + timestamp = 18 bytes
66 if (!checkMagic() || m_bytecode.size() < 18)
67 return 0;
68
69 // reading the timestamp in big endian
70 using timestamp_t = unsigned long long;
71 return (static_cast<timestamp_t>(m_bytecode[10]) << 56) +
72 (static_cast<timestamp_t>(m_bytecode[11]) << 48) +
73 (static_cast<timestamp_t>(m_bytecode[12]) << 40) +
74 (static_cast<timestamp_t>(m_bytecode[13]) << 32) +
75 (static_cast<timestamp_t>(m_bytecode[14]) << 24) +
76 (static_cast<timestamp_t>(m_bytecode[15]) << 16) +
77 (static_cast<timestamp_t>(m_bytecode[16]) << 8) +
78 static_cast<timestamp_t>(m_bytecode[17]);
79 }
80
81 std::vector<unsigned char> BytecodeReader::sha256() const
82 {
83 if (!checkMagic() || m_bytecode.size() < 18 + picosha2::k_digest_size)
84 return {};
85
86 std::vector<unsigned char> sha(picosha2::k_digest_size);
87 for (std::size_t i = 0; i < picosha2::k_digest_size; ++i)
88 sha[i] = m_bytecode[18 + i];
89 return sha;
90 }
91
93 {
94 if (!checkMagic() || m_bytecode.size() < 18 + picosha2::k_digest_size ||
95 m_bytecode[18 + picosha2::k_digest_size] != SYM_TABLE_START)
96 return {};
97
98 std::size_t i = 18 + picosha2::k_digest_size + 1;
99 const uint16_t size = readNumber(i);
100 i++;
101
102 Symbols block;
103 block.start = 18 + picosha2::k_digest_size;
104 block.symbols.reserve(size);
105
106 for (uint16_t j = 0; j < size; ++j)
107 {
108 std::string content;
109 while (m_bytecode[i] != 0)
110 content.push_back(static_cast<char>(m_bytecode[i++]));
111 i++;
112
113 block.symbols.push_back(content);
114 }
115
116 block.end = i;
117 return block;
118 }
119
121 {
122 if (!checkMagic())
123 return {};
124
125 std::size_t i = symbols.end;
126 if (m_bytecode[i] != VAL_TABLE_START)
127 return {};
128 i++;
129
130 const uint16_t size = readNumber(i);
131 i++;
132 Values block;
133 block.start = symbols.end;
134 block.values.reserve(size);
135
136 for (uint16_t j = 0; j < size; ++j)
137 {
138 const uint8_t type = m_bytecode[i];
139 i++;
140
141 if (type == NUMBER_TYPE)
142 {
143 std::string val;
144 while (m_bytecode[i] != 0)
145 val.push_back(static_cast<char>(m_bytecode[i++]));
146 block.values.emplace_back(std::stod(val));
147 }
148 else if (type == STRING_TYPE)
149 {
150 std::string val;
151 while (m_bytecode[i] != 0)
152 val.push_back(static_cast<char>(m_bytecode[i++]));
153 block.values.emplace_back(val);
154 }
155 else if (type == FUNC_TYPE)
156 {
157 const uint16_t addr = readNumber(i);
158 i++;
159 block.values.emplace_back(addr);
160 }
161 else
162 throw std::runtime_error(fmt::format("Unknown value type: {:x}", type));
163 i++;
164 }
165
166 block.end = i;
167 return block;
168 }
169
170 Code BytecodeReader::code(const Values& values) const
171 {
172 if (!checkMagic())
173 return {};
174
175 std::size_t i = values.end;
176
177 Code block;
178 block.start = i;
179
180 while (m_bytecode[i] == CODE_SEGMENT_START)
181 {
182 i++;
183 const std::size_t size = readNumber(i) * 4;
184 i++;
185
186 block.pages.emplace_back().reserve(size);
187 for (std::size_t j = 0; j < size; ++j)
188 block.pages.back().push_back(m_bytecode[i++]);
189
190 if (i == m_bytecode.size())
191 break;
192 }
193
194 return block;
195 }
196
198 const std::optional<uint16_t> sStart,
199 const std::optional<uint16_t> sEnd,
200 const std::optional<uint16_t> cPage) const
201 {
202 if (!checkMagic())
203 {
204 fmt::print("Invalid format");
205 return;
206 }
207
208 auto [major, minor, patch] = version();
209 fmt::println("Version: {}.{}.{}", major, minor, patch);
210 fmt::println("Timestamp: {}", timestamp());
211 fmt::print("SHA256: ");
212 for (const auto sha = sha256(); unsigned char h : sha)
213 fmt::print("{:02x}", h);
214 fmt::print("\n\n");
215
216 // reading the different tables, one after another
217
218 if ((sStart.has_value() && !sEnd.has_value()) || (!sStart.has_value() && sEnd.has_value()))
219 {
220 fmt::print(fmt::fg(fmt::color::red), "Both start and end parameter need to be provided together\n");
221 return;
222 }
223 if (sStart.has_value() && sEnd.has_value() && sStart.value() >= sEnd.value())
224 {
225 fmt::print(fmt::fg(fmt::color::red), "Invalid slice start and end arguments\n");
226 return;
227 }
228
229 const auto syms = symbols();
230 const auto vals = values(syms);
231 const auto code_block = code(vals);
232
233 // symbols table
234 {
235 std::size_t size = syms.symbols.size();
236 std::size_t sliceSize = size;
237 bool showSym = (segment == BytecodeSegment::All || segment == BytecodeSegment::Symbols);
238
239 if (showSym && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
240 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
241 else if (showSym && sStart.has_value() && sEnd.has_value())
242 sliceSize = sEnd.value() - sStart.value() + 1;
243
244 if (showSym || segment == BytecodeSegment::HeadersOnly)
245 fmt::println("{} (length: {})", fmt::styled("Symbols table", fmt::fg(fmt::color::cyan)), sliceSize);
246
247 for (std::size_t j = 0; j < size; ++j)
248 {
249 if (auto start = sStart; auto end = sEnd)
250 showSym = showSym && (j >= start.value() && j <= end.value());
251
252 if (showSym)
253 fmt::println("{}) {}", j, syms.symbols[j]);
254 }
255
256 if (showSym)
257 fmt::print("\n");
258 if (segment == BytecodeSegment::Symbols)
259 return;
260 }
261
262 // values table
263 {
264 std::size_t size = vals.values.size();
265 std::size_t sliceSize = size;
266
267 bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::Values);
268 if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size))
269 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", size);
270 else if (showVal && sStart.has_value() && sEnd.has_value())
271 sliceSize = sEnd.value() - sStart.value() + 1;
272
273 if (showVal || segment == BytecodeSegment::HeadersOnly)
274 fmt::println("{} (length: {})", fmt::styled("Constants table", fmt::fg(fmt::color::cyan)), sliceSize);
275
276 for (std::size_t j = 0; j < size; ++j)
277 {
278 if (auto start = sStart; auto end = sEnd)
279 showVal = showVal && (j >= start.value() && j <= end.value());
280
281 if (showVal)
282 {
283 switch (const auto val = vals.values[j]; val.valueType())
284 {
286 fmt::println("{}) (Number) {}", j, val.number());
287 break;
289 fmt::println("{}) (String) {}", j, val.string());
290 break;
292 fmt::println("{}) (PageAddr) {}", j, val.pageAddr());
293 break;
294 default:
295 fmt::print(fmt::fg(fmt::color::red), "Value type not handled: {}\n", types_to_str[static_cast<std::size_t>(val.valueType())]);
296 break;
297 }
298 }
299 }
300
301 if (showVal)
302 fmt::print("\n");
303 if (segment == BytecodeSegment::Values)
304 return;
305 }
306
307 const auto stringify_value = [](const Value& val) -> std::string {
308 switch (val.valueType())
309 {
311 return fmt::format("{} (Number)", val.number());
313 return fmt::format("{} (String)", val.string());
315 return fmt::format("{} (PageAddr)", val.pageAddr());
316 default:
317 return "";
318 }
319 };
320
321 enum class ArgKind
322 {
323 Symbol,
324 Value,
325 Builtin,
326 Raw
327 };
328
329 struct Arg
330 {
331 ArgKind kind;
332 uint16_t arg;
333 };
334
335 const std::unordered_map<Instruction, ArgKind> arg_kinds = {
336 { LOAD_SYMBOL, ArgKind::Symbol },
337 { LOAD_CONST, ArgKind::Value },
338 { POP_JUMP_IF_TRUE, ArgKind::Raw },
339 { STORE, ArgKind::Symbol },
340 { SET_VAL, ArgKind::Symbol },
341 { POP_JUMP_IF_FALSE, ArgKind::Raw },
342 { JUMP, ArgKind::Raw },
343 { CALL, ArgKind::Raw },
344 { CALL_BUILTIN, ArgKind::Raw },
345 { CAPTURE, ArgKind::Symbol },
346 { BUILTIN, ArgKind::Builtin },
347 { DEL, ArgKind::Symbol },
348 { MAKE_CLOSURE, ArgKind::Value },
349 { GET_FIELD, ArgKind::Symbol },
350 { PLUGIN, ArgKind::Value },
351 { LIST, ArgKind::Raw },
352 { APPEND, ArgKind::Raw },
353 { CONCAT, ArgKind::Raw },
354 { APPEND_IN_PLACE, ArgKind::Raw },
355 { CONCAT_IN_PLACE, ArgKind::Raw }
356 };
357
358 const auto color_print_inst = [&syms, &vals, &stringify_value](const std::string& name, std::optional<Arg> arg = std::nullopt) {
359 fmt::print("{}", fmt::styled(name, fmt::fg(fmt::color::gold)));
360 if (arg.has_value())
361 {
362 switch (auto [kind, idx] = arg.value(); kind)
363 {
364 case ArgKind::Symbol:
365 fmt::print(fmt::fg(fmt::color::green), " {}\n", syms.symbols[idx]);
366 break;
367 case ArgKind::Value:
368 fmt::print(fmt::fg(fmt::color::magenta), " {}\n", stringify_value(vals.values[idx]));
369 break;
370 case ArgKind::Builtin:
371 fmt::print(" {}\n", Builtins::builtins[idx].first);
372 break;
373 case ArgKind::Raw:
374 fmt::print(fmt::fg(fmt::color::red), " ({})\n", idx);
375 break;
376 }
377 }
378 else
379 fmt::print("\n");
380 };
381
382 if (segment == BytecodeSegment::All || segment == BytecodeSegment::Code || segment == BytecodeSegment::HeadersOnly)
383 {
384 uint16_t pp = 0;
385
386 for (const auto& page : code_block.pages)
387 {
388 bool displayCode = true;
389
390 if (auto wanted_page = cPage)
391 displayCode = pp == wanted_page.value();
392
393 if (displayCode)
394 fmt::println(
395 "{} {} (length: {})",
396 fmt::styled("Code segment", fmt::fg(fmt::color::magenta)),
397 fmt::styled(pp, fmt::fg(fmt::color::magenta)),
398 page.size());
399
400 if (page.empty())
401 {
402 if (displayCode)
403 fmt::print("NOP");
404 }
405 else
406 {
407 if (cPage.value_or(pp) != pp)
408 continue;
409 if (segment == BytecodeSegment::HeadersOnly)
410 continue;
411 if (sStart.has_value() && sEnd.has_value() && ((sStart.value() > page.size()) || (sEnd.value() > page.size())))
412 {
413 fmt::print(fmt::fg(fmt::color::red), "Slice start or end can't be greater than the segment size: {}\n", page.size());
414 return;
415 }
416
417 for (std::size_t j = sStart.value_or(0), end = sEnd.value_or(page.size()); j < end; j += 4)
418 {
419 const uint8_t inst = page[j];
420 // TEMP
421 const uint8_t padding = page[j + 1];
422 const auto arg = static_cast<uint16_t>((page[j + 2] << 8) + page[j + 3]);
423
424 // instruction number
425 fmt::print(fmt::fg(fmt::color::cyan), "{:>4}", j / 4);
426 // padding inst arg arg
427 fmt::print(" {:02x} {:02x} {:02x} {:02x} ", inst, padding, page[j + 2], page[j + 3]);
428
429 if (const auto idx = static_cast<std::size_t>(inst); idx < InstructionNames.size())
430 {
431 const auto inst_name = InstructionNames[idx];
432 if (const auto iinst = static_cast<Instruction>(inst); arg_kinds.contains(iinst))
433 color_print_inst(inst_name, Arg { arg_kinds.at(iinst), arg });
434 else
435 color_print_inst(inst_name);
436 }
437 else
438 fmt::println("Unknown instruction");
439 }
440 }
441 if (displayCode && segment != BytecodeSegment::HeadersOnly)
442 fmt::print("\n");
443
444 ++pp;
445 }
446 }
447 }
448
449 uint16_t BytecodeReader::readNumber(std::size_t& i) const
450 {
451 const auto x = static_cast<uint16_t>(m_bytecode[i] << 8);
452 const uint16_t y = m_bytecode[++i];
453 return x + y;
454 }
455}
Host the declaration of all the ArkScript builtins.
A bytecode disassembler for ArkScript.
The different instructions used by the compiler and virtual machine.
Symbols symbols() const
unsigned long long timestamp() const
Return the read timestamp from the bytecode file.
const bytecode_t & bytecode() noexcept
Return the bytecode object constructed.
uint16_t readNumber(std::size_t &i) const
Read a number from the bytecode, under the instruction pointer i.
Version version() const
Values values(const Symbols &symbols) const
void display(BytecodeSegment segment=BytecodeSegment::All, std::optional< uint16_t > sStart=std::nullopt, std::optional< uint16_t > sEnd=std::nullopt, std::optional< uint16_t > cPage=std::nullopt) const
Display the bytecode opcode in a human friendly way.
std::vector< unsigned char > sha256() const
Code code(const Values &values) const
void feed(const std::string &file)
Construct needed data before displaying information about a given file.
ARK_API const std::vector< std::pair< std::string, Value > > builtins
Instruction
The different bytecodes are stored here.
constexpr std::array InstructionNames
constexpr std::array types_to_str
Definition Value.hpp:52
std::vector< uint8_t > bytecode_t
Definition Common.hpp:22
std::vector< bytecode_t > pages
std::size_t start
Point to the CODE_SEGMENT_START byte in the bytecode.
std::vector< std::string > symbols
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the SYM_TABLE_START byte in the bytecode.
std::vector< Value > values
std::size_t end
Point to the byte following the last byte of the table in the bytecode.
std::size_t start
Point to the VAL_TABLE_START byte in the bytecode.