ArkScript
A small, fast, functional and scripting language for video games
IRCompiler.cpp
Go to the documentation of this file.
2
3#include <chrono>
4#include <utility>
5#include <optional>
6#include <unordered_map>
7#include <Proxy/Picosha2.hpp>
8#include <fmt/ostream.h>
9
10#include <Ark/Constants.hpp>
11#include <Ark/Literals.hpp>
15
16namespace Ark::internal
17{
18 using namespace literals;
19
20 IRCompiler::IRCompiler(const unsigned debug) :
21 m_logger("IRCompiler", debug)
22 {}
23
// Turn the given IR pages, symbol list and value list into the bytecode stored in m_bytecode.
//
// Visible steps, in order:
//   1. write the symbol table, then the value table;
//   2. collect into m_filenames every distinct filename carried by an instruction
//      that reports a valid source location;
//   3. write the instruction location table;
//   4. copy `pages` into m_ir and call compile();
//   5. if there was no page at all, append a minimal code segment containing a single HALT;
//   6. hash everything located after bytecode::HeaderSize and insert the digest at that offset.
//
// NOTE(review): listing lines 27, 41, 50 and 65 are absent from this extract (each blank line's
// number is present, so these were real statements). Presumably they are the file header push,
// the filename table push, a code-segment marker byte and the matching trace end; confirm
// against the repository before relying on this description.
24 void IRCompiler::process(const std::vector<IR::Block>& pages, const std::vector<std::string>& symbols, const std::vector<ValTableElem>& values)
25 {
26 m_logger.traceStart("process");
28 pushSymbolTable(symbols);
29 pushValueTable(values);
30
31 // compute a list of unique filenames
// Only instructions with a valid source location contribute a filename; order of first
// appearance is preserved because new names are appended.
// NOTE(review): the linear find runs before the hasValidSourceLocation() test, so it is
// also evaluated for instructions that end up being ignored.
32 for (const auto& page : pages)
33 {
34 for (const auto& inst : page)
35 {
36 if (std::ranges::find(m_filenames, inst.filename()) == m_filenames.end() && inst.hasValidSourceLocation())
37 m_filenames.push_back(inst.filename());
38 }
39 }
40
// pushInstLocTable looks filenames up in m_filenames, so it has to run after the loop above.
42 pushInstLocTable(pages);
43
// compile() works on the m_ir copy (it appends to each page), leaving `pages` untouched.
44 m_ir = pages;
45 compile();
46
47 if (m_ir.empty())
48 {
49 // code segment with a single instruction
// Two bytes 0x00 0x01, then one 4-byte word: 0x00, HALT, 0x00, 0x00.
// NOTE(review): the first pair looks like a big-endian instruction count of 1 - confirm.
51 m_bytecode.push_back(0_u8);
52 m_bytecode.push_back(1_u8);
53
54 m_bytecode.push_back(0_u8);
55 m_bytecode.push_back(HALT);
56 m_bytecode.push_back(0_u8);
57 m_bytecode.push_back(0_u8);
58 }
59
60 // generate a hash of the tables + bytecode
// The digest covers [HeaderSize, end) and is then inserted at HeaderSize, i.e. it ends up
// between the first HeaderSize bytes and the data it was computed from.
61 std::vector<unsigned char> hash_out(picosha2::k_digest_size);
62 picosha2::hash256(m_bytecode.begin() + bytecode::HeaderSize, m_bytecode.end(), hash_out);
63 m_bytecode.insert(m_bytecode.begin() + bytecode::HeaderSize, hash_out.begin(), hash_out.end());
64
66 }
67
// Write a textual listing of m_ir to `stream`.
//
// Each page starts with a "page_<index>" line (index counted from 0), followed by one line
// per entity, and ends with an empty line. Labels are printed as ".L<id>:" without
// indentation; every other entity is printed behind a leading tab.
//
// NOTE(review): the case labels at listing lines 86, 90, 94 and 98 are absent from this
// extract. Judging by the strings printed, they select the conditional gotos and the one- and
// two-argument opcode entities - confirm against the repository.
68 void IRCompiler::dumpToStream(std::ostream& stream) const
69 {
70 std::size_t index = 0;
71 for (const auto& block : m_ir)
72 {
73 fmt::println(stream, "page_{}", index);
74 for (const auto& entity : block)
75 {
76 switch (entity.kind())
77 {
78 case IR::Kind::Label:
79 fmt::println(stream, ".L{}:", entity.label());
80 break;
81
82 case IR::Kind::Goto:
83 fmt::println(stream, "\tGOTO L{}", entity.label());
84 break;
85
87 fmt::println(stream, "\tGOTO_IF_TRUE L{}", entity.label());
88 break;
89
91 fmt::println(stream, "\tGOTO_IF_FALSE L{}", entity.label());
92 break;
93
// Instruction name looked up in InstructionNames, followed by its primary argument.
95 fmt::println(stream, "\t{} {}", InstructionNames[entity.inst()], entity.primaryArg());
96 break;
97
// Same as above, with the secondary argument appended after a comma.
99 fmt::println(stream, "\t{} {}, {}", InstructionNames[entity.inst()], entity.primaryArg(), entity.secondaryArg());
100 break;
101 }
102 }
103
// Blank separator line between pages.
104 fmt::println(stream, "");
105 ++index;
106 }
107 }
108
109 const bytecode_t& IRCompiler::bytecode() const noexcept
110 {
111 return m_bytecode;
112 }
113
// NOTE(review): the signature (listing line 114) is absent from this extract; this body is
// presumably IRCompiler::compile(), which process() calls right after filling m_ir - confirm.
//
// For every page of m_ir:
//   - append a HALT entity;
//   - count the entities that are not labels and throw std::overflow_error when that count
//     does not fit in 16 bits;
//   - first pass: record, for each label, the index of the instruction that follows it
//     (labels themselves do not advance the index);
//   - second pass: emit one 4-byte word per non-label entity, replacing goto entities by
//     jump instructions whose argument is the recorded position.
115 {
116 // push the different code segments
117 for (std::size_t i = 0, end = m_ir.size(); i < end; ++i)
118 {
119 IR::Block& page = m_ir[i];
120 // just in case we got too far, always add a HALT to be sure the
121 // VM won't do anything crazy
122 page.emplace_back(HALT);
123
124 // push number of elements
// Labels are excluded from the count: the second pass below emits nothing for them.
125 const auto page_size = std::ranges::count_if(page, [](const auto& a) {
126 return a.kind() != IR::Kind::Label;
127 });
128 if (std::cmp_greater(page_size, std::numeric_limits<uint16_t>::max()))
129 throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i));
130
// NOTE(review): listing lines 131-132 are absent from this extract; presumably they write the
// code-segment marker and page_size to m_bytecode - confirm against the repository.
133
134 // register labels position
// `pos` cannot overflow: it is incremented once per non-label entity, and that count was
// checked against the uint16_t maximum above.
135 uint16_t pos = 0;
136 std::unordered_map<IR::label_t, uint16_t> label_to_position;
137 for (auto& inst : page)
138 {
139 switch (inst.kind())
140 {
141 case IR::Kind::Label:
142 label_to_position[inst.label()] = pos;
143 break;
144
145 default:
146 ++pos;
147 }
148 }
149
// NOTE(review): the lookups below use operator[], so a goto whose label was never registered
// in this page silently resolves to position 0 instead of raising an error.
150 for (auto& inst : page)
151 {
152 switch (inst.kind())
153 {
154 case IR::Kind::Goto:
155 pushWord(Word(JUMP, label_to_position[inst.label()]));
156 break;
157
// NOTE(review): case label at listing line 158 is absent (presumably the goto-if-true kind).
159 pushWord(Word(POP_JUMP_IF_TRUE, label_to_position[inst.label()]));
160 break;
161
// NOTE(review): case label at listing line 162 is absent (presumably the goto-if-false kind).
163 pushWord(Word(POP_JUMP_IF_FALSE, label_to_position[inst.label()]));
164 break;
165
166 case IR::Kind::Opcode:
167 [[fallthrough]];
// NOTE(review): case label at listing line 168 is absent (presumably the two-argument opcode kind).
169 pushWord(inst.bytecode());
170 break;
171
// Remaining kinds (labels at least) produce no bytes.
172 default:
173 break;
174 }
175 }
176 }
177 }
178
179 void IRCompiler::pushWord(const Word& word)
180 {
181 m_bytecode.push_back(word.opcode);
182 m_bytecode.push_back(word.byte_1);
183 m_bytecode.push_back(word.byte_2);
184 m_bytecode.push_back(word.byte_3);
185 }
186
// NOTE(review): the signature (listing line 187) is absent from this extract; the body matches
// the header layout described in the comment below, so this is presumably
// IRCompiler::pushFileHeader() - confirm against the repository.
//
// Appends to m_bytecode: the magic bytes 'a' 'r' 'k' 0, the three version numbers, and the
// current time as an 8-byte count of seconds since the system_clock epoch.
188 {
189 /*
190 Generating headers:
191 - lang name (to be sure we are executing an ArkScript file)
192 on 4 bytes (ark + padding)
193 - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes)
194 - timestamp (8 bytes, unix format)
195 */
196
197 m_bytecode.push_back('a');
198 m_bytecode.push_back('r');
199 m_bytecode.push_back('k');
200 m_bytecode.push_back(0_u8);
201
202 // push version
// NOTE(review): the loop body (listing line 204) is absent from this extract; per the header
// comment above it presumably writes each component on 2 bytes - confirm.
203 for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH })
205
206 // push timestamp
207 const long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
208 std::chrono::system_clock::now().time_since_epoch())
209 .count();
// Emit the timestamp most significant byte first: the shift goes 56, 48, ..., 0.
210 for (long i = 0; i < 8; ++i)
211 {
212 const long shift = 8 * (7 - i);
213 const auto ts_byte = static_cast<uint8_t>((timestamp & (0xffLL << shift)) >> shift);
214 m_bytecode.push_back(ts_byte);
215 }
216 }
217
// Append the symbol table to m_bytecode: the SYM_TABLE_START marker, then every symbol as its
// raw bytes followed by a 0 terminator, in the order given.
//
// Throws std::overflow_error when there are more symbols than a uint16_t can count.
//
// NOTE(review): listing line 225 is absent from this extract; given the 16-bit limit checked
// just before, it presumably writes symbol_size on 2 bytes - confirm against the repository.
218 void IRCompiler::pushSymbolTable(const std::vector<std::string>& symbols)
219 {
220 const std::size_t symbol_size = symbols.size();
221 if (symbol_size > std::numeric_limits<uint16_t>::max())
222 throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size));
223
224 m_bytecode.push_back(SYM_TABLE_START);
226
227 for (const auto& sym : symbols)
228 {
229 // push the string, null terminated
230 std::ranges::transform(sym, std::back_inserter(m_bytecode), [](const char i) {
231 return static_cast<uint8_t>(i);
232 });
233 m_bytecode.push_back(0_u8);
234 }
235 }
236
// Append the value (constant) table to m_bytecode: the VAL_TABLE_START marker, then one record
// per value made of a type tag byte, a type-specific payload and a trailing 0 byte.
//
// Throws std::overflow_error when there are more values than a uint16_t can count.
//
// NOTE(review): listing lines 244, 250, 260, 270 and 274 are absent from this extract.
// Line 244 presumably writes value_size on 2 bytes; 250/260/270 are the case labels of the
// three branches below; 274 presumably serialises `addr` - confirm against the repository.
237 void IRCompiler::pushValueTable(const std::vector<ValTableElem>& values)
238 {
239 const std::size_t value_size = values.size();
240 if (value_size > std::numeric_limits<uint16_t>::max())
241 throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size));
242
243 m_bytecode.push_back(VAL_TABLE_START);
245
246 for (const ValTableElem& val : values)
247 {
248 switch (val.type)
249 {
// Number: tag, then the exponent and mantissa produced by ieee754::serialize, both written
// with serializeToVecLE.
251 {
252 m_bytecode.push_back(NUMBER_TYPE);
253 const auto n = std::get<double>(val.value);
254 const auto [exponent, mantissa] = ieee754::serialize(n);
255 serializeToVecLE(exponent, m_bytecode);
256 serializeToVecLE(mantissa, m_bytecode);
257 break;
258 }
259
// String: tag, then the raw bytes of the string.
// NOTE(review): `auto t` copies the string out of the variant; a const reference would do.
261 {
262 m_bytecode.push_back(STRING_TYPE);
263 auto t = std::get<std::string>(val.value);
264 std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
265 return static_cast<uint8_t>(i);
266 });
267 break;
268 }
269
// Function: tag, then the value held as std::size_t (its serialisation is on the missing line).
271 {
272 m_bytecode.push_back(FUNC_TYPE);
273 const std::size_t addr = std::get<std::size_t>(val.value);
275 break;
276 }
277 }
278
// Every record, whatever its type, is closed by a 0 byte.
279 m_bytecode.push_back(0_u8);
280 }
281 }
282
// NOTE(review): the signature (listing line 283) is absent from this extract. This body writes
// the contents of m_filenames, so it is presumably the filename-table writer that process()
// invokes before pushInstLocTable - confirm the name against the repository.
//
// Throws std::overflow_error when m_filenames holds more entries than a uint16_t can count,
// then appends each filename as its raw bytes followed by a 0 terminator.
//
// NOTE(review): listing lines 288 and 290 are also absent; presumably a table marker byte and
// the 2-byte element count announced by the comment at line 289 - confirm.
284 {
285 if (m_filenames.size() > std::numeric_limits<uint16_t>::max())
286 throw std::overflow_error(fmt::format("Too many filenames: {}, exceeds the maximum size of 2^16 - 1", m_filenames.size()));
287
289 // push number of elements
291
292 for (const auto& name : m_filenames)
293 {
294 std::ranges::transform(name, std::back_inserter(m_bytecode), [](const char i) {
295 return static_cast<uint8_t>(i);
296 });
297 m_bytecode.push_back(0_u8);
298 }
299 }
300
// Build and append the instruction location table, which maps (page, instruction index) to
// (filename id, source line).
//
// Phase 1 walks every page and records a location for each instruction that has a valid source
// location, unless the previously recorded location already has the same page, filename id and
// line. The instruction index `ip` counts non-label entities only.
// Phase 2 writes the number of locations on 2 bytes, then each location as page pointer,
// instruction pointer and filename id (2 bytes each, via serializeOn2BytesToVecBE) followed by
// the line (via serializeToVecBE).
//
// NOTE(review): listing line 335 is absent from this extract; presumably it pushes the table's
// start marker before the count - confirm against the repository.
// NOTE(review): neither the page index cast to uint16_t nor locations.size() is range-checked
// in this function; whether callers guarantee the bounds is not visible here.
301 void IRCompiler::pushInstLocTable(const std::vector<IR::Block>& pages)
302 {
303 std::vector<internal::InstLoc> locations;
304 for (std::size_t i = 0, end = pages.size(); i < end; ++i)
305 {
306 const auto& page = pages[i];
307 uint16_t ip = 0;
308
309 for (const auto& inst : page)
310 {
311 if (inst.hasValidSourceLocation())
312 {
313 // we are guaranteed to have a value since we listed all existing filenames in IRCompiler::process before,
314 // thus we do not have to check if std::ranges::find returned a valid iterator.
315 auto file_id = static_cast<uint16_t>(std::distance(m_filenames.begin(), std::ranges::find(m_filenames, inst.filename())));
316
317 std::optional<internal::InstLoc> prev = std::nullopt;
318 if (!locations.empty())
319 prev = locations.back();
320
321 // skip redundant instruction location
// A location is redundant only relative to the last one recorded, not to any earlier one.
322 if (!(prev.has_value() && prev->filename_id == file_id && prev->line == inst.sourceLine() && prev->page_pointer == i))
323 locations.push_back(
324 { .page_pointer = static_cast<uint16_t>(i),
325 .inst_pointer = ip,
326 .filename_id = file_id,
327 .line = static_cast<uint32_t>(inst.sourceLine()) });
328 }
329
// Labels do not advance ip, whether or not they carried a source location.
330 if (inst.kind() != IR::Kind::Label)
331 ++ip;
332 }
333 }
334
336 serializeOn2BytesToVecBE(locations.size(), m_bytecode);
337
// NOTE(review): this `prev` is assigned in the loop below but never read in the visible code.
338 std::optional<internal::InstLoc> prev = std::nullopt;
339
340 for (const auto& loc : locations)
341 {
342 serializeOn2BytesToVecBE(loc.page_pointer, m_bytecode);
343 serializeOn2BytesToVecBE(loc.inst_pointer, m_bytecode);
344 serializeOn2BytesToVecBE(loc.filename_id, m_bytecode);
345 serializeToVecBE(loc.line, m_bytecode);
346
347 prev = loc;
348 }
349 }
350}
Constants used by ArkScript.
constexpr int ARK_VERSION_MAJOR
Definition Constants.hpp:17
constexpr int ARK_VERSION_PATCH
Definition Constants.hpp:19
constexpr int ARK_VERSION_MINOR
Definition Constants.hpp:18
Compile the intermediate representation to bytecode.
User defined literals for Ark internals.
void pushInstLocTable(const std::vector< IR::Block > &pages)
IRCompiler(unsigned debug)
Create a new IRCompiler.
void dumpToStream(std::ostream &stream) const
Dump the IR given to process to an output stream.
const bytecode_t & bytecode() const noexcept
Return the constructed bytecode object.
std::vector< std::string > m_filenames
void pushWord(const Word &word)
Push a word (4 bytes) to the m_bytecode.
std::vector< IR::Block > m_ir
void pushFileHeader() noexcept
Push the file headers (magic, version used, timestamp)
void process(const std::vector< IR::Block > &pages, const std::vector< std::string > &symbols, const std::vector< ValTableElem > &values)
Turn a given IR into bytecode.
void pushValueTable(const std::vector< ValTableElem > &values)
void pushSymbolTable(const std::vector< std::string > &symbols)
void traceStart(std::string &&trace_name)
Definition Logger.hpp:74
std::vector< Entity > Block
Definition Entity.hpp:83
constexpr std::size_t HeaderSize
Definition Common.hpp:39
DecomposedDouble serialize(const double n)
void serializeToVecBE(std::integral auto number, std::vector< uint8_t > &out)
void serializeToVecLE(std::integral auto number, std::vector< uint8_t > &out)
void serializeOn2BytesToVecBE(std::integral auto number, std::vector< uint8_t > &out)
constexpr std::array InstructionNames
std::vector< uint8_t > bytecode_t
Definition Common.hpp:22
A Compiler Value class helper to handle multiple types.
uint8_t opcode
Instruction opcode.
Definition Word.hpp:18