ArkScript
A small, fast, functional and scripting language for video games
IRCompiler.cpp
Go to the documentation of this file.
2
3#include <chrono>
4#include <utility>
5#include <unordered_map>
6#include <picosha2.h>
7#include <fmt/ostream.h>
8
9#include <Ark/Constants.hpp>
10#include <Ark/Literals.hpp>
11
12namespace Ark::internal
13{
14 using namespace literals;
15
16 IRCompiler::IRCompiler(const unsigned debug) :
17 m_logger("IRCompiler", debug)
18 {}
19
20 void IRCompiler::process(const std::vector<IR::Block>& pages, const std::vector<std::string>& symbols, const std::vector<ValTableElem>& values)
21 {
22 m_logger.traceStart("process");
24 pushSymAndValTables(symbols, values);
25
26 m_ir = pages;
27 compile();
28
29 if (m_ir.empty())
30 {
31 // code segment with a single instruction
33 m_bytecode.push_back(0_u8);
34 m_bytecode.push_back(1_u8);
35
36 m_bytecode.push_back(0_u8);
37 m_bytecode.push_back(HALT);
38 m_bytecode.push_back(0_u8);
39 m_bytecode.push_back(0_u8);
40 }
41
42 constexpr std::size_t header_size = 18;
43
44 // generate a hash of the tables + bytecode
45 std::vector<unsigned char> hash_out(picosha2::k_digest_size);
46 picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out);
47 m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end());
48
50 }
51
52 void IRCompiler::dumpToStream(std::ostream& stream) const
53 {
54 std::size_t index = 0;
55 for (const auto& block : m_ir)
56 {
57 fmt::println(stream, "page_{}", index);
58 for (const auto entity : block)
59 {
60 switch (entity.kind())
61 {
62 case IR::Kind::Label:
63 fmt::println(stream, ".L{}:", entity.label());
64 break;
65
66 case IR::Kind::Goto:
67 fmt::println(stream, "\tGOTO L{}", entity.label());
68 break;
69
71 fmt::println(stream, "\tGOTO_IF_TRUE L{}", entity.label());
72 break;
73
75 fmt::println(stream, "\tGOTO_IF_FALSE L{}", entity.label());
76 break;
77
79 fmt::println(stream, "\t{} {}", InstructionNames[entity.inst()], entity.primaryArg());
80 break;
81
83 fmt::println(stream, "\t{} {}, {}", InstructionNames[entity.inst()], entity.primaryArg(), entity.secondaryArg());
84 break;
85 }
86 }
87
88 fmt::println(stream, "");
89 ++index;
90 }
91 }
92
93 const bytecode_t& IRCompiler::bytecode() const noexcept
94 {
95 return m_bytecode;
96 }
97
99 {
100 // push the different code segments
101 for (std::size_t i = 0, end = m_ir.size(); i < end; ++i)
102 {
103 IR::Block& page = m_ir[i];
104 // just in case we got too far, always add a HALT to be sure the
105 // VM won't do anything crazy
106 page.emplace_back(HALT);
107
108 // push number of elements
109 const auto page_size = std::ranges::count_if(page, [](const auto& a) {
110 return a.kind() != IR::Kind::Label;
111 });
112 if (std::cmp_greater(page_size, std::numeric_limits<uint16_t>::max()))
113 throw std::overflow_error(fmt::format("Size of page {} exceeds the maximum size of 2^16 - 1", i));
114
116 m_bytecode.push_back(static_cast<uint8_t>((page_size & 0xff00) >> 8));
117 m_bytecode.push_back(static_cast<uint8_t>(page_size & 0x00ff));
118
119 // register labels position
120 uint16_t pos = 0;
121 std::unordered_map<IR::label_t, uint16_t> label_to_position;
122 for (auto inst : page)
123 {
124 switch (inst.kind())
125 {
126 case IR::Kind::Label:
127 label_to_position[inst.label()] = pos;
128 break;
129
130 default:
131 ++pos;
132 }
133 }
134
135 for (auto inst : page)
136 {
137 switch (inst.kind())
138 {
139 case IR::Kind::Goto:
140 pushWord(Word(JUMP, label_to_position[inst.label()]));
141 break;
142
144 pushWord(Word(POP_JUMP_IF_TRUE, label_to_position[inst.label()]));
145 break;
146
148 pushWord(Word(POP_JUMP_IF_FALSE, label_to_position[inst.label()]));
149 break;
150
151 case IR::Kind::Opcode:
152 [[fallthrough]];
154 pushWord(inst.bytecode());
155 break;
156
157 default:
158 break;
159 }
160 }
161 }
162 }
163
164 void IRCompiler::pushWord(const Word& word)
165 {
166 m_bytecode.push_back(word.opcode);
167 m_bytecode.push_back(word.byte_1);
168 m_bytecode.push_back(word.byte_2);
169 m_bytecode.push_back(word.byte_3);
170 }
171
173 {
174 /*
175 Generating headers:
176 - lang name (to be sure we are executing an ArkScript file)
177 on 4 bytes (ark + padding)
178 - version (major: 2 bytes, minor: 2 bytes, patch: 2 bytes)
179 - timestamp (8 bytes, unix format)
180 */
181
182 m_bytecode.push_back('a');
183 m_bytecode.push_back('r');
184 m_bytecode.push_back('k');
185 m_bytecode.push_back(0_u8);
186
187 // push version
188 for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH })
189 {
190 m_bytecode.push_back(static_cast<uint8_t>((n & 0xff00) >> 8));
191 m_bytecode.push_back(static_cast<uint8_t>(n & 0x00ff));
192 }
193
194 // push timestamp
195 const long long timestamp = std::chrono::duration_cast<std::chrono::seconds>(
196 std::chrono::system_clock::now().time_since_epoch())
197 .count();
198 for (long i = 0; i < 8; ++i)
199 {
200 const long shift = 8 * (7 - i);
201 const auto ts_byte = static_cast<uint8_t>((timestamp & (0xffLL << shift)) >> shift);
202 m_bytecode.push_back(ts_byte);
203 }
204 }
205
206 void IRCompiler::pushSymAndValTables(const std::vector<std::string>& symbols, const std::vector<ValTableElem>& values)
207 {
208 const std::size_t symbol_size = symbols.size();
209 if (symbol_size > std::numeric_limits<uint16_t>::max())
210 throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size));
211
212 m_bytecode.push_back(SYM_TABLE_START);
213 m_bytecode.push_back(static_cast<uint8_t>((symbol_size & 0xff00) >> 8));
214 m_bytecode.push_back(static_cast<uint8_t>(symbol_size & 0x00ff));
215
216 for (const auto& sym : symbols)
217 {
218 // push the string, null terminated
219 std::ranges::transform(sym, std::back_inserter(m_bytecode), [](const char i) {
220 return static_cast<uint8_t>(i);
221 });
222 m_bytecode.push_back(0_u8);
223 }
224
225 const std::size_t value_size = values.size();
226 if (value_size > std::numeric_limits<uint16_t>::max())
227 throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size));
228
229 m_bytecode.push_back(VAL_TABLE_START);
230 m_bytecode.push_back(static_cast<uint8_t>((value_size & 0xff00) >> 8));
231 m_bytecode.push_back(static_cast<uint8_t>(value_size & 0x00ff));
232
233 for (const ValTableElem& val : values)
234 {
235 switch (val.type)
236 {
238 {
239 m_bytecode.push_back(NUMBER_TYPE);
240 const auto n = std::get<double>(val.value);
241 std::string t = std::to_string(n);
242 std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
243 return static_cast<uint8_t>(i);
244 });
245 break;
246 }
247
249 {
250 m_bytecode.push_back(STRING_TYPE);
251 auto t = std::get<std::string>(val.value);
252 std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) {
253 return static_cast<uint8_t>(i);
254 });
255 break;
256 }
257
259 {
260 m_bytecode.push_back(FUNC_TYPE);
261 const std::size_t addr = std::get<std::size_t>(val.value);
262 m_bytecode.push_back(static_cast<uint8_t>((addr & 0xff00) >> 8));
263 m_bytecode.push_back(static_cast<uint8_t>(addr & 0x00ff));
264 break;
265 }
266 }
267
268 m_bytecode.push_back(0_u8);
269 }
270 }
271}
Constants used by ArkScript.
constexpr int ARK_VERSION_MAJOR
Definition Constants.hpp:18
constexpr int ARK_VERSION_PATCH
Definition Constants.hpp:20
constexpr int ARK_VERSION_MINOR
Definition Constants.hpp:19
Compile the intermediate representation to bytecode.
User defined literals for Ark internals.
IRCompiler(unsigned debug)
Create a new IRCompiler.
void dumpToStream(std::ostream &stream) const
Dump the IR given to process to an output stream.
const bytecode_t & bytecode() const noexcept
Return the constructed bytecode object.
void pushWord(const Word &word)
Push a word to the m_bytecode.
std::vector< IR::Block > m_ir
void pushFileHeader() noexcept
Push the file headers (magic, version used, timestamp)
void process(const std::vector< IR::Block > &pages, const std::vector< std::string > &symbols, const std::vector< ValTableElem > &values)
Turn a given IR into bytecode.
void pushSymAndValTables(const std::vector< std::string > &symbols, const std::vector< ValTableElem > &values)
Push the symbols and values tables.
void traceStart(std::string &&trace_name)
Definition Logger.hpp:75
std::vector< Entity > Block
Definition Entity.hpp:73
constexpr std::array InstructionNames
std::vector< uint8_t > bytecode_t
Definition Common.hpp:22
A Compiler Value class helper to handle multiple types.
uint8_t opcode
Instruction opcode.
Definition Word.hpp:19