ArkScript
A small, fast, functional and scripting language for video games
Compiler.hpp
Go to the documentation of this file.
1/**
2 * @file Compiler.hpp
3 * @author Alexandre Plateau ([email protected])
4 * @brief ArkScript compiler is in charge of transforming the AST into bytecode
5 * @version 0.3
6 * @date 2020-10-27
7 *
8 * @copyright Copyright (c) 2020-2021
9 *
10 */
11
12#ifndef ARK_COMPILER_COMPILER_HPP
13#define ARK_COMPILER_COMPILER_HPP
14
15#include <vector>
16#include <string>
17#include <cinttypes>
18#include <optional>
19
20#include <Ark/Platform.hpp>
26
27namespace Ark
28{
29 class State;
30
31 /**
32 * @brief The ArkScript bytecode compiler
33 *
34 */
36 {
37 public:
38 /**
39 * @brief Construct a new Compiler object
40 * @param debug the debug level
41 * @param libenv presumably the library search paths (TODO confirm)
42 * @param options the compiler's options
43 */
44 Compiler(unsigned debug, const std::vector<std::string>& libenv, uint16_t options = DefaultFeatures);
45
46 /**
47 * @brief Feed the different variables with information taken from the given source code file
48 *
49 * @param code the code of the file
50 * @param filename the name of the file
51 */
52 void feed(const std::string& code, const std::string& filename = ARK_NO_NAME_FILE);
53
54 /**
55 * @brief Start the compilation
56 *
57 */
58 void compile();
59
60 /**
61 * @brief Save generated bytecode to a file
62 *
63 * @param file the name of the file where the bytecode will be saved
64 */
65 void saveTo(const std::string& file);
66
67 /**
68 * @brief Return the constructed bytecode object
69 *
70 * @return const bytecode_t&
71 */
72 const bytecode_t& bytecode() noexcept;
73
74 friend class Ark::State;
75
76 private:
79 uint16_t m_options;
80 // tables: symbols, values, plugins and code pages
81 std::vector<internal::Node> m_symbols;
82 std::vector<std::string> m_defined_symbols;
83 std::vector<std::string> m_plugins;
84 std::vector<internal::ValTableElem> m_values;
85 std::vector<std::vector<uint8_t>> m_code_pages;  ///< finalized code pages, addressed by non-negative indices in page()
86 std::vector<std::vector<uint8_t>> m_temp_pages; ///< we need temporary code pages for some compilation passes (addressed by negative indices in page())
87
89 unsigned m_debug; ///< the debug level of the compiler
90
91 /**
92 * @brief Push the file headers (magic, version used, timestamp)
93 *
94 */
95 void pushFileHeader() noexcept;
96
97 /**
98 * @brief Push the symbols and values tables
99 *
100 */
101 void pushSymAndValTables();
102
103 /**
104 * @brief Helper function returning a reference to a temporary or finalized code page
105 *
106 * @param i page index, if negative, refers to a temporary code page
107 * @return std::vector<uint8_t>&
108 */
109 inline std::vector<uint8_t>& page(int i) noexcept  // the index is not range-checked (plain operator[])
110 {
111 if (i >= 0)
112 return m_code_pages[i];  // non-negative index: finalized code page i
113 return m_temp_pages[-i - 1];  // negative index: -1 maps to temporary page 0, -2 to page 1, ...
114 }
115
116 /**
117 * @brief Helper function returning a pointer to a temporary or finalized code page
118 *
119 * @param i page index, if negative, refers to a temporary code page
120 * @return std::vector<uint8_t>*
121 */
122 inline std::vector<uint8_t>* page_ptr(int i) noexcept  // same index mapping as page(), but yields an address; not range-checked
123 {
124 if (i >= 0)
125 return &m_code_pages[i];  // non-negative index: finalized code page i
126 return &m_temp_pages[-i - 1];  // negative index: -1 maps to temporary page 0, -2 to page 1, ...
127 }
128
129 inline void setNumberAt(int p, std::size_t at_inst, std::size_t number)  // overwrite two bytes of page p, starting at offset at_inst, with the low 16 bits of number
130 {
131 page(p)[at_inst] = (number & 0xff00) >> 8;  // most significant of the two bytes goes first
132 page(p)[at_inst + 1] = number & 0x00ff;  // least significant byte second; bits above the low 16 are discarded
133 }
134
135 /**
136 * @brief Count the number of "valid" ark objects in a node
137 * @details A GetField isn't considered valid, because we use
138 * this function to count the number of arguments of function calls.
139 *
140 * @param lst
141 * @return std::size_t
142 */
143 std::size_t countArkObjects(const std::vector<internal::Node>& lst) noexcept;
144
145 /**
146 * @brief Checking if a symbol is an operator
147 *
148 * @param name symbol name
149 * @return std::optional<std::size_t> position in the operators' list
150 */
151 std::optional<std::size_t> isOperator(const std::string& name) noexcept;
152
153 /**
154 * @brief Checking if a symbol is a builtin
155 *
156 * @param name symbol name
157 * @return std::optional<std::size_t> position in the builtins' list
158 */
159 std::optional<std::size_t> isBuiltin(const std::string& name) noexcept;
160
161 /**
162 * @brief Check if a symbol needs to be compiled to a specific instruction
163 *
164 * @param name
165 * @return std::optional<internal::Instruction> corresponding instruction if it exists
166 */
167 inline std::optional<internal::Instruction> isSpecific(const std::string& name) noexcept  // exact string comparison against a fixed list of names
168 {
169 if (name == "list")
170 return internal::Instruction::LIST;
171 else if (name == "append")
172 return internal::Instruction::APPEND;
173 else if (name == "concat")
174 return internal::Instruction::CONCAT;
175 else if (name == "append!")  // the names ending in '!' map to the *_IN_PLACE instructions
176 return internal::Instruction::APPEND_IN_PLACE;
177 else if (name == "concat!")
178 return internal::Instruction::CONCAT_IN_PLACE;
179 else if (name == "pop")
180 return internal::Instruction::POP_LIST;
181 else if (name == "pop!")
182 return internal::Instruction::POP_LIST_IN_PLACE;
183
184 return std::nullopt;  // any other name has no dedicated instruction
185 }
186
187 /**
188 * @brief Check if a given instruction is unary (takes only one argument)
189 *
190 * @param inst
191 * @return true the instruction is unary
192 * @return false
193 */
194 bool isUnaryInst(internal::Instruction inst) noexcept;
195
196 /**
197 * @brief Compute specific instruction argument count
198 *
199 * @param inst
200 * @param previous
201 * @param p
202 */
203 void pushSpecificInstArgc(internal::Instruction inst, uint16_t previous, int p) noexcept;
204
205 /**
206 * @brief Checking if a symbol may be coming from a plugin
207 *
208 * @param name symbol name
209 * @return true the symbol may be from a plugin, loaded at runtime
210 * @return false
211 */
212 bool mayBeFromPlugin(const std::string& name) noexcept;
213
214 /**
215 * @brief Throw a nice error message
216 *
217 * @param message
218 * @param node
219 */
220 [[noreturn]] void throwCompilerError(const std::string& message, const internal::Node& node);
221
222 /**
223 * @brief Display a warning message
224 *
225 * @param message
226 * @param node
227 */
228 void compilerWarning(const std::string& message, const internal::Node& node);
229
230 /**
231 * @brief Compile a single node recursively
232 *
233 * @param x the internal::Node to compile
234 * @param p the current page number we're on
235 * @param produces_result
236 * @param is_terminal
237 * @param var_name
238 */
239 void _compile(const internal::Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name = "");
240
241 void compileSymbol(const internal::Node& x, int p, bool produces_result);
242 void compileSpecific(const internal::Node& c0, const internal::Node& x, int p, bool produces_result);
243 void compileIf(const internal::Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name);
244 void compileFunction(const internal::Node& x, int p, bool produces_result, const std::string& var_name);
245 void compileLetMutSet(internal::Keyword n, const internal::Node& x, int p);
246 void compileWhile(const internal::Node& x, int p);
247 void compileQuote(const internal::Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name);
248 void compilePluginImport(const internal::Node& x, int p);
249 void compileDel(const internal::Node& x, int p);
250 void handleCalls(const internal::Node& x, int p, bool produces_result, bool is_terminal, const std::string& var_name);
251
252 /**
253 * @brief Put a value in the bytecode, handling the closures chains
254 *
255 * @param x value node
256 * @param p current page index
257 * @param produces_result
258 */
259 void putValue(const internal::Node& x, int p, bool produces_result);
260
261 /**
262 * @brief Register a given node in the symbol table
263 * @details Can throw if the table is full
264 *
265 * @param sym
266 * @return uint16_t
267 */
268 uint16_t addSymbol(const internal::Node& sym);
269
270 /**
271 * @brief Register a given node in the value table
272 * @details Can throw if the table is full
273 *
274 * @param x
275 * @return uint16_t
276 */
277 uint16_t addValue(const internal::Node& x);
278
279 /**
280 * @brief Register a page id (function reference) in the value table
281 * @details Can throw if the table is full
282 *
283 * @param page_id
284 * @param current A reference to the current node, for context
285 * @return uint16_t
286 */
287 uint16_t addValue(std::size_t page_id, const internal::Node& current);
288
289 /**
290 * @brief Register a symbol as defined, so that later we can throw errors on undefined symbols
291 *
292 * @param sym
293 */
294 void addDefinedSymbol(const std::string& sym);
295
296 /**
297 * @brief Checks for undefined symbols, not present in the defined symbols table
298 *
299 */
300 void checkForUndefinedSymbol();
301
302 /**
303 * @brief Push a number on stack (need 2 bytes)
304 *
305 * @param n the number to push
306 * @param page the page where it should land, nullptr for current page
307 */
308 void pushNumber(uint16_t n, std::vector<uint8_t>* page = nullptr) noexcept;
309
310 /**
311 * @brief Suggest a symbol of what the user may have meant to input
312 *
313 * @param str the string
314 * @return std::string
315 */
316 std::string offerSuggestion(const std::string& str);
317 };
318}
319
320#endif
#define ARK_NO_NAME_FILE
Definition: Constants.hpp:26
The different instructions used by the compiler and virtual machine.
#define ARK_API
Definition: Module.hpp:29
AST node used by the parser, optimizer and compiler.
Optimizes a given ArkScript AST.
Parses a token stream into an AST by using the Ark::Node.
ArkScript configuration macros.
The basic value type handled by the compiler.
The ArkScript bytecode compiler.
Definition: Compiler.hpp:36
std::optional< internal::Instruction > isSpecific(const std::string &name) noexcept
Check if a symbol needs to be compiled to a specific instruction.
Definition: Compiler.hpp:167
std::vector< std::vector< uint8_t > > m_temp_pages
we need temporary code pages for some compilation passes
Definition: Compiler.hpp:86
std::vector< std::string > m_defined_symbols
Definition: Compiler.hpp:82
internal::Optimizer m_optimizer
Definition: Compiler.hpp:78
std::vector< std::string > m_plugins
Definition: Compiler.hpp:83
unsigned m_debug
the debug level of the compiler
Definition: Compiler.hpp:89
std::vector< internal::ValTableElem > m_values
Definition: Compiler.hpp:84
uint16_t m_options
Definition: Compiler.hpp:79
internal::Parser m_parser
Definition: Compiler.hpp:77
std::vector< uint8_t > * page_ptr(int i) noexcept
Helper function returning a pointer to a temporary or finalized code page
Definition: Compiler.hpp:122
void setNumberAt(int p, std::size_t at_inst, std::size_t number)
Definition: Compiler.hpp:129
std::vector< internal::Node > m_symbols
Definition: Compiler.hpp:81
std::vector< std::vector< uint8_t > > m_code_pages
Definition: Compiler.hpp:85
bytecode_t m_bytecode
Definition: Compiler.hpp:88
Ark state to handle the dirty job of loading and compiling ArkScript code.
Definition: State.hpp:31
A node of an Abstract Syntax Tree for ArkScript.
Definition: Node.hpp:29
The ArkScript AST optimizer.
Definition: Optimizer.hpp:32
The parser is responsible for constructing the Abstract Syntax Tree from a token list.
Definition: Parser.hpp:44
Keyword
The different keywords available.
Definition: Common.hpp:59
Instruction
The different bytecodes are stored here.
Definition: Builtins.hpp:21
constexpr uint16_t DefaultFeatures
Definition: Constants.hpp:52
std::vector< uint8_t > bytecode_t
Definition: Common.hpp:22