ArkScript
A small, lisp-inspired, functional scripting language
NameResolutionPass.cpp
Go to the documentation of this file.
2
3#include <Ark/Exceptions.hpp>
4#include <Ark/Utils.hpp>
6
7namespace Ark::internal
8{
// NameResolutionPass constructor. Its signature line is not shown in this listing; the member
// index at the end of the page gives it as `NameResolutionPass(unsigned debug)`.
// NOTE(review): the embedded numbering also skips 19-21, so the end of the body is not visible.
// Passes the name "NameResolution" and the debug value to Pass (presumably the base class).
10 Pass("NameResolution", debug)
11 {
// Fill m_language_symbols with every builtin name, operator and list instruction;
// this set is later used to reject reserved identifiers and to offer suggestions.
12 for (const auto& builtin : Builtins::builtins)
13 m_language_symbols.emplace(builtin.first);
14 for (auto ope : Language::operators)
15 m_language_symbols.emplace(ope);
16 for (auto inst : Language::listInstructions)
17 m_language_symbols.emplace(inst);
18
22 }
23
// Body of process. Its signature line is not shown in this listing; the member index at the
// end of the page gives it as `void process(const Node &ast) override`.
// NOTE(review): the embedded numbering skips 31, 34 and 38-39, so some statements are not visible.
25 {
26 m_logger.traceStart("process");
27
// Work on our own copy of the input tree, and visit it while registering declarations.
28 m_ast = ast;
29 visit(m_ast, /* register_declarations= */ true);
30
32
// Dump the tree to stdout after resolution.
// NOTE(review): the line just before the debugPrint call is missing here; it may be a guard.
33 m_logger.trace("AST after name resolution");
35 m_ast.debugPrint(std::cout) << '\n';
36
37 m_logger.traceStart("checkForUndefinedSymbol");
40 }
41
42 const Node& NameResolutionPass::ast() const noexcept
43 {
44 return m_ast;
45 }
46
47 std::string NameResolutionPass::addDefinedSymbol(const std::string& sym, const bool is_mutable)
48 {
49 const std::string fully_qualified_name = m_scope_resolver.registerInCurrent(sym, is_mutable);
50 m_defined_symbols.emplace(fully_qualified_name);
51 return fully_qualified_name;
52 }
53
// Recursively visit `node`, dispatching on node.nodeType().
// @param node AST node to visit
// @param register_declarations forwarded to nested visits and to visitKeyword; the mutability
//        checks on function calls below only run when it is true
// @throws CodeError on a mutability violation in a function call, or when an imported symbol
//         can not be found
// NOTE(review): the embedded numbering skips 58, 61, 71, 95, 109, 124, 131 and 153, so a few
// statements (including two `case` labels) are not visible in this listing.
54 void NameResolutionPass::visit(Node& node, const bool register_declarations)
55 {
56 switch (node.nodeType())
57 {
// (case label not shown) remember the name held by the node, then record the node
59 {
60 const std::string old_name = node.string();
62 addSymbolNode(node, old_name);
63 break;
64 }
65
// every child of a field node is recorded with addSymbolNode
66 case NodeType::Field:
67 for (auto& child : node.list())
68 {
69 const std::string old_name = child.string();
70 // in case of field, no need to check if we can fully qualify names
72 addSymbolNode(child, old_name);
73 }
74 break;
75
76 case NodeType::List:
77 if (!node.constList().empty())
78 {
// a list starting with a keyword is delegated to visitKeyword
79 if (node.constList()[0].nodeType() == NodeType::Keyword)
80 visitKeyword(node, node.constList()[0].keyword(), register_declarations);
81 else
82 {
83 // function calls
84 // the UpdateRef function calls kind get a special treatment, like let/mut/set,
85 // because we need to check for mutability errors
86 if (node.constList().size() > 1 && node.constList()[0].nodeType() == NodeType::Symbol &&
87 node.constList()[1].nodeType() == NodeType::Symbol && register_declarations)
88 {
89 const auto funcname = node.constList()[0].string();
90 const auto arg = node.constList()[1].string();
91
// an unknown mutability (empty optional) is treated as "not immutable"
92 if (std::ranges::find(Language::UpdateRef, funcname) != Language::UpdateRef.end() && m_scope_resolver.isImmutable(arg).value_or(false))
93 throw CodeError(
94 fmt::format("MutabilityError: Can not modify the constant list `{}' using `{}'", arg, funcname),
96 node.filename(),
97 node.constList()[1].line(),
98 node.constList()[1].col(),
99 arg));
100
101 // check that we aren't doing a (append! a a) nor a (concat! a a)
102 if (funcname == Language::AppendInPlace || funcname == Language::ConcatInPlace)
103 {
// arguments after the target (index 2 and up) must not be the target symbol itself;
// the error is reported at the position of the target (index 1)
104 for (std::size_t i = 2, end = node.constList().size(); i < end; ++i)
105 {
106 if (node.constList()[i].nodeType() == NodeType::Symbol && node.constList()[i].string() == arg)
107 throw CodeError(
108 fmt::format("MutabilityError: Can not {} the list `{}' to itself", funcname, arg),
110 node.filename(),
111 node.constList()[1].line(),
112 node.constList()[1].col(),
113 arg));
114 }
115 }
116 }
117
// visit every element of the call, the callee included
118 for (auto& child : node.list())
119 visit(child, register_declarations);
120 }
121 }
122 break;
123
// (case label not shown) handling of a node holding a namespace
125 {
126 auto& namespace_ = node.arkNamespace();
127 // no need to guard createNewNamespace with an if (register_declarations), we want to keep the namespace node
128 // (which will get ignored by the compiler, that only uses its AST), so that we can (re)construct the
129 // scopes correctly
130 m_scope_resolver.createNewNamespace(namespace_.name, namespace_.with_prefix, namespace_.is_glob, namespace_.symbols);
132
133 visit(*namespace_.ast, /* register_declarations= */ true);
134 // dual visit so that we can handle forward references
135 visit(*namespace_.ast, /* register_declarations= */ false);
136
137 // if we had specific symbols to import, check that those exist
138 if (!namespace_.symbols.empty())
139 {
140 for (const auto& sym : namespace_.symbols)
141 {
// NOTE(review): `scope` is declared on a line that is not shown in this listing
142 if (!scope->get(sym, true).has_value())
143 throw CodeError(
144 fmt::format("ImportError: Can not import symbol {} from {}, as it isn't in the package", sym, namespace_.name),
146 namespace_.ast->filename(),
147 namespace_.ast->line(),
148 namespace_.ast->col(),
149 "import"));
150 }
151 }
152
154 break;
155 }
156
// any other node type is left untouched
157 default:
158 break;
159 }
160 }
161
// Visit a list node whose first element is a keyword, applying the rules of that keyword.
// @param node the list node (the caller checked that node.constList()[0] is a keyword node)
// @param keyword the keyword held by the first element
// @param register_declarations when false, every check guarded by it below is skipped and the
//        declared names are not rewritten to their fully qualified form
// @throws CodeError on reserved identifier, mutability or capture errors
// NOTE(review): the embedded numbering skips 181, 190, 200, 206, 226, 230, 235, 246 and 265,
// so a few statements are not visible in this listing.
162 void NameResolutionPass::visitKeyword(Node& node, const Keyword keyword, const bool register_declarations)
163 {
164 switch (keyword)
165 {
166 case Keyword::Set:
167 [[fallthrough]];
168 case Keyword::Let:
169 [[fallthrough]];
170 case Keyword::Mut:
171 // first, visit the value, then register the symbol
172 // this allows us to detect things like (let foo (fun (&foo) ()))
173 if (node.constList().size() > 2)
174 visit(node.list()[2], register_declarations);
175 if (node.constList().size() > 1 && node.constList()[1].nodeType() == NodeType::Symbol)
176 {
177 const std::string& name = node.constList()[1].string();
// builtins, operators and list instructions can not be redefined
178 if (m_language_symbols.contains(name) && register_declarations)
179 throw CodeError(
180 fmt::format("Can not use a reserved identifier ('{}') as a {} name.", name, keyword == Keyword::Let ? "constant" : "variable"),
182 node.filename(),
183 node.constList()[1].line(),
184 node.constList()[1].col(),
185 name));
186
// `let` on a name that already exists in the current scope is an error
187 if (m_scope_resolver.isInScope(name) && keyword == Keyword::Let && register_declarations)
188 throw CodeError(
189 fmt::format("MutabilityError: Can not use 'let' to redefine variable `{}'", name),
191 node.filename(),
192 node.constList()[1].line(),
193 node.constList()[1].col(),
194 name));
195 if (keyword == Keyword::Set && m_scope_resolver.isRegistered(name))
196 {
// NOTE(review): node.constList()[2] is read for the message without a size check here; only
// the visit above is guarded by size() > 2. Presumably the parser guarantees a value, confirm.
197 if (m_scope_resolver.isImmutable(name).value_or(false) && register_declarations)
198 throw CodeError(
199 fmt::format("MutabilityError: Can not set the constant `{}' to {}", name, node.constList()[2].repr()),
201 node.filename(),
202 node.constList()[1].line(),
203 node.constList()[1].col(),
204 name));
205
207 }
208 else if (keyword != Keyword::Set)
209 {
210 // update the declared variable name to use the fully qualified name
211 // this will prevent name conflicts, and handle scope resolution
// the symbol is registered on every visit; only the rename of the node is guarded
212 const std::string fully_qualified_name = addDefinedSymbol(name, keyword != Keyword::Let);
213 if (register_declarations)
214 node.list()[1].setString(fully_qualified_name);
215 }
216 }
217 break;
218
// remember the last component of the import as a possible plugin name
// NOTE(review): the guard tests empty() but the code then reads index 1, and calls back() on
// its list; a size() > 1 check looks like what is needed here, confirm against the parser.
219 case Keyword::Import:
220 if (!node.constList().empty())
221 m_plugin_names.push_back(node.constList()[1].constList().back().string());
222 break;
223
224 case Keyword::While:
225 // create a new scope to track variables
227 for (auto& child : node.list())
228 visit(child, register_declarations);
229 // remove the scope once the loop has been compiled, only if we were registering declarations
231 break;
232
233 case Keyword::Fun:
234 // create a new scope to track variables
236
// NOTE(review): index 1 is read without a size check, presumably guaranteed by the parser
237 if (node.constList()[1].nodeType() == NodeType::List)
238 {
// walk the argument list: captures must refer to a registered variable, plain symbols are
// registered as mutable
239 for (auto& child : node.list()[1].list())
240 {
241 if (child.nodeType() == NodeType::Capture)
242 {
243 if (!m_scope_resolver.isRegistered(child.string()) && register_declarations)
244 throw CodeError(
245 fmt::format("Can not capture `{}' because it is referencing a variable defined in an unreachable scope.", child.string()),
247 child.filename(),
248 child.line(),
249 child.col(),
250 child.repr()));
251
252 // update the declared variable name to use the fully qualified name
253 // this will prevent name conflicts, and handle scope resolution
254 std::string fqn = updateSymbolWithFullyQualifiedName(child);
255 addDefinedSymbol(fqn, true);
256 }
257 else if (child.nodeType() == NodeType::Symbol)
258 addDefinedSymbol(child.string(), /* is_mutable= */ true);
259 }
260 }
// then visit the function body, if there is one
261 if (node.constList().size() > 2)
262 visit(node.list()[2], register_declarations);
263
264 // remove the scope once the function has been compiled, only if we were registering declarations
266 break;
267
// any other keyword: just visit every element of the list
268 default:
269 for (auto& child : node.list())
270 visit(child, register_declarations);
271 break;
272 }
273 }
274
275 void NameResolutionPass::addSymbolNode(const Node& symbol, const std::string& old_name)
276 {
277 const std::string& name = symbol.string();
278
279 // we don't accept builtins/operators as a user symbol
280 if (m_language_symbols.contains(name))
281 return;
282
283 // remove the old name node, to avoid false positive when looking for unbound symbols
284 if (!old_name.empty())
285 {
286 auto it = std::ranges::find_if(m_symbol_nodes, [&old_name, &symbol](const Node& sym_node) -> bool {
287 return sym_node.string() == old_name &&
288 sym_node.col() == symbol.col() &&
289 sym_node.line() == symbol.line() &&
290 sym_node.filename() == symbol.filename();
291 });
292 if (it != m_symbol_nodes.end())
293 {
294 it->setString(name);
295 return;
296 }
297 }
298
299 const auto it = std::ranges::find_if(m_symbol_nodes, [&name](const Node& sym_node) -> bool {
300 return sym_node.string() == name;
301 });
302 if (it == m_symbol_nodes.end())
303 m_symbol_nodes.push_back(symbol);
304 }
305
306 bool NameResolutionPass::mayBeFromPlugin(const std::string& name) const noexcept
307 {
308 std::string splitted = Utils::splitString(name, ':')[0];
309 const auto it = std::ranges::find_if(
310 m_plugin_names,
311 [&splitted](const std::string& plugin) -> bool {
312 return plugin == splitted;
313 });
314 return it != m_plugin_names.end();
315 }
316
// Body of updateSymbolWithFullyQualifiedName. Its signature line is not shown in this listing;
// the member index at the end of the page gives it as
// `std::string updateSymbolWithFullyQualifiedName(Node &symbol)`.
// Asks the scope resolver whether symbol.string() can be fully qualified. On success, the node's
// string is replaced by the fully qualified name, which is also returned.
// Throws CodeError when the resolved name differs from the original one and is a language symbol,
// or when the resolver does not allow the qualification.
// NOTE(review): the embedded numbering skips 327 and 349 (inside the two throw expressions).
318 {
319 auto [allowed, fqn] = m_scope_resolver.canFullyQualifyName(symbol.string());
320
// resolving must never turn a user symbol into a builtin/operator name
321 if (m_language_symbols.contains(fqn) && symbol.string() != fqn)
322 {
323 throw CodeError(
324 fmt::format(
325 "Symbol `{}' was resolved to `{}', which is also a builtin name. Either the symbol or the package it's in needs to be renamed to avoid conflicting with the builtin.",
326 symbol.string(), fqn),
328 symbol.filename(),
329 symbol.line(),
330 symbol.col(),
331 symbol.repr()));
332 }
333 if (!allowed)
334 {
335 std::string message;
// a name ending with "#hidden" gets the "symbol list" hint, and is shown cut at the first '#'
336 if (fqn.ends_with("#hidden"))
337 message = fmt::format(
338 R"(Unbound variable "{}". However, it exists in a namespace as "{}", did you forget to add it to the symbol list while importing?)",
339 symbol.string(),
340 fqn.substr(0, fqn.find_first_of('#')));
341 else
342 message = fmt::format(R"(Unbound variable "{}". However, it exists in a namespace as "{}", did you forget to prefix it with its namespace?)", symbol.string(), fqn);
343
// when tracing, dump the tree to stdout before raising the error
344 if (m_logger.shouldTrace())
345 m_ast.debugPrint(std::cout) << '\n';
346
347 throw CodeError(
348 message,
350 symbol.filename(),
351 symbol.line(),
352 symbol.col(),
353 symbol.repr()));
354 }
355
356 symbol.setString(fqn);
357 return fqn;
358 }
359
// Body of checkForUndefinedSymbol. Its signature line is not shown in this listing; the member
// index at the end of the page gives it as `void checkForUndefinedSymbol() const`.
// Throws a CodeError for the first recorded symbol node that is neither in m_defined_symbols
// nor possibly provided by a plugin.
361 {
362 for (const auto& sym : m_symbol_nodes)
363 {
364 const auto& str = sym.string();
365 const bool is_plugin = mayBeFromPlugin(str);
366
367 if (!m_defined_symbols.contains(str) && !is_plugin)
368 {
369 std::string message;
370
// try to find a close enough name to help the user
371 const std::string suggestion = offerSuggestion(str);
372 if (suggestion.empty())
373 message = fmt::format(R"(Unbound variable error "{}" (variable is used but not defined))", str);
374 else
375 {
// prefix is what comes before the first ':' of the suggestion (the whole suggestion if none)
376 const std::string prefix = suggestion.substr(0, suggestion.find_first_of(':'));
377 const std::string note_about_prefix = fmt::format(
378 " You either forgot to import it in the symbol list (eg `(import {} :{})') or need to fully qualify it by adding the namespace",
379 prefix,
380 str);
// the note is only relevant when the suggestion is the same name with something in front of ':'
381 const bool add_note = suggestion.ends_with(":" + str);
382 message = fmt::format(R"(Unbound variable error "{}" (did you mean "{}"?{}))", str, suggestion, add_note ? note_about_prefix : "");
383 }
384
385 throw CodeError(message, CodeErrorContext(sym.filename(), sym.line(), sym.col(), sym.repr()));
386 }
387 }
388 }
389
390 std::string NameResolutionPass::offerSuggestion(const std::string& str) const
391 {
392 auto iterate = [](const std::string& word, const std::unordered_set<std::string>& dict) -> std::string {
393 std::string suggestion;
394 // our suggestion shouldn't require more than half the string to change
395 std::size_t suggestion_distance = word.size() / 2;
396 for (const std::string& symbol : dict)
397 {
398 const std::size_t current_distance = Utils::levenshteinDistance(word, symbol);
399 if (current_distance <= suggestion_distance)
400 {
401 suggestion_distance = current_distance;
402 suggestion = symbol;
403 }
404 }
405 return suggestion;
406 };
407
408 std::string suggestion = iterate(str, m_defined_symbols);
409 // look for a suggestion related to language builtins
410 if (suggestion.empty())
411 suggestion = iterate(str, m_language_symbols);
412 // look for a suggestion related to a namespace change
413 if (suggestion.empty())
414 {
415 if (const auto it = std::ranges::find_if(m_defined_symbols, [&str](const std::string& symbol) {
416 return symbol.ends_with(":" + str);
417 });
418 it != m_defined_symbols.end())
419 suggestion = *it;
420 }
421
422 return suggestion;
423 }
424}
Lots of utilities about string, filesystem and more.
Host the declaration of all the ArkScript builtins.
ArkScript homemade exceptions.
Resolves names and fully qualify them in the AST (prefixing them with the package they are from)
bool shouldTrace() const
Definition Logger.hpp:51
void trace(const char *fmt, Args &&... args)
Write a trace level log using fmtlib.
Definition Logger.hpp:113
void traceStart(std::string &&trace_name)
Definition Logger.hpp:90
std::vector< std::string > m_plugin_names
void visit(Node &node, bool register_declarations)
Recursively visit nodes.
void visitKeyword(Node &node, Keyword keyword, bool register_declarations)
const Node & ast() const noexcept override
Unused overload that returns the input AST (untouched as this pass only generates errors)
void checkForUndefinedSymbol() const
Checks for undefined symbols, not present in the defined symbols table.
std::string offerSuggestion(const std::string &str) const
Suggest a symbol of what the user may have meant to input.
std::unordered_set< std::string > m_language_symbols
Precomputed set of language symbols that can't be used to define variables.
std::unordered_set< std::string > m_defined_symbols
bool mayBeFromPlugin(const std::string &name) const noexcept
Checking if a symbol may be coming from a plugin.
void process(const Node &ast) override
Start visiting the given AST, checking for mutability violation and unbound variables.
std::string addDefinedSymbol(const std::string &sym, bool is_mutable)
Register a symbol as defined, so that later we can throw errors on undefined symbols.
std::string updateSymbolWithFullyQualifiedName(Node &symbol)
void addSymbolNode(const Node &symbol, const std::string &old_name="")
Register a given node in the symbol table.
NameResolutionPass(unsigned debug)
Create a NameResolutionPass.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:30
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
const std::string & filename() const noexcept
Return the filename in which this node was created.
Definition Node.cpp:174
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
Namespace & arkNamespace() noexcept
Return the namespace held by the value (if the node type allows it)
Definition Node.cpp:53
std::string repr() const noexcept
Compute a representation of the node without any comments or additional sugar, colors,...
Definition Node.cpp:189
std::ostream & debugPrint(std::ostream &os) const noexcept
Print a node to an output stream with added type annotations.
Definition Node.cpp:289
std::size_t col() const noexcept
Get the column at which this node was created.
Definition Node.cpp:169
void setString(const std::string &value) noexcept
Set the String object.
Definition Node.cpp:112
std::size_t line() const noexcept
Get the line at which this node was created.
Definition Node.cpp:164
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
An interface to describe compiler passes.
Definition Pass.hpp:23
std::string registerInCurrent(const std::string &name, bool is_mutable)
Register a Declaration in the current (last) scope.
void createNewNamespace(const std::string &name, bool with_prefix, bool is_glob, const std::vector< std::string > &symbols)
Create a new namespace scope.
void saveNamespaceAndRemove()
Save the last scope as a namespace, by attaching it to the nearest namespace scope.
std::string getFullyQualifiedNameInNearestScope(const std::string &name) const
Get a FQN from a variable name in the nearest scope it is declared in.
bool isRegistered(const std::string &name) const
Checks if any scope has 'name', in reverse order.
void createNew()
Create a new scope.
StaticScope * currentScope() const
Return a non-owning raw pointer to the current scope.
bool isInScope(const std::string &name) const
Checks if 'name' is in the current scope.
void removeLastScope()
Remove the last scope.
std::optional< bool > isImmutable(const std::string &name) const
Checks the scopes in reverse order for 'name' and returns its mutability status.
std::pair< bool, std::string > canFullyQualifyName(const std::string &name)
Checks if a name can be fully qualified (allows only unprefixed names to be resolved by glob namespac...
virtual std::optional< Declaration > get(const std::string &name, bool extensive_lookup)
Try to return a Declaration from this scope with a given name.
std::vector< std::string > splitString(const std::string &source, const char sep)
Cut a string into pieces, given a character separator.
Definition Utils.hpp:31
ARK_API std::size_t levenshteinDistance(const std::string &str1, const std::string &str2)
Calculate the Levenshtein distance between two strings.
Definition Utils.cpp:5
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 9 > listInstructions
Definition Common.hpp:115
constexpr std::string_view AppendInPlace
Definition Common.hpp:102
constexpr std::array< std::string_view, 24 > operators
Definition Common.hpp:149
constexpr std::string_view ConcatInPlace
Definition Common.hpp:103
constexpr std::string_view SysArgs
Definition Common.hpp:127
constexpr std::string_view And
Definition Common.hpp:129
constexpr std::array UpdateRef
All the builtins that modify in place a variable.
Definition Common.hpp:108
constexpr std::string_view Or
Definition Common.hpp:130
Keyword
The different keywords available.
Definition Common.hpp:75
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)