ArkScript
A small, lisp-inspired, functional scripting language
NameResolutionPass.cpp
Go to the documentation of this file.
2
4#include <Ark/Utils/Utils.hpp>
6
7namespace Ark::internal
8{
10 Pass("NameResolution", debug)
11 {
12 for (const auto& builtin : Builtins::builtins)
13 m_language_symbols.emplace(builtin.first);
14 for (auto ope : Language::operators)
15 m_language_symbols.emplace(ope);
16 for (auto inst : Language::listInstructions)
17 m_language_symbols.emplace(inst);
18
23 }
24
26 {
27 m_logger.traceStart("process");
28
29 m_ast = ast;
30 visit(m_ast, /* register_declarations= */ true);
31
33
34 m_logger.trace("AST after name resolution");
36 m_ast.debugPrint(std::cout) << '\n';
37
38 m_logger.traceStart("checkForUndefinedSymbol");
41 }
42
43 const Node& NameResolutionPass::ast() const noexcept
44 {
45 return m_ast;
46 }
47
48 std::string NameResolutionPass::addDefinedSymbol(const std::string& sym, const bool is_mutable)
49 {
50 const std::string fully_qualified_name = m_scope_resolver.registerInCurrent(sym, is_mutable);
51 m_defined_symbols.emplace(fully_qualified_name);
52 return fully_qualified_name;
53 }
54
/**
 * @brief Recursively visit a node and dispatch on its node type.
 * @details Field nodes have each of their children passed to addSymbolNode(). Non-empty
 * List nodes are forwarded to visitKeyword() when their first child is a keyword; otherwise
 * they are handled as function calls: mutability checks are run, then every child is visited.
 * The last visible case reads node.arkNamespace(), creates a namespace scope and visits the
 * namespace AST twice. Every other node type is ignored.
 * NOTE(review): this listing has gaps in its embedded numbering (59, 62, 76, 124, 131, 152);
 * the statements that were on those lines are not visible here.
 *
 * @param node node to visit, taken by non-const reference
 * @param register_declarations forwarded to the nested visit()/visitKeyword() calls; it also gates the mutability checks done on function calls
 * @throws CodeError on a mutability violation, or when a symbol listed in an import can not be found
 */
55 void NameResolutionPass::visit(Node& node, const bool register_declarations)
56 {
57 switch (node.nodeType())
58 {
// NOTE(review): the case label for this branch (embedded line 59) is missing from the listing
60 {
// keep the name the node had on entry, it is handed to addSymbolNode as old_name
61 const std::string old_name = node.string();
// NOTE(review): embedded line 62 is missing from the listing
63 addSymbolNode(node, old_name);
64 break;
65 }
66
67 case NodeType::Field:
68 for (std::size_t i = 0, end = node.list().size(); i < end; ++i)
69 {
70 Node& child = node.list()[i];
71
// only the first element of the field gets the old_name treatment
72 if (i == 0)
73 {
74 const std::string old_name = child.string();
75 // in case of field, no need to check if we can fully qualify names
// NOTE(review): embedded line 76 is missing from the listing
77 addSymbolNode(child, old_name);
78 }
79 else
80 addSymbolNode(child);
81 }
82 break;
83
84 case NodeType::List:
// empty lists are left untouched
85 if (!node.constList().empty())
86 {
// a list starting with a keyword is delegated to visitKeyword
87 if (node.constList()[0].nodeType() == NodeType::Keyword)
88 visitKeyword(node, node.constList()[0].keyword(), register_declarations);
89 else
90 {
91 // function calls
92 // the UpdateRef function calls kind get a special treatment, like let/mut/set,
93 // because we need to check for mutability errors
// the checks only run for (symbol symbol ...) calls, and only when register_declarations is true
94 if (node.constList().size() > 1 && node.constList()[0].nodeType() == NodeType::Symbol &&
95 node.constList()[1].nodeType() == NodeType::Symbol && register_declarations)
96 {
97 const auto funcname = node.constList()[0].string();
98 const auto arg = node.constList()[1].string();
99
// an unknown mutability status (empty optional) is treated as "not immutable"
100 if (std::ranges::find(Language::UpdateRef, funcname) != Language::UpdateRef.end() && m_scope_resolver.isImmutable(arg).value_or(false))
101 throw CodeError(
102 fmt::format("MutabilityError: Can not modify the constant list `{}' using `{}'", arg, funcname),
103 CodeErrorContext(node.filename(), node.constList()[1].position()));
104
105 // check that we aren't doing a (append! a a) nor a (concat! a a)
106 if (funcname == Language::AppendInPlace || funcname == Language::ConcatInPlace)
107 {
// start at 2: index 0 is the function name, index 1 is the list being modified
108 for (std::size_t i = 2, end = node.constList().size(); i < end; ++i)
109 {
110 if (node.constList()[i].nodeType() == NodeType::Symbol && node.constList()[i].string() == arg)
111 throw CodeError(
112 fmt::format("MutabilityError: Can not {} the list `{}' to itself", funcname, arg),
113 CodeErrorContext(node.filename(), node.constList()[1].position()));
114 }
115 }
116 }
117
// visit every element of the call, the function name included
118 for (auto& child : node.list())
119 visit(child, register_declarations);
120 }
121 }
122 break;
123
// NOTE(review): the case label for this branch (embedded line 124) is missing from the listing
125 {
126 auto& namespace_ = node.arkNamespace();
127 // no need to guard createNewNamespace with an if (register_declarations), we want to keep the namespace node
128 // (which will get ignored by the compiler, that only uses its AST), so that we can (re)construct the
129 // scopes correctly
130 m_scope_resolver.createNewNamespace(namespace_.name, namespace_.with_prefix, namespace_.is_glob, namespace_.symbols);
// NOTE(review): embedded line 131 is missing from the listing; `scope`, captured by the lambda below,
// is not declared in any visible line and is presumably declared there
132
133 visit(*namespace_.ast, /* register_declarations= */ true);
134 // dual visit so that we can handle forward references
135 visit(*namespace_.ast, /* register_declarations= */ false);
136
137 // if we had specific symbols to import, check that those exist
138 if (!namespace_.symbols.empty())
139 {
// look for the first requested symbol for which the scope lookup returns no value
140 const auto it = std::ranges::find_if(
141 namespace_.symbols,
142 [&scope, &namespace_](const std::string& sym) -> bool {
143 return !scope->get(sym, namespace_.name, true).has_value();
144 });
145
146 if (it != namespace_.symbols.end())
147 throw CodeError(
148 fmt::format("ImportError: Can not import symbol {} from {}, as it isn't in the package", *it, namespace_.name),
149 CodeErrorContext(namespace_.ast->filename(), namespace_.ast->position()));
150 }
151
// NOTE(review): embedded line 152 is missing from the listing
153 break;
154 }
155
156 default:
157 break;
158 }
159 }
160
/**
 * @brief Visit a list node whose first element is a keyword.
 * @details set/let/mut visit the value first, then validate the declared or assigned name.
 * import records a plugin name. while and fun visit their children (fun also registers its
 * arguments and captures). Any other keyword simply has all its children visited.
 * NOTE(review): this listing has gaps in its embedded numbering (193, 213, 217, 222, 245, 257);
 * the statements that were on those lines are not visible here.
 *
 * @param node the list node, whose element 0 is the keyword
 * @param keyword the keyword found at the head of the list
 * @param register_declarations gates the error checks and the renaming of declared symbols; forwarded to nested visit() calls
 * @throws CodeError on reserved identifier use, let redefinition, assignment to a constant, or an unresolvable capture
 */
161 void NameResolutionPass::visitKeyword(Node& node, const Keyword keyword, const bool register_declarations)
162 {
163 switch (keyword)
164 {
165 case Keyword::Set:
166 [[fallthrough]];
167 case Keyword::Let:
168 [[fallthrough]];
169 case Keyword::Mut:
170 // first, visit the value, then register the symbol
171 // this allows us to detect things like (let foo (fun (&foo) ()))
172 if (node.constList().size() > 2)
173 visit(node.list()[2], register_declarations);
174 if (node.constList().size() > 1 && node.constList()[1].nodeType() == NodeType::Symbol)
175 {
176 const std::string& name = node.constList()[1].string();
// builtins, operators and list instructions can't be used as names
177 if (m_language_symbols.contains(name) && register_declarations)
178 throw CodeError(
179 fmt::format("Can not use a reserved identifier ('{}') as a {} name.", name, keyword == Keyword::Let ? "constant" : "variable"),
180 CodeErrorContext(node.filename(), node.constList()[1].position()));
181
// a let can not redefine a name which is already in the current scope
182 if (m_scope_resolver.isInScope(name) && keyword == Keyword::Let && register_declarations)
183 throw CodeError(
184 fmt::format("MutabilityError: Can not use 'let' to redefine variable `{}'", name),
185 CodeErrorContext(node.filename(), node.constList()[1].position()));
186 if (keyword == Keyword::Set && m_scope_resolver.isRegistered(name))
187 {
// NOTE(review): constList()[2] is read while the enclosing condition only establishes size() > 1;
// confirm that a set node always has a value at this point
188 if (m_scope_resolver.isImmutable(name).value_or(false) && register_declarations)
189 throw CodeError(
190 fmt::format("MutabilityError: Can not set the constant `{}' to {}", name, node.constList()[2].repr()),
191 CodeErrorContext(node.filename(), node.constList()[1].position()));
192
// NOTE(review): embedded line 193 is missing from the listing
194 }
195 else if (keyword != Keyword::Set)
196 {
197 // update the declared variable name to use the fully qualified name
198 // this will prevent name conflicts, and handle scope resolution
// the symbol is registered on every visit, only a let gives an immutable symbol;
// the node itself is renamed only when register_declarations is true
199 const std::string fully_qualified_name = addDefinedSymbol(name, keyword != Keyword::Let);
200 if (register_declarations)
201 node.list()[1].setString(fully_qualified_name);
202 }
203 }
204 break;
205
206 case Keyword::Import:
// the last element of the list at index 1 is stored as a plugin name
// NOTE(review): the guard only checks that the list is not empty, while index 1 is read;
// confirm that an import node always has at least two elements, each list being non-empty
207 if (!node.constList().empty())
208 m_plugin_names.push_back(node.constList()[1].constList().back().string());
209 break;
210
211 case Keyword::While:
212 // create a new scope to track variables
// NOTE(review): embedded line 213 is missing from the listing, the scope creation is not visible here
214 for (auto& child : node.list())
215 visit(child, register_declarations);
216 // remove the scope once the loop has been compiled, only if we were registering declarations
// NOTE(review): embedded line 217 is missing from the listing, the scope removal is not visible here
218 break;
219
220 case Keyword::Fun:
221 // create a new scope to track variables
// NOTE(review): embedded line 222 is missing from the listing, the scope creation is not visible here
223
// NOTE(review): index 1 is read without any size check; confirm a fun node always has an argument list
224 if (node.constList()[1].nodeType() == NodeType::List)
225 {
226 for (auto& child : node.list()[1].list())
227 {
228 if (child.nodeType() == NodeType::Capture)
229 {
// a capture must reference a name known by one of the scopes
230 if (!m_scope_resolver.isRegistered(child.string()) && register_declarations)
231 throw CodeError(
232 fmt::format("Can not capture `{}' because it is referencing a variable defined in an unreachable scope.", child.string()),
233 CodeErrorContext(child.filename(), child.position()));
234
235 // save the old unqualified name of the capture, so that we can use it in the
236 // ASTLowerer later on
237 if (!child.getUnqualifiedName())
238 {
239 child.setUnqualifiedName(child.string());
240 m_defined_symbols.emplace(child.string());
241 }
242 // update the declared variable name to use the fully qualified name
243 // this will prevent name conflicts, and handle scope resolution
244 std::string old_name = child.string();
// NOTE(review): embedded line 245 is missing from the listing
246 // FIXME: addDefinedSymbol(fqn, true); ?
247 addDefinedSymbol(old_name, true);
248 }
// plain arguments are registered as mutable symbols
249 else if (child.nodeType() == NodeType::Symbol)
250 addDefinedSymbol(child.string(), /* is_mutable= */ true);
251 }
252 }
// the body of the function, if there is one
253 if (node.constList().size() > 2)
254 visit(node.list()[2], register_declarations);
255
256 // remove the scope once the function has been compiled, only if we were registering declarations
// NOTE(review): embedded line 257 is missing from the listing, the scope removal is not visible here
258 break;
259
260 default:
// no special handling, visit every element of the list
261 for (auto& child : node.list())
262 visit(child, register_declarations);
263 break;
264 }
265 }
266
267 void NameResolutionPass::addSymbolNode(const Node& symbol, const std::string& old_name)
268 {
269 const std::string& name = symbol.string();
270
271 // we don't accept builtins/operators as a user symbol
272 if (m_language_symbols.contains(name))
273 return;
274
275 // remove the old name node, to avoid false positive when looking for unbound symbols
276 if (!old_name.empty())
277 {
278 auto it = std::ranges::find_if(m_symbol_nodes, [&old_name, &symbol](const Node& sym_node) -> bool {
279 return sym_node.string() == old_name &&
280 sym_node.position().start == symbol.position().start &&
281 sym_node.filename() == symbol.filename();
282 });
283 if (it != m_symbol_nodes.end())
284 {
285 it->setString(name);
286 return;
287 }
288 }
289
290 const auto it = std::ranges::find_if(m_symbol_nodes, [&name](const Node& sym_node) -> bool {
291 return sym_node.string() == name;
292 });
293 if (it == m_symbol_nodes.end())
294 m_symbol_nodes.push_back(symbol);
295 }
296
297 bool NameResolutionPass::mayBeFromPlugin(const std::string& name) const noexcept
298 {
299 std::string splitted = Utils::splitString(name, ':')[0];
300 const auto it = std::ranges::find_if(
301 m_plugin_names,
302 [&splitted](const std::string& plugin) -> bool {
303 return plugin == splitted;
304 });
305 return it != m_plugin_names.end();
306 }
307
309 {
310 auto [allowed, fqn] = m_scope_resolver.canFullyQualifyName(symbol.string());
311
312 if (m_language_symbols.contains(fqn) && symbol.string() != fqn)
313 {
314 throw CodeError(
315 fmt::format(
316 "Symbol `{}' was resolved to `{}', which is also a builtin name. Either the symbol or the package it's in needs to be renamed to avoid conflicting with the builtin.",
317 symbol.string(), fqn),
318 CodeErrorContext(symbol.filename(), symbol.position()));
319 }
320 if (!allowed)
321 {
322 std::string message;
323 if (fqn.ends_with("#hidden"))
324 message = fmt::format(
325 R"(Unbound variable "{}". However, it exists in a namespace as "{}", did you forget to add it to the symbol list while importing?)",
326 symbol.string(),
327 fqn.substr(0, fqn.find_first_of('#')));
328 else
329 message = fmt::format(R"(Unbound variable "{}". However, it exists in a namespace as "{}", did you forget to prefix it with its namespace?)", symbol.string(), fqn);
330
331 if (m_logger.shouldTrace())
332 m_ast.debugPrint(std::cout) << '\n';
333
334 throw CodeError(message, CodeErrorContext(symbol.filename(), symbol.position()));
335 }
336
337 symbol.setString(fqn);
338 return fqn;
339 }
340
342 {
343 for (const auto& sym : m_symbol_nodes)
344 {
345 const auto& str = sym.string();
346 const bool is_plugin = mayBeFromPlugin(str);
347
348 if (!m_defined_symbols.contains(str) && !is_plugin)
349 {
350 std::string message;
351
352 const std::string suggestion = offerSuggestion(str);
353 if (suggestion.empty())
354 message = fmt::format(R"(Unbound variable error "{}" (variable is used but not defined))", str);
355 else
356 {
357 const std::string prefix = suggestion.substr(0, suggestion.find_first_of(':'));
358 const std::string note_about_prefix = fmt::format(
359 " You either forgot to import it in the symbol list (eg `(import {} :{})') or need to fully qualify it by adding the namespace",
360 prefix,
361 str);
362 const bool add_note = suggestion.ends_with(":" + str);
363 message = fmt::format(R"(Unbound variable error "{}" (did you mean "{}"?{}))", str, suggestion, add_note ? note_about_prefix : "");
364 }
365
366 throw CodeError(message, CodeErrorContext(sym.filename(), sym.position()));
367 }
368 }
369 }
370
371 std::string NameResolutionPass::offerSuggestion(const std::string& str) const
372 {
373 auto iterate = [](const std::string& word, const std::unordered_set<std::string>& dict) -> std::string {
374 std::string suggestion;
375 // our suggestion shouldn't require more than half the string to change
376 std::size_t suggestion_distance = word.size() / 2;
377 for (const std::string& symbol : dict)
378 {
379 const std::size_t current_distance = Utils::levenshteinDistance(word, symbol);
380 if (current_distance <= suggestion_distance)
381 {
382 suggestion_distance = current_distance;
383 suggestion = symbol;
384 }
385 }
386 return suggestion;
387 };
388
389 std::string suggestion = iterate(str, m_defined_symbols);
390 // look for a suggestion related to language builtins
391 if (suggestion.empty())
392 suggestion = iterate(str, m_language_symbols);
393 // look for a suggestion related to a namespace change
394 if (suggestion.empty())
395 {
396 if (const auto it = std::ranges::find_if(m_defined_symbols, [&str](const std::string& symbol) {
397 return symbol.ends_with(":" + str);
398 });
399 it != m_defined_symbols.end())
400 suggestion = *it;
401 }
402
403 return suggestion;
404 }
405}
Lots of utilities for strings, the filesystem and more.
Host the declaration of all the ArkScript builtins.
ArkScript homemade exceptions.
Resolves names and fully qualifies them in the AST (prefixing them with the package they are from)
bool shouldTrace() const
Definition Logger.hpp:51
void trace(const char *fmt, Args &&... args)
Write a trace level log using fmtlib.
Definition Logger.hpp:113
void traceStart(std::string &&trace_name)
Definition Logger.hpp:90
std::vector< std::string > m_plugin_names
void visit(Node &node, bool register_declarations)
Recursively visit nodes.
void visitKeyword(Node &node, Keyword keyword, bool register_declarations)
const Node & ast() const noexcept override
Unused overload that returns the input AST (untouched as this pass only generates errors)
void checkForUndefinedSymbol() const
Checks for undefined symbols, not present in the defined symbols table.
std::string offerSuggestion(const std::string &str) const
Suggest a symbol of what the user may have meant to input.
std::unordered_set< std::string > m_language_symbols
Precomputed set of language symbols that can't be used to define variables.
std::unordered_set< std::string > m_defined_symbols
bool mayBeFromPlugin(const std::string &name) const noexcept
Checking if a symbol may be coming from a plugin.
void process(const Node &ast) override
Start visiting the given AST, checking for mutability violation and unbound variables.
std::string addDefinedSymbol(const std::string &sym, bool is_mutable)
Register a symbol as defined, so that later we can throw errors on undefined symbols.
std::string updateSymbolWithFullyQualifiedName(Node &symbol)
void addSymbolNode(const Node &symbol, const std::string &old_name="")
Register a given node in the symbol table.
NameResolutionPass(unsigned debug)
Create a NameResolutionPass.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
const std::string & filename() const noexcept
Return the filename in which this node was created.
Definition Node.cpp:164
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
Namespace & arkNamespace() noexcept
Return the namespace held by the value (if the node type allows it)
Definition Node.cpp:53
std::ostream & debugPrint(std::ostream &os) const noexcept
Print a node to an output stream with added type annotations.
Definition Node.cpp:279
FileSpan position() const noexcept
Get the span of the node (start and end)
Definition Node.cpp:159
void setString(const std::string &value) noexcept
Set the String object.
Definition Node.cpp:117
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
An interface to describe compiler passes.
Definition Pass.hpp:23
std::string registerInCurrent(const std::string &name, bool is_mutable)
Register a Declaration in the current (last) scope.
void createNewNamespace(const std::string &name, bool with_prefix, bool is_glob, const std::vector< std::string > &symbols)
Create a new namespace scope.
void saveNamespaceAndRemove()
Save the last scope as a namespace, by attaching it to the nearest namespace scope.
std::string getFullyQualifiedNameInNearestScope(const std::string &name) const
Get a FQN from a variable name in the nearest scope it is declared in.
bool isRegistered(const std::string &name) const
Checks if any scope has 'name', in reverse order.
void createNew()
Create a new scope.
StaticScope * currentScope() const
Return a non-owning raw pointer to the current scope.
bool isInScope(const std::string &name) const
Checks if 'name' is in the current scope.
void removeLastScope()
Remove the last scope.
std::optional< bool > isImmutable(const std::string &name) const
Checks the scopes in reverse order for 'name' and returns its mutability status.
std::pair< bool, std::string > canFullyQualifyName(const std::string &name)
Checks if a name can be fully qualified (allows only unprefixed names to be resolved by glob namespac...
std::vector< std::string > splitString(const std::string &source, const char sep)
Cut a string into pieces, given a character separator.
Definition Utils.hpp:31
ARK_API std::size_t levenshteinDistance(const std::string &str1, const std::string &str2)
Calculate the Levenshtein distance between two strings.
Definition Utils.cpp:5
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 9 > listInstructions
Definition Common.hpp:115
constexpr std::string_view AppendInPlace
Definition Common.hpp:102
constexpr std::array< std::string_view, 24 > operators
Definition Common.hpp:152
constexpr std::string_view ConcatInPlace
Definition Common.hpp:103
constexpr std::string_view SysArgs
Definition Common.hpp:127
constexpr std::string_view And
Definition Common.hpp:130
constexpr std::array UpdateRef
All the builtins that modify in place a variable.
Definition Common.hpp:108
constexpr std::string_view Or
Definition Common.hpp:131
constexpr std::string_view SysProgramName
Definition Common.hpp:128
Keyword
The different keywords available.
Definition Common.hpp:75
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)