ArkScript
A small, lisp-inspired, functional scripting language
NameResolutionPass.cpp
Go to the documentation of this file.
2
4#include <Ark/Utils/Utils.hpp>
6
7namespace Ark::internal
8{
10 Pass("NameResolution", debug)
11 {
12 for (const auto& builtin : Builtins::builtins)
13 m_language_symbols.emplace(builtin.first);
14 for (auto ope : Language::operators)
15 m_language_symbols.emplace(ope);
16 for (auto inst : Language::listInstructions)
17 m_language_symbols.emplace(inst);
18
24 }
25
27 {
28 m_logger.traceStart("process");
29
30 m_ast = ast;
31 visit(m_ast, /* register_declarations= */ true);
32
34
35 m_logger.trace("AST after name resolution");
37 m_ast.debugPrint(std::cout) << '\n';
38
39 m_logger.traceStart("checkForUndefinedSymbol");
42 }
43
44 const Node& NameResolutionPass::ast() const noexcept
45 {
46 return m_ast;
47 }
48
49 std::string NameResolutionPass::addDefinedSymbol(const std::string& sym, const bool is_mutable)
50 {
51 const std::string fully_qualified_name = m_scope_resolver.registerInCurrent(sym, is_mutable);
52 m_defined_symbols.emplace(fully_qualified_name);
53 return fully_qualified_name;
54 }
55
56 void NameResolutionPass::visit(Node& node, const bool register_declarations)
57 {
58 switch (node.nodeType())
59 {
61 {
62 const std::string old_name = node.string();
64 addSymbolNode(node, old_name);
65 break;
66 }
67
68 case NodeType::Field:
69 for (std::size_t i = 0, end = node.list().size(); i < end; ++i)
70 {
71 Node& child = node.list()[i];
72
73 if (i == 0)
74 {
75 const std::string old_name = child.string();
76 // in case of field, no need to check if we can fully qualify names
78 addSymbolNode(child, old_name);
79 }
80 else
81 addSymbolNode(child);
82 }
83 break;
84
85 case NodeType::List:
86 if (!node.constList().empty())
87 {
88 if (node.constList()[0].nodeType() == NodeType::Keyword)
89 visitKeyword(node, node.constList()[0].keyword(), register_declarations);
90 else
91 {
92 // function calls
93 // the UpdateRef function calls kind get a special treatment, like let/mut/set,
94 // because we need to check for mutability errors
95 if (node.constList().size() > 1 && node.constList()[0].nodeType() == NodeType::Symbol &&
96 node.constList()[1].nodeType() == NodeType::Symbol && register_declarations)
97 {
98 const auto funcname = node.constList()[0].string();
99 const auto arg = node.constList()[1].string();
100
101 if (std::ranges::find(Language::UpdateRef, funcname) != Language::UpdateRef.end() && m_scope_resolver.isImmutable(arg).value_or(false))
102 throw CodeError(
103 fmt::format("MutabilityError: Can not modify the constant list `{}' using `{}'", arg, funcname),
104 CodeErrorContext(node.filename(), node.constList()[1].position()));
105
106 // check that we aren't doing a (append! a a) nor a (concat! a a)
107 if (funcname == Language::AppendInPlace || funcname == Language::ConcatInPlace)
108 {
109 for (std::size_t i = 2, end = node.constList().size(); i < end; ++i)
110 {
111 if (node.constList()[i].nodeType() == NodeType::Symbol && node.constList()[i].string() == arg)
112 throw CodeError(
113 fmt::format("MutabilityError: Can not {} the list `{}' to itself", funcname, arg),
114 CodeErrorContext(node.filename(), node.constList()[1].position()));
115 }
116 }
117 }
118
119 for (auto& child : node.list())
120 visit(child, register_declarations);
121 }
122 }
123 break;
124
126 {
127 auto& namespace_ = node.arkNamespace();
128 // no need to guard createNewNamespace with an if (register_declarations), we want to keep the namespace node
129 // (which will get ignored by the compiler, that only uses its AST), so that we can (re)construct the
130 // scopes correctly
131 m_scope_resolver.createNewNamespace(namespace_.name, namespace_.with_prefix, namespace_.is_glob, namespace_.symbols);
133
134 visit(*namespace_.ast, /* register_declarations= */ true);
135 // dual visit so that we can handle forward references
136 visit(*namespace_.ast, /* register_declarations= */ false);
137
138 // if we had specific symbols to import, check that those exist
139 if (!namespace_.symbols.empty())
140 {
141 const auto it = std::ranges::find_if(
142 namespace_.symbols,
143 [&scope, &namespace_](const std::string& sym) -> bool {
144 return !scope->get(sym, namespace_.name, true).has_value();
145 });
146
147 if (it != namespace_.symbols.end())
148 throw CodeError(
149 fmt::format("ImportError: Can not import symbol {} from {}, as it isn't in the package", *it, namespace_.name),
150 CodeErrorContext(namespace_.ast->filename(), namespace_.ast->position()));
151 }
152
154 break;
155 }
156
157 default:
158 break;
159 }
160 }
161
162 void NameResolutionPass::visitKeyword(Node& node, const Keyword keyword, const bool register_declarations)
163 {
164 switch (keyword)
165 {
166 case Keyword::Set:
167 [[fallthrough]];
168 case Keyword::Let:
169 [[fallthrough]];
170 case Keyword::Mut:
171 // first, visit the value, then register the symbol
172 // this allows us to detect things like (let foo (fun (&foo) ()))
173 if (node.constList().size() > 2)
174 visit(node.list()[2], register_declarations);
175 if (node.constList().size() > 1 && node.constList()[1].nodeType() == NodeType::Symbol)
176 {
177 const std::string& name = node.constList()[1].string();
178 if (m_language_symbols.contains(name) && register_declarations)
179 throw CodeError(
180 fmt::format("Can not use a reserved identifier ('{}') as a {} name.", name, keyword == Keyword::Let ? "constant" : "variable"),
181 CodeErrorContext(node.filename(), node.constList()[1].position()));
182
183 if (m_scope_resolver.isInScope(name) && keyword == Keyword::Let && register_declarations)
184 throw CodeError(
185 fmt::format("MutabilityError: Can not use 'let' to redefine variable `{}'", name),
186 CodeErrorContext(node.filename(), node.constList()[1].position()));
187 if (keyword == Keyword::Set && m_scope_resolver.isRegistered(name))
188 {
189 if (m_scope_resolver.isImmutable(name).value_or(false) && register_declarations)
190 throw CodeError(
191 fmt::format("MutabilityError: Can not set the constant `{}' to {}", name, node.constList()[2].repr()),
192 CodeErrorContext(node.filename(), node.constList()[1].position()));
193
195 }
196 else if (keyword != Keyword::Set)
197 {
198 // update the declared variable name to use the fully qualified name
199 // this will prevent name conflicts, and handle scope resolution
200 const std::string fully_qualified_name = addDefinedSymbol(name, keyword != Keyword::Let);
201 if (register_declarations)
202 node.list()[1].setString(fully_qualified_name);
203 }
204 }
205 break;
206
207 case Keyword::Import:
208 if (!node.constList().empty())
209 m_plugin_names.push_back(node.constList()[1].constList().back().string());
210 break;
211
212 case Keyword::While:
213 // create a new scope to track variables
215 for (auto& child : node.list())
216 visit(child, register_declarations);
217 // remove the scope once the loop has been compiled, only we were registering declarations
219 break;
220
221 case Keyword::Fun:
222 // create a new scope to track variables
224
225 if (node.constList()[1].nodeType() == NodeType::List)
226 {
227 for (auto& child : node.list()[1].list())
228 {
229 if (child.nodeType() == NodeType::Capture)
230 {
231 if (!m_scope_resolver.isRegistered(child.string()) && register_declarations)
232 throw CodeError(
233 fmt::format("Can not capture `{}' because it is referencing a variable defined in an unreachable scope.", child.string()),
234 CodeErrorContext(child.filename(), child.position()));
235
236 // save the old unqualified name of the capture, so that we can use it in the
237 // ASTLowerer later one
238 if (!child.getUnqualifiedName())
239 {
240 child.setUnqualifiedName(child.string());
241 m_defined_symbols.emplace(child.string());
242 }
243 // update the declared variable name to use the fully qualified name
244 // this will prevent name conflicts, and handle scope resolution
245 std::string old_name = child.string();
247 addDefinedSymbol(old_name, true);
248 }
249 else if (child.nodeType() == NodeType::Symbol || child.nodeType() == NodeType::RefArg)
250 addDefinedSymbol(child.string(), /* is_mutable= */ false);
251 else if (child.nodeType() == NodeType::MutArg)
252 addDefinedSymbol(child.string(), /* is_mutable= */ true);
253 }
254 }
255 if (node.constList().size() > 2)
256 visit(node.list()[2], register_declarations);
257
258 // remove the scope once the function has been compiled, only we were registering declarations
260 break;
261
262 default:
263 for (auto& child : node.list())
264 visit(child, register_declarations);
265 break;
266 }
267 }
268
269 void NameResolutionPass::addSymbolNode(const Node& symbol, const std::string& old_name)
270 {
271 const std::string& name = symbol.string();
272
273 // we don't accept builtins/operators as a user symbol
274 if (m_language_symbols.contains(name))
275 return;
276
277 // remove the old name node, to avoid false positive when looking for unbound symbols
278 if (!old_name.empty())
279 {
280 auto it = std::ranges::find_if(m_symbol_nodes, [&old_name, &symbol](const Node& sym_node) -> bool {
281 return sym_node.string() == old_name &&
282 sym_node.position().start == symbol.position().start &&
283 sym_node.filename() == symbol.filename();
284 });
285 if (it != m_symbol_nodes.end())
286 {
287 it->setString(name);
288 return;
289 }
290 }
291
292 const auto it = std::ranges::find_if(m_symbol_nodes, [&name](const Node& sym_node) -> bool {
293 return sym_node.string() == name;
294 });
295 if (it == m_symbol_nodes.end())
296 m_symbol_nodes.push_back(symbol);
297 }
298
299 bool NameResolutionPass::mayBeFromPlugin(const std::string& name) const noexcept
300 {
301 std::string splitted = Utils::splitString(name, ':')[0];
302 const auto it = std::ranges::find_if(
303 m_plugin_names,
304 [&splitted](const std::string& plugin) -> bool {
305 return plugin == splitted;
306 });
307 return it != m_plugin_names.end();
308 }
309
311 {
312 auto [allowed, fqn] = m_scope_resolver.canFullyQualifyName(symbol.string());
313
314 if (m_language_symbols.contains(fqn) && symbol.string() != fqn)
315 {
316 throw CodeError(
317 fmt::format(
318 "Symbol `{}' was resolved to `{}', which is also a builtin name. Either the symbol or the package it's in needs to be renamed to avoid conflicting with the builtin.",
319 symbol.string(), fqn),
320 CodeErrorContext(symbol.filename(), symbol.position()));
321 }
322 if (!allowed)
323 {
324 std::string message;
325 if (fqn.ends_with("#hidden"))
326 message = fmt::format(
327 R"(Unbound variable "{}". However, it exists in a namespace as "{}", did you forget to add it to the symbol list while importing?)",
328 symbol.string(),
329 fqn.substr(0, fqn.find_first_of('#')));
330 else
331 message = fmt::format(R"(Unbound variable "{}". However, it exists in a namespace as "{}", did you forget to prefix it with its namespace?)", symbol.string(), fqn);
332
333 if (m_logger.shouldTrace())
334 m_ast.debugPrint(std::cout) << '\n';
335
336 throw CodeError(message, CodeErrorContext(symbol.filename(), symbol.position()));
337 }
338
339 symbol.setString(fqn);
340 return fqn;
341 }
342
344 {
345 for (const auto& sym : m_symbol_nodes)
346 {
347 const auto& str = sym.string();
348 const bool is_plugin = mayBeFromPlugin(str);
349
350 if (!m_defined_symbols.contains(str) && !is_plugin)
351 {
352 std::string message;
353
354 const std::string suggestion = offerSuggestion(str);
355 if (suggestion.empty())
356 message = fmt::format(R"(Unbound variable error "{}" (variable is used but not defined))", str);
357 else
358 {
359 const std::string prefix = suggestion.substr(0, suggestion.find_first_of(':'));
360 const std::string note_about_prefix = fmt::format(
361 " You either forgot to import it in the symbol list (eg `(import {} :{})') or need to fully qualify it by adding the namespace",
362 prefix,
363 str);
364 const bool add_note = suggestion.ends_with(":" + str);
365 message = fmt::format(R"(Unbound variable error "{}" (did you mean "{}"?{}))", str, suggestion, add_note ? note_about_prefix : "");
366 }
367
368 throw CodeError(message, CodeErrorContext(sym.filename(), sym.position()));
369 }
370 }
371 }
372
373 std::string NameResolutionPass::offerSuggestion(const std::string& str) const
374 {
375 auto iterate = [](const std::string& word, const std::unordered_set<std::string>& dict) -> std::string {
376 std::string suggestion;
377 // our suggestion shouldn't require more than half the string to change
378 std::size_t suggestion_distance = word.size() / 2;
379 for (const std::string& symbol : dict)
380 {
381 const std::size_t current_distance = Utils::levenshteinDistance(word, symbol);
382 if (current_distance <= suggestion_distance)
383 {
384 suggestion_distance = current_distance;
385 suggestion = symbol;
386 }
387 }
388 return suggestion;
389 };
390
391 std::string suggestion = iterate(str, m_defined_symbols);
392 // look for a suggestion related to language builtins
393 if (suggestion.empty())
394 suggestion = iterate(str, m_language_symbols);
395 // look for a suggestion related to a namespace change
396 if (suggestion.empty())
397 {
398 if (const auto it = std::ranges::find_if(m_defined_symbols, [&str](const std::string& symbol) {
399 return symbol.ends_with(":" + str);
400 });
401 it != m_defined_symbols.end())
402 suggestion = *it;
403 }
404
405 return suggestion;
406 }
407}
Lots of utilities about string, filesystem and more.
Host the declaration of all the ArkScript builtins.
ArkScript homemade exceptions.
Resolves names and fully qualify them in the AST (prefixing them with the package they are from)
bool shouldTrace() const
Definition Logger.hpp:54
void trace(const char *fmt, Args &&... args)
Write a trace level log using fmtlib.
Definition Logger.hpp:132
void traceStart(std::string &&trace_name)
Definition Logger.hpp:109
std::vector< std::string > m_plugin_names
const Node & ast() const noexcept
Unused overload that return the input AST (untouched as this pass only generates errors)
void visit(Node &node, bool register_declarations)
Recursively visit nodes.
void visitKeyword(Node &node, Keyword keyword, bool register_declarations)
void checkForUndefinedSymbol() const
Checks for undefined symbols, not present in the defined symbols table.
std::string offerSuggestion(const std::string &str) const
Suggest a symbol of what the user may have meant to input.
std::unordered_set< std::string > m_language_symbols
Precomputed set of language symbols that can't be used to define variables.
void process(const Node &ast)
Start visiting the given AST, checking for mutability violation and unbound variables.
std::unordered_set< std::string > m_defined_symbols
bool mayBeFromPlugin(const std::string &name) const noexcept
Checking if a symbol may be coming from a plugin.
std::string addDefinedSymbol(const std::string &sym, bool is_mutable)
Register a symbol as defined, so that later we can throw errors on undefined symbols.
std::string updateSymbolWithFullyQualifiedName(Node &symbol)
void addSymbolNode(const Node &symbol, const std::string &old_name="")
Register a given node in the symbol table.
NameResolutionPass(unsigned debug)
Create a NameResolutionPass.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
const std::string & filename() const noexcept
Return the filename in which this node was created.
Definition Node.cpp:164
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
Namespace & arkNamespace() noexcept
Return the namespace held by the value (if the node type allows it)
Definition Node.cpp:53
std::ostream & debugPrint(std::ostream &os) const noexcept
Print a node to an output stream with added type annotations.
Definition Node.cpp:287
FileSpan position() const noexcept
Get the span of the node (start and end)
Definition Node.cpp:159
void setString(const std::string &value) noexcept
Set the String object.
Definition Node.cpp:117
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
An interface to describe compiler passes.
Definition Pass.hpp:24
std::string registerInCurrent(const std::string &name, bool is_mutable)
Register a Declaration in the current (last) scope.
void createNewNamespace(const std::string &name, bool with_prefix, bool is_glob, const std::vector< std::string > &symbols)
Create a new namespace scope.
void saveNamespaceAndRemove()
Save the last scope as a namespace, by attaching it to the nearest namespace scope.
std::string getFullyQualifiedNameInNearestScope(const std::string &name) const
Get a FQN from a variable name in the nearest scope it is declared in.
bool isRegistered(const std::string &name) const
Checks if any scope has 'name', in reverse order.
void createNew()
Create a new scope.
StaticScope * currentScope() const
Return a non-owning raw pointer to the current scope.
bool isInScope(const std::string &name) const
Checks if 'name' is in the current scope.
void removeLastScope()
Remove the last scope.
std::optional< bool > isImmutable(const std::string &name) const
Checks the scopes in reverse order for 'name' and returns its mutability status.
std::pair< bool, std::string > canFullyQualifyName(const std::string &name)
Checks if a name can be fully qualified (allows only unprefixed names to be resolved by glob namespac...
std::vector< std::string > splitString(const std::string &source, const char sep)
Cut a string into pieces, given a character separator.
Definition Utils.hpp:31
ARK_API std::size_t levenshteinDistance(const std::string &str1, const std::string &str2)
Calculate the Levenshtein distance between two strings.
Definition Utils.cpp:7
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 9 > listInstructions
Definition Common.hpp:119
constexpr std::string_view AppendInPlace
Definition Common.hpp:106
constexpr std::string_view Apply
Definition Common.hpp:137
constexpr std::array< std::string_view, 24 > operators
Definition Common.hpp:158
constexpr std::string_view ConcatInPlace
Definition Common.hpp:107
constexpr std::string_view SysArgs
Definition Common.hpp:131
constexpr std::string_view And
Definition Common.hpp:134
constexpr std::array UpdateRef
All the builtins that modify in place a variable.
Definition Common.hpp:112
constexpr std::string_view Or
Definition Common.hpp:135
constexpr std::string_view SysProgramName
Definition Common.hpp:132
Keyword
The different keywords available.
Definition Common.hpp:79
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)