ArkScript
A small, lisp-inspired, functional scripting language
NameResolutionPass.cpp
Go to the documentation of this file.
2
4#include <Ark/Utils/Utils.hpp>
6
7namespace Ark::internal
8{
// Constructor initializer and body.
// NOTE(review): the signature line and original listing lines 19-22 are missing from this
// extract, so only the visible statements are described here.
// Hands the name "NameResolution" and the debug level to Pass (presumably the base class,
// confirm against the header), then seeds m_language_symbols with every builtin, operator and
// list-instruction name. That set is consulted by visitKeyword to reject reserved identifiers
// and by addSymbolNode to ignore language symbols.
10 Pass("NameResolution", debug)
11 {
// only the first member of each builtin pair (its name) is recorded
12 for (const auto& builtin : Builtins::builtins)
13 m_language_symbols.emplace(builtin.first);
14 for (auto ope : Language::operators)
15 m_language_symbols.emplace(ope);
16 for (auto inst : Language::listInstructions)
17 m_language_symbols.emplace(inst);
18
19 23 }
24
// Body of the pass entry point.
// NOTE(review): the signature line and original listing lines 32, 35, 39 and 40 are missing
// from this extract; only the visible statements are described here.
26 {
27 m_logger.traceStart("process");
28
// the incoming AST is assigned to m_ast, and it is m_ast that gets visited (and renamed in place)
29 m_ast = ast;
30 visit(m_ast, /* register_declarations= */ true);
31
33
34 m_logger.trace("AST after name resolution");
// NOTE(review): this print looks unconditional here, but the line right before it is missing
// from the extract; confirm whether it is guarded by a trace-level check.
36 m_ast.debugPrint(std::cout) << '\n';
37
38 m_logger.traceStart("checkForUndefinedSymbol");
41 }
42
43 const Node& NameResolutionPass::ast() const noexcept
44 {
45 return m_ast;
46 }
47
48 std::string NameResolutionPass::addDefinedSymbol(const std::string& sym, const bool is_mutable)
49 {
50 const std::string fully_qualified_name = m_scope_resolver.registerInCurrent(sym, is_mutable);
51 m_defined_symbols.emplace(fully_qualified_name);
52 return fully_qualified_name;
53 }
54
// Walk `node` and dispatch on its type:
//  - Field: every child is handed to addSymbolNode
//  - List: forwarded to visitKeyword when the head is a keyword, otherwise handled as a
//    function call (mutability checks, then a recursive visit of every child)
//  - node types without a dedicated branch hit `default` and are left alone
// register_declarations is forwarded to nested visits and gates the function-call checks.
// Throws CodeError on mutability violations and on imports of symbols missing from a package.
// NOTE(review): original listing lines 59, 62, 76, 124, 131 and 152 are missing from this
// extract (including two case labels); the comments below only describe what is visible.
55 void NameResolutionPass::visit(Node& node, const bool register_declarations)
56 {
57 switch (node.nodeType())
58 {
// NOTE(review): the case label for this branch and the statement between the next two lines
// are missing from the extract. The visible code copies the node's current string into
// old_name and hands both to addSymbolNode.
60 {
61 const std::string old_name = node.string();
63 addSymbolNode(node, old_name);
64 break;
65 }
66
67 case NodeType::Field:
68 for (std::size_t i = 0, end = node.list().size(); i < end; ++i)
69 {
70 Node& child = node.list()[i];
71
// the first child is registered together with its previous name, the others without one
72 if (i == 0)
73 {
74 const std::string old_name = child.string();
75 // in case of field, no need to check if we can fully qualify names
77 addSymbolNode(child, old_name);
78 }
79 else
80 addSymbolNode(child);
81 }
82 break;
83
84 case NodeType::List:
// empty lists are skipped entirely
85 if (!node.constList().empty())
86 {
87 if (node.constList()[0].nodeType() == NodeType::Keyword)
88 visitKeyword(node, node.constList()[0].keyword(), register_declarations);
89 else
90 {
91 // function calls
92 // calls to the UpdateRef functions get a special treatment, like let/mut/set,
93 // because we need to check for mutability errors
// the checks only run for (symbol symbol ...) calls, and only while registering declarations
94 if (node.constList().size() > 1 && node.constList()[0].nodeType() == NodeType::Symbol &&
95 node.constList()[1].nodeType() == NodeType::Symbol && register_declarations)
96 {
97 const auto funcname = node.constList()[0].string();
98 const auto arg = node.constList()[1].string();
99
// an unknown name yields an empty optional from isImmutable, treated as "not immutable"
100 if (std::ranges::find(Language::UpdateRef, funcname) != Language::UpdateRef.end() && m_scope_resolver.isImmutable(arg).value_or(false))
101 throw CodeError(
102 fmt::format("MutabilityError: Can not modify the constant list `{}' using `{}'", arg, funcname),
103 CodeErrorContext(node.filename(), node.constList()[1].position()));
104
105 // check that we aren't doing a (append! a a) nor a (concat! a a)
106 if (funcname == Language::AppendInPlace || funcname == Language::ConcatInPlace)
107 {
// arguments start at index 2; the error is reported at the position of the target (index 1)
108 for (std::size_t i = 2, end = node.constList().size(); i < end; ++i)
109 {
110 if (node.constList()[i].nodeType() == NodeType::Symbol && node.constList()[i].string() == arg)
111 throw CodeError(
112 fmt::format("MutabilityError: Can not {} the list `{}' to itself", funcname, arg),
113 CodeErrorContext(node.filename(), node.constList()[1].position()));
114 }
115 }
116 }
117
// every element of the call, the callee included, is visited recursively
118 for (auto& child : node.list())
119 visit(child, register_declarations);
120 }
121 }
122 break;
123
// NOTE(review): the case label for this branch is missing from the extract; the body reads
// node.arkNamespace(), so it handles a node holding a namespace.
125 {
126 auto& namespace_ = node.arkNamespace();
127 // no need to guard createNewNamespace with an if (register_declarations), we want to keep the namespace node
128 // (which will get ignored by the compiler, that only uses its AST), so that we can (re)construct the
129 // scopes correctly
130 m_scope_resolver.createNewNamespace(namespace_.name, namespace_.with_prefix, namespace_.is_glob, namespace_.symbols);
132
133 visit(*namespace_.ast, /* register_declarations= */ true);
134 // dual visit so that we can handle forward references
135 visit(*namespace_.ast, /* register_declarations= */ false);
136
137 // if we had specific symbols to import, check that those exist
138 if (!namespace_.symbols.empty())
139 {
// finds the first requested symbol that the scope cannot provide
// NOTE(review): `scope` is declared on a line that is missing from this extract
140 const auto it = std::ranges::find_if(
141 namespace_.symbols,
142 [&scope, &namespace_](const std::string& sym) -> bool {
143 return !scope->get(sym, namespace_.name, true).has_value();
144 });
145
146 if (it != namespace_.symbols.end())
147 throw CodeError(
148 fmt::format("ImportError: Can not import symbol {} from {}, as it isn't in the package", *it, namespace_.name),
149 CodeErrorContext(namespace_.ast->filename(), namespace_.ast->position()));
150 }
151
153 break;
154 }
155
156 default:
157 break;
158 }
159 }
160
// Handle a list node whose head is the keyword `keyword`:
//  - set / let / mut: visit the value, then validate and (for let/mut) register the name
//  - import: remember the last component of the imported package in m_plugin_names
//  - while / fun: visit the children (fun also registers its arguments and captures)
//  - any other keyword: visit every child
// The CodeError checks that test register_declarations only fire when it is true; the same
// flag is forwarded to every nested visit.
// NOTE(review): original listing lines 193, 213, 217, 222, 245 and 258 are missing from this
// extract; the comments below only describe what is visible.
161 void NameResolutionPass::visitKeyword(Node& node, const Keyword keyword, const bool register_declarations)
162 {
163 switch (keyword)
164 {
165 case Keyword::Set:
166 [[fallthrough]];
167 case Keyword::Let:
168 [[fallthrough]];
169 case Keyword::Mut:
170 // first, visit the value, then register the symbol
171 // this allows us to detect things like (let foo (fun (&foo) ()))
172 if (node.constList().size() > 2)
173 visit(node.list()[2], register_declarations);
174 if (node.constList().size() > 1 && node.constList()[1].nodeType() == NodeType::Symbol)
175 {
176 const std::string& name = node.constList()[1].string();
177 if (m_language_symbols.contains(name) && register_declarations)
178 throw CodeError(
179 fmt::format("Can not use a reserved identifier ('{}') as a {} name.", name, keyword == Keyword::Let ? "constant" : "variable"),
180 CodeErrorContext(node.filename(), node.constList()[1].position()));
181
// `let` may not reuse a name that is already present in the current scope
182 if (m_scope_resolver.isInScope(name) && keyword == Keyword::Let && register_declarations)
183 throw CodeError(
184 fmt::format("MutabilityError: Can not use 'let' to redefine variable `{}'", name),
185 CodeErrorContext(node.filename(), node.constList()[1].position()));
// a `set` on a name that is not registered matches neither branch below
186 if (keyword == Keyword::Set && m_scope_resolver.isRegistered(name))
187 {
// NOTE(review): node.constList()[2] is read for the message although only size() > 1 has been
// checked at this point; presumably a `set` node always carries a value (confirm with the parser)
188 if (m_scope_resolver.isImmutable(name).value_or(false) && register_declarations)
189 throw CodeError(
190 fmt::format("MutabilityError: Can not set the constant `{}' to {}", name, node.constList()[2].repr()),
191 CodeErrorContext(node.filename(), node.constList()[1].position()));
192
194 }
195 else if (keyword != Keyword::Set)
196 {
197 // update the declared variable name to use the fully qualified name
198 // this will prevent name conflicts, and handle scope resolution
// the symbol is registered on every visit, but the node is only renamed while registering declarations
199 const std::string fully_qualified_name = addDefinedSymbol(name, keyword != Keyword::Let);
200 if (register_declarations)
201 node.list()[1].setString(fully_qualified_name);
202 }
203 }
204 break;
205
206 case Keyword::Import:
// NOTE(review): the guard only proves the list is non-empty, yet element [1] is indexed and
// .back() is called on its list; presumably import nodes always have that shape (confirm)
207 if (!node.constList().empty())
208 m_plugin_names.push_back(node.constList()[1].constList().back().string());
209 break;
210
211 case Keyword::While:
212 // create a new scope to track variables
214 for (auto& child : node.list())
215 visit(child, register_declarations);
216 // remove the scope once the loop has been compiled, only if we were registering declarations
218 break;
219
220 case Keyword::Fun:
221 // create a new scope to track variables
223
// NOTE(review): node.constList()[1] is read without a size check; presumably a `fun` node
// always has an argument list (confirm with the parser)
224 if (node.constList()[1].nodeType() == NodeType::List)
225 {
226 for (auto& child : node.list()[1].list())
227 {
228 if (child.nodeType() == NodeType::Capture)
229 {
230 if (!m_scope_resolver.isRegistered(child.string()) && register_declarations)
231 throw CodeError(
232 fmt::format("Can not capture `{}' because it is referencing a variable defined in an unreachable scope.", child.string()),
233 CodeErrorContext(child.filename(), child.position()));
234
235 // save the old unqualified name of the capture, so that we can use it in the
236 // ASTLowerer later on
237 if (!child.getUnqualifiedName())
238 {
239 child.setUnqualifiedName(child.string());
240 m_defined_symbols.emplace(child.string());
241 }
242 // update the declared variable name to use the fully qualified name
243 // this will prevent name conflicts, and handle scope resolution
244 std::string old_name = child.string();
// captures are registered as mutable
246 addDefinedSymbol(old_name, true);
247 }
248 else if (child.nodeType() == NodeType::Symbol || child.nodeType() == NodeType::RefArg)
249 addDefinedSymbol(child.string(), /* is_mutable= */ false);
250 else if (child.nodeType() == NodeType::MutArg)
251 addDefinedSymbol(child.string(), /* is_mutable= */ true);
252 }
253 }
// the function body, when present, is visited after its arguments have been registered
254 if (node.constList().size() > 2)
255 visit(node.list()[2], register_declarations);
256
257 // remove the scope once the function has been compiled, only if we were registering declarations
259 break;
260
261 default:
262 for (auto& child : node.list())
263 visit(child, register_declarations);
264 break;
265 }
266 }
267
268 void NameResolutionPass::addSymbolNode(const Node& symbol, const std::string& old_name)
269 {
270 const std::string& name = symbol.string();
271
272 // we don't accept builtins/operators as a user symbol
273 if (m_language_symbols.contains(name))
274 return;
275
276 // remove the old name node, to avoid false positive when looking for unbound symbols
277 if (!old_name.empty())
278 {
279 auto it = std::ranges::find_if(m_symbol_nodes, [&old_name, &symbol](const Node& sym_node) -> bool {
280 return sym_node.string() == old_name &&
281 sym_node.position().start == symbol.position().start &&
282 sym_node.filename() == symbol.filename();
283 });
284 if (it != m_symbol_nodes.end())
285 {
286 it->setString(name);
287 return;
288 }
289 }
290
291 const auto it = std::ranges::find_if(m_symbol_nodes, [&name](const Node& sym_node) -> bool {
292 return sym_node.string() == name;
293 });
294 if (it == m_symbol_nodes.end())
295 m_symbol_nodes.push_back(symbol);
296 }
297
298 bool NameResolutionPass::mayBeFromPlugin(const std::string& name) const noexcept
299 {
300 std::string splitted = Utils::splitString(name, ':')[0];
301 const auto it = std::ranges::find_if(
302 m_plugin_names,
303 [&splitted](const std::string& plugin) -> bool {
304 return plugin == splitted;
305 });
306 return it != m_plugin_names.end();
307 }
308
310 {
311 auto [allowed, fqn] = m_scope_resolver.canFullyQualifyName(symbol.string());
312
313 if (m_language_symbols.contains(fqn) && symbol.string() != fqn)
314 {
315 throw CodeError(
316 fmt::format(
317 "Symbol `{}' was resolved to `{}', which is also a builtin name. Either the symbol or the package it's in needs to be renamed to avoid conflicting with the builtin.",
318 symbol.string(), fqn),
319 CodeErrorContext(symbol.filename(), symbol.position()));
320 }
321 if (!allowed)
322 {
323 std::string message;
324 if (fqn.ends_with("#hidden"))
325 message = fmt::format(
326 R"(Unbound variable "{}". However, it exists in a namespace as "{}", did you forget to add it to the symbol list while importing?)",
327 symbol.string(),
328 fqn.substr(0, fqn.find_first_of('#')));
329 else
330 message = fmt::format(R"(Unbound variable "{}". However, it exists in a namespace as "{}", did you forget to prefix it with its namespace?)", symbol.string(), fqn);
331
332 if (m_logger.shouldTrace())
333 m_ast.debugPrint(std::cout) << '\n';
334
335 throw CodeError(message, CodeErrorContext(symbol.filename(), symbol.position()));
336 }
337
338 symbol.setString(fqn);
339 return fqn;
340 }
341
343 {
344 for (const auto& sym : m_symbol_nodes)
345 {
346 const auto& str = sym.string();
347 const bool is_plugin = mayBeFromPlugin(str);
348
349 if (!m_defined_symbols.contains(str) && !is_plugin)
350 {
351 std::string message;
352
353 const std::string suggestion = offerSuggestion(str);
354 if (suggestion.empty())
355 message = fmt::format(R"(Unbound variable error "{}" (variable is used but not defined))", str);
356 else
357 {
358 const std::string prefix = suggestion.substr(0, suggestion.find_first_of(':'));
359 const std::string note_about_prefix = fmt::format(
360 " You either forgot to import it in the symbol list (eg `(import {} :{})') or need to fully qualify it by adding the namespace",
361 prefix,
362 str);
363 const bool add_note = suggestion.ends_with(":" + str);
364 message = fmt::format(R"(Unbound variable error "{}" (did you mean "{}"?{}))", str, suggestion, add_note ? note_about_prefix : "");
365 }
366
367 throw CodeError(message, CodeErrorContext(sym.filename(), sym.position()));
368 }
369 }
370 }
371
372 std::string NameResolutionPass::offerSuggestion(const std::string& str) const
373 {
374 auto iterate = [](const std::string& word, const std::unordered_set<std::string>& dict) -> std::string {
375 std::string suggestion;
376 // our suggestion shouldn't require more than half the string to change
377 std::size_t suggestion_distance = word.size() / 2;
378 for (const std::string& symbol : dict)
379 {
380 const std::size_t current_distance = Utils::levenshteinDistance(word, symbol);
381 if (current_distance <= suggestion_distance)
382 {
383 suggestion_distance = current_distance;
384 suggestion = symbol;
385 }
386 }
387 return suggestion;
388 };
389
390 std::string suggestion = iterate(str, m_defined_symbols);
391 // look for a suggestion related to language builtins
392 if (suggestion.empty())
393 suggestion = iterate(str, m_language_symbols);
394 // look for a suggestion related to a namespace change
395 if (suggestion.empty())
396 {
397 if (const auto it = std::ranges::find_if(m_defined_symbols, [&str](const std::string& symbol) {
398 return symbol.ends_with(":" + str);
399 });
400 it != m_defined_symbols.end())
401 suggestion = *it;
402 }
403
404 return suggestion;
405 }
406}
Lots of utilities about string, filesystem and more.
Host the declaration of all the ArkScript builtins.
ArkScript homemade exceptions.
Resolves names and fully qualifies them in the AST (prefixing them with the package they come from)
bool shouldTrace() const
Definition Logger.hpp:51
void trace(const char *fmt, Args &&... args)
Write a trace level log using fmtlib.
Definition Logger.hpp:113
void traceStart(std::string &&trace_name)
Definition Logger.hpp:90
std::vector< std::string > m_plugin_names
void visit(Node &node, bool register_declarations)
Recursively visit nodes.
void visitKeyword(Node &node, Keyword keyword, bool register_declarations)
const Node & ast() const noexcept override
Unused overload that returns the input AST (untouched as this pass only generates errors)
void checkForUndefinedSymbol() const
Checks for undefined symbols, not present in the defined symbols table.
std::string offerSuggestion(const std::string &str) const
Suggest a symbol of what the user may have meant to input.
std::unordered_set< std::string > m_language_symbols
Precomputed set of language symbols that can't be used to define variables.
std::unordered_set< std::string > m_defined_symbols
bool mayBeFromPlugin(const std::string &name) const noexcept
Checking if a symbol may be coming from a plugin.
void process(const Node &ast) override
Start visiting the given AST, checking for mutability violation and unbound variables.
std::string addDefinedSymbol(const std::string &sym, bool is_mutable)
Register a symbol as defined, so that later we can throw errors on undefined symbols.
std::string updateSymbolWithFullyQualifiedName(Node &symbol)
void addSymbolNode(const Node &symbol, const std::string &old_name="")
Register a given node in the symbol table.
NameResolutionPass(unsigned debug)
Create a NameResolutionPass.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
const std::string & filename() const noexcept
Return the filename in which this node was created.
Definition Node.cpp:164
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
Namespace & arkNamespace() noexcept
Return the namespace held by the value (if the node type allows it)
Definition Node.cpp:53
std::ostream & debugPrint(std::ostream &os) const noexcept
Print a node to an output stream with added type annotations.
Definition Node.cpp:287
FileSpan position() const noexcept
Get the span of the node (start and end)
Definition Node.cpp:159
void setString(const std::string &value) noexcept
Set the String object.
Definition Node.cpp:117
std::vector< Node > & list() noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:68
An interface to describe compiler passes.
Definition Pass.hpp:23
std::string registerInCurrent(const std::string &name, bool is_mutable)
Register a Declaration in the current (last) scope.
void createNewNamespace(const std::string &name, bool with_prefix, bool is_glob, const std::vector< std::string > &symbols)
Create a new namespace scope.
void saveNamespaceAndRemove()
Save the last scope as a namespace, by attaching it to the nearest namespace scope.
std::string getFullyQualifiedNameInNearestScope(const std::string &name) const
Get a FQN from a variable name in the nearest scope it is declared in.
bool isRegistered(const std::string &name) const
Checks if any scope has 'name', in reverse order.
void createNew()
Create a new scope.
StaticScope * currentScope() const
Return a non-owning raw pointer to the current scope.
bool isInScope(const std::string &name) const
Checks if 'name' is in the current scope.
void removeLastScope()
Remove the last scope.
std::optional< bool > isImmutable(const std::string &name) const
Checks the scopes in reverse order for 'name' and returns its mutability status.
std::pair< bool, std::string > canFullyQualifyName(const std::string &name)
Checks if a name can be fully qualified (allows only unprefixed names to be resolved by glob namespac...
std::vector< std::string > splitString(const std::string &source, const char sep)
Cut a string into pieces, given a character separator.
Definition Utils.hpp:31
ARK_API std::size_t levenshteinDistance(const std::string &str1, const std::string &str2)
Calculate the Levenshtein distance between two strings.
Definition Utils.cpp:5
ARK_API const std::vector< std::pair< std::string, Value > > builtins
constexpr std::array< std::string_view, 9 > listInstructions
Definition Common.hpp:119
constexpr std::string_view AppendInPlace
Definition Common.hpp:106
constexpr std::array< std::string_view, 24 > operators
Definition Common.hpp:156
constexpr std::string_view ConcatInPlace
Definition Common.hpp:107
constexpr std::string_view SysArgs
Definition Common.hpp:131
constexpr std::string_view And
Definition Common.hpp:134
constexpr std::array UpdateRef
All the builtins that modify in place a variable.
Definition Common.hpp:112
constexpr std::string_view Or
Definition Common.hpp:135
constexpr std::string_view SysProgramName
Definition Common.hpp:132
Keyword
The different keywords available.
Definition Common.hpp:79
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)