ArkScript
A small, fast, functional and scripting language for video games
Formatter.cpp
Go to the documentation of this file.
1#include <Ark/Constants.hpp>
2#include <CLI/Formatter.hpp>
3
4#include <fmt/core.h>
5#include <fmt/color.h>
6
7#include <Ark/Files.hpp>
8#include <Ark/Exceptions.hpp>
10
11using namespace Ark;
12using namespace Ark::internal;
13
14Formatter::Formatter(const bool dry_run) :
15 m_dry_run(dry_run), m_parser(/* debug= */ 0, /* interpret= */ false), m_updated(false)
16{}
17
18Formatter::Formatter(std::string filename, const bool dry_run) :
19 m_filename(std::move(filename)), m_dry_run(dry_run), m_parser(/* debug= */ 0, /* interpret= */ false), m_updated(false)
20{}
21
23{
24 try
25 {
26 const std::string code = Utils::readFile(m_filename);
30
31 m_updated = code != m_output;
32 }
33 catch (const CodeError& e)
34 {
36 }
37}
38
39void Formatter::runWithString(const std::string& code)
40{
41 try
42 {
46
47 m_updated = code != m_output;
48 }
49 catch (const CodeError& e)
50 {
52 }
53}
54
55const std::string& Formatter::output() const
56{
57 return m_output;
58}
59
61{
62 return m_updated;
63}
64
66{
67 // remove useless surrounding begin (generated by the parser)
68 if (isBeginBlock(ast))
69 {
70 for (std::size_t i = 1, end = ast.constList().size(); i < end; ++i)
71 {
72 const Node node = ast.constList()[i];
73 if (shouldAddNewLineBetweenNodes(ast, i) && !m_output.empty())
74 m_output += "\n";
75 m_output += format(node, 0, false) + "\n";
76 }
77 }
78 else
79 m_output = format(ast, 0, false);
80
81 if (!m_dry_run)
82 {
83 std::ofstream stream(m_filename);
84 stream << m_output;
85 }
86}
87
88void Formatter::warnIfCommentsWereRemoved(const std::string& original_code, const std::string& filename)
89{
90 if (std::ranges::count(original_code, '#') != std::ranges::count(m_output, '#'))
91 {
92 fmt::println(
93 "{}: one or more comments from the original source code seem to have been removed by mistake while formatting {}",
94 fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)),
95 filename != ARK_NO_NAME_FILE ? filename : "file");
96 fmt::println("Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark");
97 }
98}
99
100bool Formatter::isListStartingWithKeyword(const Node& node, const Keyword keyword)
101{
102 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword && node.constList()[0].keyword() == keyword;
103}
104
106{
107 return isListStartingWithKeyword(node, Keyword::Begin);
108}
109
110bool Formatter::isFuncDef(const Node& node)
111{
112 return isListStartingWithKeyword(node, Keyword::Fun);
113}
114
116{
117 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol;
118}
119
120std::size_t Formatter::lineOfLastNodeIn(const Node& node)
121{
122 if (node.isListLike() && !node.constList().empty())
123 {
124 std::size_t child_line = lineOfLastNodeIn(node.constList().back());
125 if (child_line < node.line())
126 return node.line();
127 return child_line;
128 }
129 return node.line();
130}
131
133{
134 const std::string formatted = format(node, 0, false);
135 const std::string::size_type sz = formatted.find_first_of('\n');
136
137 const bool is_long_line = !((sz < FormatterConfig::LongLineLength || (sz == std::string::npos && formatted.size() < FormatterConfig::LongLineLength)));
138 if (node.comment().empty() && (isBeginBlock(node) || isFuncCall(node)))
139 return false;
140 if (is_long_line || (node.isListLike() && node.constList().size() > 1) || !node.comment().empty())
141 return true;
142 return false;
143}
144
145bool Formatter::shouldAddNewLineBetweenNodes(const Node& node, const std::size_t at)
146{
147 if (at <= 1)
148 return false;
149
150 const auto& list = node.constList();
151 std::size_t previous_line = lineOfLastNodeIn(list[at - 1]);
152
153 const auto& child = list[at];
154
155 // If we have a node before the current one,
156 // and the line count between the two nodes is more than 1,
157 // maybe we should add a new line to preserve user spacing.
158 // However, if the current node has a comment, do not add a new line, this is causing the spacing.
159 if (child.line() - previous_line > 1 && child.comment().empty())
160 return true;
161 // If we do have a comment but the spacing is more than 2,
162 // then add a newline to preserve user spacing.
163 if (child.line() - previous_line > 2 && !child.comment().empty())
164 return true;
165 return false;
166}
167
168std::string Formatter::format(const Node& node, std::size_t indent, bool after_newline)
169{
170 std::string output;
171 if (!node.comment().empty())
172 {
173 output += formatComment(node.comment(), indent);
174 after_newline = true;
175 }
176 if (after_newline)
177 output += prefix(indent);
178
179 switch (node.nodeType())
180 {
181 case NodeType::Symbol:
182 output += node.string();
183 break;
184 case NodeType::Capture:
185 output += "&" + node.string();
186 break;
187 case NodeType::Keyword:
188 output += std::string(keywords[static_cast<std::size_t>(node.keyword())]);
189 break;
190 case NodeType::String:
191 output += fmt::format("\"{}\"", node.string());
192 break;
193 case NodeType::Number:
194 output += fmt::format("{}", node.number());
195 break;
196 case NodeType::List:
197 output += formatBlock(node, indent, after_newline);
198 break;
199 case NodeType::Spread:
200 output += fmt::format("...{}", node.string());
201 break;
202 case NodeType::Field:
203 {
204 std::string field = format(node.constList()[0], indent, false);
205 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
206 field += "." + format(node.constList()[i], indent, false);
207 output += field;
208 break;
209 }
210 case NodeType::Macro:
211 output += formatMacro(node, indent);
212 break;
213 // not handling Namespace nor Unused node types as those can not be generated by the parser
214 case NodeType::Namespace:
215 [[fallthrough]];
216 case NodeType::Unused:
217 break;
218 }
219
220 if (!node.commentAfter().empty())
221 output += " " + formatComment(node.commentAfter(), /* indent= */ 0);
222
223 return output;
224}
225
226std::string Formatter::formatComment(const std::string& comment, const std::size_t indent) const
227{
228 std::string output = prefix(indent);
229 for (std::size_t i = 0, end = comment.size(); i < end; ++i)
230 {
231 output += comment[i];
232 if (comment[i] == '\n' && i != end - 1)
233 output += prefix(indent);
234 }
235
236 return output;
237}
238
239std::string Formatter::formatBlock(const Node& node, const std::size_t indent, const bool after_newline)
240{
241 if (node.constList().empty())
242 return "()";
243
244 const Node first = node.constList().front();
245 if (first.nodeType() == NodeType::Keyword)
246 {
247 switch (first.keyword())
248 {
249 case Keyword::Fun:
250 return formatFunction(node, indent);
251 case Keyword::Let:
252 [[fallthrough]];
253 case Keyword::Mut:
254 [[fallthrough]];
255 case Keyword::Set:
256 return formatVariable(node, indent);
257 case Keyword::If:
258 return formatCondition(node, indent);
259 case Keyword::While:
260 return formatLoop(node, indent);
261 case Keyword::Begin:
262 return formatBegin(node, indent, after_newline);
263 case Keyword::Import:
264 return formatImport(node, indent);
265 case Keyword::Del:
266 return formatDel(node, indent);
267 }
268 // HACK: should never reach, but the compiler insists that the function doesn't return in every code path
269 return "";
270 }
271 return formatCall(node, indent);
272}
273
274std::string Formatter::formatFunction(const Node& node, const std::size_t indent)
275{
276 const Node args_node = node.constList()[1];
277 const Node body_node = node.constList()[2];
278
279 std::string formatted_args;
280
281 if (!args_node.comment().empty())
282 {
283 formatted_args += "\n";
284 formatted_args += formatComment(args_node.comment(), indent + 1);
285 formatted_args += prefix(indent + 1);
286 }
287 else
288 formatted_args += " ";
289
290 if (args_node.isListLike())
291 {
292 bool comment_in_args = false;
293 std::string args;
294 for (std::size_t i = 0, end = args_node.constList().size(); i < end; ++i)
295 {
296 const Node arg_i = args_node.constList()[i];
297 if (!arg_i.comment().empty())
298 comment_in_args = true;
299
300 args += format(arg_i, indent + (comment_in_args ? 1 : 0), comment_in_args);
301 if (i != end - 1)
302 args += comment_in_args ? '\n' : ' ';
303 }
304
305 formatted_args += fmt::format("({}{})", (comment_in_args ? "\n" : ""), args);
306 }
307 else
308 formatted_args += format(args_node, indent, false);
309
310 if (!shouldSplitOnNewline(body_node) && args_node.comment().empty())
311 return fmt::format("(fun{} {})", formatted_args, format(body_node, indent + 1, false));
312 return fmt::format("(fun{}\n{})", formatted_args, format(body_node, indent + 1, true));
313}
314
315std::string Formatter::formatVariable(const Node& node, const std::size_t indent)
316{
317 std::string keyword = std::string(keywords[static_cast<std::size_t>(node.constList()[0].keyword())]);
318
319 const Node body_node = node.constList()[2];
320 const std::string formatted_bind = format(node.constList()[1], indent, false);
321
322 // we don't want to add another indentation level here, because it would result in a (let a (fun ()\n{indent+=4}...))
323 if (isFuncDef(body_node))
324 return fmt::format("({} {} {})", keyword, formatted_bind, format(body_node, indent, false));
325 if (!shouldSplitOnNewline(body_node))
326 return fmt::format("({} {} {})", keyword, formatted_bind, format(body_node, indent + 1, false));
327 return fmt::format("({} {}\n{})", keyword, formatted_bind, format(body_node, indent + 1, true));
328}
329
330std::string Formatter::formatCondition(const Node& node, const std::size_t indent, const bool is_macro)
331{
332 const Node cond_node = node.constList()[1];
333 const Node then_node = node.constList()[2];
334
335 bool cond_on_newline = false;
336 std::string formatted_cond = format(cond_node, indent + 1, false);
337 if (formatted_cond.find('\n') != std::string::npos)
338 cond_on_newline = true;
339
340 std::string if_cond_formatted = fmt::format(
341 "({}if{}{}",
342 is_macro ? "$" : "",
343 cond_on_newline ? "\n" : " ",
344 formatted_cond);
345
346 const bool split_then_newline = shouldSplitOnNewline(then_node);
347
348 // (if cond then)
349 if (node.constList().size() == 3)
350 {
351 if (cond_on_newline || split_then_newline)
352 return fmt::format("{}\n{})", if_cond_formatted, format(then_node, indent + 1, true));
353 return fmt::format("{} {})", if_cond_formatted, format(then_node, indent + 1, false));
354 }
355 // (if cond then else)
356 return fmt::format(
357 "{}\n{}\n{}{})",
358 if_cond_formatted,
359 format(then_node, indent + 1, true),
360 format(node.constList()[3], indent + 1, true),
361 node.constList()[3].commentAfter().empty() ? "" : ("\n" + prefix(indent)));
362}
363
364std::string Formatter::formatLoop(const Node& node, const std::size_t indent)
365{
366 const Node cond_node = node.constList()[1];
367 const Node body_node = node.constList()[2];
368
369 bool cond_on_newline = false;
370 std::string formatted_cond = format(cond_node, indent + 1, false);
371 if (formatted_cond.find('\n') != std::string::npos)
372 cond_on_newline = true;
373
374 if (cond_on_newline || shouldSplitOnNewline(body_node))
375 return fmt::format(
376 "(while{}{}\n{})",
377 cond_on_newline ? "\n" : " ",
378 formatted_cond,
379 format(body_node, indent + 1, true));
380 return fmt::format(
381 "(while {} {})",
382 formatted_cond,
383 format(body_node, indent + 1, false));
384}
385
386std::string Formatter::formatBegin(const Node& node, const std::size_t indent, const bool after_newline)
387{
388 // only the keyword begin is present
389 if (node.constList().size() == 1)
390 return "{}";
391
392 // after a new line, we need to increment our indentation level
393 // if the block is a top level one, we also need to increment indentation level
394 const std::size_t inner_indentation = indent + (after_newline ? 1 : 0) + (indent == 0 ? 1 : 0);
395
396 std::string output = "{\n";
397 // skip begin keyword
398 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
399 {
400 const Node child = node.constList()[i];
401 // we want to preserve the node grouping by the user, but remove useless duplicate new line
402 // but that shouldn't apply to the first node of the block
403 if (shouldAddNewLineBetweenNodes(node, i) && i > 1)
404 output += "\n";
405
406 output += format(child, inner_indentation, true);
407 if (i != end - 1)
408 output += "\n";
409 }
410
411 // if the last node has a comment, add a new line
412 if (!node.constList().empty() && !node.constList().back().commentAfter().empty())
413 output += "\n" + prefix(indent) + "}";
414 else
415 output += " }";
416 return output;
417}
418
419std::string Formatter::formatImport(const Node& node, const std::size_t indent)
420{
421 const Node package_node = node.constList()[1];
422 std::string package;
423
424 if (!package_node.comment().empty())
425 package += "\n" + formatComment(package_node.comment(), indent + 1) + prefix(indent + 1);
426 else
427 package += " ";
428
429 for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i)
430 {
431 package += format(package_node.constList()[i], indent + 1, false);
432 if (i != end - 1)
433 package += ".";
434 }
435
436 const Node symbols = node.constList()[2];
437 if (symbols.nodeType() == NodeType::Symbol && symbols.string() == "*")
438 package += ":*";
439 else // symbols is a list
440 {
441 if (const auto& sym_list = symbols.constList(); !sym_list.empty())
442 {
443 const bool comment_after_last = !sym_list.back().commentAfter().empty();
444
445 for (const auto& sym : sym_list)
446 {
447 if (sym.comment().empty())
448 {
449 if (comment_after_last)
450 package += "\n" + prefix(indent + 1) + ":" + sym.string();
451 else
452 package += " :" + sym.string();
453 }
454 else
455 package += "\n" + formatComment(sym.comment(), indent + 1) + prefix(indent + 1) + ":" + sym.string();
456 }
457
458 if (comment_after_last)
459 {
460 package += " " + formatComment(sym_list.back().commentAfter(), /* indent= */ 0);
461 package += "\n" + prefix(indent + 1);
462 }
463 }
464 }
465
466 return fmt::format("(import{})", package);
467}
468
469std::string Formatter::formatDel(const Node& node, const std::size_t indent)
470{
471 std::string formatted_sym = format(node.constList()[1], indent + 1, false);
472 if (formatted_sym.find('\n') != std::string::npos)
473 return fmt::format("(del\n{})", formatted_sym);
474 return fmt::format("(del {})", formatted_sym);
475}
476
477std::string Formatter::formatCall(const Node& node, const std::size_t indent)
478{
479 bool is_list = false;
480 if (!node.constList().empty() && node.constList().front().nodeType() == NodeType::Symbol &&
481 node.constList().front().string() == "list")
482 is_list = true;
483
484 bool is_multiline = false;
485
486 std::vector<std::string> formatted_args;
487 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
488 {
489 formatted_args.push_back(format(node.constList()[i], indent, false));
490 // if we have at least one argument taking multiple lines, split them all on their own line
491 if (formatted_args.back().find('\n') != std::string::npos || !node.constList()[i].commentAfter().empty())
492 is_multiline = true;
493 }
494
495 std::string output = is_list ? "[" : ("(" + format(node.constList()[0], indent, false));
496 for (std::size_t i = 0, end = formatted_args.size(); i < end; ++i)
497 {
498 const std::string formatted_node = formatted_args[i];
499 if (is_multiline)
500 output += "\n" + format(node.constList()[i + 1], indent + 1, true);
501 else
502 output += (is_list && i == 0 ? "" : " ") + formatted_node;
503 }
504 if (!node.constList().back().commentAfter().empty())
505 output += "\n" + prefix(indent);
506 output += is_list ? "]" : ")";
507 return output;
508}
509
510std::string Formatter::formatMacro(const Node& node, const std::size_t indent)
511{
512 if (isListStartingWithKeyword(node, Keyword::If))
513 return formatCondition(node, indent, /* is_macro= */ true);
514
515 std::string output = "($ ";
516 bool after_newline = false;
517
518 for (std::size_t i = 0, end = node.constList().size(); i < end; ++i)
519 {
520 output += format(node.constList()[i], indent + 1, after_newline);
521 after_newline = false;
522
523 if (!node.constList()[i].commentAfter().empty())
524 {
525 output += "\n";
526 after_newline = true;
527 }
528 else if (i != end - 1)
529 output += " ";
530 }
531
532 return output + ")";
533}
Common code for the compiler.
Constants used by ArkScript.
#define ARK_NO_NAME_FILE
Definition Constants.hpp:27
ArkScript homemade exceptions.
Lots of utilities about the filesystem.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:31
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:77
bool isListLike() const noexcept
Check if the node is a list like node.
Definition Node.cpp:82
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:37
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:72
Keyword keyword() const noexcept
Return the keyword held by the value (if the node type allows it)
Definition Node.cpp:47
const std::string & comment() const noexcept
Return the comment attached to this node, if any.
Definition Node.cpp:150
const std::string & commentAfter() const noexcept
Return the comment attached after this node, if any.
Definition Node.cpp:155
double number() const noexcept
Return the number held by the value (if the node type allows it)
Definition Node.cpp:42
std::size_t line() const noexcept
Get the line at which this node was created.
Definition Node.cpp:135
void process(const std::string &filename, const std::string &code)
Parse the given code.
Definition Parser.cpp:14
const Node & ast() const noexcept
Definition Parser.cpp:48
std::string formatMacro(const Ark::internal::Node &node, std::size_t indent)
bool shouldSplitOnNewline(const Ark::internal::Node &node)
Decide if a node should be split on a newline or not.
void run()
Read the file and process it. The file isn't modified.
Definition Formatter.cpp:22
std::string formatVariable(const Ark::internal::Node &node, std::size_t indent)
bool codeModified() const
Definition Formatter.cpp:60
std::string formatBlock(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
std::string formatDel(const Ark::internal::Node &node, std::size_t indent)
void processAst(const Ark::internal::Node &ast)
Definition Formatter.cpp:65
static std::string prefix(const std::size_t indent)
Compute indentation level.
std::string formatCall(const Ark::internal::Node &node, std::size_t indent)
static bool isBeginBlock(const Ark::internal::Node &node)
Check if a node is a begin block.
bool m_dry_run
If true, only prints the formatted file instead of saving it to disk.
Definition Formatter.hpp:41
static bool isFuncCall(const Ark::internal::Node &node)
Check if a node is a function call (foo bar egg)
static bool isFuncDef(const Ark::internal::Node &node)
Check if a node is a function definition (fun (args) body)
Ark::internal::Parser m_parser
Definition Formatter.hpp:42
bool shouldAddNewLineBetweenNodes(const Ark::internal::Node &node, std::size_t at)
Decide if we should add a newline after a node in a block.
std::string formatBegin(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
bool m_updated
True if the original code now differs from the formatted one.
Definition Formatter.hpp:44
Formatter(bool dry_run)
Definition Formatter.cpp:14
std::string formatFunction(const Ark::internal::Node &node, std::size_t indent)
std::string formatLoop(const Ark::internal::Node &node, std::size_t indent)
std::string m_output
Definition Formatter.hpp:43
const std::string & output() const
Definition Formatter.cpp:55
std::string formatImport(const Ark::internal::Node &node, std::size_t indent)
std::string formatComment(const std::string &comment, std::size_t indent) const
const std::string m_filename
Definition Formatter.hpp:40
void runWithString(const std::string &code)
Definition Formatter.cpp:39
void warnIfCommentsWereRemoved(const std::string &original_code, const std::string &filename)
Given the original code, produce a warning if comments from it were removed during formatting.
Definition Formatter.cpp:88
static std::size_t lineOfLastNodeIn(const Ark::internal::Node &node)
Compute the line on which the deepest right most node of node is at.
std::string format(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
Handles all node formatting.
static bool isListStartingWithKeyword(const Ark::internal::Node &node, Ark::internal::Keyword keyword)
Check if a given node starts with a given keyword.
std::string formatCondition(const Ark::internal::Node &node, std::size_t indent, bool is_macro=false)
ARK_API void generate(const CodeError &e, std::ostream &os=std::cout, bool colorize=true)
Generate a diagnostic from an error and print it to the standard output.
std::string readFile(const std::string &name)
Helper to read a file.
Definition Files.hpp:48
Keyword
The different keywords available.
Definition Common.hpp:60
constexpr std::array< std::string_view, 9 > keywords
List of available keywords in ArkScript.
Definition Common.hpp:73
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
static constexpr std::size_t LongLineLength
Max number of characters per line segment to consider splitting.
Definition Formatter.hpp:11