ArkScript
A small, lisp-inspired, functional scripting language
Formatter.cpp
Go to the documentation of this file.
1#include <Ark/Constants.hpp>
2#include <CLI/Formatter.hpp>
3
4#include <fmt/core.h>
5
6#include <Ark/Utils/Files.hpp>
11
12using namespace Ark;
13using namespace Ark::internal;
14using namespace Ark::literals;
15
16Formatter::Formatter(const bool dry_run) :
17 m_dry_run(dry_run), m_parser(/* debug= */ 0, ParserMode::Raw), m_updated(false), m_logger("formatter", 0)
18{}
19
20Formatter::Formatter(std::string filename, const bool dry_run) :
21 m_filename(std::move(filename)), m_dry_run(dry_run), m_parser(/* debug= */ 0, ParserMode::Raw), m_updated(false), m_logger("formatter", 0)
22{}
23
// Body of Formatter::run(); the declaration list at the end of this page gives the signature `void run()`.
// What is visible: the file named by m_filename is read, and m_updated records whether its content
// differs from m_output.
// NOTE(review): the embedded numbering jumps 23 -> 25, 28 -> 32 and 36 -> 38, so the signature line and
// some body statements are absent from this listing; confirm against the real file before editing.
25{
26 try
27 {
// read the content of the file named by m_filename
28 const std::string code = Utils::readFile(m_filename);
// NOTE(review): statements are missing here (see above) - presumably where m_output gets filled; TODO confirm
32
// remember whether the text read from disk and m_output differ
33 m_updated = code != m_output;
34 }
35 catch (const CodeError& e)
36 {
// NOTE(review): the handler body is not visible in this listing
38 }
39}
40
// Same flow as the file-based entry point, but working on source text handed in by the caller.
// @param code the original source text; it is compared with m_output to set m_updated
// NOTE(review): the embedded numbering jumps 44 -> 48 and 52 -> 54, so some body statements are absent
// from this listing; confirm against the real file before editing.
41void Formatter::runWithString(const std::string& code)
42{
43 try
44 {
// NOTE(review): statements are missing here (see above) - presumably where m_output gets filled; TODO confirm
48
// remember whether the given text and m_output differ
49 m_updated = code != m_output;
50 }
51 catch (const CodeError& e)
52 {
// NOTE(review): the handler body is not visible in this listing
54 }
55}
56
57const std::string& Formatter::output() const
58{
59 return m_output;
60}
61
63{
64 return m_updated;
65}
66
68{
69 // remove useless surrounding begin (generated by the parser)
70 if (isBeginBlock(ast))
71 {
72 for (std::size_t i = 1, end = ast.constList().size(); i < end; ++i)
73 {
74 const Node node = ast.constList()[i];
75 if (shouldAddNewLineBetweenNodes(ast, i) && !m_output.empty())
76 m_output += "\n";
77 m_output += format(node, 0, false) + "\n";
78 }
79 }
80 else
81 m_output = format(ast, 0, false);
82
83 if (!m_dry_run)
84 {
85 std::ofstream stream(m_filename);
86 stream << m_output;
87 }
88}
89
90void Formatter::warnIfCommentsWereRemoved(const std::string& original_code, const std::string& filename)
91{
92 const std::size_t before_count = std::ranges::count(original_code, '#');
93 const std::size_t after_count = std::ranges::count(m_output, '#');
94
95 if (before_count != after_count)
96 {
98 "one or more comments from the original source code seem to have been {} by mistake while formatting {}",
99 before_count > after_count ? "removed" : "duplicated",
100 filename != ARK_NO_NAME_FILE ? filename : "file");
101 m_logger.warn("Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark");
102 }
103}
104
105bool Formatter::isListStartingWithKeyword(const Node& node, const Keyword keyword)
106{
107 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword && node.constList()[0].keyword() == keyword;
108}
109
111{
112 return isListStartingWithKeyword(node, Keyword::Begin);
113}
114
115bool Formatter::isFuncDef(const Node& node)
116{
117 return isListStartingWithKeyword(node, Keyword::Fun);
118}
119
121{
122 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol;
123}
124
125std::size_t Formatter::lineOfLastNodeIn(const Node& node)
126{
127 if (node.isListLike() && !node.constList().empty())
128 {
129 const std::size_t child_line = lineOfLastNodeIn(node.constList().back());
130 if (child_line < node.position().start.line)
131 return node.position().start.line;
132 return child_line;
133 }
134 return node.position().start.line;
135}
136
138{
139 const std::string formatted = format(node, 0, false);
140 const std::size_t max_len =
141 std::ranges::max(
142 Utils::splitString(formatted, '\n'),
143 [](const std::string& lhs, const std::string& rhs) {
144 return lhs.size() < rhs.size();
145 })
146 .size();
147 const std::size_t newlines = std::ranges::count(formatted, '\n');
148
149 // split on multiple lines if we have a very long node,
150 // or if we added many line breaks while doing dumb formatting
151 return max_len >= FormatterConfig::LongLineLength || (newlines > 0 && node.isListLike() && newlines + 1 >= node.constList().size());
152}
153
155{
156 if (node.comment().empty() && (isBeginBlock(node) || isFuncCall(node)))
157 return false;
158 if (isLongLine(node) || (node.isListLike() && node.constList().size() > 1) || !node.comment().empty())
159 return true;
160 return false;
161}
162
163bool Formatter::shouldAddNewLineBetweenNodes(const Node& node, const std::size_t at)
164{
165 if (at <= 1)
166 return false;
167
168 const auto& list = node.constList();
169 const std::size_t previous_line = lineOfLastNodeIn(list[at - 1]);
170
171 const auto& child = list[at];
172
173 // If we have a node before the current one,
174 // and the line count between the two nodes is more than 1,
175 // maybe we should add a new line to preserve user spacing.
176 // However, if the current node has a comment, do not add a new line, this is causing the spacing.
177 if (child.position().start.line - previous_line > 1 && child.comment().empty())
178 return true;
179 // If we do have a comment but the spacing is more than 2,
180 // then add a newline to preserve user spacing.
181 if (child.position().start.line - previous_line > 2 && !child.comment().empty())
182 return true;
183 return false;
184}
185
186std::string Formatter::format(const Node& node, std::size_t indent, bool after_newline)
187{
188 std::string result;
189 if (!node.comment().empty())
190 {
191 result += formatComment(node.comment(), indent);
192 after_newline = true;
193 }
194 if (after_newline)
195 result += prefix(indent);
196
197 switch (node.nodeType())
198 {
199 case NodeType::Symbol:
200 result += node.string();
201 break;
202 case NodeType::MutArg:
203 result += fmt::format("(mut {})", node.string());
204 break;
205 case NodeType::RefArg:
206 result += fmt::format("(ref {})", node.string());
207 break;
208 case NodeType::Capture:
209 result += "&" + node.string();
210 break;
211 case NodeType::Keyword:
212 result += std::string(keywords[static_cast<std::size_t>(node.keyword())]);
213 break;
214 case NodeType::String:
215 result += fmt::format("\"{}\"", node.string());
216 break;
217 case NodeType::Number:
218 result += fmt::format("{}", node.number());
219 break;
220 case NodeType::List:
221 result += formatBlock(node, indent, after_newline);
222 break;
223 case NodeType::Spread:
224 result += fmt::format("...{}", node.string());
225 break;
226 case NodeType::Field:
227 {
228 std::string field = format(node.constList()[0], indent, false);
229 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
230 field += "." + format(node.constList()[i], indent, false);
231 result += field;
232 break;
233 }
234 case NodeType::Macro:
235 result += formatMacro(node, indent);
236 break;
237 // not handling Namespace nor Unused node types as those can not be generated by the parser
238 case NodeType::Namespace:
239 [[fallthrough]];
240 case NodeType::Unused:
241 break;
242 }
243
244 if (!node.commentAfter().empty())
245 result += " " + formatComment(node.commentAfter(), /* indent= */ 0);
246
247 return result;
248}
249
250std::string Formatter::formatComment(const std::string& comment, const std::size_t indent) const
251{
252 std::string result = prefix(indent);
253 for (std::size_t i = 0, end = comment.size(); i < end; ++i)
254 {
255 result += comment[i];
256 if (comment[i] == '\n' && i != end - 1)
257 result += prefix(indent);
258 }
259
260 return result;
261}
262
263std::string Formatter::formatBlock(const Node& node, const std::size_t indent, const bool after_newline)
264{
265 if (node.constList().empty())
266 return "()";
267
268 const Node first = node.constList().front();
269 if (first.nodeType() == NodeType::Keyword)
270 {
271 switch (first.keyword())
272 {
273 case Keyword::Fun:
274 return formatFunction(node, indent);
275 case Keyword::Let:
276 [[fallthrough]];
277 case Keyword::Mut:
278 [[fallthrough]];
279 case Keyword::Set:
280 return formatVariable(node, indent);
281 case Keyword::If:
282 return formatCondition(node, indent);
283 case Keyword::While:
284 return formatLoop(node, indent);
285 case Keyword::Begin:
286 return formatBegin(node, indent, after_newline);
287 case Keyword::Import:
288 return formatImport(node, indent);
289 case Keyword::Del:
290 return formatDel(node, indent);
291 }
292 // HACK: should never reach, but the compiler insists that the function doesn't return in every code path
293 return "";
294 }
295 return formatCall(node, indent);
296}
297
298std::string Formatter::formatFunction(const Node& node, const std::size_t indent)
299{
300 const Node args_node = node.constList()[1];
301 const Node body_node = node.constList()[2];
302
303 std::string formatted_args;
304
305 if (!args_node.comment().empty())
306 {
307 formatted_args += "\n";
308 formatted_args += formatComment(args_node.comment(), indent + 1);
309 formatted_args += prefix(indent + 1);
310 }
311 else
312 formatted_args += " ";
313
314 if (args_node.isListLike())
315 {
316 bool comment_in_args = false;
317 std::string args;
318 const bool split = (isLongLine(args_node) || !args_node.comment().empty());
319
320 for (std::size_t i = 0, end = args_node.constList().size(); i < end; ++i)
321 {
322 const Node arg_i = args_node.constList()[i];
323 if (!arg_i.comment().empty())
324 comment_in_args = true;
325
326 args += format(arg_i, indent + ((comment_in_args || split) ? 1 : 0), i > 0 && (comment_in_args || split));
327 if (i != end - 1)
328 args += (comment_in_args || split) ? '\n' : ' ';
329 }
330
331 formatted_args += fmt::format("({}{})", (comment_in_args ? "\n" : ""), args);
332 }
333 else
334 formatted_args += format(args_node, indent, false);
335
336 if (!shouldSplitOnNewline(body_node) && args_node.comment().empty())
337 return fmt::format("(fun{} {})", formatted_args, format(body_node, indent + 1, false));
338 return fmt::format("(fun{}\n{})", formatted_args, format(body_node, indent + 1, true));
339}
340
341std::string Formatter::formatVariable(const Node& node, const std::size_t indent)
342{
343 const auto keyword = std::string(keywords[static_cast<std::size_t>(node.constList()[0].keyword())]);
344
345 const Node body_node = node.constList()[2];
346 const std::string formatted_bind = format(node.constList()[1], indent, false);
347
348 // we don't want to add another indentation level here, because it would result in a (let a (fun ()\n{indent+=4}...))
349 if (isFuncDef(body_node) || !shouldSplitOnNewline(body_node))
350 return fmt::format("({} {} {})", keyword, formatted_bind, format(body_node, indent, false));
351 return fmt::format("({} {}\n{})", keyword, formatted_bind, format(body_node, indent + 1, true));
352}
353
354std::string Formatter::formatCondition(const Node& node, const std::size_t indent, const bool is_macro)
355{
356 const Node cond_node = node.constList()[1];
357 const Node then_node = node.constList()[2];
358
359 bool cond_on_newline = false;
360 const std::string formatted_cond = format(cond_node, indent + 1, false);
361 if (formatted_cond.find('\n') != std::string::npos)
362 cond_on_newline = true;
363
364 std::string if_cond_formatted = fmt::format(
365 "({}if{}{}",
366 is_macro ? "$" : "",
367 cond_on_newline ? "\n" : " ",
368 cond_on_newline ? format(cond_node, indent + 1, true) : formatted_cond);
369
370 const bool split_then_newline = shouldSplitOnNewline(then_node) || isBeginBlock(then_node);
371
372 // (if cond then)
373 if (node.constList().size() == 3)
374 {
375 if (cond_on_newline || split_then_newline)
376 return fmt::format("{}\n{})", if_cond_formatted, format(then_node, indent + 1, true));
377 return fmt::format("{} {})", if_cond_formatted, format(then_node, indent + 1, false));
378 }
379 // (if cond then else)
380 return fmt::format(
381 "{}\n{}\n{}{})",
382 if_cond_formatted,
383 format(then_node, indent + 1, true),
384 format(node.constList()[3], indent + 1, true),
385 node.constList()[3].commentAfter().empty() ? "" : ("\n" + prefix(indent)));
386}
387
388std::string Formatter::formatLoop(const Node& node, const std::size_t indent)
389{
390 const Node cond_node = node.constList()[1];
391 const Node body_node = node.constList()[2];
392
393 bool cond_on_newline = false;
394 std::string formatted_cond = format(cond_node, indent + 1, false);
395 if (formatted_cond.find('\n') != std::string::npos)
396 cond_on_newline = true;
397
398 if (cond_on_newline || shouldSplitOnNewline(body_node))
399 return fmt::format(
400 "(while{}{}\n{})",
401 cond_on_newline ? "\n" : " ",
402 cond_on_newline ? format(cond_node, indent + 1, true) : formatted_cond,
403 format(body_node, indent + 1, true));
404 return fmt::format(
405 "(while {} {})",
406 formatted_cond,
407 format(body_node, indent + 1, false));
408}
409
410std::string Formatter::formatBegin(const Node& node, const std::size_t indent, const bool after_newline)
411{
412 // only the keyword begin is present
413 if (node.constList().size() == 1)
414 return "{}";
415
416 // after a new line, we need to increment our indentation level
417 // if the block is a top level one, we also need to increment indentation level
418 const std::size_t inner_indentation = indent + (after_newline ? 1 : 0) + (indent == 0 ? 1 : 0);
419
420 std::string result = "{\n";
421 // skip begin keyword
422 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
423 {
424 const Node child = node.constList()[i];
425 // we want to preserve the node grouping by the user, but remove useless duplicate new line
426 // but that shouldn't apply to the first node of the block
427 if (shouldAddNewLineBetweenNodes(node, i) && i > 1)
428 result += "\n";
429
430 result += format(child, inner_indentation, true);
431 if (i != end - 1)
432 result += "\n";
433 }
434
435 // if the last node has a comment, add a new line
436 if (!node.constList().empty() && !node.constList().back().commentAfter().empty())
437 result += "\n" + prefix(indent) + "}";
438 else
439 result += " }";
440 return result;
441}
442
443std::string Formatter::formatImport(const Node& node, const std::size_t indent)
444{
445 const Node package_node = node.constList()[1];
446 std::string package;
447
448 if (!package_node.comment().empty())
449 package += "\n" + formatComment(package_node.comment(), indent + 1) + prefix(indent + 1);
450 else
451 package += " ";
452
453 for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i)
454 {
455 package += format(package_node.constList()[i], indent + 1, false);
456 if (i != end - 1)
457 package += ".";
458 }
459
460 const Node symbols = node.constList()[2];
461 if (symbols.nodeType() == NodeType::Symbol && symbols.string() == "*")
462 package += ":*";
463 else // symbols is a list
464 {
465 if (const auto& sym_list = symbols.constList(); !sym_list.empty())
466 {
467 const bool comment_after_last = !sym_list.back().commentAfter().empty();
468
469 for (const auto& sym : sym_list)
470 {
471 if (sym.comment().empty())
472 {
473 if (comment_after_last)
474 package += "\n" + prefix(indent + 1) + ":" + sym.string();
475 else
476 package += " :" + sym.string();
477 }
478 else
479 package += "\n" + formatComment(sym.comment(), indent + 1) + prefix(indent + 1) + ":" + sym.string();
480 }
481
482 if (comment_after_last)
483 {
484 package += " " + formatComment(sym_list.back().commentAfter(), /* indent= */ 0);
485 package += "\n" + prefix(indent + 1);
486 }
487 }
488 }
489
490 return fmt::format("(import{})", package);
491}
492
493std::string Formatter::formatDel(const Node& node, const std::size_t indent)
494{
495 std::string formatted_sym = format(node.constList()[1], indent + 1, false);
496 if (formatted_sym.find('\n') != std::string::npos)
497 return fmt::format("(del\n{})", formatted_sym);
498 return fmt::format("(del {})", formatted_sym);
499}
500
501std::string Formatter::formatCall(const Node& node, const std::size_t indent)
502{
503 bool is_list = false;
504 bool is_dict = false;
505 bool is_multiline = false;
506
507 if (!node.constList().empty() && node.constList().front().nodeType() == NodeType::Symbol)
508 {
509 if (node.constList().front().string() == "list")
510 is_list = true;
511 else if (node.constList().front().string() == "dict")
512 is_dict = true;
513 }
514
515 std::vector<std::string> formatted_args;
516 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
517 {
518 formatted_args.push_back(format(node.constList()[i], indent, false));
519 // if we have at least one argument taking multiple lines, split them all on their own line
520 if (formatted_args.back().find('\n') != std::string::npos || !node.constList()[i].commentAfter().empty())
521 is_multiline = true;
522 }
523
524 std::string result = is_list ? "[" : ("(" + format(node.constList()[0], indent, false));
525
526 // Split args on multiple lines even if, individually, they fit in the configured line length, if grouped together
527 // on a single line they are too long
528 const std::size_t args_line_length = std::accumulate(
529 formatted_args.begin(),
530 formatted_args.end(),
531 result.size() + 1, // +1 to count the closing paren/bracket
532 [](const std::size_t acc, const std::string& val) {
533 return acc + val.size() + 1_z;
534 });
535 if (args_line_length >= FormatterConfig::LongLineLength)
536 is_multiline = true;
537
538 for (std::size_t i = 0, end = formatted_args.size(); i < end; ++i)
539 {
540 const std::string& formatted_node = formatted_args[i];
541 if (is_dict)
542 {
543 if (i % 2 == 0 && formatted_args.size() > 2) // one pair per line if we have at least 2 key-value pairs
544 result += "\n" + format(node.constList()[i + 1], indent + 1, true);
545 else
546 result += " " + formatted_node;
547 }
548 else if (is_multiline)
549 result += "\n" + format(node.constList()[i + 1], indent + 1, true);
550 else if (is_list && i == 0)
551 result += formatted_node;
552 else // put all arguments on the same line
553 result += " " + formatted_node;
554 }
555 if (!node.constList().back().commentAfter().empty())
556 result += "\n" + prefix(indent);
557
558 result += is_list ? "]" : ")";
559 return result;
560}
561
562std::string Formatter::formatMacro(const Node& node, const std::size_t indent)
563{
564 if (isListStartingWithKeyword(node, Keyword::If))
565 return formatCondition(node, indent, /* is_macro= */ true);
566
567 std::string result = "(macro ";
568 bool after_newline = false;
569
570 for (std::size_t i = 0, end = node.constList().size(); i < end; ++i)
571 {
572 result += format(node.constList()[i], indent + 1, after_newline);
573 after_newline = false;
574
575 if (!node.constList()[i].commentAfter().empty())
576 {
577 result += "\n";
578 after_newline = true;
579 }
580 else if (i != end - 1)
581 result += " ";
582 }
583
584 return result + ")";
585}
Common code for the compiler.
Constants used by ArkScript.
#define ARK_NO_NAME_FILE
Definition Constants.hpp:28
Tools to report code errors nicely to the user.
ArkScript homemade exceptions.
Lots of utilities about the filesystem.
User defined literals for Ark internals.
void warn(const char *fmt, Args &&... args)
Write a warn level log using fmtlib.
Definition Logger.hpp:80
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
bool isListLike() const noexcept
Check if the node is a list like node.
Definition Node.cpp:83
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
Keyword keyword() const noexcept
Return the keyword held by the value (if the node type allows it)
Definition Node.cpp:48
const std::string & comment() const noexcept
Return the comment attached to this node, if any.
Definition Node.cpp:169
FileSpan position() const noexcept
Get the span of the node (start and end)
Definition Node.cpp:159
const std::string & commentAfter() const noexcept
Return the comment attached after this node, if any.
Definition Node.cpp:174
double number() const noexcept
Return the number held by the value (if the node type allows it)
Definition Node.cpp:43
void process(const std::string &filename, const std::string &code)
Parse the given code.
Definition Parser.cpp:51
const Node & ast() const noexcept
Definition Parser.cpp:92
std::string formatMacro(const Ark::internal::Node &node, std::size_t indent)
bool shouldSplitOnNewline(const Ark::internal::Node &node)
Decide if a node should be split on a newline or not.
Ark::internal::Logger m_logger
Definition Formatter.hpp:45
void run()
Read the file and process it. The file isn't modified.
Definition Formatter.cpp:24
std::string formatVariable(const Ark::internal::Node &node, std::size_t indent)
bool codeModified() const
Definition Formatter.cpp:62
std::string formatBlock(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
std::string formatDel(const Ark::internal::Node &node, std::size_t indent)
void processAst(const Ark::internal::Node &ast)
Definition Formatter.cpp:67
static std::string prefix(const std::size_t indent)
Compute indentation level.
std::string formatCall(const Ark::internal::Node &node, std::size_t indent)
static bool isBeginBlock(const Ark::internal::Node &node)
Check if a node is a begin block.
bool m_dry_run
If true, only prints the formatted file instead of saving it to disk.
Definition Formatter.hpp:41
static bool isFuncCall(const Ark::internal::Node &node)
Check if a node is a function call (foo bar egg)
static bool isFuncDef(const Ark::internal::Node &node)
Check if a node is a function definition (fun (args) body)
Ark::internal::Parser m_parser
Definition Formatter.hpp:42
bool shouldAddNewLineBetweenNodes(const Ark::internal::Node &node, std::size_t at)
Decide if we should add a newline after a node in a block.
std::string formatBegin(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
bool m_updated
True if the original code now differs from the formatted one.
Definition Formatter.hpp:44
Formatter(bool dry_run)
Definition Formatter.cpp:16
std::string formatFunction(const Ark::internal::Node &node, std::size_t indent)
std::string formatLoop(const Ark::internal::Node &node, std::size_t indent)
std::string m_output
Definition Formatter.hpp:43
const std::string & output() const
Definition Formatter.cpp:57
std::string formatImport(const Ark::internal::Node &node, std::size_t indent)
std::string formatComment(const std::string &comment, std::size_t indent) const
const std::string m_filename
Definition Formatter.hpp:40
void runWithString(const std::string &code)
Definition Formatter.cpp:41
void warnIfCommentsWereRemoved(const std::string &original_code, const std::string &filename)
Given the original code, produce a warning if comments from it were removed during formatting.
Definition Formatter.cpp:90
static std::size_t lineOfLastNodeIn(const Ark::internal::Node &node)
Compute the line on which the deepest, rightmost node nested in the given node starts.
std::string format(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
Handles all node formatting.
bool isLongLine(const Ark::internal::Node &node)
static bool isListStartingWithKeyword(const Ark::internal::Node &node, Ark::internal::Keyword keyword)
Check if a given node starts with a given keyword.
std::string formatCondition(const Ark::internal::Node &node, std::size_t indent, bool is_macro=false)
ARK_API void generate(const CodeError &e, std::ostream &os=std::cerr, bool colorize=true)
Generate a diagnostic from an error and print it to the standard error output.
std::string readFile(const std::string &name)
Helper to read a file.
Definition Files.hpp:47
std::vector< std::string > splitString(const std::string &source, const char sep)
Cut a string into pieces, given a character separator.
Definition Utils.hpp:31
@ Raw
Keep all text as is without modifying it (useful for the code formatter)
Keyword
The different keywords available.
Definition Common.hpp:79
constexpr std::array< std::string_view, 9 > keywords
List of available keywords in ArkScript.
Definition Common.hpp:92
STL namespace.
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
std::size_t line
0-indexed line number
Definition Position.hpp:22
static constexpr std::size_t LongLineLength
Max number of characters per line segment to consider splitting.
Definition Formatter.hpp:11