ArkScript
A small, lisp-inspired, functional scripting language
Formatter.cpp
Go to the documentation of this file.
1#include <Ark/Constants.hpp>
2#include <CLI/Formatter.hpp>
3
4#include <fmt/core.h>
5#include <fmt/color.h>
6
7#include <Ark/Utils/Files.hpp>
12
13using namespace Ark;
14using namespace Ark::internal;
15using namespace Ark::literals;
16
17Formatter::Formatter(const bool dry_run) :
18 m_dry_run(dry_run), m_parser(/* debug= */ 0, ParserMode::Raw), m_updated(false)
19{}
20
21Formatter::Formatter(std::string filename, const bool dry_run) :
22 m_filename(std::move(filename)), m_dry_run(dry_run), m_parser(/* debug= */ 0, ParserMode::Raw), m_updated(false)
23{}
24
26{
27 try
28 {
29 const std::string code = Utils::readFile(m_filename);
33
34 m_updated = code != m_output;
35 }
36 catch (const CodeError& e)
37 {
39 }
40}
41
42void Formatter::runWithString(const std::string& code)
43{
44 try
45 {
49
50 m_updated = code != m_output;
51 }
52 catch (const CodeError& e)
53 {
55 }
56}
57
58const std::string& Formatter::output() const
59{
60 return m_output;
61}
62
64{
65 return m_updated;
66}
67
69{
70 // remove useless surrounding begin (generated by the parser)
71 if (isBeginBlock(ast))
72 {
73 for (std::size_t i = 1, end = ast.constList().size(); i < end; ++i)
74 {
75 const Node node = ast.constList()[i];
76 if (shouldAddNewLineBetweenNodes(ast, i) && !m_output.empty())
77 m_output += "\n";
78 m_output += format(node, 0, false) + "\n";
79 }
80 }
81 else
82 m_output = format(ast, 0, false);
83
84 if (!m_dry_run)
85 {
86 std::ofstream stream(m_filename);
87 stream << m_output;
88 }
89}
90
91void Formatter::warnIfCommentsWereRemoved(const std::string& original_code, const std::string& filename)
92{
93 const std::size_t before_count = std::ranges::count(original_code, '#');
94 const std::size_t after_count = std::ranges::count(m_output, '#');
95
96 if (before_count != after_count)
97 {
98 fmt::println(
99 "{}: one or more comments from the original source code seem to have been {} by mistake while formatting {}",
100 fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)),
101 before_count > after_count ? "removed" : "duplicated",
102 filename != ARK_NO_NAME_FILE ? filename : "file");
103 fmt::println("Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark");
104 }
105}
106
107bool Formatter::isListStartingWithKeyword(const Node& node, const Keyword keyword)
108{
109 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword && node.constList()[0].keyword() == keyword;
110}
111
113{
114 return isListStartingWithKeyword(node, Keyword::Begin);
115}
116
117bool Formatter::isFuncDef(const Node& node)
118{
119 return isListStartingWithKeyword(node, Keyword::Fun);
120}
121
123{
124 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol;
125}
126
127std::size_t Formatter::lineOfLastNodeIn(const Node& node)
128{
129 if (node.isListLike() && !node.constList().empty())
130 {
131 const std::size_t child_line = lineOfLastNodeIn(node.constList().back());
132 if (child_line < node.position().start.line)
133 return node.position().start.line;
134 return child_line;
135 }
136 return node.position().start.line;
137}
138
140{
141 const std::string formatted = format(node, 0, false);
142 const std::size_t max_len =
143 std::ranges::max(
144 Utils::splitString(formatted, '\n'),
145 [](const std::string& lhs, const std::string& rhs) {
146 return lhs.size() < rhs.size();
147 })
148 .size();
149 const std::size_t newlines = std::ranges::count(formatted, '\n');
150
151 // split on multiple lines if we have a very long node,
152 // or if we added many line breaks while doing dumb formatting
153 return max_len >= FormatterConfig::LongLineLength || (newlines > 0 && node.isListLike() && newlines + 1 >= node.constList().size());
154}
155
157{
158 if (node.comment().empty() && (isBeginBlock(node) || isFuncCall(node)))
159 return false;
160 if (isLongLine(node) || (node.isListLike() && node.constList().size() > 1) || !node.comment().empty())
161 return true;
162 return false;
163}
164
165bool Formatter::shouldAddNewLineBetweenNodes(const Node& node, const std::size_t at)
166{
167 if (at <= 1)
168 return false;
169
170 const auto& list = node.constList();
171 const std::size_t previous_line = lineOfLastNodeIn(list[at - 1]);
172
173 const auto& child = list[at];
174
175 // If we have a node before the current one,
176 // and the line count between the two nodes is more than 1,
177 // maybe we should add a new line to preserve user spacing.
178 // However, if the current node has a comment, do not add a new line, this is causing the spacing.
179 if (child.position().start.line - previous_line > 1 && child.comment().empty())
180 return true;
181 // If we do have a comment but the spacing is more than 2,
182 // then add a newline to preserve user spacing.
183 if (child.position().start.line - previous_line > 2 && !child.comment().empty())
184 return true;
185 return false;
186}
187
188std::string Formatter::format(const Node& node, std::size_t indent, bool after_newline)
189{
190 std::string result;
191 if (!node.comment().empty())
192 {
193 result += formatComment(node.comment(), indent);
194 after_newline = true;
195 }
196 if (after_newline)
197 result += prefix(indent);
198
199 switch (node.nodeType())
200 {
201 case NodeType::Symbol:
202 result += node.string();
203 break;
204 case NodeType::MutArg:
205 result += fmt::format("(mut {})", node.string());
206 break;
207 case NodeType::RefArg:
208 result += fmt::format("(ref {})", node.string());
209 break;
210 case NodeType::Capture:
211 result += "&" + node.string();
212 break;
213 case NodeType::Keyword:
214 result += std::string(keywords[static_cast<std::size_t>(node.keyword())]);
215 break;
216 case NodeType::String:
217 result += fmt::format("\"{}\"", node.string());
218 break;
219 case NodeType::Number:
220 result += fmt::format("{}", node.number());
221 break;
222 case NodeType::List:
223 result += formatBlock(node, indent, after_newline);
224 break;
225 case NodeType::Spread:
226 result += fmt::format("...{}", node.string());
227 break;
228 case NodeType::Field:
229 {
230 std::string field = format(node.constList()[0], indent, false);
231 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
232 field += "." + format(node.constList()[i], indent, false);
233 result += field;
234 break;
235 }
236 case NodeType::Macro:
237 result += formatMacro(node, indent);
238 break;
239 // not handling Namespace nor Unused node types as those can not be generated by the parser
240 case NodeType::Namespace:
241 [[fallthrough]];
242 case NodeType::Unused:
243 break;
244 }
245
246 if (!node.commentAfter().empty())
247 result += " " + formatComment(node.commentAfter(), /* indent= */ 0);
248
249 return result;
250}
251
252std::string Formatter::formatComment(const std::string& comment, const std::size_t indent) const
253{
254 std::string result = prefix(indent);
255 for (std::size_t i = 0, end = comment.size(); i < end; ++i)
256 {
257 result += comment[i];
258 if (comment[i] == '\n' && i != end - 1)
259 result += prefix(indent);
260 }
261
262 return result;
263}
264
265std::string Formatter::formatBlock(const Node& node, const std::size_t indent, const bool after_newline)
266{
267 if (node.constList().empty())
268 return "()";
269
270 const Node first = node.constList().front();
271 if (first.nodeType() == NodeType::Keyword)
272 {
273 switch (first.keyword())
274 {
275 case Keyword::Fun:
276 return formatFunction(node, indent);
277 case Keyword::Let:
278 [[fallthrough]];
279 case Keyword::Mut:
280 [[fallthrough]];
281 case Keyword::Set:
282 return formatVariable(node, indent);
283 case Keyword::If:
284 return formatCondition(node, indent);
285 case Keyword::While:
286 return formatLoop(node, indent);
287 case Keyword::Begin:
288 return formatBegin(node, indent, after_newline);
289 case Keyword::Import:
290 return formatImport(node, indent);
291 case Keyword::Del:
292 return formatDel(node, indent);
293 }
294 // HACK: should never reach, but the compiler insists that the function doesn't return in every code path
295 return "";
296 }
297 return formatCall(node, indent);
298}
299
300std::string Formatter::formatFunction(const Node& node, const std::size_t indent)
301{
302 const Node args_node = node.constList()[1];
303 const Node body_node = node.constList()[2];
304
305 std::string formatted_args;
306
307 if (!args_node.comment().empty())
308 {
309 formatted_args += "\n";
310 formatted_args += formatComment(args_node.comment(), indent + 1);
311 formatted_args += prefix(indent + 1);
312 }
313 else
314 formatted_args += " ";
315
316 if (args_node.isListLike())
317 {
318 bool comment_in_args = false;
319 std::string args;
320 const bool split = (isLongLine(args_node) || !args_node.comment().empty());
321
322 for (std::size_t i = 0, end = args_node.constList().size(); i < end; ++i)
323 {
324 const Node arg_i = args_node.constList()[i];
325 if (!arg_i.comment().empty())
326 comment_in_args = true;
327
328 args += format(arg_i, indent + ((comment_in_args || split) ? 1 : 0), i > 0 && (comment_in_args || split));
329 if (i != end - 1)
330 args += (comment_in_args || split) ? '\n' : ' ';
331 }
332
333 formatted_args += fmt::format("({}{})", (comment_in_args ? "\n" : ""), args);
334 }
335 else
336 formatted_args += format(args_node, indent, false);
337
338 if (!shouldSplitOnNewline(body_node) && args_node.comment().empty())
339 return fmt::format("(fun{} {})", formatted_args, format(body_node, indent + 1, false));
340 return fmt::format("(fun{}\n{})", formatted_args, format(body_node, indent + 1, true));
341}
342
343std::string Formatter::formatVariable(const Node& node, const std::size_t indent)
344{
345 const auto keyword = std::string(keywords[static_cast<std::size_t>(node.constList()[0].keyword())]);
346
347 const Node body_node = node.constList()[2];
348 const std::string formatted_bind = format(node.constList()[1], indent, false);
349
350 // we don't want to add another indentation level here, because it would result in a (let a (fun ()\n{indent+=4}...))
351 if (isFuncDef(body_node) || !shouldSplitOnNewline(body_node))
352 return fmt::format("({} {} {})", keyword, formatted_bind, format(body_node, indent, false));
353 return fmt::format("({} {}\n{})", keyword, formatted_bind, format(body_node, indent + 1, true));
354}
355
356std::string Formatter::formatCondition(const Node& node, const std::size_t indent, const bool is_macro)
357{
358 const Node cond_node = node.constList()[1];
359 const Node then_node = node.constList()[2];
360
361 bool cond_on_newline = false;
362 std::string formatted_cond = format(cond_node, indent + 1, false);
363 if (formatted_cond.find('\n') != std::string::npos)
364 cond_on_newline = true;
365
366 std::string if_cond_formatted = fmt::format(
367 "({}if{}{}",
368 is_macro ? "$" : "",
369 cond_on_newline ? "\n" : " ",
370 formatted_cond);
371
372 const bool split_then_newline = shouldSplitOnNewline(then_node) || isBeginBlock(then_node);
373
374 // (if cond then)
375 if (node.constList().size() == 3)
376 {
377 if (cond_on_newline || split_then_newline)
378 return fmt::format("{}\n{})", if_cond_formatted, format(then_node, indent + 1, true));
379 return fmt::format("{} {})", if_cond_formatted, format(then_node, indent + 1, false));
380 }
381 // (if cond then else)
382 return fmt::format(
383 "{}\n{}\n{}{})",
384 if_cond_formatted,
385 format(then_node, indent + 1, true),
386 format(node.constList()[3], indent + 1, true),
387 node.constList()[3].commentAfter().empty() ? "" : ("\n" + prefix(indent)));
388}
389
390std::string Formatter::formatLoop(const Node& node, const std::size_t indent)
391{
392 const Node cond_node = node.constList()[1];
393 const Node body_node = node.constList()[2];
394
395 bool cond_on_newline = false;
396 std::string formatted_cond = format(cond_node, indent + 1, false);
397 if (formatted_cond.find('\n') != std::string::npos)
398 cond_on_newline = true;
399
400 if (cond_on_newline || shouldSplitOnNewline(body_node))
401 return fmt::format(
402 "(while{}{}\n{})",
403 cond_on_newline ? "\n" : " ",
404 formatted_cond,
405 format(body_node, indent + 1, true));
406 return fmt::format(
407 "(while {} {})",
408 formatted_cond,
409 format(body_node, indent + 1, false));
410}
411
412std::string Formatter::formatBegin(const Node& node, const std::size_t indent, const bool after_newline)
413{
414 // only the keyword begin is present
415 if (node.constList().size() == 1)
416 return "{}";
417
418 // after a new line, we need to increment our indentation level
419 // if the block is a top level one, we also need to increment indentation level
420 const std::size_t inner_indentation = indent + (after_newline ? 1 : 0) + (indent == 0 ? 1 : 0);
421
422 std::string result = "{\n";
423 // skip begin keyword
424 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
425 {
426 const Node child = node.constList()[i];
427 // we want to preserve the node grouping by the user, but remove useless duplicate new line
428 // but that shouldn't apply to the first node of the block
429 if (shouldAddNewLineBetweenNodes(node, i) && i > 1)
430 result += "\n";
431
432 result += format(child, inner_indentation, true);
433 if (i != end - 1)
434 result += "\n";
435 }
436
437 // if the last node has a comment, add a new line
438 if (!node.constList().empty() && !node.constList().back().commentAfter().empty())
439 result += "\n" + prefix(indent) + "}";
440 else
441 result += " }";
442 return result;
443}
444
445std::string Formatter::formatImport(const Node& node, const std::size_t indent)
446{
447 const Node package_node = node.constList()[1];
448 std::string package;
449
450 if (!package_node.comment().empty())
451 package += "\n" + formatComment(package_node.comment(), indent + 1) + prefix(indent + 1);
452 else
453 package += " ";
454
455 for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i)
456 {
457 package += format(package_node.constList()[i], indent + 1, false);
458 if (i != end - 1)
459 package += ".";
460 }
461
462 const Node symbols = node.constList()[2];
463 if (symbols.nodeType() == NodeType::Symbol && symbols.string() == "*")
464 package += ":*";
465 else // symbols is a list
466 {
467 if (const auto& sym_list = symbols.constList(); !sym_list.empty())
468 {
469 const bool comment_after_last = !sym_list.back().commentAfter().empty();
470
471 for (const auto& sym : sym_list)
472 {
473 if (sym.comment().empty())
474 {
475 if (comment_after_last)
476 package += "\n" + prefix(indent + 1) + ":" + sym.string();
477 else
478 package += " :" + sym.string();
479 }
480 else
481 package += "\n" + formatComment(sym.comment(), indent + 1) + prefix(indent + 1) + ":" + sym.string();
482 }
483
484 if (comment_after_last)
485 {
486 package += " " + formatComment(sym_list.back().commentAfter(), /* indent= */ 0);
487 package += "\n" + prefix(indent + 1);
488 }
489 }
490 }
491
492 return fmt::format("(import{})", package);
493}
494
495std::string Formatter::formatDel(const Node& node, const std::size_t indent)
496{
497 std::string formatted_sym = format(node.constList()[1], indent + 1, false);
498 if (formatted_sym.find('\n') != std::string::npos)
499 return fmt::format("(del\n{})", formatted_sym);
500 return fmt::format("(del {})", formatted_sym);
501}
502
503std::string Formatter::formatCall(const Node& node, const std::size_t indent)
504{
505 bool is_list = false;
506 bool is_dict = false;
507 bool is_multiline = false;
508
509 if (!node.constList().empty() && node.constList().front().nodeType() == NodeType::Symbol)
510 {
511 if (node.constList().front().string() == "list")
512 is_list = true;
513 else if (node.constList().front().string() == "dict")
514 is_dict = true;
515 }
516
517 std::vector<std::string> formatted_args;
518 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
519 {
520 formatted_args.push_back(format(node.constList()[i], indent, false));
521 // if we have at least one argument taking multiple lines, split them all on their own line
522 if (formatted_args.back().find('\n') != std::string::npos || !node.constList()[i].commentAfter().empty())
523 is_multiline = true;
524 }
525
526 std::string result = is_list ? "[" : ("(" + format(node.constList()[0], indent, false));
527
528 // Split args on multiple lines even if, individually, they fit in the configured line length, if grouped together
529 // on a single line they are too long
530 const std::size_t args_line_length = std::accumulate(
531 formatted_args.begin(),
532 formatted_args.end(),
533 result.size() + 1, // +1 to count the closing paren/bracket
534 [](const std::size_t acc, const std::string& val) {
535 return acc + val.size() + 1_z;
536 });
537 if (args_line_length >= FormatterConfig::LongLineLength)
538 is_multiline = true;
539
540 for (std::size_t i = 0, end = formatted_args.size(); i < end; ++i)
541 {
542 const std::string& formatted_node = formatted_args[i];
543 if (is_dict)
544 {
545 if (i % 2 == 0 && formatted_args.size() > 2) // one pair per line if we have at least 2 key-value pairs
546 result += "\n" + format(node.constList()[i + 1], indent + 1, true);
547 else
548 result += " " + formatted_node;
549 }
550 else if (is_multiline)
551 result += "\n" + format(node.constList()[i + 1], indent + 1, true);
552 else if (is_list && i == 0)
553 result += formatted_node;
554 else // put all arguments on the same line
555 result += " " + formatted_node;
556 }
557 if (!node.constList().back().commentAfter().empty())
558 result += "\n" + prefix(indent);
559
560 result += is_list ? "]" : ")";
561 return result;
562}
563
564std::string Formatter::formatMacro(const Node& node, const std::size_t indent)
565{
566 if (isListStartingWithKeyword(node, Keyword::If))
567 return formatCondition(node, indent, /* is_macro= */ true);
568
569 std::string result = "(macro ";
570 bool after_newline = false;
571
572 for (std::size_t i = 0, end = node.constList().size(); i < end; ++i)
573 {
574 result += format(node.constList()[i], indent + 1, after_newline);
575 after_newline = false;
576
577 if (!node.constList()[i].commentAfter().empty())
578 {
579 result += "\n";
580 after_newline = true;
581 }
582 else if (i != end - 1)
583 result += " ";
584 }
585
586 return result + ")";
587}
Common code for the compiler.
Constants used by ArkScript.
#define ARK_NO_NAME_FILE
Definition Constants.hpp:28
Tools to report code errors nicely to the user.
ArkScript homemade exceptions.
Lots of utilities about the filesystem.
User defined literals for Ark internals.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
bool isListLike() const noexcept
Check if the node is a list like node.
Definition Node.cpp:83
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
Keyword keyword() const noexcept
Return the keyword held by the value (if the node type allows it)
Definition Node.cpp:48
const std::string & comment() const noexcept
Return the comment attached to this node, if any.
Definition Node.cpp:169
FileSpan position() const noexcept
Get the span of the node (start and end)
Definition Node.cpp:159
const std::string & commentAfter() const noexcept
Return the comment attached after this node, if any.
Definition Node.cpp:174
double number() const noexcept
Return the number held by the value (if the node type allows it)
Definition Node.cpp:43
void process(const std::string &filename, const std::string &code)
Parse the given code.
Definition Parser.cpp:51
const Node & ast() const noexcept
Definition Parser.cpp:92
std::string formatMacro(const Ark::internal::Node &node, std::size_t indent)
bool shouldSplitOnNewline(const Ark::internal::Node &node)
Decide if a node should be split on a newline or not.
void run()
Read the file and process it. The file isn't modified.
Definition Formatter.cpp:25
std::string formatVariable(const Ark::internal::Node &node, std::size_t indent)
bool codeModified() const
Definition Formatter.cpp:63
std::string formatBlock(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
std::string formatDel(const Ark::internal::Node &node, std::size_t indent)
void processAst(const Ark::internal::Node &ast)
Definition Formatter.cpp:68
static std::string prefix(const std::size_t indent)
Compute indentation level.
std::string formatCall(const Ark::internal::Node &node, std::size_t indent)
static bool isBeginBlock(const Ark::internal::Node &node)
Check if a node is a begin block.
bool m_dry_run
If true, only prints the formatted file instead of saving it to disk.
Definition Formatter.hpp:41
static bool isFuncCall(const Ark::internal::Node &node)
Check if a node is a function call (foo bar egg)
static bool isFuncDef(const Ark::internal::Node &node)
Check if a node is a function definition (fun (args) body)
Ark::internal::Parser m_parser
Definition Formatter.hpp:42
bool shouldAddNewLineBetweenNodes(const Ark::internal::Node &node, std::size_t at)
Decide if we should add a newline after a node in a block.
std::string formatBegin(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
bool m_updated
True if the original code now differs from the formatted one.
Definition Formatter.hpp:44
Formatter(bool dry_run)
Definition Formatter.cpp:17
std::string formatFunction(const Ark::internal::Node &node, std::size_t indent)
std::string formatLoop(const Ark::internal::Node &node, std::size_t indent)
std::string m_output
Definition Formatter.hpp:43
const std::string & output() const
Definition Formatter.cpp:58
std::string formatImport(const Ark::internal::Node &node, std::size_t indent)
std::string formatComment(const std::string &comment, std::size_t indent) const
const std::string m_filename
Definition Formatter.hpp:40
void runWithString(const std::string &code)
Definition Formatter.cpp:42
void warnIfCommentsWereRemoved(const std::string &original_code, const std::string &filename)
Given the original code, produce a warning if comments from it were removed during formatting.
Definition Formatter.cpp:91
static std::size_t lineOfLastNodeIn(const Ark::internal::Node &node)
Compute the line on which the deepest right most node of node is at.
std::string format(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
Handles all node formatting.
bool isLongLine(const Ark::internal::Node &node)
static bool isListStartingWithKeyword(const Ark::internal::Node &node, Ark::internal::Keyword keyword)
Check if a given node starts with a given keyword.
std::string formatCondition(const Ark::internal::Node &node, std::size_t indent, bool is_macro=false)
ARK_API void generate(const CodeError &e, std::ostream &os=std::cout, bool colorize=true)
Generate a diagnostic from an error and print it to the standard output.
std::string readFile(const std::string &name)
Helper to read a file.
Definition Files.hpp:47
std::vector< std::string > splitString(const std::string &source, const char sep)
Cut a string into pieces, given a character separator.
Definition Utils.hpp:31
@ Raw
Keep all text as is without modifying it (useful for the code formatter)
Keyword
The different keywords available.
Definition Common.hpp:79
constexpr std::array< std::string_view, 9 > keywords
List of available keywords in ArkScript.
Definition Common.hpp:92
STL namespace.
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
std::size_t line
0-indexed line number
Definition Position.hpp:22
static constexpr std::size_t LongLineLength
Max number of characters per line segment to consider splitting.
Definition Formatter.hpp:11