ArkScript
A small, lisp-inspired, functional scripting language
Formatter.cpp
Go to the documentation of this file.
1#include <Ark/Constants.hpp>
2#include <CLI/Formatter.hpp>
3
4#include <fmt/core.h>
5#include <fmt/color.h>
6
7#include <Ark/Utils/Files.hpp>
11
12using namespace Ark;
13using namespace Ark::internal;
14
15Formatter::Formatter(const bool dry_run) :
16 m_dry_run(dry_run), m_parser(/* debug= */ 0, ParserMode::Raw), m_updated(false)
17{}
18
19Formatter::Formatter(std::string filename, const bool dry_run) :
20 m_filename(std::move(filename)), m_dry_run(dry_run), m_parser(/* debug= */ 0, ParserMode::Raw), m_updated(false)
21{}
22
24{
25 try
26 {
27 const std::string code = Utils::readFile(m_filename);
31
32 m_updated = code != m_output;
33 }
34 catch (const CodeError& e)
35 {
37 }
38}
39
40void Formatter::runWithString(const std::string& code)
41{
42 try
43 {
47
48 m_updated = code != m_output;
49 }
50 catch (const CodeError& e)
51 {
53 }
54}
55
56const std::string& Formatter::output() const
57{
58 return m_output;
59}
60
62{
63 return m_updated;
64}
65
67{
68 // remove useless surrounding begin (generated by the parser)
69 if (isBeginBlock(ast))
70 {
71 for (std::size_t i = 1, end = ast.constList().size(); i < end; ++i)
72 {
73 const Node node = ast.constList()[i];
74 if (shouldAddNewLineBetweenNodes(ast, i) && !m_output.empty())
75 m_output += "\n";
76 m_output += format(node, 0, false) + "\n";
77 }
78 }
79 else
80 m_output = format(ast, 0, false);
81
82 if (!m_dry_run)
83 {
84 std::ofstream stream(m_filename);
85 stream << m_output;
86 }
87}
88
89void Formatter::warnIfCommentsWereRemoved(const std::string& original_code, const std::string& filename)
90{
91 const std::size_t before_count = std::ranges::count(original_code, '#');
92 const std::size_t after_count = std::ranges::count(m_output, '#');
93
94 if (before_count != after_count)
95 {
96 fmt::println(
97 "{}: one or more comments from the original source code seem to have been {} by mistake while formatting {}",
98 fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)),
99 before_count > after_count ? "removed" : "duplicated",
100 filename != ARK_NO_NAME_FILE ? filename : "file");
101 fmt::println("Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark");
102 }
103}
104
105bool Formatter::isListStartingWithKeyword(const Node& node, const Keyword keyword)
106{
107 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword && node.constList()[0].keyword() == keyword;
108}
109
111{
112 return isListStartingWithKeyword(node, Keyword::Begin);
113}
114
115bool Formatter::isFuncDef(const Node& node)
116{
117 return isListStartingWithKeyword(node, Keyword::Fun);
118}
119
121{
122 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol;
123}
124
125std::size_t Formatter::lineOfLastNodeIn(const Node& node)
126{
127 if (node.isListLike() && !node.constList().empty())
128 {
129 const std::size_t child_line = lineOfLastNodeIn(node.constList().back());
130 if (child_line < node.position().start.line)
131 return node.position().start.line;
132 return child_line;
133 }
134 return node.position().start.line;
135}
136
138{
139 const std::string formatted = format(node, 0, false);
140 const std::string::size_type sz = formatted.find_first_of('\n');
141
142 const bool is_long_line = !((sz < FormatterConfig::LongLineLength || (sz == std::string::npos && formatted.size() < FormatterConfig::LongLineLength)));
143 if (node.comment().empty() && (isBeginBlock(node) || isFuncCall(node)))
144 return false;
145 if (is_long_line || (node.isListLike() && node.constList().size() > 1) || !node.comment().empty())
146 return true;
147 return false;
148}
149
150bool Formatter::shouldAddNewLineBetweenNodes(const Node& node, const std::size_t at)
151{
152 if (at <= 1)
153 return false;
154
155 const auto& list = node.constList();
156 const std::size_t previous_line = lineOfLastNodeIn(list[at - 1]);
157
158 const auto& child = list[at];
159
160 // If we have a node before the current one,
161 // and the line count between the two nodes is more than 1,
162 // maybe we should add a new line to preserve user spacing.
163 // However, if the current node has a comment, do not add a new line, this is causing the spacing.
164 if (child.position().start.line - previous_line > 1 && child.comment().empty())
165 return true;
166 // If we do have a comment but the spacing is more than 2,
167 // then add a newline to preserve user spacing.
168 if (child.position().start.line - previous_line > 2 && !child.comment().empty())
169 return true;
170 return false;
171}
172
173std::string Formatter::format(const Node& node, std::size_t indent, bool after_newline)
174{
175 std::string result;
176 if (!node.comment().empty())
177 {
178 result += formatComment(node.comment(), indent);
179 after_newline = true;
180 }
181 if (after_newline)
182 result += prefix(indent);
183
184 switch (node.nodeType())
185 {
186 case NodeType::Symbol:
187 result += node.string();
188 break;
189 case NodeType::Capture:
190 result += "&" + node.string();
191 break;
192 case NodeType::Keyword:
193 result += std::string(keywords[static_cast<std::size_t>(node.keyword())]);
194 break;
195 case NodeType::String:
196 result += fmt::format("\"{}\"", node.string());
197 break;
198 case NodeType::Number:
199 result += fmt::format("{}", node.number());
200 break;
201 case NodeType::List:
202 result += formatBlock(node, indent, after_newline);
203 break;
204 case NodeType::Spread:
205 result += fmt::format("...{}", node.string());
206 break;
207 case NodeType::Field:
208 {
209 std::string field = format(node.constList()[0], indent, false);
210 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
211 field += "." + format(node.constList()[i], indent, false);
212 result += field;
213 break;
214 }
215 case NodeType::Macro:
216 result += formatMacro(node, indent);
217 break;
218 // not handling Namespace nor Unused node types as those can not be generated by the parser
219 case NodeType::Namespace:
220 [[fallthrough]];
221 case NodeType::Unused:
222 break;
223 }
224
225 if (!node.commentAfter().empty())
226 result += " " + formatComment(node.commentAfter(), /* indent= */ 0);
227
228 return result;
229}
230
231std::string Formatter::formatComment(const std::string& comment, const std::size_t indent) const
232{
233 std::string result = prefix(indent);
234 for (std::size_t i = 0, end = comment.size(); i < end; ++i)
235 {
236 result += comment[i];
237 if (comment[i] == '\n' && i != end - 1)
238 result += prefix(indent);
239 }
240
241 return result;
242}
243
244std::string Formatter::formatBlock(const Node& node, const std::size_t indent, const bool after_newline)
245{
246 if (node.constList().empty())
247 return "()";
248
249 const Node first = node.constList().front();
250 if (first.nodeType() == NodeType::Keyword)
251 {
252 switch (first.keyword())
253 {
254 case Keyword::Fun:
255 return formatFunction(node, indent);
256 case Keyword::Let:
257 [[fallthrough]];
258 case Keyword::Mut:
259 [[fallthrough]];
260 case Keyword::Set:
261 return formatVariable(node, indent);
262 case Keyword::If:
263 return formatCondition(node, indent);
264 case Keyword::While:
265 return formatLoop(node, indent);
266 case Keyword::Begin:
267 return formatBegin(node, indent, after_newline);
268 case Keyword::Import:
269 return formatImport(node, indent);
270 case Keyword::Del:
271 return formatDel(node, indent);
272 }
273 // HACK: should never reach, but the compiler insists that the function doesn't return in every code path
274 return "";
275 }
276 return formatCall(node, indent);
277}
278
279std::string Formatter::formatFunction(const Node& node, const std::size_t indent)
280{
281 const Node args_node = node.constList()[1];
282 const Node body_node = node.constList()[2];
283
284 std::string formatted_args;
285
286 if (!args_node.comment().empty())
287 {
288 formatted_args += "\n";
289 formatted_args += formatComment(args_node.comment(), indent + 1);
290 formatted_args += prefix(indent + 1);
291 }
292 else
293 formatted_args += " ";
294
295 if (args_node.isListLike())
296 {
297 bool comment_in_args = false;
298 std::string args;
299 const bool split = shouldSplitOnNewline(args_node);
300
301 for (std::size_t i = 0, end = args_node.constList().size(); i < end; ++i)
302 {
303 const Node arg_i = args_node.constList()[i];
304 if (!arg_i.comment().empty())
305 comment_in_args = true;
306
307 args += format(arg_i, indent + ((comment_in_args || split) ? 1 : 0), i > 0 && (comment_in_args || split));
308 if (i != end - 1)
309 args += (comment_in_args || split) ? '\n' : ' ';
310 }
311
312 formatted_args += fmt::format("({}{})", (comment_in_args ? "\n" : ""), args);
313 }
314 else
315 formatted_args += format(args_node, indent, false);
316
317 if (!shouldSplitOnNewline(body_node) && args_node.comment().empty())
318 return fmt::format("(fun{} {})", formatted_args, format(body_node, indent + 1, false));
319 return fmt::format("(fun{}\n{})", formatted_args, format(body_node, indent + 1, true));
320}
321
322std::string Formatter::formatVariable(const Node& node, const std::size_t indent)
323{
324 const auto keyword = std::string(keywords[static_cast<std::size_t>(node.constList()[0].keyword())]);
325
326 const Node body_node = node.constList()[2];
327 const std::string formatted_bind = format(node.constList()[1], indent, false);
328
329 // we don't want to add another indentation level here, because it would result in a (let a (fun ()\n{indent+=4}...))
330 if (isFuncDef(body_node) || !shouldSplitOnNewline(body_node))
331 return fmt::format("({} {} {})", keyword, formatted_bind, format(body_node, indent, false));
332 return fmt::format("({} {}\n{})", keyword, formatted_bind, format(body_node, indent + 1, true));
333}
334
335std::string Formatter::formatCondition(const Node& node, const std::size_t indent, const bool is_macro)
336{
337 const Node cond_node = node.constList()[1];
338 const Node then_node = node.constList()[2];
339
340 bool cond_on_newline = false;
341 std::string formatted_cond = format(cond_node, indent + 1, false);
342 if (formatted_cond.find('\n') != std::string::npos)
343 cond_on_newline = true;
344
345 std::string if_cond_formatted = fmt::format(
346 "({}if{}{}",
347 is_macro ? "$" : "",
348 cond_on_newline ? "\n" : " ",
349 formatted_cond);
350
351 const bool split_then_newline = shouldSplitOnNewline(then_node);
352
353 // (if cond then)
354 if (node.constList().size() == 3)
355 {
356 if (cond_on_newline || split_then_newline)
357 return fmt::format("{}\n{})", if_cond_formatted, format(then_node, indent + 1, true));
358 return fmt::format("{} {})", if_cond_formatted, format(then_node, indent + 1, false));
359 }
360 // (if cond then else)
361 return fmt::format(
362 "{}\n{}\n{}{})",
363 if_cond_formatted,
364 format(then_node, indent + 1, true),
365 format(node.constList()[3], indent + 1, true),
366 node.constList()[3].commentAfter().empty() ? "" : ("\n" + prefix(indent)));
367}
368
369std::string Formatter::formatLoop(const Node& node, const std::size_t indent)
370{
371 const Node cond_node = node.constList()[1];
372 const Node body_node = node.constList()[2];
373
374 bool cond_on_newline = false;
375 std::string formatted_cond = format(cond_node, indent + 1, false);
376 if (formatted_cond.find('\n') != std::string::npos)
377 cond_on_newline = true;
378
379 if (cond_on_newline || shouldSplitOnNewline(body_node))
380 return fmt::format(
381 "(while{}{}\n{})",
382 cond_on_newline ? "\n" : " ",
383 formatted_cond,
384 format(body_node, indent + 1, true));
385 return fmt::format(
386 "(while {} {})",
387 formatted_cond,
388 format(body_node, indent + 1, false));
389}
390
391std::string Formatter::formatBegin(const Node& node, const std::size_t indent, const bool after_newline)
392{
393 // only the keyword begin is present
394 if (node.constList().size() == 1)
395 return "{}";
396
397 // after a new line, we need to increment our indentation level
398 // if the block is a top level one, we also need to increment indentation level
399 const std::size_t inner_indentation = indent + (after_newline ? 1 : 0) + (indent == 0 ? 1 : 0);
400
401 std::string result = "{\n";
402 // skip begin keyword
403 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
404 {
405 const Node child = node.constList()[i];
406 // we want to preserve the node grouping by the user, but remove useless duplicate new line
407 // but that shouldn't apply to the first node of the block
408 if (shouldAddNewLineBetweenNodes(node, i) && i > 1)
409 result += "\n";
410
411 result += format(child, inner_indentation, true);
412 if (i != end - 1)
413 result += "\n";
414 }
415
416 // if the last node has a comment, add a new line
417 if (!node.constList().empty() && !node.constList().back().commentAfter().empty())
418 result += "\n" + prefix(indent) + "}";
419 else
420 result += " }";
421 return result;
422}
423
424std::string Formatter::formatImport(const Node& node, const std::size_t indent)
425{
426 const Node package_node = node.constList()[1];
427 std::string package;
428
429 if (!package_node.comment().empty())
430 package += "\n" + formatComment(package_node.comment(), indent + 1) + prefix(indent + 1);
431 else
432 package += " ";
433
434 for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i)
435 {
436 package += format(package_node.constList()[i], indent + 1, false);
437 if (i != end - 1)
438 package += ".";
439 }
440
441 const Node symbols = node.constList()[2];
442 if (symbols.nodeType() == NodeType::Symbol && symbols.string() == "*")
443 package += ":*";
444 else // symbols is a list
445 {
446 if (const auto& sym_list = symbols.constList(); !sym_list.empty())
447 {
448 const bool comment_after_last = !sym_list.back().commentAfter().empty();
449
450 for (const auto& sym : sym_list)
451 {
452 if (sym.comment().empty())
453 {
454 if (comment_after_last)
455 package += "\n" + prefix(indent + 1) + ":" + sym.string();
456 else
457 package += " :" + sym.string();
458 }
459 else
460 package += "\n" + formatComment(sym.comment(), indent + 1) + prefix(indent + 1) + ":" + sym.string();
461 }
462
463 if (comment_after_last)
464 {
465 package += " " + formatComment(sym_list.back().commentAfter(), /* indent= */ 0);
466 package += "\n" + prefix(indent + 1);
467 }
468 }
469 }
470
471 return fmt::format("(import{})", package);
472}
473
474std::string Formatter::formatDel(const Node& node, const std::size_t indent)
475{
476 std::string formatted_sym = format(node.constList()[1], indent + 1, false);
477 if (formatted_sym.find('\n') != std::string::npos)
478 return fmt::format("(del\n{})", formatted_sym);
479 return fmt::format("(del {})", formatted_sym);
480}
481
482std::string Formatter::formatCall(const Node& node, const std::size_t indent)
483{
484 bool is_list = false;
485 bool is_dict = false;
486 bool is_multiline = false;
487
488 if (!node.constList().empty() && node.constList().front().nodeType() == NodeType::Symbol)
489 {
490 if (node.constList().front().string() == "list")
491 is_list = true;
492 else if (node.constList().front().string() == "dict")
493 is_dict = true;
494 }
495
496 std::vector<std::string> formatted_args;
497 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
498 {
499 formatted_args.push_back(format(node.constList()[i], indent, false));
500 // if we have at least one argument taking multiple lines, split them all on their own line
501 if (formatted_args.back().find('\n') != std::string::npos || !node.constList()[i].commentAfter().empty())
502 is_multiline = true;
503 }
504
505 std::string result = is_list ? "[" : ("(" + format(node.constList()[0], indent, false));
506 for (std::size_t i = 0, end = formatted_args.size(); i < end; ++i)
507 {
508 const std::string& formatted_node = formatted_args[i];
509 if (is_dict)
510 {
511 if (i % 2 == 0 && formatted_args.size() > 2) // one pair per line if we have at least 2 key-value pairs
512 result += "\n" + format(node.constList()[i + 1], indent + 1, true);
513 else
514 result += " " + formatted_node;
515 }
516 else if (is_multiline)
517 result += "\n" + format(node.constList()[i + 1], indent + 1, true);
518 else if (is_list && i == 0)
519 result += formatted_node;
520 else // put all arguments on the same line
521 result += " " + formatted_node;
522 }
523 if (!node.constList().back().commentAfter().empty())
524 result += "\n" + prefix(indent);
525
526 result += is_list ? "]" : ")";
527 return result;
528}
529
530std::string Formatter::formatMacro(const Node& node, const std::size_t indent)
531{
532 if (isListStartingWithKeyword(node, Keyword::If))
533 return formatCondition(node, indent, /* is_macro= */ true);
534
535 std::string result = "(macro ";
536 bool after_newline = false;
537
538 for (std::size_t i = 0, end = node.constList().size(); i < end; ++i)
539 {
540 result += format(node.constList()[i], indent + 1, after_newline);
541 after_newline = false;
542
543 if (!node.constList()[i].commentAfter().empty())
544 {
545 result += "\n";
546 after_newline = true;
547 }
548 else if (i != end - 1)
549 result += " ";
550 }
551
552 return result + ")";
553}
Common code for the compiler.
Constants used by ArkScript.
#define ARK_NO_NAME_FILE
Definition Constants.hpp:27
Tools to report code errors nicely to the user.
ArkScript homemade exceptions.
Lots of utilities about the filesystem.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:32
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
bool isListLike() const noexcept
Check if the node is a list like node.
Definition Node.cpp:83
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
Keyword keyword() const noexcept
Return the keyword held by the value (if the node type allows it)
Definition Node.cpp:48
const std::string & comment() const noexcept
Return the comment attached to this node, if any.
Definition Node.cpp:169
FileSpan position() const noexcept
Get the span of the node (start and end)
Definition Node.cpp:159
const std::string & commentAfter() const noexcept
Return the comment attached after this node, if any.
Definition Node.cpp:174
double number() const noexcept
Return the number held by the value (if the node type allows it)
Definition Node.cpp:43
void process(const std::string &filename, const std::string &code)
Parse the given code.
Definition Parser.cpp:51
const Node & ast() const noexcept
Definition Parser.cpp:92
std::string formatMacro(const Ark::internal::Node &node, std::size_t indent)
bool shouldSplitOnNewline(const Ark::internal::Node &node)
Decide if a node should be split on a newline or not.
void run()
Read the file and process it. The file isn't modified.
Definition Formatter.cpp:23
std::string formatVariable(const Ark::internal::Node &node, std::size_t indent)
bool codeModified() const
Definition Formatter.cpp:61
std::string formatBlock(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
std::string formatDel(const Ark::internal::Node &node, std::size_t indent)
void processAst(const Ark::internal::Node &ast)
Definition Formatter.cpp:66
static std::string prefix(const std::size_t indent)
Compute indentation level.
std::string formatCall(const Ark::internal::Node &node, std::size_t indent)
static bool isBeginBlock(const Ark::internal::Node &node)
Check if a node is a begin block.
bool m_dry_run
If true, only prints the formatted file instead of saving it to disk.
Definition Formatter.hpp:41
static bool isFuncCall(const Ark::internal::Node &node)
Check if a node is a function call (foo bar egg)
static bool isFuncDef(const Ark::internal::Node &node)
Check if a node is a function definition (fun (args) body)
Ark::internal::Parser m_parser
Definition Formatter.hpp:42
bool shouldAddNewLineBetweenNodes(const Ark::internal::Node &node, std::size_t at)
Decide if we should add a newline after a node in a block.
std::string formatBegin(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
bool m_updated
True if the original code now differs from the formatted one.
Definition Formatter.hpp:44
Formatter(bool dry_run)
Definition Formatter.cpp:15
std::string formatFunction(const Ark::internal::Node &node, std::size_t indent)
std::string formatLoop(const Ark::internal::Node &node, std::size_t indent)
std::string m_output
Definition Formatter.hpp:43
const std::string & output() const
Definition Formatter.cpp:56
std::string formatImport(const Ark::internal::Node &node, std::size_t indent)
std::string formatComment(const std::string &comment, std::size_t indent) const
const std::string m_filename
Definition Formatter.hpp:40
void runWithString(const std::string &code)
Definition Formatter.cpp:40
void warnIfCommentsWereRemoved(const std::string &original_code, const std::string &filename)
Given the original code, produce a warning if comments from it were removed during formatting.
Definition Formatter.cpp:89
static std::size_t lineOfLastNodeIn(const Ark::internal::Node &node)
Compute the line on which the deepest right most node of node is at.
std::string format(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
Handles all node formatting.
static bool isListStartingWithKeyword(const Ark::internal::Node &node, Ark::internal::Keyword keyword)
Check if a given node starts with a given keyword.
std::string formatCondition(const Ark::internal::Node &node, std::size_t indent, bool is_macro=false)
ARK_API void generate(const CodeError &e, std::ostream &os=std::cout, bool colorize=true)
Generate a diagnostic from an error and print it to the standard output.
std::string readFile(const std::string &name)
Helper to read a file.
Definition Files.hpp:47
@ Raw
Keep all text as is without modifying it (useful for the code formatter)
Keyword
The different keywords available.
Definition Common.hpp:75
constexpr std::array< std::string_view, 9 > keywords
List of available keywords in ArkScript.
Definition Common.hpp:88
STL namespace.
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
std::size_t line
0-indexed line number
Definition Position.hpp:22
static constexpr std::size_t LongLineLength
Max number of characters per line segment to consider splitting.
Definition Formatter.hpp:11