ArkScript
A small, lisp-inspired, functional scripting language
Formatter.cpp
Go to the documentation of this file.
1#include <Ark/Constants.hpp>
2#include <CLI/Formatter.hpp>
3
4#include <fmt/core.h>
5#include <fmt/color.h>
6
7#include <Ark/Files.hpp>
8#include <Ark/Exceptions.hpp>
10
11using namespace Ark;
12using namespace Ark::internal;
13
14Formatter::Formatter(const bool dry_run) :
15 m_dry_run(dry_run), m_parser(/* debug= */ 0, /* interpret= */ false), m_updated(false)
16{}
17
18Formatter::Formatter(std::string filename, const bool dry_run) :
19 m_filename(std::move(filename)), m_dry_run(dry_run), m_parser(/* debug= */ 0, /* interpret= */ false), m_updated(false)
20{}
21
23{
24 try
25 {
26 const std::string code = Utils::readFile(m_filename);
30
31 m_updated = code != m_output;
32 }
33 catch (const CodeError& e)
34 {
36 }
37}
38
39void Formatter::runWithString(const std::string& code)
40{
41 try
42 {
46
47 m_updated = code != m_output;
48 }
49 catch (const CodeError& e)
50 {
52 }
53}
54
55const std::string& Formatter::output() const
56{
57 return m_output;
58}
59
61{
62 return m_updated;
63}
64
66{
67 // remove useless surrounding begin (generated by the parser)
68 if (isBeginBlock(ast))
69 {
70 for (std::size_t i = 1, end = ast.constList().size(); i < end; ++i)
71 {
72 const Node node = ast.constList()[i];
73 if (shouldAddNewLineBetweenNodes(ast, i) && !m_output.empty())
74 m_output += "\n";
75 m_output += format(node, 0, false) + "\n";
76 }
77 }
78 else
79 m_output = format(ast, 0, false);
80
81 if (!m_dry_run)
82 {
83 std::ofstream stream(m_filename);
84 stream << m_output;
85 }
86}
87
88void Formatter::warnIfCommentsWereRemoved(const std::string& original_code, const std::string& filename)
89{
90 if (std::ranges::count(original_code, '#') != std::ranges::count(m_output, '#'))
91 {
92 fmt::println(
93 "{}: one or more comments from the original source code seem to have been removed by mistake while formatting {}",
94 fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)),
95 filename != ARK_NO_NAME_FILE ? filename : "file");
96 fmt::println("Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark");
97 }
98}
99
100bool Formatter::isListStartingWithKeyword(const Node& node, const Keyword keyword)
101{
102 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword && node.constList()[0].keyword() == keyword;
103}
104
106{
107 return isListStartingWithKeyword(node, Keyword::Begin);
108}
109
110bool Formatter::isFuncDef(const Node& node)
111{
112 return isListStartingWithKeyword(node, Keyword::Fun);
113}
114
116{
117 return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol;
118}
119
120std::size_t Formatter::lineOfLastNodeIn(const Node& node)
121{
122 if (node.isListLike() && !node.constList().empty())
123 {
124 std::size_t child_line = lineOfLastNodeIn(node.constList().back());
125 if (child_line < node.line())
126 return node.line();
127 return child_line;
128 }
129 return node.line();
130}
131
133{
134 const std::string formatted = format(node, 0, false);
135 const std::string::size_type sz = formatted.find_first_of('\n');
136
137 const bool is_long_line = !((sz < FormatterConfig::LongLineLength || (sz == std::string::npos && formatted.size() < FormatterConfig::LongLineLength)));
138 if (node.comment().empty() && (isBeginBlock(node) || isFuncCall(node)))
139 return false;
140 if (is_long_line || (node.isListLike() && node.constList().size() > 1) || !node.comment().empty())
141 return true;
142 return false;
143}
144
145bool Formatter::shouldAddNewLineBetweenNodes(const Node& node, const std::size_t at)
146{
147 if (at <= 1)
148 return false;
149
150 const auto& list = node.constList();
151 std::size_t previous_line = lineOfLastNodeIn(list[at - 1]);
152
153 const auto& child = list[at];
154
155 // If we have a node before the current one,
156 // and the line count between the two nodes is more than 1,
157 // maybe we should add a new line to preserve user spacing.
158 // However, if the current node has a comment, do not add a new line, this is causing the spacing.
159 if (child.line() - previous_line > 1 && child.comment().empty())
160 return true;
161 // If we do have a comment but the spacing is more than 2,
162 // then add a newline to preserve user spacing.
163 if (child.line() - previous_line > 2 && !child.comment().empty())
164 return true;
165 return false;
166}
167
168std::string Formatter::format(const Node& node, std::size_t indent, bool after_newline)
169{
170 std::string result;
171 if (!node.comment().empty())
172 {
173 result += formatComment(node.comment(), indent);
174 after_newline = true;
175 }
176 if (after_newline)
177 result += prefix(indent);
178
179 switch (node.nodeType())
180 {
181 case NodeType::Symbol:
182 result += node.string();
183 break;
184 case NodeType::Capture:
185 result += "&" + node.string();
186 break;
187 case NodeType::Keyword:
188 result += std::string(keywords[static_cast<std::size_t>(node.keyword())]);
189 break;
190 case NodeType::String:
191 result += fmt::format("\"{}\"", node.string());
192 break;
193 case NodeType::Number:
194 result += fmt::format("{}", node.number());
195 break;
196 case NodeType::List:
197 result += formatBlock(node, indent, after_newline);
198 break;
199 case NodeType::Spread:
200 result += fmt::format("...{}", node.string());
201 break;
202 case NodeType::Field:
203 {
204 std::string field = format(node.constList()[0], indent, false);
205 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
206 field += "." + format(node.constList()[i], indent, false);
207 result += field;
208 break;
209 }
210 case NodeType::Macro:
211 result += formatMacro(node, indent);
212 break;
213 // not handling Namespace nor Unused node types as those can not be generated by the parser
214 case NodeType::Namespace:
215 [[fallthrough]];
216 case NodeType::Unused:
217 break;
218 }
219
220 if (!node.commentAfter().empty())
221 result += " " + formatComment(node.commentAfter(), /* indent= */ 0);
222
223 return result;
224}
225
226std::string Formatter::formatComment(const std::string& comment, const std::size_t indent) const
227{
228 std::string result = prefix(indent);
229 for (std::size_t i = 0, end = comment.size(); i < end; ++i)
230 {
231 result += comment[i];
232 if (comment[i] == '\n' && i != end - 1)
233 result += prefix(indent);
234 }
235
236 return result;
237}
238
239std::string Formatter::formatBlock(const Node& node, const std::size_t indent, const bool after_newline)
240{
241 if (node.constList().empty())
242 return "()";
243
244 const Node first = node.constList().front();
245 if (first.nodeType() == NodeType::Keyword)
246 {
247 switch (first.keyword())
248 {
249 case Keyword::Fun:
250 return formatFunction(node, indent);
251 case Keyword::Let:
252 [[fallthrough]];
253 case Keyword::Mut:
254 [[fallthrough]];
255 case Keyword::Set:
256 return formatVariable(node, indent);
257 case Keyword::If:
258 return formatCondition(node, indent);
259 case Keyword::While:
260 return formatLoop(node, indent);
261 case Keyword::Begin:
262 return formatBegin(node, indent, after_newline);
263 case Keyword::Import:
264 return formatImport(node, indent);
265 case Keyword::Del:
266 return formatDel(node, indent);
267 }
268 // HACK: should never reach, but the compiler insists that the function doesn't return in every code path
269 return "";
270 }
271 return formatCall(node, indent);
272}
273
274std::string Formatter::formatFunction(const Node& node, const std::size_t indent)
275{
276 const Node args_node = node.constList()[1];
277 const Node body_node = node.constList()[2];
278
279 std::string formatted_args;
280
281 if (!args_node.comment().empty())
282 {
283 formatted_args += "\n";
284 formatted_args += formatComment(args_node.comment(), indent + 1);
285 formatted_args += prefix(indent + 1);
286 }
287 else
288 formatted_args += " ";
289
290 if (args_node.isListLike())
291 {
292 bool comment_in_args = false;
293 std::string args;
294 const bool split = shouldSplitOnNewline(args_node);
295
296 for (std::size_t i = 0, end = args_node.constList().size(); i < end; ++i)
297 {
298 const Node arg_i = args_node.constList()[i];
299 if (!arg_i.comment().empty())
300 comment_in_args = true;
301
302 args += format(arg_i, indent + ((comment_in_args || split) ? 1 : 0), i > 0 && (comment_in_args || split));
303 if (i != end - 1)
304 args += (comment_in_args || split) ? '\n' : ' ';
305 }
306
307 formatted_args += fmt::format("({}{})", (comment_in_args ? "\n" : ""), args);
308 }
309 else
310 formatted_args += format(args_node, indent, false);
311
312 if (!shouldSplitOnNewline(body_node) && args_node.comment().empty())
313 return fmt::format("(fun{} {})", formatted_args, format(body_node, indent + 1, false));
314 return fmt::format("(fun{}\n{})", formatted_args, format(body_node, indent + 1, true));
315}
316
317std::string Formatter::formatVariable(const Node& node, const std::size_t indent)
318{
319 std::string keyword = std::string(keywords[static_cast<std::size_t>(node.constList()[0].keyword())]);
320
321 const Node body_node = node.constList()[2];
322 const std::string formatted_bind = format(node.constList()[1], indent, false);
323
324 // we don't want to add another indentation level here, because it would result in a (let a (fun ()\n{indent+=4}...))
325 if (isFuncDef(body_node))
326 return fmt::format("({} {} {})", keyword, formatted_bind, format(body_node, indent, false));
327 if (!shouldSplitOnNewline(body_node))
328 return fmt::format("({} {} {})", keyword, formatted_bind, format(body_node, indent + 1, false));
329 return fmt::format("({} {}\n{})", keyword, formatted_bind, format(body_node, indent + 1, true));
330}
331
332std::string Formatter::formatCondition(const Node& node, const std::size_t indent, const bool is_macro)
333{
334 const Node cond_node = node.constList()[1];
335 const Node then_node = node.constList()[2];
336
337 bool cond_on_newline = false;
338 std::string formatted_cond = format(cond_node, indent + 1, false);
339 if (formatted_cond.find('\n') != std::string::npos)
340 cond_on_newline = true;
341
342 std::string if_cond_formatted = fmt::format(
343 "({}if{}{}",
344 is_macro ? "$" : "",
345 cond_on_newline ? "\n" : " ",
346 formatted_cond);
347
348 const bool split_then_newline = shouldSplitOnNewline(then_node);
349
350 // (if cond then)
351 if (node.constList().size() == 3)
352 {
353 if (cond_on_newline || split_then_newline)
354 return fmt::format("{}\n{})", if_cond_formatted, format(then_node, indent + 1, true));
355 return fmt::format("{} {})", if_cond_formatted, format(then_node, indent + 1, false));
356 }
357 // (if cond then else)
358 return fmt::format(
359 "{}\n{}\n{}{})",
360 if_cond_formatted,
361 format(then_node, indent + 1, true),
362 format(node.constList()[3], indent + 1, true),
363 node.constList()[3].commentAfter().empty() ? "" : ("\n" + prefix(indent)));
364}
365
366std::string Formatter::formatLoop(const Node& node, const std::size_t indent)
367{
368 const Node cond_node = node.constList()[1];
369 const Node body_node = node.constList()[2];
370
371 bool cond_on_newline = false;
372 std::string formatted_cond = format(cond_node, indent + 1, false);
373 if (formatted_cond.find('\n') != std::string::npos)
374 cond_on_newline = true;
375
376 if (cond_on_newline || shouldSplitOnNewline(body_node))
377 return fmt::format(
378 "(while{}{}\n{})",
379 cond_on_newline ? "\n" : " ",
380 formatted_cond,
381 format(body_node, indent + 1, true));
382 return fmt::format(
383 "(while {} {})",
384 formatted_cond,
385 format(body_node, indent + 1, false));
386}
387
388std::string Formatter::formatBegin(const Node& node, const std::size_t indent, const bool after_newline)
389{
390 // only the keyword begin is present
391 if (node.constList().size() == 1)
392 return "{}";
393
394 // after a new line, we need to increment our indentation level
395 // if the block is a top level one, we also need to increment indentation level
396 const std::size_t inner_indentation = indent + (after_newline ? 1 : 0) + (indent == 0 ? 1 : 0);
397
398 std::string result = "{\n";
399 // skip begin keyword
400 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
401 {
402 const Node child = node.constList()[i];
403 // we want to preserve the node grouping by the user, but remove useless duplicate new line
404 // but that shouldn't apply to the first node of the block
405 if (shouldAddNewLineBetweenNodes(node, i) && i > 1)
406 result += "\n";
407
408 result += format(child, inner_indentation, true);
409 if (i != end - 1)
410 result += "\n";
411 }
412
413 // if the last node has a comment, add a new line
414 if (!node.constList().empty() && !node.constList().back().commentAfter().empty())
415 result += "\n" + prefix(indent) + "}";
416 else
417 result += " }";
418 return result;
419}
420
421std::string Formatter::formatImport(const Node& node, const std::size_t indent)
422{
423 const Node package_node = node.constList()[1];
424 std::string package;
425
426 if (!package_node.comment().empty())
427 package += "\n" + formatComment(package_node.comment(), indent + 1) + prefix(indent + 1);
428 else
429 package += " ";
430
431 for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i)
432 {
433 package += format(package_node.constList()[i], indent + 1, false);
434 if (i != end - 1)
435 package += ".";
436 }
437
438 const Node symbols = node.constList()[2];
439 if (symbols.nodeType() == NodeType::Symbol && symbols.string() == "*")
440 package += ":*";
441 else // symbols is a list
442 {
443 if (const auto& sym_list = symbols.constList(); !sym_list.empty())
444 {
445 const bool comment_after_last = !sym_list.back().commentAfter().empty();
446
447 for (const auto& sym : sym_list)
448 {
449 if (sym.comment().empty())
450 {
451 if (comment_after_last)
452 package += "\n" + prefix(indent + 1) + ":" + sym.string();
453 else
454 package += " :" + sym.string();
455 }
456 else
457 package += "\n" + formatComment(sym.comment(), indent + 1) + prefix(indent + 1) + ":" + sym.string();
458 }
459
460 if (comment_after_last)
461 {
462 package += " " + formatComment(sym_list.back().commentAfter(), /* indent= */ 0);
463 package += "\n" + prefix(indent + 1);
464 }
465 }
466 }
467
468 return fmt::format("(import{})", package);
469}
470
471std::string Formatter::formatDel(const Node& node, const std::size_t indent)
472{
473 std::string formatted_sym = format(node.constList()[1], indent + 1, false);
474 if (formatted_sym.find('\n') != std::string::npos)
475 return fmt::format("(del\n{})", formatted_sym);
476 return fmt::format("(del {})", formatted_sym);
477}
478
479std::string Formatter::formatCall(const Node& node, const std::size_t indent)
480{
481 bool is_list = false;
482 if (!node.constList().empty() && node.constList().front().nodeType() == NodeType::Symbol &&
483 node.constList().front().string() == "list")
484 is_list = true;
485
486 bool is_multiline = false;
487
488 std::vector<std::string> formatted_args;
489 for (std::size_t i = 1, end = node.constList().size(); i < end; ++i)
490 {
491 formatted_args.push_back(format(node.constList()[i], indent, false));
492 // if we have at least one argument taking multiple lines, split them all on their own line
493 if (formatted_args.back().find('\n') != std::string::npos || !node.constList()[i].commentAfter().empty())
494 is_multiline = true;
495 }
496
497 std::string result = is_list ? "[" : ("(" + format(node.constList()[0], indent, false));
498 for (std::size_t i = 0, end = formatted_args.size(); i < end; ++i)
499 {
500 const std::string formatted_node = formatted_args[i];
501 if (is_multiline)
502 result += "\n" + format(node.constList()[i + 1], indent + 1, true);
503 else
504 result += (is_list && i == 0 ? "" : " ") + formatted_node;
505 }
506 if (!node.constList().back().commentAfter().empty())
507 result += "\n" + prefix(indent);
508 result += is_list ? "]" : ")";
509 return result;
510}
511
512std::string Formatter::formatMacro(const Node& node, const std::size_t indent)
513{
514 if (isListStartingWithKeyword(node, Keyword::If))
515 return formatCondition(node, indent, /* is_macro= */ true);
516
517 std::string result = "(macro ";
518 bool after_newline = false;
519
520 for (std::size_t i = 0, end = node.constList().size(); i < end; ++i)
521 {
522 result += format(node.constList()[i], indent + 1, after_newline);
523 after_newline = false;
524
525 if (!node.constList()[i].commentAfter().empty())
526 {
527 result += "\n";
528 after_newline = true;
529 }
530 else if (i != end - 1)
531 result += " ";
532 }
533
534 return result + ")";
535}
Common code for the compiler.
Constants used by ArkScript.
#define ARK_NO_NAME_FILE
Definition Constants.hpp:26
ArkScript homemade exceptions.
Lots of utilities about the filesystem.
A node of an Abstract Syntax Tree for ArkScript.
Definition Node.hpp:30
NodeType nodeType() const noexcept
Return the node type.
Definition Node.cpp:78
bool isListLike() const noexcept
Check if the node is a list like node.
Definition Node.cpp:83
const std::string & string() const noexcept
Return the string held by the value (if the node type allows it)
Definition Node.cpp:38
const std::vector< Node > & constList() const noexcept
Return the list of sub-nodes held by the node.
Definition Node.cpp:73
Keyword keyword() const noexcept
Return the keyword held by the value (if the node type allows it)
Definition Node.cpp:48
const std::string & comment() const noexcept
Return the comment attached to this node, if any.
Definition Node.cpp:179
const std::string & commentAfter() const noexcept
Return the comment attached after this node, if any.
Definition Node.cpp:184
double number() const noexcept
Return the number held by the value (if the node type allows it)
Definition Node.cpp:43
std::size_t line() const noexcept
Get the line at which this node was created.
Definition Node.cpp:164
void process(const std::string &filename, const std::string &code)
Parse the given code.
Definition Parser.cpp:51
const Node & ast() const noexcept
Definition Parser.cpp:95
std::string formatMacro(const Ark::internal::Node &node, std::size_t indent)
bool shouldSplitOnNewline(const Ark::internal::Node &node)
Decide if a node should be split on a newline or not.
void run()
Read the file and process it. The file isn't modified.
Definition Formatter.cpp:22
std::string formatVariable(const Ark::internal::Node &node, std::size_t indent)
bool codeModified() const
Definition Formatter.cpp:60
std::string formatBlock(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
std::string formatDel(const Ark::internal::Node &node, std::size_t indent)
void processAst(const Ark::internal::Node &ast)
Definition Formatter.cpp:65
static std::string prefix(const std::size_t indent)
Compute indentation level.
std::string formatCall(const Ark::internal::Node &node, std::size_t indent)
static bool isBeginBlock(const Ark::internal::Node &node)
Check if a node is a begin block.
bool m_dry_run
If true, only prints the formatted file instead of saving it to disk.
Definition Formatter.hpp:41
static bool isFuncCall(const Ark::internal::Node &node)
Check if a node is a function call (foo bar egg)
static bool isFuncDef(const Ark::internal::Node &node)
Check if a node is a function definition (fun (args) body)
Ark::internal::Parser m_parser
Definition Formatter.hpp:42
bool shouldAddNewLineBetweenNodes(const Ark::internal::Node &node, std::size_t at)
Decide if we should add a newline after a node in a block.
std::string formatBegin(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
bool m_updated
True if the original code now differs from the formatted one.
Definition Formatter.hpp:44
Formatter(bool dry_run)
Definition Formatter.cpp:14
std::string formatFunction(const Ark::internal::Node &node, std::size_t indent)
std::string formatLoop(const Ark::internal::Node &node, std::size_t indent)
std::string m_output
Definition Formatter.hpp:43
const std::string & output() const
Definition Formatter.cpp:55
std::string formatImport(const Ark::internal::Node &node, std::size_t indent)
std::string formatComment(const std::string &comment, std::size_t indent) const
const std::string m_filename
Definition Formatter.hpp:40
void runWithString(const std::string &code)
Definition Formatter.cpp:39
void warnIfCommentsWereRemoved(const std::string &original_code, const std::string &filename)
Given the original code, produce a warning if comments from it were removed during formatting.
Definition Formatter.cpp:88
static std::size_t lineOfLastNodeIn(const Ark::internal::Node &node)
Compute the line on which the deepest right most node of node is at.
std::string format(const Ark::internal::Node &node, std::size_t indent, bool after_newline)
Handles all node formatting.
static bool isListStartingWithKeyword(const Ark::internal::Node &node, Ark::internal::Keyword keyword)
Check if a given node starts with a given keyword.
std::string formatCondition(const Ark::internal::Node &node, std::size_t indent, bool is_macro=false)
ARK_API void generate(const CodeError &e, std::ostream &os=std::cout, bool colorize=true)
Generate a diagnostic from an error and print it to the standard output.
std::string readFile(const std::string &name)
Helper to read a file.
Definition Files.hpp:47
Keyword
The different keywords available.
Definition Common.hpp:75
constexpr std::array< std::string_view, 9 > keywords
List of available keywords in ArkScript.
Definition Common.hpp:88
CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
static constexpr std::size_t LongLineLength
Max number of characters per line segment to consider splitting.
Definition Formatter.hpp:11