From 6652413e6b7b468ee4c2547bb7a67f9aa85b8128 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Mon, 25 Mar 2024 21:59:17 +0100 Subject: [PATCH 01/12] ci(static analysis): repurposing lizard.yml to host all the static analysis tools to run on PRs --- .github/workflows/lizard.yml | 33 --------------- .github/workflows/static_analysis.yml | 61 +++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 33 deletions(-) delete mode 100644 .github/workflows/lizard.yml create mode 100644 .github/workflows/static_analysis.yml diff --git a/.github/workflows/lizard.yml b/.github/workflows/lizard.yml deleted file mode 100644 index 0e747db3e..000000000 --- a/.github/workflows/lizard.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Lizard PR reporter - -on: - issue_comment: - types: [ created ] - -jobs: - lizard: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v2 - with: - python-version: '3.9' - - - run: pip install lizard - - - name: Create lizard report - id: lizard - shell: bash - run: | - content=$(python .github/launch-lizard.py) - content="${content//'%'/'%25'}" - content="${content//$'\n'/'%0A'}" - content="${content//$'\r'/'%0D'}" - echo ::set-output name=report::$content - - - uses: actions-ecosystem/action-create-comment@v1 - if: ${{ startsWith(github.event.comment.body, '/lizard') && github.event.issue.pull_request }} - with: - github_token: ${{ secrets.github_token }} - body: ${{ steps.lizard.outputs.report }} diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml new file mode 100644 index 000000000..5d7a86f22 --- /dev/null +++ b/.github/workflows/static_analysis.yml @@ -0,0 +1,61 @@ +name: Static analysis + +on: + pull_request: + +jobs: + static_analysis: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.9' + + - run: | + sudo apt install -yq cppcheck + pip install lizard + + - name: Create lizard 
report + id: lizard + shell: bash + run: | + content=$(python .github/launch-lizard.py) + content="${content//'%'/'%25'}" + content="${content//$'\n'/'%0A'}" + content="${content//$'\r'/'%0D'}" + # echo "report=$content" >> $GITHUB_OUTPUT + echo ::set-output name=report::$content + + - name: Run cppcheck + id: cppcheck + shell: bash + run: | + cppcheck --platform=unix64 --template="{file}:{line}: {severity}: {message}" \ + --output-file=cppcheck.txt -j $(nproc) \ + -I include src + cat cppcheck.txt | sort > cppcheck_sorted.txt + echo "report=$(cat cppcheck_sorted.txt)" >> $GITHUB_OUTPUT + + - name: Find Comment + uses: peter-evans/find-comment@v3 + id: fc + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: Build output + + - name: Create or update comment + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: | + ${{ steps.lizard.outputs.report }} + --- + ## CppCheck report + ``` + ${{ steps.cppcheck.outputs.report }} + ``` + edit-mode: replace From c06e294d2c3bc1fa78d74734b68876f4a94d8cfe Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Tue, 19 Mar 2024 13:59:10 +0100 Subject: [PATCH 02/12] refactor!: moving the JsonCompiler from Ark to CLI include directory --- CMakeLists.txt | 2 - include/Ark/Compiler/JsonCompiler.hpp | 61 ------- include/CLI/JsonCompiler.hpp | 56 +++++++ src/arkreactor/Compiler/JsonCompiler.cpp | 204 ----------------------- src/arkscript/JsonCompiler.cpp | 201 ++++++++++++++++++++++ src/arkscript/main.cpp | 4 +- tests/unittests/ValidAstSuite.cpp | 4 +- 7 files changed, 261 insertions(+), 271 deletions(-) delete mode 100644 include/Ark/Compiler/JsonCompiler.hpp create mode 100644 include/CLI/JsonCompiler.hpp delete mode 100644 src/arkreactor/Compiler/JsonCompiler.cpp create mode 100644 src/arkscript/JsonCompiler.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt 
index 8c6c1a69c..55ab50e48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -213,9 +213,7 @@ if (ARK_BUILD_EXE) add_subdirectory("${ark_SOURCE_DIR}/lib/replxx" EXCLUDE_FROM_ALL) add_subdirectory("${ark_SOURCE_DIR}/lib/clipp" EXCLUDE_FROM_ALL) - target_include_directories(arkscript PUBLIC "${ark_SOURCE_DIR}/src/arkscript/") target_link_libraries(arkscript PUBLIC ArkReactor replxx clipp termcolor) - target_compile_features(arkscript PRIVATE cxx_std_20) enable_lto(arkscript) diff --git a/include/Ark/Compiler/JsonCompiler.hpp b/include/Ark/Compiler/JsonCompiler.hpp deleted file mode 100644 index 23c2a4665..000000000 --- a/include/Ark/Compiler/JsonCompiler.hpp +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef ARK_COMPILER_JSONCOMPILER_HPP -#define ARK_COMPILER_JSONCOMPILER_HPP - -#include -#include -#include - -#include -#include -#include -#include - -namespace Ark -{ - class ARK_API JsonCompiler final - { - public: - /** - * @brief Construct a new JsonCompiler object - * - * @param debug the debug level - */ - JsonCompiler(unsigned debug, const std::vector& libenv); - - /** - * @brief Feed the different variables with information taken from the given source code file - * - * @param filename the name of the file - */ - void feed(const std::string& filename); - - /** - * @brief Start the compilation - * - * @return - */ - std::string compile(); - - private: - Welder m_welder; - - /** - * @brief Compile a single node and return its representation - * - * @param node - * @return const std::string& - */ - std::string _compile(const internal::Node& node); - - /** - * @brief Convert a NodeType::List to a JSON list - * - * @param node - * @param start - * @return std::string - */ - std::string toJsonList(const internal::Node& node, std::size_t start); - }; -} - -#endif diff --git a/include/CLI/JsonCompiler.hpp b/include/CLI/JsonCompiler.hpp new file mode 100644 index 000000000..5071c72ab --- /dev/null +++ b/include/CLI/JsonCompiler.hpp @@ -0,0 +1,56 @@ +#ifndef CLI_JSONCOMPILER_HPP 
+#define CLI_JSONCOMPILER_HPP + +#include +#include +#include + +#include +#include + +class JsonCompiler final +{ +public: + /** + * @brief Construct a new JsonCompiler object + * + * @param debug the debug level + */ + JsonCompiler(unsigned debug, const std::vector& libenv); + + /** + * @brief Feed the different variables with information taken from the given source code file + * + * @param filename the name of the file + */ + void feed(const std::string& filename); + + /** + * @brief Start the compilation + * + * @return + */ + std::string compile(); + +private: + Ark::Welder m_welder; + + /** + * @brief Compile a single node and return its representation + * + * @param node + * @return const std::string& + */ + std::string _compile(const Ark::internal::Node& node); + + /** + * @brief Convert a NodeType::List to a JSON list + * + * @param node + * @param start + * @return std::string + */ + std::string toJsonList(const Ark::internal::Node& node, std::size_t start); +}; + +#endif diff --git a/src/arkreactor/Compiler/JsonCompiler.cpp b/src/arkreactor/Compiler/JsonCompiler.cpp deleted file mode 100644 index 20ea25227..000000000 --- a/src/arkreactor/Compiler/JsonCompiler.cpp +++ /dev/null @@ -1,204 +0,0 @@ -#include - -#include - -#include - -namespace Ark -{ - using namespace internal; - - JsonCompiler::JsonCompiler(unsigned debug, const std::vector& libenv) : - m_welder(debug, libenv) - {} - - void JsonCompiler::feed(const std::string& filename) - { - m_welder.computeASTFromFile(filename); - } - - std::string JsonCompiler::compile() - { - return _compile(m_welder.ast()); - } - - std::string JsonCompiler::_compile(const Node& node) - { - std::string json; - - switch (node.nodeType()) - { - case NodeType::Symbol: - { - json += fmt::format( - R"({{"type": "Symbol", "name": "{}"}})", - node.string().c_str()); - break; - } - - case NodeType::Capture: - { - json += fmt::format( - R"({{"type": "Capture", "name": "{}"}})", - node.string().c_str()); - break; - } - - case 
NodeType::Field: - { - json += R"({"type": "Field", "children": )"; - json += toJsonList(node, 0) + "}"; - break; - } - - case NodeType::String: - { - json += fmt::format( - R"({{"type": "String", "value": "{}"}})", - node.string().c_str()); - break; - } - - case NodeType::Number: - { - json += fmt::format( - R"({{"type": "Number", "value": {}}})", - node.number()); - break; - } - - case NodeType::List: - { - if (!node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword) - { - Node keyword = node.constList()[0]; - switch (keyword.keyword()) - { - case Keyword::Fun: - { - // (fun (args) (body)) - std::string args; - Node args_node = node.constList()[1]; - for (std::size_t i = 0, end = args_node.constList().size(); i < end; ++i) - { - args += _compile(args_node.constList()[i]); - if (end > 1 && i != end - 1) - args += ", "; - } - - json += fmt::format( - R"({{"type": "Fun", "args": [{}], "body": {}}})", - args.c_str(), _compile(node.constList()[2]).c_str()); - break; - } - - case Keyword::Let: - { - // (let name value) - json += fmt::format( - R"({{"type": "Let", "name": {}, "value": {}}})", - _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); - break; - } - - case Keyword::Mut: - { - // (mut name value) - json += fmt::format( - R"({{"type": "Mut", "name": {}, "value": {}}})", - _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); - break; - } - - case Keyword::Set: - { - // (set name value) - json += fmt::format( - R"({{"type": "Set", "name": {}, "value": {}}})", - _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); - break; - } - - case Keyword::If: - { - // (if condition then else) - json += fmt::format( - R"({{"type": "If", "condition": {}, "then": {}, "else": {}}})", - _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str(), _compile(node.constList()[3]).c_str()); - break; - } - - case Keyword::While: - { - // (while condition body) - json 
+= fmt::format( - R"({{"type": "While", "condition": {}, "body": {}}})", - _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); - break; - } - - case Keyword::Begin: - { - // (begin body) - json += R"({"type": "Begin", "children": )"; - json += toJsonList(node, 1) + "}"; - break; - } - - case Keyword::Import: - { - // (import value) - json += fmt::format( - R"({{"type": "Import", "value": {}}})", - _compile(node.constList()[1]).c_str()); - break; - } - - case Keyword::Del: - { - // (del value) - json += fmt::format( - R"({{"type": "Del", "value": {}}})", - _compile(node.constList()[1]).c_str()); - break; - } - } - } - else if (node.constList().size() > 1 && node.constList()[0].nodeType() == NodeType::Symbol) - { - // (foo bar 1) - json += fmt::format( - R"({{"type": "FunctionCall", "name": {}, "args": )", - _compile(node.constList()[0]).c_str()); - json += toJsonList(node, 1) + "}"; - } - else - json += toJsonList(node, 0); - - break; - } - - default: - throw Error(fmt::format( - "Not handled NodeType::{} ({} at {}:{}), please report this error on GitHub", - nodeTypes[static_cast(node.nodeType())].data(), - node.filename().c_str(), - node.line(), - node.col())); - } - return json; - } - - std::string JsonCompiler::toJsonList(const Node& node, std::size_t start) - { - std::string json = "["; - for (std::size_t i = start, end = node.constList().size(); i < end; ++i) - { - json += _compile(node.constList()[i]); - if (i != end - 1) - json += ", "; - } - json += "]"; - return json; - } -} diff --git a/src/arkscript/JsonCompiler.cpp b/src/arkscript/JsonCompiler.cpp new file mode 100644 index 000000000..4df864b26 --- /dev/null +++ b/src/arkscript/JsonCompiler.cpp @@ -0,0 +1,201 @@ +#include + +#include + +#include + +using namespace Ark::internal; + +JsonCompiler::JsonCompiler(unsigned debug, const std::vector& libenv) : + m_welder(debug, libenv) +{} + +void JsonCompiler::feed(const std::string& filename) +{ + 
m_welder.computeASTFromFile(filename); +} + +std::string JsonCompiler::compile() +{ + return _compile(m_welder.ast()); +} + +std::string JsonCompiler::_compile(const Node& node) +{ + std::string json; + + switch (node.nodeType()) + { + case NodeType::Symbol: + { + json += fmt::format( + R"({{"type": "Symbol", "name": "{}"}})", + node.string().c_str()); + break; + } + + case NodeType::Capture: + { + json += fmt::format( + R"({{"type": "Capture", "name": "{}"}})", + node.string().c_str()); + break; + } + + case NodeType::Field: + { + json += R"({"type": "Field", "children": )"; + json += toJsonList(node, 0) + "}"; + break; + } + + case NodeType::String: + { + json += fmt::format( + R"({{"type": "String", "value": "{}"}})", + node.string().c_str()); + break; + } + + case NodeType::Number: + { + json += fmt::format( + R"({{"type": "Number", "value": {}}})", + node.number()); + break; + } + + case NodeType::List: + { + if (!node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword) + { + Node keyword = node.constList()[0]; + switch (keyword.keyword()) + { + case Keyword::Fun: + { + // (fun (args) (body)) + std::string args; + Node args_node = node.constList()[1]; + for (std::size_t i = 0, end = args_node.constList().size(); i < end; ++i) + { + args += _compile(args_node.constList()[i]); + if (end > 1 && i != end - 1) + args += ", "; + } + + json += fmt::format( + R"({{"type": "Fun", "args": [{}], "body": {}}})", + args.c_str(), _compile(node.constList()[2]).c_str()); + break; + } + + case Keyword::Let: + { + // (let name value) + json += fmt::format( + R"({{"type": "Let", "name": {}, "value": {}}})", + _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); + break; + } + + case Keyword::Mut: + { + // (mut name value) + json += fmt::format( + R"({{"type": "Mut", "name": {}, "value": {}}})", + _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); + break; + } + + case Keyword::Set: + { + // (set name 
value) + json += fmt::format( + R"({{"type": "Set", "name": {}, "value": {}}})", + _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); + break; + } + + case Keyword::If: + { + // (if condition then else) + json += fmt::format( + R"({{"type": "If", "condition": {}, "then": {}, "else": {}}})", + _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str(), _compile(node.constList()[3]).c_str()); + break; + } + + case Keyword::While: + { + // (while condition body) + json += fmt::format( + R"({{"type": "While", "condition": {}, "body": {}}})", + _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); + break; + } + + case Keyword::Begin: + { + // (begin body) + json += R"({"type": "Begin", "children": )"; + json += toJsonList(node, 1) + "}"; + break; + } + + case Keyword::Import: + { + // (import value) + json += fmt::format( + R"({{"type": "Import", "value": {}}})", + _compile(node.constList()[1]).c_str()); + break; + } + + case Keyword::Del: + { + // (del value) + json += fmt::format( + R"({{"type": "Del", "value": {}}})", + _compile(node.constList()[1]).c_str()); + break; + } + } + } + else if (node.constList().size() > 1 && node.constList()[0].nodeType() == NodeType::Symbol) + { + // (foo bar 1) + json += fmt::format( + R"({{"type": "FunctionCall", "name": {}, "args": )", + _compile(node.constList()[0]).c_str()); + json += toJsonList(node, 1) + "}"; + } + else + json += toJsonList(node, 0); + + break; + } + + default: + throw Error(fmt::format( + "Not handled NodeType::{} ({} at {}:{}), please report this error on GitHub", + nodeTypes[static_cast(node.nodeType())].data(), + node.filename().c_str(), + node.line(), + node.col())); + } + return json; +} + +std::string JsonCompiler::toJsonList(const Node& node, std::size_t start) +{ + std::string json = "["; + for (std::size_t i = start, end = node.constList().size(); i < end; ++i) + { + json += _compile(node.constList()[i]); + if (i != end - 1) + json 
+= ", "; + } + json += "]"; + return json; +} diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index e2419a8b7..fc63d47b8 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include int main(int argc, char** argv) { @@ -252,7 +252,7 @@ int main(int argc, char** argv) case mode::ast: { - Ark::JsonCompiler compiler(debug, lib_paths); + JsonCompiler compiler(debug, lib_paths); compiler.feed(file); std::cout << compiler.compile() << std::endl; break; diff --git a/tests/unittests/ValidAstSuite.cpp b/tests/unittests/ValidAstSuite.cpp index ad9c6cd7b..57019d271 100644 --- a/tests/unittests/ValidAstSuite.cpp +++ b/tests/unittests/ValidAstSuite.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include "TestsHelper.hpp" @@ -14,7 +14,7 @@ ut::suite<"AST"> ast_suite = [] { iter_test_files( "ASTSuite", [](TestData&& data) { - Ark::JsonCompiler compiler(false, { ARK_TESTS_ROOT "lib/" }); + JsonCompiler compiler(false, { ARK_TESTS_ROOT "lib/" }); std::string json; should("parse " + data.stem) = [&] { From dd46b9890b244a15c95ec05a380cb4fa701835fc Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sat, 23 Mar 2024 19:01:03 +0100 Subject: [PATCH 03/12] refactor: move the REPL out of Ark/ to put it under CLI/ to avoid clutering the interface --- CMakeLists.txt | 13 +++++++------ include/{Ark => CLI}/REPL/ConsoleStyle.hpp | 0 include/{Ark => CLI}/REPL/Repl.hpp | 2 +- include/{Ark => CLI}/REPL/replxx/Util.hpp | 0 src/arkscript/JsonCompiler.cpp | 2 +- src/arkscript/REPL/Repl.cpp | 4 ++-- src/arkscript/REPL/Utils.cpp | 3 ++- src/arkscript/main.cpp | 2 +- 8 files changed, 14 insertions(+), 12 deletions(-) rename include/{Ark => CLI}/REPL/ConsoleStyle.hpp (100%) rename include/{Ark => CLI}/REPL/Repl.hpp (96%) rename include/{Ark => CLI}/REPL/replxx/Util.hpp (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 55ab50e48..3a4dbde9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -180,7 +180,11 @@ 
endif() # TODO: consider using ctest if (ARK_TESTS) - file(GLOB_RECURSE UT_SOURCES ${ark_SOURCE_DIR}/tests/unittests/*.cpp) + file(GLOB_RECURSE UT_SOURCES + ${ark_SOURCE_DIR}/tests/unittests/*.cpp + ${ark_SOURCE_DIR}/lib/fmt/src/format.cc + ${ark_SOURCE_DIR}/src/arkscript/Formatter.cpp + ${ark_SOURCE_DIR}/src/arkscript/JsonCompiler.cpp) add_executable(unittests ${UT_SOURCES}) add_subdirectory(${ark_SOURCE_DIR}/lib/ut) @@ -194,11 +198,8 @@ endif() if (ARK_BUILD_EXE) # additional files needed for the exe (repl, command line and stuff) file(GLOB_RECURSE EXE_SOURCES - ${ark_SOURCE_DIR}/src/arkscript/REPL/Utils.cpp - ${ark_SOURCE_DIR}/src/arkscript/REPL/Repl.cpp - ${ark_SOURCE_DIR}/lib/fmt/src/format.cc - ${ark_SOURCE_DIR}/src/arkscript/main.cpp) - + ${ark_SOURCE_DIR}/src/arkscript/*.cpp + ${ark_SOURCE_DIR}/lib/fmt/src/format.cc) add_executable(arkscript ${EXE_SOURCES}) if (MSVC) diff --git a/include/Ark/REPL/ConsoleStyle.hpp b/include/CLI/REPL/ConsoleStyle.hpp similarity index 100% rename from include/Ark/REPL/ConsoleStyle.hpp rename to include/CLI/REPL/ConsoleStyle.hpp diff --git a/include/Ark/REPL/Repl.hpp b/include/CLI/REPL/Repl.hpp similarity index 96% rename from include/Ark/REPL/Repl.hpp rename to include/CLI/REPL/Repl.hpp index 3b9372150..80072876f 100644 --- a/include/Ark/REPL/Repl.hpp +++ b/include/CLI/REPL/Repl.hpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include namespace Ark { diff --git a/include/Ark/REPL/replxx/Util.hpp b/include/CLI/REPL/replxx/Util.hpp similarity index 100% rename from include/Ark/REPL/replxx/Util.hpp rename to include/CLI/REPL/replxx/Util.hpp diff --git a/src/arkscript/JsonCompiler.cpp b/src/arkscript/JsonCompiler.cpp index 4df864b26..6b27ff83c 100644 --- a/src/arkscript/JsonCompiler.cpp +++ b/src/arkscript/JsonCompiler.cpp @@ -177,7 +177,7 @@ std::string JsonCompiler::_compile(const Node& node) } default: - throw Error(fmt::format( + throw Ark::Error(fmt::format( "Not handled NodeType::{} ({} at {}:{}), please report 
this error on GitHub", nodeTypes[static_cast(node.nodeType())].data(), node.filename().c_str(), diff --git a/src/arkscript/REPL/Repl.cpp b/src/arkscript/REPL/Repl.cpp index f9f5788a3..acc091108 100644 --- a/src/arkscript/REPL/Repl.cpp +++ b/src/arkscript/REPL/Repl.cpp @@ -2,8 +2,8 @@ #include #include -#include -#include +#include +#include namespace Ark { diff --git a/src/arkscript/REPL/Utils.cpp b/src/arkscript/REPL/Utils.cpp index 7274d115e..25f97a6c2 100644 --- a/src/arkscript/REPL/Utils.cpp +++ b/src/arkscript/REPL/Utils.cpp @@ -1,4 +1,5 @@ -#include +#include + #include #include diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index fc63d47b8..5e4a02895 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include From a543acdf03eee73af8f0a668c24b2510d7cec4da Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sat, 23 Mar 2024 20:12:25 +0100 Subject: [PATCH 04/12] chore(cli): modify the cli definition so that arguments of subcommands go together in blocks --- src/arkscript/main.cpp | 47 +++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index 5e4a02895..2ba892724 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -56,13 +56,33 @@ int main(int argc, char** argv) | option("-v", "--version").set(selected, mode::version).doc("Display ArkScript version and exit") | option("--dev-info").set(selected, mode::dev_info).doc("Display development information and exit") | ( - required("-e", "--eval").set(selected, mode::eval).doc("Evaluate ArkScript expression") + required("-e", "--eval").set(selected, mode::eval).doc("Evaluate ArkScript expression\n") & value("expression", eval_expression) ) | ( required("-c", "--compile").set(selected, mode::compile).doc("Compile the given program to bytecode, but do not run") & value("file", file) + , 
joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }).doc("Increase debug level (default: 0)\n"))) + ) + | ( + value("file", file).set(selected, mode::run) + , ( + joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }))) + , ( + option("-L", "--lib").doc("Set the location of the ArkScript standard library. Paths can be delimited by ';'\n") + & value("lib_dir", libdir) + ) + ) + , any_other(script_args) + ) + | ( + required("--ast").set(selected, mode::ast).doc("Compile the given program and output its AST as JSON to stdout") + & value("file", file) , joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }).doc("Increase debug level (default: 0)"))) + , ( + option("-L", "--lib").doc("Set the location of the ArkScript standard library. Paths can be delimited by ';'") + & value("lib_dir", libdir) + ) ) | ( required("-bcr", "--bytecode-reader").set(selected, mode::bytecode_reader).doc("Launch the bytecode reader") @@ -86,26 +106,6 @@ int main(int argc, char** argv) ) ) ) - | ( - value("file", file).set(selected, mode::run) - , ( - joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }))) - , ( - option("-L", "--lib").doc("Set the location of the ArkScript standard library. Paths can be delimited by ';'") - & value("lib_dir", libdir) - ) - ) - , any_other(script_args) - ) - | ( - required("--ast").set(selected, mode::ast).doc("Compile the given program and output its AST as JSON to stdout") - & value("file", file) - , joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }).doc("Increase debug level (default: 0)"))) - , ( - option("-L", "--lib").doc("Set the location of the ArkScript standard library. 
Paths can be delimited by ';'") - & value("lib_dir", libdir) - ) - ) , any_other(wrong) ); // clang-format on @@ -116,7 +116,8 @@ int main(int argc, char** argv) .indent_size(2) // indent of documentation lines for children of a documented group .split_alternatives(true) // split usage into several lines for large alternatives .merge_alternative_flags_with_common_prefix(true) // [-fok] [-fno-ok] becomes [-f(ok|no-ok)] - ; + .paragraph_spacing(1) + .ignore_newline_chars(false); const auto man_page = make_man_page(cli, "arkscript", fmt) .prepend_section("DESCRIPTION", " ArkScript programming language") .append_section("VERSION", fmt::format(" {}", ARK_FULL_VERSION)) @@ -288,7 +289,7 @@ int main(int argc, char** argv) for (const auto& arg : wrong) std::cerr << "'" << arg.c_str() << "' isn't a valid argument\n"; - std::cout << man_page << std::endl; + std::cout << usage_lines(cli, fmt) << std::endl; } return 0; From 4c1a384618eea5c7512a08303b34c5883bebd4b6 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sat, 23 Mar 2024 21:27:58 +0100 Subject: [PATCH 05/12] feat(parser): adding an option to interpret escape sequences or not --- include/Ark/Compiler/AST/Parser.hpp | 69 +++++++++++++++++--------- src/arkreactor/Compiler/AST/Parser.cpp | 4 +- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/include/Ark/Compiler/AST/Parser.hpp b/include/Ark/Compiler/AST/Parser.hpp index 3e665f4d6..9ac4b53b5 100644 --- a/include/Ark/Compiler/AST/Parser.hpp +++ b/include/Ark/Compiler/AST/Parser.hpp @@ -19,7 +19,11 @@ namespace Ark::internal class ARK_API Parser : public BaseParser { public: - Parser(); + /** + * @brief Constructs a new Parser object + * @param interpret + */ + explicit Parser(bool interpret = true); void processFile(const std::string& filename); void processString(const std::string& code); @@ -28,6 +32,7 @@ namespace Ark::internal [[nodiscard]] const std::vector& imports() const; private: + bool m_interpret; Node m_ast; std::vector m_imports; unsigned 
m_allow_macro_behavior; ///< Toggled on when inside a macro definition, off afterward @@ -77,36 +82,44 @@ namespace Ark::internal { if (accept(IsChar('\\'))) { + if (!m_interpret) + res += '\\'; + if (accept(IsChar('"'))) - res += '\"'; + res += '"'; else if (accept(IsChar('\\'))) res += '\\'; else if (accept(IsChar('n'))) - res += '\n'; + res += m_interpret ? '\n' : 'n'; else if (accept(IsChar('t'))) - res += '\t'; + res += m_interpret ? '\t' : 't'; else if (accept(IsChar('v'))) - res += '\v'; + res += m_interpret ? '\v' : 'v'; else if (accept(IsChar('r'))) - res += '\r'; + res += m_interpret ? '\r' : 'r'; else if (accept(IsChar('a'))) - res += '\a'; + res += m_interpret ? '\a' : 'a'; else if (accept(IsChar('b'))) - res += '\b'; + res += m_interpret ? '\b' : 'b'; else if (accept(IsChar('0'))) - res += '\0'; + res += m_interpret ? '\0' : '0'; else if (accept(IsChar('f'))) - res += '\f'; + res += m_interpret ? '\f' : 'f'; else if (accept(IsChar('u'))) { std::string seq; if (hexNumber(4, &seq)) { - char utf8_str[5]; - utf8::decode(seq.c_str(), utf8_str); - if (*utf8_str == '\0') - error("Invalid escape sequence", "\\u" + seq); - res += utf8_str; + if (m_interpret) + { + char utf8_str[5]; + utf8::decode(seq.c_str(), utf8_str); + if (*utf8_str == '\0') + error("Invalid escape sequence", "\\u" + seq); + res += utf8_str; + } + else + res += seq; } else error("Invalid escape sequence", "\\u"); @@ -116,14 +129,19 @@ namespace Ark::internal std::string seq; if (hexNumber(8, &seq)) { - std::size_t begin = 0; - for (; seq[begin] == '0'; ++begin) - ; - char utf8_str[5]; - utf8::decode(seq.c_str() + begin, utf8_str); - if (*utf8_str == '\0') - error("Invalid escape sequence", "\\U" + seq); - res += utf8_str; + if (m_interpret) + { + std::size_t begin = 0; + for (; seq[begin] == '0'; ++begin) + ; + char utf8_str[5]; + utf8::decode(seq.c_str() + begin, utf8_str); + if (*utf8_str == '\0') + error("Invalid escape sequence", "\\U" + seq); + res += utf8_str; + } + else + res += seq; 
} else error("Invalid escape sequence", "\\U"); @@ -201,7 +219,10 @@ namespace Ark::internal if (!accept(IsChar(')'))) return std::nullopt; - return Node(NodeType::Symbol, "nil"); + if (m_interpret) + return Node(NodeType::Symbol, "nil"); + else + return Node(NodeType::List); } std::optional atom(); diff --git a/src/arkreactor/Compiler/AST/Parser.cpp b/src/arkreactor/Compiler/AST/Parser.cpp index d769681b8..746a4c24e 100644 --- a/src/arkreactor/Compiler/AST/Parser.cpp +++ b/src/arkreactor/Compiler/AST/Parser.cpp @@ -4,8 +4,8 @@ namespace Ark::internal { - Parser::Parser() : - BaseParser(), m_ast(NodeType::List), m_imports({}), m_allow_macro_behavior(0) + Parser::Parser(bool interpret) : + BaseParser(), m_interpret(interpret), m_ast(NodeType::List), m_imports({}), m_allow_macro_behavior(0) { m_ast.push_back(Node(Keyword::Begin)); } From 9699259298a03500b32eb6d1701f980decc5c83a Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sat, 23 Mar 2024 22:12:33 +0100 Subject: [PATCH 06/12] feat(parser): keep track of each node source position and file --- include/Ark/Compiler/AST/BaseParser.hpp | 4 +++- include/Ark/Compiler/AST/Parser.hpp | 3 +++ src/arkreactor/Compiler/AST/Parser.cpp | 24 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/include/Ark/Compiler/AST/BaseParser.hpp b/include/Ark/Compiler/AST/BaseParser.hpp index 5fd473f86..9adc69e4b 100644 --- a/include/Ark/Compiler/AST/BaseParser.hpp +++ b/include/Ark/Compiler/AST/BaseParser.hpp @@ -25,7 +25,7 @@ namespace Ark::internal BaseParser() = default; private: - std::string m_filename; + bool m_remove_comments; std::string m_str; std::string::iterator m_it, m_next_it; utf8_char_t m_sym; @@ -36,6 +36,8 @@ namespace Ark::internal void next(); protected: + std::string m_filename; + void initParser(const std::string& filename, const std::string& code); FilePosition getCursor(); diff --git a/include/Ark/Compiler/AST/Parser.hpp b/include/Ark/Compiler/AST/Parser.hpp index 
9ac4b53b5..7b1cf9d17 100644 --- a/include/Ark/Compiler/AST/Parser.hpp +++ b/include/Ark/Compiler/AST/Parser.hpp @@ -39,6 +39,8 @@ namespace Ark::internal void run(); + void setNodePosAndFilename(Node& node); + std::optional node(); std::optional letMutSet(); std::optional del(); @@ -173,6 +175,7 @@ namespace Ark::internal return std::nullopt; Node leaf = Node(NodeType::Field); + setNodePosAndFilename(leaf); leaf.push_back(Node(NodeType::Symbol, symbol)); while (true) diff --git a/src/arkreactor/Compiler/AST/Parser.cpp b/src/arkreactor/Compiler/AST/Parser.cpp index 746a4c24e..e76bed11a 100644 --- a/src/arkreactor/Compiler/AST/Parser.cpp +++ b/src/arkreactor/Compiler/AST/Parser.cpp @@ -47,6 +47,13 @@ namespace Ark::internal } } + void Parser::setNodePosAndFilename(Node& node) + { + auto position = getCursor(); + node.setPos(position.row, position.col); + node.setFilename(m_filename); + } + std::optional Parser::node() { // save current position in buffer to be able to go back if needed @@ -118,6 +125,7 @@ namespace Ark::internal newlineOrComment(); Node leaf(NodeType::List); + setNodePosAndFilename(leaf); if (token == "let") leaf.push_back(Node(Keyword::Let)); else if (token == "mut") @@ -167,6 +175,7 @@ namespace Ark::internal errorWithNextToken(keyword + " needs a symbol"); Node leaf(NodeType::List); + setNodePosAndFilename(leaf); leaf.push_back(Node(Keyword::Del)); leaf.push_back(Node(NodeType::Symbol, symbol)); @@ -181,6 +190,7 @@ namespace Ark::internal newlineOrComment(); Node leaf(NodeType::List); + setNodePosAndFilename(leaf); leaf.push_back(Node(Keyword::If)); if (auto condition = nodeOrValue(); condition.has_value()) @@ -214,6 +224,7 @@ namespace Ark::internal newlineOrComment(); Node leaf(NodeType::List); + setNodePosAndFilename(leaf); leaf.push_back(Node(Keyword::While)); if (auto condition = nodeOrValue(); condition.has_value()) @@ -242,6 +253,7 @@ namespace Ark::internal newlineOrComment(); Node leaf(NodeType::List); + setNodePosAndFilename(leaf); 
leaf.push_back(Node(Keyword::Import)); Import import_data; @@ -251,6 +263,7 @@ namespace Ark::internal import_data.package.push_back(import_data.prefix); Node packageNode(NodeType::List); + setNodePosAndFilename(packageNode); packageNode.push_back(Node(NodeType::String, import_data.prefix)); // first, parse the package name @@ -288,6 +301,7 @@ namespace Ark::internal } Node symbols(NodeType::List); + setNodePosAndFilename(symbols); // then parse the symbols to import, if any if (newlineOrComment()) { @@ -340,6 +354,7 @@ namespace Ark::internal newlineOrComment(); Node leaf(NodeType::List); + setNodePosAndFilename(leaf); leaf.push_back(Node(Keyword::Begin)); while (!isEOF()) @@ -364,6 +379,7 @@ namespace Ark::internal newlineOrComment(); Node args(NodeType::List); + setNodePosAndFilename(args); bool has_captures = false; while (!isEOF()) @@ -416,6 +432,7 @@ namespace Ark::internal auto position = getCount(); Node leaf(NodeType::List); + setNodePosAndFilename(leaf); leaf.push_back(Node(Keyword::Fun)); // args if (auto value = nodeOrValue(); value.has_value()) @@ -423,6 +440,7 @@ namespace Ark::internal // if value is nil, just add an empty argument bloc to prevent bugs when // declaring functions inside macros Node args = value.value(); + setNodePosAndFilename(args); if (args.nodeType() == NodeType::Symbol && args.string() == "nil") leaf.push_back(Node(NodeType::List)); else @@ -443,6 +461,7 @@ namespace Ark::internal } Node leaf(NodeType::List); + setNodePosAndFilename(leaf); leaf.push_back(Node(Keyword::Fun)); auto position = getCount(); @@ -475,6 +494,7 @@ namespace Ark::internal newlineOrComment(); Node leaf(NodeType::Macro); + setNodePosAndFilename(leaf); leaf.push_back(Node(Keyword::If)); if (auto condition = nodeOrValue(); condition.has_value()) @@ -506,6 +526,7 @@ namespace Ark::internal { newlineOrComment(); Node args = Node(NodeType::List); + setNodePosAndFilename(args); while (!isEOF()) { @@ -554,6 +575,7 @@ namespace Ark::internal newlineOrComment(); Node 
leaf(NodeType::Macro); + setNodePosAndFilename(leaf); leaf.push_back(Node(NodeType::Symbol, symbol)); auto position = getCount(); @@ -614,6 +636,7 @@ namespace Ark::internal } Node leaf(call_type); + setNodePosAndFilename(leaf); leaf.push_back(func.value()); while (!isEOF()) @@ -639,6 +662,7 @@ namespace Ark::internal newlineOrComment(); Node leaf(NodeType::List); + setNodePosAndFilename(leaf); leaf.push_back(Node(NodeType::Symbol, "list")); while (!isEOF()) From ea577d72a7ac0166711a390985189b4d7b23e85c Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sat, 23 Mar 2024 22:00:06 +0100 Subject: [PATCH 07/12] feat(parser): begin work to keep comments inside the parser --- CHANGELOG.md | 2 + include/Ark/Compiler/AST/BaseParser.hpp | 5 +- include/Ark/Compiler/AST/Node.hpp | 14 ++ include/Ark/Compiler/AST/Parser.hpp | 10 +- src/arkreactor/Compiler/AST/BaseParser.cpp | 14 +- src/arkreactor/Compiler/AST/Node.cpp | 11 ++ src/arkreactor/Compiler/AST/Parser.cpp | 212 +++++++++++++-------- 7 files changed, 171 insertions(+), 97 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1be5ca0a5..3b33b74b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ - added the padding/instruction/argumentation values when displaying instructions in the bytecode reader - `$repr` macro to get a string representation of a given node - added boost-ext/ut to write unit tests in C++ +- basic ArkScript code formatter, available through the CLI: `arkscript -f|--format` +- comments are now tracked in the AST and attached to the nearest node below them ### Changed - instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument diff --git a/include/Ark/Compiler/AST/BaseParser.hpp b/include/Ark/Compiler/AST/BaseParser.hpp index 9adc69e4b..e3ddee0a1 100644 --- a/include/Ark/Compiler/AST/BaseParser.hpp +++ b/include/Ark/Compiler/AST/BaseParser.hpp @@ -25,7 +25,6 @@ namespace Ark::internal BaseParser() = default; private: - bool 
m_remove_comments; std::string m_str; std::string::iterator m_it, m_next_it; utf8_char_t m_sym; @@ -71,8 +70,8 @@ namespace Ark::internal bool space(std::string* s = nullptr); bool inlineSpace(std::string* s = nullptr); bool endOfLine(std::string* s = nullptr); - bool comment(); - bool newlineOrComment(); + bool comment(std::string* s = nullptr); + bool newlineOrComment(std::string* s = nullptr); bool prefix(char c); bool suffix(char c); bool number(std::string* s = nullptr); diff --git a/include/Ark/Compiler/AST/Node.hpp b/include/Ark/Compiler/AST/Node.hpp index dd271b5e1..b3f8888f5 100644 --- a/include/Ark/Compiler/AST/Node.hpp +++ b/include/Ark/Compiler/AST/Node.hpp @@ -127,6 +127,13 @@ namespace Ark::internal */ void setFilename(const std::string& filename) noexcept; + /** + * @brief Set the comment field with the nearest comment before this node + * @param comment + * @return Node& reference to this node after updating it + */ + Node& attachNearestCommentBefore(const std::string& comment); + /** * @brief Get the line at which this node was created * @@ -148,6 +155,12 @@ namespace Ark::internal */ [[nodiscard]] const std::string& filename() const noexcept; + /** + * @brief Return the comment attached to this node, if any + * @return const std::string& + */ + [[nodiscard]] const std::string& comment() const noexcept; + /** * @brief Compute a representation of the node without any comments or additional sugar, colors, types * @return String representation of the node @@ -165,6 +178,7 @@ namespace Ark::internal // position of the node in the original code, useful when it comes to parser errors std::size_t m_line = 0, m_col = 0; std::string m_filename; + std::string m_comment; }; ARK_API std::ostream& operator<<(std::ostream& os, const std::vector& node) noexcept; diff --git a/include/Ark/Compiler/AST/Parser.hpp b/include/Ark/Compiler/AST/Parser.hpp index 7b1cf9d17..5b5ef1b4a 100644 --- a/include/Ark/Compiler/AST/Parser.hpp +++ b/include/Ark/Compiler/AST/Parser.hpp 
@@ -21,7 +21,7 @@ namespace Ark::internal public: /** * @brief Constructs a new Parser object - * @param interpret + * @param interpret interpret escape codes in strings */ explicit Parser(bool interpret = true); @@ -218,14 +218,16 @@ namespace Ark::internal { if (!accept(IsChar('('))) return std::nullopt; - newlineOrComment(); + + std::string comment; + newlineOrComment(&comment); if (!accept(IsChar(')'))) return std::nullopt; if (m_interpret) - return Node(NodeType::Symbol, "nil"); + return Node(NodeType::Symbol, "nil").attachNearestCommentBefore(comment); else - return Node(NodeType::List); + return Node(NodeType::List).attachNearestCommentBefore(comment); } std::optional atom(); diff --git a/src/arkreactor/Compiler/AST/BaseParser.cpp b/src/arkreactor/Compiler/AST/BaseParser.cpp index 8a9269156..f29fab45d 100644 --- a/src/arkreactor/Compiler/AST/BaseParser.cpp +++ b/src/arkreactor/Compiler/AST/BaseParser.cpp @@ -164,22 +164,22 @@ namespace Ark::internal return false; } - bool BaseParser::comment() + bool BaseParser::comment(std::string* s) { - if (accept(IsChar('#'))) + if (accept(IsChar('#'), s)) { - while (accept(IsNot(IsChar('\n')))) + while (accept(IsNot(IsChar('\n')), s)) ; - accept(IsChar('\n')); + accept(IsChar('\n'), s); return true; } return false; } - bool BaseParser::newlineOrComment() + bool BaseParser::newlineOrComment(std::string* s) { bool matched = space(); - while (!isEOF() && comment()) + while (!isEOF() && comment(s)) { space(); matched = true; @@ -192,13 +192,11 @@ namespace Ark::internal { if (!accept(IsChar(c))) return false; - newlineOrComment(); return true; } bool BaseParser::suffix(char c) { - newlineOrComment(); return accept(IsChar(c)); } diff --git a/src/arkreactor/Compiler/AST/Node.cpp b/src/arkreactor/Compiler/AST/Node.cpp index 9419ec92c..d7684153d 100644 --- a/src/arkreactor/Compiler/AST/Node.cpp +++ b/src/arkreactor/Compiler/AST/Node.cpp @@ -99,6 +99,12 @@ namespace Ark::internal m_filename = filename; } + Node& 
Node::attachNearestCommentBefore(const std::string& comment) + { + m_comment = comment; + return *this; + } + std::size_t Node::line() const noexcept { return m_line; @@ -114,6 +120,11 @@ namespace Ark::internal return m_filename; } + const std::string& Node::comment() const noexcept + { + return m_comment; + } + std::string Node::repr() const noexcept { std::string data; diff --git a/src/arkreactor/Compiler/AST/Parser.cpp b/src/arkreactor/Compiler/AST/Parser.cpp index e76bed11a..55ae8febe 100644 --- a/src/arkreactor/Compiler/AST/Parser.cpp +++ b/src/arkreactor/Compiler/AST/Parser.cpp @@ -37,13 +37,14 @@ namespace Ark::internal { while (!isEOF()) { - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); if (isEOF()) break; auto n = node(); if (n) - m_ast.push_back(n.value()); + m_ast.push_back(n.value().attachNearestCommentBefore(comment)); } } @@ -122,10 +123,11 @@ namespace Ark::internal std::string token; if (!oneOf({ "let", "mut", "set" }, &token)) return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); Node leaf(NodeType::List); - setNodePosAndFilename(leaf); + setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); if (token == "let") leaf.push_back(Node(Keyword::Let)); else if (token == "mut") @@ -152,10 +154,11 @@ namespace Ark::internal leaf.push_back(Node(NodeType::Symbol, symbol)); } - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); if (auto value = nodeOrValue(); value.has_value()) - leaf.push_back(value.value()); + leaf.push_back(value.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a value"); @@ -168,14 +171,15 @@ namespace Ark::internal if (!oneOf({ "del" }, &keyword)) return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); std::string symbol; if (!name(&symbol)) errorWithNextToken(keyword + " needs a symbol"); Node leaf(NodeType::List); - setNodePosAndFilename(leaf); + 
setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); leaf.push_back(Node(Keyword::Del)); leaf.push_back(Node(NodeType::Symbol, symbol)); @@ -187,10 +191,11 @@ namespace Ark::internal if (!oneOf({ "if" })) return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); Node leaf(NodeType::List); - setNodePosAndFilename(leaf); + setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); leaf.push_back(Node(Keyword::If)); if (auto condition = nodeOrValue(); condition.has_value()) @@ -198,19 +203,21 @@ namespace Ark::internal else errorWithNextToken("If need a valid condition"); - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); if (auto value_if_true = nodeOrValue(); value_if_true.has_value()) - leaf.push_back(value_if_true.value()); + leaf.push_back(value_if_true.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a value"); - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); if (auto value_if_false = nodeOrValue(); value_if_false.has_value()) { - leaf.push_back(value_if_false.value()); - newlineOrComment(); + leaf.push_back(value_if_false.value().attachNearestCommentBefore(comment)); + newlineOrComment(); // FIXME how to attach a comment after the node? another field? 
} return leaf; @@ -221,10 +228,11 @@ namespace Ark::internal if (!oneOf({ "while" })) return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); Node leaf(NodeType::List); - setNodePosAndFilename(leaf); + setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); leaf.push_back(Node(Keyword::While)); if (auto condition = nodeOrValue(); condition.has_value()) @@ -232,10 +240,11 @@ namespace Ark::internal else errorWithNextToken("While need a valid condition"); - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); if (auto body = nodeOrValue(); body.has_value()) - leaf.push_back(body.value()); + leaf.push_back(body.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a value"); @@ -246,15 +255,17 @@ namespace Ark::internal { if (!accept(IsChar('('))) return std::nullopt; - newlineOrComment(); - - if (!oneOf({ "import" })) - return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); Node leaf(NodeType::List); setNodePosAndFilename(leaf); - leaf.push_back(Node(Keyword::Import)); + + if (!oneOf({ "import" })) + return std::nullopt; + comment.clear(); + newlineOrComment(&comment); + leaf.push_back(Node(Keyword::Import).attachNearestCommentBefore(comment)); Import import_data; @@ -303,7 +314,8 @@ namespace Ark::internal Node symbols(NodeType::List); setNodePosAndFilename(symbols); // then parse the symbols to import, if any - if (newlineOrComment()) + comment.clear(); + if (newlineOrComment(&comment)) { while (!isEOF()) { @@ -319,11 +331,12 @@ namespace Ark::internal error("Glob pattern can not follow a symbol to import", ":*"); } - symbols.push_back(Node(NodeType::Symbol, symbol)); + symbols.push_back(Node(NodeType::Symbol, symbol).attachNearestCommentBefore(comment)); import_data.symbols.push_back(symbol); } - if (!newlineOrComment()) + comment.clear(); + if (!newlineOrComment(&comment)) // TODO what to do? 
break; } } @@ -333,7 +346,7 @@ namespace Ark::internal // save the import data m_imports.push_back(import_data); - newlineOrComment(); + newlineOrComment(); // FIXME: attach comment after the node expect(IsChar(')')); return leaf; } @@ -341,9 +354,10 @@ namespace Ark::internal std::optional Parser::block() { bool alt_syntax = false; + std::string comment; if (accept(IsChar('('))) { - newlineOrComment(); + newlineOrComment(&comment); if (!oneOf({ "begin" })) return std::nullopt; } @@ -351,24 +365,27 @@ namespace Ark::internal alt_syntax = true; else return std::nullopt; - newlineOrComment(); Node leaf(NodeType::List); setNodePosAndFilename(leaf); - leaf.push_back(Node(Keyword::Begin)); + leaf.push_back(Node(Keyword::Begin).attachNearestCommentBefore(comment)); + + comment.clear(); + newlineOrComment(&comment); while (!isEOF()) { if (auto value = nodeOrValue(); value.has_value()) { - leaf.push_back(value.value()); - newlineOrComment(); + leaf.push_back(value.value().attachNearestCommentBefore(comment)); + comment.clear(); + newlineOrComment(&comment); } else break; } - newlineOrComment(); + newlineOrComment(&comment); // FIXME: attach comment after last node expect(IsChar(!alt_syntax ? 
')' : '}')); return leaf; } @@ -376,10 +393,11 @@ namespace Ark::internal std::optional Parser::functionArgs() { expect(IsChar('(')); - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); Node args(NodeType::List); - setNodePosAndFilename(args); + setNodePosAndFilename(args.attachNearestCommentBefore(comment)); bool has_captures = false; while (!isEOF()) @@ -392,8 +410,9 @@ namespace Ark::internal break; else { - newlineOrComment(); - args.push_back(Node(NodeType::Capture, capture)); + comment.clear(); + newlineOrComment(&comment); + args.push_back(Node(NodeType::Capture, capture).attachNearestCommentBefore(comment)); } } else @@ -410,8 +429,9 @@ namespace Ark::internal error("Captured variables should be at the end of the argument list", symbol); } - newlineOrComment(); - args.push_back(Node(NodeType::Symbol, symbol)); + comment.clear(); + newlineOrComment(&comment); + args.push_back(Node(NodeType::Symbol, symbol).attachNearestCommentBefore(comment)); } } } @@ -425,14 +445,15 @@ namespace Ark::internal { if (!oneOf({ "fun" })) return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); while (m_allow_macro_behavior > 0) { auto position = getCount(); Node leaf(NodeType::List); - setNodePosAndFilename(leaf); + setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); leaf.push_back(Node(Keyword::Fun)); // args if (auto value = nodeOrValue(); value.has_value()) @@ -451,17 +472,19 @@ namespace Ark::internal backtrack(position); break; } - newlineOrComment(); + + comment.clear(); + newlineOrComment(&comment); // body if (auto value = nodeOrValue(); value.has_value()) - leaf.push_back(value.value()); + leaf.push_back(value.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a body for the function"); return leaf; } Node leaf(NodeType::List); - setNodePosAndFilename(leaf); + setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); leaf.push_back(Node(Keyword::Fun)); auto 
position = getCount(); @@ -477,10 +500,11 @@ namespace Ark::internal errorWithNextToken("Expected an argument list"); } - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); if (auto value = nodeOrValue(); value.has_value()) - leaf.push_back(value.value()); + leaf.push_back(value.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a body for the function"); @@ -491,10 +515,11 @@ namespace Ark::internal { if (!oneOf({ "$if" })) return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); Node leaf(NodeType::Macro); - setNodePosAndFilename(leaf); + setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); leaf.push_back(Node(Keyword::If)); if (auto condition = nodeOrValue(); condition.has_value()) @@ -502,19 +527,22 @@ namespace Ark::internal else errorWithNextToken("$if need a valid condition"); - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); if (auto value_if_true = nodeOrValue(); value_if_true.has_value()) - leaf.push_back(value_if_true.value()); + leaf.push_back(value_if_true.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a value"); - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); if (auto value_if_false = nodeOrValue(); value_if_false.has_value()) { - leaf.push_back(value_if_false.value()); - newlineOrComment(); + leaf.push_back(value_if_false.value().attachNearestCommentBefore(comment)); + comment.clear(); + newlineOrComment(&comment); // FIXME: attach comment after node } return leaf; @@ -524,9 +552,10 @@ namespace Ark::internal { if (accept(IsChar('('))) { - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); Node args = Node(NodeType::List); - setNodePosAndFilename(args); + setNodePosAndFilename(args.attachNearestCommentBefore(comment)); while (!isEOF()) { @@ -535,8 +564,9 @@ namespace Ark::internal break; else { - newlineOrComment(); - args.push_back(Node(NodeType::Symbol, 
arg_name)); + comment.clear(); + newlineOrComment(&comment); + args.push_back(Node(NodeType::Symbol, arg_name).attachNearestCommentBefore(comment)); } } @@ -546,12 +576,14 @@ namespace Ark::internal if (!name(&spread_name)) errorWithNextToken("Expected a name for the variadic arguments list"); args.push_back(Node(NodeType::Spread, spread_name)); - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); // FIXME: attach comment after node } if (!accept(IsChar(')'))) return std::nullopt; - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); // FIXME: attach comment after node return args; } @@ -563,20 +595,23 @@ namespace Ark::internal { if (!accept(IsChar('('))) return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); if (!oneOf({ "$" })) return std::nullopt; - newlineOrComment(); + newlineOrComment(&comment); + + Node leaf(NodeType::Macro); + setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); std::string symbol; if (!name(&symbol)) errorWithNextToken("$ needs a symbol to declare a macro"); - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); - Node leaf(NodeType::Macro); - setNodePosAndFilename(leaf); - leaf.push_back(Node(NodeType::Symbol, symbol)); + leaf.push_back(Node(NodeType::Symbol, symbol).attachNearestCommentBefore(comment)); auto position = getCount(); if (auto args = macroArgs(); args.has_value()) @@ -607,7 +642,8 @@ namespace Ark::internal else errorWithNextToken("Expected a value while defining macro `" + symbol + "'"); - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); // FIXME: attach comment after node expect(IsChar(')')); return leaf; } @@ -616,16 +652,18 @@ namespace Ark::internal { if (!accept(IsChar('('))) return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); std::optional func; if (auto atom = anyAtomOf({ NodeType::Symbol, NodeType::Field }); atom.has_value()) - func = atom; + func = 
atom->attachNearestCommentBefore(comment); else if (auto nested = node(); nested.has_value()) - func = nested; + func = nested->attachNearestCommentBefore(comment); else return std::nullopt; - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); NodeType call_type = NodeType::List; if (auto node = func.value(); node.nodeType() == NodeType::Symbol) @@ -636,21 +674,23 @@ namespace Ark::internal } Node leaf(call_type); - setNodePosAndFilename(leaf); + setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); leaf.push_back(func.value()); while (!isEOF()) { if (auto arg = nodeOrValue(); arg.has_value()) { - newlineOrComment(); - leaf.push_back(arg.value()); + comment.clear(); + newlineOrComment(&comment); + leaf.push_back(arg.value().attachNearestCommentBefore(comment)); } else break; } - newlineOrComment(); + comment.clear(); + newlineOrComment(&comment); // FIXME: attach comment after node expect(IsChar(')')); return leaf; } @@ -659,25 +699,28 @@ namespace Ark::internal { if (!accept(IsChar('['))) return std::nullopt; - newlineOrComment(); + std::string comment; + newlineOrComment(&comment); Node leaf(NodeType::List); - setNodePosAndFilename(leaf); + setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); leaf.push_back(Node(NodeType::Symbol, "list")); + comment.clear(); while (!isEOF()) { if (auto value = nodeOrValue(); value.has_value()) { - leaf.push_back(value.value()); - newlineOrComment(); + leaf.push_back(value.value().attachNearestCommentBefore(comment)); + comment.clear(); + newlineOrComment(&comment); } else break; } - newlineOrComment(); - expect(IsChar(']')); + newlineOrComment(&comment); + expect(IsChar(']')); // FIXME: attach comment after node return leaf; } @@ -746,9 +789,14 @@ namespace Ark::internal { if (!prefix('(')) return std::nullopt; + std::string comment; + newlineOrComment(&comment); if (auto result = (this->*parser)(); result.has_value()) { + result->attachNearestCommentBefore(comment); + comment.clear(); + 
newlineOrComment(&comment); // FIXME: attach comment after node if (!suffix(')')) errorMissingSuffix(')', name); return result; From 7389aa2cc153e412fa2aeebb6fcc214b43e1d82a Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Wed, 27 Mar 2024 13:49:45 +0100 Subject: [PATCH 08/12] feat(ci): updates the static analysis workflow --- .github/launch-lizard.py | 7 ++++--- .github/workflows/static_analysis.yml | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/launch-lizard.py b/.github/launch-lizard.py index d5ce25e9b..0c1929042 100644 --- a/.github/launch-lizard.py +++ b/.github/launch-lizard.py @@ -5,7 +5,7 @@ i = lizard.analyze(files) -print(f"""Lizard report +print(f"""### Lizard report --- Listing only functions with cyclomatic complexity >= 15 or NLOC >= 100 or parameters >= 10. @@ -22,8 +22,9 @@ if func.cyclomatic_complexity >= 15 or func.nloc >= 100 or param_count >= 10: data.append([ - f"{filename} | {func.start_line}:{func.end_line} | `{func.name}` | {param_count} | {func.nloc}", func.cyclomatic_complexity + f"{filename} | {func.start_line}:{func.end_line} | `{func.name}` | {param_count} | {func.nloc}", + func.cyclomatic_complexity ]) for line in sorted(data, key=lambda e: e[1], reverse=True): - print(f"| {line[0]} | {line[1]} |") \ No newline at end of file + print(f"| {line[0]} | {line[1]} |") diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index 5d7a86f22..17f17271f 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -44,7 +44,7 @@ jobs: with: issue-number: ${{ github.event.pull_request.number }} comment-author: 'github-actions[bot]' - body-includes: Build output + body-includes: Static analysis report - name: Create or update comment uses: peter-evans/create-or-update-comment@v4 @@ -52,9 +52,10 @@ jobs: comment-id: ${{ steps.fc.outputs.comment-id }} issue-number: ${{ github.event.pull_request.number }} body: | + ## Static analysis report 
${{ steps.lizard.outputs.report }} --- - ## CppCheck report + ### CppCheck report ``` ${{ steps.cppcheck.outputs.report }} ``` From 1ec2d6ae87b5c98d3b5d80bbebb1af82df853fdc Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sat, 30 Mar 2024 18:32:17 +0100 Subject: [PATCH 09/12] feat(ci): update compilers to llvm 15 on macos --- .github/workflows/ci.yml | 8 ++++---- .github/workflows/setup-compilers/action.yaml | 10 ++++++++-- CMakeLists.txt | 4 ---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fcaf3ce0b..098600db7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -89,9 +89,9 @@ jobs: artifact: "windows-msvc-22" } - { - os: macos-latest, name: "MacOS Clang 14", - artifact: "macos-clang-14", - compiler: clang, compiler_version: 14, sanitizers: "On" + os: macos-latest, name: "MacOS Clang 15", + artifact: "macos-clang-15", + compiler: clang, compiler_version: 15, sanitizers: "On" } steps: @@ -156,7 +156,7 @@ jobs: - { os: ubuntu-latest, name: "Ubuntu Clang 15", artifact: "ubuntu-clang-15" } - { os: ubuntu-latest, name: "Ubuntu GCC 13", artifact: "ubuntu-gcc-13" } - { os: windows-latest, name: "Windows VS 2022", artifact: "windows-msvc-22", } - - { os: macos-latest, name: "MacOS Clang 14", artifact: "macos-clang-14", } + - { os: macos-latest, name: "MacOS Clang 15", artifact: "macos-clang-15", } steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/setup-compilers/action.yaml b/.github/workflows/setup-compilers/action.yaml index cea3cee8f..d42ad1aed 100644 --- a/.github/workflows/setup-compilers/action.yaml +++ b/.github/workflows/setup-compilers/action.yaml @@ -26,8 +26,8 @@ runs: shell: bash run: | if [[ '${{ startsWith(inputs.os_name, 'macos') }}' == 'true' ]]; then - echo "cc=${{ inputs.compiler }}" >> $GITHUB_OUTPUT - echo "cxx=${{ inputs.compiler }}++" >> $GITHUB_OUTPUT + echo "cc=/usr/local/opt/llvm@${{ inputs.compiler_version }}/bin/${{ 
inputs.compiler }}" >> $GITHUB_OUTPUT + echo "cxx=/usr/local/opt/llvm@${{ inputs.compiler_version }}/bin/${{ inputs.compiler }}++" >> $GITHUB_OUTPUT elif [[ '${{ inputs.compiler }}' == 'clang' ]]; then echo "cc=clang-${{ inputs.compiler_version }}" >> $GITHUB_OUTPUT echo "cxx=clang++-${{ inputs.compiler_version }}" >> $GITHUB_OUTPUT @@ -51,6 +51,12 @@ runs: libc++-${{ inputs.compiler_version }}-dev libc++abi-${{ inputs.compiler_version }}-dev \ clang-tools-${{ inputs.compiler_version }} + - name: Update LLVM compilers + if: startsWith(inputs.os_name, 'macos') + shell: bash + run: | + brew install llvm@${{ inputs.compiler_version }} + - name: Setup Windows environment uses: ilammy/msvc-dev-cmd@v1 if: startsWith(inputs.os_name, 'windows') diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a4dbde9b..d9333807c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,10 +109,6 @@ target_include_directories(ArkReactor target_link_libraries(ArkReactor PUBLIC termcolor) if (UNIX OR LINUX) - if (CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG) - target_link_libraries(ArkReactor PUBLIC stdc++fs) - endif() - find_package(Threads) target_link_libraries(ArkReactor PRIVATE ${CMAKE_DL_LIBS} ${CMAKE_THREAD_LIBS_INIT}) endif() From 92a6f99f083f7c99953ad192ea3d1110a3464900 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sun, 24 Mar 2024 13:47:34 +0100 Subject: [PATCH 10/12] feat(parser): enhancing node source line tracking --- include/Ark/Compiler/AST/BaseParser.hpp | 9 +- include/Ark/Compiler/AST/Parser.hpp | 26 +- src/arkreactor/Compiler/AST/BaseParser.cpp | 70 +++-- src/arkreactor/Compiler/AST/Parser.cpp | 271 ++++++++++-------- .../ParserSuite/success/import.expected | 16 +- 5 files changed, 235 insertions(+), 157 deletions(-) diff --git a/include/Ark/Compiler/AST/BaseParser.hpp b/include/Ark/Compiler/AST/BaseParser.hpp index e3ddee0a1..efc3e3279 100644 --- a/include/Ark/Compiler/AST/BaseParser.hpp +++ b/include/Ark/Compiler/AST/BaseParser.hpp @@ -5,6 +5,7 @@ 
#include #include #include +#include #include #include @@ -15,8 +16,8 @@ namespace Ark::internal { struct FilePosition { - std::size_t row; - std::size_t col; + std::size_t row = 0; + std::size_t col = 0; }; class ARK_API BaseParser @@ -26,8 +27,12 @@ namespace Ark::internal private: std::string m_str; + std::vector> m_it_to_row; std::string::iterator m_it, m_next_it; utf8_char_t m_sym; + FilePosition m_filepos; + + void registerNewLine(std::string::iterator it, std::size_t row); /* getting next character and changing the values of count/row/col/sym diff --git a/include/Ark/Compiler/AST/Parser.hpp b/include/Ark/Compiler/AST/Parser.hpp index 5b5ef1b4a..e10821686 100644 --- a/include/Ark/Compiler/AST/Parser.hpp +++ b/include/Ark/Compiler/AST/Parser.hpp @@ -39,7 +39,7 @@ namespace Ark::internal void run(); - void setNodePosAndFilename(Node& node); + Node& setNodePosAndFilename(Node& node, std::optional cursor = std::nullopt); std::optional node(); std::optional letMutSet(); @@ -65,7 +65,7 @@ namespace Ark::internal { double output; if (Utils::isDouble(res, &output)) - return Node(output); + return std::optional(output); else { backtrack(pos); @@ -163,7 +163,7 @@ namespace Ark::internal errorMissingSuffix('"', "string"); } - return Node(NodeType::String, res); + return { Node(NodeType::String, res) }; } return std::nullopt; } @@ -174,21 +174,21 @@ namespace Ark::internal if (!name(&symbol)) return std::nullopt; - Node leaf = Node(NodeType::Field); - setNodePosAndFilename(leaf); - leaf.push_back(Node(NodeType::Symbol, symbol)); + std::optional leaf { Node(NodeType::Field) }; + setNodePosAndFilename(leaf.value()); + leaf->push_back(Node(NodeType::Symbol, symbol)); while (true) { - if (leaf.list().size() == 1 && !accept(IsChar('.'))) // Symbol:abc + if (leaf->list().size() == 1 && !accept(IsChar('.'))) // Symbol:abc return std::nullopt; - if (leaf.list().size() > 1 && !accept(IsChar('.'))) + if (leaf->list().size() > 1 && !accept(IsChar('.'))) break; std::string res; if 
(!name(&res)) errorWithNextToken("Expected a field name: ."); - leaf.push_back(Node(NodeType::Symbol, res)); + leaf->push_back(Node(NodeType::Symbol, res)); } return leaf; @@ -199,7 +199,7 @@ namespace Ark::internal std::string res; if (!name(&res)) return std::nullopt; - return Node(NodeType::Symbol, res); + return { Node(NodeType::Symbol, res) }; } inline std::optional spread() @@ -209,7 +209,7 @@ namespace Ark::internal { if (!name(&res)) errorWithNextToken("Expected a name for the variadic"); - return Node(NodeType::Spread, res); + return { Node(NodeType::Spread, res) }; } return std::nullopt; } @@ -225,9 +225,9 @@ namespace Ark::internal return std::nullopt; if (m_interpret) - return Node(NodeType::Symbol, "nil").attachNearestCommentBefore(comment); + return { Node(NodeType::Symbol, "nil").attachNearestCommentBefore(comment) }; else - return Node(NodeType::List).attachNearestCommentBefore(comment); + return { Node(NodeType::List).attachNearestCommentBefore(comment) }; } std::optional atom(); diff --git a/src/arkreactor/Compiler/AST/BaseParser.cpp b/src/arkreactor/Compiler/AST/BaseParser.cpp index f29fab45d..095b45068 100644 --- a/src/arkreactor/Compiler/AST/BaseParser.cpp +++ b/src/arkreactor/Compiler/AST/BaseParser.cpp @@ -1,9 +1,31 @@ #include #include + +#include #include namespace Ark::internal { + void BaseParser::registerNewLine(std::string::iterator it, std::size_t row) + { + // search for an existing new line position + if (std::find_if(m_it_to_row.begin(), m_it_to_row.end(), [it](const auto& pair) { + return pair.first == it; + }) != m_it_to_row.end()) + return; + + for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i) + { + auto current = m_it_to_row[i].first; + auto next = i + 1 < end ? 
m_it_to_row[i + 1].first : m_str.end(); + if (current < it && it < next) + { + m_it_to_row.insert(m_it_to_row.begin() + i + 1, std::make_pair(it, row)); + break; + } + } + } + void BaseParser::next() { m_it = m_next_it; @@ -17,6 +39,15 @@ namespace Ark::internal auto [it, sym] = utf8_char_t::at(m_it); m_next_it = it; m_sym = sym; + + if (*m_it == '\n') + { + ++m_filepos.row; + m_filepos.col = 0; + registerNewLine(m_it, m_filepos.row); + } + else if (m_sym.isPrintable()) + m_filepos.col += m_sym.size(); } void BaseParser::initParser(const std::string& filename, const std::string& code) @@ -46,31 +77,32 @@ namespace Ark::internal auto [it, sym] = utf8_char_t::at(m_it); m_next_it = it; m_sym = sym; - } - - FilePosition BaseParser::getCursor() - { - FilePosition pos { 0, 0 }; - // adjust the row/col count (this is going to be VERY inefficient) - auto tmp = m_str.begin(); - while (true) + // TODO: create a kind of map vec> + // search for the nearest it < m_it in the map to know the line number + for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i) { - auto [it, sym] = utf8_char_t::at(tmp); - if (*tmp == '\n') + auto [at, line] = m_it_to_row[i]; + if (it < at) { - ++pos.row; - pos.col = 0; - } - else if (sym.isPrintable()) - pos.col += sym.size(); - tmp = it; - - if (tmp > m_it || tmp == m_str.end()) + m_filepos.row = line - 1; break; + } } + // compute the position in the line + std::string_view view = m_str; + auto it_pos = std::distance(m_str.begin(), m_it); + view = view.substr(0, it_pos); + auto nearest_newline_index = view.find_last_of('\n'); + if (nearest_newline_index != std::string_view::npos) + m_filepos.col = it_pos - nearest_newline_index + 1; + else + m_filepos.col = it_pos + 1; + } - return pos; + FilePosition BaseParser::getCursor() + { + return m_filepos; } void BaseParser::error(const std::string& error, std::string exp) diff --git a/src/arkreactor/Compiler/AST/Parser.cpp b/src/arkreactor/Compiler/AST/Parser.cpp index 55ae8febe..29f621794 
100644 --- a/src/arkreactor/Compiler/AST/Parser.cpp +++ b/src/arkreactor/Compiler/AST/Parser.cpp @@ -2,6 +2,8 @@ #include +#include + namespace Ark::internal { Parser::Parser(bool interpret) : @@ -48,11 +50,12 @@ namespace Ark::internal } } - void Parser::setNodePosAndFilename(Node& node) + Node& Parser::setNodePosAndFilename(Node& node, std::optional cursor) { - auto position = getCursor(); + auto position = cursor.value_or(getCursor()); node.setPos(position.row, position.col); node.setFilename(m_filename); + return node; } std::optional Parser::node() @@ -120,45 +123,47 @@ namespace Ark::internal std::optional Parser::letMutSet() { + std::optional leaf { NodeType::List }; + setNodePosAndFilename(leaf.value()); + std::string token; if (!oneOf({ "let", "mut", "set" }, &token)) return std::nullopt; std::string comment; newlineOrComment(&comment); + leaf->attachNearestCommentBefore(comment); - Node leaf(NodeType::List); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); if (token == "let") - leaf.push_back(Node(Keyword::Let)); + leaf->push_back(Node(Keyword::Let)); else if (token == "mut") - leaf.push_back(Node(Keyword::Mut)); + leaf->push_back(Node(Keyword::Mut)); else // "set" - leaf.push_back(Node(Keyword::Set)); + leaf->push_back(Node(Keyword::Set)); if (m_allow_macro_behavior > 0) { auto position = getCount(); if (auto value = nodeOrValue(); value.has_value()) - leaf.push_back(value.value()); + leaf->push_back(value.value()); else backtrack(position); } - if (leaf.constList().size() == 1) + if (leaf->constList().size() == 1) { // we haven't parsed anything while in "macro state" std::string symbol; if (!name(&symbol)) errorWithNextToken(token + " needs a symbol"); - leaf.push_back(Node(NodeType::Symbol, symbol)); + leaf->push_back(Node(NodeType::Symbol, symbol)); } comment.clear(); newlineOrComment(&comment); if (auto value = nodeOrValue(); value.has_value()) - leaf.push_back(value.value().attachNearestCommentBefore(comment)); + 
leaf->push_back(value.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a value"); @@ -167,39 +172,42 @@ namespace Ark::internal std::optional Parser::del() { - std::string keyword; - if (!oneOf({ "del" }, &keyword)) + std::optional leaf { NodeType::List }; + setNodePosAndFilename(leaf.value()); + + if (!oneOf({ "del" })) return std::nullopt; + leaf->push_back(Node(Keyword::Del)); std::string comment; newlineOrComment(&comment); std::string symbol; if (!name(&symbol)) - errorWithNextToken(keyword + " needs a symbol"); + errorWithNextToken("del needs a symbol"); - Node leaf(NodeType::List); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); - leaf.push_back(Node(Keyword::Del)); - leaf.push_back(Node(NodeType::Symbol, symbol)); + leaf->push_back(Node(NodeType::Symbol, symbol)); + leaf->list().back().attachNearestCommentBefore(comment); + setNodePosAndFilename(leaf->list().back()); return leaf; } std::optional Parser::condition() { + std::optional leaf { NodeType::List }; + setNodePosAndFilename(leaf.value()); + if (!oneOf({ "if" })) return std::nullopt; std::string comment; newlineOrComment(&comment); - Node leaf(NodeType::List); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); - leaf.push_back(Node(Keyword::If)); + leaf->push_back(Node(Keyword::If)); if (auto condition = nodeOrValue(); condition.has_value()) - leaf.push_back(condition.value()); + leaf->push_back(condition.value().attachNearestCommentBefore(comment)); else errorWithNextToken("If need a valid condition"); @@ -207,7 +215,7 @@ namespace Ark::internal newlineOrComment(&comment); if (auto value_if_true = nodeOrValue(); value_if_true.has_value()) - leaf.push_back(value_if_true.value().attachNearestCommentBefore(comment)); + leaf->push_back(value_if_true.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a value"); @@ -216,27 +224,29 @@ namespace Ark::internal if (auto value_if_false = nodeOrValue(); 
value_if_false.has_value()) { - leaf.push_back(value_if_false.value().attachNearestCommentBefore(comment)); + leaf->push_back(value_if_false.value().attachNearestCommentBefore(comment)); newlineOrComment(); // FIXME how to attach a comment after the node? another field? } + setNodePosAndFilename(leaf->list().back()); return leaf; } std::optional Parser::loop() { + std::optional leaf { NodeType::List }; + setNodePosAndFilename(leaf.value()); + if (!oneOf({ "while" })) return std::nullopt; std::string comment; newlineOrComment(&comment); - Node leaf(NodeType::List); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); - leaf.push_back(Node(Keyword::While)); + leaf->push_back(Node(Keyword::While)); if (auto condition = nodeOrValue(); condition.has_value()) - leaf.push_back(condition.value()); + leaf->push_back(condition.value().attachNearestCommentBefore(comment)); else errorWithNextToken("While need a valid condition"); @@ -244,28 +254,30 @@ namespace Ark::internal newlineOrComment(&comment); if (auto body = nodeOrValue(); body.has_value()) - leaf.push_back(body.value().attachNearestCommentBefore(comment)); + leaf->push_back(body.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a value"); + setNodePosAndFilename(leaf->list().back()); return leaf; } std::optional Parser::import_() { + std::optional leaf { NodeType::List }; + setNodePosAndFilename(leaf.value()); + if (!accept(IsChar('('))) return std::nullopt; std::string comment; newlineOrComment(&comment); - - Node leaf(NodeType::List); - setNodePosAndFilename(leaf); + leaf->attachNearestCommentBefore(comment); if (!oneOf({ "import" })) return std::nullopt; comment.clear(); newlineOrComment(&comment); - leaf.push_back(Node(Keyword::Import).attachNearestCommentBefore(comment)); + leaf->push_back(Node(Keyword::Import)); Import import_data; @@ -274,8 +286,8 @@ namespace Ark::internal import_data.package.push_back(import_data.prefix); Node packageNode(NodeType::List); - 
setNodePosAndFilename(packageNode); - packageNode.push_back(Node(NodeType::String, import_data.prefix)); + setNodePosAndFilename(packageNode.attachNearestCommentBefore(comment)); + packageNode.push_back(Node(NodeType::Symbol, import_data.prefix)); // first, parse the package name while (!isEOF()) @@ -288,19 +300,21 @@ namespace Ark::internal errorWithNextToken("Package name expected after '.'"); else { - packageNode.push_back(Node(NodeType::String, path)); + packageNode.push_back(Node(NodeType::Symbol, path)); + setNodePosAndFilename(packageNode.list().back()); import_data.package.push_back(path); import_data.prefix = path; // in the end we will store the last element of the package, which is what we want } } else if (accept(IsChar(':')) && accept(IsChar('*'))) // parsing :* { + leaf->push_back(packageNode); + leaf->push_back(Node(NodeType::Symbol, "*")); + setNodePosAndFilename(leaf->list().back()); + space(); expect(IsChar(')')); - leaf.push_back(packageNode); - leaf.push_back(Node(NodeType::Symbol, "*")); - // save the import data structure to know we encounter an import node, and retrieve its data more easily later on import_data.with_prefix = false; m_imports.push_back(import_data); @@ -323,7 +337,9 @@ namespace Ark::internal { std::string symbol; if (!name(&symbol)) - errorWithNextToken("Expected a valid symbol to import"); + errorWithNextToken(fmt::format("Expected a valid symbol to import, not `{}'", symbol)); + if (symbol == "*") + error(fmt::format("Glob patterns can not be separated from the package, use (import {}:*) instead", import_data.toPackageString()), symbol); if (symbol.size() >= 2 && symbol[symbol.size() - 2] == ':' && symbol.back() == '*') { @@ -332,6 +348,7 @@ namespace Ark::internal } symbols.push_back(Node(NodeType::Symbol, symbol).attachNearestCommentBefore(comment)); + setNodePosAndFilename(symbols.list().back()); import_data.symbols.push_back(symbol); } @@ -341,8 +358,8 @@ namespace Ark::internal } } - leaf.push_back(packageNode); - 
leaf.push_back(symbols); + leaf->push_back(packageNode); + leaf->push_back(symbols); // save the import data m_imports.push_back(import_data); @@ -353,6 +370,9 @@ namespace Ark::internal std::optional Parser::block() { + std::optional leaf { NodeType::List }; + setNodePosAndFilename(leaf.value()); + bool alt_syntax = false; std::string comment; if (accept(IsChar('('))) @@ -366,9 +386,7 @@ namespace Ark::internal else return std::nullopt; - Node leaf(NodeType::List); - setNodePosAndFilename(leaf); - leaf.push_back(Node(Keyword::Begin).attachNearestCommentBefore(comment)); + leaf->push_back(Node(Keyword::Begin).attachNearestCommentBefore(comment)); comment.clear(); newlineOrComment(&comment); @@ -377,7 +395,7 @@ namespace Ark::internal { if (auto value = nodeOrValue(); value.has_value()) { - leaf.push_back(value.value().attachNearestCommentBefore(comment)); + leaf->push_back(value.value().attachNearestCommentBefore(comment)); comment.clear(); newlineOrComment(&comment); } @@ -387,17 +405,20 @@ namespace Ark::internal newlineOrComment(&comment); // FIXME: attach comment after last node expect(IsChar(!alt_syntax ? 
')' : '}')); + setNodePosAndFilename(leaf->list().back()); return leaf; } std::optional Parser::functionArgs() { expect(IsChar('(')); + std::optional args { NodeType::List }; + setNodePosAndFilename(args.value()); + std::string comment; newlineOrComment(&comment); + args->attachNearestCommentBefore(comment); - Node args(NodeType::List); - setNodePosAndFilename(args.attachNearestCommentBefore(comment)); bool has_captures = false; while (!isEOF()) @@ -410,9 +431,9 @@ namespace Ark::internal break; else { + args->push_back(Node(NodeType::Capture, capture).attachNearestCommentBefore(comment)); comment.clear(); newlineOrComment(&comment); - args.push_back(Node(NodeType::Capture, capture).attachNearestCommentBefore(comment)); } } else @@ -429,9 +450,9 @@ namespace Ark::internal error("Captured variables should be at the end of the argument list", symbol); } + args->push_back(Node(NodeType::Symbol, symbol).attachNearestCommentBefore(comment)); comment.clear(); newlineOrComment(&comment); - args.push_back(Node(NodeType::Symbol, symbol).attachNearestCommentBefore(comment)); } } } @@ -443,18 +464,21 @@ namespace Ark::internal std::optional Parser::function() { + std::optional leaf { NodeType::List }; + setNodePosAndFilename(leaf.value()); + if (!oneOf({ "fun" })) return std::nullopt; + leaf->push_back(Node(Keyword::Fun)); + std::string comment; newlineOrComment(&comment); + leaf->attachNearestCommentBefore(comment); while (m_allow_macro_behavior > 0) { auto position = getCount(); - Node leaf(NodeType::List); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); - leaf.push_back(Node(Keyword::Fun)); // args if (auto value = nodeOrValue(); value.has_value()) { @@ -463,9 +487,9 @@ namespace Ark::internal Node args = value.value(); setNodePosAndFilename(args); if (args.nodeType() == NodeType::Symbol && args.string() == "nil") - leaf.push_back(Node(NodeType::List)); + leaf->push_back(Node(NodeType::List)); else - leaf.push_back(args); + leaf->push_back(args); } else 
{ @@ -477,25 +501,22 @@ namespace Ark::internal newlineOrComment(&comment); // body if (auto value = nodeOrValue(); value.has_value()) - leaf.push_back(value.value().attachNearestCommentBefore(comment)); + leaf->push_back(value.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a body for the function"); + setNodePosAndFilename(leaf->list().back()); return leaf; } - Node leaf(NodeType::List); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); - leaf.push_back(Node(Keyword::Fun)); - auto position = getCount(); if (auto args = functionArgs(); args.has_value()) - leaf.push_back(args.value()); + leaf->push_back(args.value()); else { backtrack(position); if (auto value = nodeOrValue(); value.has_value()) - leaf.push_back(value.value()); + leaf->push_back(value.value()); else errorWithNextToken("Expected an argument list"); } @@ -504,26 +525,29 @@ namespace Ark::internal newlineOrComment(&comment); if (auto value = nodeOrValue(); value.has_value()) - leaf.push_back(value.value().attachNearestCommentBefore(comment)); + leaf->push_back(value.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a body for the function"); + setNodePosAndFilename(leaf->list().back()); return leaf; } std::optional Parser::macroCondition() { + std::optional leaf { NodeType::Macro }; + setNodePosAndFilename(leaf.value()); + if (!oneOf({ "$if" })) return std::nullopt; + leaf->push_back(Node(Keyword::If)); + std::string comment; newlineOrComment(&comment); - - Node leaf(NodeType::Macro); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); - leaf.push_back(Node(Keyword::If)); + leaf->attachNearestCommentBefore(comment); if (auto condition = nodeOrValue(); condition.has_value()) - leaf.push_back(condition.value()); + leaf->push_back(condition.value()); else errorWithNextToken("$if need a valid condition"); @@ -531,7 +555,7 @@ namespace Ark::internal newlineOrComment(&comment); if (auto value_if_true = 
nodeOrValue(); value_if_true.has_value()) - leaf.push_back(value_if_true.value().attachNearestCommentBefore(comment)); + leaf->push_back(value_if_true.value().attachNearestCommentBefore(comment)); else errorWithNextToken("Expected a value"); @@ -540,59 +564,63 @@ namespace Ark::internal if (auto value_if_false = nodeOrValue(); value_if_false.has_value()) { - leaf.push_back(value_if_false.value().attachNearestCommentBefore(comment)); + leaf->push_back(value_if_false.value().attachNearestCommentBefore(comment)); comment.clear(); newlineOrComment(&comment); // FIXME: attach comment after node } + setNodePosAndFilename(leaf->list().back()); return leaf; } std::optional Parser::macroArgs() { - if (accept(IsChar('('))) - { - std::string comment; - newlineOrComment(&comment); - Node args = Node(NodeType::List); - setNodePosAndFilename(args.attachNearestCommentBefore(comment)); + if (!accept(IsChar('('))) + return std::nullopt; - while (!isEOF()) - { - std::string arg_name; - if (!name(&arg_name)) - break; - else - { - comment.clear(); - newlineOrComment(&comment); - args.push_back(Node(NodeType::Symbol, arg_name).attachNearestCommentBefore(comment)); - } - } + std::optional args { NodeType::List }; + setNodePosAndFilename(args.value()); + + std::string comment; + newlineOrComment(&comment); + args->attachNearestCommentBefore(comment); - if (sequence("...")) + while (!isEOF()) + { + std::string arg_name; + if (!name(&arg_name)) + break; + else { - std::string spread_name; - if (!name(&spread_name)) - errorWithNextToken("Expected a name for the variadic arguments list"); - args.push_back(Node(NodeType::Spread, spread_name)); comment.clear(); - newlineOrComment(&comment); // FIXME: attach comment after node + newlineOrComment(&comment); + args->push_back(Node(NodeType::Symbol, arg_name).attachNearestCommentBefore(comment)); } + } - if (!accept(IsChar(')'))) - return std::nullopt; + if (sequence("...")) + { + std::string spread_name; + if (!name(&spread_name)) + 
errorWithNextToken("Expected a name for the variadic arguments list"); + args->push_back(Node(NodeType::Spread, spread_name)); comment.clear(); newlineOrComment(&comment); // FIXME: attach comment after node - - return args; } - return std::nullopt; + if (!accept(IsChar(')'))) + return std::nullopt; + comment.clear(); + newlineOrComment(&comment); // FIXME: attach comment after node + + return args; } std::optional Parser::macro() { + std::optional leaf { NodeType::Macro }; + setNodePosAndFilename(leaf.value()); + if (!accept(IsChar('('))) return std::nullopt; std::string comment; @@ -601,9 +629,7 @@ namespace Ark::internal if (!oneOf({ "$" })) return std::nullopt; newlineOrComment(&comment); - - Node leaf(NodeType::Macro); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); + leaf->attachNearestCommentBefore(comment); std::string symbol; if (!name(&symbol)) @@ -611,11 +637,11 @@ namespace Ark::internal comment.clear(); newlineOrComment(&comment); - leaf.push_back(Node(NodeType::Symbol, symbol).attachNearestCommentBefore(comment)); + leaf->push_back(Node(NodeType::Symbol, symbol).attachNearestCommentBefore(comment)); auto position = getCount(); if (auto args = macroArgs(); args.has_value()) - leaf.push_back(args.value()); + leaf->push_back(args.value()); else { backtrack(position); @@ -625,10 +651,11 @@ namespace Ark::internal --m_allow_macro_behavior; if (value.has_value()) - leaf.push_back(value.value()); + leaf->push_back(value.value()); else errorWithNextToken("Expected an argument list, atom or node while defining macro `" + symbol + "'"); + setNodePosAndFilename(leaf->list().back()); if (accept(IsChar(')'))) return leaf; } @@ -638,13 +665,14 @@ namespace Ark::internal --m_allow_macro_behavior; if (value.has_value()) - leaf.push_back(value.value()); + leaf->push_back(value.value()); else errorWithNextToken("Expected a value while defining macro `" + symbol + "'"); comment.clear(); newlineOrComment(&comment); // FIXME: attach comment after node 
expect(IsChar(')')); + setNodePosAndFilename(leaf->list().back()); return leaf; } @@ -673,17 +701,17 @@ namespace Ark::internal call_type = NodeType::Macro; } - Node leaf(call_type); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); - leaf.push_back(func.value()); + std::optional leaf { call_type }; + setNodePosAndFilename(leaf.value()); + leaf->push_back(func.value()); while (!isEOF()) { if (auto arg = nodeOrValue(); arg.has_value()) { + leaf->push_back(arg.value().attachNearestCommentBefore(comment)); comment.clear(); newlineOrComment(&comment); - leaf.push_back(arg.value().attachNearestCommentBefore(comment)); } else break; @@ -691,27 +719,30 @@ namespace Ark::internal comment.clear(); newlineOrComment(&comment); // FIXME: attach comment after node + setNodePosAndFilename(leaf->list().back()); expect(IsChar(')')); return leaf; } std::optional Parser::list() { + std::optional leaf { NodeType::List }; + setNodePosAndFilename(leaf.value()); + if (!accept(IsChar('['))) return std::nullopt; + leaf->push_back(Node(NodeType::Symbol, "list")); + std::string comment; newlineOrComment(&comment); - - Node leaf(NodeType::List); - setNodePosAndFilename(leaf.attachNearestCommentBefore(comment)); - leaf.push_back(Node(NodeType::Symbol, "list")); + leaf->attachNearestCommentBefore(comment); comment.clear(); while (!isEOF()) { if (auto value = nodeOrValue(); value.has_value()) { - leaf.push_back(value.value().attachNearestCommentBefore(comment)); + leaf->push_back(value.value().attachNearestCommentBefore(comment)); comment.clear(); newlineOrComment(&comment); } @@ -721,6 +752,7 @@ namespace Ark::internal newlineOrComment(&comment); expect(IsChar(']')); // FIXME: attach comment after node + setNodePosAndFilename(leaf->list().back()); return leaf; } @@ -778,15 +810,22 @@ namespace Ark::internal std::optional Parser::nodeOrValue() { if (auto value = atom(); value.has_value()) + { + setNodePosAndFilename(value.value()); return value; + } else if (auto sub_node = 
node(); sub_node.has_value()) + { + setNodePosAndFilename(sub_node.value()); return sub_node; + } return std::nullopt; } std::optional Parser::wrapped(std::optional (Parser::*parser)(), const std::string& name) { + auto cursor = getCursor(); if (!prefix('(')) return std::nullopt; std::string comment; @@ -795,6 +834,8 @@ namespace Ark::internal if (auto result = (this->*parser)(); result.has_value()) { result->attachNearestCommentBefore(comment); + setNodePosAndFilename(result.value(), cursor); + comment.clear(); newlineOrComment(&comment); // FIXME: attach comment after node if (!suffix(')')) diff --git a/tests/unittests/resources/ParserSuite/success/import.expected b/tests/unittests/resources/ParserSuite/success/import.expected index e5bd0d97d..c304198bc 100644 --- a/tests/unittests/resources/ParserSuite/success/import.expected +++ b/tests/unittests/resources/ParserSuite/success/import.expected @@ -1,11 +1,11 @@ -( Keyword:Import ( String:a ) ( ) ) -( Keyword:Import ( String:a String:b ) ( ) ) -( Keyword:Import ( String:foo String:bar String:egg ) ( ) ) -( Keyword:Import ( String:foo ) Symbol:* ) -( Keyword:Import ( String:foo String:bar ) Symbol:* ) -( Keyword:Import ( String:foo String:bar String:egg ) Symbol:* ) -( Keyword:Import ( String:foo ) ( Symbol:a ) ) -( Keyword:Import ( String:foo String:bar ) ( Symbol:a Symbol:b ) ) +( Keyword:Import ( Symbol:a ) ( ) ) +( Keyword:Import ( Symbol:a Symbol:b ) ( ) ) +( Keyword:Import ( Symbol:foo Symbol:bar Symbol:egg ) ( ) ) +( Keyword:Import ( Symbol:foo ) Symbol:* ) +( Keyword:Import ( Symbol:foo Symbol:bar ) Symbol:* ) +( Keyword:Import ( Symbol:foo Symbol:bar Symbol:egg ) Symbol:* ) +( Keyword:Import ( Symbol:foo ) ( Symbol:a ) ) +( Keyword:Import ( Symbol:foo Symbol:bar ) ( Symbol:a Symbol:b ) ) 0) a (basic) 1) b (basic) From ca950273771429d2af6bc44b6100cc70bca57c7f Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sun, 31 Mar 2024 16:47:48 +0200 Subject: [PATCH 11/12] feat(parser): finish handling comments 
in the parser, taking care of comments after nodes too --- include/Ark/Compiler/AST/BaseParser.hpp | 1 + include/Ark/Compiler/AST/Node.hpp | 5 ++ src/arkreactor/Compiler/AST/BaseParser.cpp | 18 +++++- src/arkreactor/Compiler/AST/Node.cpp | 28 +++++---- src/arkreactor/Compiler/AST/Parser.cpp | 68 +++++++++++++++++----- 5 files changed, 94 insertions(+), 26 deletions(-) diff --git a/include/Ark/Compiler/AST/BaseParser.hpp b/include/Ark/Compiler/AST/BaseParser.hpp index efc3e3279..7a5eee7f5 100644 --- a/include/Ark/Compiler/AST/BaseParser.hpp +++ b/include/Ark/Compiler/AST/BaseParser.hpp @@ -76,6 +76,7 @@ namespace Ark::internal bool inlineSpace(std::string* s = nullptr); bool endOfLine(std::string* s = nullptr); bool comment(std::string* s = nullptr); + bool spaceComment(std::string* s = nullptr); bool newlineOrComment(std::string* s = nullptr); bool prefix(char c); bool suffix(char c); diff --git a/include/Ark/Compiler/AST/Node.hpp b/include/Ark/Compiler/AST/Node.hpp index b3f8888f5..bdca3787d 100644 --- a/include/Ark/Compiler/AST/Node.hpp +++ b/include/Ark/Compiler/AST/Node.hpp @@ -134,6 +134,8 @@ namespace Ark::internal */ Node& attachNearestCommentBefore(const std::string& comment); + Node& attachCommentAfter(const std::string& comment); + /** * @brief Get the line at which this node was created * @@ -161,6 +163,8 @@ namespace Ark::internal */ [[nodiscard]] const std::string& comment() const noexcept; + [[nodiscard]] const std::string& commentAfter() const noexcept; + /** * @brief Compute a representation of the node without any comments or additional sugar, colors, types * @return String representation of the node @@ -179,6 +183,7 @@ namespace Ark::internal std::size_t m_line = 0, m_col = 0; std::string m_filename; std::string m_comment; + std::string m_after_comment; ///< Comment after node }; ARK_API std::ostream& operator<<(std::ostream& os, const std::vector& node) noexcept; diff --git a/src/arkreactor/Compiler/AST/BaseParser.cpp 
b/src/arkreactor/Compiler/AST/BaseParser.cpp index 095b45068..234974c82 100644 --- a/src/arkreactor/Compiler/AST/BaseParser.cpp +++ b/src/arkreactor/Compiler/AST/BaseParser.cpp @@ -208,9 +208,25 @@ namespace Ark::internal return false; } + bool BaseParser::spaceComment(std::string* s) + { + bool matched = false; + + inlineSpace(); + while (!isEOF() && comment(s)) + { + inlineSpace(); + matched = true; + } + + return matched; + } + bool BaseParser::newlineOrComment(std::string* s) { - bool matched = space(); + bool matched = false; + + space(); while (!isEOF() && comment(s)) { space(); diff --git a/src/arkreactor/Compiler/AST/Node.cpp b/src/arkreactor/Compiler/AST/Node.cpp index d7684153d..b21f1ddfe 100644 --- a/src/arkreactor/Compiler/AST/Node.cpp +++ b/src/arkreactor/Compiler/AST/Node.cpp @@ -105,6 +105,16 @@ namespace Ark::internal return *this; } + Node& Node::attachCommentAfter(const std::string& comment) + { + if (!m_after_comment.empty()) + m_after_comment += "\n"; + m_after_comment += comment; + if (m_after_comment.back() == '\n') + m_after_comment.pop_back(); + return *this; + } + std::size_t Node::line() const noexcept { return m_line; @@ -125,6 +135,11 @@ namespace Ark::internal return m_comment; } + const std::string& Node::commentAfter() const noexcept + { + return m_after_comment; + } + std::string Node::repr() const noexcept { std::string data; @@ -295,12 +310,8 @@ namespace Ark::internal if (A.m_type != NodeType::List) return A.m_value == B.m_value; - - if (A.m_type == NodeType::List) + else throw TypeError("Can not compare lists"); - - // any other type => false (here, Closure) - return false; } bool operator<(const Node& A, const Node& B) @@ -317,9 +328,6 @@ namespace Ark::internal case NodeType::String: return A.m_value < B.m_value; - case NodeType::List: - // return A.m_list < B.m_list; // fixme - default: return false; } @@ -329,6 +337,8 @@ namespace Ark::internal { switch (A.nodeType()) { + case NodeType::Field: + [[fallthrough]]; case 
NodeType::List: return A.constList().empty(); @@ -347,8 +357,6 @@ namespace Ark::internal return true; return false; - // todo: implement field? - default: return false; } diff --git a/src/arkreactor/Compiler/AST/Parser.cpp b/src/arkreactor/Compiler/AST/Parser.cpp index 29f621794..89828c362 100644 --- a/src/arkreactor/Compiler/AST/Parser.cpp +++ b/src/arkreactor/Compiler/AST/Parser.cpp @@ -42,11 +42,20 @@ namespace Ark::internal std::string comment; newlineOrComment(&comment); if (isEOF()) + { + if (!comment.empty()) + m_ast.list().back().attachCommentAfter(comment); break; + } auto n = node(); if (n) + { m_ast.push_back(n.value().attachNearestCommentBefore(comment)); + comment.clear(); + if (spaceComment(&comment)) + m_ast.list().back().attachCommentAfter(comment); + } } } @@ -225,8 +234,12 @@ namespace Ark::internal if (auto value_if_false = nodeOrValue(); value_if_false.has_value()) { leaf->push_back(value_if_false.value().attachNearestCommentBefore(comment)); - newlineOrComment(); // FIXME how to attach a comment after the node? another field? + comment.clear(); + if (newlineOrComment(&comment)) + leaf->list().back().attachCommentAfter(comment); } + else if (!comment.empty()) + leaf->attachCommentAfter(comment); setNodePosAndFilename(leaf->list().back()); return leaf; @@ -328,9 +341,11 @@ namespace Ark::internal Node symbols(NodeType::List); setNodePosAndFilename(symbols); // then parse the symbols to import, if any - comment.clear(); - if (newlineOrComment(&comment)) + if (space()) // fixme: potential regression introduced here { + comment.clear(); + newlineOrComment(&comment); + while (!isEOF()) { if (accept(IsChar(':'))) // parsing potential :a :b :c @@ -352,9 +367,10 @@ namespace Ark::internal import_data.symbols.push_back(symbol); } - comment.clear(); - if (!newlineOrComment(&comment)) // TODO what to do? 
+ if (!space()) break; + comment.clear(); + newlineOrComment(&comment); } } @@ -363,7 +379,10 @@ namespace Ark::internal // save the import data m_imports.push_back(import_data); - newlineOrComment(); // FIXME: attach comment after the node + comment.clear(); + if (newlineOrComment(&comment)) + leaf->list().back().attachCommentAfter(comment); + expect(IsChar(')')); return leaf; } @@ -604,14 +623,17 @@ namespace Ark::internal if (!name(&spread_name)) errorWithNextToken("Expected a name for the variadic arguments list"); args->push_back(Node(NodeType::Spread, spread_name)); + comment.clear(); - newlineOrComment(&comment); // FIXME: attach comment after node + if (newlineOrComment(&comment)) + args->list().back().attachCommentAfter(comment); } if (!accept(IsChar(')'))) return std::nullopt; comment.clear(); - newlineOrComment(&comment); // FIXME: attach comment after node + if (newlineOrComment(&comment)) + args->list().back().attachCommentAfter(comment); return args; } @@ -669,10 +691,12 @@ namespace Ark::internal else errorWithNextToken("Expected a value while defining macro `" + symbol + "'"); + setNodePosAndFilename(leaf->list().back()); comment.clear(); - newlineOrComment(&comment); // FIXME: attach comment after node + if (newlineOrComment(&comment)) + leaf->list().back().attachCommentAfter(comment); + expect(IsChar(')')); - setNodePosAndFilename(leaf->list().back()); return leaf; } @@ -717,9 +741,13 @@ namespace Ark::internal break; } - comment.clear(); - newlineOrComment(&comment); // FIXME: attach comment after node + leaf->list().back().attachCommentAfter(comment); setNodePosAndFilename(leaf->list().back()); + + comment.clear(); + if (newlineOrComment(&comment)) + leaf->list().back().attachCommentAfter(comment); + expect(IsChar(')')); return leaf; } @@ -749,10 +777,11 @@ namespace Ark::internal else break; } + leaf->list().back().attachCommentAfter(comment); - newlineOrComment(&comment); - expect(IsChar(']')); // FIXME: attach comment after node 
setNodePosAndFilename(leaf->list().back()); + + expect(IsChar(']')); return leaf; } @@ -837,9 +866,18 @@ namespace Ark::internal setNodePosAndFilename(result.value(), cursor); comment.clear(); - newlineOrComment(&comment); // FIXME: attach comment after node + if (newlineOrComment(&comment)) + result.value().attachCommentAfter(comment); + if (!suffix(')')) errorMissingSuffix(')', name); + if (result->isListLike()) + setNodePosAndFilename(result->list().back()); + + comment.clear(); + if (spaceComment(&comment)) + result.value().attachCommentAfter(comment); + return result; } From a773898c7885094392080a58061f4ba42e7c7b41 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Tue, 19 Mar 2024 21:36:19 +0100 Subject: [PATCH 12/12] feat(formatter): adding a first version of an ArkScript code formatter --- CMakeLists.txt | 1 + README.md | 22 +- include/CLI/Formatter.hpp | 79 +++ src/arkreactor/Compiler/AST/Node.cpp | 2 +- src/arkscript/Formatter.cpp | 450 ++++++++++++++++++ src/arkscript/main.cpp | 34 +- tests/unittests/FormatterSuite.cpp | 24 + .../resources/FormatterSuite/calls.ark | 10 + .../resources/FormatterSuite/calls.expected | 10 + .../FormatterSuite/comments_after_call.ark | 10 + .../comments_after_call.expected | 17 + .../FormatterSuite/comments_after_cond.ark | 10 + .../comments_after_cond.expected | 9 + .../FormatterSuite/comments_after_import.ark | 1 + .../comments_after_import.expected | 1 + .../comments_after_variable.ark | 5 + .../comments_after_variable.expected | 4 + .../FormatterSuite/comments_after_while.ark | 7 + .../comments_after_while.expected | 6 + .../resources/FormatterSuite/conditions.ark | 10 + .../FormatterSuite/conditions.expected | 23 + .../resources/FormatterSuite/del.ark | 3 + .../resources/FormatterSuite/del.expected | 4 + .../resources/FormatterSuite/field.ark | 5 + .../resources/FormatterSuite/field.expected | 6 + .../resources/FormatterSuite/functions.ark | 19 + .../FormatterSuite/functions.expected | 18 + 
.../resources/FormatterSuite/imports.ark | 13 + .../resources/FormatterSuite/imports.expected | 16 + .../resources/FormatterSuite/loop.ark | 6 + .../resources/FormatterSuite/loop.expected | 10 + .../resources/FormatterSuite/macro_cond.ark | 2 + .../FormatterSuite/macro_cond.expected | 4 + .../resources/FormatterSuite/macros.ark | 6 + .../resources/FormatterSuite/macros.expected | 6 + .../resources/FormatterSuite/vars.ark | 8 + .../resources/FormatterSuite/vars.expected | 9 + 37 files changed, 856 insertions(+), 14 deletions(-) create mode 100644 include/CLI/Formatter.hpp create mode 100644 src/arkscript/Formatter.cpp create mode 100644 tests/unittests/FormatterSuite.cpp create mode 100644 tests/unittests/resources/FormatterSuite/calls.ark create mode 100644 tests/unittests/resources/FormatterSuite/calls.expected create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_call.ark create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_call.expected create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_cond.ark create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_cond.expected create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_import.ark create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_import.expected create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_variable.ark create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_variable.expected create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_while.ark create mode 100644 tests/unittests/resources/FormatterSuite/comments_after_while.expected create mode 100644 tests/unittests/resources/FormatterSuite/conditions.ark create mode 100644 tests/unittests/resources/FormatterSuite/conditions.expected create mode 100644 tests/unittests/resources/FormatterSuite/del.ark create mode 100644 tests/unittests/resources/FormatterSuite/del.expected 
create mode 100644 tests/unittests/resources/FormatterSuite/field.ark create mode 100644 tests/unittests/resources/FormatterSuite/field.expected create mode 100644 tests/unittests/resources/FormatterSuite/functions.ark create mode 100644 tests/unittests/resources/FormatterSuite/functions.expected create mode 100644 tests/unittests/resources/FormatterSuite/imports.ark create mode 100644 tests/unittests/resources/FormatterSuite/imports.expected create mode 100644 tests/unittests/resources/FormatterSuite/loop.ark create mode 100644 tests/unittests/resources/FormatterSuite/loop.expected create mode 100644 tests/unittests/resources/FormatterSuite/macro_cond.ark create mode 100644 tests/unittests/resources/FormatterSuite/macro_cond.expected create mode 100644 tests/unittests/resources/FormatterSuite/macros.ark create mode 100644 tests/unittests/resources/FormatterSuite/macros.expected create mode 100644 tests/unittests/resources/FormatterSuite/vars.ark create mode 100644 tests/unittests/resources/FormatterSuite/vars.expected diff --git a/CMakeLists.txt b/CMakeLists.txt index d9333807c..ddeee842a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -184,6 +184,7 @@ if (ARK_TESTS) add_executable(unittests ${UT_SOURCES}) add_subdirectory(${ark_SOURCE_DIR}/lib/ut) + target_include_directories(unittests PUBLIC ${ark_SOURCE_DIR}/include) target_link_libraries(unittests PUBLIC ArkReactor termcolor ut) add_compile_definitions(BOOST_UT_DISABLE_MODULE) diff --git a/README.md b/README.md index 271b3c453..b487b13fe 100644 --- a/README.md +++ b/README.md @@ -186,20 +186,35 @@ SYNOPSIS arkscript --dev-info arkscript -e arkscript -c [-d] + arkscript [-d] [-L ] + arkscript -f [--dry-run] + arkscript --ast [-d] [-L ] arkscript -bcr -on arkscript -bcr -a [-s ] arkscript -bcr -st [-s ] arkscript -bcr -vt [-s ] arkscript -bcr [-cs] [-p ] [-s ] - arkscript [-d] [-L ] OPTIONS -h, --help Display this message -v, --version Display ArkScript version and exit --dev-info Display development 
information and exit -e, --eval Evaluate ArkScript expression + -c, --compile Compile the given program to bytecode, but do not run -d, --debug... Increase debug level (default: 0) + + -L, --lib Set the location of the ArkScript standard library. Paths can be + delimited by ';' + + -f, --format Format the given source file in place + --dry-run Do not modify the file, only print out the changes + + --ast Compile the given program and output its AST as JSON to stdout + -d, --debug... Increase debug level (default: 0) + -L, --lib Set the location of the ArkScript standard library. Paths can be + delimited by ';' + -bcr, --bytecode-reader Launch the bytecode reader -on, --only-names Display only the bytecode segments names and sizes -a, --all Display all the bytecode segments (default) @@ -208,8 +223,9 @@ OPTIONS -cs, --code Display only the code segments -p, --page Set the bytecode reader code segment to display -s, --slice Select a slice of instructions in the bytecode - -L, --lib Set the location of the ArkScript standard library. 
Paths can be - delimited by ';' + +VERSION + 4.0.0-86587c14 LICENSE Mozilla Public License 2.0 diff --git a/include/CLI/Formatter.hpp b/include/CLI/Formatter.hpp new file mode 100644 index 000000000..a9aec82c4 --- /dev/null +++ b/include/CLI/Formatter.hpp @@ -0,0 +1,79 @@ +#ifndef ARK_FORMATTER_HPP +#define ARK_FORMATTER_HPP + +#include + +#include + +constexpr struct FormatterConfig +{ + static constexpr std::size_t SpacePerIndent = 2; ///< Indentation level of each node + static constexpr std::size_t LongLineLength = 32; ///< Max number of characters per line segment to consider splitting +} FormatterConfig; + +class Formatter final +{ +public: + Formatter(std::string filename, bool dry_run); + + void run(); + + [[nodiscard]] const std::string& output() const; + +private: + const std::string m_filename; + bool m_dry_run; ///< If true, only prints the formatted file instead of saving it to disk + Ark::internal::Parser m_parser; + std::string m_output; + + bool isListStartingWithKeyword(const Ark::internal::Node& node, Ark::internal::Keyword keyword); + bool isBeginBlock(const Ark::internal::Node& node); + bool isFuncDef(const Ark::internal::Node& node); + bool isFuncCall(const Ark::internal::Node& node); + + /** + * @param node + * @return true if the node is a String|Number|Symbol|Field + * @return false + */ + bool isPlainValue(const Ark::internal::Node& node); + + /** + * @brief Compute the line on which the deepest right most node of node is at + * @param node + * @return + */ + std::size_t lineOfLastNodeIn(const Ark::internal::Node& node); + + bool should_split_on_newline(const Ark::internal::Node& node); + + inline constexpr std::string prefix(std::size_t indent) const + { + return std::string(indent * FormatterConfig.SpacePerIndent, ' '); + } + + /** + * @brief Handles all node formatting + * @param node + * @param indent indentation level, starting at 0, increment by 1 + * @param after_newline when false, do not add prefix + * @return + */ + std::string 
format(const Ark::internal::Node& node, std::size_t indent, bool after_newline); + + std::string formatComment(const std::string& comment, std::size_t indent); + + std::string formatBlock(const Ark::internal::Node& node, std::size_t indent, bool after_newline); + + std::string formatFunction(const Ark::internal::Node& node, std::size_t indent); + std::string formatVariable(const Ark::internal::Node& node, std::size_t indent); + std::string formatCondition(const Ark::internal::Node& node, std::size_t indent, bool is_macro = false); + std::string formatLoop(const Ark::internal::Node& node, std::size_t indent); + std::string formatBegin(const Ark::internal::Node& node, std::size_t indent, bool after_newline); + std::string formatImport(const Ark::internal::Node& node, std::size_t indent); + std::string formatDel(const Ark::internal::Node& node, std::size_t indent); + std::string formatCall(const Ark::internal::Node& node, std::size_t indent); + std::string formatMacro(const Ark::internal::Node& node, std::size_t indent); +}; + +#endif // ARK_FORMATTER_HPP diff --git a/src/arkreactor/Compiler/AST/Node.cpp b/src/arkreactor/Compiler/AST/Node.cpp index b21f1ddfe..e5a2c4586 100644 --- a/src/arkreactor/Compiler/AST/Node.cpp +++ b/src/arkreactor/Compiler/AST/Node.cpp @@ -110,7 +110,7 @@ namespace Ark::internal if (!m_after_comment.empty()) m_after_comment += "\n"; m_after_comment += comment; - if (m_after_comment.back() == '\n') + if (!m_after_comment.empty() && m_after_comment.back() == '\n') m_after_comment.pop_back(); return *this; } diff --git a/src/arkscript/Formatter.cpp b/src/arkscript/Formatter.cpp new file mode 100644 index 000000000..99d672c97 --- /dev/null +++ b/src/arkscript/Formatter.cpp @@ -0,0 +1,450 @@ +#include + +#include +#include +#include + +#include +#include + +using namespace Ark; +using namespace Ark::internal; + +Formatter::Formatter(std::string filename, bool dry_run) : + m_filename(std::move(filename)), m_dry_run(dry_run), m_parser(/* interpret= 
*/ false) +{} + +void Formatter::run() +{ + try + { + m_parser.processFile(m_filename); + + // remove useless surrounding begin (generated by the parser) + if (isBeginBlock(m_parser.ast())) + { + std::size_t previous_line = 0; + for (std::size_t i = 1, end = m_parser.ast().constList().size(); i < end; ++i) + { + const Node& node = m_parser.ast().constList()[i]; // bind by reference: copying a Node deep-copies its whole subtree + if (node.line() - previous_line > 1 && !m_output.empty()) + m_output += "\n"; + previous_line = lineOfLastNodeIn(node); + m_output += format(node, 0, false) + "\n"; + } + } + else + m_output = format(m_parser.ast(), 0, false); + + if (!m_dry_run) + { + std::ofstream stream(m_filename); + stream << m_output; + } + } + catch (const CodeError& e) + { + Diagnostics::generate(e); + } +} + +const std::string& Formatter::output() const +{ + return m_output; +} + +bool Formatter::isListStartingWithKeyword(const Ark::internal::Node& node, Ark::internal::Keyword keyword) +{ + return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword && node.constList()[0].keyword() == keyword; +} + +bool Formatter::isBeginBlock(const Node& node) +{ + return isListStartingWithKeyword(node, Keyword::Begin); +} + +bool Formatter::isFuncDef(const Ark::internal::Node& node) +{ + return isListStartingWithKeyword(node, Keyword::Fun); +} + +bool Formatter::isFuncCall(const Ark::internal::Node& node) +{ + return node.isListLike() && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Symbol; +} + +bool Formatter::isPlainValue(const Ark::internal::Node& node) +{ + switch (node.nodeType()) + { + case NodeType::Symbol: [[fallthrough]]; + case NodeType::Number: [[fallthrough]]; + case NodeType::String: [[fallthrough]]; + case NodeType::Field: return true; + + default: + return false; + } +} + +std::size_t Formatter::lineOfLastNodeIn(const Ark::internal::Node& node) +{ + if (node.isListLike() && !node.constList().empty()) + { + std::size_t child_line =
lineOfLastNodeIn(node.constList().back()); + if (child_line < node.line()) + return node.line(); + else + return child_line; + } + else + return node.line(); +} + +bool Formatter::should_split_on_newline(const Ark::internal::Node& node) +{ + std::string formatted = format(node, 0, false); + std::string::size_type sz = formatted.find_first_of('\n'); + + bool is_long_line = !((sz < FormatterConfig.LongLineLength || (sz == std::string::npos && formatted.size() < FormatterConfig.LongLineLength))); + if (node.comment().empty() && (isBeginBlock(node) || isFuncCall(node))) + return false; + else if (is_long_line || (node.isListLike() && node.constList().size() > 1) || !node.comment().empty()) + return true; + return false; +} + +std::string Formatter::format(const Ark::internal::Node& node, std::size_t indent, bool after_newline) +{ + std::string output; + if (!node.comment().empty()) + { + output += formatComment(node.comment(), indent); + after_newline = true; + } + if (after_newline) + output += prefix(indent); + + switch (node.nodeType()) + { + case NodeType::Symbol: + output += node.string(); + break; + case NodeType::Capture: + output += "&" + node.string(); + break; + case NodeType::Keyword: + output += std::string(keywords[static_cast(node.keyword())]); + break; + case NodeType::String: + output += fmt::format("\"{}\"", node.string()); + break; + case NodeType::Number: + output += fmt::format("{}", node.number()); + break; + case NodeType::List: + output += formatBlock(node, indent, after_newline); + break; + case NodeType::Spread: + output += fmt::format("...{}", node.string()); + break; + case NodeType::Field: + { + std::string field = format(node.constList()[0], indent, false); + for (std::size_t i = 1, end = node.constList().size(); i < end; ++i) + field += "." 
+ format(node.constList()[i], indent, false); + output += field; + break; + } + case NodeType::Macro: + output += formatMacro(node, indent); + break; + case NodeType::Unused: + break; + } + + if (!node.commentAfter().empty()) + output += " " + formatComment(node.commentAfter(), /* indent= */ 0); + + return output; +} + +std::string Formatter::formatComment(const std::string& comment, std::size_t indent) +{ + std::string output = prefix(indent); + for (std::size_t i = 0, end = comment.size(); i < end; ++i) + { + output += comment[i]; + if (comment[i] == '\n' && i != end - 1) + output += prefix(indent); + } + + return output; +} + +std::string Formatter::formatBlock(const Ark::internal::Node& node, std::size_t indent, bool after_newline) +{ + if (node.constList().empty()) + return "()"; + + const Node& first = node.constList().front(); // borrowed from node's list, no copy needed + if (first.nodeType() != NodeType::Keyword) // a list not led by a keyword is formatted as a call + return formatCall(node, indent); + + switch (first.keyword()) + { + case Keyword::Fun: + return formatFunction(node, indent); + case Keyword::Let: + [[fallthrough]]; + case Keyword::Mut: + [[fallthrough]]; + case Keyword::Set: + return formatVariable(node, indent); + case Keyword::If: + return formatCondition(node, indent); + case Keyword::While: + return formatLoop(node, indent); + case Keyword::Begin: + return formatBegin(node, indent, after_newline); + case Keyword::Import: + return formatImport(node, indent); + case Keyword::Del: + return formatDel(node, indent); + } + // no case matched (unexpected keyword value): never flow off the end of a non-void function + return ""; +} + +std::string Formatter::formatFunction(const Ark::internal::Node& node, std::size_t indent) +{ + const Node args_node = node.constList()[1]; + const Node body_node = node.constList()[2]; + + std::string formatted_args; + bool comment_in_args = false; + + if (args_node.isListLike()) + { + std::string args; + for (std::size_t i = 0, end = args_node.constList().size(); i < end; ++i) + { + const Node arg_i = args_node.constList()[i]; + bool has_comment = !arg_i.comment().empty(); + if (has_comment) +
comment_in_args = true; + + args += format(arg_i, indent + (comment_in_args ? 1 : 0), comment_in_args); + if (i != end - 1) + args += comment_in_args ? '\n' : ' '; + } + + formatted_args = fmt::format("({}{})", (comment_in_args ? "\n" : ""), args); + } + else + formatted_args = format(args_node, indent, false); + + if (!should_split_on_newline(body_node)) + return fmt::format("(fun {} {})", formatted_args, format(body_node, indent + 1, false)); + else + return fmt::format("(fun {}\n{})", formatted_args, format(body_node, indent + 1, true)); +} + +std::string Formatter::formatVariable(const Ark::internal::Node& node, std::size_t indent) +{ + std::string keyword = std::string(keywords[static_cast(node.constList()[0].keyword())]); + + const Node body_node = node.constList()[2]; + std::string formatted_body = format(body_node, indent + 1, false); + + if (!should_split_on_newline(body_node) || isFuncDef(body_node)) + return fmt::format("({} {} {})", keyword, format(node.constList()[1], indent, false), formatted_body); + else + return fmt::format("({} {}\n{})", keyword, format(node.constList()[1], indent, false), format(node.constList()[2], indent + 1, true)); +} + +std::string Formatter::formatCondition(const Ark::internal::Node& node, std::size_t indent, bool is_macro) +{ + const Node cond_node = node.constList()[1]; + const Node then_node = node.constList()[2]; + + bool cond_on_newline = false; + std::string formatted_cond = format(cond_node, indent + 1, false); + if (formatted_cond.find('\n') != std::string::npos) + cond_on_newline = true; + + std::string if_cond_formatted = fmt::format( + "({}if{}{}", + is_macro ? "$" : "", + cond_on_newline ? 
"\n" : " ", + formatted_cond); + + bool split_then_newline = should_split_on_newline(then_node); + + // (if cond then) + if (node.constList().size() == 3) + { + if (cond_on_newline || split_then_newline) + return fmt::format("{}\n{})", if_cond_formatted, format(then_node, indent + 1, true)); + else + return fmt::format("{} {})", if_cond_formatted, format(then_node, indent + 1, false)); + } + else // (if cond then else) + return fmt::format( + "{}\n{}\n{}{})", + if_cond_formatted, + format(then_node, indent + 1, true), + format(node.constList()[3], indent + 1, true), + node.constList()[3].commentAfter().empty() ? "" : ("\n" + prefix(indent))); +} + +std::string Formatter::formatLoop(const Ark::internal::Node& node, std::size_t indent) +{ + const Node cond_node = node.constList()[1]; + const Node body_node = node.constList()[2]; + + bool cond_on_newline = false; + std::string formatted_cond = format(cond_node, indent + 1, false); + if (formatted_cond.find('\n') != std::string::npos) + cond_on_newline = true; + + if (cond_on_newline || should_split_on_newline(body_node)) + return fmt::format( + "(while{}{}\n{})", + cond_on_newline ? 
"\n" : " ", + formatted_cond, + format(body_node, indent + 1, true)); + else + return fmt::format( + "(while {} {})", + formatted_cond, + format(body_node, indent + 1, false)); +} + +std::string Formatter::formatBegin(const Ark::internal::Node& node, std::size_t indent, bool after_newline) +{ + // only the keyword begin is present + if (node.constList().size() == 1) + return "{}"; + + std::string output = "{\n"; + std::size_t previous_line = 0; + // skip begin keyword + for (std::size_t i = 1, end = node.constList().size(); i < end; ++i) + { + const Node child = node.constList()[i]; + // we want to preserve the node grouping by the user, but remove useless duplicate new line + // but that shouldn't apply to the first node of the block + if (child.line() - previous_line > 1 && i > 1) + output += "\n"; + previous_line = lineOfLastNodeIn(child); + + output += format(child, indent + (after_newline ? 1 : 0), true); + if (i != end - 1) + output += "\n"; + } + output += " }"; + return output; +} + +std::string Formatter::formatImport(const Ark::internal::Node& node, std::size_t indent) +{ + const Node package_node = node.constList()[1]; + std::string package; + + if (!package_node.comment().empty()) + package += "\n" + formatComment(package_node.comment(), indent + 1) + prefix(indent + 1); + else + package += " "; + + for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i) + { + package += format(package_node.constList()[i], indent + 1, false); + if (i != end - 1) + package += "."; + } + + const Node symbols = node.constList()[2]; + if (symbols.nodeType() == NodeType::Symbol && symbols.string() == "*") + package += ":*"; + else // symbols is a list + { + for (const auto& sym : symbols.constList()) + { + if (sym.comment().empty()) + package += " :" + sym.string(); + else + package += "\n" + formatComment(sym.comment(), indent + 1) + prefix(indent + 1) + ":" + sym.string(); + if (!sym.commentAfter().empty()) + package += " " + 
formatComment(sym.commentAfter(), /* indent= */ 0); + } + } + + return fmt::format("(import{})", package); +} + +std::string Formatter::formatDel(const Ark::internal::Node& node, std::size_t indent) +{ + std::string formatted_sym = format(node.constList()[1], indent + 1, false); + if (formatted_sym.find('\n') != std::string::npos) + return fmt::format("(del\n{})", formatted_sym); + else + return fmt::format("(del {})", formatted_sym); +} + +std::string Formatter::formatCall(const Ark::internal::Node& node, std::size_t indent) +{ + bool is_list = false; + if (!node.constList().empty() && node.constList().front().nodeType() == NodeType::Symbol && + node.constList().front().string() == "list") + is_list = true; + + bool is_multiline = false; + + std::vector formatted_args; + for (std::size_t i = 1, end = node.constList().size(); i < end; ++i) + { + formatted_args.push_back(format(node.constList()[i], indent, false)); + // if we have at least one argument taking multiple lines, split them all on their own line + if (formatted_args.back().find('\n') != std::string::npos || !node.constList()[i].commentAfter().empty()) + is_multiline = true; + } + + std::string output = is_list ? "[" : ("(" + format(node.constList()[0], indent, false)); + for (std::size_t i = 0, end = formatted_args.size(); i < end; ++i) + { + const std::string formatted_node = formatted_args[i]; + if (is_multiline) + output += "\n" + format(node.constList()[i + 1], indent + 1, true); + else + output += (is_list && i == 0 ? "" : " ") + formatted_node; + } + if (!node.constList().back().commentAfter().empty()) + output += "\n" + prefix(indent); + output += is_list ? "]" : ")"; + return output; +} + +std::string Formatter::formatMacro(const Ark::internal::Node& node, std::size_t indent) +{ + if (isListStartingWithKeyword(node, Keyword::If)) + return formatCondition(node, indent, /* is_macro= */ true); + + std::string output; + // because some macro call like ($undef ...) 
are considered macros and we shouldn't confuse them and write ($ $undef ...) + if (!node.constList().empty() && node.constList().front().nodeType() == NodeType::Symbol && node.constList().front().string().starts_with('$')) + output = "("; + else + output = "($ "; + + for (std::size_t i = 0, end = node.constList().size(); i < end; ++i) + { + output += format(node.constList()[i], indent + 1, false); + if (i != end - 1) + output += " "; + } + if (!node.constList().empty() && !node.constList().back().commentAfter().empty()) // back() on an empty list is undefined behaviour + output += "\n" + prefix(indent); + + return output + ")"; +} diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index 2ba892724..474c38fb9 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -3,18 +3,16 @@ #include #include #include -#include #include #include #include -#include -#include - #include #include #include +#include +#include int main(int argc, char** argv) { @@ -30,25 +28,28 @@ int main(int argc, char** argv) repl, compile, eval, - ast + ast, + format }; mode selected = mode::repl; - std::string file, eval_expression; - unsigned debug = 0; constexpr uint16_t max_uint16 = std::numeric_limits::max(); + // Bytecode reader // by default, select all pages and segment types, without slicing anything uint16_t bcr_page = max_uint16; uint16_t bcr_start = max_uint16; uint16_t bcr_end = max_uint16; Ark::BytecodeSegment segment = Ark::BytecodeSegment::All; - - std::vector wrong, script_args; - + // Eval / Run / AST dump + std::string file, eval_expression; std::string libdir; + // Formatting + bool dry_run = false; + // Generic arguments + std::vector wrong, script_args; // clang-format off auto cli = ( @@ -75,6 +76,11 @@ int main(int argc, char** argv) ) , any_other(script_args) ) + | ( + required("-f", "--format").set(selected, mode::format).doc("Format the given source file in place") + & value("file", file) + , option("--dry-run").set(dry_run, true).doc("Do not modify the file, only print out the changes\n") + ) | ( +
required("--ast").set(selected, mode::ast).doc("Compile the given program and output its AST as JSON to stdout") & value("file", file) @@ -282,6 +288,14 @@ int main(int argc, char** argv) } break; } + + case mode::format: + { + Formatter formatter(file, dry_run); + formatter.run(); + if (dry_run) + std::cout << formatter.output() << std::endl; + } } } else diff --git a/tests/unittests/FormatterSuite.cpp b/tests/unittests/FormatterSuite.cpp new file mode 100644 index 000000000..d667dfabe --- /dev/null +++ b/tests/unittests/FormatterSuite.cpp @@ -0,0 +1,24 @@ +#include + +#include + +#include "TestsHelper.hpp" + +using namespace boost; + +ut::suite<"Formatter"> formatter_suite = [] { + using namespace ut; + + iter_test_files( + "FormatterSuite", + [](TestData&& data) { + Formatter formatter(data.path, /* dry_run= */ true); + should("output a correctly formatted code for " + data.stem) = [&] { + expect(nothrow([&] { + mut(formatter).run(); + })); + std::string code = formatter.output(); + expect(that % code == data.expected); + }; + }); +}; \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/calls.ark b/tests/unittests/resources/FormatterSuite/calls.ark new file mode 100644 index 000000000..83269517b --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/calls.ark @@ -0,0 +1,10 @@ +(let newlist (list:filter _listeners +(fun (element) (!= typ (@ element 0))))) + +# a badly aligned method below +# this comment bloc is also a test + + (list:forEach _listeners (fun (element) + (if (= typ (@ element 0)) { + ((@ element 1) val) + (set found true)}))) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/calls.expected b/tests/unittests/resources/FormatterSuite/calls.expected new file mode 100644 index 000000000..41020b10e --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/calls.expected @@ -0,0 +1,10 @@ +(let newlist (list:filter _listeners (fun (element) (!= typ (@ element 0))))) + +# a badly aligned 
method below +# this comment bloc is also a test +(list:forEach + _listeners + (fun (element) + (if (= typ (@ element 0)) { + ((@ element 1) val) + (set found true) }))) diff --git a/tests/unittests/resources/FormatterSuite/comments_after_call.ark b/tests/unittests/resources/FormatterSuite/comments_after_call.ark new file mode 100644 index 000000000..900ffb282 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_call.ark @@ -0,0 +1,10 @@ +($ foo (...args # all the args go there +) () # the body is empty +) +[ a b c # last element +] +[a b c] # list +(foo # func + bar # arg + egg # arg bis + ) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/comments_after_call.expected b/tests/unittests/resources/FormatterSuite/comments_after_call.expected new file mode 100644 index 000000000..422759daa --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_call.expected @@ -0,0 +1,17 @@ +($ foo (...args # all the args go there + ) () # the body is empty +) + +[ + a + b + c # last element +] +[a b c] # list + +(foo + # func + bar + # arg + egg # arg bis +) diff --git a/tests/unittests/resources/FormatterSuite/comments_after_cond.ark b/tests/unittests/resources/FormatterSuite/comments_after_cond.ark new file mode 100644 index 000000000..354d8b907 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_cond.ark @@ -0,0 +1,10 @@ +(if true # a condition + ok # ok + ) +(if true ok # ok +) +(if true ok) # ok +(if true + ok + no # dont go +) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/comments_after_cond.expected b/tests/unittests/resources/FormatterSuite/comments_after_cond.expected new file mode 100644 index 000000000..d03e8f1ce --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_cond.expected @@ -0,0 +1,9 @@ +(if true + # a condition + ok) # ok +(if true ok) # ok +(if true ok) # ok +(if true + ok + no # dont go +) diff --git 
a/tests/unittests/resources/FormatterSuite/comments_after_import.ark b/tests/unittests/resources/FormatterSuite/comments_after_import.ark new file mode 100644 index 000000000..1de571d8b --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_import.ark @@ -0,0 +1 @@ +(import test) # test \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/comments_after_import.expected b/tests/unittests/resources/FormatterSuite/comments_after_import.expected new file mode 100644 index 000000000..014e22008 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_import.expected @@ -0,0 +1 @@ +(import test) # test diff --git a/tests/unittests/resources/FormatterSuite/comments_after_variable.ark b/tests/unittests/resources/FormatterSuite/comments_after_variable.ark new file mode 100644 index 000000000..635b2074f --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_variable.ark @@ -0,0 +1,5 @@ +(let a 1 # a very secret value +) +(mut b 2) # test +(set c 3 # value +) # node \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/comments_after_variable.expected b/tests/unittests/resources/FormatterSuite/comments_after_variable.expected new file mode 100644 index 000000000..aa50a9251 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_variable.expected @@ -0,0 +1,4 @@ +(let a 1) # a very secret value +(mut b 2) # test +(set c 3) # value +# node diff --git a/tests/unittests/resources/FormatterSuite/comments_after_while.ark b/tests/unittests/resources/FormatterSuite/comments_after_while.ark new file mode 100644 index 000000000..935602fb1 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_while.ark @@ -0,0 +1,7 @@ +(while true # cond + 1 # body + ) + + +(while true {} # no body +) # infinite loop \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/comments_after_while.expected 
b/tests/unittests/resources/FormatterSuite/comments_after_while.expected new file mode 100644 index 000000000..37a4cd104 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/comments_after_while.expected @@ -0,0 +1,6 @@ +(while true + # cond + 1) # body + +(while true {}) # no body +# infinite loop diff --git a/tests/unittests/resources/FormatterSuite/conditions.ark b/tests/unittests/resources/FormatterSuite/conditions.ark new file mode 100644 index 000000000..d9a58408b --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/conditions.ark @@ -0,0 +1,10 @@ +(if true # test +0 # value +1) +(if (cond) (do) (stuff)) +# conditions in functions are on their own line +(fun () (if true 0)) +(fun () (if true 0 1)) +(if # true +true true false) +(if (= 1 2) { (foo) (bar) }) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/conditions.expected b/tests/unittests/resources/FormatterSuite/conditions.expected new file mode 100644 index 000000000..3ceb8cdac --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/conditions.expected @@ -0,0 +1,23 @@ +(if true + # test + 0 + # value + 1) +(if (cond) + (do) + (stuff)) +# conditions in functions are on their own line +(fun () + (if true 0)) +(fun () + (if true + 0 + 1)) +(if + # true + true + true + false) +(if (= 1 2) { + (foo) + (bar) }) diff --git a/tests/unittests/resources/FormatterSuite/del.ark b/tests/unittests/resources/FormatterSuite/del.ark new file mode 100644 index 000000000..ee0772b18 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/del.ark @@ -0,0 +1,3 @@ +(del a) +(del # comment +b) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/del.expected b/tests/unittests/resources/FormatterSuite/del.expected new file mode 100644 index 000000000..71f1cd80d --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/del.expected @@ -0,0 +1,4 @@ +(del a) +(del + # comment + b) diff --git 
a/tests/unittests/resources/FormatterSuite/field.ark b/tests/unittests/resources/FormatterSuite/field.ark new file mode 100644 index 000000000..12033069b --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/field.ark @@ -0,0 +1,5 @@ +(let a foo.closure.name) +(foo.closure.name # test +this.bar.egg.qux) +(foo.closure.name this.bar.egg.qux) +(foo.closure.name this.bar.egg.qux 1 2) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/field.expected b/tests/unittests/resources/FormatterSuite/field.expected new file mode 100644 index 000000000..a25f4749f --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/field.expected @@ -0,0 +1,6 @@ +(let a foo.closure.name) +(foo.closure.name + # test + this.bar.egg.qux) +(foo.closure.name this.bar.egg.qux) +(foo.closure.name this.bar.egg.qux 1 2) diff --git a/tests/unittests/resources/FormatterSuite/functions.ark b/tests/unittests/resources/FormatterSuite/functions.ark new file mode 100644 index 000000000..884dd5d96 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/functions.ark @@ -0,0 +1,19 @@ +(fun () ()) +(fun ( a b ) ( + a b)) +( +fun +( +a) { +a +}) +(call me maybe) +(call (fun () { + hello +}) maybe) +(fun # test + (# a + a + b + # capture + &c) # body + {}) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/functions.expected b/tests/unittests/resources/FormatterSuite/functions.expected new file mode 100644 index 000000000..f5c4ed88b --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/functions.expected @@ -0,0 +1,18 @@ +(fun () ()) +(fun (a b) (+ a b)) + +(fun (a) { + a }) +(call me maybe) +(call + (fun () { + hello }) + maybe) +(fun ( + # a + a + b + # capture + &c) + # body + {}) diff --git a/tests/unittests/resources/FormatterSuite/imports.ark b/tests/unittests/resources/FormatterSuite/imports.ark new file mode 100644 index 000000000..a7df9339a --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/imports.ark @@ -0,0 
+1,13 @@ +(import foo) +(import std.foo) +(import std.foo :a) +(import std.foo :a :b) +(import std.foo.bar:*) +(import # package +foo) +(import std.foo # item +:a) +(import std.foo # item +:a +# item +:b) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/imports.expected b/tests/unittests/resources/FormatterSuite/imports.expected new file mode 100644 index 000000000..5520032e9 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/imports.expected @@ -0,0 +1,16 @@ +(import foo) +(import std.foo) +(import std.foo :a) +(import std.foo :a :b) +(import std.foo.bar:*) +(import + # package + foo) +(import std.foo + # item + :a) +(import std.foo + # item + :a + # item + :b) diff --git a/tests/unittests/resources/FormatterSuite/loop.ark b/tests/unittests/resources/FormatterSuite/loop.ark new file mode 100644 index 000000000..2320d1237 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/loop.ark @@ -0,0 +1,6 @@ +(while # true + (= 1 1) + # body + (print 1)) +(while (= 1 2) (print 3)) +(while true { 1 2 3 }) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/loop.expected b/tests/unittests/resources/FormatterSuite/loop.expected new file mode 100644 index 000000000..76da4e568 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/loop.expected @@ -0,0 +1,10 @@ +(while + # true + (= 1 1) + # body + (print 1)) +(while (= 1 2) (print 3)) +(while true { + 1 + 2 + 3 }) diff --git a/tests/unittests/resources/FormatterSuite/macro_cond.ark b/tests/unittests/resources/FormatterSuite/macro_cond.ark new file mode 100644 index 000000000..1fd358be4 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/macro_cond.ark @@ -0,0 +1,2 @@ +($ -> (arg fn1 ...fn) { + ($if (> (len fn) 0) (-> (fn1 arg) ...fn) (fn1 arg))}) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/macro_cond.expected b/tests/unittests/resources/FormatterSuite/macro_cond.expected new file mode 100644 index 
000000000..65ce9b7f0 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/macro_cond.expected @@ -0,0 +1,4 @@ +($ -> (arg fn1 ...fn) { + ($if (> (len fn) 0) + (-> (fn1 arg) ...fn) + (fn1 arg)) }) diff --git a/tests/unittests/resources/FormatterSuite/macros.ark b/tests/unittests/resources/FormatterSuite/macros.ark new file mode 100644 index 000000000..e156af823 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/macros.ark @@ -0,0 +1,6 @@ +($ foo (a b) (+ a b)) +($ var 12) +($ defun (name args body) (let name (fun args body))) +($ one (...args) (print "Macro 'one', returns the 2nd argument given in " args " => " (@ args 1))) +($undef a) +($repr a) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/macros.expected b/tests/unittests/resources/FormatterSuite/macros.expected new file mode 100644 index 000000000..70bb67bcd --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/macros.expected @@ -0,0 +1,6 @@ +($ foo (a b) (+ a b)) +($ var 12) +($ defun (name args body) (let name (fun args body))) +($ one (...args) (print "Macro 'one', returns the 2nd argument given in " args " => " (@ args 1))) +($undef a) +($repr a) diff --git a/tests/unittests/resources/FormatterSuite/vars.ark b/tests/unittests/resources/FormatterSuite/vars.ark new file mode 100644 index 000000000..9c0963687 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/vars.ark @@ -0,0 +1,8 @@ +(let a +1) +(mut b (if true 3 4)) +(set c { + (let d 5) + (+ 5 d) +}) +(let e (fun (f g) (+ f g))) \ No newline at end of file diff --git a/tests/unittests/resources/FormatterSuite/vars.expected b/tests/unittests/resources/FormatterSuite/vars.expected new file mode 100644 index 000000000..197332760 --- /dev/null +++ b/tests/unittests/resources/FormatterSuite/vars.expected @@ -0,0 +1,9 @@ +(let a 1) +(mut b + (if true + 3 + 4)) +(set c { + (let d 5) + (+ 5 d) }) +(let e (fun (f g) (+ f g)))