diff --git a/.github/launch-tests b/.github/launch-tests deleted file mode 100755 index 6c79721eb..000000000 --- a/.github/launch-tests +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -(cd tests/arkscript ; echo ; bash ./run-tests) -(cd tests/cpp/ ; echo ; bash ./run-tests) -(cd tests/errors ; echo ; bash ./run-tests) -(cd tests/ast/ ; echo ; bash ./run-tests) -(source ./lib/modules/.github/run-tests) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2818ddc78..22acdc50d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -139,38 +139,11 @@ jobs: with: submodules: recursive - - name: Update GNU compilers - if: startsWith(matrix.config.name, 'Ubuntu GCC') - shell: bash - run: | - sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test - sudo apt-get -yq install ${{ matrix.config.cc }} ${{ matrix.config.cxx }} - - - name: Update LLVM compilers - if: startsWith(matrix.config.name, 'Ubuntu Clang') - shell: bash - run: | - version=`echo ${{ matrix.config.cc }} | cut -c 7-` - sudo apt-get install -y clang-${version} lld-${version} libc++-${version}-dev libc++abi-${version}-dev clang-tools-${version} + - name: Setup compilers + uses: ./.github/workflows/setup-compilers - - name: Install MacOS dependencies - if: startsWith(matrix.config.name, 'MacOS') - shell: bash - run: env HOMEBREW_NO_AUTO_UPDATE=1 brew install openssl - - - uses: ilammy/msvc-dev-cmd@v1 - if: startsWith(matrix.config.name, 'Windows') - - - name: Download Windows dependencies - if: startsWith(matrix.config.name, 'Windows') - shell: pwsh - run: | - Invoke-RestMethod -Uri https://www.sqlite.org/2022/sqlite-dll-win64-x64-${Env:SQLITE_VERSION}.zip -OutFile sqlite.zip - Invoke-RestMethod -Uri https://www.sqlite.org/2022/sqlite-amalgamation-${Env:SQLITE_VERSION}.zip -OutFile amalgation.zip - Expand-Archive sqlite.zip -DestinationPath sqlite_lib - Expand-Archive amalgation.zip -DestinationPath sqlite_code - cd sqlite_lib - lib /DEF:sqlite3.def /OUT:sqlite3.lib 
/MACHINE:x64 + - name: Setup dependencies + uses: ./.github/workflows/setup-deps - name: Configure CMake Ark shell: bash @@ -181,7 +154,7 @@ jobs: -DCMAKE_C_COMPILER=${{ matrix.config.cc }} \ -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \ -DARK_SANITIZERS=${{ matrix.config.sanitizers }} \ - -DARK_BUILD_EXE=On -DARK_BUILD_MODULES=On -DARK_MOD_ALL=On + -DARK_BUILD_EXE=On -DARK_BUILD_MODULES=On -DARK_MOD_ALL=On -DARK_BUILD_PARSER_TESTS=On - name: Add SQLite deps if: startsWith(matrix.config.name, 'Windows') @@ -195,7 +168,7 @@ jobs: shell: bash run: cmake --build build --config $BUILD_TYPE - - name: Configure CMake Integration tests + - name: Configure & build CMake Integration tests shell: bash run: | cd tests/cpp @@ -204,29 +177,21 @@ jobs: -DCMAKE_C_COMPILER=${{ matrix.config.cc }} \ -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \ -DARK_SANITIZERS=${{ matrix.config.sanitizers }} - - - name: Build Integration tests - shell: bash - run: cd tests/cpp && cmake --build build --config $BUILD_TYPE - - - name: Organize files for upload - if: startsWith(matrix.config.name, 'Ubuntu') || startsWith(matrix.config.name, 'MacOS') - shell: bash - run: | - mkdir -p artifact/lib/std - cp build/arkscript artifact - cp build/libArkReactor.* artifact - cp lib/*.arkm artifact/lib - cp lib/std/*.ark artifact/lib/std - rm -rf artifact/lib/std/{.git,.github,tests/__arkscript__} + cmake --build build --config $BUILD_TYPE - name: Organize files for upload - if: startsWith(matrix.config.name, 'Windows') shell: bash run: | mkdir -p artifact/lib/std - cp build/$BUILD_TYPE/arkscript.exe artifact - cp build/$BUILD_TYPE/ArkReactor.dll artifact + # Linux/MacOS + cp build/arkscript artifact || true + cp build/parser artifact || true + cp build/libArkReactor.* artifact || true + # Windows + cp build/$BUILD_TYPE/arkscript.exe artifact || true + cp build/$BUILD_TYPE/parser.exe artifact || true + cp build/$BUILD_TYPE/ArkReactor.dll artifact || true + # Generic cp lib/*.arkm artifact/lib cp 
lib/std/*.ark artifact/lib/std rm -rf artifact/lib/std/{.git,.github,tests/__arkscript__} @@ -234,8 +199,9 @@ jobs: - name: Organize temp artifact shell: bash run: | - mkdir -p temp/ + mkdir -p temp/parser/ cp -r tests/cpp temp/ + cp -r tests/parser temp/ - name: Upload artifact uses: actions/upload-artifact@v3.1.1 @@ -252,7 +218,7 @@ jobs: tests: runs-on: ${{ matrix.config.os }} - name: Test on ${{ matrix.config.name }} + name: Tests on ${{ matrix.config.name }} needs: [build] strategy: @@ -279,46 +245,44 @@ jobs: with: submodules: recursive - - name: Download artifact - id: download - uses: actions/download-artifact@v3.0.1 - with: - name: ${{ matrix.config.artifact }} - path: build + - name: Setup tests + uses: ./.github/workflows/setup-tests - - name: Download temp artifact - id: download-artifact - uses: actions/download-artifact@v3.0.1 - with: - name: temp-${{ matrix.config.artifact }} - path: artifact + - name: Parser tests + shell: bash + run: | + export ASAN_OPTIONS=detect_odr_violation=0 + (cd tests/parser/tests ; bash ./run) - - name: Update GNU compilers - if: startsWith(matrix.config.name, 'Ubuntu GCC') + - name: Integration tests shell: bash run: | - sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test - sudo apt-get -yq install libstdc++6 + export ASAN_OPTIONS=detect_odr_violation=0 + (cd tests/cpp ; bash ./run-tests) - - shell: bash + - name: AST tests + shell: bash run: | - mv artifact/cpp/out tests/cpp/ - mv build/lib/*.arkm lib/ - chmod u+x build/arkscript tests/cpp/out/* + export ASAN_OPTIONS=use_odr_indicator=1 + (cd tests/ast ; bash ./run-tests) - - name: Pre-test - if: startsWith(matrix.config.name, 'Windows') + - name: Unit tests shell: bash run: | - mkdir -p tests/cpp/out - cp build/*.dll tests/cpp/out/ + export ASAN_OPTIONS=detect_odr_violation=0 + (cd tests/arkscript ; bash ./run-tests) - - name: Tests - if: steps.download.outcome == 'success' && steps.download-artifact.outcome == 'success' + #- name: Modules tests + # shell: bash + # 
run: | + # export ASAN_OPTIONS=detect_odr_violation=0 + # (source ./lib/modules/.github/run-tests) + + - name: Runtime error message generation tests shell: bash run: | - export ASAN_OPTIONS=use_odr_indicator=1 - bash .github/launch-tests + export ASAN_OPTIONS=detect_odr_violation=0 + (cd tests/errors ; bash ./run-tests) valgrind: runs-on: ubuntu-latest @@ -337,14 +301,11 @@ jobs: name: "ubuntu-clang-11-valgrind" path: build - - shell: bash - run: | - mv build/lib/*.arkm lib/ - chmod u+x build/arkscript - - name: Update LLVM compilers shell: bash run: | + mv build/lib/*.arkm lib/ + chmod u+x build/arkscript sudo apt-get update --fix-missing sudo apt-get install -y clang-11 lld-11 libc++-11-dev libc++abi-11-dev clang-tools-11 valgrind diff --git a/.github/workflows/setup-compilers/action.yaml b/.github/workflows/setup-compilers/action.yaml new file mode 100644 index 000000000..d7621e762 --- /dev/null +++ b/.github/workflows/setup-compilers/action.yaml @@ -0,0 +1,22 @@ +--- +name: "Update compilers" + +runs: + using: "composite" + steps: + - name: Update GNU compilers + if: startsWith(matrix.config.name, 'Ubuntu GCC') + shell: bash + run: | + sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test + sudo apt-get -yq install ${{ matrix.config.cc }} ${{ matrix.config.cxx }} + + - name: Update LLVM compilers + if: startsWith(matrix.config.name, 'Ubuntu Clang') + shell: bash + run: | + version=`echo ${{ matrix.config.cc }} | cut -c 7-` + sudo apt-get install -y clang-${version} lld-${version} libc++-${version}-dev libc++abi-${version}-dev clang-tools-${version} + + - uses: ilammy/msvc-dev-cmd@v1 + if: startsWith(matrix.config.name, 'Windows') diff --git a/.github/workflows/setup-deps/action.yaml b/.github/workflows/setup-deps/action.yaml new file mode 100644 index 000000000..4fb2b4cbe --- /dev/null +++ b/.github/workflows/setup-deps/action.yaml @@ -0,0 +1,21 @@ +--- +name: "Install compilers and dependencies" + +runs: + using: "composite" + steps: + - name: Install MacOS 
dependencies + if: startsWith(matrix.config.name, 'MacOS') + shell: bash + run: env HOMEBREW_NO_AUTO_UPDATE=1 brew install openssl + + - name: Download Windows dependencies + if: startsWith(matrix.config.name, 'Windows') + shell: pwsh + run: | + Invoke-RestMethod -Uri https://www.sqlite.org/2022/sqlite-dll-win64-x64-${Env:SQLITE_VERSION}.zip -OutFile sqlite.zip + Invoke-RestMethod -Uri https://www.sqlite.org/2022/sqlite-amalgamation-${Env:SQLITE_VERSION}.zip -OutFile amalgation.zip + Expand-Archive sqlite.zip -DestinationPath sqlite_lib + Expand-Archive amalgation.zip -DestinationPath sqlite_code + cd sqlite_lib + lib /DEF:sqlite3.def /OUT:sqlite3.lib /MACHINE:x64 diff --git a/.github/workflows/setup-tests/action.yaml b/.github/workflows/setup-tests/action.yaml new file mode 100644 index 000000000..525e63976 --- /dev/null +++ b/.github/workflows/setup-tests/action.yaml @@ -0,0 +1,42 @@ +--- +name: "Setup tests" +description: "Unpack necessary artifacts, updates compilers" + +runs: + using: "composite" + steps: + - name: Download artifact + id: download + uses: actions/download-artifact@v3.0.1 + with: + name: ${{ matrix.config.artifact }} + path: build + + - name: Download temp artifact + id: download-artifact + uses: actions/download-artifact@v3.0.1 + with: + name: temp-${{ matrix.config.artifact }} + path: artifact + + - name: Update GNU compilers + if: startsWith(matrix.config.name, 'Ubuntu GCC') + shell: bash + run: | + sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test + sudo apt-get -yq install libstdc++6 + + - shell: bash + run: | + mv artifact/cpp/out tests/cpp/ + mv build/lib/*.arkm lib/ + chmod u+x build/arkscript tests/cpp/out/* + cp -r artifact/parser/* tests/parser/ + cp -r build tests/parser/ && ls tests/parser/build/ + chmod u+x tests/parser/build/parser + + - shell: bash + if: startsWith(matrix.config.name, 'Windows') + run: | + cp build/*.dll tests/cpp/out/ + cp build/*.dll tests/parser/build/ diff --git a/.gitignore b/.gitignore index 
f59d697ca..6cf5f7d56 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,6 @@ # Personal utilities warnings.log -cformat.ps1 # ArkScript include/Ark/Constants.hpp @@ -28,6 +27,7 @@ afl/ .cache/ build/ ninja/ +cmake-build-*/ # Prerequisites *.d @@ -47,10 +47,6 @@ ninja/ *.dylib *.dll -# Fortran module files -*.mod -*.smod - # Compiled Static libraries *.lai *.la @@ -64,3 +60,6 @@ ninja/ # MacOS files .DS_store + +# Visual Studio +CmakeSettings.json diff --git a/.gitmodules b/.gitmodules index 137ae9487..39a0aef00 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "lib/utf8_decoder"] - path = lib/utf8_decoder - url = https://github.com/PierrePharel/utf8_decoder.git [submodule "lib/std"] path = lib/std url = https://github.com/ArkScript-lang/std.git diff --git a/CHANGELOG.md b/CHANGELOG.md index e311bae6f..e8fe547ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - more tests for the io builtins - added lines and code coloration in the error context - new dependency: fmtlib +- added the padding/instruction/argumentation values when displaying instructions in the bytecode reader ### Changed - instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument @@ -43,6 +44,11 @@ - fixed a bug in the macro processor where macros were deleted when they shouldn't - fixed a bug where macro functions with no argument would crash the macro processor +### Removed + +### Deprecated + + ## [3.4.0] - 2022-09-12 ### Added - added new `async` and `await` builtins diff --git a/CMakeLists.txt b/CMakeLists.txt index a38fd0ad2..86fb75ef0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) # files needed for the library ArkReactor file(GLOB_RECURSE SOURCE_FILES ${ark_SOURCE_DIR}/src/arkreactor/*.cpp - ${ark_SOURCE_DIR}/lib/fmt/src/*.cc) + ${ark_SOURCE_DIR}/lib/fmt/src/format.cc) add_library(ArkReactor SHARED ${SOURCE_FILES}) @@ -46,8 +46,8 @@ if 
(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG OR APPLE) ) if (CMAKE_COMPILER_IS_GNUCXX) - # The package utf8_decoder has an issues with constant overflow. - # Once thisis fixed remove this flag: + # The package utf8 has an issue with constant overflow. + # Once this is fixed remove this flag: target_compile_options(ArkReactor PUBLIC -Wno-overflow) endif() @@ -96,7 +96,6 @@ add_subdirectory("${ark_SOURCE_DIR}/lib/termcolor" EXCLUDE_FROM_ALL) target_include_directories(ArkReactor PUBLIC - "${ark_SOURCE_DIR}/lib/utf8_decoder/" "${ark_SOURCE_DIR}/lib/picosha2/" "${ark_SOURCE_DIR}/lib/fmt/include") @@ -122,12 +121,7 @@ target_include_directories(ArkReactor PUBLIC ${ark_SOURCE_DIR}/include) -# setting up project properties -set_target_properties( - ArkReactor - PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON) +target_compile_features(ArkReactor PRIVATE cxx_std_17) # Installation rules @@ -176,11 +170,18 @@ if (ARK_BUILD_MODULES) add_subdirectory(${ark_SOURCE_DIR}/lib/modules) endif() +if (ARK_BUILD_PARSER_TESTS) + add_executable(parser ${ark_SOURCE_DIR}/tests/parser/main.cpp) + target_link_libraries(parser PUBLIC ArkReactor) + target_compile_features(parser PRIVATE cxx_std_17) +endif() + if (ARK_BUILD_EXE) # additional files needed for the exe (repl, command line and stuff) - set(EXE_SOURCES + file(GLOB_RECURSE EXE_SOURCES ${ark_SOURCE_DIR}/src/arkscript/REPL/Utils.cpp ${ark_SOURCE_DIR}/src/arkscript/REPL/Repl.cpp + ${ark_SOURCE_DIR}/lib/fmt/src/format.cc ${ark_SOURCE_DIR}/src/arkscript/main.cpp) add_executable(arkscript ${EXE_SOURCES}) diff --git a/cmake/link_time_optimization.cmake b/cmake/link_time_optimization.cmake index ac5156506..a2eda34ee 100644 --- a/cmake/link_time_optimization.cmake +++ b/cmake/link_time_optimization.cmake @@ -3,7 +3,7 @@ include(CheckIPOSupported) check_ipo_supported(RESULT ipo_supported) function(enable_lto target_name) - if (ipo_supported) + if (ipo_supported AND (CMAKE_BUILD_TYPE STREQUAL "Release")) if 
(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND (CMAKE_CXX_COMPILER_VERSION MATCHES "^8\..+")) message(WARNING "LTO supported but not enabled to prevent https://github.com/ArkScript-lang/Ark/pull/385#issuecomment-1163597951") else() diff --git a/examples/error.ark b/examples/error.ark index d54d0b41d..9d3c6c96d 100644 --- a/examples/error.ark +++ b/examples/error.ark @@ -2,7 +2,7 @@ # very often, and this is a convention, # if an imported file starts with a capital letter, # it shall be a file in the standard library. -(import "Exceptions.ark") +(import std.Exceptions) # the function which should do a "safe number invertion" (let invert (fun (x) { diff --git a/examples/macros.ark b/examples/macros.ark index b7ee159f1..7573f6aef 100644 --- a/examples/macros.ark +++ b/examples/macros.ark @@ -1,12 +1,12 @@ -!{suffix-dup (sym x) { - !{if (> x 1) - (suffix-dup sym (- x 1))} - (symcat sym x)}} +($ suffix-dup (sym x) { + ($if (> x 1) + (suffix-dup sym (- x 1))) + (symcat sym x)}) -!{partial (func ...defargs) { - !{bloc (suffix-dup a (- (argcount func) (len defargs)))} +($ partial (func ...defargs) { + ($ bloc (suffix-dup a (- (argcount func) (len defargs)))) (fun (bloc) (func ...defargs bloc)) - !{undef bloc}}} + ($undef bloc)}) (let test_func (fun (a b c) (* a b c))) (let test_func1 (partial test_func 1)) @@ -16,31 +16,31 @@ (print "Expected arguments for test_func1: " (argcount test_func1) ", expected " 2) (print "Calling them: " (test_func 1 2 3) " " (test_func1 2 3)) -!{foo (a b) (+ a b)} +($ foo (a b) (+ a b)) (print "Using macro foo (a b) => (+ a b): " (foo 1 2)) -!{var 12} +($ var 12) (print "Using macro constant var=12: " var) -!{if (= var 12) +($if (= var 12) (print "This was executed in a if macro, testing var == 12") - (print "You shouldn't see this")} + (print "You shouldn't see this")) -!{if (and true true) +($if (and true true) (print "This was executed in a if macro, testing (and true true)") - (print "You shouldn't see this (bis)")} + (print "You shouldn't see 
this (bis)")) -!{defun (name args body) (let name (fun args body))} +($ defun (name args body) (let name (fun args body))) (defun a_func (a b) (+ a b)) (print "Generated a function with a macro, a_func (a b) => (+ a b)") (print "Calling (a_func 1 2): " (a_func 1 2)) -!{one (...args) (print "Macro 'one', returns the 2nd argument given in " args " => " (@ args 1))} +($ one (...args) (print "Macro 'one', returns the 2nd argument given in " args " => " (@ args 1))) (one 1 2) (one 1 3 4) (one 1 5 6 7 8) -!{last (...args) (print "Macro 'last', returns the last argument given in " args " => " (@ args -1))} +($ last (...args) (print "Macro 'last', returns the last argument given in " args " => " (@ args -1))) (last 1 2) (last 1 3 4) (last 1 5 6 7 8) @@ -48,28 +48,28 @@ { (print "Testing macros in scopes and macro shadowing") - !{test (+ 1 2 3)} + ($ test (+ 1 2 3)) (print "(global) Reading macro 'test', expected 6, " test) ((fun () { - !{test (- 1 2 3)} + ($ test (- 1 2 3)) (print "(sub scope) Reading macro 'test', expected -4, " test)})) (print "(global) Reading macro 'test', expected 6, " test) { - !{test 555} + ($ test 555) (print "(subscope) Reading macro 'test', expected 555, " test) - !{undef test} + ($undef test) (print "(subscope, undef test) Reading macro 'test', expected 6, " test) - !{undef a}}} + ($undef a)}} (print "Demonstrating a threading macro") -!{-> (arg fn1 ...fn) { - !{if (> (len fn) 0) +($ -> (arg fn1 ...fn) { + ($if (> (len fn) 0) (-> (fn1 arg) ...fn) - (fn1 arg)}}} + (fn1 arg))}) (let filename "hello.json") diff --git a/images/diagram.svg b/images/diagram.svg index 0a58aefce..9bdd90005 100644 --- a/images/diagram.svg +++ b/images/diagram.svg @@ -1 +1 @@ 
-teststestssrcsrcliblibinclude/Arkinclude/Arkexamplesexamplesdocsdocs.vscode.vscode.github.githubcppcpparkscriptarkscriptarkscriptarkscriptarkreactorarkreactorVMVMREPLREPLCompilerCompilerBuiltinsBuiltinsgames/snakegames/snakeworkflowsworkflowsREPLREPLVMVMCompilerCompilerBuiltinsBuiltinsinlineinlineValueValueMacrosMacrosASTASTValueValueMacrosMacrosASTASTExecutorsExecutorsCHANGELOG.mdCHANGELOG.mdCHANGELOG.mdVM.cppVM.cppVM.cppCompiler.cppCompiler.cppCompiler.cppBytecodeR...BytecodeR...BytecodeR...VM.inlVM.inlVM.inlProcessor...Processor...Processor...Parser.cppParser.cppParser.cppLexer.cppLexer.cppLexer.cpp.cpp.gitignore.hpp.inl.json.md.py.svg.txt.ymleach dot sized by file size \ No newline at end of file +teststestssrcsrcliblibincludeincludefuzzingfuzzingexamplesexamplesdocsdocscmakecmake.vscode.vscode.github.githubparserparsererrorserrorscppcppastastarkscriptarkscriptarkscriptarkscriptarkreactorarkreactorArkArkuniqueuniqueinputinputcorpuscorpusworkflowsworkflowsteststestsREPLREPLVMVMCompilerCompilerBuiltinsBuiltinsVMVMREPLREPLCompilerCompilerValueValueMacrosMacrosASTASTinlineinlineValueValueMacrosMacrosASTASTExecutorsExecutorsCHANGELO...CHANGELO...CHANGELO...macros.jsonmacros.jsonmacros.jsonVM.cppVM.cppVM.cppCompiler...Compiler...Compiler...Bytecode...Bytecode...Bytecode...Processo...Processo...Processo...Parser.cppParser.cppParser.cpp.cmake.cpp.gitignore.hpp.inl.json.md.py.sh.svg.txt.xml.yaml.ymleach dot sized by file size \ No newline at end of file diff --git a/include/Ark/Compiler/AST/BaseParser.hpp b/include/Ark/Compiler/AST/BaseParser.hpp new file mode 100644 index 000000000..57c7245df --- /dev/null +++ b/include/Ark/Compiler/AST/BaseParser.hpp @@ -0,0 +1,87 @@ +#ifndef SRC_BASEPARSER_HPP +#define SRC_BASEPARSER_HPP + +#include +#include +#include +#include +#include + +#include +#include + +namespace Ark::internal +{ + struct FilePosition + { + std::size_t row; + std::size_t col; + }; + + class BaseParser + { + public: + BaseParser() = default; + + private: + 
std::string m_filename; + std::string m_str; + std::string::iterator m_it, m_next_it; + utf8_char_t m_sym; + + /* + getting next character and changing the values of count/row/col/sym + */ + void next(); + + protected: + void initParser(const std::string& filename, const std::string& code); + + FilePosition getCursor(); + + void error(const std::string& error, const std::string exp); + void errorWithNextToken(const std::string& message); + void errorMissingSuffix(char suffix, const std::string& node_name); + + inline long getCount() { return std::distance(m_str.begin(), m_it); } + inline std::size_t getSize() { return m_str.size(); } + inline bool isEOF() { return m_it == m_str.end(); } + + void backtrack(long n); + + /* + Function to use and check if a Character Predicate was able to parse + the current symbol. + Add the symbol to the given string (if there was one) and call next() + */ + bool accept(const CharPred& t, std::string* s = nullptr); + + /* + Function to use and check if a Character Predicate was able to parse + the current Symbol. + Add the symbol to the given string (if there was one) and call next(). + Throw a CodeError if it couldn't. 
+ */ + bool expect(const CharPred& t, std::string* s = nullptr); + + // basic parsers + bool space(std::string* s = nullptr); + bool inlineSpace(std::string* s = nullptr); + bool endOfLine(std::string* s = nullptr); + bool comment(); + bool newlineOrComment(); + bool prefix(char c); + bool suffix(char c); + bool number(std::string* s = nullptr); + bool signedNumber(std::string* s = nullptr); + bool hexNumber(unsigned length, std::string* s = nullptr); + bool name(std::string* s = nullptr); + bool sequence(const std::string& s); + bool packageName(std::string* s = nullptr); + bool anyUntil(const CharPred& delim, std::string* s = nullptr); + + bool oneOf(std::initializer_list words, std::string* s = nullptr); + }; +} + +#endif diff --git a/include/Ark/Compiler/AST/Import.hpp b/include/Ark/Compiler/AST/Import.hpp new file mode 100644 index 000000000..706029f75 --- /dev/null +++ b/include/Ark/Compiler/AST/Import.hpp @@ -0,0 +1,89 @@ +#ifndef COMPILER_AST_IMPORT_HPP +#define COMPILER_AST_IMPORT_HPP + +#include +#include +#include + +#include + +namespace Ark::internal +{ + struct ARK_API Import + { + /** + * @brief The filename without the extension + * @details Example: `(import foo.bar)` => `bar` + * `(import foo.bar.egg:*)` => `egg` + * `(import foo :a :b :c)` => `foo` + * + */ + std::string prefix; + + /** + * @brief Package with all the segments + * @details Example: `(import foo.bar)` => `{foo, bar}` + * `(import foo.bar.egg:*)` => `{foo, bar, egg}` + * `(import foo :a :b :c)` => `{foo}` + */ + std::vector package; + + /** + * @brief Import with prefix (the package) or not + * + */ + bool with_prefix = true; + + /** + * @brief List of symbols to import, can be empty if none provided + * + */ + std::vector symbols; + + inline std::string toPackageString() const + { + return std::accumulate(package.begin() + 1, package.end(), package.front(), [](const std::string& left, const std::string& right) { + return left + "." 
+ right; + }); + } + + inline std::string packageToPath() const + { + std::size_t offset = 0; + if (package.front() == "std") + offset = 1; + + return std::accumulate( + std::next(package.begin() + offset), + package.end(), + package[offset], + [](const std::string& a, const std::string& b) { + return a + "/" + b; + }); + } + + /** + * @brief Check if we should import everything, given something like `(import foo.bar.egg:*)` + * + * @return true if all symbols of the file should be imported in the importer scope + * @return false otherwise + */ + inline bool isGlob() const + { + return !with_prefix && symbols.empty(); + } + + /** + * @brief Check if we should import everything with a prefix, given a `(import foo.bar.egg)` + * + * @return true + * @return false + */ + inline bool isBasic() const + { + return with_prefix && symbols.empty(); + } + }; +} + +#endif diff --git a/include/Ark/Compiler/AST/Lexer.hpp b/include/Ark/Compiler/AST/Lexer.hpp deleted file mode 100644 index 19534970e..000000000 --- a/include/Ark/Compiler/AST/Lexer.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/** - * @file Lexer.hpp - * @author Alexandre Plateau (lexplt.dev@gmail.com) - * @brief Tokenize ArkScript code - * @version 0.1 - * @date 2020-10-27 - * - * @copyright Copyright (c) 2020-2021 - * - */ - -#ifndef ARK_COMPILER_LEXER_HPP -#define ARK_COMPILER_LEXER_HPP - -#include - -#include -#include - -namespace Ark::internal -{ - /** - * @brief The lexer, in charge of creating a list of tokens - * - */ - class Lexer - { - public: - /** - * @brief Construct a new Lexer object - * - * @param debug the debug level - */ - explicit Lexer(unsigned debug) noexcept; - - /** - * @brief Give code to tokenize and create the list of tokens - * - * @param code the ArkScript code - */ - void feed(const std::string& code); - - /** - * @brief Return the list of tokens - * - * @return std::vector& - */ - std::vector& tokens() noexcept; - - private: - unsigned m_debug; - std::vector m_tokens; - - inline constexpr bool 
isHexChar(char chr) - { - return (('a' <= chr && chr <= 'f') || ('A' <= chr && chr <= 'F') || ('0' <= chr && chr <= '9')); - } - - /** - * @brief Helper function to determine the type of a token - * - * @param value - * @return TokenType - */ - TokenType guessType(const std::string& value) noexcept; - - /** - * @brief Check if the value is a keyword in ArkScript - * - * @param value - * @return true - * @return false - */ - bool isKeyword(const std::string& value) noexcept; - /** - * @brief Check if the value can be an identifier in ArkScript - * - * @param value - * @return true - * @return false - */ - bool isIdentifier(const std::string& value) noexcept; - - /** - * @brief Check if the value is an operator in ArkScript - * - * @param value - * @return true - * @return false - */ - bool isOperator(const std::string& value) noexcept; - - /** - * @brief Check if a control character / sequence is complete or not - * - * @param sequence the sequence without the leading \\ - * @param next the next character to come, maybe, in the sequence - * @return true - * @return false - */ - bool endOfControlChar(const std::string& sequence, char next) noexcept; - - /** - * @brief To throw nice lexer errors - * - * @param message - * @param match - * @param line - * @param col - * @param context - */ - [[noreturn]] void throwTokenizingError(const std::string& message, const std::string& match, std::size_t line, std::size_t col, const std::string& context); - }; -} - -#endif diff --git a/include/Ark/Compiler/AST/Module.hpp b/include/Ark/Compiler/AST/Module.hpp new file mode 100644 index 000000000..4a0dac45a --- /dev/null +++ b/include/Ark/Compiler/AST/Module.hpp @@ -0,0 +1,17 @@ +#ifndef ARK_MODULE_HPP +#define ARK_MODULE_HPP + +#include + +namespace Ark::internal +{ + // TODO store something better than just the AST (AST+what we are importing as private/public/namespaced... vs all) + // so that we can remember the order in which we encountered imports. 
+ struct Module + { + Node ast; + bool has_been_processed = false; // TODO document this + }; +} + +#endif // ARK_MODULE_HPP diff --git a/include/Ark/Compiler/AST/Node.hpp b/include/Ark/Compiler/AST/Node.hpp index d4c4755d7..cb1f18944 100644 --- a/include/Ark/Compiler/AST/Node.hpp +++ b/include/Ark/Compiler/AST/Node.hpp @@ -4,9 +4,9 @@ * @brief AST node used by the parser, optimizer and compiler * @version 0.3 * @date 2020-10-27 - * + * * @copyright Copyright (c) 2020-2021 - * + * */ #ifndef COMPILER_AST_NODE_HPP @@ -18,251 +18,149 @@ #include #include +#include namespace Ark::internal { /** * @brief A node of an Abstract Syntax Tree for ArkScript - * + * */ - class Node + class ARK_API Node { public: - using Value = std::variant; - - /** - * @brief Provide a statically initialized / correct and guaranteed to be initialized Node representing "true" - */ - static const Node& getTrueNode(); - - /** - * @brief Provide a statically initialized / correct and guaranteed to be initialized Node representing "false" - */ - static const Node& getFalseNode(); - - /** - * @brief Provide a statically initialized / correct and guaranteed to be initialized Node representing "Nil" - */ - static const Node& getNilNode(); - - /** - * @brief Provide a statically initialized / correct and guaranteed to be initialized Node representing "Empty List" - */ - static const Node& getListNode(); + using Value = std::variant>; Node() = default; - /** - * @brief Construct a new Node object - * - * @param value - */ - explicit Node(long value) noexcept; - - /** - * @brief Construct a new Node object - * - * @param value - */ - explicit Node(double value) noexcept; - - /** - * @brief Construct a new Node object - * - * @param value - */ - explicit Node(const std::string& value) noexcept; - - /** - * @brief Construct a new Node object - * - * @param value - */ - explicit Node(Keyword value) noexcept; + Node(NodeType node_type, const std::string& value); - /** - * @brief Construct a new Node 
object, does not set the value - * - * @param type - */ - explicit Node(NodeType type) noexcept; - - /** - * @brief Construct a new Node object - * - * @param other - */ - Node(const Node& other) noexcept; - - /** - * @brief Construct a new Node object - * - * @param other - */ - Node& operator=(Node other) noexcept; - - /** - * @brief Construct a new Node object - * - * @param other - */ - void swap(Node& other) noexcept; + explicit Node(NodeType node_type); + explicit Node(double value); + explicit Node(long value); + explicit Node(int value); + explicit Node(Keyword value); + explicit Node(const std::vector& nodes); /** * @brief Return the string held by the value (if the node type allows it) - * - * @return const std::string& + * + * @return const std::string& */ const std::string& string() const noexcept; /** * @brief Return the number held by the value (if the node type allows it) - * - * @return double + * + * @return double */ double number() const noexcept; /** * @brief Return the keyword held by the value (if the node type allows it) - * - * @return Keyword + * + * @return Keyword */ Keyword keyword() const noexcept; /** * @brief Every node has a list as well as a value so we can push_back on all node no matter their type - * + * * @param node a sub-node to push on the list held by the current node */ void push_back(const Node& node) noexcept; /** * @brief Return the list of sub-nodes held by the node - * - * @return std::vector& + * + * @return std::vector& */ std::vector& list() noexcept; /** * @brief Return the list of sub-nodes held by the node - * - * @return const std::vector& + * + * @return const std::vector& */ const std::vector& constList() const noexcept; /** * @brief Return the node type - * - * @return NodeType + * + * @return NodeType */ NodeType nodeType() const noexcept; /** * @brief Set the Node Type object - * - * @param type + * + * @param type */ void setNodeType(NodeType type) noexcept; /** * @brief Set the String object - * - * 
@param value + * + * @param value */ void setString(const std::string& value) noexcept; - /** - * @brief Set the Number object - * - * @param value - */ - void setNumber(double value) noexcept; - - /** - * @brief Set the Keyword object - * - * @param kw - */ - void setKeyword(Keyword kw) noexcept; - /** * @brief Set the Position of the node in the text - * - * @param line - * @param col + * + * @param line + * @param col */ void setPos(std::size_t line, std::size_t col) noexcept; /** * @brief Set the original Filename where the node was - * - * @param filename + * + * @param filename */ void setFilename(const std::string& filename) noexcept; /** * @brief Get the line at which this node was created - * - * @return std::size_t + * + * @return std::size_t */ std::size_t line() const noexcept; /** * @brief Get the column at which this node was created - * - * @return std::size_t + * + * @return std::size_t */ std::size_t col() const noexcept; /** * @brief Return the filename in which this node was created - * - * @return const std::string& + * + * @return const std::string& */ const std::string& filename() const noexcept; - friend std::ostream& operator<<(std::ostream& os, const Node& N) noexcept; + friend ARK_API std::ostream& operator<<(std::ostream& os, const Node& N) noexcept; friend void swap(Node& lhs, Node& rhs) noexcept; friend bool operator==(const Node& A, const Node& B); friend bool operator<(const Node& A, const Node& B); friend bool operator!(const Node& A); private: - /** - * @brief Construct a new Node object. - * This is private because it is only used by the static member of this class - * to generate specialized versions of the node. 
- * - * @param value - * @param type - */ - explicit Node(const std::string& value, NodeType const& type) noexcept; NodeType m_type; Value m_value; - std::vector m_list; // position of the node in the original code, useful when it comes to parser errors std::size_t m_line = 0, m_col = 0; std::string m_filename = ""; }; - std::ostream& operator<<(std::ostream& os, const std::vector& N) noexcept; - - template - Node make_node(T&& value, std::size_t line, std::size_t col, const std::string& file) - { - Node n(std::forward(value)); - n.setPos(line, col); - n.setFilename(file); - return n; - } + ARK_API std::ostream& operator<<(std::ostream& os, const std::vector& node) noexcept; - inline Node make_node_list(std::size_t line, std::size_t col, const std::string& file) - { - Node n(NodeType::List); - n.setPos(line, col); - n.setFilename(file); - return n; - } + const Node& getTrueNode(); + const Node& getFalseNode(); + const Node& getNilNode(); + const Node& getListNode(); inline std::string typeToString(const Node& node) noexcept { diff --git a/include/Ark/Compiler/AST/Optimizer.hpp b/include/Ark/Compiler/AST/Optimizer.hpp index d711caf99..31647fc0f 100644 --- a/include/Ark/Compiler/AST/Optimizer.hpp +++ b/include/Ark/Compiler/AST/Optimizer.hpp @@ -20,7 +20,6 @@ #include #include #include -#include namespace Ark::internal { @@ -42,7 +41,7 @@ namespace Ark::internal * * @param ast */ - void feed(const Node& ast); + void process(const Node& ast); /** * @brief Returns the modified AST diff --git a/include/Ark/Compiler/AST/Parser.hpp b/include/Ark/Compiler/AST/Parser.hpp index 7d8f9a981..71a615293 100644 --- a/include/Ark/Compiler/AST/Parser.hpp +++ b/include/Ark/Compiler/AST/Parser.hpp @@ -1,180 +1,214 @@ -/** - * @file Parser.hpp - * @author Alexandre Plateau (lexplt.dev@gmail.com) - * @brief Parses a token stream into an AST by using the Ark::Node - * @version 0.4 - * @date 2020-10-27 - * - * @copyright Copyright (c) 2020-2021 - * - */ - #ifndef COMPILER_AST_PARSER_HPP 
#define COMPILER_AST_PARSER_HPP +#include +#include +#include +#include +#include + #include -#include -#include +#include #include -#include +#include -#include -#include -#include +#include namespace Ark::internal { - inline NodeType similarNodetypeFromTokentype(TokenType tt) - { - if (tt == TokenType::Capture) - return NodeType::Capture; - else if (tt == TokenType::GetField) - return NodeType::GetField; - else if (tt == TokenType::Spread) - return NodeType::Spread; - - return NodeType::Symbol; - } - - /** - * @brief The parser is responsible of constructing the Abstract Syntax Tree from a token list - * - */ - class Parser + class ARK_API Parser : public BaseParser { public: - /** - * @brief Construct a new Parser object - * - * @param debug the debug level - * @param options the parsing options - * @param lib_env fallback library search path - */ - Parser(unsigned debug, uint16_t options, const std::vector& lib_env) noexcept; - - /** - * @brief Give the code to parse - * - * @param code the ArkScript code - * @param filename the name of the file - */ - void feed(const std::string& code, const std::string& filename = ARK_NO_NAME_FILE); - - /** - * @brief Return the generated AST - * - * @return const Node& - */ - const Node& ast() const noexcept; - - /** - * @brief Return the list of files imported by the code given to the parser - * - * Each path of each imported file is relative to the filename given when feeding the parser. 
- * - * @return const std::vector& - */ - const std::vector& getImports() const noexcept; - - friend std::ostream& operator<<(std::ostream& os, const Parser& P) noexcept; + Parser(); + + void processFile(const std::string& filename); + void processString(const std::string& code); + + const Node& ast() const; + const std::vector& imports() const; private: - unsigned m_debug; - std::vector m_libenv; - uint16_t m_options; - Lexer m_lexer; Node m_ast; - Token m_last_token; - - // path of the current file - std::string m_file; - // source code of the current file - std::string m_code; - // the files included by the "includer" to avoid multiple includes - std::vector m_parent_include; - - /** - * @brief Applying syntactic sugar: {...} => (begin...), [...] => (list ...) - * - * @param tokens a list of tokens - */ - void sugar(std::vector& tokens) noexcept; - - /** - * @brief Parse a list of tokens recursively - * - * @param tokens - * @param authorize_capture if we are authorized to consume TokenType::Capture tokens - * @param authorize_field_read if we are authorized to consume TokenType::GetField tokens - * @param in_macro if we are in a macro, there a bunch of things we can tolerate - * @return Node - */ - Node parse(std::list& tokens, bool authorize_capture = false, bool authorize_field_read = false, bool in_macro = false); - - void parseIf(Node&, std::list&, bool); - void parseLetMut(Node&, Token&, std::list&, bool); - void parseSet(Node&, Token&, std::list&, bool); - void parseFun(Node&, Token&, std::list&, bool); - void parseWhile(Node&, Token&, std::list&, bool); - void parseBegin(Node&, std::list&, bool); - void parseImport(Node&, std::list&); - void parseQuote(Node&, std::list&, bool); - void parseDel(Node&, std::list&); - Node parseShorthand(Token&, std::list&, bool); - void checkForInvalidTokens(Node&, Token&, bool, bool, bool); - - /** - * @brief Get the next token if possible, from a list of tokens - * - * The list of tokens is modified. 
- * - * @param tokens list of tokens to get the next token from - * @return Token - */ - Token nextToken(std::list& tokens); - - /** - * @brief Convert a token to a node - * - * @param token the token to converts - * @return Node - */ - Node atom(const Token& token); - - /** - * @brief Search for all the includes in a given node, in its sub-nodes and replace them by the code of the included file - * - * @param n - * @param parent the parent node of the current one - * @param pos the position of the child node in the parent node list - * @return true if we found an import and replaced it by the corresponding code - */ - bool checkForInclude(Node& n, Node& parent, std::size_t pos = 0); - - /** - * @brief Seek a file in the lib folder and everywhere - * - * @param file - * @return std::string - */ - std::string seekFile(const std::string& file); - - /** - * @brief Throw a parse exception is the given predicated is false - * - * @param pred - * @param message error message to use - * @param token concerned token - */ - void expect(bool pred, const std::string& message, Token token); - - /** - * @brief Throw a parse error related to a token (seek it in the related file and highlight the error) - * - * @param message - * @param token - */ - [[noreturn]] void throwParseError(const std::string& message, Token token); + std::vector m_imports; + unsigned m_allow_macro_behavior; ///< Toggled on when inside a macro definition, off afterward + + void run(); + + std::optional node(); + std::optional letMutSet(); + std::optional del(); + std::optional condition(); + std::optional loop(); + std::optional import_(); + std::optional block(); + std::optional functionArgs(); + std::optional function(); + std::optional macroCondition(); + std::optional macroBlock(); + std::optional macroArgs(); + std::optional macro(); + std::optional functionCall(); + std::optional list(); + + inline std::optional number() + { + auto pos = getCount(); + + std::string res; + if (signedNumber(&res)) + { 
+ double output; + if (Utils::isDouble(res, &output)) + return Node(output); + else + { + backtrack(pos); + error("Is not a valid number", res); + } + } + return std::nullopt; + } + + inline std::optional string() + { + std::string res; + if (accept(IsChar('"'))) + { + while (true) + { + if (accept(IsChar('\\'))) + { + if (accept(IsChar('"'))) + res += '\"'; + else if (accept(IsChar('\\'))) + res += '\\'; + else if (accept(IsChar('n'))) + res += '\n'; + else if (accept(IsChar('t'))) + res += '\t'; + else if (accept(IsChar('v'))) + res += '\v'; + else if (accept(IsChar('r'))) + res += '\r'; + else if (accept(IsChar('a'))) + res += '\a'; + else if (accept(IsChar('b'))) + res += '\b'; + else if (accept(IsChar('0'))) + res += '\0'; + else if (accept(IsChar('f'))) + res += '\f'; + else if (accept(IsChar('u'))) + { + std::string seq; + if (hexNumber(4, &seq)) + { + char utf8_str[5]; + utf8::decode(seq.c_str(), utf8_str); + if (*utf8_str == '\0') + error("Invalid escape sequence", "\\u" + seq); + res += utf8_str; + } + else + error("Invalid escape sequence", "\\u"); + } + else if (accept(IsChar('U'))) + { + std::string seq; + if (hexNumber(8, &seq)) + { + std::size_t begin = 0; + for (; seq[begin] == '0'; ++begin) + ; + char utf8_str[5]; + utf8::decode(seq.c_str() + begin, utf8_str); + if (*utf8_str == '\0') + error("Invalid escape sequence", "\\U" + seq); + res += utf8_str; + } + else + error("Invalid escape sequence", "\\U"); + } + else + { + backtrack(getCount() - 1); + error("Unknown escape sequence", "\\"); + } + } + else + accept(IsNot(IsEither(IsChar('\\'), IsChar('"'))), &res); + + if (accept(IsChar('"'))) + break; + else if (isEOF()) + errorMissingSuffix('"', "string"); + } + + return Node(NodeType::String, res); + } + return std::nullopt; + } + + inline std::optional field() + { + std::string symbol; + if (!name(&symbol)) + return std::nullopt; + + Node leaf = Node(NodeType::Field); + leaf.push_back(Node(NodeType::Symbol, symbol)); + + while (true) + { + if 
(leaf.list().size() == 1 && !accept(IsChar('.'))) // Symbol:abc + return std::nullopt; + + if (leaf.list().size() > 1 && !accept(IsChar('.'))) + break; + std::string res; + if (!name(&res)) + errorWithNextToken("Expected a field name: ."); + leaf.push_back(Node(NodeType::Symbol, res)); + } + + return leaf; + } + + inline std::optional symbol() + { + std::string res; + if (!name(&res)) + return std::nullopt; + return Node(NodeType::Symbol, res); + } + + inline std::optional spread() + { + std::string res; + if (sequence("...")) + { + if (!name(&res)) + errorWithNextToken("Expected a name for the variadic"); + return Node(NodeType::Spread, res); + } + return std::nullopt; + } + + inline std::optional nil() + { + if (!accept(IsChar('('))) + return std::nullopt; + newlineOrComment(); + if (!accept(IsChar(')'))) + return std::nullopt; + + return Node(NodeType::Symbol, "nil"); + } + + std::optional atom(); + std::optional anyAtomOf(std::initializer_list types); + std::optional nodeOrValue(); + std::optional wrapped(std::optional (Parser::*parser)(), const std::string& name, char prefix, char suffix); }; } diff --git a/include/Ark/Compiler/AST/Predicates.hpp b/include/Ark/Compiler/AST/Predicates.hpp new file mode 100644 index 000000000..d36727046 --- /dev/null +++ b/include/Ark/Compiler/AST/Predicates.hpp @@ -0,0 +1,201 @@ +#ifndef SRC_PREDICATES_HPP +#define SRC_PREDICATES_HPP + +#include +#include + +#include + +namespace Ark::internal +{ + struct CharPred + { + const std::string name; + + CharPred(const std::string& n) : + name(n) {} + + virtual bool operator()(const utf8_char_t::codepoint_t c) const = 0; + }; + + inline struct IsSpace : public CharPred + { + IsSpace() : + CharPred("space") {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return 0 <= c && c <= 255 && std::isspace(c) != 0; + } + } IsSpace; + + inline struct IsInlineSpace : public CharPred + { + IsInlineSpace() : + CharPred("inline space") {} + virtual bool 
operator()(const utf8_char_t::codepoint_t c) const override + { + return 0 <= c && c <= 255 && (std::isspace(c) != 0) && (c != '\n') && (c != '\r'); + } + } IsInlineSpace; + + inline struct IsDigit : public CharPred + { + IsDigit() : + CharPred("digit") {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return 0 <= c && c <= 255 && std::isdigit(c) != 0; + } + } IsDigit; + + inline struct IsHex : public CharPred + { + IsHex() : + CharPred("hex") {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return 0 <= c && c <= 255 && std::isxdigit(c) != 0; + } + } IsHex; + + inline struct IsUpper : public CharPred + { + IsUpper() : + CharPred("uppercase") {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return 0 <= c && c <= 255 && std::isupper(c) != 0; + } + } IsUpper; + + inline struct IsLower : public CharPred + { + IsLower() : + CharPred("lowercase") {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return 0 <= c && c <= 255 && std::islower(c) != 0; + } + } IsLower; + + inline struct IsAlpha : public CharPred + { + IsAlpha() : + CharPred("alphabetic") {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return 0 <= c && c <= 255 && std::isalpha(c) != 0; + } + } IsAlpha; + + inline struct IsAlnum : public CharPred + { + IsAlnum() : + CharPred("alphanumeric") {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return 0 <= c && c <= 255 && std::isalnum(c) != 0; + } + } IsAlnum; + + inline struct IsPrint : public CharPred + { + IsPrint() : + CharPred("printable") {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return 0 <= c && c <= 255 && std::isprint(c) != 0; + } + } IsPrint; + + struct IsChar : public CharPred + { + explicit IsChar(const char c) : + CharPred("'" + std::string(1, c) + "'"), m_k(c) + {} + explicit IsChar(const utf8_char_t c) : + 
CharPred(std::string(c.c_str())), m_k(c.codepoint()) + {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return m_k == c; + } + + private: + const utf8_char_t::codepoint_t m_k; + }; + + struct IsEither : public CharPred + { + explicit IsEither(const CharPred& a, const CharPred& b) : + CharPred("(" + a.name + " | " + b.name + ")"), m_a(a), m_b(b) + {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return m_a(c) || m_b(c); + } + + private: + const CharPred& m_a; + const CharPred& m_b; + }; + + struct IsNot : public CharPred + { + explicit IsNot(const CharPred& a) : + CharPred("~" + a.name), m_a(a) + {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + return !m_a(c); + } + + private: + const CharPred& m_a; + }; + + inline struct IsSymbol : public CharPred + { + IsSymbol() : + CharPred("sym") {} + virtual bool operator()(const utf8_char_t::codepoint_t c) const override + { + switch (c) + { + case ':': + case '!': + case '?': + case '@': + case '_': + case '-': + case '+': + case '*': + case '/': + case '|': + case '=': + case '<': + case '>': + case '%': + case '$': + return true; + + default: + return false; + } + } + } IsSymbol; + + inline struct IsAny : public CharPred + { + IsAny() : + CharPred("any") {} + virtual bool operator()(const utf8_char_t::codepoint_t) const override + { + return true; + } + } IsAny; + + const IsChar IsMinus('-'); +} + +#endif diff --git a/include/Ark/Compiler/AST/Token.hpp b/include/Ark/Compiler/AST/Token.hpp deleted file mode 100644 index 77ec9ec84..000000000 --- a/include/Ark/Compiler/AST/Token.hpp +++ /dev/null @@ -1,82 +0,0 @@ -/** - * @file Token.hpp - * @author Alexandre Plateau (lexplt.dev@gmail.com) - * @brief Token definition for ArkScript - * @version 0.1 - * @date 2021-10-02 - * - * @copyright Copyright (c) 2021 - * - */ - -#ifndef ARK_COMPILER_AST_TOKEN_HPP -#define ARK_COMPILER_AST_TOKEN_HPP - -#include -#include -#include - 
-namespace Ark::internal -{ - enum class TokenType - { - Grouping, - String, - Number, - Operator, - Identifier, - Capture, - GetField, - Keyword, - Skip, - Comment, - Shorthand, - Spread, - Mismatch - }; - - // TokenType to string - constexpr std::array tokentype_string = { - "Grouping", - "String", - "Number", - "Operator", - "Identifier", - "Capture", - "GetField", - "Keyword", - "Skip", - "Comment", - "Shorthand", - "Spread", - "Mistmatch" - }; - - struct Token - { - TokenType type; - std::string token; - std::size_t line; - std::size_t col; - - /** - * @brief Construct a new Token object - * - */ - Token() = default; - - /** - * @brief Construct a new Token object - * - * @param type the token type - * @param tok the token value - * @param line the line where we found the token - * @param col the column at which was the token - */ - Token(TokenType type, const std::string& tok, std::size_t line, std::size_t col) noexcept : - type(type), token(tok), line(line), col(col) - {} - }; -} - -#endif diff --git a/include/Ark/Compiler/AST/makeErrorCtx.hpp b/include/Ark/Compiler/AST/makeErrorCtx.hpp deleted file mode 100644 index 782d461b4..000000000 --- a/include/Ark/Compiler/AST/makeErrorCtx.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/** - * @file makeErrorCtx.hpp - * @author Alexandre Plateau (lexplt.dev@gmail.com) - * @brief Create string error context for AST errors - * @version 0.2 - * @date 2022-02-19 - * - * @copyright Copyright (c) 2020-2022 - * - */ - -#ifndef COMPILER_AST_MAKEERRORCTX_HPP -#define COMPILER_AST_MAKEERRORCTX_HPP - -#include -#include - -#include - -namespace Ark::internal -{ - struct LineColorContextCounts - { - int open_parentheses = 0; - int open_square_braces = 0; - int open_curly_braces = 0; - }; - - /** - * @brief Construct an error message based on a given node - * @details It opens the related file at the line and column of the node, - * and display context, plus underline the problem with a serie of ^. 
- * - * @param message - * @param node - * @return std::string the complete generated error message - */ - std::string makeNodeBasedErrorCtx(const std::string& message, const Node& node); - - /** - * @brief Construct an error message based on a given match in the code - * @details Mostly used by the Lexer and Parser since they don't have Nodes to work on - * - * @param match the identified token, causing a problem - * @param line line of the token - * @param col starting column of the token - * @param code the whole code of the file - * @return std::string the complete generated error message - */ - std::string makeTokenBasedErrorCtx(const std::string& match, std::size_t line, std::size_t col, const std::string& code); - - /** - * @brief Add colors to highlight matching parentheses/curly braces/square braces on a line - * - * @param line the line of code to colorize - * @param line_color_context_counts a LineColorContextCounts to manipulate the running counts of open pairings - * @return std::string a colorized line of code - */ - std::string colorizeLine(const std::string& line, LineColorContextCounts& line_color_context_counts); - - /** - * @brief Check if the character passed in can be paired (parentheses, curly braces, or square braces) - * - * @param c - * @return bool - */ - inline bool isPairableChar(const char c) - { - return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}'; - } -} - -#endif diff --git a/include/Ark/Compiler/AST/utf8_char.hpp b/include/Ark/Compiler/AST/utf8_char.hpp new file mode 100644 index 000000000..9d639ac46 --- /dev/null +++ b/include/Ark/Compiler/AST/utf8_char.hpp @@ -0,0 +1,85 @@ +#ifndef SRC_UTF8_CHAR_HPP +#define SRC_UTF8_CHAR_HPP + +#include +#include +#include +#include + +#undef max + +namespace Ark::internal +{ + class utf8_char_t + { + public: + using codepoint_t = int; + using length_t = unsigned char; + using repr_t = std::array; + + utf8_char_t() : + m_codepoint(0), m_length(0), m_repr({ 0 }) {} + + 
utf8_char_t(codepoint_t cp, length_t len, repr_t&& repr) : + m_codepoint(cp), m_length(len), m_repr(repr) {} + + // https://github.com/sheredom/utf8.h/blob/4e4d828174c35e4564c31a9e35580c299c69a063/utf8.h#L1178 + static std::pair at(std::string::iterator it) + { + codepoint_t codepoint = 0; + length_t length = 0; + repr_t repr = { 0 }; + + if (0xf0 == (0xf8 & *it)) // 4 byte utf8 codepoint + { + codepoint = (static_cast(0x07 & *it) << 18) | + (static_cast(0x3f & *(it + 1)) << 12) | + (static_cast(0x3f & *(it + 2)) << 6) | + static_cast(0x3f & *(it + 3)); + length = 4; + } + else if (0xe0 == (0xf0 & *it)) // 3 byte utf8 codepoint + { + codepoint = (static_cast(0x0f & *it) << 12) | + (static_cast(0x3f & *(it + 1)) << 6) | + static_cast(0x3f & *(it + 2)); + length = 3; + } + else if (0xc0 == (0xe0 & *it)) // 2 byte utf8 codepoint + { + codepoint = (static_cast(0x1f & *it) << 6) | + static_cast(0x3f & *(it + 1)); + length = 2; + } + else // 1 byte utf8 codepoint otherwise + { + codepoint = static_cast(*it); + length = 1; + } + + for (length_t i = 0; i < length; ++i) + repr[i] = static_cast(*(it + static_cast(i))); + + return std::make_pair(it + static_cast(length), + utf8_char_t(codepoint, length, std::move(repr))); + } + + bool isPrintable() const + { + if (m_codepoint < std::numeric_limits::max()) + return std::isprint(m_codepoint); + return true; + } + + const char* c_str() const { return reinterpret_cast(m_repr.data()); } + std::size_t size() const { return static_cast(m_length); } + codepoint_t codepoint() const { return m_codepoint; } + + private: + codepoint_t m_codepoint; + length_t m_length; + repr_t m_repr; + }; +} + +#endif diff --git a/include/Ark/Compiler/Common.hpp b/include/Ark/Compiler/Common.hpp index 3d2391154..a7e2b4393 100644 --- a/include/Ark/Compiler/Common.hpp +++ b/include/Ark/Compiler/Common.hpp @@ -29,26 +29,26 @@ namespace Ark::internal { Symbol, Capture, - GetField, Keyword, String, Number, List, - Macro, Spread, + Field, + Macro, Unused }; 
constexpr std::array nodeTypes = { "Symbol", "Capture", - "GetField", "Keyword", "String", "Number", "List", - "Macro", "Spread", + "Field", + "Macro", "Unused" }; @@ -63,12 +63,11 @@ namespace Ark::internal While, Begin, Import, - Quote, Del }; /// List of available keywords in ArkScript - constexpr std::array keywords = { + constexpr std::array keywords = { "fun", "let", "mut", @@ -77,7 +76,6 @@ namespace Ark::internal "while", "begin", "import", - "quote", "del" }; diff --git a/include/Ark/Compiler/Compiler.hpp b/include/Ark/Compiler/Compiler.hpp index a5a8d4524..1c0f05615 100644 --- a/include/Ark/Compiler/Compiler.hpp +++ b/include/Ark/Compiler/Compiler.hpp @@ -2,7 +2,7 @@ * @file Compiler.hpp * @author Alexandre Plateau (lexplt.dev@gmail.com) * @brief ArkScript compiler is in charge of transforming the AST into bytecode - * @version 1.2 + * @version 1.3 * @date 2020-10-27 * * @copyright Copyright (c) 2020-2021 @@ -21,13 +21,12 @@ #include #include #include -#include -#include #include namespace Ark { class State; + class Welder; /** * @brief The ArkScript bytecode compiler @@ -40,44 +39,27 @@ namespace Ark * @brief Construct a new Compiler object * * @param debug the debug level - * @param options the compilers options */ - Compiler(unsigned debug, const std::vector& libenv, uint16_t options = DefaultFeatures); - - /** - * @brief Feed the differents variables with information taken from the given source code file - * - * @param code the code of the file - * @param filename the name of the file - */ - void feed(const std::string& code, const std::string& filename = ARK_NO_NAME_FILE); + Compiler(unsigned debug); /** * @brief Start the compilation * + * @param ast */ - void compile(); - - /** - * @brief Save generated bytecode to a file - * - * @param file the name of the file where the bytecode will be saved - */ - void saveTo(const std::string& file); + void process(const internal::Node& ast); /** * @brief Return the constructed bytecode object * * @return 
const bytecode_t& */ - const bytecode_t& bytecode() noexcept; + const bytecode_t& bytecode() const noexcept; friend class Ark::State; + friend class Ark::Welder; private: - internal::Parser m_parser; - internal::Optimizer m_optimizer; - uint16_t m_options; // tables: symbols, values, plugins and codes std::vector m_symbols; std::vector m_defined_symbols; @@ -127,16 +109,6 @@ namespace Ark return &m_temp_pages[-i - 1]; } - /** - * @brief Count the number of "valid" ark objects in a node - * @details Isn't considered valid a GetField, because we use - * this function to count the number of arguments of function calls. - * - * @param lst - * @return std::size_t - */ - std::size_t countArkObjects(const std::vector& lst) noexcept; - /** * @brief Checking if a symbol is an operator * @@ -238,7 +210,6 @@ namespace Ark void compileFunction(const internal::Node& x, int p, bool is_result_unused, const std::string& var_name); void compileLetMutSet(internal::Keyword n, const internal::Node& x, int p); void compileWhile(const internal::Node& x, int p); - void compileQuote(const internal::Node& x, int p, bool is_result_unused, bool is_terminal, const std::string& var_name); void compilePluginImport(const internal::Node& x, int p); void handleCalls(const internal::Node& x, int p, bool is_result_unused, bool is_terminal, const std::string& var_name); diff --git a/include/Ark/Compiler/ImportSolver.hpp b/include/Ark/Compiler/ImportSolver.hpp new file mode 100644 index 000000000..fdbc3a799 --- /dev/null +++ b/include/Ark/Compiler/ImportSolver.hpp @@ -0,0 +1,61 @@ +#ifndef ARK_COMPILER_IMPORTSOLVER_HPP +#define ARK_COMPILER_IMPORTSOLVER_HPP + +#include +#include +#include +#include + +#include +#include +#include + +namespace Ark::internal +{ + class ImportSolver final + { + public: + ImportSolver(unsigned debug, const std::vector& libenv); + + void process(const std::filesystem::path& root, const Node& origin_ast, const std::vector& origin_imports); + + const Node& ast() const 
noexcept; + + private: + unsigned m_debug; + std::vector m_libenv; + std::filesystem::path m_root; ///< Folder were the entry file is + Node m_ast; + std::unordered_map m_modules; ///< Package to module map + // TODO is this ok? is this fine? this is sort of ugly + std::vector m_imported; ///< List of imports, in the order they were found and parsed + + /** + * @brief Visits the AST, looking for import nodes to replace with their parsed module version + * @param ast + * @return + */ + std::pair findAndReplaceImports(const Node& ast); + + /** + * @brief Parse a given file and returns a list of its imports. + * The AST is parsed and stored in m_modules[import.prefix] + * + * @param file path to the file containing the import + * @param import current import directive + * @return std::vector imports found in the processed file + */ + std::vector parseImport(const std::filesystem::path& file, const Import& import); + + /** + * @brief Search for an import file, using the root file path + * + * @param file path to the file containing the import + * @param import current import directive + * @return std::filesystem::path + */ + std::filesystem::path findFile(const std::filesystem::path& file, const Import& import); + }; +} + +#endif diff --git a/include/Ark/Compiler/JsonCompiler.hpp b/include/Ark/Compiler/JsonCompiler.hpp index 9c50b2a3a..785bc7342 100644 --- a/include/Ark/Compiler/JsonCompiler.hpp +++ b/include/Ark/Compiler/JsonCompiler.hpp @@ -3,33 +3,31 @@ #include #include +#include #include #include #include -#include -#include +#include namespace Ark { - class ARK_API JsonCompiler + class ARK_API JsonCompiler final { public: /** * @brief Construct a new JsonCompiler object * * @param debug the debug level - * @param options the compilers options */ - JsonCompiler(unsigned debug, const std::vector& libenv, uint16_t options = DefaultFeatures); + JsonCompiler(unsigned debug, const std::vector& libenv); /** * @brief Feed the differents variables with information taken 
from the given source code file * - * @param code the code of the file * @param filename the name of the file */ - void feed(const std::string& code, const std::string& filename = ARK_NO_NAME_FILE); + void feed(const std::string& filename); /** * @brief Start the compilation @@ -39,10 +37,7 @@ namespace Ark std::string compile(); private: - internal::Parser m_parser; - internal::Optimizer m_optimizer; - uint16_t m_options; - unsigned m_debug; ///< the debug level of the compiler + Welder m_welder; /** * @brief Compile a single node and return its representation diff --git a/include/Ark/Compiler/Macros/Processor.hpp b/include/Ark/Compiler/Macros/Processor.hpp index db0803294..20b3c7864 100644 --- a/include/Ark/Compiler/Macros/Processor.hpp +++ b/include/Ark/Compiler/Macros/Processor.hpp @@ -34,16 +34,15 @@ namespace Ark::internal * @brief Construct a new Macro Processor object * * @param debug the debug level - * @param options the options flags */ - MacroProcessor(unsigned debug, uint16_t options) noexcept; + MacroProcessor(unsigned debug) noexcept; /** - * @brief Send the complete AST (after the inclusions and stuff), and work on it + * @brief Send the complete AST and work on it * * @param ast */ - void feed(const Node& ast); + void process(const Node& ast); /** * @brief Return the modified AST @@ -141,7 +140,7 @@ namespace Ark::internal * @param node node on which to operate * @param depth */ - void process(Node& node, unsigned depth); + void processNode(Node& node, unsigned depth); /** * @brief Apply a macro on a given node diff --git a/include/Ark/Compiler/Welder.hpp b/include/Ark/Compiler/Welder.hpp new file mode 100644 index 000000000..dcfc90a36 --- /dev/null +++ b/include/Ark/Compiler/Welder.hpp @@ -0,0 +1,65 @@ +/** + * @file Welder.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief In charge of welding everything needed to compile code + * @version 0.2 + * @date 2023-03-26 + * + * @copyright Copyright (c) 2023 + * + */ + +#ifndef 
ARK_COMPILER_WELDER_HPP +#define ARK_COMPILER_WELDER_HPP + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace Ark +{ + class ARK_API Welder final + { + public: + Welder(unsigned debug, const std::vector& libenv); + + /** + * @brief Register a symbol as a global in the compiler + * + * @param name + */ + void registerSymbol(const std::string& name); + + bool computeASTFromFile(const std::string& filename); + bool computeASTFromString(const std::string& code); + + bool generateBytecode(); + bool saveBytecodeToFile(const std::string& filename); + + const internal::Node& ast() const noexcept; + const bytecode_t& bytecode() const noexcept; + + private: + unsigned m_debug; ///< The debug level + std::filesystem::path m_root_file; + std::vector m_imports; + bytecode_t m_bytecode; + + internal::Parser m_parser; + internal::ImportSolver m_importer; + internal::MacroProcessor m_macro_processor; + internal::Optimizer m_optimizer; + Compiler m_compiler; + }; +} // namespace Ark + +#endif diff --git a/include/Ark/Exceptions.hpp b/include/Ark/Exceptions.hpp index b45573b04..d556b43b0 100644 --- a/include/Ark/Exceptions.hpp +++ b/include/Ark/Exceptions.hpp @@ -4,9 +4,9 @@ * @brief ArkScript homemade exceptions * @version 0.2 * @date 2020-10-27 - * + * * @copyright Copyright (c) 2020-2021 - * + * */ #ifndef INCLUDE_ARK_EXCEPTIONS_HPP @@ -16,32 +16,49 @@ #include #include #include +#include +#include +#include -#include +#include +#include namespace Ark { + namespace internal + { + class Node; + } + + class Error : public std::runtime_error + { + public: + explicit Error(const std::string& message) : + std::runtime_error(message) + {} + }; + /** * @brief A type error triggered when types don't match - * + * */ - class TypeError : public std::runtime_error + class TypeError : public Error { public: explicit TypeError(const std::string& message) : - std::runtime_error(message) + Error(message) {} }; /** * 
@brief A special zero division error triggered when a number is divided by 0 - * + * */ - class ZeroDivisionError : public std::runtime_error + class ZeroDivisionError : public Error { public: ZeroDivisionError() : - std::runtime_error( + Error( "ZeroDivisionError: In ordonary arithmetic, the expression has no meaning, " "as there is no number which, when multiplied by 0, gives a (assuming a != 0), " "and so division by zero is undefined. Since any number multiplied by 0 is 0, " @@ -51,13 +68,13 @@ namespace Ark /** * @brief A pow error triggered when we can't do a pow b - * + * */ - class PowError : public std::runtime_error + class PowError : public Error { public: PowError() : - std::runtime_error( + Error( "PowError: Can not pow the given number (a) to the given exponent (b) because " "a^b, with b being a member of the rational numbers, isn't supported.") {} @@ -65,75 +82,70 @@ namespace Ark /** * @brief An assertion error, only triggered from ArkScript code through (assert expr error-message) - * + * */ - class AssertionFailed : public std::runtime_error + class AssertionFailed : public Error { public: explicit AssertionFailed(const std::string& message) : - std::runtime_error("AssertionFailed: " + message) + Error("AssertionFailed: " + message) {} }; /** - * @brief SyntaxError thrown by the lexer - * + * @brief CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself) + * */ - class SyntaxError : public std::runtime_error + struct CodeError : public Error { - public: - explicit SyntaxError(const std::string& message) : - std::runtime_error("SyntaxError: " + message) - {} - }; + const std::string filename; + const std::size_t line; + const std::size_t col; + const std::string expr; + const std::optional symbol; - /** - * @brief ParseError thrown by the parser - * - */ - class ParseError : public std::runtime_error - { - public: - explicit ParseError(const std::string& message) : - std::runtime_error("ParseError: " + message) + 
CodeError( + const std::string& what, + const std::string& filename, + std::size_t lineNum, + std::size_t column, + std::string exp, + std::optional opt_sym = std::nullopt) : + Error(what), + filename(filename), line(lineNum), col(column), expr(std::move(exp)), symbol(opt_sym) {} }; - /** - * @brief OptimizerError thrown by the AST optimizer - * - */ - class OptimizerError : public std::runtime_error + namespace Diagnostics { - public: - explicit OptimizerError(const std::string& message) : - std::runtime_error("OptimizerError: " + message) - {} - }; + /** + * @brief Helper to create a colorized context to report errors to the user + * + * @param os stream in which the error will be written + * @param code content of the source file where the error is + * @param line line where the error is + * @param col_start where the error starts on the given line + * @param sym_size bad expression that triggered the error + */ + ARK_API void makeContext(std::ostream& os, const std::string& code, std::size_t line, std::size_t col_start, std::size_t sym_size); - /** - * @brief MacroProcessingError thrown by the compiler - * - */ - class MacroProcessingError : public std::runtime_error - { - public: - explicit MacroProcessingError(const std::string& message) : - std::runtime_error("MacroProcessingError: " + message) - {} - }; + /** + * @brief Helper used by the compiler to generate a colorized context from a node + * + * @param message error message to be included in the context + * @param node AST node with the error + * @return std::string + */ + ARK_API std::string makeContextWithNode(const std::string& message, const internal::Node& node); - /** - * @brief CompilationError thrown by the compiler - * - */ - class CompilationError : public std::runtime_error - { - public: - explicit CompilationError(const std::string& message) : - std::runtime_error("CompilationError: " + message) - {} - }; + /** + * @brief Generate a diagnostic from an error and print it to the standard output 
+ * + * @param e code error + * @param code code of the file in which the error occured + */ + ARK_API void generate(const CodeError& e, std::string code = ""); + } } #endif diff --git a/include/Ark/Files.hpp b/include/Ark/Files.hpp index c9fd31736..4524f9ac1 100644 --- a/include/Ark/Files.hpp +++ b/include/Ark/Files.hpp @@ -4,9 +4,9 @@ * @brief Lots of utilities about the filesystem * @version 0.1 * @date 2021-11-25 - * + * * @copyright Copyright (c) 2021 - * + * */ #ifndef INCLUDE_ARK_FILES_HPP @@ -21,7 +21,7 @@ namespace Ark::Utils { /** * @brief Checks if a file exists - * + * * @param name the file name * @return true on success * @return false on failure @@ -41,9 +41,9 @@ namespace Ark::Utils /** * @brief Helper to read a file - * + * * @param name the file name - * @return std::string + * @return std::string */ inline std::string readFile(const std::string& name) { @@ -54,33 +54,32 @@ namespace Ark::Utils std::istreambuf_iterator()); } - /** - * @brief Get the directory from a path - * - * @param path - * @return std::string - */ - inline std::string getDirectoryFromPath(const std::string& path) + inline std::vector readFileAsBytes(const std::string& name) { - return (std::filesystem::path(path)).parent_path().string(); - } + // admitting the file exists + std::ifstream ifs(name, std::ios::binary | std::ios::ate); + if (!ifs.good()) + return std::vector {}; - /** - * @brief Get the filename from a path - * - * @param path - * @return std::string - */ - inline std::string getFilenameFromPath(const std::string& path) - { - return (std::filesystem::path(path)).filename().string(); + std::size_t pos = ifs.tellg(); + // reserve appropriate number of bytes + std::vector temp(pos); + ifs.seekg(0, std::ios::beg); + ifs.read(&temp[0], pos); + ifs.close(); + + auto bytecode = std::vector(pos); + // TODO would it be faster to memcpy? 
+ for (std::size_t i = 0; i < pos; ++i) + bytecode[i] = static_cast(temp[i]); + return bytecode; } /** * @brief Get the canonical relative path from a path - * - * @param path - * @return std::string + * + * @param path + * @return std::string */ inline std::string canonicalRelPath(const std::string& path) { diff --git a/include/Ark/REPL/Repl.hpp b/include/Ark/REPL/Repl.hpp index 186d2d7d4..8fc5f8cff 100644 --- a/include/Ark/REPL/Repl.hpp +++ b/include/Ark/REPL/Repl.hpp @@ -2,7 +2,7 @@ * @file Repl.hpp * @author Alexandre Plateau (lexplt.dev@gmail.com) * @brief ArkScript REPL - Read Eval Print Loop - * @version 0.1 + * @version 0.2 * @date 2020-10-27 * * @copyright Copyright (c) 2020-2021 @@ -13,6 +13,7 @@ #define ARK_REPL_REPL_HPP #include +#include #include #include @@ -28,10 +29,9 @@ namespace Ark /** * @brief Construct a new Repl object * - * @param options the REPL options * @param libenv search path for the std library */ - Repl(uint16_t options, const std::vector& libenv); + Repl(const std::vector& libenv); /** * @brief Start the REPL @@ -40,11 +40,10 @@ namespace Ark int run(); private: - uint16_t m_options; Replxx m_repl; unsigned m_lines; int m_old_ip; - std::vector m_libenv; + std::vector m_libenv; inline void print_repl_header(); int count_open_parentheses(const std::string& line); diff --git a/include/Ark/TypeChecker.hpp b/include/Ark/TypeChecker.hpp index 744b6d8e0..c16f97f6f 100644 --- a/include/Ark/TypeChecker.hpp +++ b/include/Ark/TypeChecker.hpp @@ -17,13 +17,8 @@ #include #include -#define NOMINMAX #include -#ifdef max -# undef max -#endif - namespace Ark::types { namespace details @@ -34,7 +29,7 @@ namespace Ark::types template bool checkN([[maybe_unused]] const std::vector& args) { - return true; + return I >= args.size(); } template diff --git a/include/Ark/VM/State.hpp b/include/Ark/VM/State.hpp index de6c52a23..277cda605 100644 --- a/include/Ark/VM/State.hpp +++ b/include/Ark/VM/State.hpp @@ -16,10 +16,11 @@ #include #include #include 
+#include #include -#include -#include +#include +#include namespace Ark { @@ -33,10 +34,9 @@ namespace Ark /** * @brief Construct a new State object * - * @param options the options for the virtual machine, compiler, and parser * @param libpath a list of search paths for the std library */ - State(uint16_t options = DefaultFeatures, const std::vector& libpath = {}) noexcept; + State(const std::vector& libpath = {}) noexcept; /** * @brief Feed the state by giving it the path to an existing bytecode file @@ -101,7 +101,7 @@ namespace Ark * * @param libenv the list of std search paths to set */ - void setLibDirs(const std::vector& libenv) noexcept; + void setLibDirs(const std::vector& libenv) noexcept; /** * @brief Reset State (all member variables related to execution) @@ -114,6 +114,8 @@ namespace Ark friend class Repl; private: + bool checkMagic(const bytecode_t& bytecode); + /** * @brief Called to configure the state (set the bytecode, debug level, call the compiler...) * @@ -132,15 +134,14 @@ namespace Ark inline void throwStateError(const std::string& message) { - throw std::runtime_error("StateError: " + message); + throw Error("StateError: " + message); } unsigned m_debug_level; bytecode_t m_bytecode; - std::vector m_libenv; + std::vector m_libenv; std::string m_filename; - uint16_t m_options; // related to the bytecode std::vector m_symbols; diff --git a/include/Ark/VM/VM.hpp b/include/Ark/VM/VM.hpp index 0df4087c5..1dea730e6 100644 --- a/include/Ark/VM/VM.hpp +++ b/include/Ark/VM/VM.hpp @@ -32,6 +32,7 @@ #include #include #include +#include #undef abs #include diff --git a/include/termcolor/proxy.hpp b/include/termcolor/proxy.hpp index 9cbdb5e08..e3f5dbbcc 100644 --- a/include/termcolor/proxy.hpp +++ b/include/termcolor/proxy.hpp @@ -5,6 +5,15 @@ # define NOMINMAX #endif +#ifdef max +# undef max +#endif + +#ifdef abs +# undef abs +#endif + +#include #include #endif diff --git a/include/utf8.hpp b/include/utf8.hpp new file mode 100644 index 
000000000..0f7a2af9f --- /dev/null +++ b/include/utf8.hpp @@ -0,0 +1,276 @@ +#ifndef UTF8_DECODER_H +#define UTF8_DECODER_H + +#include +#include + +namespace utf8 +{ + enum class Utf8Type + { + Ascii = 0, + LatinExtra = 1, + BasicMultiLingual = 2, + OthersPlanesUnicode = 3, + OutRange = 4 + }; + + namespace details + { + // clang-format off + constexpr std::array ASCIIHexToInt = + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }; + // clang-format on + } + + inline Utf8Type utf8type(const char* input, int32_t* out = nullptr) + { + int32_t codepoint = 0; + int shift = 0; + + for (const char* s = input; *s != 0; ++s) + { + codepoint = ((codepoint << shift) | details::ASCIIHexToInt[*s]); + shift = 4; + } + + if (out != nullptr) + *out = codepoint; + + if (codepoint >= 0x0000 && codepoint <= 0x007f) + return Utf8Type::Ascii; + else if (codepoint > 0x007f && codepoint <= 0x07ff) + return Utf8Type::LatinExtra; + else if (codepoint > 0x07ff && codepoint <= 0xffff) + return Utf8Type::BasicMultiLingual; + else if (codepoint > 0xffff && codepoint <= 0x10ffff) + return Utf8Type::OthersPlanesUnicode; + + return Utf8Type::OutRange; + } + + /** + * @brief Convert hex string to utf8 string + * @param input + * @param dest Output utf8 string (size [2,5]). 
Empty (\0) if input is invalid or out of range + */ + inline void decode(const char* input, char* dest) + { + int32_t cdp = 0; + Utf8Type type = utf8type(input, &cdp); + char c0 = details::ASCIIHexToInt[input[0]]; + char c1 = details::ASCIIHexToInt[input[1]]; + char c2 = details::ASCIIHexToInt[input[2]]; + char c3 = details::ASCIIHexToInt[input[3]]; + + switch (type) + { + case Utf8Type::Ascii: + { + dest[0] = static_cast(cdp); + dest[1] = 0; + break; + } + + case Utf8Type::LatinExtra: + { + dest[0] = (0xc0 | ((c1 & 0x7) << 2)) | ((c2 & 0xc) >> 2); + dest[1] = (0x80 | ((c2 & 0x3) << 4)) | c3; + dest[2] = 0; + break; + } + + case Utf8Type::BasicMultiLingual: + { + dest[0] = 0xe0 | c0; + dest[1] = (0x80 | (c1 << 2)) | ((c2 & 0xc) >> 2); + dest[2] = (0x80 | ((c2 & 0x3) << 4)) | c3; + dest[3] = 0; + break; + } + + case Utf8Type::OthersPlanesUnicode: + { + char c4 = details::ASCIIHexToInt[input[4]]; + + if (cdp <= 0xfffff) + { + dest[0] = 0xf0 | ((c0 & 0xc) >> 2); + dest[1] = (0x80 | ((c0 & 0x3) << 4)) | c1; + dest[2] = (0x80 | (c2 << 2)) | ((c3 & 0xc) >> 2); + dest[3] = (0x80 | ((c3 & 0x3) << 4)) | c4; + dest[4] = 0; + } + else + { + char c5 = details::ASCIIHexToInt[input[5]]; + + dest[0] = (0xf0 | ((c0 & 0x1) << 2)) | ((c1 & 0xc) >> 2); + dest[1] = ((0x80 | ((c1 & 0x3) << 4)) | ((c1 & 0xc) >> 2)) | c2; + dest[2] = (0x80 | (c3 << 2)) | ((c4 & 0xc) >> 2); + dest[3] = (0x80 | ((c4 & 0x3) << 4)) | c5; + dest[4] = 0; + } + break; + } + + case Utf8Type::OutRange: + *dest = 0; + break; + } + } + + /** + * @brief Check the validity of a given string in UTF8 + * @param str + * @return true if the given string is a valid UTF88 string + */ + inline bool isValid(const char* str) + { + const char* s = str; + + if (str == 0) + return false; + + while (*s != 0) + { + if (0xf0 == (0xf8 & *s)) + { + if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) || (0x80 != (0xc0 & s[3]))) + return false; + else if (0x80 == (0xc0 & s[4])) + return false; + else if ((0 == (0x07 & s[0])) && (0 
== (0x30 & s[1]))) + return false; + s += 4; + } + else if (0xe0 == (0xf0 & *s)) + { + if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) + return false; + else if (0x80 == (0xc0 & s[3])) + return false; + else if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) + return false; + s += 3; + } + else if (0xc0 == (0xe0 & *s)) + { + if (0x80 != (0xc0 & s[1])) + return false; + else if (0x80 == (0xc0 & s[2])) + return false; + else if (0 == (0x1e & s[0])) + return false; + s += 2; + } + else if (0x00 == (0x80 & *s)) + s += 1; + else + return false; + } + + return true; + } + + /** + * @brief Compute the UTF8 codepoint for a given UTF8 char + * @param str + * @return UTF8 codepoint if valid, -1 otherwise + */ + inline int32_t codepoint(const char* str) + { + int32_t codepoint = 0; + const char* s = str; + + if (isValid(str)) + { + if (str == 0) + return -1; + + while (*s != 0) + { + if (0xf0 == (0xf8 & *s)) + { + codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) | ((0x3f & s[2]) << 6) | (0x3f & s[3]); + s += 4; + } + else if (0xe0 == (0xf0 & *s)) + { + codepoint = ((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]); + s += 3; + } + else if (0xc0 == (0xe0 & *s)) + { + codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]); + s += 2; + } + else if (0x00 == (0x80 & *s)) + { + codepoint = s[0]; + ++s; + } + else + return -1; + } + } + + return codepoint; + } + + /** + * @brief Generate an UTF8 character from a given codepoint + * @param codepoint + * @param dest Output utf8 string (size [2,5]). 
Empty (\0) if input is invalid or out of range + */ + inline void codepointToUtf8(const int32_t codepoint, char* dest) + { + if (codepoint >= 0x0000 && codepoint <= 0x007f) + { + dest[0] = codepoint; + dest[1] = 0; + } + else if (codepoint > 0x007f && codepoint <= 0x07ff) + { + dest[0] = 0x80; + if (codepoint > 0xff) + dest[0] |= (codepoint >> 6); + dest[0] |= ((codepoint & 0xc0) >> 6); + dest[1] = 0x80 | (codepoint & 0x3f); + dest[2] = 0; + } + else if (codepoint > 0x07ff && codepoint <= 0xffff) + { + dest[0] = 0xe0; + if (codepoint > 0xfff) + dest[0] |= ((codepoint & 0xf000) >> 12); + dest[1] = (0x80 | ((codepoint & 0xf00) >> 6)) | ((codepoint & 0xf0) >> 6); + dest[2] = (0x80 | (codepoint & 0x30)) | (codepoint & 0xf); + dest[3] = 0; + } + else if (codepoint > 0xffff && codepoint <= 0x10ffff) + { + dest[0] = 0xf0; + if (codepoint > 0xfffff) + dest[0] |= ((codepoint & 0x100000) >> 18); + dest[0] |= ((codepoint & 0xc0000) >> 18); + dest[1] = (0x80 | ((codepoint & 0x30000) >> 12)) | ((codepoint & 0xf000) >> 12); + dest[2] = (0x80 | ((codepoint & 0xf00) >> 6)) | ((codepoint & 0xc0) >> 6); + dest[3] = (0x80 | (codepoint & 0x30)) | (codepoint & 0xf); + dest[4] = 0; + } + else + *dest = 0; + } +} + +#endif diff --git a/lib/fmt b/lib/fmt index 7bdf0628b..c13753a70 160000 --- a/lib/fmt +++ b/lib/fmt @@ -1 +1 @@ -Subproject commit 7bdf0628b1276379886c7f6dda2cef2b3b374f0b +Subproject commit c13753a70cc55f3b1c99fb8f8395e78e5f9cae43 diff --git a/lib/modules b/lib/modules index 74f2cd3a2..507ce192e 160000 --- a/lib/modules +++ b/lib/modules @@ -1 +1 @@ -Subproject commit 74f2cd3a246138f336997c8da8cba496f41c8930 +Subproject commit 507ce192ecc1f5c38d0e15050b102329209ac9b1 diff --git a/lib/std b/lib/std index 4fa96f980..0afa4922c 160000 --- a/lib/std +++ b/lib/std @@ -1 +1 @@ -Subproject commit 4fa96f98073b7b57051451189ae00e2b0cf24909 +Subproject commit 0afa4922c4e9f25a7a6b4f00054e04c35e79168a diff --git a/lib/utf8_decoder b/lib/utf8_decoder deleted file mode 160000 index 
7b2fb5e2c..000000000 --- a/lib/utf8_decoder +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7b2fb5e2c862bc83d0c576710010d33dda39b95b diff --git a/src/arkreactor/Builtins/String.cpp b/src/arkreactor/Builtins/String.cpp index 809504212..f824c0bbb 100644 --- a/src/arkreactor/Builtins/String.cpp +++ b/src/arkreactor/Builtins/String.cpp @@ -1,8 +1,9 @@ #include #include -#include +#include #include +#include #include #include @@ -138,7 +139,7 @@ namespace Ark::internal::Builtins::String { { types::Contract { { types::Typedef("string", ValueType::String) } } } }, n); - int ord = utf8codepoint(n[0].stringRef().c_str()); + int ord = utf8::codepoint(n[0].stringRef().c_str()); return Value(ord); } @@ -162,7 +163,7 @@ namespace Ark::internal::Builtins::String std::array sutf8; - utf8chr(static_cast(n[0].number()), sutf8.data()); + utf8::codepointToUtf8(static_cast(n[0].number()), sutf8.data()); return Value(std::string(sutf8.data())); } } diff --git a/src/arkreactor/Compiler/AST/BaseParser.cpp b/src/arkreactor/Compiler/AST/BaseParser.cpp new file mode 100644 index 000000000..e162c47c3 --- /dev/null +++ b/src/arkreactor/Compiler/AST/BaseParser.cpp @@ -0,0 +1,308 @@ +#include +#include + +namespace Ark::internal +{ + void BaseParser::next() + { + m_it = m_next_it; + if (isEOF()) + { + m_sym = utf8_char_t(); // reset sym to EOF + return; + } + + // getting a character from the stream + auto [it, sym] = utf8_char_t::at(m_it); + m_next_it = it; + m_sym = sym; + } + + void BaseParser::initParser(const std::string& filename, const std::string& code) + { + m_filename = filename; + + // if the input string is empty, raise an error + if (code.size() == 0) + { + m_sym = utf8_char_t(); + error("Expected symbol, got empty string", ""); + } + m_str = code; + + m_it = m_next_it = m_str.begin(); + + // otherwise, get the first symbol + next(); + } + + void BaseParser::backtrack(long n) + { + if (static_cast(n) >= m_str.size()) + return; + + m_it = m_str.begin() + n; + auto [it, sym] = 
utf8_char_t::at(m_it); + m_next_it = it; + m_sym = sym; + } + + FilePosition BaseParser::getCursor() + { + FilePosition pos { 0, 0 }; + + // adjust the row/col count (this is going to be VERY inefficient) + auto tmp = m_str.begin(); + while (true) + { + auto [it, sym] = utf8_char_t::at(tmp); + if (*tmp == '\n') + { + ++pos.row; + pos.col = 0; + } + else if (sym.isPrintable()) + pos.col += sym.size(); + tmp = it; + + if (tmp > m_it || tmp == m_str.end()) + break; + } + + return pos; + } + + void BaseParser::error(const std::string& error, const std::string exp) + { + FilePosition pos = getCursor(); + throw CodeError(error, m_filename, pos.row, pos.col, exp, m_sym); + } + + void BaseParser::errorWithNextToken(const std::string& message) + { + auto pos = getCount(); + std::string next_token; + + anyUntil(IsEither(IsInlineSpace, IsEither(IsChar('('), IsChar(')'))), &next_token); + backtrack(pos); + + error(message, next_token); + } + + void BaseParser::errorMissingSuffix(char suffix, const std::string& node_name) + { + errorWithNextToken("Missing '" + std::string(1, suffix) + "' after " + node_name); + } + + bool BaseParser::accept(const CharPred& t, std::string* s) + { + if (isEOF()) + return false; + + // return false if the predicate couldn't consume the symbol + if (!t(m_sym.codepoint())) + return false; + // otherwise, add it to the string and go to the next symbol + if (s != nullptr) + *s += m_sym.c_str(); + + next(); + return true; + } + + bool BaseParser::expect(const CharPred& t, std::string* s) + { + // throw an error if the predicate couldn't consume the symbol + if (!t(m_sym.codepoint())) + error("Expected " + t.name, m_sym.c_str()); + // otherwise, add it to the string and go to the next symbol + if (s != nullptr) + *s += m_sym.c_str(); + next(); + return true; + } + + bool BaseParser::space(std::string* s) + { + if (accept(IsSpace)) + { + if (s != nullptr) + s->push_back(' '); + // loop while there are still ' ' to consume + while (accept(IsSpace)) + ; + 
return true; + } + return false; + } + + bool BaseParser::inlineSpace(std::string* s) + { + if (accept(IsInlineSpace)) + { + if (s != nullptr) + s->push_back(' '); + // loop while there are still ' ' to consume + while (accept(IsInlineSpace)) + ; + return true; + } + return false; + } + + bool BaseParser::endOfLine(std::string* s) + { + if ((accept(IsChar('\r')) || true) && accept(IsChar('\n'))) + { + if (s != nullptr) + s->push_back('\n'); + while ((accept(IsChar('\r')) || true) && accept(IsChar('\n'))) + ; + return true; + } + return false; + } + + bool BaseParser::comment() + { + if (accept(IsChar('#'))) + { + while (accept(IsNot(IsChar('\n')))) + ; + accept(IsChar('\n')); + return true; + } + return false; + } + + bool BaseParser::newlineOrComment() + { + bool matched = space(); + while (!isEOF() && comment()) + { + space(); + matched = true; + } + + return matched; + } + + bool BaseParser::prefix(char c) + { + if (!accept(IsChar(c))) + return false; + newlineOrComment(); + return true; + } + + bool BaseParser::suffix(char c) + { + newlineOrComment(); + return accept(IsChar(c)); + } + + bool BaseParser::number(std::string* s) + { + if (accept(IsDigit, s)) + { + // consume all the digits available, + // stop when the symbol isn't a digit anymore + while (accept(IsDigit, s)) + ; + return true; + } + return false; + } + + bool BaseParser::signedNumber(std::string* s) + { + accept(IsMinus, s); + if (!number(s)) + return false; + + // (optional) floating part + accept(IsChar('.'), s) && number(s); + // (optional) scientific part + if (accept(IsEither(IsChar('e'), IsChar('E')), s)) + { + accept(IsEither(IsMinus, IsChar('+')), s); + number(s); + } + + return true; + } + + bool BaseParser::hexNumber(unsigned int length, std::string* s) + { + while (length != 0) + { + if (!accept(IsHex, s)) + return false; + --length; + } + return true; + } + + bool BaseParser::name(std::string* s) + { + auto alpha_symbols = IsEither(IsAlpha, IsSymbol); + auto alnum_symbols = 
IsEither(IsAlnum, IsSymbol); + + if (accept(alpha_symbols, s)) + { + while (accept(alnum_symbols, s)) + ; + return true; + } + return false; + } + + bool BaseParser::sequence(const std::string& s) + { + for (std::size_t i = 0, end = s.size(); i < end; ++i) + { + if (!accept(IsChar(s[i]))) + return false; + } + return true; + } + + bool BaseParser::packageName(std::string* s) + { + if (accept(IsAlnum, s)) + { + while (accept(IsEither(IsAlnum, IsEither(IsChar('_'), IsChar('-'))), s)) + ; + return true; + } + return false; + } + + bool BaseParser::anyUntil(const CharPred& delim, std::string* s) + { + if (accept(IsNot(delim), s)) + { + while (accept(IsNot(delim), s)) + ; + return true; + } + return false; + } + + bool BaseParser::oneOf(std::initializer_list words, std::string* s) + { + std::string buffer; + if (!name(&buffer)) + return false; + + if (s) + *s = buffer; + + for (auto word : words) + { + if (word == buffer) + return true; + } + return false; + } +} diff --git a/src/arkreactor/Compiler/AST/Lexer.cpp b/src/arkreactor/Compiler/AST/Lexer.cpp deleted file mode 100644 index bbca2c549..000000000 --- a/src/arkreactor/Compiler/AST/Lexer.cpp +++ /dev/null @@ -1,340 +0,0 @@ -#include - -#include // TODO remove -#include -#include -#include -#include - -#include -#include -#include - -namespace Ark::internal -{ - Lexer::Lexer(unsigned debug) noexcept : - m_debug(debug) - {} - - void Lexer::feed(const std::string& code) - { - std::size_t line = 0, character = 0; - std::size_t saved_line = 0, saved_char = 0; - // flags - bool in_string = false, in_ctrl_char = false, in_comment = false; - // buffers - std::string buffer, ctrl_char; - - auto append_token_from_buffer = [&]() { - TokenType type = guessType(buffer); - // tokenizing error management - if (type == TokenType::Mismatch) - throwTokenizingError("invalid token '" + buffer + "'", buffer, line, character, code); - else if (type == TokenType::Capture || type == TokenType::GetField) - buffer = buffer.substr(1); // 
remove the & or the . - m_tokens.emplace_back(type, buffer, saved_line, saved_char); - buffer.clear(); - }; - - for (std::size_t pos = 0, end = code.size(); pos < end; ++pos) - { - char current = code[pos]; - - if (m_debug >= 5) - std::printf( - "buffer: %s - ctrl_char: %s - current: '%c' - line: %zu, char: %zu\n", - buffer.c_str(), ctrl_char.c_str(), current, line, character); - - if (!in_string) - { - // handle comments first - if (in_comment) // append every character to the buffer if we're in a comment, even spaces - buffer += current; - // handle ()[]{} then - else if (current == '(' || current == ')' || current == '[' || current == ']' || current == '{' || current == '}') - { - if (!buffer.empty()) - append_token_from_buffer(); - m_tokens.emplace_back(TokenType::Grouping, std::string(1, current), line, character); - } - // handle strings next - else if (current == '"') - { - if (!buffer.empty()) - append_token_from_buffer(); - in_string = true; - buffer = "\""; - saved_line = line; - saved_char = character; - } - // handle shorthands, be careful with ! and != - else if (current == '\'' || (current == '!' 
&& pos + 1 < code.size() && code[pos + 1] != '=' && buffer.empty())) - { - m_tokens.emplace_back(TokenType::Shorthand, std::string(1, current), line, character); - } - // handle comments - else if (current == '#') - { - if (!buffer.empty()) - append_token_from_buffer(); - in_comment = true; - buffer = "#"; - } - // separation - else if ((current == ' ' || current == '\t' || current == '\v' || current == '\n')) - { - if (!buffer.empty()) - append_token_from_buffer(); - } - // capture - else if (current == '&') - { - if (!buffer.empty()) - append_token_from_buffer(); - buffer.clear(); - buffer += current; - } - // getfield or spread - else if (current == '.') - { - // check numbers, we don't want to split 3.0 into 3 and .0 - if (!buffer.empty() && !('0' <= buffer[0] && buffer[0] <= '9') && buffer[0] != '+' && buffer[0] != '-' && buffer[0] != '.') - { - append_token_from_buffer(); - buffer.clear(); - } - buffer += current; - } - // identifier, number, operator - else - { - if (buffer.empty()) - { - saved_char = character; - saved_line = line; - } - buffer += current; - } - } - else // we are in a string here - { - // check for control character - if (!in_ctrl_char) - { - if (current == '\\') - in_ctrl_char = true; - else if (current == '"') // end of string - { - buffer += current; - in_string = false; - m_tokens.emplace_back(TokenType::String, buffer, saved_line, saved_char); - buffer.clear(); - } - else - buffer += current; - } - else - { - // end of escape code - if (current == ' ' || endOfControlChar(ctrl_char, current)) - { - // process escape code - if (ctrl_char.empty()) - throwTokenizingError("empty control character '\\' in string", buffer, line, character, code); - else if (ctrl_char.size() == 1) - { - switch (ctrl_char[0]) - { - case '"': buffer += '"'; break; - case 'n': buffer += '\n'; break; - case 'a': buffer += '\a'; break; - case 'b': buffer += '\b'; break; - case 't': buffer += '\t'; break; - case 'r': buffer += '\r'; break; - case 'f': buffer += 
'\f'; break; - case '\\': buffer += '\\'; break; - case '0': buffer += '\0'; break; - - default: - throwTokenizingError("unknown control character '\\" + ctrl_char + "' in string", buffer, line, character, code); - break; - } - } - else - { - switch (ctrl_char[0]) - { - case 'x': break; /// @todo - - case 'u': - { - char utf8_str[5]; - utf8decode(ctrl_char.c_str() + 1, utf8_str); - if (*utf8_str == '\0') - throwTokenizingError("invalid escape sequence \\" + ctrl_char + " in string, expected hexadecimal number that in utf8 range, got a \"" + ctrl_char + "\"", buffer, line, character + 1, code); - buffer += utf8_str; - break; - } - - case 'U': - { - short begin = 1; - for (; ctrl_char[begin] == '0'; ++begin) - ; - char utf8_str[5]; - utf8decode(ctrl_char.c_str() + begin, utf8_str); - if (*utf8_str == '\0') - throwTokenizingError("invalid escape sequence \\" + ctrl_char + " in string, expected hexadecimal number that in utf8 range, got a \"" + ctrl_char + "\"", buffer, line, character + 1, code); - buffer += utf8_str; - break; - } - - default: - throwTokenizingError("unknown control character '\\" + ctrl_char + "' in string", buffer, line, character, code); - break; - } - } - - ctrl_char.clear(); - in_ctrl_char = false; - - if (current == '"') // end of string - { - buffer += current; - in_string = false; - m_tokens.emplace_back(TokenType::String, buffer, saved_line, saved_char); - buffer.clear(); - } - else if (current == '\\') // new escape code - in_ctrl_char = true; - else - buffer += current; - } - else // the escape code continues - ctrl_char += current; - } - } - - // position counter - if (current == '\n') - { - line++; - character = 0; // before first character - - // close comments, don't append them - if (in_comment) - { - in_comment = false; - buffer.clear(); - continue; - } - } - else - { - // update position - character++; - } - } - - if (!buffer.empty() && buffer[0] != '#') - append_token_from_buffer(); - - // debugging information - if (m_debug > 3) - 
{ - for (auto& last_token : m_tokens) - { - std::printf( - "TokenType: %s\tLine: %zu\n[%zu\t]\tToken: %s\n", - tokentype_string[static_cast(last_token.type)].data(), - last_token.line, - last_token.col, - last_token.token.c_str()); - } - } - } - - std::vector& Lexer::tokens() noexcept - { - return m_tokens; - } - - TokenType Lexer::guessType(const std::string& value) noexcept - { - if (value.empty()) - return TokenType::Mismatch; - - // assuming we already detected ()[]{}, strings, shorthands and comments - if (Utils::isDouble(value)) // works on (\+|-)?[[:digit:]]+(\.[[:digit:]]+)?([e|E](\+|-)?[[:digit]]+)? - return TokenType::Number; - else if (isOperator(value)) - return TokenType::Operator; - else if (isKeyword(value)) - return TokenType::Keyword; - else if (value[0] == '&' && value.size() > 1 && isIdentifier(value)) - return TokenType::Capture; - else if (value.size() > 3 && value[0] == value[1] && value[1] == value[2] && value[2] == '.') - return TokenType::Spread; - else if (value[0] == '.' 
&& value.size() > 1 && isIdentifier(value)) - return TokenType::GetField; - // otherwise, identifier if it starts with [a-zA-Z_] - else if (isIdentifier(value)) - return TokenType::Identifier; - return TokenType::Mismatch; - } - - bool Lexer::isKeyword(const std::string& value) noexcept - { - return std::find(keywords.begin(), keywords.end(), value) != keywords.end(); - } - - bool Lexer::isIdentifier(const std::string& value) noexcept - { - return utf8valid(value.c_str()); - } - - bool Lexer::isOperator(const std::string& value) noexcept - { - return std::find(operators.begin(), operators.end(), value) != operators.end(); - } - - bool Lexer::endOfControlChar(const std::string& sequence, char next) noexcept - { - switch (sequence[0]) - { - case 'x': - // \x[any number of hex digits] - // if it's not a hex digit then it's most likely the end for us - return !isHexChar(next); - - case 'u': - return sequence.size() == 5; - - case 'U': - return sequence.size() == 9; - - case '"': - case 'n': - case 'a': - case 'b': - case 't': - case 'r': - case 'f': - case '\\': - case '0': - return true; - } - return false; - } - - void Lexer::throwTokenizingError(const std::string& message, const std::string& match, std::size_t line, std::size_t col, const std::string& context) - { - std::vector ctx = Utils::splitString(context, '\n'); - - std::stringstream ss; - ss << message << "\n"; - ss << makeTokenBasedErrorCtx(match, line, col, context); - - throw SyntaxError(ss.str()); - } -} diff --git a/src/arkreactor/Compiler/AST/Node.cpp b/src/arkreactor/Compiler/AST/Node.cpp index 438b9cc06..319f7c778 100644 --- a/src/arkreactor/Compiler/AST/Node.cpp +++ b/src/arkreactor/Compiler/AST/Node.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -6,89 +7,37 @@ namespace Ark::internal { - // Static methods. 
- const Node& Node::getTrueNode() - { - static const Node TrueNode { "true", NodeType::Symbol }; - return TrueNode; - } - - const Node& Node::getFalseNode() - { - static const Node FalseNode { "false", NodeType::Symbol }; - return FalseNode; - } - - const Node& Node::getNilNode() - { - static const Node NilNode { "nil", NodeType::Symbol }; - return NilNode; - } + Node::Node(NodeType node_type, const std::string& value) : + m_type(node_type), m_value(value) + {} - const Node& Node::getListNode() + Node::Node(NodeType node_type) : + m_type(node_type) { - static const Node ListNode { "list", NodeType::Symbol }; - return ListNode; + if (m_type == NodeType::List || m_type == NodeType::Macro || m_type == NodeType::Field) + m_value = std::vector(); } - // Normal Methods - Node::Node(long value) noexcept : - m_type(NodeType::Number), - m_value(static_cast(value)) - {} - - Node::Node(double value) noexcept : - m_type(NodeType::Number), - m_value(value) + Node::Node(double value) : + m_type(NodeType::Number), m_value(value) {} - Node::Node(const std::string& value, NodeType const& type) noexcept : - m_type(type), - m_value(value) + Node::Node(long value) : + m_type(NodeType::Number), m_value(static_cast(value)) {} - Node::Node(const std::string& value) noexcept : - Node(value, NodeType::String) + Node::Node(int value) : + m_type(NodeType::Number), m_value(static_cast(value)) {} - Node::Node(Keyword value) noexcept : - m_type(NodeType::Keyword), - m_value(value) + Node::Node(Keyword value) : + m_type(NodeType::Keyword), m_value(value) {} - Node::Node(NodeType type) noexcept : - m_type(type) + Node::Node(const std::vector& nodes) : + m_type(NodeType::List), m_value(nodes) {} - Node::Node(const Node& other) noexcept : - m_type(other.m_type), - m_value(other.m_value), - m_list(other.m_list), - m_line(other.m_line), - m_col(other.m_col), - m_filename(other.m_filename) - {} - - Node& Node::operator=(Node other) noexcept - { - swap(other); - return *this; - } - - void 
Node::swap(Node& other) noexcept - { - using std::swap; - - swap(m_type, other.m_type); - swap(m_value, other.m_value); - swap(m_list, other.m_list); - swap(m_line, other.m_line); - swap(m_col, other.m_col); - swap(m_filename, other.m_filename); - } - - // ------------------------- - const std::string& Node::string() const noexcept { return std::get(m_value); @@ -104,25 +53,21 @@ namespace Ark::internal return std::get(m_value); } - // ------------------------- - void Node::push_back(const Node& node) noexcept { - m_list.push_back(node); + list().push_back(node); } std::vector& Node::list() noexcept { - return m_list; + return std::get>(m_value); } const std::vector& Node::constList() const noexcept { - return m_list; + return std::get>(m_value); } - // ------------------------- - NodeType Node::nodeType() const noexcept { return m_type; @@ -138,18 +83,6 @@ namespace Ark::internal m_value = value; } - void Node::setNumber(double value) noexcept - { - m_value = value; - } - - void Node::setKeyword(Keyword kw) noexcept - { - m_value = kw; - } - - // ------------------------- - void Node::setPos(std::size_t line, std::size_t col) noexcept { m_line = line; @@ -176,58 +109,21 @@ namespace Ark::internal return m_filename; } - // ------------------------- - - auto colors = std::vector( - { termcolor::blue, - termcolor::red, - termcolor::green, - termcolor::cyan, - termcolor::magenta }); - - void swap(Node& lhs, Node& rhs) noexcept - { - lhs.swap(rhs); - } - std::ostream& operator<<(std::ostream& os, const Node& N) noexcept + std::ostream& operator<<(std::ostream& os, const Node& node) noexcept { - static int index = 0; - - switch (N.m_type) + switch (node.m_type) { - case NodeType::String: - os << '"' << N.string() << '"'; - break; - case NodeType::Symbol: - os << "(Symbol) " << N.string(); + os << "Symbol:" << node.string(); break; case NodeType::Capture: - os << "(Capture) " << N.string(); - break; - - case NodeType::GetField: - os << "(GetField) " << N.string(); + os 
<< "Capture:" << node.string(); break; - case NodeType::Number: - os << N.number(); - break; - - case NodeType::List: - { - os << colors[index % colors.size()] << "( " << termcolor::reset; - index++; - for (auto& t : N.m_list) - os << t << " "; - index--; - os << colors[index % colors.size()] << ")" << termcolor::reset; - break; - } - case NodeType::Keyword: - switch (N.keyword()) + os << "Keyword:"; + switch (node.keyword()) { case Keyword::Fun: os << "Fun"; break; case Keyword::Let: os << "Let"; break; @@ -237,28 +133,45 @@ namespace Ark::internal case Keyword::While: os << "While"; break; case Keyword::Begin: os << "Begin"; break; case Keyword::Import: os << "Import"; break; - case Keyword::Quote: os << "Quote"; break; case Keyword::Del: os << "Del"; break; } break; + case NodeType::String: + os << "String:" << node.string(); + break; + + case NodeType::Number: + os << "Number:" << node.number(); + break; + + case NodeType::List: + os << "( "; + for (std::size_t i = 0, end = node.constList().size(); i < end; ++i) + os << node.constList()[i] << " "; + os << ")"; + break; + + case NodeType::Field: + os << "( Field "; + for (std::size_t i = 0, end = node.constList().size(); i < end; ++i) + os << node.constList()[i] << " "; + os << ")"; + break; + case NodeType::Macro: - { - os << colors[index % colors.size()] << "( " << termcolor::reset << "Macro "; - index++; - for (auto& t : N.m_list) - os << t << " "; - index--; - os << colors[index % colors.size()] << ")" << termcolor::reset; + os << "( Macro "; + for (std::size_t i = 0, end = node.constList().size(); i < end; ++i) + os << node.constList()[i] << " "; + os << ")"; break; - } case NodeType::Spread: - os << "(Spread) " << N.string(); + os << "Spread:" << node.string(); break; case NodeType::Unused: - os << "(Unused)"; + os << "Unused:" << node.string(); break; default: @@ -268,16 +181,31 @@ namespace Ark::internal return os; } - std::ostream& operator<<(std::ostream& os, const std::vector& N) noexcept + const 
Node& getTrueNode() { - os << "( "; - for (auto& t : N) - os << t << " "; - os << ")"; + static const Node TrueNode(NodeType::Symbol, "true"); + return TrueNode; + } - return os; + const Node& getFalseNode() + { + static const Node FalseNode(NodeType::Symbol, "false"); + return FalseNode; } + const Node& getNilNode() + { + static const Node NilNode(NodeType::Symbol, "nil"); + return NilNode; + } + + const Node& getListNode() + { + static const Node ListNode(NodeType::Symbol, "list"); + return ListNode; + } + + // todo: do we really need all those operators? maybe for macros? bool operator==(const Node& A, const Node& B) { if (A.m_type != B.m_type) // should have the same types @@ -306,7 +234,7 @@ namespace Ark::internal return A.m_value < B.m_value; case NodeType::List: - return A.m_list < B.m_list; + //return A.m_list < B.m_list; // fixme default: return false; @@ -323,7 +251,6 @@ namespace Ark::internal case NodeType::Number: return !A.number(); - case NodeType::GetField: case NodeType::Capture: case NodeType::String: return A.string().size() == 0; @@ -335,6 +262,8 @@ namespace Ark::internal return true; return false; + // todo: implement field? 
+ default: return false; } diff --git a/src/arkreactor/Compiler/AST/Optimizer.cpp b/src/arkreactor/Compiler/AST/Optimizer.cpp index 74fc05fef..11bc0afd1 100644 --- a/src/arkreactor/Compiler/AST/Optimizer.cpp +++ b/src/arkreactor/Compiler/AST/Optimizer.cpp @@ -1,12 +1,14 @@ #include +#include + namespace Ark::internal { Optimizer::Optimizer(uint16_t options) noexcept : m_options(options) {} - void Optimizer::feed(const Node& ast) + void Optimizer::process(const Node& ast) { m_ast = ast; @@ -21,7 +23,10 @@ namespace Ark::internal void Optimizer::throwOptimizerError(const std::string& message, const Node& node) { - throw OptimizerError(makeNodeBasedErrorCtx(message, node)); + std::stringstream ss; + ss << node; + + throw CodeError(message, node.filename(), node.line(), node.col(), ss.str()); } void Optimizer::remove_unused() diff --git a/src/arkreactor/Compiler/AST/Parser.cpp b/src/arkreactor/Compiler/AST/Parser.cpp index 1ce913a4b..805aab99c 100644 --- a/src/arkreactor/Compiler/AST/Parser.cpp +++ b/src/arkreactor/Compiler/AST/Parser.cpp @@ -1,611 +1,760 @@ #include -#include -#include -#include - #include -#include -#include namespace Ark::internal { - Parser::Parser(unsigned debug, uint16_t options, const std::vector& lib_env) noexcept : - m_debug(debug), - m_libenv(lib_env), - m_options(options), - m_lexer(debug), - m_file(ARK_NO_NAME_FILE) - {} - - void Parser::feed(const std::string& code, const std::string& filename) + Parser::Parser() : + BaseParser(), m_ast(NodeType::List), m_imports({}), m_allow_macro_behavior(0) { - // not the default value - if (filename != ARK_NO_NAME_FILE) - { - m_file = Utils::canonicalRelPath(filename); - if (m_debug >= 2) - std::cout << "New parser: " << m_file << '\n'; - m_parent_include.push_back(m_file); - } + m_ast.push_back(Node(Keyword::Begin)); + } - m_code = code; - - m_lexer.feed(code); - // apply syntactic sugar - std::vector& t = m_lexer.tokens(); - if (t.empty()) - throw ParseError("empty file"); - sugar(t); - - // create 
program - std::list tokens(t.begin(), t.end()); - m_last_token = tokens.front(); - - // accept every nodes in the file - m_ast = Node(NodeType::List); - m_ast.setFilename(m_file); - m_ast.list().emplace_back(Keyword::Begin); - while (!tokens.empty()) - m_ast.list().push_back(parse(tokens)); - // include files if needed - checkForInclude(m_ast, m_ast); - - if (m_debug >= 3) - std::cout << "(Parser) AST\n" - << m_ast << "\n\n"; + void Parser::processFile(const std::string& filename) + { + const std::string code = Utils::readFile(filename); + initParser(filename, code); + run(); + } + + void Parser::processString(const std::string& code) + { + initParser(ARK_NO_NAME_FILE, code); + run(); } - const Node& Parser::ast() const noexcept + const Node& Parser::ast() const { return m_ast; } - const std::vector& Parser::getImports() const noexcept + const std::vector& Parser::imports() const { - return m_parent_include; + return m_imports; } - void Parser::sugar(std::vector& tokens) noexcept + void Parser::run() { - std::size_t i = 0; - while (true) + while (!isEOF()) { - std::size_t line = tokens[i].line; - std::size_t col = tokens[i].col; - - if (tokens[i].token == "{") - { - tokens[i] = Token(TokenType::Grouping, "(", line, col); - // handle macros - if (i > 0 && tokens[i - 1].token != "!") - tokens.insert(tokens.begin() + i + 1, Token(TokenType::Keyword, "begin", line, col)); - else if (i == 0) - tokens.insert(tokens.begin() + i + 1, Token(TokenType::Keyword, "begin", line, col)); - } - else if (tokens[i].token == "}" || tokens[i].token == "]") - tokens[i] = Token(TokenType::Grouping, ")", line, col); - else if (tokens[i].token == "[") - { - tokens[i] = Token(TokenType::Grouping, "(", line, col); - tokens.insert(tokens.begin() + i + 1, Token(TokenType::Identifier, "list", line, col)); - } - - ++i; - - if (i == tokens.size()) + newlineOrComment(); + if (isEOF()) break; + + auto n = node(); + if (n) + m_ast.push_back(n.value()); } } - // sugar() was called before, so it's 
safe to assume we only have ( and ) - Node Parser::parse(std::list& tokens, bool authorize_capture, bool authorize_field_read, bool in_macro) + std::optional Parser::node() { - using namespace std::string_literals; + // save current position in buffer to be able to go back if needed + auto position = getCount(); - Token token = nextToken(tokens); + if (auto result = wrapped(&Parser::letMutSet, "let/mut/set", '(', ')')) + return result; + else + backtrack(position); - bool previous_token_was_lparen = false; + if (auto result = wrapped(&Parser::function, "function", '(', ')')) + return result; + else + backtrack(position); - // parse block - if (token.token == "(") - { - previous_token_was_lparen = true; - // create a list node to host the block - Node block = make_node_list(token.line, token.col, m_file); + if (auto result = wrapped(&Parser::condition, "condition", '(', ')')) + return result; + else + backtrack(position); - // handle sub-blocks - if (tokens.front().token == "(") - { - block.push_back(parse(tokens, false, false, in_macro)); - previous_token_was_lparen = false; - } + if (auto result = wrapped(&Parser::loop, "loop", '(', ')')) + return result; + else + backtrack(position); - // take next token, we don't want to play with a "(" - token = nextToken(tokens); + if (auto result = import_(); result.has_value()) + return result; + else + backtrack(position); - // return an empty block - if (token.token == ")") - return block; + if (auto result = block(); result.has_value()) + return result; + else + backtrack(position); - // check for unexpected keywords between expressions - if ((token.type == TokenType::Operator || - token.type == TokenType::Identifier || - token.type == TokenType::Number || - token.type == TokenType::String) && - tokens.front().type == TokenType::Keyword) - throwParseError("Unexpected keyword `" + tokens.front().token + "' in the middle of an expression", tokens.front()); + if (auto result = wrapped(&Parser::macroCondition, "$if", '(', 
')')) + return result; + else + backtrack(position); - // loop until we reach the end of the block - do - { - Node atomized = atom(token); - checkForInvalidTokens(atomized, token, previous_token_was_lparen, authorize_capture, authorize_field_read); - block.push_back(atomized); + if (auto result = macroBlock()) + return result; + else + backtrack(position); - expect(!tokens.empty(), "expected more tokens after `" + token.token + "'", m_last_token); - m_last_token = tokens.front(); + if (auto result = macro(); result.has_value()) + return result; + else + backtrack(position); - if (token.type == TokenType::Keyword) - { - if (token.token == "if") - parseIf(block, tokens, in_macro); - else if (token.token == "let" || token.token == "mut") - parseLetMut(block, token, tokens, in_macro); - else if (token.token == "set") - parseSet(block, token, tokens, in_macro); - else if (token.token == "fun") - parseFun(block, token, tokens, in_macro); - else if (token.token == "while") - parseWhile(block, token, tokens, in_macro); - else if (token.token == "begin") - parseBegin(block, tokens, in_macro); - else if (token.token == "import") - parseImport(block, tokens); - else if (token.token == "quote") - parseQuote(block, tokens, in_macro); - else if (token.token == "del") - parseDel(block, tokens); - else - throwParseError("unimplemented keyword `" + token.token + "'. 
If you see this error please report it on GitHub.", token); - } - else if (token.type == TokenType::Identifier || token.type == TokenType::Operator || - (token.type == TokenType::Capture && authorize_capture) || - (token.type == TokenType::GetField && authorize_field_read) || - (token.type == TokenType::Spread && in_macro)) - { - while (tokens.front().token != ")") - block.push_back(parse(tokens, /* authorize_capture */ false, /* authorize_field_read */ true, in_macro)); - } - } while (tokens.front().token != ")"); + if (auto result = wrapped(&Parser::del, "del", '(', ')')) + return result; + else + backtrack(position); - // pop the ")" - tokens.pop_front(); - return block; - } - else if (token.type == TokenType::Shorthand) - return parseShorthand(token, tokens, in_macro); - // error, we shouldn't have grouping token here - else if (token.type == TokenType::Grouping) - throwParseError("Found a lonely `" + token.token + "', you most likely have too much parenthesis.", token); - else if ((token.type == TokenType::Operator || token.type == TokenType::Identifier) && - std::find(internal::operators.begin(), internal::operators.end(), token.token) != internal::operators.end()) - throwParseError("Found a free flying operator, which isn't authorized. 
Operators should always immediatly follow a `('.", token); - else if ((token.type == TokenType::Number || - token.type == TokenType::String) && - tokens.front().type == TokenType::Keyword) - throwParseError("Unexpected keyword `" + tokens.front().token + "' in the middle of an expression", tokens.front()); - else if (token.type == TokenType::Keyword && - !previous_token_was_lparen) - throwParseError("Unexpected keyword `" + token.token + "' in the middle of an expression", token); - return atom(token); + if (auto result = functionCall(); result.has_value()) + return result; + else + backtrack(position); + + if (auto result = list(); result.has_value()) + return result; + else + backtrack(position); + + return std::nullopt; // will never reach } - void Parser::parseIf(Node& block, std::list& tokens, bool in_macro) + std::optional Parser::letMutSet() { - auto temp = tokens.front(); - // parse condition - if (temp.type == TokenType::Grouping) - block.push_back(parse(tokens, false, false, in_macro)); - else if (temp.type == TokenType::Identifier || temp.type == TokenType::Number || - temp.type == TokenType::String || (in_macro && temp.type == TokenType::Spread)) - block.push_back(atom(nextToken(tokens))); - else - throwParseError("found invalid token after keyword `if', expected function call, value or Identifier", temp); - // parse 'then' - expect(!tokens.empty() && tokens.front().token != ")", "expected a statement after the condition", temp); - block.push_back(parse(tokens, false, false, in_macro)); - // parse 'else', if there is one - if (tokens.front().token != ")") + std::string token; + if (!oneOf({ "let", "mut", "set" }, &token)) + return std::nullopt; + newlineOrComment(); + + Node leaf(NodeType::List); + if (token == "let") + leaf.push_back(Node(Keyword::Let)); + else if (token == "mut") + leaf.push_back(Node(Keyword::Mut)); + else // "set" + leaf.push_back(Node(Keyword::Set)); + + if (m_allow_macro_behavior > 0) { - block.push_back(parse(tokens, false, 
false, in_macro)); - // error handling if the if is ill-formed - expect(tokens.front().token == ")", "if block is ill-formed, got more than the 3 required arguments (condition, then, else)", m_last_token); + auto position = getCount(); + if (auto value = nodeOrValue(); value.has_value()) + leaf.push_back(value.value()); + else + backtrack(position); } - } - void Parser::parseLetMut(Node& block, Token& token, std::list& tokens, bool in_macro) - { - auto temp = tokens.front(); - // parse identifier - if (temp.type == TokenType::Identifier) - block.push_back(atom(nextToken(tokens))); - else if (in_macro) - block.push_back(parse(tokens, false, false, in_macro)); + if (leaf.constList().size() == 1) + { + // we haven't parsed anything while in "macro state" + std::string symbol; + if (!name(&symbol)) + errorWithNextToken(token + " needs a symbol"); + + leaf.push_back(Node(NodeType::Symbol, symbol)); + } + + newlineOrComment(); + + if (auto value = nodeOrValue(); value.has_value()) + leaf.push_back(value.value()); else - throwParseError(std::string("missing identifier to define a ") + (token.token == "let" ? 
"constant" : "variable") + ", after keyword `" + token.token + "'", temp); - expect(!tokens.empty() && tokens.front().token != ")", "expected a value after the identifier", temp); - // value - while (tokens.front().token != ")") - block.push_back(parse(tokens, /* authorize_capture */ false, /* authorize_field_read */ true, in_macro)); - - // the block size can exceed 3 only if we have a serie of getfields - expect( - block.list().size() <= 3 || std::all_of(block.list().begin() + 3, block.list().end(), [](const Node& n) -> bool { - return n.nodeType() == NodeType::GetField; - }), - "too many arguments given to keyword `" + token.token + "', got " + std::to_string(block.list().size() - 1) + ", expected at most 3", m_last_token); + errorWithNextToken("Expected a value"); + + return leaf; } - void Parser::parseSet(Node& block, Token& token, std::list& tokens, bool in_macro) + std::optional Parser::del() { - auto temp = tokens.front(); - // parse identifier - if (temp.type == TokenType::Identifier) - block.push_back(atom(nextToken(tokens))); - else if (in_macro) - block.push_back(parse(tokens, false, false, in_macro)); - else - throwParseError("missing identifier to assign a value to, after keyword `set'", temp); - expect(!tokens.empty() && tokens.front().token != ")", "expected a value after the identifier", temp); - // set can not accept a.b...c as an identifier - if (tokens.front().type == TokenType::GetField) - throwParseError("found invalid token after keyword `set', expected an identifier, got a closure field reading expression", tokens.front()); - // value - while (tokens.front().token != ")") - block.push_back(parse(tokens, /* authorize_capture */ false, /* authorize_field_read */ true, in_macro)); - - // the block size can exceed 3 only if we have a serie of getfields - expect( - block.list().size() <= 3 || std::all_of(block.list().begin() + 3, block.list().end(), [](const Node& n) -> bool { - return n.nodeType() == NodeType::GetField; - }), - "too many 
arguments given to keyword `" + token.token + "', got " + std::to_string(block.list().size() - 1) + ", expected at most 3", m_last_token); + std::string keyword; + if (!oneOf({ "del" }, &keyword)) + return std::nullopt; + + newlineOrComment(); + + std::string symbol; + if (!name(&symbol)) + errorWithNextToken(keyword + " needs a symbol"); + + Node leaf(NodeType::List); + leaf.push_back(Node(Keyword::Del)); + leaf.push_back(Node(NodeType::Symbol, symbol)); + + return leaf; } - void Parser::parseFun(Node& block, Token& token, std::list& tokens, bool in_macro) + std::optional Parser::condition() { - // parse arguments - if (tokens.front().type == TokenType::Grouping || in_macro) - block.push_back(parse(tokens, /* authorize_capture */ true, false, in_macro)); - else - throwParseError("found invalid token after keyword `fun', expected a block to define the argument list of the function\nThe block can be empty if it doesn't have arguments: `()'", tokens.front()); - // parse body - if (tokens.front().type == TokenType::Grouping || in_macro) - block.push_back(parse(tokens, false, false, in_macro)); + if (!oneOf({ "if" })) + return std::nullopt; + + newlineOrComment(); + + Node leaf(NodeType::List); + leaf.push_back(Node(Keyword::If)); + + if (auto condition = nodeOrValue(); condition.has_value()) + leaf.push_back(condition.value()); else - throwParseError("the body of a function must be a block, even an empty one `()'", tokens.front()); - expect(block.list().size() == 3, "got too many arguments after keyword `" + token.token + "', expected an argument list and a body", m_last_token); - } + errorWithNextToken("If need a valid condition"); - void Parser::parseWhile(Node& block, Token& token, std::list& tokens, bool in_macro) - { - auto temp = tokens.front(); - // parse condition - if (temp.type == TokenType::Grouping) - block.push_back(parse(tokens, false, false, in_macro)); - else if (temp.type == TokenType::Identifier || temp.type == TokenType::Number || - temp.type == 
TokenType::String) - block.push_back(atom(nextToken(tokens))); + newlineOrComment(); + + if (auto value_if_true = nodeOrValue(); value_if_true.has_value()) + leaf.push_back(value_if_true.value()); else - throwParseError("found invalid token after keyword `while', expected function call, value or Identifier", temp); - expect(!tokens.empty() && tokens.front().token != ")", "expected a body after the condition", temp); - // parse 'do' - block.push_back(parse(tokens, false, false, in_macro)); - expect(block.list().size() == 3, "got too many arguments after keyword `" + token.token + "', expected a condition and a body", temp); - } + errorWithNextToken("Expected a value"); - void Parser::parseBegin(Node& block, std::list& tokens, bool in_macro) - { - while (true) - { - expect(!tokens.empty(), "a `begin' block was opened but never closed\nYou most likely forgot a `}' or `)'", m_last_token); - if (tokens.front().token == ")") - break; - m_last_token = tokens.front(); + newlineOrComment(); - block.push_back(parse(tokens, false, false, in_macro)); + if (auto value_if_false = nodeOrValue(); value_if_false.has_value()) + { + leaf.push_back(value_if_false.value()); + newlineOrComment(); } + + return leaf; } - void Parser::parseImport(Node& block, std::list& tokens) + std::optional Parser::loop() { - if (tokens.front().type == TokenType::String) - block.push_back(atom(nextToken(tokens))); + if (!oneOf({ "while" })) + return std::nullopt; + + newlineOrComment(); + + Node leaf(NodeType::List); + leaf.push_back(Node(Keyword::While)); + + if (auto condition = nodeOrValue(); condition.has_value()) + leaf.push_back(condition.value()); else - throwParseError("found invalid token after keyword `import', expected String (path to the file or module to import)", tokens.front()); - expect(tokens.front().token == ")", "got too many arguments after keyword `import', expected a single filename as String", tokens.front()); - } + errorWithNextToken("While need a valid condition"); - void 
Parser::parseQuote(Node& block, std::list& tokens, bool in_macro) - { - block.push_back(parse(tokens, false, false, in_macro)); - expect(tokens.front().token == ")", "got too many arguments after keyword `quote', expected a single block or value", tokens.front()); - } + newlineOrComment(); - void Parser::parseDel(Node& block, std::list& tokens) - { - if (tokens.front().type == TokenType::Identifier) - block.push_back(atom(nextToken(tokens))); + if (auto body = nodeOrValue(); body.has_value()) + leaf.push_back(body.value()); else - throwParseError("found invalid token after keyword `del', expected Identifier", tokens.front()); - expect(tokens.front().token == ")", "got too many arguments after keyword `del', expected a single identifier", tokens.front()); + errorWithNextToken("Expected a value"); + + return leaf; } - Node Parser::parseShorthand(Token& token, std::list& tokens, bool in_macro) + std::optional Parser::import_() { - if (token.token == "'") + if (!accept(IsChar('('))) + return std::nullopt; + newlineOrComment(); + + if (!oneOf({ "import" })) + return std::nullopt; + newlineOrComment(); + + Node leaf(NodeType::List); + leaf.push_back(Node(Keyword::Import)); + + Import import_data; + + if (!packageName(&import_data.prefix)) + errorWithNextToken("Import expected a package name"); + import_data.package.push_back(import_data.prefix); + + Node packageNode(NodeType::List); + packageNode.push_back(Node(NodeType::String, import_data.prefix)); + + // first, parse the package name + while (!isEOF()) { - // create a list node to host the block - Node block = make_node_list(token.line, token.col, m_file); + // parsing package folder.foo.bar.yes + if (accept(IsChar('.'))) + { + std::string path; + if (!packageName(&path)) + errorWithNextToken("Package name expected after '.'"); + else + { + packageNode.push_back(Node(NodeType::String, path)); + import_data.package.push_back(path); + import_data.prefix = path; // in the end we will store the last element of the 
package, which is what we want + } + } + else if (accept(IsChar(':')) && accept(IsChar('*'))) // parsing :* + { + space(); + expect(IsChar(')')); + + leaf.push_back(packageNode); + leaf.push_back(Node(NodeType::Symbol, "*")); - block.push_back(make_node(Keyword::Quote, token.line, token.col, m_file)); - block.push_back(parse(tokens, false, false, in_macro)); - return block; + // save the import data structure to know we encounter an import node, and retrieve its data more easily later on + import_data.with_prefix = false; + m_imports.push_back(import_data); + + return leaf; + } + else + break; } - else if (token.token == "!") + + Node symbols(NodeType::List); + // then parse the symbols to import, if any + if (newlineOrComment()) { - if (m_debug >= 2) - std::cout << "Found a macro at " << token.line << ':' << token.col << " in " << m_file << '\n'; + while (!isEOF()) + { + if (accept(IsChar(':'))) // parsing potential :a :b :c + { + std::string symbol; + if (!name(&symbol)) + errorWithNextToken("Expected a valid symbol to import"); - // macros - Node block = make_node(NodeType::Macro, token.line, token.col, m_file); + if (symbol.size() >= 2 && symbol[symbol.size() - 2] == ':' && symbol.back() == '*') + { + backtrack(getCount() - 2); // we can backtrack n-2 safely here because we know the previous chars were ":*" + error("Glob pattern can not follow a symbol to import", ":*"); + } - Node parsed = parse(tokens, /* authorize_capture */ false, /* authorize_field_read */ false, /* in_macro */ true); - if (parsed.nodeType() != NodeType::List || parsed.list().size() < 2 || parsed.list().size() > 4) - throwParseError("Macros can only defined using the !{ name value } or !{ name (args) value } syntax", token); + symbols.push_back(Node(NodeType::Symbol, symbol)); + import_data.symbols.push_back(symbol); + } - // append the nodes of the parsed node to the current macro node - for (std::size_t i = 0, end = parsed.list().size(); i < end; ++i) - block.push_back(parsed.list()[i]); 
- return block; + if (!newlineOrComment()) + break; + } } - throwParseError("unknown shorthand", token); + leaf.push_back(packageNode); + leaf.push_back(symbols); + // save the import data + m_imports.push_back(import_data); + + newlineOrComment(); + expect(IsChar(')')); + return leaf; } - void Parser::checkForInvalidTokens(Node& atomized, Token& token, bool previous_token_was_lparen, bool authorize_capture, bool authorize_field_read) + std::optional Parser::block() { - if ((atomized.nodeType() == NodeType::String || atomized.nodeType() == NodeType::Number || - atomized.nodeType() == NodeType::List) && - previous_token_was_lparen) + bool alt_syntax = false; + if (accept(IsChar('('))) { - std::stringstream ss; - ss << "found invalid token after `(', expected Keyword, Identifier"; - if (!authorize_capture && !authorize_field_read) - ss << " or Operator"; + newlineOrComment(); + if (!oneOf({ "begin" })) + return std::nullopt; + } + else if (accept(IsChar('{'))) + alt_syntax = true; + else + return std::nullopt; + newlineOrComment(); + + Node leaf(NodeType::List); + leaf.push_back(Node(Keyword::Begin)); + + while (!isEOF()) + { + if (auto value = nodeOrValue(); value.has_value()) + { + leaf.push_back(value.value()); + newlineOrComment(); + } else + break; + } + + newlineOrComment(); + expect(IsChar(!alt_syntax ? 
')' : '}')); + return leaf; + } + + std::optional Parser::functionArgs() + { + expect(IsChar('(')); + newlineOrComment(); + + Node args(NodeType::List); + bool has_captures = false; + + while (!isEOF()) + { + if (accept(IsChar('&'))) // captures { - ss << ", Operator"; - if (authorize_capture && !authorize_field_read) - ss << " or Capture"; - else if (!authorize_capture && authorize_field_read) - ss << " or GetField"; + has_captures = true; + std::string capture; + if (!name(&capture)) + break; else - ss << ", Capture or GetField"; + { + newlineOrComment(); + args.push_back(Node(NodeType::Capture, capture)); + } + } + else + { + auto pos = getCount(); + std::string symbol; + if (!name(&symbol)) + break; + else + { + if (has_captures) + { + backtrack(pos); + error("Captured variables should be at the end of the argument list", symbol); + } + + newlineOrComment(); + args.push_back(Node(NodeType::Symbol, symbol)); + } } - throwParseError(ss.str(), token); } + + if (accept(IsChar(')'))) + return args; + return std::nullopt; } - Token Parser::nextToken(std::list& tokens) + std::optional Parser::function() { - expect(!tokens.empty(), "no more token to consume", m_last_token); - m_last_token = tokens.front(); + if (!oneOf({ "fun" })) + return std::nullopt; + newlineOrComment(); + + while (m_allow_macro_behavior > 0) + { + auto position = getCount(); + + Node leaf(NodeType::List); + leaf.push_back(Node(Keyword::Fun)); + // args + if (auto value = nodeOrValue(); value.has_value()) + leaf.push_back(value.value()); + else + { + backtrack(position); + break; + } + newlineOrComment(); + // body + if (auto value = nodeOrValue(); value.has_value()) + leaf.push_back(value.value()); + else + errorWithNextToken("Expected a body for the function"); + return leaf; + } + + Node leaf(NodeType::List); + leaf.push_back(Node(Keyword::Fun)); + + auto position = getCount(); + if (auto args = functionArgs(); args.has_value()) + leaf.push_back(args.value()); + else + { + backtrack(position); + 
+ if (auto value = nodeOrValue(); value.has_value()) + leaf.push_back(value.value()); + else + errorWithNextToken("Expected an argument list"); + } + + newlineOrComment(); + + if (auto value = nodeOrValue(); value.has_value()) + leaf.push_back(value.value()); + else + errorWithNextToken("Expected a body for the function"); - const Token out = std::move(tokens.front()); - tokens.pop_front(); - return out; + return leaf; } - Node Parser::atom(const Token& token) + std::optional Parser::macroCondition() { - switch (token.type) + if (!oneOf({ "$if" })) + return std::nullopt; + newlineOrComment(); + + Node leaf(NodeType::Macro); + leaf.push_back(Node(Keyword::If)); + + if (auto condition = nodeOrValue(); condition.has_value()) + leaf.push_back(condition.value()); + else + errorWithNextToken("$if need a valid condition"); + + newlineOrComment(); + + if (auto value_if_true = nodeOrValue(); value_if_true.has_value()) + leaf.push_back(value_if_true.value()); + else + errorWithNextToken("Expected a value"); + + newlineOrComment(); + + if (auto value_if_false = nodeOrValue(); value_if_false.has_value()) { - case TokenType::Number: - return make_node(std::stod(token.token), token.line, token.col, m_file); + leaf.push_back(value_if_false.value()); + newlineOrComment(); + } - case TokenType::String: - { - std::string str = token.token; - // remove the " at the beginning and at the end - str.erase(0, 1); - str.erase(token.token.size() - 2, 1); + return leaf; + } - return make_node(str, token.line, token.col, m_file); - } + std::optional Parser::macroBlock() + { + if (!accept(IsChar('('))) + return std::nullopt; + newlineOrComment(); + + if (!oneOf({ "$*" })) + return std::nullopt; + newlineOrComment(); - case TokenType::Keyword: + Node leaf(NodeType::List); + + while (!isEOF()) + { + if (auto value = nodeOrValue(); value.has_value()) { - std::optional kw; - if (token.token == "if") - kw = Keyword::If; - else if (token.token == "set") - kw = Keyword::Set; - else if (token.token == 
"let") - kw = Keyword::Let; - else if (token.token == "mut") - kw = Keyword::Mut; - else if (token.token == "fun") - kw = Keyword::Fun; - else if (token.token == "while") - kw = Keyword::While; - else if (token.token == "begin") - kw = Keyword::Begin; - else if (token.token == "import") - kw = Keyword::Import; - else if (token.token == "quote") - kw = Keyword::Quote; - else if (token.token == "del") - kw = Keyword::Del; - - if (kw) - return make_node(kw.value(), token.line, token.col, m_file); - throwParseError("unknown keyword", token); + leaf.push_back(value.value()); + newlineOrComment(); } + else + break; + } + + newlineOrComment(); + expect(IsChar(')')); + return leaf; + } - case TokenType::Capture: - case TokenType::GetField: - case TokenType::Spread: + std::optional Parser::macroArgs() + { + if (accept(IsChar('('))) + { + newlineOrComment(); + Node args = Node(NodeType::List); + + while (!isEOF()) { - Node n = make_node(internal::similarNodetypeFromTokentype(token.type), token.line, token.col, m_file); - n.setString(token.type != TokenType::Spread ? token.token : token.token.substr(3)); - return n; + std::string arg_name; + if (!name(&arg_name)) + break; + else + { + newlineOrComment(); + args.push_back(Node(NodeType::Symbol, arg_name)); + } } - case TokenType::Shorthand: - throwParseError("got a shorthand to atomize, and that's not normal. 
If you see this error please report it on GitHub.", token); - - default: + if (sequence("...")) { - // assuming it is a TokenType::Identifier, thus a Symbol - Node n = make_node(NodeType::Symbol, token.line, token.col, m_file); - n.setString(token.token); - return n; + std::string spread_name; + if (!name(&spread_name)) + errorWithNextToken("Expected a name for the variadic arguments list"); + args.push_back(Node(NodeType::Spread, spread_name)); + newlineOrComment(); } + + if (!accept(IsChar(')'))) + return std::nullopt; + newlineOrComment(); + + return args; } + + return std::nullopt; } - // high cpu cost - bool Parser::checkForInclude(Node& n, Node& parent, std::size_t pos) + std::optional Parser::macro() { - namespace fs = std::filesystem; + if (!accept(IsChar('('))) + return std::nullopt; + newlineOrComment(); + + if (!oneOf({ "$" })) + return std::nullopt; + newlineOrComment(); - // if we have a list, we may find an import statement inside - if (n.nodeType() == NodeType::List) + std::string symbol; + if (!name(&symbol)) + errorWithNextToken("$ needs a symbol to declare a macro"); + newlineOrComment(); + + Node leaf(NodeType::Macro); + leaf.push_back(Node(NodeType::Symbol, symbol)); + + auto position = getCount(); + if (auto args = macroArgs(); args.has_value()) + leaf.push_back(args.value()); + else { - if (n.constList().size() == 0) - return false; + backtrack(position); - const Node& first = n.constList()[0]; + ++m_allow_macro_behavior; + auto value = nodeOrValue(); + --m_allow_macro_behavior; - // if we found an import statement, inspect it - if (first.nodeType() == NodeType::Keyword && first.keyword() == Keyword::Import) - { - if (m_debug >= 2) - std::cout << "Import found in file: " << m_file << '\n'; + if (value.has_value()) + leaf.push_back(value.value()); + else + errorWithNextToken("Expected an argument list, atom or node while defining macro `" + symbol + "'"); - if (n.constList()[1].nodeType() != NodeType::String) - throw TypeError("Arguments of 
import must be of type String"); + if (accept(IsChar(')'))) + return leaf; + } - // check if we are not loading a plugin - if (std::string file = n.constList()[1].string(); fs::path(file).extension().string() == ".ark") - { - // search for the source file everywhere - std::string included_file = seekFile(file); - - // if the file isn't in the include list, then we can include it - // this avoids cyclic includes - if (std::find(m_parent_include.begin(), m_parent_include.end(), Utils::canonicalRelPath(included_file)) != m_parent_include.end()) - return true; - - // feed a new parser with our parent includes - Parser p(m_debug, m_options, m_libenv); - for (auto const& pi : m_parent_include) - p.m_parent_include.push_back(pi); // new parser, we can assume that the parent include list is empty - p.m_parent_include.push_back(m_file); // add the current file to avoid importing it again - p.feed(Utils::readFile(included_file), included_file); - - // update our list of included files - for (auto const& inc : p.m_parent_include) - { - if (std::find(m_parent_include.begin(), m_parent_include.end(), inc) == m_parent_include.end()) - m_parent_include.push_back(inc); - } + ++m_allow_macro_behavior; + auto value = nodeOrValue(); + --m_allow_macro_behavior; - for (std::size_t j = 1, end = p.ast().constList().size(); j < end; ++j) - parent.list().insert(parent.list().begin() + pos + j, p.ast().constList()[j]); + if (value.has_value()) + leaf.push_back(value.value()); + else + errorWithNextToken("Expected a value while defining macro `" + symbol + "'"); - return true; - } - } + newlineOrComment(); + expect(IsChar(')')); + return leaf; + } + + std::optional Parser::functionCall() + { + if (!accept(IsChar('('))) + return std::nullopt; + newlineOrComment(); + + std::optional func = std::nullopt; + if (auto atom = anyAtomOf({ NodeType::Symbol, NodeType::Field }); atom.has_value()) + func = atom; + else if (auto nested = node(); nested.has_value()) + func = nested; + else + return 
std::nullopt; + newlineOrComment(); + + NodeType call_type = NodeType::List; + if (auto node = func.value(); node.nodeType() == NodeType::Symbol) + { + // TODO enhance this to work with more/all macros + if (node.string() == "$undef") + call_type = NodeType::Macro; + } + + Node leaf(call_type); + leaf.push_back(func.value()); - // inspect every other node in the list - for (std::size_t i = 0; i < n.list().size(); ++i) + while (!isEOF()) + { + if (auto arg = nodeOrValue(); arg.has_value()) { - if (checkForInclude(n.list()[i], n, i)) - { - n.list().erase(n.list().begin() + i); - --i; - } + newlineOrComment(); + leaf.push_back(arg.value()); } + else + break; } - return false; + newlineOrComment(); + expect(IsChar(')')); + return leaf; } - std::string Parser::seekFile(const std::string& file) + std::optional Parser::list() { - const std::string current_dir = Utils::getDirectoryFromPath(m_file) + "/"; - const std::string path = (current_dir != "/") ? current_dir + file : file; + if (!accept(IsChar('['))) + return std::nullopt; + newlineOrComment(); - if (m_debug >= 2) - { - std::cout << "path: " << path << " ; file: " << file << " ; libpath: "; - for (auto&& lib : m_libenv) - std::cout << lib << ":"; - std::cout << "\nfilename: " << Utils::getFilenameFromPath(file) << '\n'; - } + Node leaf(NodeType::List); + leaf.push_back(Node(NodeType::Symbol, "list")); - // search in the current directory - if (Utils::fileExists(path)) - return path; - - // search in all folders in environment path - for (auto const& p : m_libenv) + while (!isEOF()) { - // then search in the standard library directory - if (std::string f = p + "/std/" + file; Utils::fileExists(f)) - return f; - // then in the standard library root directory - else if (std::string f2 = p + "/" + file; Utils::fileExists(f2)) - return f2; + if (auto value = nodeOrValue(); value.has_value()) + { + leaf.push_back(value.value()); + newlineOrComment(); + } + else + break; } - // fallback, we couldn't find the file - throw 
std::runtime_error("While processing file " + m_file + ", couldn't import " + file + ": file not found"); + newlineOrComment(); + expect(IsChar(']')); + return leaf; } - void Parser::expect(bool pred, const std::string& message, internal::Token token) + std::optional Parser::atom() { - if (!pred) - throwParseError(message, token); + auto pos = getCount(); + + if (auto res = Parser::number(); res.has_value()) + return res; + else + backtrack(pos); + + if (auto res = Parser::string(); res.has_value()) + return res; + else + backtrack(pos); + + if (auto res = Parser::spread(); m_allow_macro_behavior > 0 && res.has_value()) + return res; + else + backtrack(pos); + + if (auto res = Parser::field(); res.has_value()) + return res; + else + backtrack(pos); + + if (auto res = Parser::symbol(); res.has_value()) + return res; + else + backtrack(pos); + + if (auto res = Parser::nil(); res.has_value()) + return res; + else + backtrack(pos); + + return std::nullopt; } - void Parser::throwParseError(const std::string& message, internal::Token token) + std::optional Parser::anyAtomOf(std::initializer_list types) { - std::stringstream ss; - ss << message << "\nGot TokenType::" << internal::tokentype_string[static_cast(token.type)] << "\n"; + auto value = atom(); + if (value.has_value()) + { + for (auto type : types) + { + if (value->nodeType() == type) + return value; + } + } + return std::nullopt; + } - if (m_file != ARK_NO_NAME_FILE) - ss << "In file " << m_file << "\n"; - ss << internal::makeTokenBasedErrorCtx(token.token, token.line, token.col, m_code); + std::optional Parser::nodeOrValue() + { + if (auto value = atom(); value.has_value()) + return value; + else if (auto sub_node = node(); sub_node.has_value()) + return sub_node; - throw ParseError(ss.str()); + return std::nullopt; } - std::ostream& operator<<(std::ostream& os, const Parser& P) noexcept + std::optional Parser::wrapped(std::optional (Parser::*parser)(), const std::string& name, char a, char b) { - os << "AST\n"; 
- if (P.ast().nodeType() == NodeType::List) + if (!prefix(a)) + return std::nullopt; + + if (auto result = (this->*parser)(); result.has_value()) { - int i = 0; - for (const auto& node : P.ast().constList()) - std::cout << (i++) << ": " << node << '\n'; + if (!suffix(b)) + errorMissingSuffix(b, name); + return result; } - else - os << "Single item\n" - << P.m_ast << std::endl; - return os; + + return std::nullopt; } } diff --git a/src/arkreactor/Compiler/AST/makeErrorCtx.cpp b/src/arkreactor/Compiler/AST/makeErrorCtx.cpp deleted file mode 100644 index 968e453c8..000000000 --- a/src/arkreactor/Compiler/AST/makeErrorCtx.cpp +++ /dev/null @@ -1,127 +0,0 @@ -#include - -#include -#include -#include - -#include -#include -#include - -namespace Ark::internal -{ - void makeContext(std::ostream& os, const std::string& code, std::size_t line, std::size_t col_start, std::size_t sym_size) - { - os << termcolor::colorize; - std::vector ctx = Utils::splitString(code, '\n'); - - std::size_t col_end = std::min(col_start + sym_size, ctx[line].size()); - std::size_t first = line >= 3 ? line - 3 : 0; - std::size_t last = (line + 3) <= ctx.size() ? 
line + 3 : ctx.size(); - LineColorContextCounts line_color_context_counts; - - for (std::size_t loop = first; loop < last; ++loop) - { - std::string current_line = colorizeLine(ctx[loop], line_color_context_counts); - os << termcolor::green << std::setw(5) << (loop + 1) << termcolor::reset << " | " << current_line << "\n"; - - if (loop == line) - { - os << " | "; - - // padding of spaces - for (std::size_t i = 0; i < col_start; ++i) - os << " "; - - // underline the error - os << termcolor::red; - for (std::size_t i = col_start; i < col_end; ++i) - os << "^"; - - os << termcolor::reset << "\n"; - } - } - } - - std::string colorizeLine(const std::string& line, LineColorContextCounts& line_color_context_counts) - { - constexpr std::array pairing_color { - termcolor::bright_blue, - termcolor::bright_green, - termcolor::bright_yellow - }; - std::size_t pairing_color_size = pairing_color.size(); - - std::stringstream colorized_line; - colorized_line << termcolor::colorize; - - for (const char& c : line) - { - if (isPairableChar(c)) - { - std::size_t pairing_color_index = 0; - - switch (c) - { - case '(': - pairing_color_index = std::abs(line_color_context_counts.open_parentheses) % pairing_color_size; - line_color_context_counts.open_parentheses++; - break; - case ')': - line_color_context_counts.open_parentheses--; - pairing_color_index = std::abs(line_color_context_counts.open_parentheses) % pairing_color_size; - break; - case '[': - pairing_color_index = std::abs(line_color_context_counts.open_square_braces) % pairing_color_size; - line_color_context_counts.open_square_braces++; - break; - case ']': - line_color_context_counts.open_square_braces--; - pairing_color_index = std::abs(line_color_context_counts.open_square_braces) % pairing_color_size; - break; - case '{': - pairing_color_index = std::abs(line_color_context_counts.open_curly_braces) % pairing_color_size; - line_color_context_counts.open_curly_braces++; - break; - case '}': - 
line_color_context_counts.open_curly_braces--; - pairing_color_index = std::abs(line_color_context_counts.open_curly_braces) % pairing_color_size; - break; - } - - colorized_line << pairing_color[pairing_color_index] << c << termcolor::reset; - } - else - colorized_line << c; - } - - return colorized_line.str(); - } - - std::string makeNodeBasedErrorCtx(const std::string& message, const Node& node) - { - std::stringstream ss; - ss << message << "\n\n"; - if (node.filename() != ARK_NO_NAME_FILE) - ss << "In file " << node.filename() << "\n"; - ss << "On line " << (node.line() + 1) << ":" << node.col() << ", got `" << node << "'\n"; - - std::size_t ssize = 1; - if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::String || node.nodeType() == NodeType::Spread) - ssize = node.string().size(); - - if (node.filename() != ARK_NO_NAME_FILE) - makeContext(ss, Utils::readFile(node.filename()), node.line(), node.col(), ssize); - - return ss.str(); - } - - std::string makeTokenBasedErrorCtx(const std::string& match, std::size_t line, std::size_t col, const std::string& code) - { - std::stringstream ss; - ss << "On line " << (line + 1) << ":" << col << "\n"; - makeContext(ss, code, line, col, match.size()); - - return ss.str(); - } -} diff --git a/src/arkreactor/Compiler/Compiler.cpp b/src/arkreactor/Compiler/Compiler.cpp index 53b1e331f..8b38e882d 100644 --- a/src/arkreactor/Compiler/Compiler.cpp +++ b/src/arkreactor/Compiler/Compiler.cpp @@ -5,8 +5,8 @@ #include #include #include -#include -#undef max +#include +#include #include #include @@ -18,28 +18,18 @@ namespace Ark using namespace internal; using namespace literals; - Compiler::Compiler(unsigned debug, const std::vector& libenv, uint16_t options) : - m_parser(debug, options, libenv), m_optimizer(options), - m_options(options), m_debug(debug) + Compiler::Compiler(unsigned debug) : + m_debug(debug) {} - void Compiler::feed(const std::string& code, const std::string& filename) - { - m_parser.feed(code, 
filename); - - MacroProcessor mp(m_debug, m_options); - mp.feed(m_parser.ast()); - m_optimizer.feed(mp.ast()); - } - - void Compiler::compile() + void Compiler::process(const internal::Node& ast) { pushFileHeader(); m_code_pages.emplace_back(); // create empty page // gather symbols, values, and start to create code segments - compileExpression(m_optimizer.ast(), /* current_page */ 0, /* is_result_unused */ false, /* is_terminal */ false); + compileExpression(ast, /* current_page */ 0, /* is_result_unused */ false, /* is_terminal */ false); // throw an error on undefined symbol uses checkForUndefinedSymbol(); @@ -92,17 +82,7 @@ namespace Ark m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end()); } - void Compiler::saveTo(const std::string& file) - { - if (m_debug >= 1) - std::cout << "Final bytecode size: " << m_bytecode.size() * sizeof(uint8_t) << "B\n"; - - std::ofstream output(file, std::ofstream::binary); - output.write(reinterpret_cast(&m_bytecode[0]), m_bytecode.size() * sizeof(uint8_t)); - output.close(); - } - - const bytecode_t& Compiler::bytecode() noexcept + const bytecode_t& Compiler::bytecode() const noexcept { return m_bytecode; } @@ -193,23 +173,12 @@ namespace Ark m_bytecode.push_back(static_cast(addr & 0x00ff)); } else - throw CompilationError("trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code"); + throw Error("The compiler is trying to put a value in the value table, but the type isn't handled.\nCertainly a logic problem in the compiler source code"); m_bytecode.push_back(0_u8); } } - std::size_t Compiler::countArkObjects(const std::vector& lst) noexcept - { - std::size_t n = 0; - for (const Node& node : lst) - { - if (node.nodeType() != NodeType::GetField) - n++; - } - return n; - } - std::optional Compiler::getOperator(const std::string& name) noexcept { auto it = std::find(internal::operators.begin(), internal::operators.end(), name); @@ 
-280,13 +249,14 @@ namespace Ark void Compiler::compilerWarning(const std::string& message, const Node& node) { - if (m_options & FeatureShowWarnings) - std::cout << termcolor::yellow << "Warning " << termcolor::reset << makeNodeBasedErrorCtx(message, node) << "\n"; + std::cout << termcolor::yellow << "Warning " << termcolor::reset << Diagnostics::makeContextWithNode(message, node) << "\n"; } void Compiler::throwCompilerError(const std::string& message, const Node& node) { - throw CompilationError(makeNodeBasedErrorCtx(message, node)); + std::stringstream ss; + ss << node; + throw CodeError(message, node.filename(), node.line(), node.col(), ss.str()); } void Compiler::compileExpression(const Node& x, int p, bool is_result_unused, bool is_terminal, const std::string& var_name) @@ -294,10 +264,15 @@ namespace Ark // register symbols if (x.nodeType() == NodeType::Symbol) compileSymbol(x, p, is_result_unused); - else if (x.nodeType() == NodeType::GetField) + else if (x.nodeType() == NodeType::Field) { - uint16_t i = addSymbol(x); - page(p).emplace_back(Instruction::GET_FIELD, i); + // the parser guarantees us that there is at least 2 elements (eg: a.b) + compileSymbol(x.constList()[0], p, is_result_unused); + for (auto it = x.constList().begin() + 1, end = x.constList().end(); it != end; ++it) + { + uint16_t i = addSymbol(*it); + page(p).emplace_back(Instruction::GET_FIELD, i); + } } // register values else if (x.nodeType() == NodeType::String || x.nodeType() == NodeType::Number) @@ -364,10 +339,6 @@ namespace Ark compilePluginImport(x, p); break; - case Keyword::Quote: - compileQuote(x, p, is_result_unused, is_terminal, var_name); - break; - case Keyword::Del: page(p).emplace_back(Instruction::DEL, addSymbol(x.constList()[1])); break; @@ -405,26 +376,14 @@ namespace Ark Instruction inst = getSpecific(name).value(); // length of at least 1 since we got a symbol name - uint16_t argc = countArkObjects(x.constList()) - 1; + uint16_t argc = x.constList().size() - 1; // 
error, can not use append/concat/pop (and their in place versions) with a <2 length argument list if (argc < 2 && inst != Instruction::LIST) - throw CompilationError("can not use " + name + " with less than 2 arguments"); + throwCompilerError(fmt::format("Can not use {} with less than 2 arguments", name), c0); // compile arguments in reverse order for (uint16_t i = x.constList().size() - 1; i > 0; --i) - { - uint16_t j = i; - while (x.constList()[j].nodeType() == NodeType::GetField) - --j; - uint16_t diff = i - j; - while (j < i) - { - compileExpression(x.constList()[j], p, false, false); - ++j; - } compileExpression(x.constList()[i], p, false, false); - i -= diff; - } // put inst and number of arguments page(p).emplace_back(inst, computeSpecificInstArgc(inst, argc)); @@ -549,32 +508,23 @@ namespace Ark page(p)[jump_to_end_pos].data = static_cast(page(p).size()); } - void Compiler::compileQuote(const Node& x, int p, bool is_result_unused, bool is_terminal, const std::string& var_name) + void Compiler::compilePluginImport(const Node& x, int p) { - // create new page for quoted code - m_code_pages.emplace_back(); - std::size_t page_id = m_code_pages.size() - 1; - compileExpression(x.constList()[1], page_id, false, is_terminal, var_name); - page(page_id).emplace_back(Instruction::RET); // return to the last frame - - // call it - uint16_t id = addValue(page_id, x); // save page_id into the constants table as PageAddr - page(p).emplace_back(Instruction::LOAD_CONST, id); - - if (is_result_unused) + std::string path; + Node package_node = x.constList()[1]; + for (std::size_t i = 0, end = package_node.constList().size(); i < end; ++i) { - compilerWarning("Unused quote expression", x); - page(p).push_back(Instruction::POP); + path += package_node.constList()[i].string(); + if (i + 1 != end) + path += "/"; } - } + path += ".arkm"; - void Compiler::compilePluginImport(const Node& x, int p) - { // register plugin path in the constants table - uint16_t id = 
addValue(x.constList()[1]); + uint16_t id = addValue(Node(NodeType::String, path)); // save plugin name to use it later - m_plugins.push_back(x.constList()[1].string()); - // add plugin instruction + id of the constant refering to the plugin path + m_plugins.push_back(path); + // add plugin instruction + id of the constant referring to the plugin path page(p).emplace_back(Instruction::PLUGIN, id); } @@ -582,21 +532,10 @@ namespace Ark { m_temp_pages.emplace_back(); int proc_page = -static_cast(m_temp_pages.size()); - compileExpression(x.constList()[0], proc_page, false, false); // storing proc + std::size_t n = 1; - // trying to handle chained closure.field.field.field... - std::size_t n = 1; // we need it later - const std::size_t end = x.constList().size(); - while (n < end) - { - if (x.constList()[n].nodeType() == NodeType::GetField) - { - compileExpression(x.constList()[n], proc_page, false, false); - n++; - } - else - break; - } + compileExpression(x.constList()[0], proc_page, false, false); // storing proc + // closure chains have been handled: closure.field.field.function // it's a builtin/function if (m_temp_pages.back()[0].opcode < Instruction::FIRST_OPERATOR) @@ -628,8 +567,7 @@ namespace Ark std::size_t args_count = 0; for (auto it = x.constList().begin() + 1, it_end = x.constList().end(); it != it_end; ++it) { - if (it->nodeType() != NodeType::GetField && - it->nodeType() != NodeType::Capture) + if (it->nodeType() != NodeType::Capture) args_count++; } // call the procedure @@ -651,10 +589,7 @@ namespace Ark { compileExpression(x.constList()[index], p, false, false); - if ((index + 1 < size && - x.constList()[index + 1].nodeType() != NodeType::GetField && - x.constList()[index + 1].nodeType() != NodeType::Capture) || - index + 1 == size) + if ((index + 1 < size && x.constList()[index + 1].nodeType() != NodeType::Capture) || index + 1 == size) exp_count++; // in order to be able to handle things like (op A B C D...) 
diff --git a/src/arkreactor/Compiler/ImportSolver.cpp b/src/arkreactor/Compiler/ImportSolver.cpp new file mode 100644 index 000000000..2af43881c --- /dev/null +++ b/src/arkreactor/Compiler/ImportSolver.cpp @@ -0,0 +1,191 @@ +#include + +#include +#include + +namespace Ark::internal +{ + ImportSolver::ImportSolver(unsigned debug, const std::vector& libenv) : + m_debug(debug), m_libenv(libenv) + {} + + void ImportSolver::process(const std::filesystem::path& root, const Node& origin_ast, const std::vector& origin_imports) + { + m_root = root; + + std::stack imports; + for (auto it = origin_imports.rbegin(), end = origin_imports.rend(); it != end; ++it) + imports.push(*it); + + while (!imports.empty()) + { + Import import = imports.top(); + // Remove the top element to process the other imports + // It needs to be removed first because we might be adding + // other imports later and don't want to pop THEM + imports.pop(); + + // TODO: add special handling for each type of import (prefixed, with symbols, glob pattern) + if (m_modules.find(import.toPackageString()) == m_modules.end()) + { + // NOTE: since the "file" (=root) argument doesn't change between all calls, we could get rid of it + std::vector additional_imports = parseImport(root, import); + // TODO import and store the new node as a Module node. + // Module nodes should be scoped relatively to their packages + // They should provide specific methods to resolve symbols, + // mark them as public or private. + // OR we could have a map, update the module + // accordingly, and once we are done concat all the nodes + // in a single AST. 
+ for (auto it = additional_imports.rbegin(), end = additional_imports.rend(); it != end; ++it) + imports.push(*it); + } + else + { + // TODO: if we already imported a package we should merge their definition + // (import foo:*) > (import foo:a) -- no prefix + // (import foo) -- with prefix + // and then decide what to do with the module + } + } + + m_ast = findAndReplaceImports(origin_ast).first; + } + + std::pair ImportSolver::findAndReplaceImports(const Node& ast) + { + Node x = ast; + if (x.nodeType() == NodeType::List) + { + if (x.constList().size() >= 2 && x.constList()[0].nodeType() == NodeType::Keyword && + x.constList()[0].keyword() == Keyword::Import) + { + // TODO maybe we'll have problems with :* ? + std::string package = std::accumulate( + std::next(x.constList()[1].constList().begin()), + x.constList()[1].constList().end(), + x.constList()[1].constList()[0].string(), + [](const std::string& acc, const Node& elem) -> std::string { + return acc + "." + elem.string(); + }); + + if (std::find(m_imported.begin(), m_imported.end(), package) == m_imported.end()) + { + m_imported.push_back(package); + // modules are already handled, we can safely replace the node + x = m_modules[package].ast; + if (!m_modules[package].has_been_processed) + x = findAndReplaceImports(x).first; // FIXME? + return std::make_pair(x, !m_modules[package].has_been_processed); + } + else + { + // Replace by empty node to avoid breaking the code gen + x = Node(NodeType::List); + x.push_back(Node(Keyword::Begin)); + } + } + else + { + for (std::size_t i = 0; i < x.constList().size(); ++i) + { + auto [node, is_import] = findAndReplaceImports(x.constList()[i]); + if (!is_import) + x.list()[i] = node; + else + { + if (node.constList().size() > 1) + { + x.list()[i] = node.constList()[1]; + // NOTE maybe maybe maybe + // why do we start at 2 and not 1? 
+ for (std::size_t j = 2, end_j = node.constList().size(); j < end_j; ++j) + { + if (i + j - 1 < x.list().size()) + x.list().insert(x.list().begin() + i + j - 1, node.constList()[j]); + else + x.list().push_back(node.constList()[j]); + } + + // -2 because we skipped the Begin node and the first node of the block isn't inserted + // but replaces an existing one + i += node.constList().size() - 2; + } + else + x.list()[i] = node; + } + } + } + } + + return std::make_pair(x, false); + } + + const Node& ImportSolver::ast() const noexcept + { + return m_ast; + } + + std::vector ImportSolver::parseImport(const std::filesystem::path& file, const Import& import) + { + const auto path = findFile(file, import); + if (path.extension() == ".arkm") // Nothing to import in case of modules + { + // Creating an import node that will stay there when visiting the AST and + // replacing the imports with their parsed module + Node module_node = Node(NodeType::List); + module_node.push_back(Node(Keyword::Import)); + + Node package_node = Node(NodeType::List); + for (const std::string& stem : import.package) + package_node.push_back(Node(NodeType::String, stem)); + module_node.push_back(package_node); + // empty symbols list + module_node.push_back(Node(NodeType::List)); + + m_modules[import.toPackageString()] = Module { + module_node, + true + }; + + return {}; + } + + Parser parser; + parser.processFile(path.string()); + m_modules[import.toPackageString()] = Module { + parser.ast(), + false + }; + + return parser.imports(); + } + + std::optional testExtensions(const std::filesystem::path& folder, const std::string& package_path) + { + if (auto code_path = folder / (package_path + ".ark"); std::filesystem::exists(code_path)) + return code_path; + else if (auto module_path = folder / (package_path + ".arkm"); std::filesystem::exists(module_path)) + return module_path; + return {}; + } + + std::filesystem::path ImportSolver::findFile(const std::filesystem::path& file, const Import& 
import) + { + const std::string package_path = import.packageToPath(); + if (auto maybe_path = testExtensions(m_root, package_path); maybe_path.has_value()) + return maybe_path.value(); + + // search in all folders in environment path + for (const auto& path : m_libenv) + { + if (auto maybe_path = testExtensions(path, package_path); maybe_path.has_value()) + return maybe_path.value(); + } + + // fallback, we couldn't find the file + throw std::runtime_error( + "While processing file " + std::filesystem::relative(file, m_root).generic_string() + + ", couldn't import " + import.toPackageString() + ": file not found"); + } +} diff --git a/src/arkreactor/Compiler/JsonCompiler.cpp b/src/arkreactor/Compiler/JsonCompiler.cpp index 659d30e67..7ee75cd04 100644 --- a/src/arkreactor/Compiler/JsonCompiler.cpp +++ b/src/arkreactor/Compiler/JsonCompiler.cpp @@ -1,45 +1,28 @@ #include -#include - #include #include #include #include +#include + namespace Ark { using namespace internal; - JsonCompiler::JsonCompiler(unsigned debug, const std::vector& libenv, uint16_t options) : - m_parser(debug, options, libenv), m_optimizer(options), - m_options(options), m_debug(debug) + JsonCompiler::JsonCompiler(unsigned debug, const std::vector& libenv) : + m_welder(debug, libenv) {} - void JsonCompiler::feed(const std::string& code, const std::string& filename) + void JsonCompiler::feed(const std::string& filename) { - m_parser.feed(code, filename); - - MacroProcessor mp(m_debug, m_options); - mp.feed(m_parser.ast()); - m_optimizer.feed(mp.ast()); + m_welder.computeASTFromFile(filename); } std::string JsonCompiler::compile() { - return _compile(m_optimizer.ast()); - } - - template - std::string string_format(const std::string& format, Args&&... args) - { - constexpr size_t buffer_size = 8192; - static char buf[buffer_size] = { 0 }; - std::string to_return = ""; - while (snprintf(buf, buffer_size - 1, format.c_str(), std::forward(args)...) 
== buffer_size - 1) - to_return += std::string(buf); - to_return += std::string(buf); - return to_return; + return _compile(m_welder.ast()); } std::string JsonCompiler::_compile(const Node& node) @@ -50,40 +33,39 @@ namespace Ark { case NodeType::Symbol: { - json += string_format( - R"({"type": "Symbol", "name": "%s"})", + json += fmt::format( + R"({{"type": "Symbol", "name": "{}"}})", node.string().c_str()); break; } case NodeType::Capture: { - json += string_format( - R"({"type": "Capture", "name": "%s"})", + json += fmt::format( + R"({{"type": "Capture", "name": "{}"}})", node.string().c_str()); break; } - case NodeType::GetField: + case NodeType::Field: { - json += string_format( - R"({"type": "GetField", "name": "%s"})", - node.string().c_str()); + json += R"({"type": "Field", "children": )"; + json += toJsonList(node, 0) + "}"; break; } case NodeType::String: { - json += string_format( - R"({"type": "String", "value": "%s"})", + json += fmt::format( + R"({{"type": "String", "value": "{}"}})", node.string().c_str()); break; } case NodeType::Number: { - json += string_format( - R"({"type": "Number", "value": %f})", + json += fmt::format( + R"({{"type": "Number", "value": {}}})", node.number()); break; } @@ -107,8 +89,8 @@ namespace Ark args += ", "; } - json += string_format( - R"({"type": "Fun", "args": [%s], "body": %s})", + json += fmt::format( + R"({{"type": "Fun", "args": [{}], "body": {}}})", args.c_str(), _compile(node.constList()[2]).c_str()); break; } @@ -116,8 +98,8 @@ namespace Ark case Keyword::Let: { // (let name value) - json += string_format( - R"({"type": "Let", "name": %s, "value": %s})", + json += fmt::format( + R"({{"type": "Let", "name": {}, "value": {}}})", _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); break; } @@ -125,8 +107,8 @@ namespace Ark case Keyword::Mut: { // (mut name value) - json += string_format( - R"({"type": "Mut", "name": %s, "value": %s})", + json += fmt::format( + R"({{"type": "Mut", 
"name": {}, "value": {}}})", _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); break; } @@ -134,8 +116,8 @@ namespace Ark case Keyword::Set: { // (set name value) - json += string_format( - R"({"type": "Set", "name": %s, "value": %s})", + json += fmt::format( + R"({{"type": "Set", "name": {}, "value": {}}})", _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); break; } @@ -143,8 +125,8 @@ namespace Ark case Keyword::If: { // (if condition then else) - json += string_format( - R"({"type": "If", "condition": %s, "then": %s, "else": %s})", + json += fmt::format( + R"({{"type": "If", "condition": {}, "then": {}, "else": {}}})", _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str(), _compile(node.constList()[3]).c_str()); break; } @@ -152,8 +134,8 @@ namespace Ark case Keyword::While: { // (while condition body) - json += string_format( - R"({"type": "While", "condition": %s, "body": %s})", + json += fmt::format( + R"({{"type": "While", "condition": {}, "body": {}}})", _compile(node.constList()[1]).c_str(), _compile(node.constList()[2]).c_str()); break; } @@ -169,17 +151,8 @@ namespace Ark case Keyword::Import: { // (import value) - json += string_format( - R"({"type": "Import", "value": %s})", - _compile(node.constList()[1]).c_str()); - break; - } - - case Keyword::Quote: - { - // (quote value) - json += string_format( - R"({"type": "Quote", "value": %s})", + json += fmt::format( + R"({{"type": "Import", "value": {}}})", _compile(node.constList()[1]).c_str()); break; } @@ -187,8 +160,8 @@ namespace Ark case Keyword::Del: { // (del value) - json += string_format( - R"({"type": "Del", "value": %s})", + json += fmt::format( + R"({{"type": "Del", "value": {}}})", _compile(node.constList()[1]).c_str()); break; } @@ -197,8 +170,8 @@ namespace Ark else if (node.constList().size() > 1 && node.constList()[0].nodeType() == NodeType::Symbol) { // (foo bar 1) - json += string_format( - R"({"type": 
"FunctionCall", "name": %s, "args": )", + json += fmt::format( + R"({{"type": "FunctionCall", "name": {}, "args": )", _compile(node.constList()[0]).c_str()); json += toJsonList(node, 1) + "}"; } @@ -209,8 +182,8 @@ namespace Ark } default: - throw std::runtime_error(string_format( - "Not handled NodeType::%s (%s at %zu:%zu), please report this error on GitHub", + throw Error(fmt::format( + "Not handled NodeType::{} ({} at {}:{}), please report this error on GitHub", nodeTypes[static_cast(node.nodeType())].data(), node.filename().c_str(), node.line(), diff --git a/src/arkreactor/Compiler/Macros/Executors/Conditional.cpp b/src/arkreactor/Compiler/Macros/Executors/Conditional.cpp index bedbf7b65..3694eeb8e 100644 --- a/src/arkreactor/Compiler/Macros/Executors/Conditional.cpp +++ b/src/arkreactor/Compiler/Macros/Executors/Conditional.cpp @@ -11,7 +11,7 @@ namespace Ark::internal Node cond = node.list()[1]; Node temp = evaluate(cond, /* is_not_body */ true); Node if_true = node.list()[2]; - Node if_false = node.constList().size() > 3 ? node.list()[3] : Node::getNilNode(); + Node if_false = node.constList().size() > 3 ? 
node.list()[3] : getNilNode(); // evaluate cond if (isTruthy(temp)) diff --git a/src/arkreactor/Compiler/Macros/Executors/Function.cpp b/src/arkreactor/Compiler/Macros/Executors/Function.cpp index eaca0e710..4c31a56f8 100644 --- a/src/arkreactor/Compiler/Macros/Executors/Function.cpp +++ b/src/arkreactor/Compiler/Macros/Executors/Function.cpp @@ -1,5 +1,7 @@ #include +#include + namespace Ark::internal { bool FunctionExecutor::canHandle(Node& node) @@ -46,7 +48,7 @@ namespace Ark::internal if (args_applied.find(arg_name) == args_applied.end()) { args_applied[arg_name] = Node(NodeType::List); - args_applied[arg_name].push_back(Node::getListNode()); + args_applied[arg_name].push_back(getListNode()); } // do not move j because we checked before that the spread is always the last one args_applied[arg_name].push_back(node.constList()[i]); @@ -58,14 +60,14 @@ namespace Ark::internal { // just a spread we didn't assign args_applied[args.list().back().string()] = Node(NodeType::List); - args_applied[args.list().back().string()].push_back(Node::getListNode()); + args_applied[args.list().back().string()].push_back(getListNode()); } if (args_given != args_needed && !has_spread) - throwMacroProcessingError("Macro `" + macro_name + "' got " + std::to_string(args_given) + " argument(s) but needed " + std::to_string(args_needed), node); + throwMacroProcessingError(fmt::format("Macro `{}' got {} argument(s) but needed {}", macro_name, args_given, args_needed), node); else if (args_applied.size() != args_needed && has_spread) // args_needed - 1 because we do not count the spread as a required argument - throwMacroProcessingError("Macro `" + macro_name + "' got " + std::to_string(args_applied.size()) + " argument(s) but needed at least " + std::to_string(args_needed - 1), node); + throwMacroProcessingError(fmt::format("Macro `{}' got {} argument(s) but needed at least {}", macro_name, args_applied.size(), args_needed - 1), node); if (!args_applied.empty()) unify(args_applied, 
temp_body, nullptr); diff --git a/src/arkreactor/Compiler/Macros/Processor.cpp b/src/arkreactor/Compiler/Macros/Processor.cpp index fe304c9b6..6822af9dd 100644 --- a/src/arkreactor/Compiler/Macros/Processor.cpp +++ b/src/arkreactor/Compiler/Macros/Processor.cpp @@ -2,18 +2,22 @@ #include #include +#include +#include #include -#include #include #include #include #include +// fixme +#include + namespace Ark::internal { - MacroProcessor::MacroProcessor(unsigned debug, uint16_t options) noexcept : - m_debug(debug), m_options(options) + MacroProcessor::MacroProcessor(unsigned debug) noexcept : + m_debug(debug) { // create executors pipeline m_executor_pipeline = MacroExecutorPipeline( @@ -27,14 +31,14 @@ namespace Ark::internal }; } - void MacroProcessor::feed(const Node& ast) + void MacroProcessor::process(const Node& ast) { if (m_debug >= 2) std::cout << "Processing macros...\n"; // to be able to modify it m_ast = ast; - process(m_ast, 0); + processNode(m_ast, 0); if (m_debug >= 3) { @@ -52,64 +56,62 @@ namespace Ark::internal { // a macro needs at least 2 nodes, name + value is the minimal form if (node.constList().size() < 2) - throwMacroProcessingError("invalid macro, missing value", node); + throwMacroProcessingError("Invalid macro, missing value", node); Node& first_node = node.list()[0]; Node& second_node = node.list()[1]; - // !{name value} + // ($ name value) if (node.constList().size() == 2) { if (first_node.nodeType() == NodeType::Symbol) { - if (first_node.string() != "undef") + if (first_node.string() != "$undef") m_macros.back().add(first_node.string(), node); else if (second_node.nodeType() == NodeType::Symbol) // undefine a macro deleteNearestMacro(second_node.string()); else // used undef on a non-symbol - throwMacroProcessingError("can not undefine a macro without a name", second_node); + throwMacroProcessingError("Can not undefine a macro without a name", second_node); return; } - throwMacroProcessingError("can not define a macro without a symbol", 
first_node); + throwMacroProcessingError("Can not define a macro without a symbol", first_node); } - // !{name (args) body} + // ($ name (args) body) else if (node.constList().size() == 3 && first_node.nodeType() == NodeType::Symbol) { if (second_node.nodeType() != NodeType::List) - throwMacroProcessingError("invalid macro argument's list", second_node); + throwMacroProcessingError("Invalid macro argument's list", second_node); else { bool had_spread = false; for (const Node& n : second_node.constList()) { if (n.nodeType() != NodeType::Symbol && n.nodeType() != NodeType::Spread) - throwMacroProcessingError("invalid macro argument's list, expected symbols", n); + throwMacroProcessingError("Invalid macro argument's list, expected symbols", n); else if (n.nodeType() == NodeType::Spread) { if (had_spread) - throwMacroProcessingError("got another spread argument, only one is allowed", n); + throwMacroProcessingError("Invalid macro, multiple spread in argument list while only one is allowed", n); had_spread = true; } else if (had_spread && n.nodeType() == NodeType::Symbol) - throwMacroProcessingError("got another argument after a spread argument, which is invalid", n); + throwMacroProcessingError(fmt::format("Invalid macro, a spread should mark the end of an argument list, but found another argument: {}", n.string()), n); } m_macros.back().add(first_node.string(), node); return; } } - // !{if cond then else} + // ($if cond then else) else if (std::size_t size = node.constList().size(); size == 3 || size == 4) { - if (first_node.nodeType() == NodeType::Keyword && first_node.keyword() == Keyword::If) + if (first_node.nodeType() == NodeType::Keyword && first_node.keyword() == Keyword::If) // FIXME when we change the keyword for conditions inside macros { applyMacro(node); return; } - else if (first_node.nodeType() == NodeType::Keyword) - throwMacroProcessingError("the only authorized keyword in macros is `if'", first_node); } // if we are here, it means we couldn't 
recognize the given macro, thus making it invalid - throwMacroProcessingError("unrecognized macro form", node); + throwMacroProcessingError("Unrecognized macro form", node); } void MacroProcessor::registerFuncDef(Node& node) @@ -130,7 +132,7 @@ namespace Ark::internal } } - void MacroProcessor::process(Node& node, unsigned depth) + void MacroProcessor::processNode(Node& node, unsigned depth) { bool has_created = false; @@ -189,7 +191,7 @@ namespace Ark::internal if (node.nodeType() == NodeType::List) { - process(node.list()[i], depth + 1); + processNode(node.list()[i], depth + 1); // needed if we created a function node from a macro registerFuncDef(node.list()[i]); } @@ -234,7 +236,7 @@ namespace Ark::internal unify(map, subnode, parent); if (subnode.nodeType() != NodeType::List) - throwMacroProcessingError("Got a non-list while trying to apply the spread operator", subnode); + throwMacroProcessingError(fmt::format("Can not unify a {} to a Spread", typeToString(subnode)), subnode); for (std::size_t i = 1, end = subnode.list().size(); i < end; ++i) parent->list().insert(parent->list().begin() + index + i, subnode.list()[i]); @@ -263,21 +265,22 @@ namespace Ark::internal return ret; \ } -#define GEN_COMPARATOR(str_name, cond) GEN_NOT_BODY( \ - str_name, \ - throwMacroProcessingError("Interpreting a `" str_name "' condition with " + \ - std::to_string(node.list().size() - 1) + " arguments, instead of 2.", \ - node), \ - (cond) ? Node::getTrueNode() : Node::getFalseNode()) - -#define GEN_OP(str_name, op) GEN_NOT_BODY( \ - str_name, \ - throwMacroProcessingError("Interpreting a `" str_name "' operation with " + \ - std::to_string(node.list().size() - 1) + " arguments, instead of 2.", \ - node), \ +#define GEN_COMPARATOR(str_name, cond) GEN_NOT_BODY( \ + str_name, \ + throwMacroProcessingError( \ + fmt::format("Interpreting a `{}' condition with {} arguments, expected 2.", str_name, argcount), \ + node), \ + (cond) ? 
getTrueNode() : getFalseNode()) + +#define GEN_OP(str_name, op) GEN_NOT_BODY( \ + str_name, \ + throwMacroProcessingError( \ + fmt::format("Interpreting a `{}' operation with {} arguments, expected 2.", str_name, argcount), \ + node), \ (one.nodeType() == two.nodeType() && two.nodeType() == NodeType::Number) ? Node(one.number() op two.number()) : node) const std::string& name = node.list()[0].string(); + const std::size_t argcount = node.list().size() - 1; if (const Node* macro = findNearestMacro(name); macro != nullptr) { applyMacro(node.list()[0]); @@ -297,43 +300,43 @@ namespace Ark::internal else if (name == "not" && is_not_body) { if (node.list().size() != 2) - throwMacroProcessingError("Interpreting a `not' condition with " + std::to_string(node.list().size() - 1) + " arguments, instead of 1.", node); + throwMacroProcessingError(fmt::format("Interpreting a `not' condition with {} arguments, expected 1.", argcount), node); - return (!isTruthy(evaluate(node.list()[1], is_not_body))) ? Node::getTrueNode() : Node::getFalseNode(); + return (!isTruthy(evaluate(node.list()[1], is_not_body))) ? 
getTrueNode() : getFalseNode(); } else if (name == "and" && is_not_body) { if (node.list().size() < 3) - throwMacroProcessingError("Interpreting a `and' chain with " + std::to_string(node.list().size() - 1) + " arguments, expected at least 2.", node); + throwMacroProcessingError(fmt::format("Interpreting a `and' chain with {} arguments, expected at least 2.", argcount), node); for (std::size_t i = 1, end = node.list().size(); i < end; ++i) { if (!isTruthy(evaluate(node.list()[i], is_not_body))) - return Node::getFalseNode(); + return getFalseNode(); } - return Node::getTrueNode(); + return getTrueNode(); } else if (name == "or" && is_not_body) { if (node.list().size() < 3) - throwMacroProcessingError("Interpreting a `or' chain with " + std::to_string(node.list().size() - 1) + " arguments, expected at least 2.", node); + throwMacroProcessingError(fmt::format("Interpreting an `or' chain with {} arguments, expected at least 2.", argcount), node); for (std::size_t i = 1, end = node.list().size(); i < end; ++i) { if (isTruthy(evaluate(node.list()[i], is_not_body))) - return Node::getTrueNode(); + return getTrueNode(); } - return Node::getFalseNode(); + return getFalseNode(); } else if (name == "len") { if (node.list().size() > 2) - throwMacroProcessingError("When expanding `len' inside a macro, got " + std::to_string(node.list().size() - 1) + " arguments, needed only 1", node); + throwMacroProcessingError(fmt::format("When expanding `len' inside a macro, got {} arguments, expected 1", argcount), node); else if (Node& lst = node.list()[1]; lst.nodeType() == NodeType::List) // only apply len at compile time if we can { if (isConstEval(lst)) { - if (lst.list().size() > 0 && lst.list()[0] == Node::getListNode()) + if (lst.list().size() > 0 && lst.list()[0] == getListNode()) node = Node(static_cast(lst.list().size()) - 1); else node = Node(static_cast(lst.list().size())); @@ -343,7 +346,7 @@ namespace Ark::internal else if (name == "@") { if (node.list().size() != 3) - 
throwMacroProcessingError("Interpreting a `@' with " + std::to_string(node.list().size() - 1) + " arguments, instead of 2.", node); + throwMacroProcessingError(fmt::format("Interpreting a `@' with {} arguments, expected 2.", argcount), node); Node sublist = evaluate(node.list()[1], is_not_body); Node idx = evaluate(node.list()[2], is_not_body); @@ -355,7 +358,7 @@ namespace Ark::internal long num_idx = static_cast(idx.number()); // if the first node is the function call to "list", don't count it - if (size > 0 && sublist.list()[0] == Node::getListNode()) + if (size > 0 && sublist.list()[0] == getListNode()) { real_size--; if (num_idx >= 0) @@ -366,17 +369,17 @@ namespace Ark::internal if (num_idx < size) return sublist.list()[num_idx]; else - throwMacroProcessingError("Index (" + std::to_string(static_cast(idx.number())) + ") out of range (list size: " + std::to_string(real_size) + ")", node); + throwMacroProcessingError(fmt::format("Index ({}) out of range (list size: {})", num_idx, real_size), node); } } else if (name == "head") { if (node.list().size() > 2) - throwMacroProcessingError("When expanding `head' inside a macro, got " + std::to_string(node.list().size() - 1) + " arguments, needed only 1", node); + throwMacroProcessingError(fmt::format("When expanding `head' inside a macro, got {} arguments, expected 1", argcount), node); else if (node.list()[1].nodeType() == NodeType::List) { Node& sublist = node.list()[1]; - if (sublist.constList().size() > 0 && sublist.constList()[0] == Node::getListNode()) + if (sublist.constList().size() > 0 && sublist.constList()[0] == getListNode()) { if (sublist.constList().size() > 1) { @@ -384,22 +387,22 @@ namespace Ark::internal node = sublistCopy; } else - node = Node::getNilNode(); + node = getNilNode(); } else if (sublist.list().size() > 0) node = sublist.constList()[0]; else - node = Node::getNilNode(); + node = getNilNode(); } } else if (name == "tail") { if (node.list().size() > 2) - throwMacroProcessingError("When 
expanding `tail' inside a macro, got " + std::to_string(node.list().size() - 1) + " arguments, needed only 1", node); + throwMacroProcessingError(fmt::format("When expanding `tail' inside a macro, got {} arguments, expected 1", argcount), node); else if (node.list()[1].nodeType() == NodeType::List) { Node sublist = node.list()[1]; - if (sublist.list().size() > 0 && sublist.list()[0] == Node::getListNode()) + if (sublist.list().size() > 0 && sublist.list()[0] == getListNode()) { if (sublist.list().size() > 1) { @@ -409,7 +412,7 @@ namespace Ark::internal else { node = Node(NodeType::List); - node.push_back(Node::getListNode()); + node.push_back(getListNode()); } } else if (sublist.list().size() > 0) @@ -420,16 +423,16 @@ namespace Ark::internal else { node = Node(NodeType::List); - node.push_back(Node::getListNode()); + node.push_back(getListNode()); } } } else if (name == "symcat") { if (node.list().size() <= 2) - throwMacroProcessingError("When expanding `symcat', expected at least 2 arguments, got " + std::to_string(node.list().size() - 1) + " arguments", node); + throwMacroProcessingError(fmt::format("When expanding `symcat', expected at least 2 arguments, got {} arguments", argcount), node); if (node.list()[1].nodeType() != NodeType::Symbol) - throwMacroProcessingError("When expanding `symcat', expected the first argument to be a Symbol, got a " + typeToString(node.list()[1]), node); + throwMacroProcessingError(fmt::format("When expanding `symcat', expected the first argument to be a Symbol, got a {}", typeToString(node.list()[1])), node); std::string sym = node.list()[1].string(); @@ -449,7 +452,7 @@ namespace Ark::internal break; default: - throwMacroProcessingError("When expanding `symcat', expected either a Number, String or Symbol, got a " + typeToString(ev), ev); + throwMacroProcessingError(fmt::format("When expanding `symcat', expected either a Number, String or Symbol, got a {}", typeToString(ev)), ev); } } @@ -464,12 +467,12 @@ namespace Ark::internal 
if (auto it = m_defined_functions.find(sym.string()); it != m_defined_functions.end()) node = Node(static_cast(it->second.constList().size())); else - throwMacroProcessingError("When expanding `argcount', expected a known function name, got unbound variable " + sym.string(), sym); + throwMacroProcessingError(fmt::format("When expanding `argcount', expected a known function name, got unbound variable {}", sym.string()), sym); } else if (sym.nodeType() == NodeType::List && sym.list().size() == 3 && sym.list()[0].nodeType() == NodeType::Keyword && sym.list()[0].keyword() == Keyword::Fun) node = Node(static_cast(sym.list()[1].list().size())); else - throwMacroProcessingError("When trying to apply `argcount', got a " + std::string(nodeTypes[static_cast(sym.nodeType())]) + " instead of a Symbol or Function", sym); + throwMacroProcessingError(fmt::format("When trying to apply `argcount', got a {} instead of a Symbol or Function", typeToString(sym)), sym); } } @@ -601,7 +604,7 @@ namespace Ark::internal }); case NodeType::Capture: - case NodeType::GetField: + case NodeType::Field: return false; case NodeType::Keyword: @@ -619,6 +622,8 @@ namespace Ark::internal void MacroProcessor::throwMacroProcessingError(const std::string& message, const Node& node) { - throw MacroProcessingError(makeNodeBasedErrorCtx(message, node)); + std::stringstream ss; + ss << node; + throw CodeError(message, node.filename(), node.line(), node.col(), ss.str()); } } diff --git a/src/arkreactor/Compiler/Welder.cpp b/src/arkreactor/Compiler/Welder.cpp new file mode 100644 index 000000000..d564398e7 --- /dev/null +++ b/src/arkreactor/Compiler/Welder.cpp @@ -0,0 +1,98 @@ +#include + +#include + +#include +#include + +namespace Ark +{ + Welder::Welder(unsigned debug, const std::vector& libenv) : + m_debug(debug), m_importer(debug, libenv), m_macro_processor(debug), m_optimizer(debug), m_compiler(debug) + {} + + void Welder::registerSymbol(const std::string& name) + { + m_compiler.addDefinedSymbol(name); 
+ } + + bool Welder::computeASTFromFile(const std::string& filename) + { + m_root_file = std::filesystem::path(filename); + + try + { + m_parser.processFile(m_root_file.string()); + m_importer.process(m_root_file.parent_path(), m_parser.ast(), m_parser.imports()); + m_macro_processor.process(m_importer.ast()); + m_optimizer.process(m_macro_processor.ast()); + + return true; + } + catch (const CodeError& e) + { + Diagnostics::generate(e); + return false; + } + } + + bool Welder::computeASTFromString(const std::string& code) + { + m_root_file = std::filesystem::current_path(); // No filename given, take the current working directory + + try + { + m_parser.processString(code); + // TODO mutualise this piece of code + m_macro_processor.process(m_parser.ast()); + m_optimizer.process(m_macro_processor.ast()); + + return true; + } + catch (const CodeError& e) + { + Diagnostics::generate(e, code); + return false; + } + } + + bool Welder::generateBytecode() + { + try + { + m_compiler.process(m_optimizer.ast()); + m_bytecode = m_compiler.bytecode(); + + return true; + } + catch (const CodeError& e) + { + Diagnostics::generate(e); + return false; + } + } + + bool Welder::saveBytecodeToFile(const std::string& filename) + { + if (m_debug >= 1) + std::cout << "Final bytecode size: " << m_bytecode.size() * sizeof(uint8_t) << "B\n"; + + if (m_bytecode.empty()) + return false; + + std::ofstream output(filename, std::ofstream::binary); + output.write(reinterpret_cast(&m_bytecode[0]), m_bytecode.size() * sizeof(uint8_t)); + output.close(); + return true; + } + + const internal::Node& Welder::ast() const noexcept + { + return m_optimizer.ast(); + } + + const bytecode_t& Welder::bytecode() const noexcept + { + return m_bytecode; + } +} diff --git a/src/arkreactor/Exceptions.cpp b/src/arkreactor/Exceptions.cpp new file mode 100644 index 000000000..5f5dda512 --- /dev/null +++ b/src/arkreactor/Exceptions.cpp @@ -0,0 +1,194 @@ +#include + +#include +#include + +#include +#include +#include 
+ +namespace Ark::Diagnostics +{ + struct LineColorContextCounts + { + int open_parentheses = 0; + int open_square_braces = 0; + int open_curly_braces = 0; + }; + + inline bool isPairableChar(const char c) + { + return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}'; + } + + std::string colorizeLine(const std::string& line, LineColorContextCounts& line_color_context_counts) + { + constexpr std::array pairing_color { + termcolor::bright_blue, + termcolor::bright_green, + termcolor::bright_yellow + }; + std::size_t pairing_color_size = pairing_color.size(); + + std::stringstream colorized_line; + colorized_line << termcolor::colorize; + + for (const char& c : line) + { + if (isPairableChar(c)) + { + std::size_t pairing_color_index = 0; + + switch (c) + { + case '(': + pairing_color_index = std::abs(line_color_context_counts.open_parentheses) % pairing_color_size; + line_color_context_counts.open_parentheses++; + break; + case ')': + line_color_context_counts.open_parentheses--; + pairing_color_index = std::abs(line_color_context_counts.open_parentheses) % pairing_color_size; + break; + case '[': + pairing_color_index = std::abs(line_color_context_counts.open_square_braces) % pairing_color_size; + line_color_context_counts.open_square_braces++; + break; + case ']': + line_color_context_counts.open_square_braces--; + pairing_color_index = std::abs(line_color_context_counts.open_square_braces) % pairing_color_size; + break; + case '{': + pairing_color_index = std::abs(line_color_context_counts.open_curly_braces) % pairing_color_size; + line_color_context_counts.open_curly_braces++; + break; + case '}': + line_color_context_counts.open_curly_braces--; + pairing_color_index = std::abs(line_color_context_counts.open_curly_braces) % pairing_color_size; + break; + } + + colorized_line << pairing_color[pairing_color_index] << c << termcolor::reset; + } + else + colorized_line << c; + } + + return colorized_line.str(); + } + + void 
makeContext(std::ostream& os, const std::string& code, std::size_t target_line, std::size_t col_start, std::size_t sym_size) + { + os << termcolor::colorize; + std::vector ctx = Utils::splitString(code, '\n'); + + std::size_t first_line = target_line >= 3 ? target_line - 3 : 0; + std::size_t last_line = (target_line + 3) <= ctx.size() ? target_line + 3 : ctx.size(); + std::size_t overflow = (col_start + sym_size < ctx[target_line].size()) ? 0 : col_start + sym_size - ctx[target_line].size(); // number of characters that are on more lines below + LineColorContextCounts line_color_context_counts; + + for (std::size_t i = first_line; i < last_line; ++i) + { + os << termcolor::green << std::setw(5) << (i + 1) << termcolor::reset + << " | " << colorizeLine(ctx[i], line_color_context_counts) << "\n"; + + if (i == target_line || (i > target_line && overflow > 0)) + { + os << " |"; + // if we have an overflow then we start at the beginning of the line + std::size_t curr_col_start = (overflow == 0) ? col_start : 0; + // if we have an overflow, it is used as the end of the line + std::size_t col_end = (i == target_line) ? std::min(col_start + sym_size, ctx[target_line].size()) + : std::min(overflow, ctx[i].size()); + // update the overflow to avoid going here again if not needed + overflow = (overflow > ctx[i].size()) ? 
overflow - ctx[i].size() : 0; + + // fixing padding when the error is on the first character + if (curr_col_start == 0) + os << " "; + + // padding of spaces + for (std::size_t i = 0; i < curr_col_start; ++i) + os << " "; + + // underline the error + os << termcolor::red << "^"; + for (std::size_t i = curr_col_start + 1; i < col_end; ++i) + os << "~"; + + os << termcolor::reset << "\n"; + } + } + } + + template + void helper(std::ostream& os, const std::string& message, const std::string& filename, const std::string& code, const T& expr, std::size_t line, std::size_t column, std::size_t sym_size) + { + if (filename != ARK_NO_NAME_FILE) + os << "In file " << filename << "\n"; + os << "At " << expr << " @ " << (line + 1) << ":" << column << "\n"; + + if (!code.empty()) + makeContext(os, std::move(code), line, column, sym_size); + os << " " << message; + } + + std::string makeContextWithNode(const std::string& message, const internal::Node& node) + { + std::stringstream ss; + + std::size_t size = 3; + // todo add "can be string" attribute + if (node.nodeType() == internal::NodeType::Symbol || node.nodeType() == internal::NodeType::String || node.nodeType() == internal::NodeType::Spread) + size = node.string().size(); + + helper( + ss, + message, + node.filename(), + (node.filename() == ARK_NO_NAME_FILE) ? 
"" : Utils::readFile(node.filename()), + node, + node.line(), + node.col(), + size); + + return ss.str(); + } + + void generate(const CodeError& e, std::string code) + { + std::string escaped_symbol; + if (e.symbol.has_value()) + { + switch (e.symbol.value().codepoint()) + { + case '\n': escaped_symbol = "'\\n'"; break; + case '\r': escaped_symbol = "'\\r'"; break; + case '\t': escaped_symbol = "'\\t'"; break; + case '\v': escaped_symbol = "'\\v'"; break; + case '\0': escaped_symbol = "EOF"; break; + case ' ': escaped_symbol = "' '"; break; + default: + escaped_symbol = e.symbol.value().c_str(); + } + } + else + escaped_symbol = e.expr; + + std::string file_content; + if (e.filename == ARK_NO_NAME_FILE) + file_content = std::move(code); + else + file_content = Utils::readFile(e.filename); + + // TODO enhance the error messages + helper( + std::cout, + e.what(), + e.filename, + file_content, + escaped_symbol, + e.line, + e.col, + e.expr.size()); + } +} diff --git a/src/arkreactor/VM/State.cpp b/src/arkreactor/VM/State.cpp index e2093ac60..0e4491ac4 100644 --- a/src/arkreactor/VM/State.cpp +++ b/src/arkreactor/VM/State.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #ifdef _MSC_VER # pragma warning(push) @@ -15,93 +17,55 @@ namespace Ark { - State::State(uint16_t options, const std::vector& libenv) noexcept : + State::State(const std::vector& libenv) noexcept : m_debug_level(0), - m_filename(ARK_NO_NAME_FILE), - m_options(options) - { - if (libenv.size() > 0) - { - m_libenv = libenv; - } - else - { - const char* arkpath = getenv("ARKSCRIPT_PATH"); - if (arkpath) - m_libenv = Utils::splitString(arkpath, ';'); - else if (Utils::fileExists("./lib")) - m_libenv.push_back(Utils::canonicalRelPath("./lib")); - else - { - if (m_debug_level >= 1) - std::cout << termcolor::yellow << "Warning" << termcolor::reset << " no std library was found and ARKSCRIPT_PATH was not supplied" << std::endl; - } - } - } + m_libenv(libenv), + m_filename(ARK_NO_NAME_FILE) + {} 
bool State::feed(const std::string& bytecode_filename) { - bool result = true; - try - { - BytecodeReader bcr; - bcr.feed(bytecode_filename); - m_bytecode = bcr.bytecode(); - - configure(); - } - catch (const std::exception& e) - { - result = false; - std::printf("%s\n", e.what()); - } + if (!Utils::fileExists(bytecode_filename)) + return false; - return result; + return feed(Utils::readFileAsBytes(bytecode_filename)); } bool State::feed(const bytecode_t& bytecode) { - bool result = true; + if (!checkMagic(bytecode)) + return false; + + m_bytecode = bytecode; + try { - m_bytecode = bytecode; configure(); + return true; } - catch (const std::exception& e) + catch (const std::exception& e) // FIXME I don't like this shit { - result = false; - std::printf("%s\n", e.what()); + std::cout << e.what() << std::endl; + return false; } - - return result; } bool State::compile(const std::string& file, const std::string& output) { - Compiler compiler(m_debug_level, m_libenv, m_options); + Welder welder(m_debug_level, m_libenv); - try - { - compiler.feed(Utils::readFile(file), file); - for (auto& p : m_binded) - compiler.m_defined_symbols.push_back(p.first); - compiler.compile(); - - if (output != "") - compiler.saveTo(output); - else - compiler.saveTo(file.substr(0, file.find_last_of('.')) + ".arkc"); - } - catch (const std::exception& e) - { - std::printf("%s\n", e.what()); + if (!welder.computeASTFromFile(file)) return false; - } - catch (...) - { - std::printf("Unknown lexer-parser-or-compiler error (%s)\n", file.c_str()); + + for (auto& p : m_binded) + welder.registerSymbol(p.first); + + if (!welder.generateBytecode()) + return false; + + std::string destination = output.empty() ? 
(file.substr(0, file.find_last_of('.')) + ".arkc") : output; + if (!welder.saveBytecodeToFile(destination)) return false; - } return true; } @@ -116,22 +80,11 @@ namespace Ark } m_filename = file; - // check if it's a bytecode file or a source code file - BytecodeReader bcr; - try - { - bcr.feed(file); - } - catch (const std::exception& e) - { - std::printf("%s\n", e.what()); - return false; - } - - if (bcr.timestamp() == 0) // couldn't read magic number, it's a source file + bytecode_t bytecode = Utils::readFileAsBytes(file); + if (!checkMagic(bytecode)) // couldn't read magic number, it's a source file { // check if it's in the arkscript cache - std::string short_filename = Utils::getFilenameFromPath(file); + std::string short_filename = (std::filesystem::path(file)).filename().string(); std::string filename = short_filename.substr(0, short_filename.find_last_of('.')) + ".arkc"; std::filesystem::path directory = (std::filesystem::path(file)).parent_path() / ARK_CACHE_DIRNAME; std::string path = (directory / filename).string(); @@ -143,34 +96,23 @@ namespace Ark if (compiled_successfuly && feed(path)) return true; } - else if (feed(file)) // it's a bytecode file + else if (feed(bytecode)) // it's a bytecode file return true; return false; } bool State::doString(const std::string& code) { - Compiler compiler(m_debug_level, m_libenv, m_options); + Welder welder(m_debug_level, m_libenv); - try - { - compiler.feed(code); - for (auto& p : m_binded) - compiler.m_defined_symbols.push_back(p.first); - compiler.compile(); - } - catch (const std::exception& e) - { - std::printf("%s\n", e.what()); + if (!welder.computeASTFromString(code)) return false; - } - catch (...) 
- { - std::printf("Unknown lexer-parser-or-compiler error\n"); - return false; - } - return feed(compiler.bytecode()); + for (auto& p : m_binded) + welder.registerSymbol(p.first); + welder.generateBytecode(); + + return feed(welder.bytecode()); } void State::loadFunction(const std::string& name, Value::ProcType function) noexcept @@ -193,13 +135,21 @@ namespace Ark m_debug_level = level; } - void State::setLibDirs(const std::vector& libenv) noexcept + void State::setLibDirs(const std::vector& libenv) noexcept { m_libenv = libenv; } + bool State::checkMagic(const bytecode_t& bytecode) + { + return (bytecode.size() > 4 && bytecode[0] == 'a' && + bytecode[1] == 'r' && bytecode[2] == 'k' && + bytecode[3] == internal::Instruction::NOP); + } + void State::configure() { + // FIXME refactor this crap and try to mutualise with the bytecode reader?? using namespace internal; // configure tables and pages diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp index 9116d3c38..df3a68940 100644 --- a/src/arkreactor/VM/VM.cpp +++ b/src/arkreactor/VM/VM.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -126,9 +127,9 @@ namespace Ark auto lib_path = std::accumulate( std::next(m_state.m_libenv.begin()), m_state.m_libenv.end(), - m_state.m_libenv[0], - [](const std::string& a, const std::string& b) -> std::string { - return a + "\n\t- " + b; + m_state.m_libenv[0].string(), + [](const std::string& a, const fs::path& b) -> std::string { + return a + "\n\t- " + b.string(); }); throwVMError(ErrorKind::Module, "Could not find module '" + file + "'. 
Searched in\n\t- " + path + "\n\t- " + lib_path); } diff --git a/src/arkscript/REPL/Repl.cpp b/src/arkscript/REPL/Repl.cpp index b68ad2a41..f6a896850 100644 --- a/src/arkscript/REPL/Repl.cpp +++ b/src/arkscript/REPL/Repl.cpp @@ -8,13 +8,13 @@ namespace Ark { - Repl::Repl(uint16_t options, const std::vector& libenv) : - m_options(options), m_lines(1), m_old_ip(0), m_libenv(libenv) + Repl::Repl(const std::vector& libenv) : + m_lines(1), m_old_ip(0), m_libenv(libenv) {} int Repl::run() { - Ark::State state(m_options, m_libenv); + Ark::State state(m_libenv); Ark::VM vm(state); state.setDebug(0); std::string code; @@ -88,7 +88,7 @@ namespace Ark // save good code code = tmp_code.str(); // place ip to end of bytecode intruction (HALT) - --vm.m_execution_contexts[0]->ip; + vm.m_execution_contexts[0]->ip -= 4; } else { diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index 92e878459..b37f801c2 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -1,16 +1,20 @@ -#include #include #include #include #include +#include +#include #include #include +#include +#include #include #include #include +#include #include int main(int argc, char** argv) @@ -30,7 +34,6 @@ int main(int argc, char** argv) ast }; mode selected = mode::repl; - uint16_t options = Ark::DefaultFeatures; std::string file = "", eval_expresion = ""; @@ -39,6 +42,7 @@ int main(int argc, char** argv) constexpr uint16_t max_uint16 = std::numeric_limits::max(); + // by default, select all pages and segment types, without slicing anything uint16_t bcr_page = max_uint16; uint16_t bcr_start = max_uint16; uint16_t bcr_end = max_uint16; @@ -46,8 +50,7 @@ int main(int argc, char** argv) std::vector wrong, script_args; - std::string libdir = ""; - std::vector libenv; + std::string libdir; // clang-format off auto cli = ( @@ -89,9 +92,7 @@ int main(int argc, char** argv) value("file", file).set(selected, mode::run) , ( joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }))) - , - // 
shouldn't change now, the lib option is fine and working - ( + , ( option("-L", "--lib").doc("Set the location of the ArkScript standard library. Paths can be delimited by ';'") & value("lib_dir", libdir) ) @@ -102,8 +103,7 @@ int main(int argc, char** argv) required("--ast").set(selected, mode::ast).doc("Compile the given program and output its AST as JSON to stdout") & value("file", file) , joinable(repeatable(option("-d", "--debug").call([&]{ debug++; }).doc("Increase debug level (default: 0)"))) - , - ( + , ( option("-L", "--lib").doc("Set the location of the ArkScript standard library. Paths can be delimited by ';'") & value("lib_dir", libdir) ) @@ -124,8 +124,34 @@ int main(int argc, char** argv) { using namespace Ark; + std::vector lib_paths; + // if arkscript lib paths were provided by the CLI, bypass the automatic lookup if (!libdir.empty()) - libenv = Utils::splitString(libdir, ';'); + { + for (const auto& path : Utils::splitString(libdir, ';')) + { + lib_paths.push_back(std::filesystem::path(path)); + lib_paths.push_back(std::filesystem::path(path) / "std"); + } + } + else + { + if (const char* arkpath = std::getenv("ARKSCRIPT_PATH")) + { + for (const auto& path : Utils::splitString(arkpath, ';')) + { + lib_paths.push_back(std::filesystem::path(path)); + lib_paths.push_back(std::filesystem::path(path) / "std"); + } + } + else if (Utils::fileExists("./lib")) + { + lib_paths.push_back(std::filesystem::path("./lib")); + lib_paths.push_back(std::filesystem::path("./lib/std")); + } + else + std::cerr << termcolor::yellow << "Warning" << termcolor::reset << " Couldn't read ARKSCRIPT_PATH environment variable" << std::endl; + } switch (selected) { @@ -133,33 +159,34 @@ int main(int argc, char** argv) // clipp only supports streams std::cout << make_man_page(cli, "arkscript", fmt) .prepend_section("DESCRIPTION", " ArkScript programming language") + .append_section("VERSION", fmt::format(" {}.{}.{}", ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH)) 
.append_section("LICENSE", " Mozilla Public License 2.0") << std::endl; break; case mode::version: - std::printf("Version %i.%i.%i\n", ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH); + std::cout << fmt::format("Version {}.{}.{}\n", ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH); break; case mode::dev_info: { - std::printf( - "Have been compiled with %s, options: %s\n\n" - "sizeof(Ark::Value) = %zuB\n" - " sizeof(Value_t) = %zuB\n" - " sizeof(ValueType) = %zuB\n" - " sizeof(ProcType) = %zuB\n" - " sizeof(Ark::Closure) = %zuB\n" - " sizeof(Ark::UserType) = %zuB\n" + std::cout << fmt::format( + "Have been compiled with {}, options: {}\n\n" + "sizeof(Ark::Value) = {}B\n" + " sizeof(Value_t) = {}B\n" + " sizeof(ValueType) = {}B\n" + " sizeof(ProcType) = {}B\n" + " sizeof(Ark::Closure) = {}B\n" + " sizeof(Ark::UserType) = {}B\n" "\nVirtual Machine\n" - "sizeof(Ark::VM) = %zuB\n" - " sizeof(Ark::State) = %zuB\n" - " sizeof(Ark::Scope) = %zuB\n" - " sizeof(ExecutionContext) = %zuB\n" + "sizeof(Ark::VM) = {}B\n" + " sizeof(Ark::State) = {}B\n" + " sizeof(Ark::Scope) = {}B\n" + " sizeof(ExecutionContext) = {}B\n" "\nMisc\n" - " sizeof(vector) = %zuB\n" - " sizeof(char) = %zuB\n" - "\nsizeof(Node) = %zuB\n", + " sizeof(vector) = {}B\n" + " sizeof(char) = {}B\n" + "\nsizeof(Node) = {}B\n", ARK_COMPILER, ARK_COMPILATION_OPTIONS, // value sizeof(Ark::Value), @@ -182,49 +209,39 @@ int main(int argc, char** argv) case mode::repl: { - // send default features without FeatureRemoveUnusedVars to avoid deleting code which will be used later on - Ark::Repl repl(Ark::DefaultFeatures & ~Ark::FeatureRemoveUnusedVars, libenv); + Ark::Repl repl(lib_paths); return repl.run(); } case mode::compile: { - Ark::State state(options, libenv); + Ark::State state(lib_paths); state.setDebug(debug); if (!state.doFile(file)) - { - std::cerr << "Could not compile file at " << file << "\n"; return -1; - } break; } case mode::run: { - Ark::State state(options, libenv); + Ark::State 
state(lib_paths); state.setDebug(debug); state.setArgs(script_args); if (!state.doFile(file)) - { - std::cerr << "Could not run file at " << file << "\n"; return -1; - } Ark::VM vm(state); int out = vm.run(); #ifdef ARK_PROFILER_COUNT - std::printf( - "\n\nValue\n" - "=====\n" - "\tCreations: %u\n\tCopies: %u\n\tMoves: %u\n\n\tCopy coeff: %f", - Ark::internal::value_creations, - Ark::internal::value_copies, - Ark::internal::value_moves, - static_cast(Ark::internal::value_copies) / Ark::internal::value_creations); + std::cout << "\n\nValue\n=====\n" + << "\tCreations: " << Ark::internal::value_creations + << "\n\tCopies: " << Ark::internal::value_copies + << "\n\tMoves: " << Ark::internal::value_moves + << "\n\n\tCopy coeff: " << static_cast(Ark::internal::value_copies) / Ark::internal::value_creations; #endif return out; @@ -232,7 +249,7 @@ int main(int argc, char** argv) case mode::eval: { - Ark::State state(options, libenv); + Ark::State state(lib_paths); state.setDebug(debug); if (!state.doString(eval_expresion)) @@ -247,8 +264,8 @@ int main(int argc, char** argv) case mode::ast: { - Ark::JsonCompiler jcompiler(debug, libenv, options); - jcompiler.feed(Ark::Utils::readFile(file), file); + Ark::JsonCompiler jcompiler(debug, lib_paths); + jcompiler.feed(file); std::cout << jcompiler.compile() << std::endl; break; } @@ -271,7 +288,8 @@ int main(int argc, char** argv) } catch (const std::exception& e) { - std::printf("%s\n", e.what()); + std::cerr << e.what() << std::endl; + return -1; } break; } @@ -280,11 +298,12 @@ int main(int argc, char** argv) else { for (const auto& arg : wrong) - std::printf("'%s' ins't a valid argument\n", arg.c_str()); + std::cerr << "'" << arg.c_str() << "' ins't a valid argument\n"; // clipp only supports streams std::cout << make_man_page(cli, "arkscript", fmt) .prepend_section("DESCRIPTION", " ArkScript programming language") + .append_section("VERSION", fmt::format(" {}.{}.{}", ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH)) 
.append_section("LICENSE", " Mozilla Public License 2.0") << std::endl; } diff --git a/tests/arkscript/async-tests.ark b/tests/arkscript/async-tests.ark index 1568906a6..a9b0e336f 100644 --- a/tests/arkscript/async-tests.ark +++ b/tests/arkscript/async-tests.ark @@ -1,5 +1,5 @@ -(import "tests-tools.ark") -(import "List.ark") +(import tests-tools) +(import std.List) (let async-tests (fun () { (mut tests 0) diff --git a/tests/arkscript/builtins-tests.ark b/tests/arkscript/builtins-tests.ark index 82a16e89d..e6c5b5a15 100644 --- a/tests/arkscript/builtins-tests.ark +++ b/tests/arkscript/builtins-tests.ark @@ -1,4 +1,4 @@ -(import "tests-tools.ark") +(import tests-tools) (let builtin-tests (fun () { (mut tests 0) diff --git a/tests/arkscript/list-tests.ark b/tests/arkscript/list-tests.ark index dd981e64a..71ed1a09e 100644 --- a/tests/arkscript/list-tests.ark +++ b/tests/arkscript/list-tests.ark @@ -1,4 +1,4 @@ -(import "tests-tools.ark") +(import tests-tools) (let list-tests (fun () { (mut tests 0) diff --git a/tests/arkscript/macro-tests.ark b/tests/arkscript/macro-tests.ark index 844600aae..30b134ae8 100644 --- a/tests/arkscript/macro-tests.ark +++ b/tests/arkscript/macro-tests.ark @@ -1,133 +1,133 @@ -(import "tests-tools.ark") +(import tests-tools) (let macro-tests (fun () { (mut tests 0) (let start-time (time)) - !{void () nil} + ($ void () nil) (set tests (assert-eq (void) nil "macro void with no arguments" tests)) - !{add_two (a b) (+ a b)} - !{nice_value 12} + ($ add_two (a b) (+ a b)) + ($ nice_value 12) (set tests (assert-eq (add_two 1 2) 3 "macro add_two" tests)) (set tests (assert-eq (add_two nice_value 2) 14 "macro add_two and macro value" tests)) - !{if (and true true) + ($if (and true true) (set tests (assert-val true "macro if" tests)) - (set tests (assert-val false "macro if" tests))} + (set tests (assert-val false "macro if" tests))) - !{if (= nice_value 12) + ($if (= nice_value 12) (set tests (assert-val true "macro if" tests)) - (set tests 
(assert-val false "macro if" tests))} + (set tests (assert-val false "macro if" tests))) - !{if (and true (= nice_value 12)) + ($if (and true (= nice_value 12)) (set tests (assert-val true "macro if" tests)) - (set tests (assert-val false "macro if" tests))} + (set tests (assert-val false "macro if" tests))) - !{if (and false (= nice_value 12)) + ($if (and false (= nice_value 12)) (set tests (assert-val false "macro if" tests)) - (set tests (assert-val true "macro if" tests))} + (set tests (assert-val true "macro if" tests))) - !{if (or false (= nice_value 12)) + ($if (or false (= nice_value 12)) (set tests (assert-val true "macro if" tests)) - (set tests (assert-val false "macro if" tests))} + (set tests (assert-val false "macro if" tests))) - !{if (or false (!= nice_value 12)) + ($if (or false (!= nice_value 12)) (set tests (assert-val false "macro if" tests)) - (set tests (assert-val true "macro if" tests))} + (set tests (assert-val true "macro if" tests))) - !{if (not (= nice_value 12)) + ($if (not (= nice_value 12)) (set tests (assert-val false "macro if" tests)) - (set tests (assert-val true "macro if" tests))} + (set tests (assert-val true "macro if" tests))) - !{if (< nice_value 14) + ($if (< nice_value 14) (set tests (assert-val true "macro comparison <" tests)) - (set tests (assert-val false "macro comparison <" tests))} + (set tests (assert-val false "macro comparison <" tests))) - !{if (> nice_value 14) + ($if (> nice_value 14) (set tests (assert-val false "macro comparison >" tests)) - (set tests (assert-val true "macro comparison >" tests))} + (set tests (assert-val true "macro comparison >" tests))) - !{if (<= nice_value 12) + ($if (<= nice_value 12) (set tests (assert-val true "macro comparison <=" tests)) - (set tests (assert-val false "macro comparison <=" tests))} + (set tests (assert-val false "macro comparison <=" tests))) - !{if (>= nice_value 12) + ($if (>= nice_value 12) (set tests (assert-val true "macro comparison >=" tests)) - (set tests 
(assert-val false "macro comparison >=" tests))} + (set tests (assert-val false "macro comparison >=" tests))) - !{if (@ [true false] 0) + ($if (@ [true false] 0) (set tests (assert-val true "macro if @" tests)) - (set tests (assert-val false "macro if @" tests))} + (set tests (assert-val false "macro if @" tests))) - !{if (@ [true false] -2) + ($if (@ [true false] -2) (set tests (assert-val true "macro if @" tests)) - (set tests (assert-val false "macro if @" tests))} + (set tests (assert-val false "macro if @" tests))) - !{if true { - !{in_if_1 true} - !{in_if_2 true}}} + ($if true { + ($ in_if_1 true) + ($ in_if_2 true)}) (set tests (assert-val (and in_if_1 in_if_2) "macro if multiple definitions" tests)) - !{undef in_if_1} - !{undef in_if_2} + ($undef in_if_1) + ($undef in_if_2) { - !{val (+ 1 2 3)} + ($ val (+ 1 2 3)) (set tests (assert-eq val 6 "macro value scoping" tests)) { - !{val 0} + ($ val 0) (set tests (assert-eq val 0 "macro value scoping" tests)) - !{undef val} + ($undef val) (set tests (assert-eq val 6 "macro value undefine" tests)) - !{undef a}} # shouldn't yield an error on unknown macros + ($undef a)} # shouldn't yield an error on unknown macros (set tests (assert-eq val 6 "macro value scoping" tests))} - !{bar (a ...args) (+ a (len args))} + ($ bar (a ...args) (+ a (len args))) (set tests (assert-eq (bar 1) 1 "macro bar spreading" tests)) (set tests (assert-eq (bar 2 3) 3 "macro bar spreading" tests)) (set tests (assert-eq (bar 4 5 6) 6 "macro bar spreading" tests)) (set tests (assert-eq (bar 7 8 9 10) 10 "macro bar spreading" tests)) - !{egg (...args) (bar ...args)} + ($ egg (...args) (bar ...args)) (set tests (assert-eq (egg 1) 1 "macro egg spreading" tests)) (set tests (assert-eq (egg 0 1) 1 "macro egg spreading" tests)) (set tests (assert-eq (egg 0 0 0 1) 3 "macro egg spreading" tests)) - !{h (...args) (head args)} + ($ h (...args) (head args)) (set tests (assert-eq (h) nil "macro h spreading" tests)) (set tests (assert-eq (h 1) 1 "macro h 
spreading" tests)) (set tests (assert-eq (h 1 2) 1 "macro h spreading" tests)) - !{g (...args) (tail args)} + ($ g (...args) (tail args)) (set tests (assert-eq (g) [] "macro g spreading" tests)) (set tests (assert-eq (g 1) [] "macro g spreading" tests)) (set tests (assert-eq (g 1 2) [2] "macro g spreading" tests)) (set tests (assert-eq (g 1 2 3) [2 3] "macro g spreading" tests)) - !{defun (name args body) (let name (fun args body))} + ($ defun (name args body) (let name (fun args body))) (defun foo (a b) (+ a b)) (set tests (assert-eq (type foo) "Function" "macro define function" tests)) (set tests (assert-eq (foo 2 3) 5 "macro define function" tests)) - !{one (...args) (@ args 1)} + ($ one (...args) (@ args 1)) (set tests (assert-eq (one 1 2) 2 "macro @ 1" tests)) (set tests (assert-eq (one 1 3 4) 3 "macro @ 1" tests)) (set tests (assert-eq (one 1 5 6 7 8) 5 "macro @ 1" tests)) - !{last (...args) (@ args -1)} + ($ last (...args) (@ args -1)) (set tests (assert-eq (last 1 2) 2 "macro @ -1" tests)) (set tests (assert-eq (last 1 3 4) 4 "macro @ -1" tests)) (set tests (assert-eq (last 1 5 6 7 8) 8 "macro @ -1" tests)) - !{get_symbol (bloc) (@ bloc 1)} - !{define (bloc) (let (get_symbol bloc) (@ bloc 2))} + ($ get_symbol (bloc) (@ bloc 1)) + ($ define (bloc) (let (get_symbol bloc) (@ bloc 2))) (define (let a 12)) (set tests (assert-eq a 12 "macro get_symbol" tests)) - !{define (prefix suffix value) (let (symcat prefix suffix) value)} + ($ define (prefix suffix value) (let (symcat prefix suffix) value)) (define a 1 2) (set tests (assert-eq a1 2 "macro symcat" tests)) (define a (+ 1 1) 2) @@ -137,17 +137,17 @@ (define a (+ nice_value 1) 2) (set tests (assert-eq a13 2 "macro symcat" tests)) - !{suffix-dup (sym x) { - !{if (> x 1) - (suffix-dup sym (- x 1))} - (symcat sym x)}} - (let magic_func (fun ((suffix-dup a 3)) (- a1 a2 a3))) + ($ suffix-dup (sym x) { + ($if (> x 1) + (suffix-dup sym (- x 1))) + (symcat sym x)}) + (let magic_func (fun ($* (suffix-dup a 3)) (- a1 a2 
a3))) (set tests (assert-eq (magic_func 1 2 3) (- 1 2 3) "macro symdup" tests)) - !{partial (func ...defargs) { - !{bloc (suffix-dup a (- (argcount func) (len defargs)))} + ($ partial (func ...defargs) { + ($ bloc (suffix-dup a (- (argcount func) (len defargs)))) (fun (bloc) (func ...defargs bloc)) - !{undef bloc}}} + ($undef bloc)}) (let test_func (fun (a b c) (* a b c))) (let test_func1 (partial test_func 1)) diff --git a/tests/arkscript/run-tests b/tests/arkscript/run-tests index 48e5226c6..aa66d68e2 100755 --- a/tests/arkscript/run-tests +++ b/tests/arkscript/run-tests @@ -11,6 +11,4 @@ else fi $ark unittests.ark --lib ../../lib/ || exit 1 -for file in ../../lib/std/tests/*.ark; do - $ark $file --lib ../../lib/ -done +$ark ../../lib/std/tests/all.ark --lib ../../lib/ || exit 1 diff --git a/tests/arkscript/string-tests.ark b/tests/arkscript/string-tests.ark index 7cc9ce927..b7468d7cd 100644 --- a/tests/arkscript/string-tests.ark +++ b/tests/arkscript/string-tests.ark @@ -1,6 +1,6 @@ -(import "tests-tools.ark") +(import tests-tools) -(import "String.ark") +(import std.String) (let string-tests (fun () { (mut tests 0) diff --git a/tests/arkscript/tests-tools.ark b/tests/arkscript/tests-tools.ark index 529fef62e..db66a9238 100644 --- a/tests/arkscript/tests-tools.ark +++ b/tests/arkscript/tests-tools.ark @@ -1,4 +1,4 @@ -(import "console.arkm") +(import console) (let assert-eq (fun (val1 val2 message tests) { (assert (= val1 val2) (str:format "{} ({}) - {} SHOULD BE EQUAL TO {}" message tests val1 val2)) diff --git a/tests/arkscript/unittests.ark b/tests/arkscript/unittests.ark index 4735f60b9..d9b74536c 100644 --- a/tests/arkscript/unittests.ark +++ b/tests/arkscript/unittests.ark @@ -11,13 +11,13 @@ ArkScript language # We *must* use functions for our tests because they create a new scope, # to avoid collisions with other tests, and avoid false positive tests. 
-(import "vm-tests.ark") -(import "builtins-tests.ark") -(import "utf8-tests.ark") -(import "macro-tests.ark") -(import "list-tests.ark") -(import "string-tests.ark") -(import "async-tests.ark") +(import vm-tests) +(import builtins-tests) +(import utf8-tests) +(import macro-tests) +(import list-tests) +(import string-tests) +(import async-tests) (print " ------------------------------") @@ -28,6 +28,7 @@ ArkScript language passed-macro passed-list passed-string - passed-async )) + passed-async +)) (print "Completed in " (* 1000 (- (time) start_time)) "ms") diff --git a/tests/arkscript/utf8-tests.ark b/tests/arkscript/utf8-tests.ark index c7363defb..f6cada6f3 100644 --- a/tests/arkscript/utf8-tests.ark +++ b/tests/arkscript/utf8-tests.ark @@ -1,18 +1,9 @@ -(import "tests-tools.ark") +(import tests-tools) (let utf8-tests (fun () { (mut tests 0) (let start-time (time)) - (let é 1) - (set tests (assert-eq é 1 "create identifier é" tests)) - - (let âè 2) - (set tests (assert-eq âè 2 "create identifier âè" tests)) - - (let 🥳 12) - (set tests (assert-eq 🥳 12 "create identifier 🥳" tests)) - (let ---> 15) (set tests (assert-eq ---> 15 "create identifier --->" tests)) @@ -20,17 +11,6 @@ (set tests (assert-eq <-- 16 "create identifier <--" tests)) (set tests (assert-val (< ---> <--) "comparing ---> and <-- with < operator" tests)) - (let mâkè (fun (à -->) - (fun (&à &-->) ()))) - - (let closure (mâkè 1 2)) - (set tests (assert-eq (type mâkè) "Function" "Function test" tests)) - (set tests (assert-eq (type closure) "Closure" "Closure test" tests)) - (set tests (assert-val (hasField closure "à") "Closure has field" tests)) - (set tests (assert-val (hasField closure "-->") "Closure has field" tests)) - (set tests (assert-eq closure.à 1 "Closure field à" tests)) - (set tests (assert-eq closure.--> 2 "Closure field -->" tests)) - (let emotes [ "🥳" "😅" "😥" "👿" "🟢" "🙊" "💡" "💻" "🌟" "🔹" "🌐" "🤖" @@ -45,7 +25,6 @@ (set tests (assert-eq "\u1e0b" "ḋ" "checking conversion pattern \\u and 
its emote" tests)) (set tests (assert-eq "\u1E0B" "ḋ" "checking conversion pattern \\u and its emote" tests)) - (set tests (assert-eq (str:ord "👺") 128122 "checking conversion emote or character to utf8 codepoint (decimal number)" tests)) (set tests (assert-eq (str:chr 128122) "👺" "checking conversion codepoint to emote or character (utf8 string)" tests)) (set tests (assert-eq (str:ord "$") 36 "checking conversion emote or character to utf8 codepoint (decimal number)" tests)) diff --git a/tests/arkscript/vm-tests.ark b/tests/arkscript/vm-tests.ark index 6f78e9606..dabe3c5fa 100644 --- a/tests/arkscript/vm-tests.ark +++ b/tests/arkscript/vm-tests.ark @@ -1,4 +1,4 @@ -(import "tests-tools.ark") +(import tests-tools) (let vm-tests (fun () { (mut tests 0) diff --git a/tests/ast/99bottles.json b/tests/ast/99bottles.json index 95dc49dce..78c5b1d36 100644 --- a/tests/ast/99bottles.json +++ b/tests/ast/99bottles.json @@ -31,7 +31,7 @@ }, { "type": "Number", - "value": 1.000000 + "value": 1 } ] }, @@ -55,7 +55,7 @@ }, { "type": "Number", - "value": 0.000000 + "value": 0 } ] } @@ -90,7 +90,7 @@ }, "then": { "type": "Number", - "value": 100.000000 + "value": 100 }, "else": { "type": "Symbol", @@ -124,7 +124,7 @@ }, { "type": "Number", - "value": 1.000000 + "value": 1 } ] }, @@ -147,8 +147,8 @@ "args": [ { "type": "String", - "value": "%% Bottles of beer on the wall -%% bottles of beer + "value": "{} Bottles of beer on the wall +{} bottles of beer Take one down, pass it around" }, { @@ -182,7 +182,7 @@ Take one down, pass it around" }, { "type": "Number", - "value": 1.000000 + "value": 1 } ] } @@ -203,7 +203,7 @@ Take one down, pass it around" "args": [ { "type": "String", - "value": "%% Bottles of beer on the wall." + "value": "{} Bottles of beer on the wall." 
}, { "type": "Symbol", diff --git a/tests/ast/ackermann.json b/tests/ast/ackermann.json index b735039be..f060314af 100644 --- a/tests/ast/ackermann.json +++ b/tests/ast/ackermann.json @@ -37,7 +37,7 @@ }, { "type": "Number", - "value": 0.000000 + "value": 0 } ] }, @@ -52,7 +52,7 @@ "args": [ { "type": "Number", - "value": 0.000000 + "value": 0 }, { "type": "Symbol", @@ -80,13 +80,13 @@ }, { "type": "Number", - "value": 1.000000 + "value": 1 } ] }, { "type": "Number", - "value": 1.000000 + "value": 1 } ] }, @@ -110,7 +110,7 @@ }, { "type": "Number", - "value": 1.000000 + "value": 1 } ] }, @@ -138,7 +138,7 @@ }, { "type": "Number", - "value": 1.000000 + "value": 1 } ] } @@ -156,7 +156,7 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Symbol", @@ -189,11 +189,11 @@ "args": [ { "type": "Number", - "value": 3.000000 + "value": 3 }, { "type": "Number", - "value": 6.000000 + "value": 6 } ] } diff --git a/tests/ast/closures.json b/tests/ast/closures.json index 588088296..f252c2551 100644 --- a/tests/ast/closures.json +++ b/tests/ast/closures.json @@ -73,7 +73,10 @@ "name": "weight" } ], - "body": [] + "body": { + "type": "Symbol", + "name": "nil" + } } ] } @@ -98,11 +101,11 @@ }, { "type": "Number", - "value": 0.000000 + "value": 0 }, { "type": "Number", - "value": 144.000000 + "value": 144 } ] } @@ -126,11 +129,11 @@ }, { "type": "Number", - "value": 12.000000 + "value": 12 }, { "type": "Number", - "value": 15.000000 + "value": 15 } ] } @@ -147,12 +150,17 @@ "value": "Bob's age: " }, { - "type": "Symbol", - "name": "bob" - }, - { - "type": "GetField", - "name": "age" + "type": "Field", + "children": [ + { + "type": "Symbol", + "name": "bob" + }, + { + "type": "Symbol", + "name": "age" + } + ] } ] }, @@ -169,23 +177,25 @@ } ] }, - { - "type": "FunctionCall", - "name": { - "type": "Symbol", - "name": "bob" + [ + { + "type": "Field", + "children": [ + { + "type": "Symbol", + "name": "bob" + }, + { + "type": "Symbol", + "name": "set-age" + } + 
] }, - "args": [ - { - "type": "GetField", - "name": "set-age" - }, - { - "type": "Number", - "value": 10.000000 - } - ] - }, + { + "type": "Number", + "value": 10 + } + ], { "type": "FunctionCall", "name": { @@ -198,12 +208,17 @@ "value": "New age: " }, { - "type": "Symbol", - "name": "bob" - }, - { - "type": "GetField", - "name": "age" + "type": "Field", + "children": [ + { + "type": "Symbol", + "name": "bob" + }, + { + "type": "Symbol", + "name": "age" + } + ] } ] }, @@ -219,12 +234,17 @@ "value": "John's age, didn't change: " }, { - "type": "Symbol", - "name": "john" - }, - { - "type": "GetField", - "name": "age" + "type": "Field", + "children": [ + { + "type": "Symbol", + "name": "john" + }, + { + "type": "Symbol", + "name": "age" + } + ] } ] }, @@ -272,7 +292,7 @@ }, { "type": "Number", - "value": 1.000000 + "value": 1 } ] } @@ -301,7 +321,7 @@ "args": [ { "type": "Number", - "value": 3.000000 + "value": 3 } ] } diff --git a/tests/ast/error.json b/tests/ast/error.json index 0697a1777..6c9ca6b64 100644 --- a/tests/ast/error.json +++ b/tests/ast/error.json @@ -166,7 +166,7 @@ }, { "type": "Number", - "value": 0.000000 + "value": 0 } ] }, @@ -199,7 +199,7 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Symbol", @@ -230,7 +230,7 @@ "args": [ { "type": "Number", - "value": 0.000000 + "value": 0 } ] }, diff --git a/tests/ast/factorial.json b/tests/ast/factorial.json index 5592eaf1a..d7b8bdda5 100644 --- a/tests/ast/factorial.json +++ b/tests/ast/factorial.json @@ -26,7 +26,7 @@ }, "value": { "type": "Number", - "value": 1.000000 + "value": 1 } }, { @@ -37,7 +37,7 @@ }, "value": { "type": "Number", - "value": 2.000000 + "value": 2 } }, { @@ -101,7 +101,7 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Symbol", @@ -141,7 +141,7 @@ "args": [ { "type": "Number", - "value": 6.000000 + "value": 6 } ] } diff --git a/tests/ast/macros.json b/tests/ast/macros.json index 98759208c..aa5bb6555 100644 --- 
a/tests/ast/macros.json +++ b/tests/ast/macros.json @@ -73,7 +73,7 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Symbol", @@ -113,7 +113,7 @@ }, { "type": "Number", - "value": 3.000000 + "value": 3 }, { "type": "String", @@ -121,7 +121,7 @@ }, { "type": "Number", - "value": 3.000000 + "value": 3 } ] }, @@ -138,7 +138,7 @@ }, { "type": "Number", - "value": 2.000000 + "value": 2 }, { "type": "String", @@ -146,7 +146,7 @@ }, { "type": "Number", - "value": 2.000000 + "value": 2 } ] }, @@ -170,15 +170,15 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 2.000000 + "value": 2 }, { "type": "Number", - "value": 3.000000 + "value": 3 } ] }, @@ -195,11 +195,11 @@ "args": [ { "type": "Number", - "value": 2.000000 + "value": 2 }, { "type": "Number", - "value": 3.000000 + "value": 3 } ] } @@ -225,11 +225,11 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 2.000000 + "value": 2 } ] } @@ -248,7 +248,7 @@ }, { "type": "Number", - "value": 12.000000 + "value": 12 } ] }, @@ -348,11 +348,11 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 2.000000 + "value": 2 } ] } @@ -378,11 +378,11 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 2.000000 + "value": 2 } ] }, @@ -392,7 +392,7 @@ }, { "type": "Number", - "value": 2.000000 + "value": 2 } ] }, @@ -416,15 +416,15 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 3.000000 + "value": 3 }, { "type": "Number", - "value": 4.000000 + "value": 4 } ] }, @@ -434,7 +434,7 @@ }, { "type": "Number", - "value": 3.000000 + "value": 3 } ] }, @@ -458,23 +458,23 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 5.000000 + "value": 5 }, { "type": "Number", - "value": 6.000000 + "value": 6 }, { "type": "Number", - "value": 
7.000000 + "value": 7 }, { "type": "Number", - "value": 8.000000 + "value": 8 } ] }, @@ -484,7 +484,7 @@ }, { "type": "Number", - "value": 5.000000 + "value": 5 } ] }, @@ -508,11 +508,11 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 2.000000 + "value": 2 } ] }, @@ -522,7 +522,7 @@ }, { "type": "Number", - "value": 2.000000 + "value": 2 } ] }, @@ -546,15 +546,15 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 3.000000 + "value": 3 }, { "type": "Number", - "value": 4.000000 + "value": 4 } ] }, @@ -564,7 +564,7 @@ }, { "type": "Number", - "value": 4.000000 + "value": 4 } ] }, @@ -588,23 +588,23 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 5.000000 + "value": 5 }, { "type": "Number", - "value": 6.000000 + "value": 6 }, { "type": "Number", - "value": 7.000000 + "value": 7 }, { "type": "Number", - "value": 8.000000 + "value": 8 } ] }, @@ -614,7 +614,7 @@ }, { "type": "Number", - "value": 8.000000 + "value": 8 } ] }, @@ -654,15 +654,15 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 2.000000 + "value": 2 }, { "type": "Number", - "value": 3.000000 + "value": 3 } ] } @@ -695,15 +695,15 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 2.000000 + "value": 2 }, { "type": "Number", - "value": 3.000000 + "value": 3 } ] } @@ -733,15 +733,15 @@ "args": [ { "type": "Number", - "value": 1.000000 + "value": 1 }, { "type": "Number", - "value": 2.000000 + "value": 2 }, { "type": "Number", - "value": 3.000000 + "value": 3 } ] } @@ -763,7 +763,7 @@ }, { "type": "Number", - "value": 555.000000 + "value": 555 } ] }, @@ -779,25 +779,8 @@ "value": "(subscope, undef test) Reading macro 'test', expected 6, " }, { - "type": "FunctionCall", - "name": { - "type": "Symbol", - "name": "+" - }, - "args": [ - { - "type": "Number", - "value": 
1.000000 - }, - { - "type": "Number", - "value": 2.000000 - }, - { - "type": "Number", - "value": 3.000000 - } - ] + "type": "Number", + "value": 555 } ] } diff --git a/tests/ast/run-tests b/tests/ast/run-tests index a66c57d21..d77c45d76 100755 --- a/tests/ast/run-tests +++ b/tests/ast/run-tests @@ -6,6 +6,8 @@ if [ -f ../../build/Release/${file}.exe ]; then ark=../../build/Release/${file}.exe elif [ -f ../../build/${file} ]; then ark=../../build/${file} +elif [ -f ../../cmake-build-debug/${file} ]; then + ark=../../cmake-build-debug/${file} else echo "No $file executable found" && exit 1 fi diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index 0ab4b0b52..07973e693 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -35,7 +35,6 @@ foreach(ELEM ${TARGET_LIST}) ${FNAME} PUBLIC ${PROJECT_SOURCE_DIR}/../../include - ${PROJECT_SOURCE_DIR}/../../lib/String/include ) # copy to a special folder @@ -46,11 +45,5 @@ foreach(ELEM ${TARGET_LIST}) ) # request C++17 - set_target_properties( - ${FNAME} - PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CXX_EXTENSIONS OFF - ) + target_compile_features(${FNAME} PRIVATE cxx_std_17) endforeach() diff --git a/tests/errors/macros/not_enough_args.ark b/tests/errors/macros/not_enough_args.ark index c90e14de4..ab064e540 100644 --- a/tests/errors/macros/not_enough_args.ark +++ b/tests/errors/macros/not_enough_args.ark @@ -1,4 +1,4 @@ -!{foo (a b c) - (+ a b c)} +($ foo (a b c) + (+ a b c)) (foo 1 2) \ No newline at end of file diff --git a/tests/errors/macros/too_many_args.ark b/tests/errors/macros/too_many_args.ark index 79eafadd9..3dc047cb6 100644 --- a/tests/errors/macros/too_many_args.ark +++ b/tests/errors/macros/too_many_args.ark @@ -1,4 +1,4 @@ -!{foo (a b c) - (+ a b c)} +($ foo (a b c) + (+ a b c)) (foo 1 2 3 4) \ No newline at end of file diff --git a/tests/errors/operators/not_enough_args.expected b/tests/errors/operators/not_enough_args.expected index 3c539f1ef..96ab1381a 100644 --- 
a/tests/errors/operators/not_enough_args.expected +++ b/tests/errors/operators/not_enough_args.expected @@ -1 +1 @@ -CompilationError: Operator needs two arguments, but was called with only one \ No newline at end of file +Operator needs two arguments, but was called with only one \ No newline at end of file diff --git a/tests/errors/run-tests b/tests/errors/run-tests index 8e75b8ba1..79847c0c7 100755 --- a/tests/errors/run-tests +++ b/tests/errors/run-tests @@ -6,6 +6,8 @@ if [ -f ../../build/Release/${file}.exe ]; then ark=../../build/Release/${file}.exe elif [ -f ../../build/${file} ]; then ark=../../build/${file} +elif [ -f ../../cmake-build-debug/${file} ]; then + ark=../../cmake-build-debug/${file} else echo "No $file executable found" && exit 1 fi diff --git a/tests/parser/main.cpp b/tests/parser/main.cpp new file mode 100644 index 000000000..2e1641d22 --- /dev/null +++ b/tests/parser/main.cpp @@ -0,0 +1,60 @@ +#include +#include + +#include +#include +#include + +using namespace Ark; +using namespace Ark::internal; + +int main(int argc, char* argv[]) +{ + if (argc < 2) + { + std::cout << "Expected at least one argument: filename" << std::endl; + return 1; + } + + std::string filename(argv[1]); + + try + { + Parser parser; + parser.processFile(filename); + + // print imports too, so that we can be sure that we are parsing and reading them correctly + const auto& ast = parser.ast().constList(); + for (auto it = ast.begin() + 1, end = ast.end(); it != end; ++it) + std::cout << *it << std::endl; + + const auto& imports = parser.imports(); + + if (!imports.empty()) + std::cout << "\n"; + for (std::size_t i = 0, end = imports.size(); i < end; ++i) + { + Import data = imports[i]; + std::cout << i << ") " << data.prefix; + if (data.isBasic()) + std::cout << " (basic)"; + else if (data.isGlob()) + std::cout << " (glob)"; + else + { + std::cout << " ( "; + for (const std::string& sym : data.symbols) + std::cout << sym << " "; + std::cout << ")"; + } + std::cout << 
"\n"; + } + std::cout << std::endl; + } + catch (const CodeError& e) + { + Diagnostics::generate(e); + } + + return 0; +} diff --git a/tests/parser/tests/begin.ark b/tests/parser/tests/begin.ark new file mode 100644 index 000000000..f82d4c37d --- /dev/null +++ b/tests/parser/tests/begin.ark @@ -0,0 +1,18 @@ +(begin 1 2 3) +(begin) +(begin +# test + +(let a 1) +) + +(begin (let b 2) 3) + +{} +{ + # a +1 # b +} + +{(let c 4)} +{(if 5 6 7)(mut d 8)} diff --git a/tests/parser/tests/begin.expected b/tests/parser/tests/begin.expected new file mode 100644 index 000000000..f3c918b01 --- /dev/null +++ b/tests/parser/tests/begin.expected @@ -0,0 +1,8 @@ +( Keyword:Begin Number:1 Number:2 Number:3 ) +( Keyword:Begin ) +( Keyword:Begin ( Keyword:Let Symbol:a Number:1 ) ) +( Keyword:Begin ( Keyword:Let Symbol:b Number:2 ) Number:3 ) +( Keyword:Begin ) +( Keyword:Begin Number:1 ) +( Keyword:Begin ( Keyword:Let Symbol:c Number:4 ) ) +( Keyword:Begin ( Keyword:If Number:5 Number:6 Number:7 ) ( Keyword:Mut Symbol:d Number:8 ) ) \ No newline at end of file diff --git a/tests/parser/tests/call.ark b/tests/parser/tests/call.ark new file mode 100644 index 000000000..d904227a2 --- /dev/null +++ b/tests/parser/tests/call.ark @@ -0,0 +1,15 @@ +(func a b) +( +func# A +(if 1 2 #c +3) +"hello"#d +) #y +((foo bar) (test) 1) +( + ( + ( + foo + ) + ) +) \ No newline at end of file diff --git a/tests/parser/tests/call.expected b/tests/parser/tests/call.expected new file mode 100644 index 000000000..33f56b65e --- /dev/null +++ b/tests/parser/tests/call.expected @@ -0,0 +1,4 @@ +( Symbol:func Symbol:a Symbol:b ) +( Symbol:func ( Keyword:If Number:1 Number:2 Number:3 ) String:hello ) +( ( Symbol:foo Symbol:bar ) ( Symbol:test ) Number:1 ) +( ( ( Symbol:foo ) ) ) \ No newline at end of file diff --git a/tests/parser/tests/closure.ark b/tests/parser/tests/closure.ark new file mode 100644 index 000000000..ff7c4c693 --- /dev/null +++ b/tests/parser/tests/closure.ark @@ -0,0 +1,5 @@ +(fun (&a) 1) +(fun 
(&a #jk +&b) 2) +(fun (a &b) 3) +(fun (a b &c &d) 4) \ No newline at end of file diff --git a/tests/parser/tests/closure.expected b/tests/parser/tests/closure.expected new file mode 100644 index 000000000..f11fc2903 --- /dev/null +++ b/tests/parser/tests/closure.expected @@ -0,0 +1,4 @@ +( Keyword:Fun ( Capture:a ) Number:1 ) +( Keyword:Fun ( Capture:a Capture:b ) Number:2 ) +( Keyword:Fun ( Symbol:a Capture:b ) Number:3 ) +( Keyword:Fun ( Symbol:a Symbol:b Capture:c Capture:d ) Number:4 ) \ No newline at end of file diff --git a/tests/parser/tests/comments.ark b/tests/parser/tests/comments.ark new file mode 100644 index 000000000..e1bcc8c3a --- /dev/null +++ b/tests/parser/tests/comments.ark @@ -0,0 +1,7 @@ +# test + +#test#test(let a +##) + + +#))(()) \ No newline at end of file diff --git a/tests/parser/tests/comments.expected b/tests/parser/tests/comments.expected new file mode 100644 index 000000000..e69de29bb diff --git a/tests/parser/tests/del.ark b/tests/parser/tests/del.ark new file mode 100644 index 000000000..d636c7d06 --- /dev/null +++ b/tests/parser/tests/del.ark @@ -0,0 +1,15 @@ +(del a) + +( + del b +) + +( + del # a + c +) + +(#r +del +d #"" +) \ No newline at end of file diff --git a/tests/parser/tests/del.expected b/tests/parser/tests/del.expected new file mode 100644 index 000000000..a644f2b07 --- /dev/null +++ b/tests/parser/tests/del.expected @@ -0,0 +1,4 @@ +( Keyword:Del Symbol:a ) +( Keyword:Del Symbol:b ) +( Keyword:Del Symbol:c ) +( Keyword:Del Symbol:d ) \ No newline at end of file diff --git a/tests/parser/tests/fields.ark b/tests/parser/tests/fields.ark new file mode 100644 index 000000000..4cfe33f5f --- /dev/null +++ b/tests/parser/tests/fields.ark @@ -0,0 +1,7 @@ +(let a b.c) +(mut d e.f.g) +(if (# aze + hi.jk) l.m n.o.p) +(while q.r s.t) +(fun () u.v) +(begin x.y.z) \ No newline at end of file diff --git a/tests/parser/tests/fields.expected b/tests/parser/tests/fields.expected new file mode 100644 index 000000000..1cdbea11c --- 
/dev/null +++ b/tests/parser/tests/fields.expected @@ -0,0 +1,6 @@ +( Keyword:Let Symbol:a ( Field Symbol:b Symbol:c ) ) +( Keyword:Mut Symbol:d ( Field Symbol:e Symbol:f Symbol:g ) ) +( Keyword:If ( ( Field Symbol:hi Symbol:jk ) ) ( Field Symbol:l Symbol:m ) ( Field Symbol:n Symbol:o Symbol:p ) ) +( Keyword:While ( Field Symbol:q Symbol:r ) ( Field Symbol:s Symbol:t ) ) +( Keyword:Fun ( ) ( Field Symbol:u Symbol:v ) ) +( Keyword:Begin ( Field Symbol:x Symbol:y Symbol:z ) ) \ No newline at end of file diff --git a/tests/parser/tests/fun.ark b/tests/parser/tests/fun.ark new file mode 100644 index 000000000..b60906ca7 --- /dev/null +++ b/tests/parser/tests/fun.ark @@ -0,0 +1,27 @@ +(fun () 1) +( +fun +(## +#1 +) + + +"12" # 34 +) +(fun(a b) 2) +( +fun +( +cc +dddd +) +1 # 67 +) + +(fun (a b) (if 1 2 3)) +(fun (a b) (let c (if 1 2 3))) +((fun (a) a) 1) +(fun () ()) +(fun (a) ( + # test +)) \ No newline at end of file diff --git a/tests/parser/tests/fun.expected b/tests/parser/tests/fun.expected new file mode 100644 index 000000000..c000dd55a --- /dev/null +++ b/tests/parser/tests/fun.expected @@ -0,0 +1,9 @@ +( Keyword:Fun ( ) Number:1 ) +( Keyword:Fun ( ) String:12 ) +( Keyword:Fun ( Symbol:a Symbol:b ) Number:2 ) +( Keyword:Fun ( Symbol:cc Symbol:dddd ) Number:1 ) +( Keyword:Fun ( Symbol:a Symbol:b ) ( Keyword:If Number:1 Number:2 Number:3 ) ) +( Keyword:Fun ( Symbol:a Symbol:b ) ( Keyword:Let Symbol:c ( Keyword:If Number:1 Number:2 Number:3 ) ) ) +( ( Keyword:Fun ( Symbol:a ) Symbol:a ) Number:1 ) +( Keyword:Fun ( ) Symbol:nil ) +( Keyword:Fun ( Symbol:a ) Symbol:nil ) \ No newline at end of file diff --git a/tests/parser/tests/huge_number.ark b/tests/parser/tests/huge_number.ark new file mode 100644 index 000000000..337d58921 --- /dev/null +++ b/tests/parser/tests/huge_number.ark @@ -0,0 +1 @@ +(let a 1e+4932) \ No newline at end of file diff --git a/tests/parser/tests/huge_number.expected b/tests/parser/tests/huge_number.expected new file mode 100644 index 
000000000..e8e64a6fb --- /dev/null +++ b/tests/parser/tests/huge_number.expected @@ -0,0 +1,5 @@ +In file ./huge_number.ark +At 1 @ 1:8 + 1 | (let a 1e+4932) + | ^~~~~~~ + Is not a valid number \ No newline at end of file diff --git a/tests/parser/tests/if.ark b/tests/parser/tests/if.ark new file mode 100644 index 000000000..69ef18122 --- /dev/null +++ b/tests/parser/tests/if.ark @@ -0,0 +1,18 @@ +(if 1 2 3) +(if +1 +2 +3) +( if # 9 + "a" #0 + 1 +# 1 +# 2 +# ## + 2) #1 +(if 1 2) + +(if 3 ()) + +(if (func a b) a b) +(if (a b c) (d e) (f)) \ No newline at end of file diff --git a/tests/parser/tests/if.expected b/tests/parser/tests/if.expected new file mode 100644 index 000000000..f853654e8 --- /dev/null +++ b/tests/parser/tests/if.expected @@ -0,0 +1,7 @@ +( Keyword:If Number:1 Number:2 Number:3 ) +( Keyword:If Number:1 Number:2 Number:3 ) +( Keyword:If String:a Number:1 Number:2 ) +( Keyword:If Number:1 Number:2 ) +( Keyword:If Number:3 Symbol:nil ) +( Keyword:If ( Symbol:func Symbol:a Symbol:b ) Symbol:a Symbol:b ) +( Keyword:If ( Symbol:a Symbol:b Symbol:c ) ( Symbol:d Symbol:e ) ( Symbol:f ) ) \ No newline at end of file diff --git a/tests/parser/tests/import.ark b/tests/parser/tests/import.ark new file mode 100644 index 000000000..113e011d1 --- /dev/null +++ b/tests/parser/tests/import.ark @@ -0,0 +1,16 @@ +(import a) +(import a.b) +( +import # keyword +# package +foo.bar.egg # end of line +# end of package +) +(import foo:*) +(import foo.bar:*) +(import foo.bar.egg:*) +(import foo :a) +(import foo.bar # cool package +:a # a nice symbol +:b# another symbol we need +) \ No newline at end of file diff --git a/tests/parser/tests/import.expected b/tests/parser/tests/import.expected new file mode 100644 index 000000000..d74fd0068 --- /dev/null +++ b/tests/parser/tests/import.expected @@ -0,0 +1,17 @@ +( Keyword:Import ( String:a ) ( ) ) +( Keyword:Import ( String:a String:b ) ( ) ) +( Keyword:Import ( String:foo String:bar String:egg ) ( ) ) +( Keyword:Import ( 
String:foo ) Symbol:* ) +( Keyword:Import ( String:foo String:bar ) Symbol:* ) +( Keyword:Import ( String:foo String:bar String:egg ) Symbol:* ) +( Keyword:Import ( String:foo ) ( Symbol:a ) ) +( Keyword:Import ( String:foo String:bar ) ( Symbol:a Symbol:b ) ) + +0) a (basic) +1) b (basic) +2) egg (basic) +3) foo (glob) +4) bar (glob) +5) egg (glob) +6) foo ( a ) +7) bar ( a b ) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_arguments.ark b/tests/parser/tests/incomplete_arguments.ark new file mode 100644 index 000000000..0ccca6130 --- /dev/null +++ b/tests/parser/tests/incomplete_arguments.ark @@ -0,0 +1 @@ +(fun (a \ No newline at end of file diff --git a/tests/parser/tests/incomplete_arguments.expected b/tests/parser/tests/incomplete_arguments.expected new file mode 100644 index 000000000..f5549e411 --- /dev/null +++ b/tests/parser/tests/incomplete_arguments.expected @@ -0,0 +1,5 @@ +In file ./incomplete_arguments.ark +At EOF @ 1:7 + 1 | (fun (a + | ^ + Expected ')' \ No newline at end of file diff --git a/tests/parser/tests/incomplete_begin.ark b/tests/parser/tests/incomplete_begin.ark new file mode 100644 index 000000000..46ba98069 --- /dev/null +++ b/tests/parser/tests/incomplete_begin.ark @@ -0,0 +1 @@ +{ a b (let c d) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_begin.expected b/tests/parser/tests/incomplete_begin.expected new file mode 100644 index 000000000..bd8e1c24a --- /dev/null +++ b/tests/parser/tests/incomplete_begin.expected @@ -0,0 +1,5 @@ +In file ./incomplete_begin.ark +At EOF @ 1:15 + 1 | { a b (let c d) + | ^ + Expected '}' \ No newline at end of file diff --git a/tests/parser/tests/incomplete_call.ark b/tests/parser/tests/incomplete_call.ark new file mode 100644 index 000000000..3456820e9 --- /dev/null +++ b/tests/parser/tests/incomplete_call.ark @@ -0,0 +1 @@ +(a b c (if (ok true) 1 2) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_call.expected 
b/tests/parser/tests/incomplete_call.expected new file mode 100644 index 000000000..10dcda83a --- /dev/null +++ b/tests/parser/tests/incomplete_call.expected @@ -0,0 +1,5 @@ +In file ./incomplete_call.ark +At EOF @ 1:25 + 1 | (a b c (if (ok true) 1 2) + | ^ + Expected ')' \ No newline at end of file diff --git a/tests/parser/tests/incomplete_del.ark b/tests/parser/tests/incomplete_del.ark new file mode 100644 index 000000000..f9748e69c --- /dev/null +++ b/tests/parser/tests/incomplete_del.ark @@ -0,0 +1 @@ +(del) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_del.expected b/tests/parser/tests/incomplete_del.expected new file mode 100644 index 000000000..ef46a075c --- /dev/null +++ b/tests/parser/tests/incomplete_del.expected @@ -0,0 +1,5 @@ +In file ./incomplete_del.ark +At ) @ 1:5 + 1 | (del) + | ^ + del needs a symbol \ No newline at end of file diff --git a/tests/parser/tests/incomplete_fun.ark b/tests/parser/tests/incomplete_fun.ark new file mode 100644 index 000000000..aa9983dd8 --- /dev/null +++ b/tests/parser/tests/incomplete_fun.ark @@ -0,0 +1 @@ +(fun (a b &c)) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_fun.expected b/tests/parser/tests/incomplete_fun.expected new file mode 100644 index 000000000..42b2de2da --- /dev/null +++ b/tests/parser/tests/incomplete_fun.expected @@ -0,0 +1,5 @@ +In file ./incomplete_fun.ark +At ) @ 1:14 + 1 | (fun (a b &c)) + | ^ + Expected a body for the function \ No newline at end of file diff --git a/tests/parser/tests/incomplete_if.ark b/tests/parser/tests/incomplete_if.ark new file mode 100644 index 000000000..9bf6b4d20 --- /dev/null +++ b/tests/parser/tests/incomplete_if.ark @@ -0,0 +1 @@ +(if 1 2 3 diff --git a/tests/parser/tests/incomplete_if.expected b/tests/parser/tests/incomplete_if.expected new file mode 100644 index 000000000..a6ecca9b0 --- /dev/null +++ b/tests/parser/tests/incomplete_if.expected @@ -0,0 +1,6 @@ +In file ./incomplete_if.ark +At EOF @ 2:0 + 1 | (if 1 
2 3 + 2 | + | ^ + Missing ')' after condition \ No newline at end of file diff --git a/tests/parser/tests/incomplete_import_1.ark b/tests/parser/tests/incomplete_import_1.ark new file mode 100644 index 000000000..802f85ecf --- /dev/null +++ b/tests/parser/tests/incomplete_import_1.ark @@ -0,0 +1 @@ +(import) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_import_1.expected b/tests/parser/tests/incomplete_import_1.expected new file mode 100644 index 000000000..d0e754b7c --- /dev/null +++ b/tests/parser/tests/incomplete_import_1.expected @@ -0,0 +1,5 @@ +In file ./incomplete_import_1.ark +At ) @ 1:8 + 1 | (import) + | ^ + Import expected a package name \ No newline at end of file diff --git a/tests/parser/tests/incomplete_import_2.ark b/tests/parser/tests/incomplete_import_2.ark new file mode 100644 index 000000000..abe834157 --- /dev/null +++ b/tests/parser/tests/incomplete_import_2.ark @@ -0,0 +1 @@ +(import a. ) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_import_2.expected b/tests/parser/tests/incomplete_import_2.expected new file mode 100644 index 000000000..2f4b92725 --- /dev/null +++ b/tests/parser/tests/incomplete_import_2.expected @@ -0,0 +1,5 @@ +In file ./incomplete_import_2.ark +At ' ' @ 1:11 + 1 | (import a. ) + | ^ + Package name expected after '.' 
\ No newline at end of file diff --git a/tests/parser/tests/incomplete_let.ark b/tests/parser/tests/incomplete_let.ark new file mode 100644 index 000000000..a5ea45479 --- /dev/null +++ b/tests/parser/tests/incomplete_let.ark @@ -0,0 +1,2 @@ +( +let \ No newline at end of file diff --git a/tests/parser/tests/incomplete_let.expected b/tests/parser/tests/incomplete_let.expected new file mode 100644 index 000000000..25aa08707 --- /dev/null +++ b/tests/parser/tests/incomplete_let.expected @@ -0,0 +1,6 @@ +In file ./incomplete_let.ark +At EOF @ 2:3 + 1 | ( + 2 | let + | ^ + let needs a symbol \ No newline at end of file diff --git a/tests/parser/tests/incomplete_list.ark b/tests/parser/tests/incomplete_list.ark new file mode 100644 index 000000000..a67d2c30e --- /dev/null +++ b/tests/parser/tests/incomplete_list.ark @@ -0,0 +1,3 @@ +[ + 1 + 2 3 \ No newline at end of file diff --git a/tests/parser/tests/incomplete_list.expected b/tests/parser/tests/incomplete_list.expected new file mode 100644 index 000000000..da2831009 --- /dev/null +++ b/tests/parser/tests/incomplete_list.expected @@ -0,0 +1,7 @@ +In file ./incomplete_list.ark +At EOF @ 3:7 + 1 | [ + 2 | 1 + 3 | 2 3 + | ^ + Expected ']' \ No newline at end of file diff --git a/tests/parser/tests/incomplete_macro.ark b/tests/parser/tests/incomplete_macro.ark new file mode 100644 index 000000000..d5fa0be09 --- /dev/null +++ b/tests/parser/tests/incomplete_macro.ark @@ -0,0 +1 @@ +($ (a) a) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_macro.expected b/tests/parser/tests/incomplete_macro.expected new file mode 100644 index 000000000..0a3a2e8b3 --- /dev/null +++ b/tests/parser/tests/incomplete_macro.expected @@ -0,0 +1,5 @@ +In file ./incomplete_macro.ark +At ( @ 1:4 + 1 | ($ (a) a) + | ^ + $ needs a symbol to declare a macro \ No newline at end of file diff --git a/tests/parser/tests/incomplete_macro_arguments.ark b/tests/parser/tests/incomplete_macro_arguments.ark new file mode 100644 index 
000000000..b2a8b4117 --- /dev/null +++ b/tests/parser/tests/incomplete_macro_arguments.ark @@ -0,0 +1 @@ +($ foo (a \ No newline at end of file diff --git a/tests/parser/tests/incomplete_macro_arguments.expected b/tests/parser/tests/incomplete_macro_arguments.expected new file mode 100644 index 000000000..3c8088ca5 --- /dev/null +++ b/tests/parser/tests/incomplete_macro_arguments.expected @@ -0,0 +1,5 @@ +In file ./incomplete_macro_arguments.ark +At EOF @ 1:9 + 1 | ($ foo (a + | ^ + Expected ')' \ No newline at end of file diff --git a/tests/parser/tests/incomplete_macro_spread.ark b/tests/parser/tests/incomplete_macro_spread.ark new file mode 100644 index 000000000..43e48c81b --- /dev/null +++ b/tests/parser/tests/incomplete_macro_spread.ark @@ -0,0 +1 @@ +($ foo (bar ...) (bar)) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_macro_spread.expected b/tests/parser/tests/incomplete_macro_spread.expected new file mode 100644 index 000000000..0f309a885 --- /dev/null +++ b/tests/parser/tests/incomplete_macro_spread.expected @@ -0,0 +1,5 @@ +In file ./incomplete_macro_spread.ark +At ) @ 1:16 + 1 | ($ foo (bar ...) (bar)) + | ^ + Expected a name for the variadic arguments list \ No newline at end of file diff --git a/tests/parser/tests/incomplete_package_name.ark b/tests/parser/tests/incomplete_package_name.ark new file mode 100644 index 000000000..9cd2b6fa4 --- /dev/null +++ b/tests/parser/tests/incomplete_package_name.ark @@ -0,0 +1 @@ +(import a.b. \ No newline at end of file diff --git a/tests/parser/tests/incomplete_package_name.expected b/tests/parser/tests/incomplete_package_name.expected new file mode 100644 index 000000000..d6a9ae90d --- /dev/null +++ b/tests/parser/tests/incomplete_package_name.expected @@ -0,0 +1,5 @@ +In file ./incomplete_package_name.ark +At EOF @ 1:12 + 1 | (import a.b. + | ^ + Package name expected after '.' 
\ No newline at end of file diff --git a/tests/parser/tests/incomplete_string.ark b/tests/parser/tests/incomplete_string.ark new file mode 100644 index 000000000..b486e33c2 --- /dev/null +++ b/tests/parser/tests/incomplete_string.ark @@ -0,0 +1 @@ +(let a "1 2 3) \ No newline at end of file diff --git a/tests/parser/tests/incomplete_string.expected b/tests/parser/tests/incomplete_string.expected new file mode 100644 index 000000000..fed1e13e5 --- /dev/null +++ b/tests/parser/tests/incomplete_string.expected @@ -0,0 +1,5 @@ +In file ./incomplete_string.ark +At EOF @ 1:14 + 1 | (let a "1 2 3) + | ^ + Missing '"' after string \ No newline at end of file diff --git a/tests/parser/tests/incorrect_arg_capture.ark b/tests/parser/tests/incorrect_arg_capture.ark new file mode 100644 index 000000000..363224f8a --- /dev/null +++ b/tests/parser/tests/incorrect_arg_capture.ark @@ -0,0 +1 @@ +(fun (a &b c) 1) \ No newline at end of file diff --git a/tests/parser/tests/incorrect_arg_capture.expected b/tests/parser/tests/incorrect_arg_capture.expected new file mode 100644 index 000000000..806086d4c --- /dev/null +++ b/tests/parser/tests/incorrect_arg_capture.expected @@ -0,0 +1,5 @@ +In file ./incorrect_arg_capture.ark +At c @ 1:12 + 1 | (fun (a &b c) 1) + | ^ + Captured variables should be at the end of the argument list \ No newline at end of file diff --git a/tests/parser/tests/incorrect_escape_seq.ark b/tests/parser/tests/incorrect_escape_seq.ark new file mode 100644 index 000000000..179f7d04b --- /dev/null +++ b/tests/parser/tests/incorrect_escape_seq.ark @@ -0,0 +1 @@ +(print "\i bla bla bla") \ No newline at end of file diff --git a/tests/parser/tests/incorrect_escape_seq.expected b/tests/parser/tests/incorrect_escape_seq.expected new file mode 100644 index 000000000..1b131850c --- /dev/null +++ b/tests/parser/tests/incorrect_escape_seq.expected @@ -0,0 +1,5 @@ +In file ./incorrect_escape_seq.ark +At \ @ 1:9 + 1 | (print "\i bla bla bla") + | ^ + Unknown escape sequence \ 
No newline at end of file diff --git a/tests/parser/tests/incorrect_import.ark b/tests/parser/tests/incorrect_import.ark new file mode 100644 index 000000000..4835b642d --- /dev/null +++ b/tests/parser/tests/incorrect_import.ark @@ -0,0 +1 @@ +(import a.b :c:*) \ No newline at end of file diff --git a/tests/parser/tests/incorrect_import.expected b/tests/parser/tests/incorrect_import.expected new file mode 100644 index 000000000..0a5e2ee61 --- /dev/null +++ b/tests/parser/tests/incorrect_import.expected @@ -0,0 +1,5 @@ +In file ./incorrect_import.ark +At : @ 1:15 + 1 | (import a.b :c:*) + | ^~ + Glob pattern can not follow a symbol to import \ No newline at end of file diff --git a/tests/parser/tests/let_atom.ark b/tests/parser/tests/let_atom.ark new file mode 100644 index 000000000..64199e6d7 --- /dev/null +++ b/tests/parser/tests/let_atom.ark @@ -0,0 +1,21 @@ +(let aaaaaaa 12) +( + mut b 13) + ( +set c "") + +# test comment +#comment### +( +let +b # my symbol +"12"#value +) + +(let d (if 1 2 3)) +(let e +( +while #keyword +4 +5 +)) diff --git a/tests/parser/tests/let_atom.expected b/tests/parser/tests/let_atom.expected new file mode 100644 index 000000000..a3b667e2b --- /dev/null +++ b/tests/parser/tests/let_atom.expected @@ -0,0 +1,6 @@ +( Keyword:Let Symbol:aaaaaaa Number:12 ) +( Keyword:Mut Symbol:b Number:13 ) +( Keyword:Set Symbol:c String: ) +( Keyword:Let Symbol:b String:12 ) +( Keyword:Let Symbol:d ( Keyword:If Number:1 Number:2 Number:3 ) ) +( Keyword:Let Symbol:e ( Keyword:While Number:4 Number:5 ) ) \ No newline at end of file diff --git a/tests/parser/tests/list.ark b/tests/parser/tests/list.ark new file mode 100644 index 000000000..33931ac01 --- /dev/null +++ b/tests/parser/tests/list.ark @@ -0,0 +1,9 @@ +(list 1 2 3) +(list) +(list (list 1)) +[]#list +[#content +1 + #end +] +[[1 a]] \ No newline at end of file diff --git a/tests/parser/tests/list.expected b/tests/parser/tests/list.expected new file mode 100644 index 000000000..6d891e060 --- /dev/null 
+++ b/tests/parser/tests/list.expected @@ -0,0 +1,6 @@ +( Symbol:list Number:1 Number:2 Number:3 ) +( Symbol:list ) +( Symbol:list ( Symbol:list Number:1 ) ) +( Symbol:list ) +( Symbol:list Number:1 ) +( Symbol:list ( Symbol:list Number:1 Symbol:a ) ) \ No newline at end of file diff --git a/tests/parser/tests/loop.ark b/tests/parser/tests/loop.ark new file mode 100644 index 000000000..168c292b3 --- /dev/null +++ b/tests/parser/tests/loop.ark @@ -0,0 +1,14 @@ +# +(while 1 1) +( +while +2 +2 #789 +) + + + +( # 123 + # 456 + while 3 3 ) +(while (isGood 1) (doStuff a (if b c d))) \ No newline at end of file diff --git a/tests/parser/tests/loop.expected b/tests/parser/tests/loop.expected new file mode 100644 index 000000000..262abc3df --- /dev/null +++ b/tests/parser/tests/loop.expected @@ -0,0 +1,4 @@ +( Keyword:While Number:1 Number:1 ) +( Keyword:While Number:2 Number:2 ) +( Keyword:While Number:3 Number:3 ) +( Keyword:While ( Symbol:isGood Number:1 ) ( Symbol:doStuff Symbol:a ( Keyword:If Symbol:b Symbol:c Symbol:d ) ) ) \ No newline at end of file diff --git a/tests/parser/tests/macro.ark b/tests/parser/tests/macro.ark new file mode 100644 index 000000000..180ab33a2 --- /dev/null +++ b/tests/parser/tests/macro.ark @@ -0,0 +1,18 @@ +($ a 1) +($ b () 2) +($ +c # macro name +( # arg list +d # macro argument +e +) +3 # body +) +($ f(g)4) +($ h(i j) (let a 1)) + +($ h (i j) (let a (if i 2 3))) +($ k (l ...m) (print l m)) +($ n ( +...p +) (print p)) \ No newline at end of file diff --git a/tests/parser/tests/macro.expected b/tests/parser/tests/macro.expected new file mode 100644 index 000000000..c33ff45b6 --- /dev/null +++ b/tests/parser/tests/macro.expected @@ -0,0 +1,8 @@ +( Macro Symbol:a Number:1 ) +( Macro Symbol:b ( ) Number:2 ) +( Macro Symbol:c ( Symbol:d Symbol:e ) Number:3 ) +( Macro Symbol:f ( Symbol:g ) Number:4 ) +( Macro Symbol:h ( Symbol:i Symbol:j ) ( Keyword:Let Symbol:a Number:1 ) ) +( Macro Symbol:h ( Symbol:i Symbol:j ) ( Keyword:Let Symbol:a ( 
Keyword:If Symbol:i Number:2 Number:3 ) ) ) +( Macro Symbol:k ( Symbol:l Spread:m ) ( Symbol:print Symbol:l Symbol:m ) ) +( Macro Symbol:n ( Spread:p ) ( Symbol:print Symbol:p ) ) \ No newline at end of file diff --git a/tests/parser/tests/numbers.ark b/tests/parser/tests/numbers.ark new file mode 100644 index 000000000..8475fd064 --- /dev/null +++ b/tests/parser/tests/numbers.ark @@ -0,0 +1,7 @@ +(let a 1.2) +(let b -3.4) +(let c -0) +(let d 1e4) +(let e 2e+8) +(let f 4e-16) +(let g 8.91e-31) \ No newline at end of file diff --git a/tests/parser/tests/numbers.expected b/tests/parser/tests/numbers.expected new file mode 100644 index 000000000..5529eb8bf --- /dev/null +++ b/tests/parser/tests/numbers.expected @@ -0,0 +1,7 @@ +( Keyword:Let Symbol:a Number:1.2 ) +( Keyword:Let Symbol:b Number:-3.4 ) +( Keyword:Let Symbol:c Number:-0 ) +( Keyword:Let Symbol:d Number:10000 ) +( Keyword:Let Symbol:e Number:2e+08 ) +( Keyword:Let Symbol:f Number:4e-16 ) +( Keyword:Let Symbol:g Number:8.91e-31 ) \ No newline at end of file diff --git a/tests/parser/tests/run b/tests/parser/tests/run new file mode 100755 index 000000000..8ec2c4155 --- /dev/null +++ b/tests/parser/tests/run @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +file=parser + +if [ -f ../build/Release/${file}.exe ]; then + cmd=../build/Release/${file}.exe +elif [ -f ../build/${file} ]; then + cmd=../build/${file} +else + echo "No $file executable found" && exit 1 +fi + +Reset='\033[0m' +Black='\033[0;30m' +Red='\033[0;31m' +Green='\033[0;32m' +Yellow='\033[0;33m' +Blue='\033[0;34m' +Purple='\033[0;35m' +Cyan='\033[0;36m' +White='\033[0;37m' + +passed=0 +failed=0 + +for f in ./*.ark; do + output=$($cmd $f 2>&1) + expected=$(cat ${f%.*}.expected) + diff=$(diff --strip-trailing-cr <(echo "$output" | sed -r "s/\x1B\[([0-9]{1,3}(;[0-9]{1,2};?)?)?[mGK]//g") <(echo "$expected")) + + if [[ $diff != "" ]]; then + echo -e "${Red}FAILED${Reset} ${f%.*}" + ((failed=failed+1)) + echo -e " ${Yellow}Output${Reset}:" + echo "$diff" + 
else + echo -e "${Green}PASSED${Reset} ${f%.*}" + ((passed=passed+1)) + fi +done + +echo " ------------------------------" +echo -e " ${Cyan}${passed}${Reset} passed, ${Purple}${failed}${Reset} failed" + +if [[ $failed != 0 ]]; then + exit 1 +else + exit 0 +fi diff --git a/tests/parser/tests/strings.ark b/tests/parser/tests/strings.ark new file mode 100644 index 000000000..0a83b60e0 --- /dev/null +++ b/tests/parser/tests/strings.ark @@ -0,0 +1,2 @@ +(print "abc" "123\"test") +(print "\\ 123aéoÒ") \ No newline at end of file diff --git a/tests/parser/tests/strings.expected b/tests/parser/tests/strings.expected new file mode 100644 index 000000000..3e19cec62 --- /dev/null +++ b/tests/parser/tests/strings.expected @@ -0,0 +1,2 @@ +( Symbol:print String:abc String:123"test ) +( Symbol:print String:\ 123aéoÒ ) \ No newline at end of file