diff --git a/.github/launch-tests b/.github/launch-tests
deleted file mode 100755
index 6c79721eb..000000000
--- a/.github/launch-tests
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env bash
-
-(cd tests/arkscript ; echo ; bash ./run-tests)
-(cd tests/cpp/ ; echo ; bash ./run-tests)
-(cd tests/errors ; echo ; bash ./run-tests)
-(cd tests/ast/ ; echo ; bash ./run-tests)
-(source ./lib/modules/.github/run-tests)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2818ddc78..22acdc50d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -139,38 +139,11 @@ jobs:
with:
submodules: recursive
- - name: Update GNU compilers
- if: startsWith(matrix.config.name, 'Ubuntu GCC')
- shell: bash
- run: |
- sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test
- sudo apt-get -yq install ${{ matrix.config.cc }} ${{ matrix.config.cxx }}
-
- - name: Update LLVM compilers
- if: startsWith(matrix.config.name, 'Ubuntu Clang')
- shell: bash
- run: |
- version=`echo ${{ matrix.config.cc }} | cut -c 7-`
- sudo apt-get install -y clang-${version} lld-${version} libc++-${version}-dev libc++abi-${version}-dev clang-tools-${version}
+ - name: Setup compilers
+ uses: ./.github/workflows/setup-compilers
- - name: Install MacOS dependencies
- if: startsWith(matrix.config.name, 'MacOS')
- shell: bash
- run: env HOMEBREW_NO_AUTO_UPDATE=1 brew install openssl
-
- - uses: ilammy/msvc-dev-cmd@v1
- if: startsWith(matrix.config.name, 'Windows')
-
- - name: Download Windows dependencies
- if: startsWith(matrix.config.name, 'Windows')
- shell: pwsh
- run: |
- Invoke-RestMethod -Uri https://www.sqlite.org/2022/sqlite-dll-win64-x64-${Env:SQLITE_VERSION}.zip -OutFile sqlite.zip
- Invoke-RestMethod -Uri https://www.sqlite.org/2022/sqlite-amalgamation-${Env:SQLITE_VERSION}.zip -OutFile amalgation.zip
- Expand-Archive sqlite.zip -DestinationPath sqlite_lib
- Expand-Archive amalgation.zip -DestinationPath sqlite_code
- cd sqlite_lib
- lib /DEF:sqlite3.def /OUT:sqlite3.lib /MACHINE:x64
+ - name: Setup dependencies
+ uses: ./.github/workflows/setup-deps
- name: Configure CMake Ark
shell: bash
@@ -181,7 +154,7 @@ jobs:
-DCMAKE_C_COMPILER=${{ matrix.config.cc }} \
-DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
-DARK_SANITIZERS=${{ matrix.config.sanitizers }} \
- -DARK_BUILD_EXE=On -DARK_BUILD_MODULES=On -DARK_MOD_ALL=On
+ -DARK_BUILD_EXE=On -DARK_BUILD_MODULES=On -DARK_MOD_ALL=On -DARK_BUILD_PARSER_TESTS=On
- name: Add SQLite deps
if: startsWith(matrix.config.name, 'Windows')
@@ -195,7 +168,7 @@ jobs:
shell: bash
run: cmake --build build --config $BUILD_TYPE
- - name: Configure CMake Integration tests
+ - name: Configure & build CMake Integration tests
shell: bash
run: |
cd tests/cpp
@@ -204,29 +177,21 @@ jobs:
-DCMAKE_C_COMPILER=${{ matrix.config.cc }} \
-DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
-DARK_SANITIZERS=${{ matrix.config.sanitizers }}
-
- - name: Build Integration tests
- shell: bash
- run: cd tests/cpp && cmake --build build --config $BUILD_TYPE
-
- - name: Organize files for upload
- if: startsWith(matrix.config.name, 'Ubuntu') || startsWith(matrix.config.name, 'MacOS')
- shell: bash
- run: |
- mkdir -p artifact/lib/std
- cp build/arkscript artifact
- cp build/libArkReactor.* artifact
- cp lib/*.arkm artifact/lib
- cp lib/std/*.ark artifact/lib/std
- rm -rf artifact/lib/std/{.git,.github,tests/__arkscript__}
+ cmake --build build --config $BUILD_TYPE
- name: Organize files for upload
- if: startsWith(matrix.config.name, 'Windows')
shell: bash
run: |
mkdir -p artifact/lib/std
- cp build/$BUILD_TYPE/arkscript.exe artifact
- cp build/$BUILD_TYPE/ArkReactor.dll artifact
+ # Linux/MacOS
+ cp build/arkscript artifact || true
+ cp build/parser artifact || true
+ cp build/libArkReactor.* artifact || true
+ # Windows
+ cp build/$BUILD_TYPE/arkscript.exe artifact || true
+ cp build/$BUILD_TYPE/parser.exe artifact || true
+ cp build/$BUILD_TYPE/ArkReactor.dll artifact || true
+ # Generic
cp lib/*.arkm artifact/lib
cp lib/std/*.ark artifact/lib/std
rm -rf artifact/lib/std/{.git,.github,tests/__arkscript__}
@@ -234,8 +199,9 @@ jobs:
- name: Organize temp artifact
shell: bash
run: |
- mkdir -p temp/
+ mkdir -p temp/parser/
cp -r tests/cpp temp/
+ cp -r tests/parser temp/
- name: Upload artifact
uses: actions/upload-artifact@v3.1.1
@@ -252,7 +218,7 @@ jobs:
tests:
runs-on: ${{ matrix.config.os }}
- name: Test on ${{ matrix.config.name }}
+ name: Tests on ${{ matrix.config.name }}
needs: [build]
strategy:
@@ -279,46 +245,44 @@ jobs:
with:
submodules: recursive
- - name: Download artifact
- id: download
- uses: actions/download-artifact@v3.0.1
- with:
- name: ${{ matrix.config.artifact }}
- path: build
+ - name: Setup tests
+ uses: ./.github/workflows/setup-tests
- - name: Download temp artifact
- id: download-artifact
- uses: actions/download-artifact@v3.0.1
- with:
- name: temp-${{ matrix.config.artifact }}
- path: artifact
+ - name: Parser tests
+ shell: bash
+ run: |
+ export ASAN_OPTIONS=detect_odr_violation=0
+ (cd tests/parser/tests ; bash ./run)
- - name: Update GNU compilers
- if: startsWith(matrix.config.name, 'Ubuntu GCC')
+ - name: Integration tests
shell: bash
run: |
- sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test
- sudo apt-get -yq install libstdc++6
+ export ASAN_OPTIONS=detect_odr_violation=0
+ (cd tests/cpp ; bash ./run-tests)
- - shell: bash
+ - name: AST tests
+ shell: bash
run: |
- mv artifact/cpp/out tests/cpp/
- mv build/lib/*.arkm lib/
- chmod u+x build/arkscript tests/cpp/out/*
+ export ASAN_OPTIONS=use_odr_indicator=1
+ (cd tests/ast ; bash ./run-tests)
- - name: Pre-test
- if: startsWith(matrix.config.name, 'Windows')
+ - name: Unit tests
shell: bash
run: |
- mkdir -p tests/cpp/out
- cp build/*.dll tests/cpp/out/
+ export ASAN_OPTIONS=detect_odr_violation=0
+ (cd tests/arkscript ; bash ./run-tests)
- - name: Tests
- if: steps.download.outcome == 'success' && steps.download-artifact.outcome == 'success'
+ #- name: Modules tests
+ # shell: bash
+ # run: |
+ # export ASAN_OPTIONS=detect_odr_violation=0
+ # (source ./lib/modules/.github/run-tests)
+
+ - name: Runtime error message generation tests
shell: bash
run: |
- export ASAN_OPTIONS=use_odr_indicator=1
- bash .github/launch-tests
+ export ASAN_OPTIONS=detect_odr_violation=0
+ (cd tests/errors ; bash ./run-tests)
valgrind:
runs-on: ubuntu-latest
@@ -337,14 +301,11 @@ jobs:
name: "ubuntu-clang-11-valgrind"
path: build
- - shell: bash
- run: |
- mv build/lib/*.arkm lib/
- chmod u+x build/arkscript
-
- name: Update LLVM compilers
shell: bash
run: |
+ mv build/lib/*.arkm lib/
+ chmod u+x build/arkscript
sudo apt-get update --fix-missing
sudo apt-get install -y clang-11 lld-11 libc++-11-dev libc++abi-11-dev clang-tools-11 valgrind
diff --git a/.github/workflows/setup-compilers/action.yaml b/.github/workflows/setup-compilers/action.yaml
new file mode 100644
index 000000000..d7621e762
--- /dev/null
+++ b/.github/workflows/setup-compilers/action.yaml
@@ -0,0 +1,26 @@
+---
+name: "Update compilers"
+description: "Install the C/C++ toolchain selected by the job matrix (GCC or Clang on Ubuntu, MSVC on Windows)"
+
+# Every step reads the caller's matrix.config entry (name, cc, cxx).
+runs:
+  using: "composite"
+  steps:
+    - name: Update GNU compilers
+      if: startsWith(matrix.config.name, 'Ubuntu GCC')
+      shell: bash
+      run: |
+        sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test
+        sudo apt-get -yq install ${{ matrix.config.cc }} ${{ matrix.config.cxx }}
+
+    - name: Update LLVM compilers
+      if: startsWith(matrix.config.name, 'Ubuntu Clang')
+      shell: bash
+      run: |
+        # cut -c 7- drops the first six characters; presumably matrix.config.cc is "clang-<version>" (confirm against the matrix)
+        version=`echo ${{ matrix.config.cc }} | cut -c 7-`
+        sudo apt-get install -y clang-${version} lld-${version} libc++-${version}-dev libc++abi-${version}-dev clang-tools-${version}
+
+    - name: Setup MSVC developer environment
+      uses: ilammy/msvc-dev-cmd@v1
+      if: startsWith(matrix.config.name, 'Windows')
diff --git a/.github/workflows/setup-deps/action.yaml b/.github/workflows/setup-deps/action.yaml
new file mode 100644
index 000000000..4fb2b4cbe
--- /dev/null
+++ b/.github/workflows/setup-deps/action.yaml
@@ -0,0 +1,24 @@
+---
+name: "Install dependencies"
+description: "Install the third-party libraries used by the CI jobs (OpenSSL on MacOS, SQLite on Windows)"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install MacOS dependencies
+      if: startsWith(matrix.config.name, 'MacOS')
+      shell: bash
+      run: env HOMEBREW_NO_AUTO_UPDATE=1 brew install openssl
+
+    - name: Download Windows dependencies
+      if: startsWith(matrix.config.name, 'Windows')
+      shell: pwsh
+      run: |
+        # SQLITE_VERSION is read from the environment; presumably exported by the calling workflow (confirm)
+        Invoke-RestMethod -Uri https://www.sqlite.org/2022/sqlite-dll-win64-x64-${Env:SQLITE_VERSION}.zip -OutFile sqlite.zip
+        Invoke-RestMethod -Uri https://www.sqlite.org/2022/sqlite-amalgamation-${Env:SQLITE_VERSION}.zip -OutFile amalgation.zip
+        Expand-Archive sqlite.zip -DestinationPath sqlite_lib
+        Expand-Archive amalgation.zip -DestinationPath sqlite_code
+        cd sqlite_lib
+        # build the sqlite3.lib import library from the .def file unpacked next to the DLL
+        lib /DEF:sqlite3.def /OUT:sqlite3.lib /MACHINE:x64
diff --git a/.github/workflows/setup-tests/action.yaml b/.github/workflows/setup-tests/action.yaml
new file mode 100644
index 000000000..525e63976
--- /dev/null
+++ b/.github/workflows/setup-tests/action.yaml
@@ -0,0 +1,42 @@
+---
+name: "Setup tests"
+description: "Unpack necessary artifacts, updates compilers"
+
+runs:
+ using: "composite"
+ steps:
+ - name: Download artifact
+ id: download
+ uses: actions/download-artifact@v3.0.1
+ with:
+ name: ${{ matrix.config.artifact }}
+ path: build
+
+ - name: Download temp artifact
+ id: download-artifact
+ uses: actions/download-artifact@v3.0.1
+ with:
+ name: temp-${{ matrix.config.artifact }}
+ path: artifact
+
+ - name: Update GNU compilers
+ if: startsWith(matrix.config.name, 'Ubuntu GCC')
+ shell: bash
+ run: |
+ sudo apt-add-repository -y ppa:ubuntu-toolchain-r/test
+ sudo apt-get -yq install libstdc++6
+
+ - shell: bash
+ run: |
+ mv artifact/cpp/out tests/cpp/
+ mv build/lib/*.arkm lib/
+ chmod u+x build/arkscript tests/cpp/out/*
+ cp -r artifact/parser/* tests/parser/
+ cp -r build tests/parser/ && ls tests/parser/build/
+ chmod u+x tests/parser/build/parser
+
+ - shell: bash
+ if: startsWith(matrix.config.name, 'Windows')
+ run: |
+ cp build/*.dll tests/cpp/out/
+ cp build/*.dll tests/parser/build/
diff --git a/.gitignore b/.gitignore
index f59d697ca..6cf5f7d56 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,6 @@
# Personal utilities
warnings.log
-cformat.ps1
# ArkScript
include/Ark/Constants.hpp
@@ -28,6 +27,7 @@ afl/
.cache/
build/
ninja/
+cmake-build-*/
# Prerequisites
*.d
@@ -47,10 +47,6 @@ ninja/
*.dylib
*.dll
-# Fortran module files
-*.mod
-*.smod
-
# Compiled Static libraries
*.lai
*.la
@@ -64,3 +60,6 @@ ninja/
# MacOS files
.DS_store
+
+# Visual Studio (the generated file is spelled CMakeSettings.json; ignore patterns are case-sensitive)
+CMakeSettings.json
diff --git a/.gitmodules b/.gitmodules
index 137ae9487..39a0aef00 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,3 @@
-[submodule "lib/utf8_decoder"]
- path = lib/utf8_decoder
- url = https://github.com/PierrePharel/utf8_decoder.git
[submodule "lib/std"]
path = lib/std
url = https://github.com/ArkScript-lang/std.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e311bae6f..e8fe547ac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
- more tests for the io builtins
- added lines and code coloration in the error context
- new dependency: fmtlib
+- added the padding/instruction/argument values when displaying instructions in the bytecode reader
### Changed
- instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument
@@ -43,6 +44,11 @@
- fixed a bug in the macro processor where macros were deleted when they shouldn't
- fixed a bug where macro functions with no argument would crash the macro processor
+### Removed
+
+### Deprecated
+
+
## [3.4.0] - 2022-09-12
### Added
- added new `async` and `await` builtins
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a38fd0ad2..86fb75ef0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,7 +27,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# files needed for the library ArkReactor
file(GLOB_RECURSE SOURCE_FILES
${ark_SOURCE_DIR}/src/arkreactor/*.cpp
- ${ark_SOURCE_DIR}/lib/fmt/src/*.cc)
+ ${ark_SOURCE_DIR}/lib/fmt/src/format.cc)
add_library(ArkReactor SHARED ${SOURCE_FILES})
@@ -46,8 +46,8 @@ if (CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG OR APPLE)
)
if (CMAKE_COMPILER_IS_GNUCXX)
- # The package utf8_decoder has an issues with constant overflow.
- # Once thisis fixed remove this flag:
+ # The package utf8 has an issue with constant overflow.
+ # Once this is fixed remove this flag:
target_compile_options(ArkReactor PUBLIC -Wno-overflow)
endif()
@@ -96,7 +96,6 @@ add_subdirectory("${ark_SOURCE_DIR}/lib/termcolor" EXCLUDE_FROM_ALL)
target_include_directories(ArkReactor
PUBLIC
- "${ark_SOURCE_DIR}/lib/utf8_decoder/"
"${ark_SOURCE_DIR}/lib/picosha2/"
"${ark_SOURCE_DIR}/lib/fmt/include")
@@ -122,12 +121,7 @@ target_include_directories(ArkReactor
PUBLIC
${ark_SOURCE_DIR}/include)
-# setting up project properties
-set_target_properties(
- ArkReactor
- PROPERTIES
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON)
+target_compile_features(ArkReactor PUBLIC cxx_std_17)  # PUBLIC: the installed headers use C++17 (std::variant, nested namespaces), so targets linking ArkReactor need it too
# Installation rules
@@ -176,11 +170,18 @@ if (ARK_BUILD_MODULES)
add_subdirectory(${ark_SOURCE_DIR}/lib/modules)
endif()
+if (ARK_BUILD_PARSER_TESTS)
+ add_executable(parser ${ark_SOURCE_DIR}/tests/parser/main.cpp)
+ target_link_libraries(parser PUBLIC ArkReactor)
+ target_compile_features(parser PRIVATE cxx_std_17)
+endif()
+
if (ARK_BUILD_EXE)
# additional files needed for the exe (repl, command line and stuff)
- set(EXE_SOURCES
+ file(GLOB_RECURSE EXE_SOURCES
${ark_SOURCE_DIR}/src/arkscript/REPL/Utils.cpp
${ark_SOURCE_DIR}/src/arkscript/REPL/Repl.cpp
+ ${ark_SOURCE_DIR}/lib/fmt/src/format.cc
${ark_SOURCE_DIR}/src/arkscript/main.cpp)
add_executable(arkscript ${EXE_SOURCES})
diff --git a/cmake/link_time_optimization.cmake b/cmake/link_time_optimization.cmake
index ac5156506..a2eda34ee 100644
--- a/cmake/link_time_optimization.cmake
+++ b/cmake/link_time_optimization.cmake
@@ -3,7 +3,7 @@ include(CheckIPOSupported)
check_ipo_supported(RESULT ipo_supported)
function(enable_lto target_name)
- if (ipo_supported)
+    if (ipo_supported AND ("${CMAKE_BUILD_TYPE}" STREQUAL "Release"))  # quoted so an unset build type compares as "" instead of producing a malformed if()
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND (CMAKE_CXX_COMPILER_VERSION MATCHES "^8\..+"))
message(WARNING "LTO supported but not enabled to prevent https://github.com/ArkScript-lang/Ark/pull/385#issuecomment-1163597951")
else()
diff --git a/examples/error.ark b/examples/error.ark
index d54d0b41d..9d3c6c96d 100644
--- a/examples/error.ark
+++ b/examples/error.ark
@@ -2,7 +2,7 @@
# very often, and this is a convention,
# if an imported file starts with a capital letter,
# it shall be a file in the standard library.
-(import "Exceptions.ark")
+(import std.Exceptions)
# the function which should do a "safe number invertion"
(let invert (fun (x) {
diff --git a/examples/macros.ark b/examples/macros.ark
index b7ee159f1..7573f6aef 100644
--- a/examples/macros.ark
+++ b/examples/macros.ark
@@ -1,12 +1,12 @@
-!{suffix-dup (sym x) {
- !{if (> x 1)
- (suffix-dup sym (- x 1))}
- (symcat sym x)}}
+($ suffix-dup (sym x) {
+ ($if (> x 1)
+ (suffix-dup sym (- x 1)))
+ (symcat sym x)})
-!{partial (func ...defargs) {
- !{bloc (suffix-dup a (- (argcount func) (len defargs)))}
+($ partial (func ...defargs) {
+ ($ bloc (suffix-dup a (- (argcount func) (len defargs))))
(fun (bloc) (func ...defargs bloc))
- !{undef bloc}}}
+ ($undef bloc)})
(let test_func (fun (a b c) (* a b c)))
(let test_func1 (partial test_func 1))
@@ -16,31 +16,31 @@
(print "Expected arguments for test_func1: " (argcount test_func1) ", expected " 2)
(print "Calling them: " (test_func 1 2 3) " " (test_func1 2 3))
-!{foo (a b) (+ a b)}
+($ foo (a b) (+ a b))
(print "Using macro foo (a b) => (+ a b): " (foo 1 2))
-!{var 12}
+($ var 12)
(print "Using macro constant var=12: " var)
-!{if (= var 12)
+($if (= var 12)
(print "This was executed in a if macro, testing var == 12")
- (print "You shouldn't see this")}
+ (print "You shouldn't see this"))
-!{if (and true true)
+($if (and true true)
(print "This was executed in a if macro, testing (and true true)")
- (print "You shouldn't see this (bis)")}
+ (print "You shouldn't see this (bis)"))
-!{defun (name args body) (let name (fun args body))}
+($ defun (name args body) (let name (fun args body)))
(defun a_func (a b) (+ a b))
(print "Generated a function with a macro, a_func (a b) => (+ a b)")
(print "Calling (a_func 1 2): " (a_func 1 2))
-!{one (...args) (print "Macro 'one', returns the 2nd argument given in " args " => " (@ args 1))}
+($ one (...args) (print "Macro 'one', returns the 2nd argument given in " args " => " (@ args 1)))
(one 1 2)
(one 1 3 4)
(one 1 5 6 7 8)
-!{last (...args) (print "Macro 'last', returns the last argument given in " args " => " (@ args -1))}
+($ last (...args) (print "Macro 'last', returns the last argument given in " args " => " (@ args -1)))
(last 1 2)
(last 1 3 4)
(last 1 5 6 7 8)
@@ -48,28 +48,28 @@
{
(print "Testing macros in scopes and macro shadowing")
- !{test (+ 1 2 3)}
+ ($ test (+ 1 2 3))
(print "(global) Reading macro 'test', expected 6, " test)
((fun () {
- !{test (- 1 2 3)}
+ ($ test (- 1 2 3))
(print "(sub scope) Reading macro 'test', expected -4, " test)}))
(print "(global) Reading macro 'test', expected 6, " test)
{
- !{test 555}
+ ($ test 555)
(print "(subscope) Reading macro 'test', expected 555, " test)
- !{undef test}
+ ($ undef test)
(print "(subscope, undef test) Reading macro 'test', expected 6, " test)
- !{undef a}}}
+ ($ undef a)}}
(print "Demonstrating a threading macro")
-!{-> (arg fn1 ...fn) {
- !{if (> (len fn) 0)
+($ -> (arg fn1 ...fn) {
+ ($if (> (len fn) 0)
(-> (fn1 arg) ...fn)
- (fn1 arg)}}}
+ (fn1 arg))})
(let filename "hello.json")
diff --git a/images/diagram.svg b/images/diagram.svg
index 0a58aefce..9bdd90005 100644
--- a/images/diagram.svg
+++ b/images/diagram.svg
@@ -1 +1 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/include/Ark/Compiler/AST/BaseParser.hpp b/include/Ark/Compiler/AST/BaseParser.hpp
new file mode 100644
index 000000000..57c7245df
--- /dev/null
+++ b/include/Ark/Compiler/AST/BaseParser.hpp
@@ -0,0 +1,87 @@
+#ifndef SRC_BASEPARSER_HPP
+#define SRC_BASEPARSER_HPP
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace Ark::internal
+{
+ struct FilePosition
+ {
+ std::size_t row;
+ std::size_t col;
+ };
+
+ class BaseParser
+ {
+ public:
+ BaseParser() = default;
+
+ private:
+ std::string m_filename;
+ std::string m_str;
+ std::string::iterator m_it, m_next_it;
+ utf8_char_t m_sym;
+
+ /*
+ getting next character and changing the values of count/row/col/sym
+ */
+ void next();
+
+ protected:
+ void initParser(const std::string& filename, const std::string& code);
+
+ FilePosition getCursor();
+
+ void error(const std::string& error, const std::string exp);
+ void errorWithNextToken(const std::string& message);
+ void errorMissingSuffix(char suffix, const std::string& node_name);
+
+ inline long getCount() { return std::distance(m_str.begin(), m_it); }
+ inline std::size_t getSize() { return m_str.size(); }
+ inline bool isEOF() { return m_it == m_str.end(); }
+
+ void backtrack(long n);
+
+ /*
+ Function to use and check if a Character Predicate was able to parse
+ the current symbol.
+ Add the symbol to the given string (if there was one) and call next()
+ */
+ bool accept(const CharPred& t, std::string* s = nullptr);
+
+ /*
+ Function to use and check if a Character Predicate was able to parse
+ the current Symbol.
+ Add the symbol to the given string (if there was one) and call next().
+ Throw a CodeError if it couldn't.
+ */
+ bool expect(const CharPred& t, std::string* s = nullptr);
+
+ // basic parsers
+ bool space(std::string* s = nullptr);
+ bool inlineSpace(std::string* s = nullptr);
+ bool endOfLine(std::string* s = nullptr);
+ bool comment();
+ bool newlineOrComment();
+ bool prefix(char c);
+ bool suffix(char c);
+ bool number(std::string* s = nullptr);
+ bool signedNumber(std::string* s = nullptr);
+ bool hexNumber(unsigned length, std::string* s = nullptr);
+ bool name(std::string* s = nullptr);
+ bool sequence(const std::string& s);
+ bool packageName(std::string* s = nullptr);
+ bool anyUntil(const CharPred& delim, std::string* s = nullptr);
+
+ bool oneOf(std::initializer_list words, std::string* s = nullptr);
+ };
+}
+
+#endif
diff --git a/include/Ark/Compiler/AST/Import.hpp b/include/Ark/Compiler/AST/Import.hpp
new file mode 100644
index 000000000..706029f75
--- /dev/null
+++ b/include/Ark/Compiler/AST/Import.hpp
@@ -0,0 +1,108 @@
+#ifndef COMPILER_AST_IMPORT_HPP
+#define COMPILER_AST_IMPORT_HPP
+
+#include
+#include
+#include
+
+#include
+
+namespace Ark::internal
+{
+    struct ARK_API Import
+    {
+        /**
+         * @brief The filename without the extension
+         * @details Example: `(import foo.bar)` => `bar`
+         *          `(import foo.bar.egg:*)` => `egg`
+         *          `(import foo :a :b :c)` => `foo`
+         *
+         */
+        std::string prefix;
+
+        /**
+         * @brief Package with all the segments
+         * @details Example: `(import foo.bar)` => `{foo, bar}`
+         *          `(import foo.bar.egg:*)` => `{foo, bar, egg}`
+         *          `(import foo :a :b :c)` => `{foo}`
+         */
+        std::vector package;
+
+        /**
+         * @brief Import with prefix (the package) or not
+         *
+         */
+        bool with_prefix = true;
+
+        /**
+         * @brief List of symbols to import, can be empty if none provided
+         *
+         */
+        std::vector symbols;
+
+        /**
+         * @brief Join the package segments with dots
+         * @details Example: `{foo, bar, egg}` => `foo.bar.egg`
+         *
+         * @return std::string the dotted package name, or an empty string when the package has no segment
+         */
+        inline std::string toPackageString() const
+        {
+            // front() and begin() + 1 are undefined behavior on an empty vector
+            if (package.empty())
+                return "";
+
+            return std::accumulate(package.begin() + 1, package.end(), package.front(), [](const std::string& left, const std::string& right) {
+                return left + "." + right;
+            });
+        }
+
+        /**
+         * @brief Join the package segments with slashes, skipping a leading `std` segment
+         * @details Example: `{std, foo, bar}` => `foo/bar`
+         *          `{foo, bar}` => `foo/bar`
+         *
+         * @return std::string the joined path, or an empty string when no segment is left to join
+         */
+        inline std::string packageToPath() const
+        {
+            // check empty() first: front() is undefined behavior on an empty vector
+            const std::size_t offset = (!package.empty() && package.front() == "std") ? 1 : 0;
+            // empty package, or `std` alone: package[offset] would be out of bounds
+            if (offset >= package.size())
+                return "";
+
+            return std::accumulate(
+                std::next(package.begin() + offset),
+                package.end(),
+                package[offset],
+                [](const std::string& a, const std::string& b) {
+                    return a + "/" + b;
+                });
+        }
+
+        /**
+         * @brief Check if we should import everything, given something like `(import foo.bar.egg:*)`
+         *
+         * @return true if all symbols of the file should be imported in the importer scope
+         * @return false otherwise
+         */
+        inline bool isGlob() const
+        {
+            return !with_prefix && symbols.empty();
+        }
+
+        /**
+         * @brief Check if we should import everything with a prefix, given a `(import foo.bar.egg)`
+         *
+         * @return true if the import keeps its prefix and lists no symbol
+         * @return false otherwise
+         */
+        inline bool isBasic() const
+        {
+            return with_prefix && symbols.empty();
+        }
+    };
+}
+
+#endif
diff --git a/include/Ark/Compiler/AST/Lexer.hpp b/include/Ark/Compiler/AST/Lexer.hpp
deleted file mode 100644
index 19534970e..000000000
--- a/include/Ark/Compiler/AST/Lexer.hpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- * @file Lexer.hpp
- * @author Alexandre Plateau (lexplt.dev@gmail.com)
- * @brief Tokenize ArkScript code
- * @version 0.1
- * @date 2020-10-27
- *
- * @copyright Copyright (c) 2020-2021
- *
- */
-
-#ifndef ARK_COMPILER_LEXER_HPP
-#define ARK_COMPILER_LEXER_HPP
-
-#include
-
-#include
-#include
-
-namespace Ark::internal
-{
- /**
- * @brief The lexer, in charge of creating a list of tokens
- *
- */
- class Lexer
- {
- public:
- /**
- * @brief Construct a new Lexer object
- *
- * @param debug the debug level
- */
- explicit Lexer(unsigned debug) noexcept;
-
- /**
- * @brief Give code to tokenize and create the list of tokens
- *
- * @param code the ArkScript code
- */
- void feed(const std::string& code);
-
- /**
- * @brief Return the list of tokens
- *
- * @return std::vector&
- */
- std::vector& tokens() noexcept;
-
- private:
- unsigned m_debug;
- std::vector m_tokens;
-
- inline constexpr bool isHexChar(char chr)
- {
- return (('a' <= chr && chr <= 'f') || ('A' <= chr && chr <= 'F') || ('0' <= chr && chr <= '9'));
- }
-
- /**
- * @brief Helper function to determine the type of a token
- *
- * @param value
- * @return TokenType
- */
- TokenType guessType(const std::string& value) noexcept;
-
- /**
- * @brief Check if the value is a keyword in ArkScript
- *
- * @param value
- * @return true
- * @return false
- */
- bool isKeyword(const std::string& value) noexcept;
- /**
- * @brief Check if the value can be an identifier in ArkScript
- *
- * @param value
- * @return true
- * @return false
- */
- bool isIdentifier(const std::string& value) noexcept;
-
- /**
- * @brief Check if the value is an operator in ArkScript
- *
- * @param value
- * @return true
- * @return false
- */
- bool isOperator(const std::string& value) noexcept;
-
- /**
- * @brief Check if a control character / sequence is complete or not
- *
- * @param sequence the sequence without the leading \\
- * @param next the next character to come, maybe, in the sequence
- * @return true
- * @return false
- */
- bool endOfControlChar(const std::string& sequence, char next) noexcept;
-
- /**
- * @brief To throw nice lexer errors
- *
- * @param message
- * @param match
- * @param line
- * @param col
- * @param context
- */
- [[noreturn]] void throwTokenizingError(const std::string& message, const std::string& match, std::size_t line, std::size_t col, const std::string& context);
- };
-}
-
-#endif
diff --git a/include/Ark/Compiler/AST/Module.hpp b/include/Ark/Compiler/AST/Module.hpp
new file mode 100644
index 000000000..4a0dac45a
--- /dev/null
+++ b/include/Ark/Compiler/AST/Module.hpp
@@ -0,0 +1,17 @@
+#ifndef ARK_MODULE_HPP
+#define ARK_MODULE_HPP
+
+#include
+
+namespace Ark::internal
+{
+ // TODO store something better than just the AST (AST+what we are importing as private/public/namespaced... vs all)
+ // so that we can remember the order in which we encountered imports.
+ struct Module
+ {
+ Node ast;
+ bool has_been_processed = false; // TODO document this
+ };
+}
+
+#endif // ARK_MODULE_HPP
diff --git a/include/Ark/Compiler/AST/Node.hpp b/include/Ark/Compiler/AST/Node.hpp
index d4c4755d7..cb1f18944 100644
--- a/include/Ark/Compiler/AST/Node.hpp
+++ b/include/Ark/Compiler/AST/Node.hpp
@@ -4,9 +4,9 @@
* @brief AST node used by the parser, optimizer and compiler
* @version 0.3
* @date 2020-10-27
- *
+ *
* @copyright Copyright (c) 2020-2021
- *
+ *
*/
#ifndef COMPILER_AST_NODE_HPP
@@ -18,251 +18,149 @@
#include
#include
+#include
namespace Ark::internal
{
/**
* @brief A node of an Abstract Syntax Tree for ArkScript
- *
+ *
*/
- class Node
+ class ARK_API Node
{
public:
- using Value = std::variant;
-
- /**
- * @brief Provide a statically initialized / correct and guaranteed to be initialized Node representing "true"
- */
- static const Node& getTrueNode();
-
- /**
- * @brief Provide a statically initialized / correct and guaranteed to be initialized Node representing "false"
- */
- static const Node& getFalseNode();
-
- /**
- * @brief Provide a statically initialized / correct and guaranteed to be initialized Node representing "Nil"
- */
- static const Node& getNilNode();
-
- /**
- * @brief Provide a statically initialized / correct and guaranteed to be initialized Node representing "Empty List"
- */
- static const Node& getListNode();
+ using Value = std::variant>;
Node() = default;
- /**
- * @brief Construct a new Node object
- *
- * @param value
- */
- explicit Node(long value) noexcept;
-
- /**
- * @brief Construct a new Node object
- *
- * @param value
- */
- explicit Node(double value) noexcept;
-
- /**
- * @brief Construct a new Node object
- *
- * @param value
- */
- explicit Node(const std::string& value) noexcept;
-
- /**
- * @brief Construct a new Node object
- *
- * @param value
- */
- explicit Node(Keyword value) noexcept;
+ Node(NodeType node_type, const std::string& value);
- /**
- * @brief Construct a new Node object, does not set the value
- *
- * @param type
- */
- explicit Node(NodeType type) noexcept;
-
- /**
- * @brief Construct a new Node object
- *
- * @param other
- */
- Node(const Node& other) noexcept;
-
- /**
- * @brief Construct a new Node object
- *
- * @param other
- */
- Node& operator=(Node other) noexcept;
-
- /**
- * @brief Construct a new Node object
- *
- * @param other
- */
- void swap(Node& other) noexcept;
+ explicit Node(NodeType node_type);
+ explicit Node(double value);
+ explicit Node(long value);
+ explicit Node(int value);
+ explicit Node(Keyword value);
+ explicit Node(const std::vector& nodes);
/**
* @brief Return the string held by the value (if the node type allows it)
- *
- * @return const std::string&
+ *
+ * @return const std::string&
*/
const std::string& string() const noexcept;
/**
* @brief Return the number held by the value (if the node type allows it)
- *
- * @return double
+ *
+ * @return double
*/
double number() const noexcept;
/**
* @brief Return the keyword held by the value (if the node type allows it)
- *
- * @return Keyword
+ *
+ * @return Keyword
*/
Keyword keyword() const noexcept;
/**
* @brief Every node has a list as well as a value so we can push_back on all node no matter their type
- *
+ *
* @param node a sub-node to push on the list held by the current node
*/
void push_back(const Node& node) noexcept;
/**
* @brief Return the list of sub-nodes held by the node
- *
- * @return std::vector&
+ *
+ * @return std::vector&
*/
std::vector& list() noexcept;
/**
* @brief Return the list of sub-nodes held by the node
- *
- * @return const std::vector&
+ *
+ * @return const std::vector&
*/
const std::vector& constList() const noexcept;
/**
* @brief Return the node type
- *
- * @return NodeType
+ *
+ * @return NodeType
*/
NodeType nodeType() const noexcept;
/**
* @brief Set the Node Type object
- *
- * @param type
+ *
+ * @param type
*/
void setNodeType(NodeType type) noexcept;
/**
* @brief Set the String object
- *
- * @param value
+ *
+ * @param value
*/
void setString(const std::string& value) noexcept;
- /**
- * @brief Set the Number object
- *
- * @param value
- */
- void setNumber(double value) noexcept;
-
- /**
- * @brief Set the Keyword object
- *
- * @param kw
- */
- void setKeyword(Keyword kw) noexcept;
-
/**
* @brief Set the Position of the node in the text
- *
- * @param line
- * @param col
+ *
+ * @param line
+ * @param col
*/
void setPos(std::size_t line, std::size_t col) noexcept;
/**
* @brief Set the original Filename where the node was
- *
- * @param filename
+ *
+ * @param filename
*/
void setFilename(const std::string& filename) noexcept;
/**
* @brief Get the line at which this node was created
- *
- * @return std::size_t
+ *
+ * @return std::size_t
*/
std::size_t line() const noexcept;
/**
* @brief Get the column at which this node was created
- *
- * @return std::size_t
+ *
+ * @return std::size_t
*/
std::size_t col() const noexcept;
/**
* @brief Return the filename in which this node was created
- *
- * @return const std::string&
+ *
+ * @return const std::string&
*/
const std::string& filename() const noexcept;
- friend std::ostream& operator<<(std::ostream& os, const Node& N) noexcept;
+ friend ARK_API std::ostream& operator<<(std::ostream& os, const Node& N) noexcept;
friend void swap(Node& lhs, Node& rhs) noexcept;
friend bool operator==(const Node& A, const Node& B);
friend bool operator<(const Node& A, const Node& B);
friend bool operator!(const Node& A);
private:
- /**
- * @brief Construct a new Node object.
- * This is private because it is only used by the static member of this class
- * to generate specialized versions of the node.
- *
- * @param value
- * @param type
- */
- explicit Node(const std::string& value, NodeType const& type) noexcept;
NodeType m_type;
Value m_value;
- std::vector m_list;
// position of the node in the original code, useful when it comes to parser errors
std::size_t m_line = 0, m_col = 0;
std::string m_filename = "";
};
- std::ostream& operator<<(std::ostream& os, const std::vector& N) noexcept;
-
- template
- Node make_node(T&& value, std::size_t line, std::size_t col, const std::string& file)
- {
- Node n(std::forward(value));
- n.setPos(line, col);
- n.setFilename(file);
- return n;
- }
+ ARK_API std::ostream& operator<<(std::ostream& os, const std::vector& node) noexcept;
- inline Node make_node_list(std::size_t line, std::size_t col, const std::string& file)
- {
- Node n(NodeType::List);
- n.setPos(line, col);
- n.setFilename(file);
- return n;
- }
+ const Node& getTrueNode();
+ const Node& getFalseNode();
+ const Node& getNilNode();
+ const Node& getListNode();
inline std::string typeToString(const Node& node) noexcept
{
diff --git a/include/Ark/Compiler/AST/Optimizer.hpp b/include/Ark/Compiler/AST/Optimizer.hpp
index d711caf99..31647fc0f 100644
--- a/include/Ark/Compiler/AST/Optimizer.hpp
+++ b/include/Ark/Compiler/AST/Optimizer.hpp
@@ -20,7 +20,6 @@
#include
#include
#include
-#include
namespace Ark::internal
{
@@ -42,7 +41,7 @@ namespace Ark::internal
*
* @param ast
*/
- void feed(const Node& ast);
+ void process(const Node& ast);
/**
* @brief Returns the modified AST
diff --git a/include/Ark/Compiler/AST/Parser.hpp b/include/Ark/Compiler/AST/Parser.hpp
index 7d8f9a981..71a615293 100644
--- a/include/Ark/Compiler/AST/Parser.hpp
+++ b/include/Ark/Compiler/AST/Parser.hpp
@@ -1,180 +1,214 @@
-/**
- * @file Parser.hpp
- * @author Alexandre Plateau (lexplt.dev@gmail.com)
- * @brief Parses a token stream into an AST by using the Ark::Node
- * @version 0.4
- * @date 2020-10-27
- *
- * @copyright Copyright (c) 2020-2021
- *
- */
-
#ifndef COMPILER_AST_PARSER_HPP
#define COMPILER_AST_PARSER_HPP
+#include
+#include
+#include
+#include
+#include
+
#include
-#include
-#include
+#include
#include
-#include
+#include
-#include
-#include
-#include
+#include
namespace Ark::internal
{
- inline NodeType similarNodetypeFromTokentype(TokenType tt)
- {
- if (tt == TokenType::Capture)
- return NodeType::Capture;
- else if (tt == TokenType::GetField)
- return NodeType::GetField;
- else if (tt == TokenType::Spread)
- return NodeType::Spread;
-
- return NodeType::Symbol;
- }
-
- /**
- * @brief The parser is responsible of constructing the Abstract Syntax Tree from a token list
- *
- */
- class Parser
+ class ARK_API Parser : public BaseParser
{
public:
- /**
- * @brief Construct a new Parser object
- *
- * @param debug the debug level
- * @param options the parsing options
- * @param lib_env fallback library search path
- */
- Parser(unsigned debug, uint16_t options, const std::vector& lib_env) noexcept;
-
- /**
- * @brief Give the code to parse
- *
- * @param code the ArkScript code
- * @param filename the name of the file
- */
- void feed(const std::string& code, const std::string& filename = ARK_NO_NAME_FILE);
-
- /**
- * @brief Return the generated AST
- *
- * @return const Node&
- */
- const Node& ast() const noexcept;
-
- /**
- * @brief Return the list of files imported by the code given to the parser
- *
- * Each path of each imported file is relative to the filename given when feeding the parser.
- *
- * @return const std::vector&
- */
- const std::vector& getImports() const noexcept;
-
- friend std::ostream& operator<<(std::ostream& os, const Parser& P) noexcept;
+ Parser();
+
+ void processFile(const std::string& filename);
+ void processString(const std::string& code);
+
+ const Node& ast() const;
+ const std::vector& imports() const;
private:
- unsigned m_debug;
- std::vector m_libenv;
- uint16_t m_options;
- Lexer m_lexer;
Node m_ast;
- Token m_last_token;
-
- // path of the current file
- std::string m_file;
- // source code of the current file
- std::string m_code;
- // the files included by the "includer" to avoid multiple includes
- std::vector m_parent_include;
-
- /**
- * @brief Applying syntactic sugar: {...} => (begin...), [...] => (list ...)
- *
- * @param tokens a list of tokens
- */
- void sugar(std::vector& tokens) noexcept;
-
- /**
- * @brief Parse a list of tokens recursively
- *
- * @param tokens
- * @param authorize_capture if we are authorized to consume TokenType::Capture tokens
- * @param authorize_field_read if we are authorized to consume TokenType::GetField tokens
- * @param in_macro if we are in a macro, there a bunch of things we can tolerate
- * @return Node
- */
- Node parse(std::list& tokens, bool authorize_capture = false, bool authorize_field_read = false, bool in_macro = false);
-
- void parseIf(Node&, std::list&, bool);
- void parseLetMut(Node&, Token&, std::list&, bool);
- void parseSet(Node&, Token&, std::list&, bool);
- void parseFun(Node&, Token&, std::list&, bool);
- void parseWhile(Node&, Token&, std::list&, bool);
- void parseBegin(Node&, std::list&, bool);
- void parseImport(Node&, std::list&);
- void parseQuote(Node&, std::list&, bool);
- void parseDel(Node&, std::list&);
- Node parseShorthand(Token&, std::list&, bool);
- void checkForInvalidTokens(Node&, Token&, bool, bool, bool);
-
- /**
- * @brief Get the next token if possible, from a list of tokens
- *
- * The list of tokens is modified.
- *
- * @param tokens list of tokens to get the next token from
- * @return Token
- */
- Token nextToken(std::list& tokens);
-
- /**
- * @brief Convert a token to a node
- *
- * @param token the token to converts
- * @return Node
- */
- Node atom(const Token& token);
-
- /**
- * @brief Search for all the includes in a given node, in its sub-nodes and replace them by the code of the included file
- *
- * @param n
- * @param parent the parent node of the current one
- * @param pos the position of the child node in the parent node list
- * @return true if we found an import and replaced it by the corresponding code
- */
- bool checkForInclude(Node& n, Node& parent, std::size_t pos = 0);
-
- /**
- * @brief Seek a file in the lib folder and everywhere
- *
- * @param file
- * @return std::string
- */
- std::string seekFile(const std::string& file);
-
- /**
- * @brief Throw a parse exception is the given predicated is false
- *
- * @param pred
- * @param message error message to use
- * @param token concerned token
- */
- void expect(bool pred, const std::string& message, Token token);
-
- /**
- * @brief Throw a parse error related to a token (seek it in the related file and highlight the error)
- *
- * @param message
- * @param token
- */
- [[noreturn]] void throwParseError(const std::string& message, Token token);
+ std::vector m_imports;
+ unsigned m_allow_macro_behavior; ///< Toggled on when inside a macro definition, off afterward
+
+ void run();
+
+ std::optional node();
+ std::optional letMutSet();
+ std::optional del();
+ std::optional condition();
+ std::optional loop();
+ std::optional import_();
+ std::optional block();
+ std::optional functionArgs();
+ std::optional function();
+ std::optional macroCondition();
+ std::optional macroBlock();
+ std::optional macroArgs();
+ std::optional macro();
+ std::optional functionCall();
+ std::optional list();
+
+        inline std::optional number()
+        {
+            // Remember where the candidate starts so a bad number is reported from its beginning
+            const auto start = getCount();
+
+            std::string text;
+            if (!signedNumber(&text))
+                return std::nullopt;
+
+            double value;
+            if (Utils::isDouble(text, &value))
+                return Node(value);
+
+            // signedNumber matched, yet the text does not convert to a double
+            backtrack(start);
+            error("Is not a valid number", text);
+            return std::nullopt;
+        }
+
+ inline std::optional string() // Parse a double-quoted string literal and decode its escape sequences; std::nullopt when there is no opening quote
+ {
+ std::string res; // decoded content, without the surrounding quotes
+ if (accept(IsChar('"')))
+ {
+ while (true)
+ {
+ if (accept(IsChar('\\'))) // a backslash introduces an escape sequence
+ {
+ if (accept(IsChar('"')))
+ res += '\"';
+ else if (accept(IsChar('\\')))
+ res += '\\';
+ else if (accept(IsChar('n')))
+ res += '\n';
+ else if (accept(IsChar('t')))
+ res += '\t';
+ else if (accept(IsChar('v')))
+ res += '\v';
+ else if (accept(IsChar('r')))
+ res += '\r';
+ else if (accept(IsChar('a')))
+ res += '\a';
+ else if (accept(IsChar('b')))
+ res += '\b';
+ else if (accept(IsChar('0')))
+ res += '\0';
+ else if (accept(IsChar('f')))
+ res += '\f';
+ else if (accept(IsChar('u'))) // lowercase u: 4 hex digits are requested from hexNumber
+ {
+ std::string seq;
+ if (hexNumber(4, &seq))
+ {
+ char utf8_str[5];
+ utf8::decode(seq.c_str(), utf8_str);
+ if (*utf8_str == '\0') // an empty result is reported as an invalid escape
+ error("Invalid escape sequence", "\\u" + seq);
+ res += utf8_str;
+ }
+ else
+ error("Invalid escape sequence", "\\u");
+ }
+ else if (accept(IsChar('U'))) // uppercase U: 8 hex digits are requested from hexNumber
+ {
+ std::string seq;
+ if (hexNumber(8, &seq))
+ {
+ std::size_t begin = 0;
+ for (; seq[begin] == '0'; ++begin) // skip leading zeros before handing the digits to utf8::decode
+ ;
+ char utf8_str[5];
+ utf8::decode(seq.c_str() + begin, utf8_str);
+ if (*utf8_str == '\0') // an empty result is reported as an invalid escape
+ error("Invalid escape sequence", "\\U" + seq);
+ res += utf8_str;
+ }
+ else
+ error("Invalid escape sequence", "\\U");
+ }
+ else
+ {
+ backtrack(getCount() - 1); // step back one position before reporting (presumably onto the backslash, TODO confirm)
+ error("Unknown escape sequence", "\\");
+ }
+ }
+ else
+ accept(IsNot(IsEither(IsChar('\\'), IsChar('"'))), &res); // ordinary content: anything that is neither a backslash nor a double quote
+
+ if (accept(IsChar('"'))) // closing quote: the literal is complete
+ break;
+ else if (isEOF()) // input ended before the closing quote
+ errorMissingSuffix('"', "string");
+ }
+
+ return Node(NodeType::String, res);
+ }
+ return std::nullopt;
+ }
+
+        inline std::optional field()
+        {
+            std::string head;
+            if (!name(&head))
+                return std::nullopt;
+
+            Node chain(NodeType::Field);
+            chain.push_back(Node(NodeType::Symbol, head));
+
+            for (;;)  // one ".name" segment is consumed per iteration
+            {
+                if (!accept(IsChar('.')))
+                {
+                    if (chain.list().size() == 1)  // a lone name with no dot is not a field access
+                        return std::nullopt;
+                    break;
+                }
+                std::string segment;
+                if (!name(&segment))
+                    errorWithNextToken("Expected a field name: .");
+                chain.push_back(Node(NodeType::Symbol, segment));
+            }
+            return chain;
+        }
+
+        inline std::optional symbol()
+        {
+            std::string identifier;  // handed to name(), then used as the symbol's text
+            if (name(&identifier))
+                return Node(NodeType::Symbol, identifier);
+            return std::nullopt;  // name() did not match
+        }
+
+        inline std::optional spread()
+        {
+            // A spread is the literal "..." followed by a name
+            if (!sequence("..."))
+                return std::nullopt;
+
+            std::string variadic;
+            if (!name(&variadic))
+                errorWithNextToken("Expected a name for the variadic");
+            return Node(NodeType::Spread, variadic);
+        }
+
+        inline std::optional nil()
+        {
+            if (accept(IsChar('(')))  // nil is written as an empty pair of parentheses
+            {
+                newlineOrComment();
+                if (accept(IsChar(')')))
+                    return Node(NodeType::Symbol, "nil");
+            }
+            return std::nullopt;  // nothing is backtracked here
+        }
+
+ std::optional atom();
+ std::optional anyAtomOf(std::initializer_list types);
+ std::optional nodeOrValue();
+ std::optional wrapped(std::optional (Parser::*parser)(), const std::string& name, char prefix, char suffix);
};
}
diff --git a/include/Ark/Compiler/AST/Predicates.hpp b/include/Ark/Compiler/AST/Predicates.hpp
new file mode 100644
index 000000000..d36727046
--- /dev/null
+++ b/include/Ark/Compiler/AST/Predicates.hpp
@@ -0,0 +1,201 @@
+#ifndef SRC_PREDICATES_HPP
+#define SRC_PREDICATES_HPP
+
+#include
+#include
+
+#include
+
+namespace Ark::internal
+{
+    struct CharPred  // Abstract base of the character predicates: a named test over one code point
+    {
+        const std::string name;  ///< label of the predicate; IsEither and IsNot reuse it to build their own
+
+        CharPred(const std::string& n) : name(n) {}
+        virtual ~CharPred() = default;  // polymorphic base: destruction through a CharPred must be well-defined
+
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const = 0;  ///< @return true when c matches
+    };
+
+    inline struct IsSpace : public CharPred  // whitespace, as classified by std::isspace
+    {
+        IsSpace() : CharPred("space") {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool in_range = 0 <= c && c <= 255;  // the <cctype> classifiers need an unsigned char value
+            return in_range && std::isspace(c) != 0;
+        }
+    } IsSpace;
+
+    inline struct IsInlineSpace : public CharPred  // whitespace that does not end a line
+    {
+        IsInlineSpace() : CharPred("inline space") {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool in_range = 0 <= c && c <= 255;  // the <cctype> classifiers need an unsigned char value
+            return in_range && std::isspace(c) != 0 && c != '\n' && c != '\r';
+        }
+    } IsInlineSpace;
+
+    inline struct IsDigit : public CharPred  // decimal digit, as classified by std::isdigit
+    {
+        IsDigit() : CharPred("digit") {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool in_range = 0 <= c && c <= 255;  // the <cctype> classifiers need an unsigned char value
+            return in_range && std::isdigit(c) != 0;
+        }
+    } IsDigit;
+
+    inline struct IsHex : public CharPred  // hexadecimal digit, as classified by std::isxdigit
+    {
+        IsHex() : CharPred("hex") {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool in_range = 0 <= c && c <= 255;  // the <cctype> classifiers need an unsigned char value
+            return in_range && std::isxdigit(c) != 0;
+        }
+    } IsHex;
+
+    inline struct IsUpper : public CharPred  // uppercase letter, as classified by std::isupper
+    {
+        IsUpper() : CharPred("uppercase") {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool in_range = 0 <= c && c <= 255;  // the <cctype> classifiers need an unsigned char value
+            return in_range && std::isupper(c) != 0;
+        }
+    } IsUpper;
+
+    inline struct IsLower : public CharPred  // lowercase letter, as classified by std::islower
+    {
+        IsLower() : CharPred("lowercase") {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool in_range = 0 <= c && c <= 255;  // the <cctype> classifiers need an unsigned char value
+            return in_range && std::islower(c) != 0;
+        }
+    } IsLower;
+
+    inline struct IsAlpha : public CharPred  // letter, as classified by std::isalpha
+    {
+        IsAlpha() : CharPred("alphabetic") {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool in_range = 0 <= c && c <= 255;  // the <cctype> classifiers need an unsigned char value
+            return in_range && std::isalpha(c) != 0;
+        }
+    } IsAlpha;
+
+    inline struct IsAlnum : public CharPred  // letter or digit, as classified by std::isalnum
+    {
+        IsAlnum() : CharPred("alphanumeric") {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool in_range = 0 <= c && c <= 255;  // the <cctype> classifiers need an unsigned char value
+            return in_range && std::isalnum(c) != 0;
+        }
+    } IsAlnum;
+
+    inline struct IsPrint : public CharPred  // printable character, as classified by std::isprint
+    {
+        IsPrint() : CharPred("printable") {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool in_range = 0 <= c && c <= 255;  // the <cctype> classifiers need an unsigned char value
+            return in_range && std::isprint(c) != 0;
+        }
+    } IsPrint;
+
+    struct IsChar : public CharPred  // matches exactly one code point, fixed at construction
+    {
+        /// Match a plain char; the predicate is named after the quoted character
+        explicit IsChar(const char c) : CharPred("'" + std::string(1, c) + "'"), m_k(c) {}
+
+        /// Match a decoded UTF-8 character; the predicate is named after its c_str() bytes
+        explicit IsChar(const utf8_char_t c) : CharPred(std::string(c.c_str())), m_k(c.codepoint()) {}
+
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            return c == m_k;
+        }
+
+    private:
+        const utf8_char_t::codepoint_t m_k;  ///< the code point to compare against
+    };
+
+    struct IsEither : public CharPred  // logical OR of two predicates
+    {
+        // Only references are kept: both operands must outlive this object
+        explicit IsEither(const CharPred& a, const CharPred& b) :
+            CharPred("(" + a.name + " | " + b.name + ")"), m_a(a), m_b(b) {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            return m_a(c) ? true : m_b(c);  // m_b is only consulted when m_a rejects c
+        }
+
+    private:
+        const CharPred& m_a;
+        const CharPred& m_b;
+    };
+
+    struct IsNot : public CharPred  // logical negation of a predicate
+    {
+        // Only a reference is kept: the operand must outlive this object
+        explicit IsNot(const CharPred& a) : CharPred("~" + a.name), m_a(a) {}
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            const bool matched = m_a(c);
+            return !matched;
+        }
+
+    private:
+        const CharPred& m_a;
+    };
+
+    inline struct IsSymbol : public CharPred
+    {
+        IsSymbol() : CharPred("sym") {}
+
+        /**
+         * @brief Tell whether c is one of the fifteen punctuation characters compared below
+         *
+         * @param c code point to test
+         * @return true on a match with any listed character, false for every other code point
+         */
+        virtual bool operator()(const utf8_char_t::codepoint_t c) const override
+        {
+            // evaluated left to right, stopping at the first match
+            return c == ':' ||
+                c == '!' ||
+                c == '?' ||
+                c == '@' ||
+                c == '_' ||
+                c == '-' ||
+                c == '+' ||
+                c == '*' ||
+                c == '/' ||
+                c == '|' ||
+                c == '=' ||
+                c == '<' ||
+                c == '>' ||
+                c == '%' ||
+                c == '$';
+        }
+    } IsSymbol;
+
+    inline struct IsAny : public CharPred  // wildcard: accepts every code point
+    {
+        IsAny() : CharPred("any") {}
+        // the argument is left unnamed: it plays no part in the answer
+        virtual bool operator()(const utf8_char_t::codepoint_t) const override
+        {
+            return true;
+        }
+    } IsAny;
+
+ const IsChar IsMinus('-');
+}
+
+#endif
diff --git a/include/Ark/Compiler/AST/Token.hpp b/include/Ark/Compiler/AST/Token.hpp
deleted file mode 100644
index 77ec9ec84..000000000
--- a/include/Ark/Compiler/AST/Token.hpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/**
- * @file Token.hpp
- * @author Alexandre Plateau (lexplt.dev@gmail.com)
- * @brief Token definition for ArkScript
- * @version 0.1
- * @date 2021-10-02
- *
- * @copyright Copyright (c) 2021
- *
- */
-
-#ifndef ARK_COMPILER_AST_TOKEN_HPP
-#define ARK_COMPILER_AST_TOKEN_HPP
-
-#include
-#include
-#include
-
-namespace Ark::internal
-{
- enum class TokenType
- {
- Grouping,
- String,
- Number,
- Operator,
- Identifier,
- Capture,
- GetField,
- Keyword,
- Skip,
- Comment,
- Shorthand,
- Spread,
- Mismatch
- };
-
- // TokenType to string
- constexpr std::array tokentype_string = {
- "Grouping",
- "String",
- "Number",
- "Operator",
- "Identifier",
- "Capture",
- "GetField",
- "Keyword",
- "Skip",
- "Comment",
- "Shorthand",
- "Spread",
- "Mistmatch"
- };
-
- struct Token
- {
- TokenType type;
- std::string token;
- std::size_t line;
- std::size_t col;
-
- /**
- * @brief Construct a new Token object
- *
- */
- Token() = default;
-
- /**
- * @brief Construct a new Token object
- *
- * @param type the token type
- * @param tok the token value
- * @param line the line where we found the token
- * @param col the column at which was the token
- */
- Token(TokenType type, const std::string& tok, std::size_t line, std::size_t col) noexcept :
- type(type), token(tok), line(line), col(col)
- {}
- };
-}
-
-#endif
diff --git a/include/Ark/Compiler/AST/makeErrorCtx.hpp b/include/Ark/Compiler/AST/makeErrorCtx.hpp
deleted file mode 100644
index 782d461b4..000000000
--- a/include/Ark/Compiler/AST/makeErrorCtx.hpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * @file makeErrorCtx.hpp
- * @author Alexandre Plateau (lexplt.dev@gmail.com)
- * @brief Create string error context for AST errors
- * @version 0.2
- * @date 2022-02-19
- *
- * @copyright Copyright (c) 2020-2022
- *
- */
-
-#ifndef COMPILER_AST_MAKEERRORCTX_HPP
-#define COMPILER_AST_MAKEERRORCTX_HPP
-
-#include
-#include
-
-#include
-
-namespace Ark::internal
-{
- struct LineColorContextCounts
- {
- int open_parentheses = 0;
- int open_square_braces = 0;
- int open_curly_braces = 0;
- };
-
- /**
- * @brief Construct an error message based on a given node
- * @details It opens the related file at the line and column of the node,
- * and display context, plus underline the problem with a serie of ^.
- *
- * @param message
- * @param node
- * @return std::string the complete generated error message
- */
- std::string makeNodeBasedErrorCtx(const std::string& message, const Node& node);
-
- /**
- * @brief Construct an error message based on a given match in the code
- * @details Mostly used by the Lexer and Parser since they don't have Nodes to work on
- *
- * @param match the identified token, causing a problem
- * @param line line of the token
- * @param col starting column of the token
- * @param code the whole code of the file
- * @return std::string the complete generated error message
- */
- std::string makeTokenBasedErrorCtx(const std::string& match, std::size_t line, std::size_t col, const std::string& code);
-
- /**
- * @brief Add colors to highlight matching parentheses/curly braces/square braces on a line
- *
- * @param line the line of code to colorize
- * @param line_color_context_counts a LineColorContextCounts to manipulate the running counts of open pairings
- * @return std::string a colorized line of code
- */
- std::string colorizeLine(const std::string& line, LineColorContextCounts& line_color_context_counts);
-
- /**
- * @brief Check if the character passed in can be paired (parentheses, curly braces, or square braces)
- *
- * @param c
- * @return bool
- */
- inline bool isPairableChar(const char c)
- {
- return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}';
- }
-}
-
-#endif
diff --git a/include/Ark/Compiler/AST/utf8_char.hpp b/include/Ark/Compiler/AST/utf8_char.hpp
new file mode 100644
index 000000000..9d639ac46
--- /dev/null
+++ b/include/Ark/Compiler/AST/utf8_char.hpp
@@ -0,0 +1,85 @@
+#ifndef SRC_UTF8_CHAR_HPP
+#define SRC_UTF8_CHAR_HPP
+
+#include
+#include
+#include
+#include
+
+#undef max
+
+namespace Ark::internal
+{
+ class utf8_char_t // One decoded UTF-8 character: its code point, its encoded length and a copy of its bytes
+ {
+ public:
+ using codepoint_t = int;
+ using length_t = unsigned char;
+ using repr_t = std::array;
+
+ utf8_char_t() : // empty character: code point 0, length 0
+ m_codepoint(0), m_length(0), m_repr({ 0 }) {}
+
+ utf8_char_t(codepoint_t cp, length_t len, repr_t&& repr) : // assemble a character from already decoded parts
+ m_codepoint(cp), m_length(len), m_repr(repr) {}
+
+ // https://github.com/sheredom/utf8.h/blob/4e4d828174c35e4564c31a9e35580c299c69a063/utf8.h#L1178
+ static std::pair at(std::string::iterator it) // decode the sequence starting at it; returns the iterator just past it and the decoded character
+ {
+ codepoint_t codepoint = 0;
+ length_t length = 0;
+ repr_t repr = { 0 };
+
+ // NOTE(review): no end iterator is received, so the continuation bytes below are read without a bounds check
+ if (0xf0 == (0xf8 & *it)) // 4 byte utf8 codepoint
+ {
+ codepoint = (static_cast(0x07 & *it) << 18) |
+ (static_cast(0x3f & *(it + 1)) << 12) |
+ (static_cast(0x3f & *(it + 2)) << 6) |
+ static_cast(0x3f & *(it + 3));
+ length = 4;
+ }
+ else if (0xe0 == (0xf0 & *it)) // 3 byte utf8 codepoint
+ {
+ codepoint = (static_cast(0x0f & *it) << 12) |
+ (static_cast(0x3f & *(it + 1)) << 6) |
+ static_cast(0x3f & *(it + 2));
+ length = 3;
+ }
+ else if (0xc0 == (0xe0 & *it)) // 2 byte utf8 codepoint
+ {
+ codepoint = (static_cast(0x1f & *it) << 6) |
+ static_cast(0x3f & *(it + 1));
+ length = 2;
+ }
+ else // 1 byte utf8 codepoint otherwise
+ {
+ codepoint = static_cast(*it);
+ length = 1;
+ }
+
+ for (length_t i = 0; i < length; ++i) // keep a copy of the encoded bytes
+ repr[i] = static_cast(*(it + static_cast(i)));
+
+ return std::make_pair(it + static_cast(length),
+ utf8_char_t(codepoint, length, std::move(repr)));
+ }
+
+ bool isPrintable() const // std::isprint is only consulted below the numeric_limits bound; code points at or above it are reported printable
+ {
+ if (m_codepoint < std::numeric_limits::max())
+ return std::isprint(m_codepoint);
+ return true;
+ }
+
+ const char* c_str() const { return reinterpret_cast(m_repr.data()); } // NOTE(review): used as a C string by callers, so m_repr must keep a zero after the encoded bytes (confirm repr_t's size)
+ std::size_t size() const { return static_cast(m_length); } // number of encoded bytes
+ codepoint_t codepoint() const { return m_codepoint; }
+
+ private:
+ codepoint_t m_codepoint;
+ length_t m_length;
+ repr_t m_repr;
+ };
+}
+
+#endif
diff --git a/include/Ark/Compiler/Common.hpp b/include/Ark/Compiler/Common.hpp
index 3d2391154..a7e2b4393 100644
--- a/include/Ark/Compiler/Common.hpp
+++ b/include/Ark/Compiler/Common.hpp
@@ -29,26 +29,26 @@ namespace Ark::internal
{
Symbol,
Capture,
- GetField,
Keyword,
String,
Number,
List,
- Macro,
Spread,
+ Field,
+ Macro,
Unused
};
constexpr std::array nodeTypes = {
"Symbol",
"Capture",
- "GetField",
"Keyword",
"String",
"Number",
"List",
- "Macro",
"Spread",
+ "Field",
+ "Macro",
"Unused"
};
@@ -63,12 +63,11 @@ namespace Ark::internal
While,
Begin,
Import,
- Quote,
Del
};
/// List of available keywords in ArkScript
- constexpr std::array keywords = {
+ constexpr std::array keywords = {
"fun",
"let",
"mut",
@@ -77,7 +76,6 @@ namespace Ark::internal
"while",
"begin",
"import",
- "quote",
"del"
};
diff --git a/include/Ark/Compiler/Compiler.hpp b/include/Ark/Compiler/Compiler.hpp
index a5a8d4524..1c0f05615 100644
--- a/include/Ark/Compiler/Compiler.hpp
+++ b/include/Ark/Compiler/Compiler.hpp
@@ -2,7 +2,7 @@
* @file Compiler.hpp
* @author Alexandre Plateau (lexplt.dev@gmail.com)
* @brief ArkScript compiler is in charge of transforming the AST into bytecode
- * @version 1.2
+ * @version 1.3
* @date 2020-10-27
*
* @copyright Copyright (c) 2020-2021
@@ -21,13 +21,12 @@
#include
#include
#include
-#include
-#include
#include
namespace Ark
{
class State;
+ class Welder;
/**
* @brief The ArkScript bytecode compiler
@@ -40,44 +39,27 @@ namespace Ark
* @brief Construct a new Compiler object
*
* @param debug the debug level
- * @param options the compilers options
*/
- Compiler(unsigned debug, const std::vector& libenv, uint16_t options = DefaultFeatures);
-
- /**
- * @brief Feed the differents variables with information taken from the given source code file
- *
- * @param code the code of the file
- * @param filename the name of the file
- */
- void feed(const std::string& code, const std::string& filename = ARK_NO_NAME_FILE);
+ Compiler(unsigned debug);
/**
* @brief Start the compilation
*
+ * @param ast
*/
- void compile();
-
- /**
- * @brief Save generated bytecode to a file
- *
- * @param file the name of the file where the bytecode will be saved
- */
- void saveTo(const std::string& file);
+ void process(const internal::Node& ast);
/**
* @brief Return the constructed bytecode object
*
* @return const bytecode_t&
*/
- const bytecode_t& bytecode() noexcept;
+ const bytecode_t& bytecode() const noexcept;
friend class Ark::State;
+ friend class Ark::Welder;
private:
- internal::Parser m_parser;
- internal::Optimizer m_optimizer;
- uint16_t m_options;
// tables: symbols, values, plugins and codes
std::vector m_symbols;
std::vector m_defined_symbols;
@@ -127,16 +109,6 @@ namespace Ark
return &m_temp_pages[-i - 1];
}
- /**
- * @brief Count the number of "valid" ark objects in a node
- * @details Isn't considered valid a GetField, because we use
- * this function to count the number of arguments of function calls.
- *
- * @param lst
- * @return std::size_t
- */
- std::size_t countArkObjects(const std::vector& lst) noexcept;
-
/**
* @brief Checking if a symbol is an operator
*
@@ -238,7 +210,6 @@ namespace Ark
void compileFunction(const internal::Node& x, int p, bool is_result_unused, const std::string& var_name);
void compileLetMutSet(internal::Keyword n, const internal::Node& x, int p);
void compileWhile(const internal::Node& x, int p);
- void compileQuote(const internal::Node& x, int p, bool is_result_unused, bool is_terminal, const std::string& var_name);
void compilePluginImport(const internal::Node& x, int p);
void handleCalls(const internal::Node& x, int p, bool is_result_unused, bool is_terminal, const std::string& var_name);
diff --git a/include/Ark/Compiler/ImportSolver.hpp b/include/Ark/Compiler/ImportSolver.hpp
new file mode 100644
index 000000000..fdbc3a799
--- /dev/null
+++ b/include/Ark/Compiler/ImportSolver.hpp
@@ -0,0 +1,61 @@
+#ifndef ARK_COMPILER_IMPORTSOLVER_HPP
+#define ARK_COMPILER_IMPORTSOLVER_HPP
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+namespace Ark::internal
+{
+ class ImportSolver final // Works on an AST and its list of imports; see findAndReplaceImports for the substitution step
+ {
+ public:
+ ImportSolver(unsigned debug, const std::vector& libenv);
+
+ void process(const std::filesystem::path& root, const Node& origin_ast, const std::vector& origin_imports);
+
+ const Node& ast() const noexcept;
+
+ private:
+ unsigned m_debug;
+ std::vector m_libenv;
+ std::filesystem::path m_root; ///< Folder where the entry file is
+ Node m_ast;
+ std::unordered_map m_modules; ///< Package to module map
+ // TODO is this ok? is this fine? this is sort of ugly
+ std::vector m_imported; ///< List of imports, in the order they were found and parsed
+
+ /**
+ * @brief Visit the AST, looking for import nodes to replace with their parsed module version
+ * @param ast tree to visit
+ * @return a std::pair; NOTE(review): the meaning of each element is not documented here, TODO describe it
+ */
+ std::pair findAndReplaceImports(const Node& ast);
+
+ /**
+ * @brief Parse a given file and return a list of its imports.
+ * The AST is parsed and stored in m_modules[import.prefix]
+ *
+ * @param file path to the file containing the import
+ * @param import current import directive
+ * @return std::vector imports found in the processed file
+ */
+ std::vector parseImport(const std::filesystem::path& file, const Import& import);
+
+ /**
+ * @brief Search for an import file, using the root file path
+ *
+ * @param file path to the file containing the import
+ * @param import current import directive
+ * @return std::filesystem::path
+ */
+ std::filesystem::path findFile(const std::filesystem::path& file, const Import& import);
+ };
+}
+
+#endif
diff --git a/include/Ark/Compiler/JsonCompiler.hpp b/include/Ark/Compiler/JsonCompiler.hpp
index 9c50b2a3a..785bc7342 100644
--- a/include/Ark/Compiler/JsonCompiler.hpp
+++ b/include/Ark/Compiler/JsonCompiler.hpp
@@ -3,33 +3,31 @@
#include
#include
+#include
#include
#include
#include
-#include
-#include
+#include
namespace Ark
{
- class ARK_API JsonCompiler
+ class ARK_API JsonCompiler final
{
public:
/**
* @brief Construct a new JsonCompiler object
*
* @param debug the debug level
- * @param options the compilers options
*/
- JsonCompiler(unsigned debug, const std::vector& libenv, uint16_t options = DefaultFeatures);
+ JsonCompiler(unsigned debug, const std::vector& libenv);
/**
* @brief Feed the differents variables with information taken from the given source code file
*
- * @param code the code of the file
* @param filename the name of the file
*/
- void feed(const std::string& code, const std::string& filename = ARK_NO_NAME_FILE);
+ void feed(const std::string& filename);
/**
* @brief Start the compilation
@@ -39,10 +37,7 @@ namespace Ark
std::string compile();
private:
- internal::Parser m_parser;
- internal::Optimizer m_optimizer;
- uint16_t m_options;
- unsigned m_debug; ///< the debug level of the compiler
+ Welder m_welder;
/**
* @brief Compile a single node and return its representation
diff --git a/include/Ark/Compiler/Macros/Processor.hpp b/include/Ark/Compiler/Macros/Processor.hpp
index db0803294..20b3c7864 100644
--- a/include/Ark/Compiler/Macros/Processor.hpp
+++ b/include/Ark/Compiler/Macros/Processor.hpp
@@ -34,16 +34,15 @@ namespace Ark::internal
* @brief Construct a new Macro Processor object
*
* @param debug the debug level
- * @param options the options flags
*/
- MacroProcessor(unsigned debug, uint16_t options) noexcept;
+ MacroProcessor(unsigned debug) noexcept;
/**
- * @brief Send the complete AST (after the inclusions and stuff), and work on it
+ * @brief Send the complete AST and work on it
*
* @param ast
*/
- void feed(const Node& ast);
+ void process(const Node& ast);
/**
* @brief Return the modified AST
@@ -141,7 +140,7 @@ namespace Ark::internal
* @param node node on which to operate
* @param depth
*/
- void process(Node& node, unsigned depth);
+ void processNode(Node& node, unsigned depth);
/**
* @brief Apply a macro on a given node
diff --git a/include/Ark/Compiler/Welder.hpp b/include/Ark/Compiler/Welder.hpp
new file mode 100644
index 000000000..dcfc90a36
--- /dev/null
+++ b/include/Ark/Compiler/Welder.hpp
@@ -0,0 +1,65 @@
+/**
+ * @file Welder.hpp
+ * @author Alexandre Plateau (lexplt.dev@gmail.com)
+ * @brief In charge of welding everything needed to compile code
+ * @version 0.2
+ * @date 2023-03-26
+ *
+ * @copyright Copyright (c) 2023
+ *
+ */
+
+#ifndef ARK_COMPILER_WELDER_HPP
+#define ARK_COMPILER_WELDER_HPP
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace Ark
+{
+ class ARK_API Welder final // Owns one instance of each compilation stage declared below: parser, import solver, macro processor, optimizer, compiler
+ {
+ public:
+ Welder(unsigned debug, const std::vector& libenv);
+
+ /**
+ * @brief Register a symbol as a global in the compiler
+ *
+ * @param name name of the symbol to register
+ */
+ void registerSymbol(const std::string& name);
+
+ bool computeASTFromFile(const std::string& filename); // NOTE(review): the meaning of the bool result is not documented here; presumably success/failure, confirm in the implementation
+ bool computeASTFromString(const std::string& code); // NOTE(review): same undocumented bool result as computeASTFromFile
+
+ bool generateBytecode(); // NOTE(review): undocumented bool result, presumably success/failure
+ bool saveBytecodeToFile(const std::string& filename); // NOTE(review): undocumented bool result, presumably success/failure
+
+ const internal::Node& ast() const noexcept;
+ const bytecode_t& bytecode() const noexcept;
+
+ private:
+ unsigned m_debug; ///< The debug level
+ std::filesystem::path m_root_file;
+ std::vector m_imports;
+ bytecode_t m_bytecode;
+
+ internal::Parser m_parser; // NOTE(review): stages are declared parser, importer, macro processor, optimizer, compiler; presumably the order they run in, confirm in the implementation
+ internal::ImportSolver m_importer;
+ internal::MacroProcessor m_macro_processor;
+ internal::Optimizer m_optimizer;
+ Compiler m_compiler;
+ };
+} // namespace Ark
+
+#endif
diff --git a/include/Ark/Exceptions.hpp b/include/Ark/Exceptions.hpp
index b45573b04..d556b43b0 100644
--- a/include/Ark/Exceptions.hpp
+++ b/include/Ark/Exceptions.hpp
@@ -4,9 +4,9 @@
* @brief ArkScript homemade exceptions
* @version 0.2
* @date 2020-10-27
- *
+ *
* @copyright Copyright (c) 2020-2021
- *
+ *
*/
#ifndef INCLUDE_ARK_EXCEPTIONS_HPP
@@ -16,32 +16,49 @@
#include
#include
#include
+#include
+#include
+#include
-#include
+#include
+#include
namespace Ark
{
+ namespace internal
+ {
+ class Node;
+ }
+
+ class Error : public std::runtime_error
+ {
+ public:
+ explicit Error(const std::string& message) :
+ std::runtime_error(message)
+ {}
+ };
+
/**
* @brief A type error triggered when types don't match
- *
+ *
*/
- class TypeError : public std::runtime_error
+ class TypeError : public Error
{
public:
explicit TypeError(const std::string& message) :
- std::runtime_error(message)
+ Error(message)
{}
};
/**
* @brief A special zero division error triggered when a number is divided by 0
- *
+ *
*/
- class ZeroDivisionError : public std::runtime_error
+ class ZeroDivisionError : public Error
{
public:
ZeroDivisionError() :
- std::runtime_error(
+ Error(
"ZeroDivisionError: In ordonary arithmetic, the expression has no meaning, "
"as there is no number which, when multiplied by 0, gives a (assuming a != 0), "
"and so division by zero is undefined. Since any number multiplied by 0 is 0, "
@@ -51,13 +68,13 @@ namespace Ark
/**
* @brief A pow error triggered when we can't do a pow b
- *
+ *
*/
- class PowError : public std::runtime_error
+ class PowError : public Error
{
public:
PowError() :
- std::runtime_error(
+ Error(
"PowError: Can not pow the given number (a) to the given exponent (b) because "
"a^b, with b being a member of the rational numbers, isn't supported.")
{}
@@ -65,75 +82,70 @@ namespace Ark
/**
* @brief An assertion error, only triggered from ArkScript code through (assert expr error-message)
- *
+ *
*/
- class AssertionFailed : public std::runtime_error
+ class AssertionFailed : public Error
{
public:
explicit AssertionFailed(const std::string& message) :
- std::runtime_error("AssertionFailed: " + message)
+ Error("AssertionFailed: " + message)
{}
};
/**
- * @brief SyntaxError thrown by the lexer
- *
+ * @brief CodeError thrown by the compiler (parser, macro processor, optimizer, and compiler itself)
+ *
*/
- class SyntaxError : public std::runtime_error
+ struct CodeError : public Error
{
- public:
- explicit SyntaxError(const std::string& message) :
- std::runtime_error("SyntaxError: " + message)
- {}
- };
+ const std::string filename;
+ const std::size_t line;
+ const std::size_t col;
+ const std::string expr;
+ const std::optional symbol;
- /**
- * @brief ParseError thrown by the parser
- *
- */
- class ParseError : public std::runtime_error
- {
- public:
- explicit ParseError(const std::string& message) :
- std::runtime_error("ParseError: " + message)
+ CodeError(
+ const std::string& what,
+ const std::string& filename,
+ std::size_t lineNum,
+ std::size_t column,
+ std::string exp,
+ std::optional opt_sym = std::nullopt) :
+ Error(what),
+ filename(filename), line(lineNum), col(column), expr(std::move(exp)), symbol(opt_sym)
{}
};
- /**
- * @brief OptimizerError thrown by the AST optimizer
- *
- */
- class OptimizerError : public std::runtime_error
+ namespace Diagnostics
{
- public:
- explicit OptimizerError(const std::string& message) :
- std::runtime_error("OptimizerError: " + message)
- {}
- };
+ /**
+ * @brief Helper to create a colorized context to report errors to the user
+ *
+ * @param os stream in which the error will be written
+ * @param code content of the source file where the error is
+ * @param line line where the error is
+ * @param col_start where the error starts on the given line
+ * @param sym_size size of the bad expression that triggered the error
+ */
+ ARK_API void makeContext(std::ostream& os, const std::string& code, std::size_t line, std::size_t col_start, std::size_t sym_size);
- /**
- * @brief MacroProcessingError thrown by the compiler
- *
- */
- class MacroProcessingError : public std::runtime_error
- {
- public:
- explicit MacroProcessingError(const std::string& message) :
- std::runtime_error("MacroProcessingError: " + message)
- {}
- };
+ /**
+ * @brief Helper used by the compiler to generate a colorized context from a node
+ *
+ * @param message error message to be included in the context
+ * @param node AST node with the error
+ * @return std::string
+ */
+ ARK_API std::string makeContextWithNode(const std::string& message, const internal::Node& node);
- /**
- * @brief CompilationError thrown by the compiler
- *
- */
- class CompilationError : public std::runtime_error
- {
- public:
- explicit CompilationError(const std::string& message) :
- std::runtime_error("CompilationError: " + message)
- {}
- };
+ /**
+ * @brief Generate a diagnostic from an error and print it to the standard output
+ *
+ * @param e code error
+ * @param code code of the file in which the error occurred
+ */
+ ARK_API void generate(const CodeError& e, std::string code = "");
+ }
}
#endif
diff --git a/include/Ark/Files.hpp b/include/Ark/Files.hpp
index c9fd31736..4524f9ac1 100644
--- a/include/Ark/Files.hpp
+++ b/include/Ark/Files.hpp
@@ -4,9 +4,9 @@
* @brief Lots of utilities about the filesystem
* @version 0.1
* @date 2021-11-25
- *
+ *
* @copyright Copyright (c) 2021
- *
+ *
*/
#ifndef INCLUDE_ARK_FILES_HPP
@@ -21,7 +21,7 @@ namespace Ark::Utils
{
/**
* @brief Checks if a file exists
- *
+ *
* @param name the file name
* @return true on success
* @return false on failure
@@ -41,9 +41,9 @@ namespace Ark::Utils
/**
* @brief Helper to read a file
- *
+ *
* @param name the file name
- * @return std::string
+ * @return std::string
*/
inline std::string readFile(const std::string& name)
{
@@ -54,33 +54,32 @@ namespace Ark::Utils
std::istreambuf_iterator());
}
- /**
- * @brief Get the directory from a path
- *
- * @param path
- * @return std::string
- */
- inline std::string getDirectoryFromPath(const std::string& path)
+ inline std::vector readFileAsBytes(const std::string& name)
{
- return (std::filesystem::path(path)).parent_path().string();
- }
+ // admitting the file exists
+ std::ifstream ifs(name, std::ios::binary | std::ios::ate);
+ if (!ifs.good())
+ return std::vector {};
- /**
- * @brief Get the filename from a path
- *
- * @param path
- * @return std::string
- */
- inline std::string getFilenameFromPath(const std::string& path)
- {
- return (std::filesystem::path(path)).filename().string();
+ std::size_t pos = ifs.tellg();
+ // reserve appropriate number of bytes
+ std::vector temp(pos);
+ ifs.seekg(0, std::ios::beg);
+ ifs.read(&temp[0], pos);
+ ifs.close();
+
+ auto bytecode = std::vector(pos);
+ // TODO would it be faster to memcpy?
+ for (std::size_t i = 0; i < pos; ++i)
+ bytecode[i] = static_cast(temp[i]);
+ return bytecode;
}
/**
* @brief Get the canonical relative path from a path
- *
- * @param path
- * @return std::string
+ *
+ * @param path
+ * @return std::string
*/
inline std::string canonicalRelPath(const std::string& path)
{
diff --git a/include/Ark/REPL/Repl.hpp b/include/Ark/REPL/Repl.hpp
index 186d2d7d4..8fc5f8cff 100644
--- a/include/Ark/REPL/Repl.hpp
+++ b/include/Ark/REPL/Repl.hpp
@@ -2,7 +2,7 @@
* @file Repl.hpp
* @author Alexandre Plateau (lexplt.dev@gmail.com)
* @brief ArkScript REPL - Read Eval Print Loop
- * @version 0.1
+ * @version 0.2
* @date 2020-10-27
*
* @copyright Copyright (c) 2020-2021
@@ -13,6 +13,7 @@
#define ARK_REPL_REPL_HPP
#include
+#include
#include
#include
@@ -28,10 +29,9 @@ namespace Ark
/**
* @brief Construct a new Repl object
*
- * @param options the REPL options
* @param libenv search path for the std library
*/
- Repl(uint16_t options, const std::vector& libenv);
+ Repl(const std::vector& libenv);
/**
* @brief Start the REPL
@@ -40,11 +40,10 @@ namespace Ark
int run();
private:
- uint16_t m_options;
Replxx m_repl;
unsigned m_lines;
int m_old_ip;
- std::vector m_libenv;
+ std::vector m_libenv;
inline void print_repl_header();
int count_open_parentheses(const std::string& line);
diff --git a/include/Ark/TypeChecker.hpp b/include/Ark/TypeChecker.hpp
index 744b6d8e0..c16f97f6f 100644
--- a/include/Ark/TypeChecker.hpp
+++ b/include/Ark/TypeChecker.hpp
@@ -17,13 +17,8 @@
#include
#include
-#define NOMINMAX
#include
-#ifdef max
-# undef max
-#endif
-
namespace Ark::types
{
namespace details
@@ -34,7 +29,7 @@ namespace Ark::types
template
bool checkN([[maybe_unused]] const std::vector& args)
{
- return true;
+ return I >= args.size();
}
template
diff --git a/include/Ark/VM/State.hpp b/include/Ark/VM/State.hpp
index de6c52a23..277cda605 100644
--- a/include/Ark/VM/State.hpp
+++ b/include/Ark/VM/State.hpp
@@ -16,10 +16,11 @@
#include
#include
#include
+#include
#include
-#include
-#include
+#include
+#include
namespace Ark
{
@@ -33,10 +34,9 @@ namespace Ark
/**
* @brief Construct a new State object
*
- * @param options the options for the virtual machine, compiler, and parser
* @param libpath a list of search paths for the std library
*/
- State(uint16_t options = DefaultFeatures, const std::vector& libpath = {}) noexcept;
+ State(const std::vector& libpath = {}) noexcept;
/**
* @brief Feed the state by giving it the path to an existing bytecode file
@@ -101,7 +101,7 @@ namespace Ark
*
* @param libenv the list of std search paths to set
*/
- void setLibDirs(const std::vector& libenv) noexcept;
+ void setLibDirs(const std::vector& libenv) noexcept;
/**
* @brief Reset State (all member variables related to execution)
@@ -114,6 +114,8 @@ namespace Ark
friend class Repl;
private:
+ bool checkMagic(const bytecode_t& bytecode);
+
/**
* @brief Called to configure the state (set the bytecode, debug level, call the compiler...)
*
@@ -132,15 +134,14 @@ namespace Ark
inline void throwStateError(const std::string& message)
{
- throw std::runtime_error("StateError: " + message);
+ throw Error("StateError: " + message);
}
unsigned m_debug_level;
bytecode_t m_bytecode;
- std::vector m_libenv;
+ std::vector m_libenv;
std::string m_filename;
- uint16_t m_options;
// related to the bytecode
std::vector m_symbols;
diff --git a/include/Ark/VM/VM.hpp b/include/Ark/VM/VM.hpp
index 0df4087c5..1dea730e6 100644
--- a/include/Ark/VM/VM.hpp
+++ b/include/Ark/VM/VM.hpp
@@ -32,6 +32,7 @@
#include
#include
#include
+#include
#undef abs
#include
diff --git a/include/termcolor/proxy.hpp b/include/termcolor/proxy.hpp
index 9cbdb5e08..e3f5dbbcc 100644
--- a/include/termcolor/proxy.hpp
+++ b/include/termcolor/proxy.hpp
@@ -5,6 +5,15 @@
# define NOMINMAX
#endif
+#ifdef max
+# undef max
+#endif
+
+#ifdef abs
+# undef abs
+#endif
+
+#include
#include
#endif
diff --git a/include/utf8.hpp b/include/utf8.hpp
new file mode 100644
index 000000000..0f7a2af9f
--- /dev/null
+++ b/include/utf8.hpp
@@ -0,0 +1,276 @@
+#ifndef UTF8_DECODER_H
+#define UTF8_DECODER_H
+
+#include
+#include
+
+namespace utf8
+{
+ enum class Utf8Type // class of a codepoint, as assigned by utf8type() from the codepoint's value
+ {
+ Ascii = 0, // 0x0000..0x007f
+ LatinExtra = 1, // 0x0080..0x07ff
+ BasicMultiLingual = 2, // 0x0800..0xffff
+ OthersPlanesUnicode = 3, // 0x10000..0x10ffff
+ OutRange = 4 // any other value
+ };
+
+ namespace details
+ {
+ // clang-format off
+ constexpr std::array ASCIIHexToInt = // 128 entries indexed by ASCII code (16 per row): value of the hex digit, -1 if the character is not one
+ {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x00..0x0f
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x10..0x1f
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x20..0x2f
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 0x30..0x3f: '0'..'9' map to 0..9
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x40..0x4f: 'A'..'F' map to 10..15
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x50..0x5f
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x60..0x6f: 'a'..'f' map to 10..15
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x70..0x7f
+ };
+ // clang-format on
+ }
+
+    /** @brief Parse a hex codepoint string into *out and classify it (OutRange and -1 if a char is not a hex digit) */
+    inline Utf8Type utf8type(const char* input, int32_t* out = nullptr)
+    {
+        int32_t codepoint = 0;
+        for (const char* s = input; *s != 0 && codepoint >= 0; ++s)
+        {
+            // index through unsigned char: the table only has 128 entries and a plain char may be negative
+            const unsigned char c = static_cast<unsigned char>(*s);
+            const int digit = (c < details::ASCIIHexToInt.size()) ? details::ASCIIHexToInt[c] : -1;
+            const bool valid = digit >= 0 && digit <= 15 && codepoint <= 0x10ffff;  // also stops before overflow
+            codepoint = valid ? ((codepoint << 4) | digit) : -1;
+        }
+        if (out != nullptr)
+            *out = codepoint;
+
+        if (codepoint < 0 || codepoint > 0x10ffff)
+            return Utf8Type::OutRange;
+        if (codepoint <= 0x007f)
+            return Utf8Type::Ascii;
+        if (codepoint <= 0x07ff)
+            return Utf8Type::LatinExtra;
+        if (codepoint <= 0xffff)
+            return Utf8Type::BasicMultiLingual;
+        return Utf8Type::OthersPlanesUnicode;
+    }
+
+    /**
+     * @brief Convert hex string to utf8 string
+     *
+     * @details The bytes are derived from the codepoint parsed by utf8type(), not from the position
+     *          of each hex digit in input, so any number of digits (including leading zeros) works
+     *          and input is only read up to its terminator.
+     * @note Ascii, LatinExtra, BasicMultiLingual and OthersPlanesUnicode produce 1, 2, 3 and 4 bytes
+     *       respectively, each followed by a terminating \0, hence the 5 chars needed in dest.
+     * @param input null-terminated string of hexadecimal digits, e.g. "00e9" or "1f600"
+     * @param dest Output utf8 string (size [2,5]). Empty (\0) if input is invalid or out of range
+     */
+    inline void decode(const char* input, char* dest)
+    {
+        int32_t cdp = 0;
+        const Utf8Type type = utf8type(input, &cdp);
+
+        // payloads of the continuation bytes (10xxxxxx): 6-bit groups of the codepoint
+        const int32_t low = cdp & 0x3f;
+        const int32_t mid = (cdp >> 6) & 0x3f;
+        const int32_t high = (cdp >> 12) & 0x3f;
+
+        // every case writes the lead byte, the continuation bytes, then the terminating \0
+        switch (type)
+        {
+            case Utf8Type::Ascii:
+            {
+                // 0xxxxxxx
+                dest[0] = static_cast<char>(cdp);
+                dest[1] = 0;
+                break;
+            }
+
+            case Utf8Type::LatinExtra:
+            {
+                // 110xxxxx 10xxxxxx
+                dest[0] = static_cast<char>(0xc0 | (cdp >> 6));
+                dest[1] = static_cast<char>(0x80 | low);
+                dest[2] = 0;
+                break;
+            }
+
+            case Utf8Type::BasicMultiLingual:
+            {
+                // 1110xxxx 10xxxxxx 10xxxxxx
+                dest[0] = static_cast<char>(0xe0 | (cdp >> 12));
+                dest[1] = static_cast<char>(0x80 | mid);
+                dest[2] = static_cast<char>(0x80 | low);
+                dest[3] = 0;
+                break;
+            }
+
+            case Utf8Type::OthersPlanesUnicode:
+            {
+                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                dest[0] = static_cast<char>(0xf0 | (cdp >> 18));
+                dest[1] = static_cast<char>(0x80 | high);
+                dest[2] = static_cast<char>(0x80 | mid);
+                dest[3] = static_cast<char>(0x80 | low);
+                dest[4] = 0;
+                break;
+            }
+
+            case Utf8Type::OutRange:
+            {
+                // not a codepoint: report it with an empty string
+                *dest = 0;
+                break;
+            }
+        }
+    }
+
+ /**
+ * @brief Check the validity of a given string in UTF8
+ * @param str null-terminated string to check (a null pointer is reported as invalid)
+ * @return true if the given string is a valid UTF8 string
+ */
+ inline bool isValid(const char* str)
+ {
+ const char* s = str;
+
+ if (str == 0)
+ return false;
+
+ while (*s != 0)
+ {
+ if (0xf0 == (0xf8 & *s)) // lead byte 11110xxx: 4-byte sequence
+ {
+ if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) || (0x80 != (0xc0 & s[3]))) // the 3 next bytes must be continuation bytes (10xxxxxx); || stops at the first failure, terminator included
+ return false;
+ else if (0x80 == (0xc0 & s[4])) // one continuation byte too many
+ return false;
+ else if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) // overlong: the value would fit in fewer bytes
+ return false;
+ s += 4;
+ }
+ else if (0xe0 == (0xf0 & *s)) // lead byte 1110xxxx: 3-byte sequence
+ {
+ if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) // the 2 next bytes must be continuation bytes
+ return false;
+ else if (0x80 == (0xc0 & s[3])) // one continuation byte too many
+ return false;
+ else if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) // overlong: the value would fit in fewer bytes
+ return false;
+ s += 3;
+ }
+ else if (0xc0 == (0xe0 & *s)) // lead byte 110xxxxx: 2-byte sequence
+ {
+ if (0x80 != (0xc0 & s[1])) // the next byte must be a continuation byte
+ return false;
+ else if (0x80 == (0xc0 & s[2])) // one continuation byte too many
+ return false;
+ else if (0 == (0x1e & s[0])) // overlong: the value would fit in a single byte
+ return false;
+ s += 2;
+ }
+ else if (0x00 == (0x80 & *s)) // 0xxxxxxx: single-byte character
+ s += 1;
+ else // 10xxxxxx without a lead byte, or 11111xxx
+ return false;
+ }
+
+ return true;
+ }
+
+    /**
+     * @brief Compute the UTF8 codepoint for a given UTF8 char
+     * @param str null-terminated UTF8 string (if it holds several chars, the last one is decoded)
+     * @return UTF8 codepoint if valid, -1 otherwise (null or malformed str); 0 for an empty string
+     */
+    inline int32_t codepoint(const char* str)
+    {
+        // isValid also rejects a null pointer, so a single guard covers every "-1" case of the contract
+        if (!isValid(str))
+            return -1;
+
+        int32_t codepoint = 0;
+        const char* s = str;
+
+        // each iteration decodes one char and overwrites the previous result
+        while (*s != 0)
+        {
+            // the lead byte tells how many bytes the char spans
+            if (0xf0 == (0xf8 & *s))
+            {
+                codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) | ((0x3f & s[2]) << 6) | (0x3f & s[3]);
+                s += 4;
+            }
+            else if (0xe0 == (0xf0 & *s))
+            {
+                codepoint = ((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]);
+                s += 3;
+            }
+            else if (0xc0 == (0xe0 & *s))
+            {
+                codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]);
+                s += 2;
+            }
+            else if (0x00 == (0x80 & *s))
+            {
+                codepoint = s[0];
+                ++s;
+            }
+            else
+                return -1;
+        }
+
+        return codepoint;
+    }
+
+    /**
+     * @brief Generate an UTF8 character from a given codepoint
+     * @param codepoint Unicode codepoint to encode
+     * @param dest Output utf8 string (size [2,5]). Empty (\0) if input is invalid or out of range
+     */
+    inline void codepointToUtf8(const int32_t codepoint, char* dest)
+    {
+        // continuation byte (10xxxxxx) carrying the 6 bits of codepoint that start at bit `shift`
+        const auto continuation = [codepoint](int shift) {
+            return static_cast<char>(0x80 | ((codepoint >> shift) & 0x3f));
+        };
+
+        if (codepoint >= 0x0000 && codepoint <= 0x007f)
+        {
+            dest[0] = static_cast<char>(codepoint);
+            dest[1] = 0;
+        }
+        else if (codepoint > 0x007f && codepoint <= 0x07ff)
+        {
+            // lead byte is 110xxxxx (0xc0); 0x80 would emit a continuation byte, i.e. invalid UTF8
+            dest[0] = static_cast<char>(0xc0 | (codepoint >> 6));
+            dest[1] = continuation(0);
+            dest[2] = 0;
+        }
+        else if (codepoint > 0x07ff && codepoint <= 0xffff)
+        {
+            // 1110xxxx 10xxxxxx 10xxxxxx
+            dest[0] = static_cast<char>(0xe0 | (codepoint >> 12));
+            dest[1] = continuation(6);
+            dest[2] = continuation(0);
+            dest[3] = 0;
+        }
+        else if (codepoint > 0xffff && codepoint <= 0x10ffff)
+        {
+            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            dest[0] = static_cast<char>(0xf0 | (codepoint >> 18));
+            dest[1] = continuation(12);
+            dest[2] = continuation(6);
+            dest[3] = continuation(0);
+            dest[4] = 0;
+        }
+        else
+            *dest = 0;
+    }
+}
+
+#endif
diff --git a/lib/fmt b/lib/fmt
index 7bdf0628b..c13753a70 160000
--- a/lib/fmt
+++ b/lib/fmt
@@ -1 +1 @@
-Subproject commit 7bdf0628b1276379886c7f6dda2cef2b3b374f0b
+Subproject commit c13753a70cc55f3b1c99fb8f8395e78e5f9cae43
diff --git a/lib/modules b/lib/modules
index 74f2cd3a2..507ce192e 160000
--- a/lib/modules
+++ b/lib/modules
@@ -1 +1 @@
-Subproject commit 74f2cd3a246138f336997c8da8cba496f41c8930
+Subproject commit 507ce192ecc1f5c38d0e15050b102329209ac9b1
diff --git a/lib/std b/lib/std
index 4fa96f980..0afa4922c 160000
--- a/lib/std
+++ b/lib/std
@@ -1 +1 @@
-Subproject commit 4fa96f98073b7b57051451189ae00e2b0cf24909
+Subproject commit 0afa4922c4e9f25a7a6b4f00054e04c35e79168a
diff --git a/lib/utf8_decoder b/lib/utf8_decoder
deleted file mode 160000
index 7b2fb5e2c..000000000
--- a/lib/utf8_decoder
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 7b2fb5e2c862bc83d0c576710010d33dda39b95b
diff --git a/src/arkreactor/Builtins/String.cpp b/src/arkreactor/Builtins/String.cpp
index 809504212..f824c0bbb 100644
--- a/src/arkreactor/Builtins/String.cpp
+++ b/src/arkreactor/Builtins/String.cpp
@@ -1,8 +1,9 @@
#include
#include
-#include
+#include
#include
+#include