Replace the tree-sitter-cpp git submodule with a pinned upstream revision.

The generator script now pins the upstream commit hash and copies the upstream
scanner.cc into tree-sitter-mozcpp/src, so the mozcpp grammar no longer
includes the scanner from the submodule checkout. The tree-sitter-cpp crate is
added as a build dependency of tree-sitter-mozcpp only so that dependabot
reports new grammar releases.

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 4124c9991..68dfd5583 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,6 +5,11 @@ updates:
   schedule:
     interval: weekly
   open-pull-requests-limit: 99
+- package-ecosystem: cargo
+  directory: "/tree-sitter-mozcpp"
+  schedule:
+    interval: weekly
+  open-pull-requests-limit: 99
 - package-ecosystem: cargo
   directory: "/enums"
   schedule:
diff --git a/.gitmodules b/.gitmodules
index 1460a128e..c0bf37542 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,6 +10,3 @@
 [submodule "tree-sitter-typescript"]
 	path = tree-sitter-typescript
 	url = https://github.com/tree-sitter/tree-sitter-typescript/
-[submodule "tree-sitter-cpp"]
-	path = tree-sitter-cpp
-	url = https://github.com/tree-sitter/tree-sitter-cpp.git
diff --git a/Cargo.lock b/Cargo.lock
index f0f112827..19613a203 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1689,7 +1689,7 @@ dependencies = [
  "regex",
  "serde",
  "termcolor",
- "tree-sitter",
+ "tree-sitter 0.17.1",
  "tree-sitter-ccomment",
  "tree-sitter-java",
  "tree-sitter-mozcpp",
@@ -2182,12 +2182,32 @@ dependencies = [
  "regex",
 ]
 
+[[package]]
+name = "tree-sitter"
+version = "0.19.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8085411272e5e472f38ed09b911b29ab61979f21cd5fba3454738b85b438ba20"
+dependencies = [
+ "cc",
+ "regex",
+]
+
 [[package]]
 name = "tree-sitter-ccomment"
 version = "0.16.0"
 dependencies = [
  "cc",
- "tree-sitter",
+ "tree-sitter 0.17.1",
+]
+
+[[package]]
+name = "tree-sitter-cpp"
+version = "0.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7bd90c7b7db59369ed00fbc40458d9c9b2b8ed145640e337e839ac07aa63e15"
+dependencies = [
+ "cc",
+ "tree-sitter 0.19.2",
 ]
 
 [[package]]
@@ -2197,7 +2217,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0a1817218b66589235a1a234ada9669785095aeeb20597a2bf515d4dbf846d46"
 dependencies = [
  "cc",
- "tree-sitter",
+ "tree-sitter 0.17.1",
 ]
 
 [[package]]
@@ -2205,7 +2225,8 @@ name = "tree-sitter-mozcpp"
 version = "0.16.0"
 dependencies = [
  "cc",
- "tree-sitter",
+ "tree-sitter 0.17.1",
+ "tree-sitter-cpp",
 ]
 
 [[package]]
@@ -2213,7 +2234,7 @@ name = "tree-sitter-preproc"
 version = "0.16.0"
 dependencies = [
  "cc",
- "tree-sitter",
+ "tree-sitter 0.17.1",
 ]
 
 [[package]]
diff --git a/build.rs b/build.rs
index 7f1feabf8..6364670ff 100644
--- a/build.rs
+++ b/build.rs
@@ -160,7 +160,6 @@ fn main() {
         "tree-sitter-ccomment".to_string(),
         "tree-sitter-mozcpp".to_string(),
         "tree-sitter-typescript".to_string(),
-        "tree-sitter-cpp".to_string(),
     ];
     let dirs = collect_tree_sitter_dirs(ignore);
     for dir in dirs {
diff --git a/generate-moz-grammars/generate-mozcpp.sh b/generate-moz-grammars/generate-mozcpp.sh
index 94aee9ed2..d2bf25d2f 100755
--- a/generate-moz-grammars/generate-mozcpp.sh
+++ b/generate-moz-grammars/generate-mozcpp.sh
@@ -4,10 +4,8 @@
 #
 # Usage: ./generate-moz-grammars/generate-mozcpp.sh
 
-# FIXME we need to remove this line once we are going to use
-# the tree-sitter-cpp bindings
-# Get the tree-sitter-cpp submodule version
-TS_CPP_VERSION=`git submodule status tree-sitter-cpp | awk '{ print $1 }'`
+# Set tree-sitter-cpp version
+TS_CPP_VERSION="a35a275df92e7583df38f2de2562361f2b69987e"
 
 # Enter the mozcpp directory
 pushd tree-sitter-mozcpp
@@ -30,6 +28,9 @@ npm install -y
 # Exit tree-sitter-cpp directory
 popd
 
+# Copy tree-sitter-cpp `scanner.cc` functions into the `src` directory
+cp --verbose tree-sitter-cpp/src/scanner.cc ./src/scanner.cc
+
 # Init npm
 npm init -y
 
diff --git a/tree-sitter-cpp b/tree-sitter-cpp
deleted file mode 160000
index a35a275df..000000000
--- a/tree-sitter-cpp
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit a35a275df92e7583df38f2de2562361f2b69987e
diff --git a/tree-sitter-mozcpp/Cargo.toml b/tree-sitter-mozcpp/Cargo.toml
index f48072db8..9ac2f3e11 100644
--- a/tree-sitter-mozcpp/Cargo.toml
+++ b/tree-sitter-mozcpp/Cargo.toml
@@ -25,3 +25,6 @@ tree-sitter = "^0.17"
 
 [build-dependencies]
 cc = "^1.0"
+# This dependency is not used at all for this crate, but it is here so that
+# dependabot can send notifications when there are updates for this grammar
+tree-sitter-cpp = "0.19.0"
diff --git a/tree-sitter-mozcpp/src/scanner.cc b/tree-sitter-mozcpp/src/scanner.cc
index 140d7249c..ca89bb340 100644
--- a/tree-sitter-mozcpp/src/scanner.cc
+++ b/tree-sitter-mozcpp/src/scanner.cc
@@ -1 +1,119 @@
-#include "../../tree-sitter-cpp/src/scanner.cc"
+#include <tree_sitter/parser.h>
+#include <string>
+#include <cwctype>
+
+namespace {
+
+using std::wstring;
+using std::iswspace;
+
+enum TokenType {
+  RAW_STRING_LITERAL,
+};
+
+struct Scanner {
+  bool scan(TSLexer *lexer, const bool *valid_symbols) {
+    while (iswspace(lexer->lookahead)) {
+      lexer->advance(lexer, true);
+    }
+
+    lexer->result_symbol = RAW_STRING_LITERAL;
+
+    // Raw string literals can start with: R, LR, uR, UR, u8R
+    // Consume 'R'
+    if (lexer->lookahead == 'L' || lexer->lookahead == 'U') {
+      lexer->advance(lexer, false);
+      if (lexer->lookahead != 'R') {
+        return false;
+      }
+    } else if (lexer->lookahead == 'u') {
+      lexer->advance(lexer, false);
+      if (lexer->lookahead == '8') {
+        lexer->advance(lexer, false);
+        if (lexer->lookahead != 'R') {
+          return false;
+        }
+      } else if (lexer->lookahead != 'R') {
+        return false;
+      }
+    } else if (lexer->lookahead != 'R') {
+      return false;
+    }
+    lexer->advance(lexer, false);
+
+    // Consume '"'
+    if (lexer->lookahead != '"') return false;
+    lexer->advance(lexer, false);
+
+    // Consume '(', delimiter
+    wstring delimiter;
+    for (;;) {
+      if (lexer->lookahead == 0 || lexer->lookahead == '\\' || iswspace(lexer->lookahead)) {
+        return false;
+      }
+      if (lexer->lookahead == '(') {
+        lexer->advance(lexer, false);
+        break;
+      }
+      delimiter += lexer->lookahead;
+      lexer->advance(lexer, false);
+    }
+
+    // Consume content, delimiter, ')', '"'
+    int delimiter_index = -1;
+    for (;;) {
+      if (lexer->lookahead == 0) return false;
+
+      if (delimiter_index >= 0) {
+        if (static_cast<unsigned>(delimiter_index) == delimiter.size()) {
+          if (lexer->lookahead == '"') {
+            lexer->advance(lexer, false);
+            return true;
+          } else {
+            delimiter_index = -1;
+          }
+        } else {
+          if (lexer->lookahead == delimiter[delimiter_index]) {
+            delimiter_index++;
+          } else {
+            delimiter_index = -1;
+          }
+        }
+      }
+
+      if (delimiter_index == -1 && lexer->lookahead == ')') {
+        delimiter_index = 0;
+      }
+
+      lexer->advance(lexer, false);
+    }
+  }
+};
+
+}
+
+extern "C" {
+
+void *tree_sitter_cpp_external_scanner_create() {
+  return new Scanner();
+}
+
+bool tree_sitter_cpp_external_scanner_scan(void *payload, TSLexer *lexer,
+                                           const bool *valid_symbols) {
+  Scanner *scanner = static_cast<Scanner *>(payload);
+  return scanner->scan(lexer, valid_symbols);
+}
+
+unsigned tree_sitter_cpp_external_scanner_serialize(void *payload, char *buffer) {
+  return 0;
+}
+
+void tree_sitter_cpp_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
+}
+
+void tree_sitter_cpp_external_scanner_destroy(void *payload) {
+  Scanner *scanner = static_cast<Scanner *>(payload);
+  delete scanner;
+}
+
+}