From a37413edbc314fbf6e08cf02d5edf4ae1b85cc2f Mon Sep 17 00:00:00 2001 From: Carlos Almeida Date: Sat, 21 Feb 2026 17:43:10 -0700 Subject: [PATCH 01/11] feat: add Rust core via napi-rs with dual-engine support (Phase 1) Move CPU-intensive parsing, import resolution, and cycle detection to Rust via napi-rs while keeping JS for CLI, SQLite, MCP, and embeddings. The WASM path remains as a fallback for unsupported platforms. Rust crate (crates/codegraph-core): - All 9 language extractors (JS/TS/TSX, Python, Go, Rust, Java, C#, Ruby, PHP, HCL) ported with SymbolExtractor trait - Rayon-based parallel file parsing - 6-level import resolution with confidence scoring - Tarjan's SCC cycle detection - Incremental parse tree cache for watch mode - napi-rs API: parseFile, parseFiles, resolveImport, resolveImports, computeConfidence, detectCycles, engineName, engineVersion JS integration: - src/native.js: platform-aware addon loader with graceful fallback - src/builder.js: dual-engine path (native fast path + WASM fallback) - src/cycles.js: dispatches to native detectCycles when available - src/watcher.js: uses native parseFile in watch mode - src/cli.js: --engine global option - src/index.js: exports isNativeAvailable CI & packaging: - GitHub Actions workflow for 4-platform matrix build - Platform optionalDependencies in package.json Tests: - Rust unit tests for JS, Python, Go extractors and cycle detection - Cross-engine parity tests for all 11 languages (skip when native N/A) - Full build parity test comparing SQLite output - Extended JS cycle tests for findCyclesJS --- .github/workflows/build-native.yml | 132 ++++ Cargo.toml | 3 + crates/codegraph-core/Cargo.toml | 29 + crates/codegraph-core/build.rs | 5 + crates/codegraph-core/src/cycles.rs | 170 +++++ .../codegraph-core/src/extractors/csharp.rs | 332 ++++++++++ crates/codegraph-core/src/extractors/go.rs | 246 +++++++ crates/codegraph-core/src/extractors/hcl.rs | 114 ++++ .../codegraph-core/src/extractors/helpers.rs | 58 ++ 
crates/codegraph-core/src/extractors/java.rs | 290 +++++++++ .../src/extractors/javascript.rs | 607 ++++++++++++++++++ crates/codegraph-core/src/extractors/mod.rs | 36 ++ crates/codegraph-core/src/extractors/php.rs | 274 ++++++++ .../codegraph-core/src/extractors/python.rs | 242 +++++++ crates/codegraph-core/src/extractors/ruby.rs | 223 +++++++ .../src/extractors/rust_lang.rs | 258 ++++++++ .../codegraph-core/src/import_resolution.rs | 197 ++++++ crates/codegraph-core/src/incremental.rs | 82 +++ crates/codegraph-core/src/lib.rs | 84 +++ crates/codegraph-core/src/parallel.rs | 43 ++ crates/codegraph-core/src/parser_registry.rs | 63 ++ crates/codegraph-core/src/types.rs | 137 ++++ package.json | 6 +- src/builder.js | 87 ++- src/cli.js | 6 +- src/cycles.js | 19 +- src/index.js | 3 + src/native.js | 75 +++ src/watcher.js | 64 +- tests/engines/parity.test.js | 226 +++++++ tests/graph/cycles.test.js | 34 +- tests/integration/build-parity.test.js | 83 +++ 32 files changed, 4205 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/build-native.yml create mode 100644 Cargo.toml create mode 100644 crates/codegraph-core/Cargo.toml create mode 100644 crates/codegraph-core/build.rs create mode 100644 crates/codegraph-core/src/cycles.rs create mode 100644 crates/codegraph-core/src/extractors/csharp.rs create mode 100644 crates/codegraph-core/src/extractors/go.rs create mode 100644 crates/codegraph-core/src/extractors/hcl.rs create mode 100644 crates/codegraph-core/src/extractors/helpers.rs create mode 100644 crates/codegraph-core/src/extractors/java.rs create mode 100644 crates/codegraph-core/src/extractors/javascript.rs create mode 100644 crates/codegraph-core/src/extractors/mod.rs create mode 100644 crates/codegraph-core/src/extractors/php.rs create mode 100644 crates/codegraph-core/src/extractors/python.rs create mode 100644 crates/codegraph-core/src/extractors/ruby.rs create mode 100644 crates/codegraph-core/src/extractors/rust_lang.rs create mode 100644 
crates/codegraph-core/src/import_resolution.rs create mode 100644 crates/codegraph-core/src/incremental.rs create mode 100644 crates/codegraph-core/src/lib.rs create mode 100644 crates/codegraph-core/src/parallel.rs create mode 100644 crates/codegraph-core/src/parser_registry.rs create mode 100644 crates/codegraph-core/src/types.rs create mode 100644 src/native.js create mode 100644 tests/engines/parity.test.js create mode 100644 tests/integration/build-parity.test.js diff --git a/.github/workflows/build-native.yml b/.github/workflows/build-native.yml new file mode 100644 index 00000000..265294c9 --- /dev/null +++ b/.github/workflows/build-native.yml @@ -0,0 +1,132 @@ +name: Build Native + +on: + push: + tags: + - 'v*' + workflow_dispatch: + +permissions: + contents: write + +jobs: + build: + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + package: '@optave/codegraph-linux-x64-gnu' + node-arch: x64 + node-os: linux + - os: macos-latest + target: aarch64-apple-darwin + package: '@optave/codegraph-darwin-arm64' + node-arch: arm64 + node-os: darwin + - os: macos-13 + target: x86_64-apple-darwin + package: '@optave/codegraph-darwin-x64' + node-arch: x64 + node-os: darwin + - os: windows-latest + target: x86_64-pc-windows-msvc + package: '@optave/codegraph-win32-x64-msvc' + node-arch: x64 + node-os: win32 + + runs-on: ${{ matrix.os }} + name: Build ${{ matrix.target }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 20 + registry-url: 'https://registry.npmjs.org' + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + + - name: Install napi-rs CLI + run: npm install -g @napi-rs/cli + + - name: Build native addon + working-directory: crates/codegraph-core + run: napi build --platform --release --target ${{ matrix.target }} + + - name: Prepare platform package + shell: bash + run: | + 
PKG_DIR="npm/${{ matrix.node-os }}-${{ matrix.node-arch }}" + mkdir -p "$PKG_DIR" + + # Find the built .node file + if [ "${{ matrix.os }}" = "windows-latest" ]; then + NODE_FILE=$(find crates/codegraph-core -name "*.node" | head -1) + else + NODE_FILE=$(find crates/codegraph-core -name "*.node" | head -1) + fi + + cp "$NODE_FILE" "$PKG_DIR/codegraph-core.node" + + # Generate package.json for the platform package + cat > "$PKG_DIR/package.json" << EOF + { + "name": "${{ matrix.package }}", + "version": "0.1.0", + "description": "Native codegraph-core binary for ${{ matrix.node-os }}-${{ matrix.node-arch }}", + "os": ["${{ matrix.node-os }}"], + "cpu": ["${{ matrix.node-arch }}"], + "main": "codegraph-core.node", + "files": ["codegraph-core.node"], + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/optave/codegraph.git" + } + } + EOF + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: native-${{ matrix.node-os }}-${{ matrix.node-arch }} + path: npm/${{ matrix.node-os }}-${{ matrix.node-arch }}/ + if-no-files-found: error + + publish: + needs: build + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/v') + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 20 + registry-url: 'https://registry.npmjs.org' + + - name: Download all artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts/ + + - name: Publish platform packages + shell: bash + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + for dir in artifacts/native-*/; do + echo "Publishing $(cat "$dir/package.json" | grep '"name"')" + cd "$dir" + npm publish --access public || true + cd - + done diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..4b274689 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,3 @@ +[workspace] +members = ["crates/codegraph-core"] +resolver = "2" diff --git a/crates/codegraph-core/Cargo.toml 
b/crates/codegraph-core/Cargo.toml new file mode 100644 index 00000000..02845c0b --- /dev/null +++ b/crates/codegraph-core/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "codegraph-core" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +napi = { version = "3", features = ["serde-json"] } +napi-derive = "3" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +tree-sitter = "0.24" +tree-sitter-javascript = "0.23" +tree-sitter-typescript = "0.23" +tree-sitter-python = "0.23" +tree-sitter-go = "0.23" +tree-sitter-rust = "0.23" +tree-sitter-java = "0.23" +tree-sitter-c-sharp = "0.23" +tree-sitter-ruby = "0.23" +tree-sitter-php = { version = "0.23", features = ["php"] } +tree-sitter-hcl = "0.4" +rayon = "1" + +[build-dependencies] +napi-build = "2" diff --git a/crates/codegraph-core/build.rs b/crates/codegraph-core/build.rs new file mode 100644 index 00000000..9fc23678 --- /dev/null +++ b/crates/codegraph-core/build.rs @@ -0,0 +1,5 @@ +extern crate napi_build; + +fn main() { + napi_build::setup(); +} diff --git a/crates/codegraph-core/src/cycles.rs b/crates/codegraph-core/src/cycles.rs new file mode 100644 index 00000000..0f5479d2 --- /dev/null +++ b/crates/codegraph-core/src/cycles.rs @@ -0,0 +1,170 @@ +use std::collections::HashMap; + +use crate::types::GraphEdge; + +/// Detect cycles using Tarjan's strongly connected components algorithm. +/// Returns SCCs with size > 1 (actual cycles). +/// Mirrors the JS implementation in src/cycles.js. 
+pub fn detect_cycles(edges: &[GraphEdge]) -> Vec> { + // Build adjacency list + let mut graph: HashMap<&str, Vec<&str>> = HashMap::new(); + for edge in edges { + graph + .entry(edge.source.as_str()) + .or_default() + .push(edge.target.as_str()); + graph.entry(edge.target.as_str()).or_default(); + } + + let mut state = TarjanState { + index: 0, + stack: Vec::new(), + on_stack: HashMap::new(), + indices: HashMap::new(), + lowlinks: HashMap::new(), + sccs: Vec::new(), + }; + + let nodes: Vec<&str> = graph.keys().copied().collect(); + for node in nodes { + if !state.indices.contains_key(node) { + strongconnect(node, &graph, &mut state); + } + } + + state.sccs +} + +struct TarjanState<'a> { + index: usize, + stack: Vec<&'a str>, + on_stack: HashMap<&'a str, bool>, + indices: HashMap<&'a str, usize>, + lowlinks: HashMap<&'a str, usize>, + sccs: Vec>, +} + +fn strongconnect<'a>( + v: &'a str, + graph: &HashMap<&'a str, Vec<&'a str>>, + state: &mut TarjanState<'a>, +) { + state.indices.insert(v, state.index); + state.lowlinks.insert(v, state.index); + state.index += 1; + state.stack.push(v); + state.on_stack.insert(v, true); + + if let Some(neighbors) = graph.get(v) { + for &w in neighbors { + if !state.indices.contains_key(w) { + strongconnect(w, graph, state); + let low_w = state.lowlinks[w]; + let low_v = state.lowlinks[v]; + state.lowlinks.insert(v, low_v.min(low_w)); + } else if state.on_stack.get(w).copied().unwrap_or(false) { + let idx_w = state.indices[w]; + let low_v = state.lowlinks[v]; + state.lowlinks.insert(v, low_v.min(idx_w)); + } + } + } + + if state.lowlinks[v] == state.indices[v] { + let mut scc = Vec::new(); + loop { + let w = state.stack.pop().unwrap(); + state.on_stack.insert(w, false); + scc.push(w.to_string()); + if w == v { + break; + } + } + if scc.len() > 1 { + state.sccs.push(scc); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_no_cycles() { + let edges = vec![ + GraphEdge { + source: "a".to_string(), + target: 
"b".to_string(), + }, + GraphEdge { + source: "b".to_string(), + target: "c".to_string(), + }, + ]; + let cycles = detect_cycles(&edges); + assert!(cycles.is_empty()); + } + + #[test] + fn test_simple_cycle() { + let edges = vec![ + GraphEdge { + source: "a".to_string(), + target: "b".to_string(), + }, + GraphEdge { + source: "b".to_string(), + target: "a".to_string(), + }, + ]; + let cycles = detect_cycles(&edges); + assert_eq!(cycles.len(), 1); + assert_eq!(cycles[0].len(), 2); + } + + #[test] + fn test_triangle_cycle() { + let edges = vec![ + GraphEdge { + source: "a".to_string(), + target: "b".to_string(), + }, + GraphEdge { + source: "b".to_string(), + target: "c".to_string(), + }, + GraphEdge { + source: "c".to_string(), + target: "a".to_string(), + }, + ]; + let cycles = detect_cycles(&edges); + assert_eq!(cycles.len(), 1); + assert_eq!(cycles[0].len(), 3); + } + + #[test] + fn test_multiple_cycles() { + let edges = vec![ + GraphEdge { + source: "a".to_string(), + target: "b".to_string(), + }, + GraphEdge { + source: "b".to_string(), + target: "a".to_string(), + }, + GraphEdge { + source: "c".to_string(), + target: "d".to_string(), + }, + GraphEdge { + source: "d".to_string(), + target: "c".to_string(), + }, + ]; + let cycles = detect_cycles(&edges); + assert_eq!(cycles.len(), 2); + } +} diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs new file mode 100644 index 00000000..3421ca88 --- /dev/null +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -0,0 +1,332 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct CSharpExtractor; + +impl SymbolExtractor for CSharpExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_csharp_parent_type<'a>(node: 
&Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class_declaration" | "struct_declaration" | "interface_declaration" + | "enum_declaration" | "record_declaration" => { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + _ => {} + } + current = parent.parent(); + } + None +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + extract_csharp_base_types(node, &class_name, source, symbols); + } + } + + "struct_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + extract_csharp_base_types(node, &name, source, symbols); + } + } + + "record_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + extract_csharp_base_types(node, &name, source, symbols); + } + } + + "interface_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: 
Some(end_line(node)), + decorators: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "method_declaration" { + if let Some(meth_name) = child.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: format!( + "{}.{}", + iface_name, + node_text(&meth_name, source) + ), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } + } + } + } + + "enum_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_type = find_csharp_parent_type(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_type { + Some(pt) => format!("{}.{}", pt, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "constructor_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_type = find_csharp_parent_type(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_type { + Some(pt) => format!("{}.{}", pt, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "property_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_type = find_csharp_parent_type(node, source); 
+ let name = node_text(&name_node, source); + let full_name = match &parent_type { + Some(pt) => format!("{}.{}", pt, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "using_directive" => { + let name_node = node + .child_by_field_name("name") + .or_else(|| find_child(node, "qualified_name")) + .or_else(|| find_child(node, "identifier")); + if let Some(name_node) = name_node { + let full_path = node_text(&name_node, source).to_string(); + let last_name = full_path.split('.').last().unwrap_or("").to_string(); + let mut imp = Import::new(full_path, vec![last_name], start_line(node)); + imp.csharp_using = Some(true); + symbols.imports.push(imp); + } + } + + "invocation_expression" => { + let fn_node = node + .child_by_field_name("function") + .or_else(|| node.child(0)); + if let Some(fn_node) = fn_node { + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + "member_access_expression" => { + if let Some(name) = fn_node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + "generic_name" | "member_binding_expression" => { + let name = fn_node + .child_by_field_name("name") + .or_else(|| fn_node.child(0)); + if let Some(name) = name { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + _ => {} + } + } + } + + "object_creation_expression" => { + if let Some(type_node) = node.child_by_field_name("type") { + let type_name = if type_node.kind() == "generic_name" { + type_node + .child_by_field_name("name") + .or_else(|| type_node.child(0)) + .map(|n| node_text(&n, source).to_string()) + } else 
{ + Some(node_text(&type_node, source).to_string()) + }; + if let Some(name) = type_name { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + }); + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_csharp_base_types( + node: &Node, + class_name: &str, + source: &[u8], + symbols: &mut FileSymbols, +) { + let base_list = node.child_by_field_name("bases"); + let base_list = match base_list { + Some(bl) => bl, + None => return, + }; + + for i in 0..base_list.child_count() { + if let Some(child) = base_list.child(i) { + match child.kind() { + "identifier" | "qualified_name" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + } + "generic_name" => { + let name = child + .child_by_field_name("name") + .or_else(|| child.child(0)); + if let Some(name) = name { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&name, source).to_string()), + implements: None, + line: start_line(node), + }); + } + } + "base_list" => { + for j in 0..child.child_count() { + if let Some(base) = child.child(j) { + match base.kind() { + "identifier" | "qualified_name" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&base, source).to_string()), + implements: None, + line: start_line(node), + }); + } + "generic_name" => { + let name = base + .child_by_field_name("name") + .or_else(|| base.child(0)); + if let Some(name) = name { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some( + node_text(&name, source).to_string(), + ), + implements: None, + line: start_line(node), + }); + } + } + _ => {} + } + } + } + } + _ => {} + } + } + } +} diff --git a/crates/codegraph-core/src/extractors/go.rs 
b/crates/codegraph-core/src/extractors/go.rs new file mode 100644 index 00000000..0799281f --- /dev/null +++ b/crates/codegraph-core/src/extractors/go.rs @@ -0,0 +1,246 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct GoExtractor; + +impl SymbolExtractor for GoExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "function_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let receiver = node.child_by_field_name("receiver"); + let mut receiver_type: Option = None; + if let Some(receiver) = receiver { + for i in 0..receiver.child_count() { + if let Some(param) = receiver.child(i) { + if let Some(type_node) = param.child_by_field_name("type") { + receiver_type = Some(if type_node.kind() == "pointer_type" { + node_text(&type_node, source) + .trim_start_matches('*') + .to_string() + } else { + node_text(&type_node, source).to_string() + }); + break; + } + } + } + } + let name = node_text(&name_node, source); + let full_name = match &receiver_type { + Some(rt) => format!("{}.{}", rt, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "type_declaration" => { + for i in 0..node.child_count() { + if let Some(spec) = node.child(i) 
{ + if spec.kind() != "type_spec" { + continue; + } + let name_node = spec.child_by_field_name("name"); + let type_node = spec.child_by_field_name("type"); + if let (Some(name_node), Some(type_node)) = (name_node, type_node) { + let name = node_text(&name_node, source).to_string(); + match type_node.kind() { + "struct_type" => { + symbols.definitions.push(Definition { + name, + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + "interface_type" => { + symbols.definitions.push(Definition { + name: name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + // Extract interface methods + for j in 0..type_node.child_count() { + if let Some(member) = type_node.child(j) { + if member.kind() == "method_elem" { + if let Some(meth_name) = + member.child_by_field_name("name") + { + symbols.definitions.push(Definition { + name: format!( + "{}.{}", + name, + node_text(&meth_name, source) + ), + kind: "method".to_string(), + line: start_line(&member), + end_line: Some(end_line(&member)), + decorators: None, + }); + } + } + } + } + } + _ => { + symbols.definitions.push(Definition { + name, + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + } + } + } + } + + "import_declaration" => { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + match child.kind() { + "import_spec" => { + extract_go_import_spec(&child, source, symbols); + } + "import_spec_list" => { + for j in 0..child.child_count() { + if let Some(spec) = child.child(j) { + if spec.kind() == "import_spec" { + extract_go_import_spec(&spec, source, symbols); + } + } + } + } + _ => {} + } + } + } + } + + "call_expression" => { + if let Some(fn_node) = node.child_by_field_name("function") { + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, 
source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + "selector_expression" => { + if let Some(field) = fn_node.child_by_field_name("field") { + symbols.calls.push(Call { + name: node_text(&field, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + _ => {} + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_go_import_spec(spec: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(path_node) = spec.child_by_field_name("path") { + let import_path = node_text(&path_node, source).replace('"', ""); + let name_node = spec.child_by_field_name("name"); + let alias = match name_node { + Some(n) => node_text(&n, source).to_string(), + None => import_path.split('/').last().unwrap_or("").to_string(), + }; + let mut imp = Import::new(import_path, vec![alias], start_line(spec)); + imp.go_import = Some(true); + symbols.imports.push(imp); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_go(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_go::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + GoExtractor.extract(&tree, code.as_bytes(), "test.go") + } + + #[test] + fn finds_function() { + let s = parse_go("package main\nfunc hello() {}\n"); + assert_eq!(s.definitions.len(), 1); + assert_eq!(s.definitions[0].name, "hello"); + assert_eq!(s.definitions[0].kind, "function"); + } + + #[test] + fn finds_struct_and_method() { + let s = parse_go("package main\ntype Server struct{}\nfunc (s *Server) Start() {}\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Server")); + assert!(names.contains(&"Server.Start")); + } + + #[test] + fn finds_interface() { + let s = parse_go("package main\ntype Reader interface {\n\tRead() 
error\n}\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Reader")); + assert!(names.contains(&"Reader.Read")); + } + + #[test] + fn finds_imports() { + let s = parse_go("package main\nimport (\n\t\"fmt\"\n\t\"os\"\n)\n"); + assert_eq!(s.imports.len(), 2); + assert_eq!(s.imports[0].source, "fmt"); + assert_eq!(s.imports[1].source, "os"); + } +} diff --git a/crates/codegraph-core/src/extractors/hcl.rs b/crates/codegraph-core/src/extractors/hcl.rs new file mode 100644 index 00000000..776c9de8 --- /dev/null +++ b/crates/codegraph-core/src/extractors/hcl.rs @@ -0,0 +1,114 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct HclExtractor; + +impl SymbolExtractor for HclExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if node.kind() == "block" { + let mut identifiers = Vec::new(); + let mut strings = Vec::new(); + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "identifier" { + identifiers.push(node_text(&child, source).to_string()); + } + if child.kind() == "string_lit" { + strings.push( + node_text(&child, source) + .replace('"', "") + .to_string(), + ); + } + } + } + + if !identifiers.is_empty() { + let block_type = &identifiers[0]; + let mut name = String::new(); + + match block_type.as_str() { + "resource" if strings.len() >= 2 => { + name = format!("{}.{}", strings[0], strings[1]); + } + "data" if strings.len() >= 2 => { + name = format!("data.{}.{}", strings[0], strings[1]); + } + "variable" | "output" | "module" if !strings.is_empty() => { + name = format!("{}.{}", block_type, strings[0]); + } + "locals" => { + name = 
"locals".to_string(); + } + "terraform" | "provider" => { + name = block_type.clone(); + if !strings.is_empty() { + name = format!("{}.{}", block_type, strings[0]); + } + } + _ => {} + } + + if !name.is_empty() { + symbols.definitions.push(Definition { + name, + kind: block_type.clone(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + + // Module source imports + if block_type == "module" { + let body = node + .children(&mut node.walk()) + .find(|c| c.kind() == "body"); + if let Some(body) = body { + for i in 0..body.child_count() { + if let Some(attr) = body.child(i) { + if attr.kind() == "attribute" { + let key = attr + .child_by_field_name("key") + .or_else(|| attr.child(0)); + let val = attr + .child_by_field_name("val") + .or_else(|| attr.child(2)); + if let (Some(key), Some(val)) = (key, val) { + if node_text(&key, source) == "source" { + let src = + node_text(&val, source).replace('"', ""); + if src.starts_with("./") || src.starts_with("../") + { + symbols.imports.push(Import::new( + src, + vec![], + start_line(&attr), + )); + } + } + } + } + } + } + } + } + } + } + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs new file mode 100644 index 00000000..f931732f --- /dev/null +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -0,0 +1,58 @@ +use tree_sitter::Node; + +/// Get the text of a node from the source bytes. +pub fn node_text<'a>(node: &Node, source: &'a [u8]) -> &'a str { + node.utf8_text(source).unwrap_or("") +} + +/// Find the first child of a given type. +pub fn find_child<'a>(node: &Node<'a>, kind: &str) -> Option> { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == kind { + return Some(child); + } + } + } + None +} + +/// Find a parent of a given type, walking up the tree. 
+pub fn find_parent_of_type<'a>(node: &Node<'a>, kind: &str) -> Option> { + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == kind { + return Some(parent); + } + current = parent.parent(); + } + None +} + +/// Find a parent that is any of the given types. +pub fn find_parent_of_types<'a>(node: &Node<'a>, kinds: &[&str]) -> Option> { + let mut current = node.parent(); + while let Some(parent) = current { + if kinds.contains(&parent.kind()) { + return Some(parent); + } + current = parent.parent(); + } + None +} + +/// Get the name of a named field child, returning its text. +pub fn named_child_text<'a>(node: &Node<'a>, field: &str, source: &'a [u8]) -> Option<&'a str> { + node.child_by_field_name(field) + .map(|n| node_text(&n, source)) +} + +/// Get the 1-based start line of a node. +pub fn start_line(node: &Node) -> u32 { + node.start_position().row as u32 + 1 +} + +/// Get the 1-based end line of a node. +pub fn end_line(node: &Node) -> u32 { + node.end_position().row as u32 + 1 +} diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs new file mode 100644 index 00000000..b1b5e492 --- /dev/null +++ b/crates/codegraph-core/src/extractors/java.rs @@ -0,0 +1,290 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct JavaExtractor; + +impl SymbolExtractor for JavaExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_java_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class_declaration" | "enum_declaration" | "interface_declaration" => { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, 
source).to_string()); + } + _ => {} + } + current = parent.parent(); + } + None +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + + // Superclass + if let Some(superclass) = node.child_by_field_name("superclass") { + for i in 0..superclass.child_count() { + if let Some(child) = superclass.child(i) { + match child.kind() { + "type_identifier" | "identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + break; + } + "generic_type" => { + if let Some(first) = child.child(0) { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some( + node_text(&first, source).to_string(), + ), + implements: None, + line: start_line(node), + }); + } + break; + } + _ => {} + } + } + } + } + + // Interfaces + if let Some(interfaces) = node.child_by_field_name("interfaces") { + extract_java_interfaces(&interfaces, &class_name, source, symbols); + } + } + } + + "interface_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "method_declaration" { + if let Some(meth_name) = child.child_by_field_name("name") { + 
symbols.definitions.push(Definition { + name: format!( + "{}.{}", + iface_name, + node_text(&meth_name, source) + ), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } + } + } + } + + "enum_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_java_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "constructor_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_java_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "import_declaration" => { + let mut import_path = String::new(); + let mut has_asterisk = false; + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "scoped_identifier" || child.kind() == "identifier" { + import_path = node_text(&child, source).to_string(); + } + if child.kind() == "asterisk" { + has_asterisk = true; + } + } + } + if !import_path.is_empty() { + let names = if 
has_asterisk { + vec!["*".to_string()] + } else { + let last = import_path.split('.').last().unwrap_or("").to_string(); + vec![last] + }; + let mut imp = Import::new(import_path, names, start_line(node)); + imp.java_import = Some(true); + symbols.imports.push(imp); + } + } + + "method_invocation" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name_node, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + + "object_creation_expression" => { + if let Some(type_node) = node.child_by_field_name("type") { + let type_name = if type_node.kind() == "generic_type" { + type_node.child(0).map(|n| node_text(&n, source).to_string()) + } else { + Some(node_text(&type_node, source).to_string()) + }; + if let Some(name) = type_name { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + }); + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_java_interfaces( + interfaces: &Node, + class_name: &str, + source: &[u8], + symbols: &mut FileSymbols, +) { + for i in 0..interfaces.child_count() { + if let Some(child) = interfaces.child(i) { + match child.kind() { + "type_identifier" | "identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some(node_text(&child, source).to_string()), + line: start_line(interfaces), + }); + } + "type_list" => { + for j in 0..child.child_count() { + if let Some(t) = child.child(j) { + match t.kind() { + "type_identifier" | "identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some(node_text(&t, source).to_string()), + line: start_line(interfaces), + }); + } + "generic_type" => { + if let Some(first) = t.child(0) { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: 
None, + implements: Some( + node_text(&first, source).to_string(), + ), + line: start_line(interfaces), + }); + } + } + _ => {} + } + } + } + } + "generic_type" => { + if let Some(first) = child.child(0) { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some(node_text(&first, source).to_string()), + line: start_line(interfaces), + }); + } + } + _ => {} + } + } + } +} diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs new file mode 100644 index 00000000..f3835415 --- /dev/null +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -0,0 +1,607 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct JsExtractor; + +impl SymbolExtractor for JsExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "function_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + + // Heritage: extends + implements + let heritage = node + .child_by_field_name("heritage") + .or_else(|| find_child(node, "class_heritage")); + if let Some(heritage) = heritage { + if let 
Some(super_name) = extract_superclass(&heritage, source) { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(super_name), + implements: None, + line: start_line(node), + }); + } + for iface in extract_implements(&heritage, source) { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: None, + implements: Some(iface), + line: start_line(node), + }); + } + } + } + } + + "method_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + let method_name = node_text(&name_node, source); + let parent_class = find_parent_class(node, source); + let full_name = match parent_class { + Some(cls) => format!("{}.{}", cls, method_name), + None => method_name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "interface_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + // Extract interface methods + let body = node + .child_by_field_name("body") + .or_else(|| find_child(node, "interface_body")) + .or_else(|| find_child(node, "object_type")); + if let Some(body) = body { + extract_interface_methods(&body, &iface_name, source, &mut symbols.definitions); + } + } + } + + "type_alias_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "lexical_declaration" | "variable_declaration" => { + for i in 0..node.child_count() { + if let 
Some(declarator) = node.child(i) { + if declarator.kind() == "variable_declarator" { + let name_n = declarator.child_by_field_name("name"); + let value_n = declarator.child_by_field_name("value"); + if let (Some(name_n), Some(value_n)) = (name_n, value_n) { + let vt = value_n.kind(); + if vt == "arrow_function" + || vt == "function_expression" + || vt == "function" + { + symbols.definitions.push(Definition { + name: node_text(&name_n, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(&value_n)), + decorators: None, + }); + } + } + } + } + } + } + + "call_expression" => { + if let Some(fn_node) = node.child_by_field_name("function") { + if let Some(call_info) = extract_call_info(&fn_node, node, source) { + symbols.calls.push(call_info); + } + } + } + + "import_statement" => { + let text = node_text(node, source); + let is_type_only = text.starts_with("import type"); + let source_node = node + .child_by_field_name("source") + .or_else(|| find_child(node, "string")); + if let Some(source_node) = source_node { + let mod_path = node_text(&source_node, source) + .replace(&['\'', '"'][..], ""); + let names = extract_import_names(node, source); + let mut imp = Import::new(mod_path, names, start_line(node)); + if is_type_only { + imp.type_only = Some(true); + } + symbols.imports.push(imp); + } + } + + "export_statement" => { + let decl = node.child_by_field_name("declaration"); + if let Some(decl) = &decl { + match decl.kind() { + "function_declaration" => { + if let Some(n) = decl.child_by_field_name("name") { + symbols.exports.push(ExportInfo { + name: node_text(&n, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + }); + } + } + "class_declaration" => { + if let Some(n) = decl.child_by_field_name("name") { + symbols.exports.push(ExportInfo { + name: node_text(&n, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + }); + } + } + "interface_declaration" => { + if 
let Some(n) = decl.child_by_field_name("name") { + symbols.exports.push(ExportInfo { + name: node_text(&n, source).to_string(), + kind: "interface".to_string(), + line: start_line(node), + }); + } + } + "type_alias_declaration" => { + if let Some(n) = decl.child_by_field_name("name") { + symbols.exports.push(ExportInfo { + name: node_text(&n, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + }); + } + } + _ => {} + } + } + let source_node = node + .child_by_field_name("source") + .or_else(|| find_child(node, "string")); + if source_node.is_some() && decl.is_none() { + let source_node = source_node.unwrap(); + let mod_path = node_text(&source_node, source) + .replace(&['\'', '"'][..], ""); + let reexport_names = extract_import_names(node, source); + let text = node_text(node, source); + let is_wildcard = + text.contains("export *") || text.contains("export*"); + let mut imp = Import::new(mod_path, reexport_names.clone(), start_line(node)); + imp.reexport = Some(true); + if is_wildcard && reexport_names.is_empty() { + imp.wildcard_reexport = Some(true); + } + symbols.imports.push(imp); + } + } + + "expression_statement" => { + if let Some(expr) = node.child(0) { + if expr.kind() == "assignment_expression" { + let left = expr.child_by_field_name("left"); + let right = expr.child_by_field_name("right"); + if let (Some(left), Some(right)) = (left, right) { + let left_text = node_text(&left, source); + if left_text.starts_with("module.exports") || left_text == "exports" { + if right.kind() == "call_expression" { + let fn_node = right.child_by_field_name("function"); + let args = right + .child_by_field_name("arguments") + .or_else(|| find_child(&right, "arguments")); + if let (Some(fn_node), Some(args)) = (fn_node, args) { + if node_text(&fn_node, source) == "require" { + if let Some(str_arg) = find_child(&args, "string") { + let mod_path = node_text(&str_arg, source) + .replace(&['\'', '"'][..], ""); + let mut imp = + Import::new(mod_path, 
vec![], start_line(node)); + imp.reexport = Some(true); + imp.wildcard_reexport = Some(true); + symbols.imports.push(imp); + } + } + } + } + if right.kind() == "object" { + for ci in 0..right.child_count() { + if let Some(child) = right.child(ci) { + if child.kind() == "spread_element" { + let spread_expr = child + .child(1) + .or_else(|| child.child_by_field_name("value")); + if let Some(spread_expr) = spread_expr { + if spread_expr.kind() == "call_expression" { + let fn2 = spread_expr + .child_by_field_name("function"); + let args2 = spread_expr + .child_by_field_name("arguments") + .or_else(|| { + find_child( + &spread_expr, + "arguments", + ) + }); + if let (Some(fn2), Some(args2)) = + (fn2, args2) + { + if node_text(&fn2, source) == "require" { + if let Some(str_arg2) = + find_child(&args2, "string") + { + let mod_path2 = + node_text(&str_arg2, source) + .replace( + &['\'', '"'][..], + "", + ); + let mut imp = Import::new( + mod_path2, + vec![], + start_line(node), + ); + imp.reexport = Some(true); + imp.wildcard_reexport = Some(true); + symbols.imports.push(imp); + } + } + } + } + } + } + } + } + } + } + } + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_interface_methods( + body: &Node, + iface_name: &str, + source: &[u8], + definitions: &mut Vec, +) { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "method_signature" || child.kind() == "property_signature" { + if let Some(name_node) = child.child_by_field_name("name") { + definitions.push(Definition { + name: format!("{}.{}", iface_name, node_text(&name_node, source)), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } +} + +fn extract_implements(heritage: &Node, source: &[u8]) -> Vec { + let mut interfaces = Vec::new(); + for i in 0..heritage.child_count() { + 
if let Some(child) = heritage.child(i) { + if node_text(&child, source) == "implements" { + for j in (i + 1)..heritage.child_count() { + if let Some(next) = heritage.child(j) { + if next.kind() == "identifier" || next.kind() == "type_identifier" { + interfaces.push(node_text(&next, source).to_string()); + } + if next.child_count() > 0 { + extract_implements_from_node(&next, source, &mut interfaces); + } + } + } + break; + } + if child.kind() == "implements_clause" { + extract_implements_from_node(&child, source, &mut interfaces); + } + } + } + interfaces +} + +fn extract_implements_from_node(node: &Node, source: &[u8], result: &mut Vec) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "identifier" || child.kind() == "type_identifier" { + result.push(node_text(&child, source).to_string()); + } + if child.child_count() > 0 { + extract_implements_from_node(&child, source, result); + } + } + } +} + +fn extract_call_info(fn_node: &Node, call_node: &Node, source: &[u8]) -> Option { + match fn_node.kind() { + "identifier" => Some(Call { + name: node_text(fn_node, source).to_string(), + line: start_line(call_node), + dynamic: None, + }), + "member_expression" => { + let obj = fn_node.child_by_field_name("object"); + let prop = fn_node.child_by_field_name("property"); + let prop = prop?; + let prop_text = node_text(&prop, source); + + if prop_text == "call" || prop_text == "apply" || prop_text == "bind" { + if let Some(obj) = &obj { + if obj.kind() == "identifier" { + return Some(Call { + name: node_text(obj, source).to_string(), + line: start_line(call_node), + dynamic: Some(true), + }); + } + if obj.kind() == "member_expression" { + if let Some(inner_prop) = obj.child_by_field_name("property") { + return Some(Call { + name: node_text(&inner_prop, source).to_string(), + line: start_line(call_node), + dynamic: Some(true), + }); + } + } + } + } + + if prop.kind() == "string" || prop.kind() == "string_fragment" { + let 
method_name = node_text(&prop, source).replace(&['\'', '"'][..], ""); + if !method_name.is_empty() { + return Some(Call { + name: method_name, + line: start_line(call_node), + dynamic: Some(true), + }); + } + } + + Some(Call { + name: prop_text.to_string(), + line: start_line(call_node), + dynamic: None, + }) + } + "subscript_expression" => { + let index = fn_node.child_by_field_name("index"); + if let Some(index) = index { + if index.kind() == "string" || index.kind() == "template_string" { + let method_name = node_text(&index, source) + .replace(&['\'', '"', '`'][..], ""); + if !method_name.is_empty() && !method_name.contains('$') { + return Some(Call { + name: method_name, + line: start_line(call_node), + dynamic: Some(true), + }); + } + } + } + None + } + _ => None, + } +} + +fn extract_superclass(heritage: &Node, source: &[u8]) -> Option { + for i in 0..heritage.child_count() { + if let Some(child) = heritage.child(i) { + if child.kind() == "identifier" || child.kind() == "member_expression" { + return Some(node_text(&child, source).to_string()); + } + if let Some(found) = extract_superclass(&child, source) { + return Some(found); + } + } + } + None +} + +fn find_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == "class_declaration" || parent.kind() == "class" { + if let Some(name_node) = parent.child_by_field_name("name") { + return Some(node_text(&name_node, source).to_string()); + } + return None; + } + current = parent.parent(); + } + None +} + +fn extract_import_names(node: &Node, source: &[u8]) -> Vec { + let mut names = Vec::new(); + scan_import_names(node, source, &mut names); + names +} + +fn scan_import_names(node: &Node, source: &[u8], names: &mut Vec) { + match node.kind() { + "import_specifier" | "export_specifier" => { + let name_node = node + .child_by_field_name("name") + .or_else(|| node.child_by_field_name("alias")); + if let 
Some(name_node) = name_node { + names.push(node_text(&name_node, source).to_string()); + } else { + names.push(node_text(node, source).to_string()); + } + } + "identifier" => { + if let Some(parent) = node.parent() { + if parent.kind() == "import_clause" { + names.push(node_text(node, source).to_string()); + } + } + } + "namespace_import" => { + names.push(node_text(node, source).to_string()); + } + _ => {} + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + scan_import_names(&child, source, names); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_js(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_javascript::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + JsExtractor.extract(&tree, code.as_bytes(), "test.js") + } + + #[test] + fn finds_function_declaration() { + let s = parse_js("function greet(name) { return name; }"); + assert_eq!(s.definitions.len(), 1); + assert_eq!(s.definitions[0].name, "greet"); + assert_eq!(s.definitions[0].kind, "function"); + } + + #[test] + fn finds_arrow_function() { + let s = parse_js("const add = (a, b) => a + b;"); + assert_eq!(s.definitions.len(), 1); + assert_eq!(s.definitions[0].name, "add"); + assert_eq!(s.definitions[0].kind, "function"); + } + + #[test] + fn finds_class_with_methods() { + let s = parse_js("class Foo { bar() {} baz() {} }"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Foo")); + assert!(names.contains(&"Foo.bar")); + assert!(names.contains(&"Foo.baz")); + } + + #[test] + fn finds_imports() { + let s = parse_js("import { readFile } from 'fs';"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "fs"); + assert_eq!(s.imports[0].names, vec!["readFile"]); + } + + #[test] + fn finds_calls() { + let s = parse_js("function f() { console.log('hi'); foo(); }"); + let 
call_names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect(); + assert!(call_names.contains(&"log")); + assert!(call_names.contains(&"foo")); + } + + #[test] + fn finds_exports() { + let s = parse_js("export function hello() {} export class World {}"); + assert_eq!(s.exports.len(), 2); + assert_eq!(s.exports[0].name, "hello"); + assert_eq!(s.exports[1].name, "World"); + } + + #[test] + fn finds_class_heritage() { + let s = parse_js("class Dog extends Animal {}"); + assert_eq!(s.classes.len(), 1); + assert_eq!(s.classes[0].name, "Dog"); + assert_eq!(s.classes[0].extends, Some("Animal".to_string())); + } + + #[test] + fn finds_reexports() { + let s = parse_js("export { foo, bar } from './utils';"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].reexport, Some(true)); + assert_eq!(s.imports[0].source, "./utils"); + } + + #[test] + fn finds_wildcard_reexport() { + let s = parse_js("export * from './helpers';"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].wildcard_reexport, Some(true)); + } +} diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs new file mode 100644 index 00000000..c0a81ff8 --- /dev/null +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -0,0 +1,36 @@ +pub mod helpers; +pub mod javascript; +pub mod python; +pub mod go; +pub mod rust_lang; +pub mod java; +pub mod csharp; +pub mod ruby; +pub mod php; +pub mod hcl; + +use tree_sitter::Tree; +use crate::types::FileSymbols; +use crate::parser_registry::LanguageKind; + +/// Trait every language extractor implements. +pub trait SymbolExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols; +} + +/// Dispatch to the correct extractor based on language kind. 
+pub fn extract_symbols(lang: LanguageKind, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + match lang { + LanguageKind::JavaScript | LanguageKind::TypeScript | LanguageKind::Tsx => { + javascript::JsExtractor.extract(tree, source, file_path) + } + LanguageKind::Python => python::PythonExtractor.extract(tree, source, file_path), + LanguageKind::Go => go::GoExtractor.extract(tree, source, file_path), + LanguageKind::Rust => rust_lang::RustExtractor.extract(tree, source, file_path), + LanguageKind::Java => java::JavaExtractor.extract(tree, source, file_path), + LanguageKind::CSharp => csharp::CSharpExtractor.extract(tree, source, file_path), + LanguageKind::Ruby => ruby::RubyExtractor.extract(tree, source, file_path), + LanguageKind::Php => php::PhpExtractor.extract(tree, source, file_path), + LanguageKind::Hcl => hcl::HclExtractor.extract(tree, source, file_path), + } +} diff --git a/crates/codegraph-core/src/extractors/php.rs b/crates/codegraph-core/src/extractors/php.rs new file mode 100644 index 00000000..4092333f --- /dev/null +++ b/crates/codegraph-core/src/extractors/php.rs @@ -0,0 +1,274 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct PhpExtractor; + +impl SymbolExtractor for PhpExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_php_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class_declaration" | "trait_declaration" | "enum_declaration" => { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + _ => {} + } + current = parent.parent(); + } + None +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match 
node.kind() { + "function_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + + // Extends + let base_clause = node + .child_by_field_name("base_clause") + .or_else(|| find_child(node, "base_clause")); + if let Some(base_clause) = base_clause { + for i in 0..base_clause.child_count() { + if let Some(child) = base_clause.child(i) { + if child.kind() == "name" || child.kind() == "qualified_name" { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + break; + } + } + } + } + + // Implements + let interface_clause = find_child(node, "class_interface_clause"); + if let Some(interface_clause) = interface_clause { + for i in 0..interface_clause.child_count() { + if let Some(child) = interface_clause.child(i) { + if child.kind() == "name" || child.kind() == "qualified_name" { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: None, + implements: Some(node_text(&child, source).to_string()), + line: start_line(node), + }); + } + } + } + } + } + } + + "interface_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: 
Some(end_line(node)), + decorators: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "method_declaration" { + if let Some(meth_name) = child.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: format!( + "{}.{}", + iface_name, + node_text(&meth_name, source) + ), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } + } + } + } + + "trait_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "enum_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_php_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "namespace_use_declaration" => { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "namespace_use_clause" { + let name_node = find_child(&child, "qualified_name") + .or_else(|| find_child(&child, "name")); + if let Some(name_node) = name_node { + let full_path = node_text(&name_node, 
source).to_string(); + let last_name = full_path.split('\\').last().unwrap_or("").to_string(); + let alias = child.child_by_field_name("alias"); + let alias_text = alias + .map(|a| node_text(&a, source).to_string()) + .unwrap_or(last_name); + let mut imp = + Import::new(full_path, vec![alias_text], start_line(node)); + imp.php_use = Some(true); + symbols.imports.push(imp); + } + } + // Single use clause without wrapper + if child.kind() == "qualified_name" || child.kind() == "name" { + let full_path = node_text(&child, source).to_string(); + let last_name = full_path.split('\\').last().unwrap_or("").to_string(); + let mut imp = + Import::new(full_path, vec![last_name], start_line(node)); + imp.php_use = Some(true); + symbols.imports.push(imp); + } + } + } + } + + "function_call_expression" => { + let fn_node = node + .child_by_field_name("function") + .or_else(|| node.child(0)); + if let Some(fn_node) = fn_node { + match fn_node.kind() { + "name" | "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + "qualified_name" => { + let text = node_text(&fn_node, source); + let last = text.split('\\').last().unwrap_or(""); + symbols.calls.push(Call { + name: last.to_string(), + line: start_line(node), + dynamic: None, + }); + } + _ => {} + } + } + } + + "member_call_expression" => { + if let Some(name) = node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + + "scoped_call_expression" => { + if let Some(name) = node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + + "object_creation_expression" => { + // Skip 'new' keyword (child 0) and get class node (child 1) + if let Some(class_node) = node.child(1) { + if class_node.kind() == "name" || 
class_node.kind() == "qualified_name" { + let text = node_text(&class_node, source); + let last = text.split('\\').last().unwrap_or(""); + symbols.calls.push(Call { + name: last.to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} diff --git a/crates/codegraph-core/src/extractors/python.rs b/crates/codegraph-core/src/extractors/python.rs new file mode 100644 index 00000000..619f1638 --- /dev/null +++ b/crates/codegraph-core/src/extractors/python.rs @@ -0,0 +1,242 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct PythonExtractor; + +impl SymbolExtractor for PythonExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "function_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name_text = node_text(&name_node, source); + let mut decorators = Vec::new(); + if let Some(prev) = node.prev_sibling() { + if prev.kind() == "decorator" { + decorators.push(node_text(&prev, source).to_string()); + } + } + let parent_class = find_python_parent_class(node, source); + let (full_name, kind) = match &parent_class { + Some(cls) => (format!("{}.{}", cls, name_text), "method".to_string()), + None => (name_text.to_string(), "function".to_string()), + }; + symbols.definitions.push(Definition { + name: full_name, + kind, + line: start_line(node), + end_line: Some(end_line(node)), + decorators: if decorators.is_empty() { + None + } else { + Some(decorators) + }, + }); + } + } + + "class_definition" => { + if let Some(name_node) = 
node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + let superclasses = node + .child_by_field_name("superclasses") + .or_else(|| find_child(node, "argument_list")); + if let Some(superclasses) = superclasses { + for i in 0..superclasses.child_count() { + if let Some(child) = superclasses.child(i) { + if child.kind() == "identifier" { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + } + } + } + } + } + } + + "decorated_definition" => { + // Walk children directly to handle decorated functions/classes + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } + return; + } + + "call" => { + if let Some(fn_node) = node.child_by_field_name("function") { + let call_name = match fn_node.kind() { + "identifier" => Some(node_text(&fn_node, source).to_string()), + "attribute" => fn_node + .child_by_field_name("attribute") + .map(|a| node_text(&a, source).to_string()), + _ => None, + }; + if let Some(name) = call_name { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + }); + } + } + } + + "import_statement" => { + let mut names = Vec::new(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "dotted_name" || child.kind() == "aliased_import" { + let name = if child.kind() == "aliased_import" { + child + .child_by_field_name("alias") + .or_else(|| child.child_by_field_name("name")) + .map(|n| node_text(&n, source).to_string()) + } else { + Some(node_text(&child, source).to_string()) + }; + if let Some(name) = name { + names.push(name); + } + } + } + } + if !names.is_empty() { + let mut imp = 
Import::new(names[0].clone(), names, start_line(node)); + imp.python_import = Some(true); + symbols.imports.push(imp); + } + } + + "import_from_statement" => { + let mut source_str = String::new(); + let mut names = Vec::new(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + match child.kind() { + "dotted_name" | "relative_import" => { + if source_str.is_empty() { + source_str = node_text(&child, source).to_string(); + } else { + names.push(node_text(&child, source).to_string()); + } + } + "aliased_import" => { + let n = child + .child_by_field_name("name") + .or_else(|| child.child(0)); + if let Some(n) = n { + names.push(node_text(&n, source).to_string()); + } + } + "wildcard_import" => { + names.push("*".to_string()); + } + _ => {} + } + } + } + if !source_str.is_empty() { + let mut imp = Import::new(source_str, names, start_line(node)); + imp.python_import = Some(true); + symbols.imports.push(imp); + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn find_python_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == "class_definition" { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + current = parent.parent(); + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_py(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_python::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + PythonExtractor.extract(&tree, code.as_bytes(), "test.py") + } + + #[test] + fn finds_function() { + let s = parse_py("def greet(name):\n return name\n"); + assert_eq!(s.definitions.len(), 1); + assert_eq!(s.definitions[0].name, "greet"); + assert_eq!(s.definitions[0].kind, 
"function"); + } + + #[test] + fn finds_class_and_method() { + let s = parse_py("class Foo:\n def bar(self):\n pass\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Foo")); + assert!(names.contains(&"Foo.bar")); + } + + #[test] + fn finds_imports() { + let s = parse_py("from os.path import join, exists\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "os.path"); + assert!(s.imports[0].names.contains(&"join".to_string())); + } + + #[test] + fn finds_calls() { + let s = parse_py("print('hello')\nos.path.join('a', 'b')\n"); + let call_names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect(); + assert!(call_names.contains(&"print")); + assert!(call_names.contains(&"join")); + } + + #[test] + fn finds_inheritance() { + let s = parse_py("class Dog(Animal):\n pass\n"); + assert_eq!(s.classes.len(), 1); + assert_eq!(s.classes[0].name, "Dog"); + assert_eq!(s.classes[0].extends, Some("Animal".to_string())); + } +} diff --git a/crates/codegraph-core/src/extractors/ruby.rs b/crates/codegraph-core/src/extractors/ruby.rs new file mode 100644 index 00000000..ebf0faf2 --- /dev/null +++ b/crates/codegraph-core/src/extractors/ruby.rs @@ -0,0 +1,223 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct RubyExtractor; + +impl SymbolExtractor for RubyExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_ruby_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class" | "module" => { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + _ => {} + } + current = parent.parent(); + } + None 
+} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "class" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + if let Some(superclass) = node.child_by_field_name("superclass") { + // Walk superclass node to find the constant + extract_ruby_superclass(&superclass, &class_name, node, source, symbols); + } + } + } + + "module" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_ruby_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "singleton_method" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_ruby_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "call" => { + if let Some(method_node) = node.child_by_field_name("method") { + let method_text = 
node_text(&method_node, source); + + if method_text == "require" || method_text == "require_relative" { + let args = node.child_by_field_name("arguments"); + if let Some(args) = args { + for i in 0..args.child_count() { + if let Some(arg) = args.child(i) { + let str_content = extract_ruby_string_content(&arg, source); + if let Some(content) = str_content { + let last = content.split('/').last().unwrap_or("").to_string(); + let mut imp = + Import::new(content, vec![last], start_line(node)); + imp.ruby_require = Some(true); + symbols.imports.push(imp); + break; + } + } + } + } + } else if method_text == "include" + || method_text == "extend" + || method_text == "prepend" + { + let parent_class = find_ruby_parent_class(node, source); + if let Some(parent_class) = parent_class { + if let Some(args) = node.child_by_field_name("arguments") { + for i in 0..args.child_count() { + if let Some(arg) = args.child(i) { + if arg.kind() == "constant" + || arg.kind() == "scope_resolution" + { + symbols.classes.push(ClassRelation { + name: parent_class.clone(), + extends: None, + implements: Some( + node_text(&arg, source).to_string(), + ), + line: start_line(node), + }); + } + } + } + } + } + } else { + symbols.calls.push(Call { + name: method_text.to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_ruby_superclass( + superclass: &Node, + class_name: &str, + class_node: &Node, + source: &[u8], + symbols: &mut FileSymbols, +) { + // Direct check for superclass node type + if superclass.kind() == "superclass" { + for i in 0..superclass.child_count() { + if let Some(child) = superclass.child(i) { + if child.kind() == "constant" || child.kind() == "scope_resolution" { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&child, source).to_string()), + implements: 
None, + line: start_line(class_node), + }); + return; + } + } + } + } + // Fallback: check children directly + for i in 0..superclass.child_count() { + if let Some(child) = superclass.child(i) { + if child.kind() == "constant" || child.kind() == "scope_resolution" { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(class_node), + }); + return; + } + } + } +} + +fn extract_ruby_string_content(node: &Node, source: &[u8]) -> Option { + if node.kind() == "string" { + // Look for string_content child + if let Some(content) = find_child(node, "string_content") { + return Some(node_text(&content, source).to_string()); + } + // Fallback: strip quotes from text + let text = node_text(node, source); + let stripped = text + .trim_start_matches(&['\'', '"'][..]) + .trim_end_matches(&['\'', '"'][..]); + if !stripped.is_empty() { + return Some(stripped.to_string()); + } + } + if node.kind() == "string_content" { + return Some(node_text(node, source).to_string()); + } + None +} diff --git a/crates/codegraph-core/src/extractors/rust_lang.rs b/crates/codegraph-core/src/extractors/rust_lang.rs new file mode 100644 index 00000000..9c7484e1 --- /dev/null +++ b/crates/codegraph-core/src/extractors/rust_lang.rs @@ -0,0 +1,258 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct RustExtractor; + +impl SymbolExtractor for RustExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_current_impl<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == "impl_item" { + return parent + .child_by_field_name("type") + .map(|n| node_text(&n, 
source).to_string()); + } + current = parent.parent(); + } + None +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "function_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = node_text(&name_node, source); + let impl_type = find_current_impl(node, source); + let (full_name, kind) = match &impl_type { + Some(t) => (format!("{}.{}", t, name), "method".to_string()), + None => (name.to_string(), "function".to_string()), + }; + symbols.definitions.push(Definition { + name: full_name, + kind, + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "struct_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "enum_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "trait_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + let trait_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: trait_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "function_signature_item" + || child.kind() == "function_item" + { + if let Some(meth_name) = child.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: format!( + "{}.{}", + trait_name, + node_text(&meth_name, source) + 
), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } + } + } + } + + "impl_item" => { + let type_node = node.child_by_field_name("type"); + let trait_node = node.child_by_field_name("trait"); + if let (Some(type_node), Some(trait_node)) = (type_node, trait_node) { + symbols.classes.push(ClassRelation { + name: node_text(&type_node, source).to_string(), + extends: None, + implements: Some(node_text(&trait_node, source).to_string()), + line: start_line(node), + }); + } + } + + "use_declaration" => { + if let Some(arg_node) = node.child(1) { + let use_paths = extract_rust_use_path(&arg_node, source); + for (src, names) in use_paths { + let mut imp = Import::new(src, names, start_line(node)); + imp.rust_use = Some(true); + symbols.imports.push(imp); + } + } + } + + "call_expression" => { + if let Some(fn_node) = node.child_by_field_name("function") { + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + "field_expression" => { + if let Some(field) = fn_node.child_by_field_name("field") { + symbols.calls.push(Call { + name: node_text(&field, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + "scoped_identifier" => { + if let Some(name) = fn_node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + _ => {} + } + } + } + + "macro_invocation" => { + if let Some(macro_node) = node.child(0) { + symbols.calls.push(Call { + name: format!("{}!", node_text(¯o_node, source)), + line: start_line(node), + dynamic: None, + }); + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_rust_use_path(node: &Node, source: &[u8]) -> 
Vec<(String, Vec)> { + match node.kind() { + "use_list" => { + let mut results = Vec::new(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + results.extend(extract_rust_use_path(&child, source)); + } + } + results + } + + "scoped_use_list" => { + let path_node = node.child_by_field_name("path"); + let list_node = node.child_by_field_name("list"); + let prefix = path_node + .map(|p| node_text(&p, source).to_string()) + .unwrap_or_default(); + if let Some(list_node) = list_node { + let mut names = Vec::new(); + for i in 0..list_node.child_count() { + if let Some(child) = list_node.child(i) { + match child.kind() { + "identifier" | "self" => { + names.push(node_text(&child, source).to_string()); + } + "use_as_clause" => { + let name = child + .child_by_field_name("alias") + .or_else(|| child.child_by_field_name("name")) + .map(|n| node_text(&n, source).to_string()); + if let Some(name) = name { + names.push(name); + } + } + _ => {} + } + } + } + vec![(prefix, names)] + } else { + vec![(prefix, vec![])] + } + } + + "use_as_clause" => { + let name = node + .child_by_field_name("alias") + .or_else(|| node.child_by_field_name("name")) + .map(|n| node_text(&n, source).to_string()); + vec![( + node_text(node, source).to_string(), + name.into_iter().collect(), + )] + } + + "use_wildcard" => { + let path_node = node.child_by_field_name("path"); + let src = path_node + .map(|p| node_text(&p, source).to_string()) + .unwrap_or_else(|| "*".to_string()); + vec![(src, vec!["*".to_string()])] + } + + "scoped_identifier" | "identifier" => { + let text = node_text(node, source).to_string(); + let last_name = text.split("::").last().unwrap_or("").to_string(); + vec![(text, vec![last_name])] + } + + _ => vec![], + } +} diff --git a/crates/codegraph-core/src/import_resolution.rs b/crates/codegraph-core/src/import_resolution.rs new file mode 100644 index 00000000..89157e6e --- /dev/null +++ b/crates/codegraph-core/src/import_resolution.rs @@ -0,0 +1,197 @@ +use 
std::path::{Path, PathBuf}; + +use crate::types::{AliasMapping, ImportResolutionInput, PathAliases, ResolvedImport}; + +/// Normalize a path to use forward slashes (cross-platform consistency). +fn normalize_path(p: &str) -> String { + p.replace('\\', "/") +} + +/// Try resolving via path aliases (tsconfig/jsconfig paths). +fn resolve_via_alias( + import_source: &str, + aliases: &PathAliases, + _root_dir: &str, +) -> Option { + // baseUrl resolution + if let Some(base_url) = &aliases.base_url { + if !import_source.starts_with('.') && !import_source.starts_with('/') { + let candidate = PathBuf::from(base_url).join(import_source); + for ext in &["", ".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"] + { + let full = format!("{}{}", candidate.display(), ext); + if Path::new(&full).exists() { + return Some(full); + } + } + } + } + + // Path pattern resolution + for mapping in &aliases.paths { + let prefix = mapping.pattern.trim_end_matches('*'); + if !import_source.starts_with(prefix) { + continue; + } + let rest = &import_source[prefix.len()..]; + for target in &mapping.targets { + let resolved = target.replace('*', rest); + for ext in &["", ".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"] + { + let full = format!("{}{}", resolved, ext); + if Path::new(&full).exists() { + return Some(full); + } + } + } + } + + None +} + +/// Resolve a single import path, mirroring `resolveImportPath()` in builder.js. 
+///
+/// Resolution order:
+/// 1. Non-relative specifiers are looked up through the path aliases; when no
+///    alias matches they are treated as bare package names (e.g. "lodash")
+///    and returned unchanged.
+/// 2. Relative specifiers are joined onto the directory of `from_file`, with
+///    `.` and `..` segments folded lexically.
+/// 3. A trailing `.js` is retried as `.ts`, then `.tsx`.
+/// 4. Known source extensions and index / `__init__` files are probed.
+/// 5. The path itself is accepted if it exists.
+/// 6. Otherwise the unverified path is returned.
+///
+/// On-disk candidates (steps 3-5) are only accepted when they lie under
+/// `root_dir`. Results are relative to `root_dir` where possible and always
+/// use forward slashes.
+pub fn resolve_import_path(
+    from_file: &str,
+    import_source: &str,
+    root_dir: &str,
+    aliases: &PathAliases,
+) -> String {
+    // Clean the root as well: `strip_prefix` compares component-wise, so a
+    // root spelled `./proj` must be folded the same way as the candidates.
+    let root = clean_path(Path::new(root_dir));
+
+    // Try alias resolution for non-relative imports
+    if !import_source.starts_with('.') {
+        return match resolve_via_alias(import_source, aliases, root_dir) {
+            Some(alias_resolved) => {
+                path_for_graph(&clean_path(Path::new(&alias_resolved)), &root)
+            }
+            // Bare specifier (e.g., "lodash") — return as-is
+            None => import_source.to_string(),
+        };
+    }
+
+    // Relative import. `Path::join` keeps `./` and `../` verbatim, so fold
+    // them here; otherwise results look like `src/./foo.ts`.
+    let dir = Path::new(from_file).parent().unwrap_or(Path::new(""));
+    let resolved = clean_path(&dir.join(import_source));
+    let resolved_str = resolved.display().to_string();
+
+    // A candidate only counts when it exists on disk AND lies under the root.
+    let existing_under_root = |candidate: &str| -> Option<String> {
+        let path = Path::new(candidate);
+        if !path.exists() {
+            return None;
+        }
+        path.strip_prefix(&root)
+            .ok()
+            .map(|rel| normalize_path(&rel.display().to_string()))
+    };
+
+    // .js → .ts remap. Only the trailing extension is swapped; a blanket
+    // `replace(".js", …)` would also mangle `lib.json/x.js` or `vendor.js/a.js`.
+    if let Some(stem) = resolved_str.strip_suffix(".js") {
+        for ts_ext in &[".ts", ".tsx"] {
+            if let Some(found) = existing_under_root(&format!("{}{}", stem, ts_ext)) {
+                return found;
+            }
+        }
+    }
+
+    // Extension probing
+    let extensions = [
+        ".ts", ".tsx", ".js", ".jsx", ".mjs", ".py", "/index.ts", "/index.tsx", "/index.js",
+        "/__init__.py",
+    ];
+    for ext in &extensions {
+        if let Some(found) = existing_under_root(&format!("{}{}", resolved_str, ext)) {
+            return found;
+        }
+    }
+
+    // Exact match
+    if let Some(found) = existing_under_root(&resolved_str) {
+        return found;
+    }
+
+    // Fallback: return the (unverified) path, relative to the root if possible
+    path_for_graph(&resolved, &root)
+}
+
+/// Compute proximity-based confidence for call resolution.
+/// Mirrors `computeConfidence()` in builder.js.
+///
+/// Returns:
+/// - `1.0` when caller and target are the same file, or the target is the
+///   file the symbol was imported from;
+/// - `0.7` when both files share a directory;
+/// - `0.5` when their directories share a parent;
+/// - `0.3` otherwise, or when either path is empty.
+pub fn compute_confidence(
+    caller_file: &str,
+    target_file: &str,
+    imported_from: Option<&str>,
+) -> f64 {
+    if target_file.is_empty() || caller_file.is_empty() {
+        return 0.3;
+    }
+    if caller_file == target_file || imported_from == Some(target_file) {
+        return 1.0;
+    }
+
+    // Parent directory rendered as a string; "" when there is none.
+    let parent_of = |p: &str| -> String {
+        Path::new(p)
+            .parent()
+            .map(|d| d.display().to_string())
+            .unwrap_or_default()
+    };
+
+    let caller_dir = parent_of(caller_file);
+    let target_dir = parent_of(target_file);
+    if caller_dir == target_dir {
+        return 0.7;
+    }
+    if parent_of(&caller_dir) == parent_of(&target_dir) {
+        return 0.5;
+    }
+
+    0.3
+}
+
+/// Fold `.` and `..` segments lexically, without touching the filesystem.
+///
+/// A `..` that has nothing left to cancel is kept so relative paths stay
+/// relative; a `..` directly under the root is dropped.
+fn clean_path(path: &Path) -> PathBuf {
+    use std::path::Component;
+
+    let mut cleaned = PathBuf::new();
+    for component in path.components() {
+        match component {
+            Component::CurDir => {}
+            Component::ParentDir => {
+                let last_is_normal = matches!(
+                    cleaned.components().next_back(),
+                    Some(Component::Normal(_))
+                );
+                let at_root = matches!(
+                    cleaned.components().next_back(),
+                    Some(Component::RootDir) | Some(Component::Prefix(_))
+                );
+                if last_is_normal {
+                    // `a/b/..` → `a`
+                    cleaned.pop();
+                } else if !at_root {
+                    cleaned.push("..");
+                }
+            }
+            other => cleaned.push(other.as_os_str()),
+        }
+    }
+    cleaned
+}
+
+/// Render `path` relative to `root` when it lies under it (otherwise as-is),
+/// using forward slashes.
+fn path_for_graph(path: &Path, root: &Path) -> String {
+    let shown = path.strip_prefix(root).unwrap_or(path);
+    normalize_path(&shown.display().to_string())
+}
+
+/// Batch resolve multiple imports.
+///
+/// Each input is resolved independently with `resolve_import_path`; the
+/// output has one entry per input, in the same order.
+pub fn resolve_imports_batch(
+    inputs: &[ImportResolutionInput],
+    root_dir: &str,
+    aliases: &PathAliases,
+) -> Vec<ResolvedImport> {
+    inputs
+        .iter()
+        .map(|input| ResolvedImport {
+            from_file: input.from_file.clone(),
+            import_source: input.import_source.clone(),
+            resolved_path: resolve_import_path(
+                &input.from_file,
+                &input.import_source,
+                root_dir,
+                aliases,
+            ),
+        })
+        .collect()
+}
diff --git a/crates/codegraph-core/src/incremental.rs b/crates/codegraph-core/src/incremental.rs
new file mode 100644
index 00000000..8336b8b0
--- /dev/null
+++ b/crates/codegraph-core/src/incremental.rs
@@ -0,0 +1,82 @@
+use std::collections::HashMap;
+use tree_sitter::{InputEdit, Parser, Tree};
+
+use crate::extractors::extract_symbols;
+use crate::parser_registry::LanguageKind;
+use crate::types::FileSymbols;
+
+/// Cache of parse trees for incremental parsing.
+/// Keeps the old tree and source for each file so tree-sitter can apply edits
+/// and re-parse only the changed portion.
+pub struct ParseTreeCache {
+    /// Cached state keyed by the file path passed to `parse_file`.
+    entries: HashMap<String, CacheEntry>,
+}
+
+/// Result of the most recent successful parse of one file.
+struct CacheEntry {
+    tree: Tree,
+    source: Vec<u8>,
+    lang: LanguageKind,
+}
+
+impl Default for ParseTreeCache {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl ParseTreeCache {
+    /// Create an empty cache.
+    pub fn new() -> Self {
+        Self {
+            entries: HashMap::new(),
+        }
+    }
+
+    /// Parse a file, using the cached tree if available for incremental re-parse.
+    ///
+    /// The cached tree is handed to tree-sitter only when that is sound:
+    /// - `edits` is provided: the edits are applied to the old tree first;
+    /// - `edits` is `None` and `new_source` is byte-identical to the cached
+    ///   source: the old tree already matches the text.
+    ///
+    /// In every other case the file is parsed from scratch, because
+    /// tree-sitter requires the old tree to have been edited to match the
+    /// new text before it is reused.
+    ///
+    /// Returns the extracted symbols if parsing succeeds; `None` when the
+    /// extension is not recognised or the parser cannot be set up / run.
+    pub fn parse_file(
+        &mut self,
+        file_path: &str,
+        new_source: &[u8],
+        edits: Option<&[InputEdit]>,
+    ) -> Option<FileSymbols> {
+        let lang = LanguageKind::from_extension(file_path)?;
+
+        let mut parser = Parser::new();
+        parser.set_language(&lang.tree_sitter_language()).ok()?;
+
+        let old_tree = match self.entries.get_mut(file_path) {
+            Some(entry) => match edits {
+                Some(edits) => {
+                    for edit in edits {
+                        entry.tree.edit(edit);
+                    }
+                    Some(&entry.tree)
+                }
+                // No edits supplied: reuse only if the text did not change.
+                None if entry.source.as_slice() == new_source => Some(&entry.tree),
+                None => None,
+            },
+            None => None,
+        };
+
+        let tree = parser.parse(new_source, old_tree)?;
+        let symbols = extract_symbols(lang, &tree, new_source, file_path);
+
+        // Replace whatever was cached with the fresh tree and source.
+        self.entries.insert(
+            file_path.to_string(),
+            CacheEntry {
+                tree,
+                source: new_source.to_vec(),
+                lang,
+            },
+        );
+
+        Some(symbols)
+    }
+
+    /// Remove a file from the cache.
+    pub fn remove(&mut self, file_path: &str) {
+        self.entries.remove(file_path);
+    }
+
+    /// Check if a file is in the cache.
+    pub fn contains(&self, file_path: &str) -> bool {
+        self.entries.contains_key(file_path)
+    }
+
+    /// Clear the entire cache.
+    pub fn clear(&mut self) {
+        self.entries.clear();
+    }
+}
diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs
new file mode 100644
index 00000000..6223460f
--- /dev/null
+++ b/crates/codegraph-core/src/lib.rs
@@ -0,0 +1,84 @@
+pub mod types;
+pub mod parser_registry;
+pub mod extractors;
+pub mod parallel;
+pub mod import_resolution;
+pub mod cycles;
+pub mod incremental;
+
+use napi_derive::napi;
+use types::*;
+
+/// Parse a single file and return extracted symbols.
+#[napi]
+pub fn parse_file(file_path: String, source: String) -> Option<FileSymbols> {
+    parallel::parse_file(&file_path, &source)
+}
+
+/// Parse multiple files in parallel and return all extracted symbols.
+#[napi]
+pub fn parse_files(file_paths: Vec<String>, root_dir: String) -> Vec<FileSymbols> {
+    parallel::parse_files_parallel(&file_paths, &root_dir)
+}
+
+/// Resolve a single import path.
+///
+/// `aliases` may be omitted; resolution then runs with no base URL and no
+/// path mappings.
+#[napi]
+pub fn resolve_import(
+    from_file: String,
+    import_source: String,
+    root_dir: String,
+    aliases: Option<PathAliases>,
+) -> String {
+    let aliases = aliases.unwrap_or_else(empty_aliases);
+    import_resolution::resolve_import_path(&from_file, &import_source, &root_dir, &aliases)
+}
+
+/// Batch resolve multiple imports.
+///
+/// `aliases` may be omitted; resolution then runs with no base URL and no
+/// path mappings.
+#[napi]
+pub fn resolve_imports(
+    inputs: Vec<ImportResolutionInput>,
+    root_dir: String,
+    aliases: Option<PathAliases>,
+) -> Vec<ResolvedImport> {
+    let aliases = aliases.unwrap_or_else(empty_aliases);
+    import_resolution::resolve_imports_batch(&inputs, &root_dir, &aliases)
+}
+
+/// Alias configuration used when the caller passes none.
+fn empty_aliases() -> PathAliases {
+    PathAliases {
+        base_url: None,
+        paths: vec![],
+    }
+}
+
+/// Compute proximity-based confidence for call resolution.
+#[napi]
+pub fn compute_confidence(
+    caller_file: String,
+    target_file: String,
+    imported_from: Option<String>,
+) -> f64 {
+    import_resolution::compute_confidence(
+        &caller_file,
+        &target_file,
+        imported_from.as_deref(),
+    )
+}
+
+/// Detect cycles using Tarjan's SCC algorithm.
+/// Returns arrays of node names forming each cycle.
+#[napi]
+pub fn detect_cycles(edges: Vec<GraphEdge>) -> Vec<Vec<String>> {
+    cycles::detect_cycles(&edges)
+}
+
+/// Returns the engine name.
+#[napi]
+pub fn engine_name() -> String {
+    "native".to_string()
+}
+
+/// Returns the engine version (crate version).
+#[napi]
+pub fn engine_version() -> String {
+    env!("CARGO_PKG_VERSION").to_string()
+}
diff --git a/crates/codegraph-core/src/parallel.rs b/crates/codegraph-core/src/parallel.rs
new file mode 100644
index 00000000..f1d3fd29
--- /dev/null
+++ b/crates/codegraph-core/src/parallel.rs
@@ -0,0 +1,43 @@
+use rayon::prelude::*;
+use std::fs;
+use tree_sitter::Parser;
+
+use crate::extractors::extract_symbols;
+use crate::parser_registry::LanguageKind;
+use crate::types::FileSymbols;
+
+/// Parse multiple files in parallel using rayon.
+/// Each thread creates its own Parser (cheap; Language objects are Send+Sync).
+/// Failed files are silently skipped (matches WASM behavior).
+///
+/// Each path is read from disk exactly as given. `root_dir` is accepted for
+/// API symmetry but is not used.
+pub fn parse_files_parallel(file_paths: &[String], root_dir: &str) -> Vec<FileSymbols> {
+    let _ = root_dir;
+
+    file_paths
+        .par_iter()
+        .filter_map(|file_path| {
+            // Check the extension first so unsupported files are never read.
+            let lang = LanguageKind::from_extension(file_path)?;
+            let source = fs::read(file_path).ok()?;
+            parse_with(lang, file_path, &source)
+        })
+        .collect()
+}
+
+/// Parse a single file and return its symbols.
+///
+/// Returns `None` when the extension is not recognised or parsing fails.
+pub fn parse_file(file_path: &str, source: &str) -> Option<FileSymbols> {
+    let lang = LanguageKind::from_extension(file_path)?;
+    parse_with(lang, file_path, source.as_bytes())
+}
+
+/// Parse `source` with a fresh parser for `lang` and extract its symbols.
+fn parse_with(lang: LanguageKind, file_path: &str, source: &[u8]) -> Option<FileSymbols> {
+    let mut parser = Parser::new();
+    parser
+        .set_language(&lang.tree_sitter_language())
+        .ok()?;
+
+    let tree = parser.parse(source, None)?;
+    Some(extract_symbols(lang, &tree, source, file_path))
+}
diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs
new file mode 100644
index 00000000..0fdc766f
--- /dev/null
+++ b/crates/codegraph-core/src/parser_registry.rs
@@ -0,0 +1,63 @@
+use std::path::Path;
+use tree_sitter::Language;
+
+/// Languages for which a tree-sitter grammar is linked into this crate.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum LanguageKind {
+    JavaScript,
+    TypeScript,
+    Tsx,
+    Python,
+    Go,
+    Rust,
+    Java,
+    CSharp,
+    Ruby,
+    Php,
+    Hcl,
+}
+
+impl LanguageKind {
+    /// Determine language from file extension — mirrors `getParser()` in parser.js
+    ///
+    /// Matching is case-sensitive. Returns `None` for unrecognised extensions.
+    pub fn from_extension(file_path: &str) -> Option<Self> {
+        // .tsx must come before .ts check
+        if file_path.ends_with(".tsx") {
+            return Some(Self::Tsx);
+        }
+
+        let ext = Path::new(file_path)
+            .extension()
+            .and_then(|e| e.to_str())
+            .unwrap_or("");
+
+        if file_path.ends_with(".d.ts") || ext == "ts" {
+            return Some(Self::TypeScript);
+        }
+        match ext {
+            "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
+            "py" => Some(Self::Python),
+            "tf" | "hcl" => Some(Self::Hcl),
+            "go" => Some(Self::Go),
+            "rs" => Some(Self::Rust),
+            "java" => Some(Self::Java),
+            "cs" => Some(Self::CSharp),
+            "rb" => Some(Self::Ruby),
+            "php" => Some(Self::Php),
+            _ => None,
+        }
+    }
+
+    /// Return the native tree-sitter `Language` for this variant.
+    pub fn tree_sitter_language(&self) -> Language {
+        match self {
+            Self::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
+            Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
+            Self::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(),
+            Self::Python => tree_sitter_python::LANGUAGE.into(),
+            Self::Go => tree_sitter_go::LANGUAGE.into(),
+            Self::Rust => tree_sitter_rust::LANGUAGE.into(),
+            Self::Java => tree_sitter_java::LANGUAGE.into(),
+            Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
+            Self::Ruby => tree_sitter_ruby::LANGUAGE.into(),
+            Self::Php => tree_sitter_php::LANGUAGE_PHP.into(),
+            Self::Hcl => tree_sitter_hcl::LANGUAGE.into(),
+        }
+    }
+}
diff --git a/crates/codegraph-core/src/types.rs b/crates/codegraph-core/src/types.rs
new file mode 100644
index 00000000..3fcbffe9
--- /dev/null
+++ b/crates/codegraph-core/src/types.rs
@@ -0,0 +1,137 @@
+use napi_derive::napi;
+use serde::{Deserialize, Serialize};
+
+/// A named definition found in a file (function, method, class, interface, …).
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Definition {
+    pub name: String,
+    pub kind: String,
+    pub line: u32,
+    pub end_line: Option<u32>,
+    #[napi(ts_type = "string[] | undefined")]
+    pub decorators: Option<Vec<String>>,
+}
+
+/// A call site: the callee name and the line of the call.
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Call {
+    pub name: String,
+    pub line: u32,
+    pub dynamic: Option<bool>,
+}
+
+/// An import statement: its source, the imported names, and its line.
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Import {
+    pub source: String,
+    pub names: Vec<String>,
+    pub line: u32,
+    pub type_only: Option<bool>,
+    pub reexport: Option<bool>,
+    pub wildcard_reexport: Option<bool>,
+    // Language-specific flags
+    pub python_import: Option<bool>,
+    pub go_import: Option<bool>,
+    pub rust_use: Option<bool>,
+    pub java_import: Option<bool>,
+    pub csharp_using: Option<bool>,
+    pub ruby_require: Option<bool>,
+    pub php_use: Option<bool>,
+}
+
+impl Import {
+    /// Build an import with every optional flag unset.
+    pub fn new(source: String, names: Vec<String>, line: u32) -> Self {
+        Self {
+            source,
+            names,
+            line,
+            type_only: None,
+            reexport: None,
+            wildcard_reexport: None,
+            python_import: None,
+            go_import: None,
+            rust_use: None,
+            java_import: None,
+            csharp_using: None,
+            ruby_require: None,
+            php_use: None,
+        }
+    }
+}
+
+/// An inheritance or implementation relation declared by `name`.
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ClassRelation {
+    pub name: String,
+    pub extends: Option<String>,
+    pub implements: Option<String>,
+    pub line: u32,
+}
+
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ExportInfo {
+    pub name: String,
+    pub kind: String,
+    pub line: u32,
+}
+
+/// Everything extracted from one source file.
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FileSymbols {
+    pub file: String,
+    pub definitions: Vec<Definition>,
+    pub calls: Vec<Call>,
+    pub imports: Vec<Import>,
+    pub classes: Vec<ClassRelation>,
+    pub exports: Vec<ExportInfo>,
+}
+
+impl FileSymbols {
+    /// Create an empty symbol set for `file`.
+    pub fn new(file: String) -> Self {
+        Self {
+            file,
+            definitions: Vec::new(),
+            calls: Vec::new(),
+            imports: Vec::new(),
+            classes: Vec::new(),
+            exports: Vec::new(),
+        }
+    }
+}
+
+/// A directed edge used as input to cycle detection.
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GraphEdge {
+    pub source: String,
+    pub target: String,
+}
+
+/// Path alias configuration (tsconfig/jsconfig style).
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PathAliases {
+    pub base_url: Option<String>,
+    pub paths: Vec<AliasMapping>,
+}
+
+/// One alias pattern (may end in `*`) and its candidate targets.
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AliasMapping {
+    pub pattern: String,
+    pub targets: Vec<String>,
+}
+
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ImportResolutionInput {
+    pub from_file: String,
+    pub import_source: String,
+}
+
+#[napi(object)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ResolvedImport {
+    pub from_file: String,
+    pub import_source: String,
+    pub resolved_path: String,
+}
diff --git a/package.json b/package.json index 
ed723490..6cea27c1 100644 --- a/package.json +++ b/package.json @@ -54,7 +54,11 @@ }, "optionalDependencies": { "@huggingface/transformers": "^3.8.1", - "@modelcontextprotocol/sdk": "^1.0.0" + "@modelcontextprotocol/sdk": "^1.0.0", + "@optave/codegraph-linux-x64-gnu": "0.1.0", + "@optave/codegraph-darwin-arm64": "0.1.0", + "@optave/codegraph-darwin-x64": "0.1.0", + "@optave/codegraph-win32-x64-msvc": "0.1.0" }, "devDependencies": { "@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0", diff --git a/src/builder.js b/src/builder.js index 3f2299bb..1995fdcf 100644 --- a/src/builder.js +++ b/src/builder.js @@ -7,6 +7,7 @@ import { createParsers, getParser, extractSymbols, extractHCLSymbols, extractPyt import { IGNORE_DIRS, EXTENSIONS, normalizePath } from './constants.js'; import { loadConfig } from './config.js'; import { warn, debug, info } from './logger.js'; +import { loadNative, isNativeAvailable } from './native.js'; export function collectFiles(dir, files = [], config = {}) { let entries; @@ -193,7 +194,21 @@ export async function buildGraph(rootDir, opts = {}) { const config = loadConfig(rootDir); const incremental = opts.incremental !== false && config.build && config.build.incremental !== false; - const parsers = await createParsers(); + // Engine selection: 'native', 'wasm', or 'auto' (default) + const enginePref = opts.engine || 'auto'; + const useNative = enginePref === 'native' || (enginePref === 'auto' && isNativeAvailable()); + const native = useNative ? loadNative() : null; + + if (native) { + console.log(`Using native engine (v${native.engineVersion()})`); + } else { + if (enginePref === 'native') { + console.warn('Native engine requested but unavailable — falling back to WASM'); + } + console.log('Using WASM engine'); + } + + const parsers = native ? null : await createParsers(); const aliases = loadPathAliases(rootDir); // Merge config aliases if (config.aliases) { @@ -268,6 +283,75 @@ export async function buildGraph(rootDir, opts = {}) { ?
files.map(f => ({ file: f })) : changed; + // ── Native engine fast path ────────────────────────────────────────── + if (native) { + const filePaths = filesToParse.map(item => item.file); + const nativeResults = native.parseFiles(filePaths, rootDir); + + const insertNative = db.transaction(() => { + for (const result of nativeResults) { + if (!result) continue; + const relPath = normalizePath(path.relative(rootDir, result.file)); + + // Adapt native field names to match JS convention (snake_case → camelCase) + const symbols = { + definitions: (result.definitions || []).map(d => ({ + name: d.name, kind: d.kind, line: d.line, + endLine: d.endLine ?? d.end_line ?? null, + decorators: d.decorators + })), + calls: (result.calls || []).map(c => ({ + name: c.name, line: c.line, dynamic: c.dynamic + })), + imports: (result.imports || []).map(i => ({ + source: i.source, names: i.names || [], line: i.line, + typeOnly: i.typeOnly ?? i.type_only, + reexport: i.reexport, + wildcardReexport: i.wildcardReexport ?? i.wildcard_reexport, + pythonImport: i.pythonImport ?? i.python_import, + goImport: i.goImport ?? i.go_import, + rustUse: i.rustUse ?? i.rust_use, + javaImport: i.javaImport ?? i.java_import, + csharpUsing: i.csharpUsing ?? i.csharp_using, + rubyRequire: i.rubyRequire ?? i.ruby_require, + phpUse: i.phpUse ??
i.php_use + })), + classes: (result.classes || []).map(c => ({ + name: c.name, extends: c.extends, implements: c.implements, line: c.line + })), + exports: (result.exports || []).map(e => ({ + name: e.name, kind: e.kind, line: e.line + })) + }; + fileSymbols.set(relPath, symbols); + + insertNode.run(relPath, 'file', relPath, 0, null); + for (const def of symbols.definitions) { + insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); + } + for (const exp of symbols.exports) { + insertNode.run(exp.name, exp.kind, relPath, exp.line, null); + } + + // Update file hash for incremental builds + if (upsertHash) { + let code; + try { code = fs.readFileSync(result.file, 'utf-8'); } catch { code = null; } + if (code !== null) { + const hash = fileHash(code); + upsertHash.run(relPath, hash, Date.now()); + } + } + + parsed++; + if (parsed % 100 === 0) process.stdout.write(` Parsed ${parsed}/${filesToParse.length} files\r`); + } + skipped = filesToParse.length - parsed; + }); + insertNative(); + } else { + // ── WASM engine path (original) ──────────────────────────────────── + const insertMany = db.transaction(() => { for (const item of filesToParse) { const filePath = item.file; @@ -335,6 +419,7 @@ export async function buildGraph(rootDir, opts = {}) { } }); insertMany(); + } // end else (WASM path) console.log(`Parsed ${parsed} files (${skipped} skipped)`); // Clean up removed file hashes diff --git a/src/cli.js b/src/cli.js index 64c2077a..9f1257c9 100644 --- a/src/cli.js +++ b/src/cli.js @@ -17,8 +17,9 @@ const program = new Command(); program .name('codegraph') .description('Local code dependency graph tool') - .version('1.1.0') + .version('1.2.0') .option('-v, --verbose', 'Enable verbose/debug output') + .option('--engine <engine>', 'Parser engine: native, wasm, or auto (default: auto)', 'auto') .hook('preAction', (thisCommand) => { const opts = thisCommand.opts(); if (opts.verbose) setVerbose(true); @@ -30,7 +31,8 @@ program .option('--no-incremental',
'Force full rebuild (ignore file hashes)') .action(async (dir, opts) => { const root = path.resolve(dir || '.'); - await buildGraph(root, { incremental: opts.incremental }); + const engine = program.opts().engine; + await buildGraph(root, { incremental: opts.incremental, engine }); }); program diff --git a/src/cycles.js b/src/cycles.js index bd6628e4..554ae074 100644 --- a/src/cycles.js +++ b/src/cycles.js @@ -1,5 +1,8 @@ +import { loadNative } from './native.js'; + /** * Detect circular dependencies in the codebase using Tarjan's SCC algorithm. + * Dispatches to native Rust implementation when available, falls back to JS. * @param {object} db - Open SQLite database * @param {object} opts - { fileLevel: true } * @returns {string[][]} Array of cycles, each cycle is an array of file paths @@ -7,7 +10,7 @@ export function findCycles(db, opts = {}) { const fileLevel = opts.fileLevel !== false; - // Build adjacency list + // Build adjacency list from SQLite (stays in JS — only the algorithm can move to Rust) let edges; if (fileLevel) { edges = db.prepare(` @@ -32,6 +35,20 @@ export function findCycles(db, opts = {}) { `).all(); } + // Try native Rust implementation + const native = loadNative(); + if (native) { + return native.detectCycles(edges); + } + + // Fallback: JS Tarjan + return findCyclesJS(edges); +} + +/** + * Pure-JS Tarjan's SCC implementation. 
+ */ +export function findCyclesJS(edges) { const graph = new Map(); for (const { source, target } of edges) { if (!graph.has(source)) graph.set(source, []); diff --git a/src/index.js b/src/index.js index 6d1b9350..b5edf432 100644 --- a/src/index.js +++ b/src/index.js @@ -35,5 +35,8 @@ export { loadConfig } from './config.js'; // Shared constants export { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js'; +// Native engine +export { isNativeAvailable } from './native.js'; + // Logger export { setVerbose } from './logger.js'; diff --git a/src/native.js b/src/native.js new file mode 100644 index 00000000..cbe293f9 --- /dev/null +++ b/src/native.js @@ -0,0 +1,75 @@ +/** + * Native addon loader with graceful fallback to WASM. + * + * Tries to load the platform-specific napi-rs binary built from + * crates/codegraph-core. If unavailable the caller should fall back + * to the existing WASM pipeline. + */ + +import { createRequire } from 'node:module'; +import os from 'node:os'; + +let _cached = undefined; // undefined = not yet tried, null = failed, object = module +let _loadError = null; + +/** Map of (platform-arch) → npm package name. */ +const PLATFORM_PACKAGES = { + 'linux-x64': '@optave/codegraph-linux-x64-gnu', + 'darwin-arm64': '@optave/codegraph-darwin-arm64', + 'darwin-x64': '@optave/codegraph-darwin-x64', + 'win32-x64': '@optave/codegraph-win32-x64-msvc', +}; + +/** + * Try to load the native napi addon. + * Returns the module on success, null on failure. 
+ */ +export function loadNative() { + if (_cached !== undefined) return _cached; + + const require = createRequire(import.meta.url); + + // Try the umbrella package first (if published as @optave/codegraph-core) + try { + _cached = require('@optave/codegraph-core'); + return _cached; + } catch { /* try platform package */ } + + // Try the platform-specific package + const key = `${os.platform()}-${os.arch()}`; + const pkg = PLATFORM_PACKAGES[key]; + if (pkg) { + try { + _cached = require(pkg); + return _cached; + } catch (err) { + _loadError = err; + } + } else { + _loadError = new Error(`Unsupported platform: ${key}`); + } + + _cached = null; + return null; +} + +/** + * Check whether the native engine is available on this platform. + */ +export function isNativeAvailable() { + return loadNative() !== null; +} + +/** + * Return the native module or throw if not available. + */ +export function getNative() { + const mod = loadNative(); + if (!mod) { + throw new Error( + `Native codegraph-core not available: ${_loadError?.message || 'unknown error'}. ` + + 'Install the platform package or use --engine wasm.' + ); + } + return mod; +} diff --git a/src/watcher.js b/src/watcher.js index bd4b4266..eb8bcf16 100644 --- a/src/watcher.js +++ b/src/watcher.js @@ -6,6 +6,7 @@ import { createParsers, getParser, extractSymbols, extractHCLSymbols, extractPyt import { IGNORE_DIRS, EXTENSIONS, normalizePath } from './constants.js'; import { resolveImportPath } from './builder.js'; import { warn, debug, info } from './logger.js'; +import { loadNative } from './native.js'; function shouldIgnore(filePath) { const parts = filePath.split(path.sep); @@ -19,7 +20,7 @@ function isTrackedExt(filePath) { /** * Parse a single file and update the database incrementally. 
*/ -function updateFile(db, rootDir, filePath, parsers, stmts) { +function updateFile(db, rootDir, filePath, parsers, stmts, native) { const relPath = normalizePath(path.relative(rootDir, filePath)); const oldNodes = stmts.countNodes.get(relPath)?.c || 0; @@ -32,9 +33,6 @@ function updateFile(db, rootDir, filePath, parsers, stmts) { return { file: relPath, nodesAdded: 0, nodesRemoved: oldNodes, edgesAdded: 0, deleted: true }; } - const parser = getParser(parsers, filePath); - if (!parser) return null; - let code; try { code = fs.readFileSync(filePath, 'utf-8'); } catch (err) { @@ -42,18 +40,46 @@ function updateFile(db, rootDir, filePath, parsers, stmts) { return null; } - let tree; - try { tree = parser.parse(code); } - catch (err) { - warn(`Parse error in ${relPath}: ${err.message}`); - return null; - } + let symbols; + if (native) { + // Use native engine for parsing + const result = native.parseFile(filePath, code); + if (!result) return null; + symbols = { + definitions: (result.definitions || []).map(d => ({ + name: d.name, kind: d.kind, line: d.line, + endLine: d.endLine ?? d.end_line ?? null + })), + calls: (result.calls || []).map(c => ({ + name: c.name, line: c.line, dynamic: c.dynamic + })), + imports: (result.imports || []).map(i => ({ + source: i.source, names: i.names || [], line: i.line, + typeOnly: i.typeOnly ?? i.type_only, + reexport: i.reexport, wildcardReexport: i.wildcardReexport ?? i.wildcard_reexport + })), + classes: result.classes || [], + exports: (result.exports || []).map(e => ({ + name: e.name, kind: e.kind, line: e.line + })) + }; + } else { + const parser = getParser(parsers, filePath); + if (!parser) return null; + + let tree; + try { tree = parser.parse(code); } + catch (err) { + warn(`Parse error in ${relPath}: ${err.message}`); + return null; + } - const isHCL = filePath.endsWith('.tf') || filePath.endsWith('.hcl'); - const isPython = filePath.endsWith('.py'); - const symbols = isHCL ? 
extractHCLSymbols(tree, filePath) - : isPython ? extractPythonSymbols(tree, filePath) - : extractSymbols(tree, filePath); + const isHCL = filePath.endsWith('.tf') || filePath.endsWith('.hcl'); + const isPython = filePath.endsWith('.py'); + symbols = isHCL ? extractHCLSymbols(tree, filePath) + : isPython ? extractPythonSymbols(tree, filePath) + : extractSymbols(tree, filePath); + } stmts.insertNode.run(relPath, 'file', relPath, 0, null); @@ -140,7 +166,11 @@ export async function watchProject(rootDir) { const db = openDb(dbPath); initSchema(db); - const parsers = await createParsers(); + const native = loadNative(); + const parsers = native ? null : await createParsers(); + if (native) { + console.log(`Watch mode using native engine (v${native.engineVersion()})`); + } const stmts = { insertNode: db.prepare('INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)'), @@ -171,7 +201,7 @@ export async function watchProject(rootDir) { const updates = db.transaction(() => { const results = []; for (const filePath of files) { - const result = updateFile(db, rootDir, filePath, parsers, stmts); + const result = updateFile(db, rootDir, filePath, parsers, stmts, native); if (result) results.push(result); } return results; diff --git a/tests/engines/parity.test.js b/tests/engines/parity.test.js new file mode 100644 index 00000000..bd941703 --- /dev/null +++ b/tests/engines/parity.test.js @@ -0,0 +1,226 @@ +/** + * Cross-engine parity tests. + * + * Parse the same source snippets with both WASM and native engines, + * then assert the FileSymbols output is equivalent for all 11 languages. + * + * Skipped when the native engine is not installed. 
+ */ + +import { describe, it, expect, beforeAll } from 'vitest'; +import { createParsers, getParser, extractSymbols, extractHCLSymbols, extractPythonSymbols, extractGoSymbols, extractRustSymbols, extractJavaSymbols, extractCSharpSymbols, extractRubySymbols, extractPHPSymbols } from '../../src/parser.js'; +import { isNativeAvailable } from '../../src/native.js'; + +let native; +let parsers; + +function wasmExtract(code, filePath) { + const parser = getParser(parsers, filePath); + if (!parser) return null; + const tree = parser.parse(code); + const isHCL = filePath.endsWith('.tf') || filePath.endsWith('.hcl'); + const isPython = filePath.endsWith('.py'); + const isGo = filePath.endsWith('.go'); + const isRust = filePath.endsWith('.rs'); + const isJava = filePath.endsWith('.java'); + const isCSharp = filePath.endsWith('.cs'); + const isRuby = filePath.endsWith('.rb'); + const isPHP = filePath.endsWith('.php'); + return isHCL ? extractHCLSymbols(tree, filePath) + : isPython ? extractPythonSymbols(tree, filePath) + : isGo ? extractGoSymbols(tree, filePath) + : isRust ? extractRustSymbols(tree, filePath) + : isJava ? extractJavaSymbols(tree, filePath) + : isCSharp ? extractCSharpSymbols(tree, filePath) + : isRuby ? extractRubySymbols(tree, filePath) + : isPHP ? extractPHPSymbols(tree, filePath) + : extractSymbols(tree, filePath); +} + +function nativeExtract(code, filePath) { + return native.parseFile(filePath, code); +} + +/** Normalize symbols for comparison — strip undefined/null optional fields. */ +function normalize(symbols) { + if (!symbols) return symbols; + return { + definitions: (symbols.definitions || []).map(d => ({ + name: d.name, + kind: d.kind, + line: d.line, + endLine: d.endLine ?? d.end_line ?? null, + })), + calls: (symbols.calls || []).map(c => ({ + name: c.name, + line: c.line, + ...(c.dynamic ? 
{ dynamic: true } : {}), + })), + imports: (symbols.imports || []).map(i => ({ + source: i.source, + names: i.names || [], + line: i.line, + })), + classes: (symbols.classes || []).map(c => ({ + name: c.name, + ...(c.extends ? { extends: c.extends } : {}), + ...(c.implements ? { implements: c.implements } : {}), + line: c.line, + })), + exports: (symbols.exports || []).map(e => ({ + name: e.name, + kind: e.kind, + line: e.line, + })), + }; +} + +const hasNative = isNativeAvailable(); + +const describeOrSkip = hasNative ? describe : describe.skip; + +describeOrSkip('Cross-engine parity', () => { + beforeAll(async () => { + if (!hasNative) return; + const { getNative } = await import('../../src/native.js'); + native = getNative(); + parsers = await createParsers(); + }); + + const cases = [ + { + name: 'JavaScript — functions and calls', + file: 'test.js', + code: ` +function greet(name) { return 'Hello ' + name; } +const add = (a, b) => a + b; +greet('world'); +add(1, 2); +`, + }, + { + name: 'TypeScript — interfaces and types', + file: 'test.ts', + code: ` +interface Greeter { greet(name: string): string; } +type ID = string | number; +class MyGreeter implements Greeter { + greet(name: string) { return name; } +} +`, + }, + { + name: 'TSX — class with extends', + file: 'test.tsx', + code: ` +import React from 'react'; +class Button extends React.Component { + render() { return