diff --git a/.github/workflows/build-native.yml b/.github/workflows/build-native.yml new file mode 100644 index 00000000..22cacdb0 --- /dev/null +++ b/.github/workflows/build-native.yml @@ -0,0 +1,134 @@ +name: Build Native + +on: + push: + tags: + - 'v*' + workflow_dispatch: + +permissions: + contents: write + +jobs: + build: + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + package: '@optave/codegraph-linux-x64-gnu' + node-arch: x64 + node-os: linux + - os: macos-latest + target: aarch64-apple-darwin + package: '@optave/codegraph-darwin-arm64' + node-arch: arm64 + node-os: darwin + - os: macos-13 + target: x86_64-apple-darwin + package: '@optave/codegraph-darwin-x64' + node-arch: x64 + node-os: darwin + - os: windows-latest + target: x86_64-pc-windows-msvc + package: '@optave/codegraph-win32-x64-msvc' + node-arch: x64 + node-os: win32 + + runs-on: ${{ matrix.os }} + name: Build ${{ matrix.target }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 20 + registry-url: 'https://registry.npmjs.org' + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + + - name: Rust cache + uses: Swatinem/rust-cache@v2 + with: + workspaces: crates/codegraph-core + + - name: Install napi-rs CLI + run: npm install -g @napi-rs/cli@3 + + - name: Build native addon + working-directory: crates/codegraph-core + run: napi build --release --target ${{ matrix.target }} + + - name: Prepare platform package + shell: bash + run: | + PKG_DIR="npm/${{ matrix.node-os }}-${{ matrix.node-arch }}" + mkdir -p "$PKG_DIR" + + # Copy the built .node file (use glob to avoid Windows find issues) + cp crates/codegraph-core/*.node "$PKG_DIR/codegraph-core.node" + + # Read version from root package.json + VERSION=$(node -p "require('./package.json').version") + + # Generate package.json for the platform package + cat > "$PKG_DIR/package.json" < 1 (actual cycles). +/// Mirrors the JS implementation in src/cycles.js. +pub fn detect_cycles(edges: &[GraphEdge]) -> Vec> { + // Build adjacency list + let mut graph: HashMap<&str, Vec<&str>> = HashMap::new(); + for edge in edges { + graph + .entry(edge.source.as_str()) + .or_default() + .push(edge.target.as_str()); + graph.entry(edge.target.as_str()).or_default(); + } + + let mut state = TarjanState { + index: 0, + stack: Vec::new(), + on_stack: HashMap::new(), + indices: HashMap::new(), + lowlinks: HashMap::new(), + sccs: Vec::new(), + }; + + let nodes: Vec<&str> = graph.keys().copied().collect(); + for node in nodes { + if !state.indices.contains_key(node) { + strongconnect(node, &graph, &mut state); + } + } + + state.sccs +} + +struct TarjanState<'a> { + index: usize, + stack: Vec<&'a str>, + on_stack: HashMap<&'a str, bool>, + indices: HashMap<&'a str, usize>, + lowlinks: HashMap<&'a str, usize>, + sccs: Vec>, +} + +fn strongconnect<'a>( + v: &'a str, + graph: &HashMap<&'a str, Vec<&'a str>>, + state: &mut TarjanState<'a>, +) { + state.indices.insert(v, state.index); + state.lowlinks.insert(v, state.index); + state.index += 1; + state.stack.push(v); + state.on_stack.insert(v, true); + + if let Some(neighbors) = graph.get(v) { + for &w in neighbors { + if !state.indices.contains_key(w) { + strongconnect(w, graph, state); + let low_w = state.lowlinks[w]; + let low_v = state.lowlinks[v]; + state.lowlinks.insert(v, low_v.min(low_w)); + } else if state.on_stack.get(w).copied().unwrap_or(false) { + let idx_w = state.indices[w]; + let low_v = state.lowlinks[v]; + state.lowlinks.insert(v, low_v.min(idx_w)); + } + } + } + + if state.lowlinks[v] == state.indices[v] { + let mut scc = Vec::new(); + loop { + let w = state.stack.pop().unwrap(); + state.on_stack.insert(w, false); + scc.push(w.to_string()); + if w == v { + break; + } + } + if scc.len() > 1 { + state.sccs.push(scc); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_no_cycles() { + let edges = vec![ + GraphEdge { + source: "a".to_string(), + target: "b".to_string(), + }, + GraphEdge { + source: "b".to_string(), + target: "c".to_string(), + }, + ]; + let cycles = detect_cycles(&edges); + assert!(cycles.is_empty()); + } + + #[test] + fn test_simple_cycle() { + let edges = vec![ + GraphEdge { + source: "a".to_string(), + target: "b".to_string(), + }, + GraphEdge { + source: "b".to_string(), + target: "a".to_string(), + }, + ]; + let cycles = detect_cycles(&edges); + assert_eq!(cycles.len(), 1); + assert_eq!(cycles[0].len(), 2); + } + + #[test] + fn test_triangle_cycle() { + let edges = vec![ + GraphEdge { + source: "a".to_string(), + target: "b".to_string(), + }, + GraphEdge { + source: "b".to_string(), + target: "c".to_string(), + }, + GraphEdge { + source: "c".to_string(), + target: "a".to_string(), + }, + ]; + let cycles = detect_cycles(&edges); + assert_eq!(cycles.len(), 1); + assert_eq!(cycles[0].len(), 3); + } + + #[test] + fn test_multiple_cycles() { + let edges = vec![ + GraphEdge { + source: "a".to_string(), + target: "b".to_string(), + }, + GraphEdge { + source: "b".to_string(), + target: "a".to_string(), + }, + GraphEdge { + source: "c".to_string(), + target: "d".to_string(), + }, + GraphEdge { + source: "d".to_string(), + target: "c".to_string(), + }, + ]; + let cycles = detect_cycles(&edges); + assert_eq!(cycles.len(), 2); + } +} diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs new file mode 100644 index 00000000..3421ca88 --- /dev/null +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -0,0 +1,332 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct CSharpExtractor; + +impl SymbolExtractor for CSharpExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_csharp_parent_type<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class_declaration" | "struct_declaration" | "interface_declaration" + | "enum_declaration" | "record_declaration" => { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + _ => {} + } + current = parent.parent(); + } + None +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + extract_csharp_base_types(node, &class_name, source, symbols); + } + } + + "struct_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + extract_csharp_base_types(node, &name, source, symbols); + } + } + + "record_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + extract_csharp_base_types(node, &name, source, symbols); + } + } + + "interface_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "method_declaration" { + if let Some(meth_name) = child.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: format!( + "{}.{}", + iface_name, + node_text(&meth_name, source) + ), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } + } + } + } + + "enum_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_type = find_csharp_parent_type(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_type { + Some(pt) => format!("{}.{}", pt, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "constructor_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_type = find_csharp_parent_type(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_type { + Some(pt) => format!("{}.{}", pt, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "property_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_type = find_csharp_parent_type(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_type { + Some(pt) => format!("{}.{}", pt, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "using_directive" => { + let name_node = node + .child_by_field_name("name") + .or_else(|| find_child(node, "qualified_name")) + .or_else(|| find_child(node, "identifier")); + if let Some(name_node) = name_node { + let full_path = node_text(&name_node, source).to_string(); + let last_name = full_path.split('.').last().unwrap_or("").to_string(); + let mut imp = Import::new(full_path, vec![last_name], start_line(node)); + imp.csharp_using = Some(true); + symbols.imports.push(imp); + } + } + + "invocation_expression" => { + let fn_node = node + .child_by_field_name("function") + .or_else(|| node.child(0)); + if let Some(fn_node) = fn_node { + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + "member_access_expression" => { + if let Some(name) = fn_node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + "generic_name" | "member_binding_expression" => { + let name = fn_node + .child_by_field_name("name") + .or_else(|| fn_node.child(0)); + if let Some(name) = name { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + _ => {} + } + } + } + + "object_creation_expression" => { + if let Some(type_node) = node.child_by_field_name("type") { + let type_name = if type_node.kind() == "generic_name" { + type_node + .child_by_field_name("name") + .or_else(|| type_node.child(0)) + .map(|n| node_text(&n, source).to_string()) + } else { + Some(node_text(&type_node, source).to_string()) + }; + if let Some(name) = type_name { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + }); + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_csharp_base_types( + node: &Node, + class_name: &str, + source: &[u8], + symbols: &mut FileSymbols, +) { + let base_list = node.child_by_field_name("bases"); + let base_list = match base_list { + Some(bl) => bl, + None => return, + }; + + for i in 0..base_list.child_count() { + if let Some(child) = base_list.child(i) { + match child.kind() { + "identifier" | "qualified_name" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + } + "generic_name" => { + let name = child + .child_by_field_name("name") + .or_else(|| child.child(0)); + if let Some(name) = name { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&name, source).to_string()), + implements: None, + line: start_line(node), + }); + } + } + "base_list" => { + for j in 0..child.child_count() { + if let Some(base) = child.child(j) { + match base.kind() { + "identifier" | "qualified_name" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&base, source).to_string()), + implements: None, + line: start_line(node), + }); + } + "generic_name" => { + let name = base + .child_by_field_name("name") + .or_else(|| base.child(0)); + if let Some(name) = name { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some( + node_text(&name, source).to_string(), + ), + implements: None, + line: start_line(node), + }); + } + } + _ => {} + } + } + } + } + _ => {} + } + } + } +} diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs new file mode 100644 index 00000000..0799281f --- /dev/null +++ b/crates/codegraph-core/src/extractors/go.rs @@ -0,0 +1,246 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct GoExtractor; + +impl SymbolExtractor for GoExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "function_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let receiver = node.child_by_field_name("receiver"); + let mut receiver_type: Option = None; + if let Some(receiver) = receiver { + for i in 0..receiver.child_count() { + if let Some(param) = receiver.child(i) { + if let Some(type_node) = param.child_by_field_name("type") { + receiver_type = Some(if type_node.kind() == "pointer_type" { + node_text(&type_node, source) + .trim_start_matches('*') + .to_string() + } else { + node_text(&type_node, source).to_string() + }); + break; + } + } + } + } + let name = node_text(&name_node, source); + let full_name = match &receiver_type { + Some(rt) => format!("{}.{}", rt, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "type_declaration" => { + for i in 0..node.child_count() { + if let Some(spec) = node.child(i) { + if spec.kind() != "type_spec" { + continue; + } + let name_node = spec.child_by_field_name("name"); + let type_node = spec.child_by_field_name("type"); + if let (Some(name_node), Some(type_node)) = (name_node, type_node) { + let name = node_text(&name_node, source).to_string(); + match type_node.kind() { + "struct_type" => { + symbols.definitions.push(Definition { + name, + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + "interface_type" => { + symbols.definitions.push(Definition { + name: name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + // Extract interface methods + for j in 0..type_node.child_count() { + if let Some(member) = type_node.child(j) { + if member.kind() == "method_elem" { + if let Some(meth_name) = + member.child_by_field_name("name") + { + symbols.definitions.push(Definition { + name: format!( + "{}.{}", + name, + node_text(&meth_name, source) + ), + kind: "method".to_string(), + line: start_line(&member), + end_line: Some(end_line(&member)), + decorators: None, + }); + } + } + } + } + } + _ => { + symbols.definitions.push(Definition { + name, + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + } + } + } + } + + "import_declaration" => { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + match child.kind() { + "import_spec" => { + extract_go_import_spec(&child, source, symbols); + } + "import_spec_list" => { + for j in 0..child.child_count() { + if let Some(spec) = child.child(j) { + if spec.kind() == "import_spec" { + extract_go_import_spec(&spec, source, symbols); + } + } + } + } + _ => {} + } + } + } + } + + "call_expression" => { + if let Some(fn_node) = node.child_by_field_name("function") { + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + "selector_expression" => { + if let Some(field) = fn_node.child_by_field_name("field") { + symbols.calls.push(Call { + name: node_text(&field, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + _ => {} + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_go_import_spec(spec: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(path_node) = spec.child_by_field_name("path") { + let import_path = node_text(&path_node, source).replace('"', ""); + let name_node = spec.child_by_field_name("name"); + let alias = match name_node { + Some(n) => node_text(&n, source).to_string(), + None => import_path.split('/').last().unwrap_or("").to_string(), + }; + let mut imp = Import::new(import_path, vec![alias], start_line(spec)); + imp.go_import = Some(true); + symbols.imports.push(imp); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_go(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_go::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + GoExtractor.extract(&tree, code.as_bytes(), "test.go") + } + + #[test] + fn finds_function() { + let s = parse_go("package main\nfunc hello() {}\n"); + assert_eq!(s.definitions.len(), 1); + assert_eq!(s.definitions[0].name, "hello"); + assert_eq!(s.definitions[0].kind, "function"); + } + + #[test] + fn finds_struct_and_method() { + let s = parse_go("package main\ntype Server struct{}\nfunc (s *Server) Start() {}\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Server")); + assert!(names.contains(&"Server.Start")); + } + + #[test] + fn finds_interface() { + let s = parse_go("package main\ntype Reader interface {\n\tRead() error\n}\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Reader")); + assert!(names.contains(&"Reader.Read")); + } + + #[test] + fn finds_imports() { + let s = parse_go("package main\nimport (\n\t\"fmt\"\n\t\"os\"\n)\n"); + assert_eq!(s.imports.len(), 2); + assert_eq!(s.imports[0].source, "fmt"); + assert_eq!(s.imports[1].source, "os"); + } +} diff --git a/crates/codegraph-core/src/extractors/hcl.rs b/crates/codegraph-core/src/extractors/hcl.rs new file mode 100644 index 00000000..776c9de8 --- /dev/null +++ b/crates/codegraph-core/src/extractors/hcl.rs @@ -0,0 +1,114 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct HclExtractor; + +impl SymbolExtractor for HclExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if node.kind() == "block" { + let mut identifiers = Vec::new(); + let mut strings = Vec::new(); + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "identifier" { + identifiers.push(node_text(&child, source).to_string()); + } + if child.kind() == "string_lit" { + strings.push( + node_text(&child, source) + .replace('"', "") + .to_string(), + ); + } + } + } + + if !identifiers.is_empty() { + let block_type = &identifiers[0]; + let mut name = String::new(); + + match block_type.as_str() { + "resource" if strings.len() >= 2 => { + name = format!("{}.{}", strings[0], strings[1]); + } + "data" if strings.len() >= 2 => { + name = format!("data.{}.{}", strings[0], strings[1]); + } + "variable" | "output" | "module" if !strings.is_empty() => { + name = format!("{}.{}", block_type, strings[0]); + } + "locals" => { + name = "locals".to_string(); + } + "terraform" | "provider" => { + name = block_type.clone(); + if !strings.is_empty() { + name = format!("{}.{}", block_type, strings[0]); + } + } + _ => {} + } + + if !name.is_empty() { + symbols.definitions.push(Definition { + name, + kind: block_type.clone(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + + // Module source imports + if block_type == "module" { + let body = node + .children(&mut node.walk()) + .find(|c| c.kind() == "body"); + if let Some(body) = body { + for i in 0..body.child_count() { + if let Some(attr) = body.child(i) { + if attr.kind() == "attribute" { + let key = attr + .child_by_field_name("key") + .or_else(|| attr.child(0)); + let val = attr + .child_by_field_name("val") + .or_else(|| attr.child(2)); + if let (Some(key), Some(val)) = (key, val) { + if node_text(&key, source) == "source" { + let src = + node_text(&val, source).replace('"', ""); + if src.starts_with("./") || src.starts_with("../") + { + symbols.imports.push(Import::new( + src, + vec![], + start_line(&attr), + )); + } + } + } + } + } + } + } + } + } + } + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs new file mode 100644 index 00000000..f931732f --- /dev/null +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -0,0 +1,58 @@ +use tree_sitter::Node; + +/// Get the text of a node from the source bytes. +pub fn node_text<'a>(node: &Node, source: &'a [u8]) -> &'a str { + node.utf8_text(source).unwrap_or("") +} + +/// Find the first child of a given type. +pub fn find_child<'a>(node: &Node<'a>, kind: &str) -> Option> { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == kind { + return Some(child); + } + } + } + None +} + +/// Find a parent of a given type, walking up the tree. +pub fn find_parent_of_type<'a>(node: &Node<'a>, kind: &str) -> Option> { + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == kind { + return Some(parent); + } + current = parent.parent(); + } + None +} + +/// Find a parent that is any of the given types. +pub fn find_parent_of_types<'a>(node: &Node<'a>, kinds: &[&str]) -> Option> { + let mut current = node.parent(); + while let Some(parent) = current { + if kinds.contains(&parent.kind()) { + return Some(parent); + } + current = parent.parent(); + } + None +} + +/// Get the name of a named field child, returning its text. +pub fn named_child_text<'a>(node: &Node<'a>, field: &str, source: &'a [u8]) -> Option<&'a str> { + node.child_by_field_name(field) + .map(|n| node_text(&n, source)) +} + +/// Get the 1-based start line of a node. +pub fn start_line(node: &Node) -> u32 { + node.start_position().row as u32 + 1 +} + +/// Get the 1-based end line of a node. +pub fn end_line(node: &Node) -> u32 { + node.end_position().row as u32 + 1 +} diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs new file mode 100644 index 00000000..b1b5e492 --- /dev/null +++ b/crates/codegraph-core/src/extractors/java.rs @@ -0,0 +1,290 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct JavaExtractor; + +impl SymbolExtractor for JavaExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_java_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class_declaration" | "enum_declaration" | "interface_declaration" => { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + _ => {} + } + current = parent.parent(); + } + None +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + + // Superclass + if let Some(superclass) = node.child_by_field_name("superclass") { + for i in 0..superclass.child_count() { + if let Some(child) = superclass.child(i) { + match child.kind() { + "type_identifier" | "identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + break; + } + "generic_type" => { + if let Some(first) = child.child(0) { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some( + node_text(&first, source).to_string(), + ), + implements: None, + line: start_line(node), + }); + } + break; + } + _ => {} + } + } + } + } + + // Interfaces + if let Some(interfaces) = node.child_by_field_name("interfaces") { + extract_java_interfaces(&interfaces, &class_name, source, symbols); + } + } + } + + "interface_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "method_declaration" { + if let Some(meth_name) = child.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: format!( + "{}.{}", + iface_name, + node_text(&meth_name, source) + ), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } + } + } + } + + "enum_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_java_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "constructor_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_java_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "import_declaration" => { + let mut import_path = String::new(); + let mut has_asterisk = false; + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "scoped_identifier" || child.kind() == "identifier" { + import_path = node_text(&child, source).to_string(); + } + if child.kind() == "asterisk" { + has_asterisk = true; + } + } + } + if !import_path.is_empty() { + let names = if has_asterisk { + vec!["*".to_string()] + } else { + let last = import_path.split('.').last().unwrap_or("").to_string(); + vec![last] + }; + let mut imp = Import::new(import_path, names, start_line(node)); + imp.java_import = Some(true); + symbols.imports.push(imp); + } + } + + "method_invocation" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name_node, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + + "object_creation_expression" => { + if let Some(type_node) = node.child_by_field_name("type") { + let type_name = if type_node.kind() == "generic_type" { + type_node.child(0).map(|n| node_text(&n, source).to_string()) + } else { + Some(node_text(&type_node, source).to_string()) + }; + if let Some(name) = type_name { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + }); + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_java_interfaces( + interfaces: &Node, + class_name: &str, + source: &[u8], + symbols: &mut FileSymbols, +) { + for i in 0..interfaces.child_count() { + if let Some(child) = interfaces.child(i) { + match child.kind() { + "type_identifier" | "identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some(node_text(&child, source).to_string()), + line: start_line(interfaces), + }); + } + "type_list" => { + for j in 0..child.child_count() { + if let Some(t) = child.child(j) { + match t.kind() { + "type_identifier" | "identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some(node_text(&t, source).to_string()), + line: start_line(interfaces), + }); + } + "generic_type" => { + if let Some(first) = t.child(0) { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some( + node_text(&first, source).to_string(), + ), + line: start_line(interfaces), + }); + } + } + _ => {} + } + } + } + } + "generic_type" => { + if let Some(first) = child.child(0) { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some(node_text(&first, source).to_string()), + line: start_line(interfaces), + }); + } + } + _ => {} + } + } + } +} diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs new file mode 100644 index 00000000..f3835415 --- /dev/null +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -0,0 +1,607 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct JsExtractor; + +impl SymbolExtractor for JsExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "function_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + + // Heritage: extends + implements + let heritage = node + .child_by_field_name("heritage") + .or_else(|| find_child(node, "class_heritage")); + if let Some(heritage) = heritage { + if let Some(super_name) = extract_superclass(&heritage, source) { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(super_name), + implements: None, + line: start_line(node), + }); + } + for iface in extract_implements(&heritage, source) { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: None, + implements: Some(iface), + line: start_line(node), + }); + } + } + } + } + + "method_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + let method_name = node_text(&name_node, source); + let parent_class = find_parent_class(node, source); + let full_name = match parent_class { + Some(cls) => format!("{}.{}", cls, method_name), + None => method_name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "interface_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + // Extract interface methods + let body = node + .child_by_field_name("body") + .or_else(|| find_child(node, "interface_body")) + .or_else(|| find_child(node, "object_type")); + if let Some(body) = body { + extract_interface_methods(&body, &iface_name, source, &mut symbols.definitions); + } + } + } + + "type_alias_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "lexical_declaration" | "variable_declaration" => { + for i in 0..node.child_count() { + if let Some(declarator) = node.child(i) { + if declarator.kind() == "variable_declarator" { + let name_n = declarator.child_by_field_name("name"); + let value_n = declarator.child_by_field_name("value"); + if let (Some(name_n), Some(value_n)) = (name_n, value_n) { + let vt = value_n.kind(); + if vt == "arrow_function" + || vt == "function_expression" + || vt == "function" + { + symbols.definitions.push(Definition { + name: node_text(&name_n, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(&value_n)), + decorators: None, + }); + } + } + } + } + } + } + + "call_expression" => { + if let Some(fn_node) = node.child_by_field_name("function") { + if let Some(call_info) = extract_call_info(&fn_node, node, source) { + symbols.calls.push(call_info); + } + } + } + + "import_statement" => { + let text = node_text(node, source); + let is_type_only = text.starts_with("import type"); + let source_node = node + .child_by_field_name("source") + .or_else(|| find_child(node, "string")); + if let Some(source_node) = source_node { + let mod_path = node_text(&source_node, source) + .replace(&['\'', '"'][..], ""); + let names = extract_import_names(node, source); + let mut imp = Import::new(mod_path, names, start_line(node)); + if is_type_only { + imp.type_only = Some(true); + } + symbols.imports.push(imp); + } + } + + "export_statement" => { + let decl = node.child_by_field_name("declaration"); + if let Some(decl) = &decl { + match decl.kind() { + "function_declaration" => { + if let Some(n) = decl.child_by_field_name("name") { + symbols.exports.push(ExportInfo { + name: node_text(&n, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + }); + } + } + "class_declaration" => { + if let Some(n) = decl.child_by_field_name("name") { + symbols.exports.push(ExportInfo { + name: node_text(&n, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + }); + } + } + "interface_declaration" => { + if let Some(n) = decl.child_by_field_name("name") { + symbols.exports.push(ExportInfo { + name: node_text(&n, source).to_string(), + kind: "interface".to_string(), + line: start_line(node), + }); + } + } + "type_alias_declaration" => { + if let Some(n) = decl.child_by_field_name("name") { + symbols.exports.push(ExportInfo { + name: node_text(&n, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + }); + } + } + _ => {} + } + } + let source_node = node + .child_by_field_name("source") + .or_else(|| find_child(node, "string")); + if source_node.is_some() && decl.is_none() { + let source_node = source_node.unwrap(); + let mod_path = node_text(&source_node, source) + .replace(&['\'', '"'][..], ""); + let reexport_names = extract_import_names(node, source); + let text = node_text(node, source); + let is_wildcard = + text.contains("export *") || text.contains("export*"); + let mut imp = Import::new(mod_path, reexport_names.clone(), start_line(node)); + imp.reexport = Some(true); + if is_wildcard && reexport_names.is_empty() { + imp.wildcard_reexport = Some(true); + } + symbols.imports.push(imp); + } + } + + "expression_statement" => { + if let Some(expr) = node.child(0) { + if expr.kind() == "assignment_expression" { + let left = expr.child_by_field_name("left"); + let right = expr.child_by_field_name("right"); + if let (Some(left), Some(right)) = (left, right) { + let left_text = node_text(&left, source); + if left_text.starts_with("module.exports") || left_text == "exports" { + if right.kind() == "call_expression" { + let fn_node = right.child_by_field_name("function"); + let args = right + .child_by_field_name("arguments") + .or_else(|| find_child(&right, "arguments")); + if let (Some(fn_node), Some(args)) = (fn_node, args) { + if node_text(&fn_node, source) == "require" { + if let Some(str_arg) = find_child(&args, "string") { + let mod_path = node_text(&str_arg, source) + .replace(&['\'', '"'][..], ""); + let mut imp = + Import::new(mod_path, vec![], start_line(node)); + imp.reexport = Some(true); + imp.wildcard_reexport = Some(true); + symbols.imports.push(imp); + } + } + } + } + if right.kind() == "object" { + for ci in 0..right.child_count() { + if let Some(child) = right.child(ci) { + if child.kind() == "spread_element" { + let spread_expr = child + .child(1) + .or_else(|| child.child_by_field_name("value")); + if let Some(spread_expr) = spread_expr { + if spread_expr.kind() == "call_expression" { + let fn2 = spread_expr + .child_by_field_name("function"); + let args2 = spread_expr + .child_by_field_name("arguments") + .or_else(|| { + find_child( + &spread_expr, + "arguments", + ) + }); + if let (Some(fn2), Some(args2)) = + (fn2, args2) + { + if node_text(&fn2, source) == "require" { + if let Some(str_arg2) = + find_child(&args2, "string") + { + let mod_path2 = + node_text(&str_arg2, source) + .replace( + &['\'', '"'][..], + "", + ); + let mut imp = Import::new( + mod_path2, + vec![], + start_line(node), + ); + imp.reexport = Some(true); + imp.wildcard_reexport = Some(true); + symbols.imports.push(imp); + } + } + } + } + } + } + } + } + } + } + } + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_interface_methods( + body: &Node, + iface_name: &str, + source: &[u8], + definitions: &mut Vec, +) { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "method_signature" || child.kind() == "property_signature" { + if let Some(name_node) = child.child_by_field_name("name") { + definitions.push(Definition { + name: format!("{}.{}", iface_name, node_text(&name_node, source)), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } +} + +fn extract_implements(heritage: &Node, source: &[u8]) -> Vec { + let mut interfaces = Vec::new(); + for i in 0..heritage.child_count() { + if let Some(child) = heritage.child(i) { + if node_text(&child, source) == "implements" { + for j in (i + 1)..heritage.child_count() { + if let Some(next) = heritage.child(j) { + if next.kind() == "identifier" || next.kind() == "type_identifier" { + interfaces.push(node_text(&next, source).to_string()); + } + if next.child_count() > 0 { + extract_implements_from_node(&next, source, &mut interfaces); + } + } + } + break; + } + if child.kind() == "implements_clause" { + extract_implements_from_node(&child, source, &mut interfaces); + } + } + } + interfaces +} + +fn extract_implements_from_node(node: &Node, source: &[u8], result: &mut Vec) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "identifier" || child.kind() == "type_identifier" { + result.push(node_text(&child, source).to_string()); + } + if child.child_count() > 0 { + extract_implements_from_node(&child, source, result); + } + } + } +} + +fn extract_call_info(fn_node: &Node, call_node: &Node, source: &[u8]) -> Option { + match fn_node.kind() { + "identifier" => Some(Call { + name: node_text(fn_node, source).to_string(), + line: start_line(call_node), + dynamic: None, + }), + "member_expression" => { + let obj = fn_node.child_by_field_name("object"); + let prop = fn_node.child_by_field_name("property"); + let prop = prop?; + let prop_text = node_text(&prop, source); + + if prop_text == "call" || prop_text == "apply" || prop_text == "bind" { + if let Some(obj) = &obj { + if obj.kind() == "identifier" { + return Some(Call { + name: node_text(obj, source).to_string(), + line: start_line(call_node), + dynamic: Some(true), + }); + } + if obj.kind() == "member_expression" { + if let Some(inner_prop) = obj.child_by_field_name("property") { + return Some(Call { + name: node_text(&inner_prop, source).to_string(), + line: start_line(call_node), + dynamic: Some(true), + }); + } + } + } + } + + if prop.kind() == "string" || prop.kind() == "string_fragment" { + let method_name = node_text(&prop, source).replace(&['\'', '"'][..], ""); + if !method_name.is_empty() { + return Some(Call { + name: method_name, + line: start_line(call_node), + dynamic: Some(true), + }); + } + } + + Some(Call { + name: prop_text.to_string(), + line: start_line(call_node), + dynamic: None, + }) + } + "subscript_expression" => { + let index = fn_node.child_by_field_name("index"); + if let Some(index) = index { + if index.kind() == "string" || index.kind() == "template_string" { + let method_name = node_text(&index, source) + .replace(&['\'', '"', '`'][..], ""); + if !method_name.is_empty() && !method_name.contains('$') { + return Some(Call { + name: method_name, + line: start_line(call_node), + dynamic: Some(true), + }); + } + } + } + None + } + _ => None, + } +} + +fn extract_superclass(heritage: &Node, source: &[u8]) -> Option { + for i in 0..heritage.child_count() { + if let Some(child) = heritage.child(i) { + if child.kind() == "identifier" || child.kind() == "member_expression" { + return Some(node_text(&child, source).to_string()); + } + if let Some(found) = extract_superclass(&child, source) { + return Some(found); + } + } + } + None +} + +fn find_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == "class_declaration" || parent.kind() == "class" { + if let Some(name_node) = parent.child_by_field_name("name") { + return Some(node_text(&name_node, source).to_string()); + } + return None; + } + current = parent.parent(); + } + None +} + +fn extract_import_names(node: &Node, source: &[u8]) -> Vec { + let mut names = Vec::new(); + scan_import_names(node, source, &mut names); + names +} + +fn scan_import_names(node: &Node, source: &[u8], names: &mut Vec) { + match node.kind() { + "import_specifier" | "export_specifier" => { + let name_node = node + .child_by_field_name("name") + .or_else(|| node.child_by_field_name("alias")); + if let Some(name_node) = name_node { + names.push(node_text(&name_node, source).to_string()); + } else { + names.push(node_text(node, source).to_string()); + } + } + "identifier" => { + if let Some(parent) = node.parent() { + if parent.kind() == "import_clause" { + names.push(node_text(node, source).to_string()); + } + } + } + "namespace_import" => { + names.push(node_text(node, source).to_string()); + } + _ => {} + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + scan_import_names(&child, source, names); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_js(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_javascript::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + JsExtractor.extract(&tree, code.as_bytes(), "test.js") + } + + #[test] + fn finds_function_declaration() { + let s = parse_js("function greet(name) { return name; }"); + assert_eq!(s.definitions.len(), 1); + assert_eq!(s.definitions[0].name, "greet"); + assert_eq!(s.definitions[0].kind, "function"); + } + + #[test] + fn finds_arrow_function() { + let s = parse_js("const add = (a, b) => a + b;"); + assert_eq!(s.definitions.len(), 1); + assert_eq!(s.definitions[0].name, "add"); + assert_eq!(s.definitions[0].kind, "function"); + } + + #[test] + fn finds_class_with_methods() { + let s = parse_js("class Foo { bar() {} baz() {} }"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Foo")); + assert!(names.contains(&"Foo.bar")); + assert!(names.contains(&"Foo.baz")); + } + + #[test] + fn finds_imports() { + let s = parse_js("import { readFile } from 'fs';"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "fs"); + assert_eq!(s.imports[0].names, vec!["readFile"]); + } + + #[test] + fn finds_calls() { + let s = parse_js("function f() { console.log('hi'); foo(); }"); + let call_names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect(); + assert!(call_names.contains(&"log")); + assert!(call_names.contains(&"foo")); + } + + #[test] + fn finds_exports() { + let s = parse_js("export function hello() {} export class World {}"); + assert_eq!(s.exports.len(), 2); + assert_eq!(s.exports[0].name, "hello"); + assert_eq!(s.exports[1].name, "World"); + } + + #[test] + fn finds_class_heritage() { + let s = parse_js("class Dog extends Animal {}"); + assert_eq!(s.classes.len(), 1); + assert_eq!(s.classes[0].name, "Dog"); + assert_eq!(s.classes[0].extends, Some("Animal".to_string())); + } + + #[test] + fn finds_reexports() { + let s = parse_js("export { foo, bar } from './utils';"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].reexport, Some(true)); + assert_eq!(s.imports[0].source, "./utils"); + } + + #[test] + fn finds_wildcard_reexport() { + let s = parse_js("export * from './helpers';"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].wildcard_reexport, Some(true)); + } +} diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs new file mode 100644 index 00000000..c0a81ff8 --- /dev/null +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -0,0 +1,36 @@ +pub mod helpers; +pub mod javascript; +pub mod python; +pub mod go; +pub mod rust_lang; +pub mod java; +pub mod csharp; +pub mod ruby; +pub mod php; +pub mod hcl; + +use tree_sitter::Tree; +use crate::types::FileSymbols; +use crate::parser_registry::LanguageKind; + +/// Trait every language extractor implements. +pub trait SymbolExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols; +} + +/// Dispatch to the correct extractor based on language kind. +pub fn extract_symbols(lang: LanguageKind, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + match lang { + LanguageKind::JavaScript | LanguageKind::TypeScript | LanguageKind::Tsx => { + javascript::JsExtractor.extract(tree, source, file_path) + } + LanguageKind::Python => python::PythonExtractor.extract(tree, source, file_path), + LanguageKind::Go => go::GoExtractor.extract(tree, source, file_path), + LanguageKind::Rust => rust_lang::RustExtractor.extract(tree, source, file_path), + LanguageKind::Java => java::JavaExtractor.extract(tree, source, file_path), + LanguageKind::CSharp => csharp::CSharpExtractor.extract(tree, source, file_path), + LanguageKind::Ruby => ruby::RubyExtractor.extract(tree, source, file_path), + LanguageKind::Php => php::PhpExtractor.extract(tree, source, file_path), + LanguageKind::Hcl => hcl::HclExtractor.extract(tree, source, file_path), + } +} diff --git a/crates/codegraph-core/src/extractors/php.rs b/crates/codegraph-core/src/extractors/php.rs new file mode 100644 index 00000000..4092333f --- /dev/null +++ b/crates/codegraph-core/src/extractors/php.rs @@ -0,0 +1,274 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct PhpExtractor; + +impl SymbolExtractor for PhpExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_php_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class_declaration" | "trait_declaration" | "enum_declaration" => { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + _ => {} + } + current = parent.parent(); + } + None +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "function_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "class_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + + // Extends + let base_clause = node + .child_by_field_name("base_clause") + .or_else(|| find_child(node, "base_clause")); + if let Some(base_clause) = base_clause { + for i in 0..base_clause.child_count() { + if let Some(child) = base_clause.child(i) { + if child.kind() == "name" || child.kind() == "qualified_name" { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + break; + } + } + } + } + + // Implements + let interface_clause = find_child(node, "class_interface_clause"); + if let Some(interface_clause) = interface_clause { + for i in 0..interface_clause.child_count() { + if let Some(child) = interface_clause.child(i) { + if child.kind() == "name" || child.kind() == "qualified_name" { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: None, + implements: Some(node_text(&child, source).to_string()), + line: start_line(node), + }); + } + } + } + } + } + } + + "interface_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let iface_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: iface_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "method_declaration" { + if let Some(meth_name) = child.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: format!( + "{}.{}", + iface_name, + node_text(&meth_name, source) + ), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } + } + } + } + + "trait_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "enum_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method_declaration" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_php_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "namespace_use_declaration" => { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "namespace_use_clause" { + let name_node = find_child(&child, "qualified_name") + .or_else(|| find_child(&child, "name")); + if let Some(name_node) = name_node { + let full_path = node_text(&name_node, source).to_string(); + let last_name = full_path.split('\\').last().unwrap_or("").to_string(); + let alias = child.child_by_field_name("alias"); + let alias_text = alias + .map(|a| node_text(&a, source).to_string()) + .unwrap_or(last_name); + let mut imp = + Import::new(full_path, vec![alias_text], start_line(node)); + imp.php_use = Some(true); + symbols.imports.push(imp); + } + } + // Single use clause without wrapper + if child.kind() == "qualified_name" || child.kind() == "name" { + let full_path = node_text(&child, source).to_string(); + let last_name = full_path.split('\\').last().unwrap_or("").to_string(); + let mut imp = + Import::new(full_path, vec![last_name], start_line(node)); + imp.php_use = Some(true); + symbols.imports.push(imp); + } + } + } + } + + "function_call_expression" => { + let fn_node = node + .child_by_field_name("function") + .or_else(|| node.child(0)); + if let Some(fn_node) = fn_node { + match fn_node.kind() { + "name" | "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + "qualified_name" => { + let text = node_text(&fn_node, source); + let last = text.split('\\').last().unwrap_or(""); + symbols.calls.push(Call { + name: last.to_string(), + line: start_line(node), + dynamic: None, + }); + } + _ => {} + } + } + } + + "member_call_expression" => { + if let Some(name) = node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + + "scoped_call_expression" => { + if let Some(name) = node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + + "object_creation_expression" => { + // Skip 'new' keyword (child 0) and get class node (child 1) + if let Some(class_node) = node.child(1) { + if class_node.kind() == "name" || class_node.kind() == "qualified_name" { + let text = node_text(&class_node, source); + let last = text.split('\\').last().unwrap_or(""); + symbols.calls.push(Call { + name: last.to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} diff --git a/crates/codegraph-core/src/extractors/python.rs b/crates/codegraph-core/src/extractors/python.rs new file mode 100644 index 00000000..619f1638 --- /dev/null +++ b/crates/codegraph-core/src/extractors/python.rs @@ -0,0 +1,242 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct PythonExtractor; + +impl SymbolExtractor for PythonExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "function_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name_text = node_text(&name_node, source); + let mut decorators = Vec::new(); + if let Some(prev) = node.prev_sibling() { + if prev.kind() == "decorator" { + decorators.push(node_text(&prev, source).to_string()); + } + } + let parent_class = find_python_parent_class(node, source); + let (full_name, kind) = match &parent_class { + Some(cls) => (format!("{}.{}", cls, name_text), "method".to_string()), + None => (name_text.to_string(), "function".to_string()), + }; + symbols.definitions.push(Definition { + name: full_name, + kind, + line: start_line(node), + end_line: Some(end_line(node)), + decorators: if decorators.is_empty() { + None + } else { + Some(decorators) + }, + }); + } + } + + "class_definition" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + let superclasses = node + .child_by_field_name("superclasses") + .or_else(|| find_child(node, "argument_list")); + if let Some(superclasses) = superclasses { + for i in 0..superclasses.child_count() { + if let Some(child) = superclasses.child(i) { + if child.kind() == "identifier" { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + } + } + } + } + } + } + + "decorated_definition" => { + // Walk children directly to handle decorated functions/classes + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } + return; + } + + "call" => { + if let Some(fn_node) = node.child_by_field_name("function") { + let call_name = match fn_node.kind() { + "identifier" => Some(node_text(&fn_node, source).to_string()), + "attribute" => fn_node + .child_by_field_name("attribute") + .map(|a| node_text(&a, source).to_string()), + _ => None, + }; + if let Some(name) = call_name { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + }); + } + } + } + + "import_statement" => { + let mut names = Vec::new(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "dotted_name" || child.kind() == "aliased_import" { + let name = if child.kind() == "aliased_import" { + child + .child_by_field_name("alias") + .or_else(|| child.child_by_field_name("name")) + .map(|n| node_text(&n, source).to_string()) + } else { + Some(node_text(&child, source).to_string()) + }; + if let Some(name) = name { + names.push(name); + } + } + } + } + if !names.is_empty() { + let mut imp = Import::new(names[0].clone(), names, start_line(node)); + imp.python_import = Some(true); + symbols.imports.push(imp); + } + } + + "import_from_statement" => { + let mut source_str = String::new(); + let mut names = Vec::new(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + match child.kind() { + "dotted_name" | "relative_import" => { + if source_str.is_empty() { + source_str = node_text(&child, source).to_string(); + } else { + names.push(node_text(&child, source).to_string()); + } + } + "aliased_import" => { + let n = child + .child_by_field_name("name") + .or_else(|| child.child(0)); + if let Some(n) = n { + names.push(node_text(&n, source).to_string()); + } + } + "wildcard_import" => { + names.push("*".to_string()); + } + _ => {} + } + } + } + if !source_str.is_empty() { + let mut imp = Import::new(source_str, names, start_line(node)); + imp.python_import = Some(true); + symbols.imports.push(imp); + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn find_python_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == "class_definition" { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + current = parent.parent(); + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_py(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_python::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + PythonExtractor.extract(&tree, code.as_bytes(), "test.py") + } + + #[test] + fn finds_function() { + let s = parse_py("def greet(name):\n return name\n"); + assert_eq!(s.definitions.len(), 1); + assert_eq!(s.definitions[0].name, "greet"); + assert_eq!(s.definitions[0].kind, "function"); + } + + #[test] + fn finds_class_and_method() { + let s = parse_py("class Foo:\n def bar(self):\n pass\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Foo")); + assert!(names.contains(&"Foo.bar")); + } + + #[test] + fn finds_imports() { + let s = parse_py("from os.path import join, exists\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "os.path"); + assert!(s.imports[0].names.contains(&"join".to_string())); + } + + #[test] + fn finds_calls() { + let s = parse_py("print('hello')\nos.path.join('a', 'b')\n"); + let call_names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect(); + assert!(call_names.contains(&"print")); + assert!(call_names.contains(&"join")); + } + + #[test] + fn finds_inheritance() { + let s = parse_py("class Dog(Animal):\n pass\n"); + assert_eq!(s.classes.len(), 1); + assert_eq!(s.classes[0].name, "Dog"); + assert_eq!(s.classes[0].extends, Some("Animal".to_string())); + } +} diff --git a/crates/codegraph-core/src/extractors/ruby.rs b/crates/codegraph-core/src/extractors/ruby.rs new file mode 100644 index 00000000..ebf0faf2 --- /dev/null +++ b/crates/codegraph-core/src/extractors/ruby.rs @@ -0,0 +1,223 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct RubyExtractor; + +impl SymbolExtractor for RubyExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_ruby_parent_class<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class" | "module" => { + return parent + .child_by_field_name("name") + .map(|n| node_text(&n, source).to_string()); + } + _ => {} + } + current = parent.parent(); + } + None +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "class" => { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + if let Some(superclass) = node.child_by_field_name("superclass") { + // Walk superclass node to find the constant + extract_ruby_superclass(&superclass, &class_name, node, source, symbols); + } + } + } + + "module" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "method" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_ruby_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "singleton_method" => { + if let Some(name_node) = node.child_by_field_name("name") { + let parent_class = find_ruby_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + symbols.definitions.push(Definition { + name: full_name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "call" => { + if let Some(method_node) = node.child_by_field_name("method") { + let method_text = node_text(&method_node, source); + + if method_text == "require" || method_text == "require_relative" { + let args = node.child_by_field_name("arguments"); + if let Some(args) = args { + for i in 0..args.child_count() { + if let Some(arg) = args.child(i) { + let str_content = extract_ruby_string_content(&arg, source); + if let Some(content) = str_content { + let last = content.split('/').last().unwrap_or("").to_string(); + let mut imp = + Import::new(content, vec![last], start_line(node)); + imp.ruby_require = Some(true); + symbols.imports.push(imp); + break; + } + } + } + } + } else if method_text == "include" + || method_text == "extend" + || method_text == "prepend" + { + let parent_class = find_ruby_parent_class(node, source); + if let Some(parent_class) = parent_class { + if let Some(args) = node.child_by_field_name("arguments") { + for i in 0..args.child_count() { + if let Some(arg) = args.child(i) { + if arg.kind() == "constant" + || arg.kind() == "scope_resolution" + { + symbols.classes.push(ClassRelation { + name: parent_class.clone(), + extends: None, + implements: Some( + node_text(&arg, source).to_string(), + ), + line: start_line(node), + }); + } + } + } + } + } + } else { + symbols.calls.push(Call { + name: method_text.to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_ruby_superclass( + superclass: &Node, + class_name: &str, + class_node: &Node, + source: &[u8], + symbols: &mut FileSymbols, +) { + // Direct check for superclass node type + if superclass.kind() == "superclass" { + for i in 0..superclass.child_count() { + if let Some(child) = superclass.child(i) { + if child.kind() == "constant" || child.kind() == "scope_resolution" { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(class_node), + }); + return; + } + } + } + } + // Fallback: check children directly + for i in 0..superclass.child_count() { + if let Some(child) = superclass.child(i) { + if child.kind() == "constant" || child.kind() == "scope_resolution" { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(class_node), + }); + return; + } + } + } +} + +fn extract_ruby_string_content(node: &Node, source: &[u8]) -> Option { + if node.kind() == "string" { + // Look for string_content child + if let Some(content) = find_child(node, "string_content") { + return Some(node_text(&content, source).to_string()); + } + // Fallback: strip quotes from text + let text = node_text(node, source); + let stripped = text + .trim_start_matches(&['\'', '"'][..]) + .trim_end_matches(&['\'', '"'][..]); + if !stripped.is_empty() { + return Some(stripped.to_string()); + } + } + if node.kind() == "string_content" { + return Some(node_text(node, source).to_string()); + } + None +} diff --git a/crates/codegraph-core/src/extractors/rust_lang.rs b/crates/codegraph-core/src/extractors/rust_lang.rs new file mode 100644 index 00000000..9c7484e1 --- /dev/null +++ b/crates/codegraph-core/src/extractors/rust_lang.rs @@ -0,0 +1,258 @@ +use tree_sitter::{Node, Tree}; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct RustExtractor; + +impl SymbolExtractor for RustExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_node(&tree.root_node(), source, &mut symbols); + symbols + } +} + +fn find_current_impl<'a>(node: &Node<'a>, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + if parent.kind() == "impl_item" { + return parent + .child_by_field_name("type") + .map(|n| node_text(&n, source).to_string()); + } + current = parent.parent(); + } + None +} + +fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + match node.kind() { + "function_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + let name = node_text(&name_node, source); + let impl_type = find_current_impl(node, source); + let (full_name, kind) = match &impl_type { + Some(t) => (format!("{}.{}", t, name), "method".to_string()), + None => (name.to_string(), "function".to_string()), + }; + symbols.definitions.push(Definition { + name: full_name, + kind, + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "struct_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "enum_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + } + } + + "trait_item" => { + if let Some(name_node) = node.child_by_field_name("name") { + let trait_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: trait_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + }); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "function_signature_item" + || child.kind() == "function_item" + { + if let Some(meth_name) = child.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: format!( + "{}.{}", + trait_name, + node_text(&meth_name, source) + ), + kind: "method".to_string(), + line: start_line(&child), + end_line: Some(end_line(&child)), + decorators: None, + }); + } + } + } + } + } + } + } + + "impl_item" => { + let type_node = node.child_by_field_name("type"); + let trait_node = node.child_by_field_name("trait"); + if let (Some(type_node), Some(trait_node)) = (type_node, trait_node) { + symbols.classes.push(ClassRelation { + name: node_text(&type_node, source).to_string(), + extends: None, + implements: Some(node_text(&trait_node, source).to_string()), + line: start_line(node), + }); + } + } + + "use_declaration" => { + if let Some(arg_node) = node.child(1) { + let use_paths = extract_rust_use_path(&arg_node, source); + for (src, names) in use_paths { + let mut imp = Import::new(src, names, start_line(node)); + imp.rust_use = Some(true); + symbols.imports.push(imp); + } + } + } + + "call_expression" => { + if let Some(fn_node) = node.child_by_field_name("function") { + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + "field_expression" => { + if let Some(field) = fn_node.child_by_field_name("field") { + symbols.calls.push(Call { + name: node_text(&field, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + "scoped_identifier" => { + if let Some(name) = fn_node.child_by_field_name("name") { + symbols.calls.push(Call { + name: node_text(&name, source).to_string(), + line: start_line(node), + dynamic: None, + }); + } + } + _ => {} + } + } + } + + "macro_invocation" => { + if let Some(macro_node) = node.child(0) { + symbols.calls.push(Call { + name: format!("{}!", node_text(¯o_node, source)), + line: start_line(node), + dynamic: None, + }); + } + } + + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_node(&child, source, symbols); + } + } +} + +fn extract_rust_use_path(node: &Node, source: &[u8]) -> Vec<(String, Vec)> { + match node.kind() { + "use_list" => { + let mut results = Vec::new(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + results.extend(extract_rust_use_path(&child, source)); + } + } + results + } + + "scoped_use_list" => { + let path_node = node.child_by_field_name("path"); + let list_node = node.child_by_field_name("list"); + let prefix = path_node + .map(|p| node_text(&p, source).to_string()) + .unwrap_or_default(); + if let Some(list_node) = list_node { + let mut names = Vec::new(); + for i in 0..list_node.child_count() { + if let Some(child) = list_node.child(i) { + match child.kind() { + "identifier" | "self" => { + names.push(node_text(&child, source).to_string()); + } + "use_as_clause" => { + let name = child + .child_by_field_name("alias") + .or_else(|| child.child_by_field_name("name")) + .map(|n| node_text(&n, source).to_string()); + if let Some(name) = name { + names.push(name); + } + } + _ => {} + } + } + } + vec![(prefix, names)] + } else { + vec![(prefix, vec![])] + } + } + + "use_as_clause" => { + let name = node + .child_by_field_name("alias") + .or_else(|| node.child_by_field_name("name")) + .map(|n| node_text(&n, source).to_string()); + vec![( + node_text(node, source).to_string(), + name.into_iter().collect(), + )] + } + + "use_wildcard" => { + let path_node = node.child_by_field_name("path"); + let src = path_node + .map(|p| node_text(&p, source).to_string()) + .unwrap_or_else(|| "*".to_string()); + vec![(src, vec!["*".to_string()])] + } + + "scoped_identifier" | "identifier" => { + let text = node_text(node, source).to_string(); + let last_name = text.split("::").last().unwrap_or("").to_string(); + vec![(text, vec![last_name])] + } + + _ => vec![], + } +} diff --git a/crates/codegraph-core/src/import_resolution.rs b/crates/codegraph-core/src/import_resolution.rs new file mode 100644 index 00000000..89157e6e --- /dev/null +++ b/crates/codegraph-core/src/import_resolution.rs @@ -0,0 +1,197 @@ +use std::path::{Path, PathBuf}; + +use crate::types::{AliasMapping, ImportResolutionInput, PathAliases, ResolvedImport}; + +/// Normalize a path to use forward slashes (cross-platform consistency). +fn normalize_path(p: &str) -> String { + p.replace('\\', "/") +} + +/// Try resolving via path aliases (tsconfig/jsconfig paths). +fn resolve_via_alias( + import_source: &str, + aliases: &PathAliases, + _root_dir: &str, +) -> Option { + // baseUrl resolution + if let Some(base_url) = &aliases.base_url { + if !import_source.starts_with('.') && !import_source.starts_with('/') { + let candidate = PathBuf::from(base_url).join(import_source); + for ext in &["", ".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"] + { + let full = format!("{}{}", candidate.display(), ext); + if Path::new(&full).exists() { + return Some(full); + } + } + } + } + + // Path pattern resolution + for mapping in &aliases.paths { + let prefix = mapping.pattern.trim_end_matches('*'); + if !import_source.starts_with(prefix) { + continue; + } + let rest = &import_source[prefix.len()..]; + for target in &mapping.targets { + let resolved = target.replace('*', rest); + for ext in &["", ".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"] + { + let full = format!("{}{}", resolved, ext); + if Path::new(&full).exists() { + return Some(full); + } + } + } + } + + None +} + +/// Resolve a single import path, mirroring `resolveImportPath()` in builder.js. +pub fn resolve_import_path( + from_file: &str, + import_source: &str, + root_dir: &str, + aliases: &PathAliases, +) -> String { + // Try alias resolution for non-relative imports + if !import_source.starts_with('.') { + if let Some(alias_resolved) = resolve_via_alias(import_source, aliases, root_dir) { + let root = Path::new(root_dir); + if let Ok(rel) = Path::new(&alias_resolved).strip_prefix(root) { + return normalize_path(&rel.display().to_string()); + } + return normalize_path(&alias_resolved); + } + // Bare specifier (e.g., "lodash") — return as-is + return import_source.to_string(); + } + + // Relative import + let dir = Path::new(from_file).parent().unwrap_or(Path::new("")); + let resolved = dir.join(import_source); + let resolved_str = resolved.display().to_string(); + + // .js → .ts remap + if resolved_str.ends_with(".js") { + let ts_candidate = resolved_str.replace(".js", ".ts"); + if Path::new(&ts_candidate).exists() { + let root = Path::new(root_dir); + if let Ok(rel) = Path::new(&ts_candidate).strip_prefix(root) { + return normalize_path(&rel.display().to_string()); + } + } + let tsx_candidate = resolved_str.replace(".js", ".tsx"); + if Path::new(&tsx_candidate).exists() { + let root = Path::new(root_dir); + if let Ok(rel) = Path::new(&tsx_candidate).strip_prefix(root) { + return normalize_path(&rel.display().to_string()); + } + } + } + + // Extension probing + let extensions = [ + ".ts", ".tsx", ".js", ".jsx", ".mjs", ".py", "/index.ts", "/index.tsx", "/index.js", + "/__init__.py", + ]; + for ext in &extensions { + let candidate = format!("{}{}", resolved_str, ext); + if Path::new(&candidate).exists() { + let root = Path::new(root_dir); + if let Ok(rel) = Path::new(&candidate).strip_prefix(root) { + return normalize_path(&rel.display().to_string()); + } + } + } + + // Exact match + if Path::new(&resolved_str).exists() { + let root = Path::new(root_dir); + if let Ok(rel) = Path::new(&resolved_str).strip_prefix(root) { + return normalize_path(&rel.display().to_string()); + } + } + + // Fallback: return relative path + let root = Path::new(root_dir); + if let Ok(rel) = resolved.strip_prefix(root) { + normalize_path(&rel.display().to_string()) + } else { + normalize_path(&resolved_str) + } +} + +/// Compute proximity-based confidence for call resolution. +/// Mirrors `computeConfidence()` in builder.js. +pub fn compute_confidence( + caller_file: &str, + target_file: &str, + imported_from: Option<&str>, +) -> f64 { + if target_file.is_empty() || caller_file.is_empty() { + return 0.3; + } + if caller_file == target_file { + return 1.0; + } + if let Some(imp) = imported_from { + if imp == target_file { + return 1.0; + } + } + + let caller_dir = Path::new(caller_file) + .parent() + .map(|p| p.display().to_string()) + .unwrap_or_default(); + let target_dir = Path::new(target_file) + .parent() + .map(|p| p.display().to_string()) + .unwrap_or_default(); + + if caller_dir == target_dir { + return 0.7; + } + + let caller_parent = Path::new(&caller_dir) + .parent() + .map(|p| p.display().to_string()) + .unwrap_or_default(); + let target_parent = Path::new(&target_dir) + .parent() + .map(|p| p.display().to_string()) + .unwrap_or_default(); + + if caller_parent == target_parent { + return 0.5; + } + + 0.3 +} + +/// Batch resolve multiple imports. +pub fn resolve_imports_batch( + inputs: &[ImportResolutionInput], + root_dir: &str, + aliases: &PathAliases, +) -> Vec { + inputs + .iter() + .map(|input| { + let resolved = resolve_import_path( + &input.from_file, + &input.import_source, + root_dir, + aliases, + ); + ResolvedImport { + from_file: input.from_file.clone(), + import_source: input.import_source.clone(), + resolved_path: resolved, + } + }) + .collect() +} diff --git a/crates/codegraph-core/src/incremental.rs b/crates/codegraph-core/src/incremental.rs new file mode 100644 index 00000000..cf02d50a --- /dev/null +++ b/crates/codegraph-core/src/incremental.rs @@ -0,0 +1,82 @@ +use std::collections::HashMap; +use send_wrapper::SendWrapper; +use tree_sitter::{Parser, Tree}; + +use napi_derive::napi; + +use crate::extractors::extract_symbols; +use crate::parser_registry::LanguageKind; +use crate::types::FileSymbols; + +struct CacheEntry { + tree: Tree, + lang: LanguageKind, +} + +/// Cache of parse trees for incremental parsing. +/// +/// Keeps the previous tree for each file so tree-sitter can reuse +/// unchanged CST subtrees when re-parsing (old-tree hint). +/// +/// `tree_sitter::Tree` is `!Send`, but the cache is only ever accessed +/// from the JS main thread. `SendWrapper` satisfies napi's `Send` bound +/// while panicking if misused from another thread. +#[napi] +pub struct ParseTreeCache { + entries: SendWrapper>, +} + +#[napi] +impl ParseTreeCache { + #[napi(constructor)] + pub fn new() -> Self { + Self { + entries: SendWrapper::new(HashMap::new()), + } + } + + /// Parse a file, reusing the cached tree if available (old-tree hint). + /// Returns the extracted symbols, or null for unsupported extensions. + #[napi] + pub fn parse_file(&mut self, file_path: String, source: String) -> Option { + let lang = LanguageKind::from_extension(&file_path)?; + + let mut parser = Parser::new(); + parser.set_language(&lang.tree_sitter_language()).ok()?; + + let source_bytes = source.as_bytes(); + + let old_tree = self.entries.get(&file_path).map(|e| &e.tree); + let tree = parser.parse(source_bytes, old_tree)?; + + let symbols = extract_symbols(lang, &tree, source_bytes, &file_path); + + self.entries.insert(file_path, CacheEntry { tree, lang }); + + Some(symbols) + } + + /// Remove a file from the cache. + #[napi] + pub fn remove(&mut self, file_path: String) { + self.entries.remove(&file_path); + } + + /// Check if a file is in the cache. + #[napi] + pub fn contains(&self, file_path: String) -> bool { + self.entries.contains_key(&file_path) + } + + /// Clear the entire cache. + #[napi] + pub fn clear(&mut self) { + self.entries.clear(); + } + + /// Number of files currently cached. + #[napi] + pub fn size(&self) -> u32 { + self.entries.len() as u32 + } +} diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs new file mode 100644 index 00000000..6223460f --- /dev/null +++ b/crates/codegraph-core/src/lib.rs @@ -0,0 +1,84 @@ +pub mod types; +pub mod parser_registry; +pub mod extractors; +pub mod parallel; +pub mod import_resolution; +pub mod cycles; +pub mod incremental; + +use napi_derive::napi; +use types::*; + +/// Parse a single file and return extracted symbols. +#[napi] +pub fn parse_file(file_path: String, source: String) -> Option { + parallel::parse_file(&file_path, &source) +} + +/// Parse multiple files in parallel and return all extracted symbols. +#[napi] +pub fn parse_files(file_paths: Vec, root_dir: String) -> Vec { + parallel::parse_files_parallel(&file_paths, &root_dir) +} + +/// Resolve a single import path. +#[napi] +pub fn resolve_import( + from_file: String, + import_source: String, + root_dir: String, + aliases: Option, +) -> String { + let aliases = aliases.unwrap_or(PathAliases { + base_url: None, + paths: vec![], + }); + import_resolution::resolve_import_path(&from_file, &import_source, &root_dir, &aliases) +} + +/// Batch resolve multiple imports. +#[napi] +pub fn resolve_imports( + inputs: Vec, + root_dir: String, + aliases: Option, +) -> Vec { + let aliases = aliases.unwrap_or(PathAliases { + base_url: None, + paths: vec![], + }); + import_resolution::resolve_imports_batch(&inputs, &root_dir, &aliases) +} + +/// Compute proximity-based confidence for call resolution. +#[napi] +pub fn compute_confidence( + caller_file: String, + target_file: String, + imported_from: Option, +) -> f64 { + import_resolution::compute_confidence( + &caller_file, + &target_file, + imported_from.as_deref(), + ) +} + +/// Detect cycles using Tarjan's SCC algorithm. +/// Returns arrays of node names forming each cycle. +#[napi] +pub fn detect_cycles(edges: Vec) -> Vec> { + cycles::detect_cycles(&edges) +} + +/// Returns the engine name. +#[napi] +pub fn engine_name() -> String { + "native".to_string() +} + +/// Returns the engine version (crate version). +#[napi] +pub fn engine_version() -> String { + env!("CARGO_PKG_VERSION").to_string() +} diff --git a/crates/codegraph-core/src/parallel.rs b/crates/codegraph-core/src/parallel.rs new file mode 100644 index 00000000..f1d3fd29 --- /dev/null +++ b/crates/codegraph-core/src/parallel.rs @@ -0,0 +1,43 @@ +use rayon::prelude::*; +use std::fs; +use tree_sitter::Parser; + +use crate::extractors::extract_symbols; +use crate::parser_registry::LanguageKind; +use crate::types::FileSymbols; + +/// Parse multiple files in parallel using rayon. +/// Each thread creates its own Parser (cheap; Language objects are Send+Sync). +/// Failed files are silently skipped (matches WASM behavior). +pub fn parse_files_parallel(file_paths: &[String], root_dir: &str) -> Vec { + file_paths + .par_iter() + .filter_map(|file_path| { + let lang = LanguageKind::from_extension(file_path)?; + let source = fs::read(file_path).ok()?; + + let mut parser = Parser::new(); + parser + .set_language(&lang.tree_sitter_language()) + .ok()?; + + let tree = parser.parse(&source, None)?; + let symbols = extract_symbols(lang, &tree, &source, file_path); + Some(symbols) + }) + .collect() +} + +/// Parse a single file and return its symbols. +pub fn parse_file(file_path: &str, source: &str) -> Option { + let lang = LanguageKind::from_extension(file_path)?; + let source_bytes = source.as_bytes(); + + let mut parser = Parser::new(); + parser + .set_language(&lang.tree_sitter_language()) + .ok()?; + + let tree = parser.parse(source_bytes, None)?; + Some(extract_symbols(lang, &tree, source_bytes, file_path)) +} diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs new file mode 100644 index 00000000..0fdc766f --- /dev/null +++ b/crates/codegraph-core/src/parser_registry.rs @@ -0,0 +1,63 @@ +use std::path::Path; +use tree_sitter::Language; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum LanguageKind { + JavaScript, + TypeScript, + Tsx, + Python, + Go, + Rust, + Java, + CSharp, + Ruby, + Php, + Hcl, +} + +impl LanguageKind { + /// Determine language from file extension — mirrors `getParser()` in parser.js + pub fn from_extension(file_path: &str) -> Option { + let path = Path::new(file_path); + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + let name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + + // .tsx must come before .ts check + if file_path.ends_with(".tsx") { + return Some(Self::Tsx); + } + if file_path.ends_with(".d.ts") || ext == "ts" { + return Some(Self::TypeScript); + } + match ext { + "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript), + "py" => Some(Self::Python), + "tf" | "hcl" => Some(Self::Hcl), + "go" => Some(Self::Go), + "rs" => Some(Self::Rust), + "java" => Some(Self::Java), + "cs" => Some(Self::CSharp), + "rb" => Some(Self::Ruby), + "php" => Some(Self::Php), + _ => None, + } + } + + /// Return the native tree-sitter `Language` for this variant. + pub fn tree_sitter_language(&self) -> Language { + match self { + Self::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + Self::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(), + Self::Python => tree_sitter_python::LANGUAGE.into(), + Self::Go => tree_sitter_go::LANGUAGE.into(), + Self::Rust => tree_sitter_rust::LANGUAGE.into(), + Self::Java => tree_sitter_java::LANGUAGE.into(), + Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), + Self::Ruby => tree_sitter_ruby::LANGUAGE.into(), + Self::Php => tree_sitter_php::LANGUAGE_PHP.into(), + Self::Hcl => tree_sitter_hcl::LANGUAGE.into(), + } + } +} diff --git a/crates/codegraph-core/src/types.rs b/crates/codegraph-core/src/types.rs new file mode 100644 index 00000000..3fcbffe9 --- /dev/null +++ b/crates/codegraph-core/src/types.rs @@ -0,0 +1,137 @@ +use napi_derive::napi; +use serde::{Deserialize, Serialize}; + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Definition { + pub name: String, + pub kind: String, + pub line: u32, + pub end_line: Option, + #[napi(ts_type = "string[] | undefined")] + pub decorators: Option>, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Call { + pub name: String, + pub line: u32, + pub dynamic: Option, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Import { + pub source: String, + pub names: Vec, + pub line: u32, + pub type_only: Option, + pub reexport: Option, + pub wildcard_reexport: Option, + // Language-specific flags + pub python_import: Option, + pub go_import: Option, + pub rust_use: Option, + pub java_import: Option, + pub csharp_using: Option, + pub ruby_require: Option, + pub php_use: Option, +} + +impl Import { + pub fn new(source: String, names: Vec, line: u32) -> Self { + Self { + source, + names, + line, + type_only: None, + reexport: None, + wildcard_reexport: None, + python_import: None, + go_import: None, + rust_use: None, + java_import: None, + csharp_using: None, + ruby_require: None, + php_use: None, + } + } +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ClassRelation { + pub name: String, + pub extends: Option, + pub implements: Option, + pub line: u32, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExportInfo { + pub name: String, + pub kind: String, + pub line: u32, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileSymbols { + pub file: String, + pub definitions: Vec, + pub calls: Vec, + pub imports: Vec, + pub classes: Vec, + pub exports: Vec, +} + +impl FileSymbols { + pub fn new(file: String) -> Self { + Self { + file, + definitions: Vec::new(), + calls: Vec::new(), + imports: Vec::new(), + classes: Vec::new(), + exports: Vec::new(), + } + } +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GraphEdge { + pub source: String, + pub target: String, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PathAliases { + pub base_url: Option, + pub paths: Vec, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AliasMapping { + pub pattern: String, + pub targets: Vec, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImportResolutionInput { + pub from_file: String, + pub import_source: String, +} + +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ResolvedImport { + pub from_file: String, + pub import_source: String, + pub resolved_path: String, +} diff --git a/package.json b/package.json index ed723490..6cea27c1 100644 --- a/package.json +++ b/package.json @@ -54,7 +54,11 @@ }, "optionalDependencies": { "@huggingface/transformers": "^3.8.1", - "@modelcontextprotocol/sdk": "^1.0.0" + "@modelcontextprotocol/sdk": "^1.0.0", + "@optave/codegraph-linux-x64-gnu": "0.1.0", + "@optave/codegraph-darwin-arm64": "0.1.0", + "@optave/codegraph-darwin-x64": "0.1.0", + "@optave/codegraph-win32-x64-msvc": "0.1.0" }, "devDependencies": { "@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0", diff --git a/src/builder.js b/src/builder.js index 3f2299bb..a0a4e4a6 100644 --- a/src/builder.js +++ b/src/builder.js @@ -3,10 +3,13 @@ import fs from 'fs'; import path from 'path'; import { createHash } from 'crypto'; import { openDb, initSchema } from './db.js'; -import { createParsers, getParser, extractSymbols, extractHCLSymbols, extractPythonSymbols, extractGoSymbols, extractRustSymbols, extractJavaSymbols, extractCSharpSymbols, extractRubySymbols, extractPHPSymbols } from './parser.js'; +import { parseFilesAuto, getActiveEngine } from './parser.js'; import { IGNORE_DIRS, EXTENSIONS, normalizePath } from './constants.js'; import { loadConfig } from './config.js'; import { warn, debug, info } from './logger.js'; +import { resolveImportPath, computeConfidence, resolveImportsBatch } from './resolve.js'; + +export { resolveImportPath } from './resolve.js'; export function collectFiles(dir, files = [], config = {}) { let entries; @@ -63,70 +66,6 @@ export function loadPathAliases(rootDir) { return aliases; } -function resolveViaAlias(importSource, aliases, rootDir) { - if (aliases.baseUrl && !importSource.startsWith('.') && !importSource.startsWith('/')) { - const candidate = path.resolve(aliases.baseUrl, importSource); - for (const ext of ['', '.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.tsx', '/index.js']) { - const full = candidate + ext; - if (fs.existsSync(full)) return full; - } - } - - for (const [pattern, targets] of Object.entries(aliases.paths)) { - const prefix = pattern.replace(/\*$/, ''); - if (!importSource.startsWith(prefix)) continue; - const rest = importSource.slice(prefix.length); - for (const target of targets) { - const resolved = target.replace(/\*$/, rest); - for (const ext of ['', '.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.tsx', '/index.js']) { - const full = resolved + ext; - if (fs.existsSync(full)) return full; - } - } - } - return null; -} - -export function resolveImportPath(fromFile, importSource, rootDir, aliases) { - if (!importSource.startsWith('.') && aliases) { - const aliasResolved = resolveViaAlias(importSource, aliases, rootDir); - if (aliasResolved) return normalizePath(path.relative(rootDir, aliasResolved)); - } - if (!importSource.startsWith('.')) return importSource; - const dir = path.dirname(fromFile); - let resolved = path.resolve(dir, importSource); - - if (resolved.endsWith('.js')) { - const tsCandidate = resolved.replace(/\.js$/, '.ts'); - if (fs.existsSync(tsCandidate)) return normalizePath(path.relative(rootDir, tsCandidate)); - const tsxCandidate = resolved.replace(/\.js$/, '.tsx'); - if (fs.existsSync(tsxCandidate)) return normalizePath(path.relative(rootDir, tsxCandidate)); - } - - for (const ext of ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.py', '/index.ts', '/index.tsx', '/index.js', '/__init__.py']) { - const candidate = resolved + ext; - if (fs.existsSync(candidate)) { - return normalizePath(path.relative(rootDir, candidate)); - } - } - if (fs.existsSync(resolved)) return normalizePath(path.relative(rootDir, resolved)); - return normalizePath(path.relative(rootDir, resolved)); -} - -/** - * Compute proximity-based confidence for call resolution. - */ -function computeConfidence(callerFile, targetFile, importedFrom) { - if (!targetFile || !callerFile) return 0.3; - if (callerFile === targetFile) return 1.0; - if (importedFrom === targetFile) return 1.0; - if (path.dirname(callerFile) === path.dirname(targetFile)) return 0.7; - const callerParent = path.dirname(path.dirname(callerFile)); - const targetParent = path.dirname(path.dirname(targetFile)); - if (callerParent === targetParent) return 0.5; - return 0.3; -} - /** * Compute MD5 hash of file contents for incremental builds. */ @@ -193,7 +132,11 @@ export async function buildGraph(rootDir, opts = {}) { const config = loadConfig(rootDir); const incremental = opts.incremental !== false && config.build && config.build.incremental !== false; - const parsers = await createParsers(); + // Engine selection: 'native', 'wasm', or 'auto' (default) + const engineOpts = { engine: opts.engine || 'auto' }; + const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts); + console.log(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`); + const aliases = loadPathAliases(rootDir); // Merge config aliases if (config.aliases) { @@ -255,7 +198,6 @@ export async function buildGraph(rootDir, opts = {}) { // First pass: parse files and insert nodes const fileSymbols = new Map(); - let parsed = 0, skipped = 0; // For incremental builds, also load existing symbols that aren't changing if (!isFullBuild) { @@ -268,73 +210,50 @@ export async function buildGraph(rootDir, opts = {}) { ? files.map(f => ({ file: f })) : changed; - const insertMany = db.transaction(() => { - for (const item of filesToParse) { - const filePath = item.file; - const parser = getParser(parsers, filePath); - if (!parser) { skipped++; continue; } - - let code; - if (item.content) { - code = item.content; - } else { - try { code = fs.readFileSync(filePath, 'utf-8'); } - catch (err) { - warn(`Skipping ${path.relative(rootDir, filePath)}: ${err.message}`); - skipped++; - continue; - } - } + // ── Unified parse via parseFilesAuto ─────────────────────────────── + const filePaths = filesToParse.map(item => item.file); + const allSymbols = await parseFilesAuto(filePaths, rootDir, engineOpts); - let tree; - try { tree = parser.parse(code); } - catch (e) { - warn(`Parse error in ${path.relative(rootDir, filePath)}: ${e.message}`); - skipped++; - continue; - } + // Build a hash lookup from incremental data (changed items may carry pre-computed hashes) + const precomputedHashes = new Map(); + for (const item of filesToParse) { + if (item.hash && item.relPath) { + precomputedHashes.set(item.relPath, item.hash); + } + } - const relPath = normalizePath(path.relative(rootDir, filePath)); - const isHCL = filePath.endsWith('.tf') || filePath.endsWith('.hcl'); - const isPython = filePath.endsWith('.py'); - const isGo = filePath.endsWith('.go'); - const isRust = filePath.endsWith('.rs'); - const isJava = filePath.endsWith('.java'); - const isCSharp = filePath.endsWith('.cs'); - const isRuby = filePath.endsWith('.rb'); - const isPHP = filePath.endsWith('.php'); - const symbols = isHCL ? extractHCLSymbols(tree, filePath) - : isPython ? extractPythonSymbols(tree, filePath) - : isGo ? extractGoSymbols(tree, filePath) - : isRust ? extractRustSymbols(tree, filePath) - : isJava ? extractJavaSymbols(tree, filePath) - : isCSharp ? extractCSharpSymbols(tree, filePath) - : isRuby ? extractRubySymbols(tree, filePath) - : isPHP ? extractPHPSymbols(tree, filePath) - : extractSymbols(tree, filePath); + const insertAll = db.transaction(() => { + for (const [relPath, symbols] of allSymbols) { fileSymbols.set(relPath, symbols); insertNode.run(relPath, 'file', relPath, 0, null); - for (const def of symbols.definitions) { insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); } - for (const exp of symbols.exports) { insertNode.run(exp.name, exp.kind, relPath, exp.line, null); } // Update file hash for incremental builds if (upsertHash) { - const hash = item.hash || fileHash(code); - upsertHash.run(relPath, hash, Date.now()); + const existingHash = precomputedHashes.get(relPath); + if (existingHash) { + upsertHash.run(relPath, existingHash, Date.now()); + } else { + const absPath = path.join(rootDir, relPath); + let code; + try { code = fs.readFileSync(absPath, 'utf-8'); } catch { code = null; } + if (code !== null) { + upsertHash.run(relPath, fileHash(code), Date.now()); + } + } } - - parsed++; - if (parsed % 100 === 0) process.stdout.write(` Parsed ${parsed}/${filesToParse.length} files\r`); } }); - insertMany(); + insertAll(); + + const parsed = allSymbols.size; + const skipped = filesToParse.length - parsed; console.log(`Parsed ${parsed} files (${skipped} skipped)`); // Clean up removed file hashes @@ -345,13 +264,33 @@ export async function buildGraph(rootDir, opts = {}) { } } + // ── Batch import resolution ──────────────────────────────────────── + // Collect all (fromFile, importSource) pairs and resolve in one native call + const batchInputs = []; + for (const [relPath, symbols] of fileSymbols) { + const absFile = path.join(rootDir, relPath); + for (const imp of symbols.imports) { + batchInputs.push({ fromFile: absFile, importSource: imp.source }); + } + } + const batchResolved = resolveImportsBatch(batchInputs, rootDir, aliases); + + function getResolved(absFile, importSource) { + if (batchResolved) { + const key = `${absFile}|${importSource}`; + const hit = batchResolved.get(key); + if (hit !== undefined) return hit; + } + return resolveImportPath(absFile, importSource, rootDir, aliases); + } + // Build re-export map for barrel resolution const reexportMap = new Map(); for (const [relPath, symbols] of fileSymbols) { const reexports = symbols.imports.filter(imp => imp.reexport); if (reexports.length > 0) { reexportMap.set(relPath, reexports.map(imp => ({ - source: resolveImportPath(path.join(rootDir, relPath), imp.source, rootDir, aliases), + source: getResolved(path.join(rootDir, relPath), imp.source), names: imp.names, wildcardReexport: imp.wildcardReexport || false }))); @@ -426,7 +365,7 @@ export async function buildGraph(rootDir, opts = {}) { // Import edges for (const imp of symbols.imports) { - const resolvedPath = resolveImportPath(path.join(rootDir, relPath), imp.source, rootDir, aliases); + const resolvedPath = getResolved(path.join(rootDir, relPath), imp.source); const targetRow = getNodeId.get(resolvedPath, 'file', resolvedPath, 0); if (targetRow) { const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports'; @@ -454,7 +393,7 @@ export async function buildGraph(rootDir, opts = {}) { // Build import name -> target file mapping const importedNames = new Map(); for (const imp of symbols.imports) { - const resolvedPath = resolveImportPath(path.join(rootDir, relPath), imp.source, rootDir, aliases); + const resolvedPath = getResolved(path.join(rootDir, relPath), imp.source); for (const name of imp.names) { const cleanName = name.replace(/^\*\s+as\s+/, ''); importedNames.set(cleanName, resolvedPath); diff --git a/src/cli.js b/src/cli.js index 64c2077a..fa6d42c7 100644 --- a/src/cli.js +++ b/src/cli.js @@ -17,8 +17,9 @@ const program = new Command(); program .name('codegraph') .description('Local code dependency graph tool') - .version('1.1.0') + .version('1.2.0') .option('-v, --verbose', 'Enable verbose/debug output') + .option('--engine ', 'Parser engine: native, wasm, or auto (default: auto)', 'auto') .hook('preAction', (thisCommand) => { const opts = thisCommand.opts(); if (opts.verbose) setVerbose(true); @@ -30,7 +31,8 @@ program .option('--no-incremental', 'Force full rebuild (ignore file hashes)') .action(async (dir, opts) => { const root = path.resolve(dir || '.'); - await buildGraph(root, { incremental: opts.incremental }); + const engine = program.opts().engine; + await buildGraph(root, { incremental: opts.incremental, engine }); }); program @@ -220,7 +222,35 @@ program .description('Watch project for file changes and incrementally update the graph') .action(async (dir) => { const root = path.resolve(dir || '.'); - await watchProject(root); + const engine = program.opts().engine; + await watchProject(root, { engine }); + }); + +program + .command('info') + .description('Show codegraph engine info and diagnostics') + .action(async () => { + const { isNativeAvailable, loadNative } = await import('./native.js'); + const { getActiveEngine } = await import('./parser.js'); + + const engine = program.opts().engine; + const { name: activeName, version: activeVersion } = getActiveEngine({ engine }); + const nativeAvailable = isNativeAvailable(); + + console.log('\nCodegraph Diagnostics'); + console.log('===================='); + console.log(` Version : ${program.version()}`); + console.log(` Node.js : ${process.version}`); + console.log(` Platform : ${process.platform}-${process.arch}`); + console.log(` Native engine : ${nativeAvailable ? 'available' : 'unavailable'}`); + if (nativeAvailable) { + const native = loadNative(); + const nativeVersion = typeof native.engineVersion === 'function' ? native.engineVersion() : 'unknown'; + console.log(` Native version: ${nativeVersion}`); + } + console.log(` Engine flag : --engine ${engine}`); + console.log(` Active engine : ${activeName}${activeVersion ? ` (v${activeVersion})` : ''}`); + console.log(); }); program.parse(); diff --git a/src/cycles.js b/src/cycles.js index bd6628e4..554ae074 100644 --- a/src/cycles.js +++ b/src/cycles.js @@ -1,5 +1,8 @@ +import { loadNative } from './native.js'; + /** * Detect circular dependencies in the codebase using Tarjan's SCC algorithm. + * Dispatches to native Rust implementation when available, falls back to JS. * @param {object} db - Open SQLite database * @param {object} opts - { fileLevel: true } * @returns {string[][]} Array of cycles, each cycle is an array of file paths @@ -7,7 +10,7 @@ export function findCycles(db, opts = {}) { const fileLevel = opts.fileLevel !== false; - // Build adjacency list + // Build adjacency list from SQLite (stays in JS — only the algorithm can move to Rust) let edges; if (fileLevel) { edges = db.prepare(` @@ -32,6 +35,20 @@ export function findCycles(db, opts = {}) { `).all(); } + // Try native Rust implementation + const native = loadNative(); + if (native) { + return native.detectCycles(edges); + } + + // Fallback: JS Tarjan + return findCyclesJS(edges); +} + +/** + * Pure-JS Tarjan's SCC implementation. + */ +export function findCyclesJS(edges) { const graph = new Map(); for (const { source, target } of edges) { if (!graph.has(source)) graph.set(source, []); diff --git a/src/index.js b/src/index.js index 6d1b9350..6c3155ca 100644 --- a/src/index.js +++ b/src/index.js @@ -35,5 +35,11 @@ export { loadConfig } from './config.js'; // Shared constants export { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js'; +// Unified parser API +export { parseFileAuto, parseFilesAuto, getActiveEngine } from './parser.js'; + +// Native engine +export { isNativeAvailable } from './native.js'; + // Logger export { setVerbose } from './logger.js'; diff --git a/src/native.js b/src/native.js new file mode 100644 index 00000000..cbe293f9 --- /dev/null +++ b/src/native.js @@ -0,0 +1,75 @@ +/** + * Native addon loader with graceful fallback to WASM. + * + * Tries to load the platform-specific napi-rs binary built from + * crates/codegraph-core. If unavailable the caller should fall back + * to the existing WASM pipeline. + */ + +import { createRequire } from 'node:module'; +import os from 'node:os'; + +let _cached = undefined; // undefined = not yet tried, null = failed, object = module +let _loadError = null; + +/** Map of (platform-arch) → npm package name. */ +const PLATFORM_PACKAGES = { + 'linux-x64': '@optave/codegraph-linux-x64-gnu', + 'darwin-arm64': '@optave/codegraph-darwin-arm64', + 'darwin-x64': '@optave/codegraph-darwin-x64', + 'win32-x64': '@optave/codegraph-win32-x64-msvc', +}; + +/** + * Try to load the native napi addon. + * Returns the module on success, null on failure. + */ +export function loadNative() { + if (_cached !== undefined) return _cached; + + const require = createRequire(import.meta.url); + + // Try the umbrella package first (if published as @optave/codegraph-core) + try { + _cached = require('@optave/codegraph-core'); + return _cached; + } catch { /* try platform package */ } + + // Try the platform-specific package + const key = `${os.platform()}-${os.arch()}`; + const pkg = PLATFORM_PACKAGES[key]; + if (pkg) { + try { + _cached = require(pkg); + return _cached; + } catch (err) { + _loadError = err; + } + } else { + _loadError = new Error(`Unsupported platform: ${key}`); + } + + _cached = null; + return null; +} + +/** + * Check whether the native engine is available on this platform. + */ +export function isNativeAvailable() { + return loadNative() !== null; +} + +/** + * Return the native module or throw if not available. + */ +export function getNative() { + const mod = loadNative(); + if (!mod) { + throw new Error( + `Native codegraph-core not available: ${_loadError?.message || 'unknown error'}. ` + + 'Install the platform package or use --engine wasm.' + ); + } + return mod; +} diff --git a/src/parser.js b/src/parser.js index e32510b1..f6037c18 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,7 +1,10 @@ import { fileURLToPath } from 'node:url'; import path from 'node:path'; +import fs from 'node:fs'; import { Parser, Language } from 'web-tree-sitter'; import { warn, debug } from './logger.js'; +import { loadNative, isNativeAvailable } from './native.js'; +import { normalizePath } from './constants.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -1579,3 +1582,179 @@ export function extractPHPSymbols(tree, filePath) { walk(tree.rootNode); return { definitions, calls, imports, classes, exports }; } + +// ── Unified API ────────────────────────────────────────────────────────────── + +function resolveEngine(opts = {}) { + const pref = opts.engine || 'auto'; + if (pref === 'wasm') return { name: 'wasm', native: null }; + if (pref === 'native' || pref === 'auto') { + const native = loadNative(); + if (native) return { name: 'native', native }; + if (pref === 'native') { + warn('Native engine requested but unavailable — falling back to WASM'); + } + } + return { name: 'wasm', native: null }; +} + +/** + * Normalize native engine output to match the camelCase convention + * used by the WASM extractors. + */ +function normalizeNativeSymbols(result) { + return { + definitions: (result.definitions || []).map(d => ({ + name: d.name, kind: d.kind, line: d.line, + endLine: d.endLine ?? d.end_line ?? null, + decorators: d.decorators + })), + calls: (result.calls || []).map(c => ({ + name: c.name, line: c.line, dynamic: c.dynamic + })), + imports: (result.imports || []).map(i => ({ + source: i.source, names: i.names || [], line: i.line, + typeOnly: i.typeOnly ?? i.type_only, + reexport: i.reexport, + wildcardReexport: i.wildcardReexport ?? i.wildcard_reexport, + pythonImport: i.pythonImport ?? i.python_import, + goImport: i.goImport ?? i.go_import, + rustUse: i.rustUse ?? i.rust_use, + javaImport: i.javaImport ?? i.java_import, + csharpUsing: i.csharpUsing ?? i.csharp_using, + rubyRequire: i.rubyRequire ?? i.ruby_require, + phpUse: i.phpUse ?? i.php_use + })), + classes: (result.classes || []).map(c => ({ + name: c.name, extends: c.extends, implements: c.implements, line: c.line + })), + exports: (result.exports || []).map(e => ({ + name: e.name, kind: e.kind, line: e.line + })) + }; +} + +/** + * WASM extraction helper: picks the right extractor based on file extension. + */ +function wasmExtractSymbols(parsers, filePath, code) { + const parser = getParser(parsers, filePath); + if (!parser) return null; + + let tree; + try { tree = parser.parse(code); } + catch (e) { + warn(`Parse error in ${filePath}: ${e.message}`); + return null; + } + + if (filePath.endsWith('.tf') || filePath.endsWith('.hcl')) return extractHCLSymbols(tree, filePath); + if (filePath.endsWith('.py')) return extractPythonSymbols(tree, filePath); + if (filePath.endsWith('.go')) return extractGoSymbols(tree, filePath); + if (filePath.endsWith('.rs')) return extractRustSymbols(tree, filePath); + if (filePath.endsWith('.java')) return extractJavaSymbols(tree, filePath); + if (filePath.endsWith('.cs')) return extractCSharpSymbols(tree, filePath); + if (filePath.endsWith('.rb')) return extractRubySymbols(tree, filePath); + if (filePath.endsWith('.php')) return extractPHPSymbols(tree, filePath); + return extractSymbols(tree, filePath); +} + +/** + * Parse a single file and return normalized symbols. + * + * @param {string} filePath Absolute path to the file. + * @param {string} source Source code string. + * @param {object} [opts] Options: { engine: 'native'|'wasm'|'auto' } + * @returns {Promise<{definitions, calls, imports, classes, exports}|null>} + */ +export async function parseFileAuto(filePath, source, opts = {}) { + const { name, native } = resolveEngine(opts); + + if (native) { + const result = native.parseFile(filePath, source); + return result ? normalizeNativeSymbols(result) : null; + } + + // WASM path + const parsers = await createParsers(); + return wasmExtractSymbols(parsers, filePath, source); +} + +/** + * Parse multiple files in bulk and return a Map. + * + * @param {string[]} filePaths Absolute paths to files. + * @param {string} rootDir Project root for computing relative paths. + * @param {object} [opts] Options: { engine: 'native'|'wasm'|'auto' } + * @returns {Promise>} + */ +export async function parseFilesAuto(filePaths, rootDir, opts = {}) { + const { name, native } = resolveEngine(opts); + const result = new Map(); + + if (native) { + const nativeResults = native.parseFiles(filePaths, rootDir); + for (const r of nativeResults) { + if (!r) continue; + const relPath = normalizePath(path.relative(rootDir, r.file)); + result.set(relPath, normalizeNativeSymbols(r)); + } + return result; + } + + // WASM path + const parsers = await createParsers(); + for (const filePath of filePaths) { + let code; + try { code = fs.readFileSync(filePath, 'utf-8'); } + catch (err) { + warn(`Skipping ${path.relative(rootDir, filePath)}: ${err.message}`); + continue; + } + const symbols = wasmExtractSymbols(parsers, filePath, code); + if (symbols) { + const relPath = normalizePath(path.relative(rootDir, filePath)); + result.set(relPath, symbols); + } + } + return result; +} + +/** + * Report which engine is active. + * + * @param {object} [opts] Options: { engine: 'native'|'wasm'|'auto' } + * @returns {{ name: 'native'|'wasm', version: string|null }} + */ +export function getActiveEngine(opts = {}) { + const { name, native } = resolveEngine(opts); + const version = native ? (typeof native.engineVersion === 'function' ? native.engineVersion() : null) : null; + return { name, version }; +} + +/** + * Create a native ParseTreeCache for incremental parsing. + * Returns null if the native engine is unavailable (WASM fallback). + */ +export function createParseTreeCache() { + const native = loadNative(); + if (!native || !native.ParseTreeCache) return null; + return new native.ParseTreeCache(); +} + +/** + * Parse a file incrementally using the cache, or fall back to full parse. + * + * @param {object|null} cache ParseTreeCache instance (or null for full parse) + * @param {string} filePath Absolute path to the file + * @param {string} source Source code string + * @param {object} [opts] Options forwarded to parseFileAuto on fallback + * @returns {Promise<{definitions, calls, imports, classes, exports}|null>} + */ +export async function parseFileIncremental(cache, filePath, source, opts = {}) { + if (cache) { + const result = cache.parseFile(filePath, source); + return result ? normalizeNativeSymbols(result) : null; + } + return parseFileAuto(filePath, source, opts); +} diff --git a/src/resolve.js b/src/resolve.js new file mode 100644 index 00000000..352d1e97 --- /dev/null +++ b/src/resolve.js @@ -0,0 +1,147 @@ + +import fs from 'fs'; +import path from 'path'; +import { loadNative } from './native.js'; +import { normalizePath } from './constants.js'; + +// ── Alias format conversion ───────────────────────────────────────── + +/** + * Convert JS alias format { baseUrl, paths: { pattern: [targets] } } + * to native format { baseUrl, paths: [{ pattern, targets }] }. + */ +export function convertAliasesForNative(aliases) { + if (!aliases) return null; + return { + baseUrl: aliases.baseUrl || null, + paths: Object.entries(aliases.paths || {}).map(([pattern, targets]) => ({ + pattern, + targets, + })), + }; +} + +// ── JS fallback implementations ───────────────────────────────────── + +function resolveViaAlias(importSource, aliases, rootDir) { + if (aliases.baseUrl && !importSource.startsWith('.') && !importSource.startsWith('/')) { + const candidate = path.resolve(aliases.baseUrl, importSource); + for (const ext of ['', '.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.tsx', '/index.js']) { + const full = candidate + ext; + if (fs.existsSync(full)) return full; + } + } + + for (const [pattern, targets] of Object.entries(aliases.paths)) { + const prefix = pattern.replace(/\*$/, ''); + if (!importSource.startsWith(prefix)) continue; + const rest = importSource.slice(prefix.length); + for (const target of targets) { + const resolved = target.replace(/\*$/, rest); + for (const ext of ['', '.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.tsx', '/index.js']) { + const full = resolved + ext; + if (fs.existsSync(full)) return full; + } + } + } + return null; +} + +function resolveImportPathJS(fromFile, importSource, rootDir, aliases) { + if (!importSource.startsWith('.') && aliases) { + const aliasResolved = resolveViaAlias(importSource, aliases, rootDir); + if (aliasResolved) return normalizePath(path.relative(rootDir, aliasResolved)); + } + if (!importSource.startsWith('.')) return importSource; + const dir = path.dirname(fromFile); + let resolved = path.resolve(dir, importSource); + + if (resolved.endsWith('.js')) { + const tsCandidate = resolved.replace(/\.js$/, '.ts'); + if (fs.existsSync(tsCandidate)) return normalizePath(path.relative(rootDir, tsCandidate)); + const tsxCandidate = resolved.replace(/\.js$/, '.tsx'); + if (fs.existsSync(tsxCandidate)) return normalizePath(path.relative(rootDir, tsxCandidate)); + } + + for (const ext of ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.py', '/index.ts', '/index.tsx', '/index.js', '/__init__.py']) { + const candidate = resolved + ext; + if (fs.existsSync(candidate)) { + return normalizePath(path.relative(rootDir, candidate)); + } + } + if (fs.existsSync(resolved)) return normalizePath(path.relative(rootDir, resolved)); + return normalizePath(path.relative(rootDir, resolved)); +} + +function computeConfidenceJS(callerFile, targetFile, importedFrom) { + if (!targetFile || !callerFile) return 0.3; + if (callerFile === targetFile) return 1.0; + if (importedFrom === targetFile) return 1.0; + if (path.dirname(callerFile) === path.dirname(targetFile)) return 0.7; + const callerParent = path.dirname(path.dirname(callerFile)); + const targetParent = path.dirname(path.dirname(targetFile)); + if (callerParent === targetParent) return 0.5; + return 0.3; +} + +// ── Public API with native dispatch ───────────────────────────────── + +/** + * Resolve a single import path. + * Tries native, falls back to JS. + */ +export function resolveImportPath(fromFile, importSource, rootDir, aliases) { + const native = loadNative(); + if (native) { + try { + return native.resolveImport(fromFile, importSource, rootDir, convertAliasesForNative(aliases)); + } catch { + // fall through to JS + } + } + return resolveImportPathJS(fromFile, importSource, rootDir, aliases); +} + +/** + * Compute proximity-based confidence for call resolution. + * Tries native, falls back to JS. + */ +export function computeConfidence(callerFile, targetFile, importedFrom) { + const native = loadNative(); + if (native) { + try { + return native.computeConfidence(callerFile, targetFile, importedFrom || null); + } catch { + // fall through to JS + } + } + return computeConfidenceJS(callerFile, targetFile, importedFrom); +} + +/** + * Batch resolve multiple imports in a single native call. + * Returns Map<"fromFile|importSource", resolvedPath> or null when native unavailable. + */ +export function resolveImportsBatch(inputs, rootDir, aliases) { + const native = loadNative(); + if (!native) return null; + + try { + const nativeInputs = inputs.map(({ fromFile, importSource }) => ({ + fromFile, + importSource, + })); + const results = native.resolveImports(nativeInputs, rootDir, convertAliasesForNative(aliases)); + const map = new Map(); + for (const r of results) { + map.set(`${r.fromFile}|${r.importSource}`, r.resolvedPath); + } + return map; + } catch { + return null; + } +} + +// ── Exported for testing ──────────────────────────────────────────── + +export { resolveImportPathJS, computeConfidenceJS }; diff --git a/src/watcher.js b/src/watcher.js index bd4b4266..28227eec 100644 --- a/src/watcher.js +++ b/src/watcher.js @@ -2,9 +2,9 @@ import fs from 'fs'; import path from 'path'; import { openDb, initSchema } from './db.js'; -import { createParsers, getParser, extractSymbols, extractHCLSymbols, extractPythonSymbols } from './parser.js'; +import { parseFileIncremental, createParseTreeCache, getActiveEngine } from './parser.js'; import { IGNORE_DIRS, EXTENSIONS, normalizePath } from './constants.js'; -import { resolveImportPath } from './builder.js'; +import { resolveImportPath } from './resolve.js'; import { warn, debug, info } from './logger.js'; function shouldIgnore(filePath) { @@ -19,7 +19,7 @@ function isTrackedExt(filePath) { /** * Parse a single file and update the database incrementally. */ -function updateFile(db, rootDir, filePath, parsers, stmts) { +async function updateFile(db, rootDir, filePath, stmts, engineOpts, cache) { const relPath = normalizePath(path.relative(rootDir, filePath)); const oldNodes = stmts.countNodes.get(relPath)?.c || 0; @@ -29,12 +29,10 @@ function updateFile(db, rootDir, filePath, parsers, stmts) { stmts.deleteNodes.run(relPath); if (!fs.existsSync(filePath)) { + if (cache) cache.remove(filePath); return { file: relPath, nodesAdded: 0, nodesRemoved: oldNodes, edgesAdded: 0, deleted: true }; } - const parser = getParser(parsers, filePath); - if (!parser) return null; - let code; try { code = fs.readFileSync(filePath, 'utf-8'); } catch (err) { @@ -42,18 +40,8 @@ function updateFile(db, rootDir, filePath, parsers, stmts) { return null; } - let tree; - try { tree = parser.parse(code); } - catch (err) { - warn(`Parse error in ${relPath}: ${err.message}`); - return null; - } - - const isHCL = filePath.endsWith('.tf') || filePath.endsWith('.hcl'); - const isPython = filePath.endsWith('.py'); - const symbols = isHCL ? extractHCLSymbols(tree, filePath) - : isPython ? extractPythonSymbols(tree, filePath) - : extractSymbols(tree, filePath); + const symbols = await parseFileIncremental(cache, filePath, code, engineOpts); + if (!symbols) return null; stmts.insertNode.run(relPath, 'file', relPath, 0, null); @@ -131,7 +119,7 @@ function updateFile(db, rootDir, filePath, parsers, stmts) { }; } -export async function watchProject(rootDir) { +export async function watchProject(rootDir, opts = {}) { const dbPath = path.join(rootDir, '.codegraph', 'graph.db'); if (!fs.existsSync(dbPath)) { console.error('No graph.db found. Run `codegraph build` first.'); @@ -140,7 +128,12 @@ export async function watchProject(rootDir) { const db = openDb(dbPath); initSchema(db); - const parsers = await createParsers(); + const engineOpts = { engine: opts.engine || 'auto' }; + const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts); + console.log(`Watch mode using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`); + + const cache = createParseTreeCache(); + console.log(cache ? 'Incremental parsing enabled (native tree cache)' : 'Incremental parsing unavailable (full re-parse)'); const stmts = { insertNode: db.prepare('INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)'), @@ -164,18 +157,16 @@ export async function watchProject(rootDir) { let timer = null; const DEBOUNCE_MS = 300; - function processPending() { + async function processPending() { const files = [...pending]; pending.clear(); - const updates = db.transaction(() => { - const results = []; - for (const filePath of files) { - const result = updateFile(db, rootDir, filePath, parsers, stmts); - if (result) results.push(result); - } - return results; - })(); + const results = []; + for (const filePath of files) { + const result = await updateFile(db, rootDir, filePath, stmts, engineOpts, cache); + if (result) results.push(result); + } + const updates = results; for (const r of updates) { const nodeDelta = r.nodesAdded - r.nodesRemoved; @@ -206,6 +197,7 @@ export async function watchProject(rootDir) { process.on('SIGINT', () => { console.log('\nStopping watcher...'); watcher.close(); + if (cache) cache.clear(); db.close(); process.exit(0); }); diff --git a/tests/engines/parity.test.js b/tests/engines/parity.test.js new file mode 100644 index 00000000..bd941703 --- /dev/null +++ b/tests/engines/parity.test.js @@ -0,0 +1,226 @@ +/** + * Cross-engine parity tests. + * + * Parse the same source snippets with both WASM and native engines, + * then assert the FileSymbols output is equivalent for all 11 languages. + * + * Skipped when the native engine is not installed. + */ + +import { describe, it, expect, beforeAll } from 'vitest'; +import { createParsers, getParser, extractSymbols, extractHCLSymbols, extractPythonSymbols, extractGoSymbols, extractRustSymbols, extractJavaSymbols, extractCSharpSymbols, extractRubySymbols, extractPHPSymbols } from '../../src/parser.js'; +import { isNativeAvailable } from '../../src/native.js'; + +let native; +let parsers; + +function wasmExtract(code, filePath) { + const parser = getParser(parsers, filePath); + if (!parser) return null; + const tree = parser.parse(code); + const isHCL = filePath.endsWith('.tf') || filePath.endsWith('.hcl'); + const isPython = filePath.endsWith('.py'); + const isGo = filePath.endsWith('.go'); + const isRust = filePath.endsWith('.rs'); + const isJava = filePath.endsWith('.java'); + const isCSharp = filePath.endsWith('.cs'); + const isRuby = filePath.endsWith('.rb'); + const isPHP = filePath.endsWith('.php'); + return isHCL ? extractHCLSymbols(tree, filePath) + : isPython ? extractPythonSymbols(tree, filePath) + : isGo ? extractGoSymbols(tree, filePath) + : isRust ? extractRustSymbols(tree, filePath) + : isJava ? extractJavaSymbols(tree, filePath) + : isCSharp ? extractCSharpSymbols(tree, filePath) + : isRuby ? extractRubySymbols(tree, filePath) + : isPHP ? extractPHPSymbols(tree, filePath) + : extractSymbols(tree, filePath); +} + +function nativeExtract(code, filePath) { + return native.parseFile(filePath, code); +} + +/** Normalize symbols for comparison — strip undefined/null optional fields. */ +function normalize(symbols) { + if (!symbols) return symbols; + return { + definitions: (symbols.definitions || []).map(d => ({ + name: d.name, + kind: d.kind, + line: d.line, + endLine: d.endLine ?? d.end_line ?? null, + })), + calls: (symbols.calls || []).map(c => ({ + name: c.name, + line: c.line, + ...(c.dynamic ? { dynamic: true } : {}), + })), + imports: (symbols.imports || []).map(i => ({ + source: i.source, + names: i.names || [], + line: i.line, + })), + classes: (symbols.classes || []).map(c => ({ + name: c.name, + ...(c.extends ? { extends: c.extends } : {}), + ...(c.implements ? { implements: c.implements } : {}), + line: c.line, + })), + exports: (symbols.exports || []).map(e => ({ + name: e.name, + kind: e.kind, + line: e.line, + })), + }; +} + +const hasNative = isNativeAvailable(); + +const describeOrSkip = hasNative ? describe : describe.skip; + +describeOrSkip('Cross-engine parity', () => { + beforeAll(async () => { + if (!hasNative) return; + const { getNative } = await import('../../src/native.js'); + native = getNative(); + parsers = await createParsers(); + }); + + const cases = [ + { + name: 'JavaScript — functions and calls', + file: 'test.js', + code: ` +function greet(name) { return 'Hello ' + name; } +const add = (a, b) => a + b; +greet('world'); +add(1, 2); +`, + }, + { + name: 'TypeScript — interfaces and types', + file: 'test.ts', + code: ` +interface Greeter { greet(name: string): string; } +type ID = string | number; +class MyGreeter implements Greeter { + greet(name: string) { return name; } +} +`, + }, + { + name: 'TSX — class with extends', + file: 'test.tsx', + code: ` +import React from 'react'; +class Button extends React.Component { + render() { return