diff --git a/crates/codegraph-core/src/complexity.rs b/crates/codegraph-core/src/complexity.rs index 609047a7..39e43811 100644 --- a/crates/codegraph-core/src/complexity.rs +++ b/crates/codegraph-core/src/complexity.rs @@ -2,76 +2,353 @@ use tree_sitter::Node; use crate::types::ComplexityMetrics; -/// Language kind for complexity analysis (only JS/TS/TSX supported). -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ComplexityLang { - JavaScript, - TypeScript, - Tsx, +// ─── Language-Configurable Complexity Rules ─────────────────────────────── + +/// Language-specific AST node type rules for complexity analysis. +/// Mirrors `COMPLEXITY_RULES` from `src/complexity.js`. +pub struct LangRules { + pub branch_nodes: &'static [&'static str], + pub case_nodes: &'static [&'static str], + pub logical_operators: &'static [&'static str], + pub logical_node_type: &'static str, + pub optional_chain_type: Option<&'static str>, + pub nesting_nodes: &'static [&'static str], + pub function_nodes: &'static [&'static str], + pub if_node_type: Option<&'static str>, + pub else_node_type: Option<&'static str>, + pub elif_node_type: Option<&'static str>, + pub else_via_alternative: bool, + pub switch_like_nodes: &'static [&'static str], } -impl ComplexityLang { - /// Derive from file extension. Returns None for unsupported languages. - pub fn from_extension(path: &str) -> Option { - let ext = path.rsplit('.').next().unwrap_or(""); - match ext { - "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript), - "ts" => Some(Self::TypeScript), - "tsx" => Some(Self::Tsx), - _ => None, - } +impl LangRules { + fn is_branch(&self, kind: &str) -> bool { + self.branch_nodes.contains(&kind) + } + fn is_case(&self, kind: &str) -> bool { + self.case_nodes.contains(&kind) + } + fn is_logical_op(&self, kind: &str) -> bool { + self.logical_operators.contains(&kind) + } + fn is_nesting(&self, kind: &str) -> bool { + self.nesting_nodes.contains(&kind) + } + fn is_function(&self, kind: &str) -> bool { + self.function_nodes.contains(&kind) + } + fn is_switch_like(&self, kind: &str) -> bool { + self.switch_like_nodes.contains(&kind) } } -// ─── Node type sets (JS/TS/TSX share the same tree-sitter grammar structure) ── - -fn is_branch_node(kind: &str) -> bool { - matches!( - kind, - "if_statement" - | "else_clause" - | "switch_statement" - | "for_statement" - | "for_in_statement" - | "while_statement" - | "do_statement" - | "catch_clause" - | "ternary_expression" - ) -} - -fn is_nesting_node(kind: &str) -> bool { - matches!( - kind, - "if_statement" - | "switch_statement" - | "for_statement" - | "for_in_statement" - | "while_statement" - | "do_statement" - | "catch_clause" - | "ternary_expression" - ) -} - -fn is_function_node(kind: &str) -> bool { - matches!( - kind, - "function_declaration" - | "function_expression" - | "arrow_function" - | "method_definition" - | "generator_function" - | "generator_function_declaration" - ) -} - -fn is_logical_operator(kind: &str) -> bool { - matches!(kind, "&&" | "||" | "??") -} - -fn is_case_node(kind: &str) -> bool { - kind == "switch_case" +// ─── Per-Language Rules ─────────────────────────────────────────────────── + +pub static JS_TS_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "else_clause", + "switch_statement", + "for_statement", + "for_in_statement", + "while_statement", + "do_statement", + "catch_clause", + "ternary_expression", + ], + case_nodes: &["switch_case"], + logical_operators: &["&&", "||", "??"], + logical_node_type: "binary_expression", + optional_chain_type: Some("optional_chain_expression"), + nesting_nodes: &[ + "if_statement", + "switch_statement", + "for_statement", + "for_in_statement", + "while_statement", + "do_statement", + "catch_clause", + "ternary_expression", + ], + function_nodes: &[ + "function_declaration", + "function_expression", + "arrow_function", + "method_definition", + "generator_function", + "generator_function_declaration", + ], + if_node_type: Some("if_statement"), + else_node_type: Some("else_clause"), + elif_node_type: None, + else_via_alternative: false, + switch_like_nodes: &["switch_statement"], +}; + +pub static PYTHON_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "elif_clause", + "else_clause", + "for_statement", + "while_statement", + "except_clause", + "conditional_expression", + "match_statement", + ], + case_nodes: &["case_clause"], + logical_operators: &["and", "or"], + logical_node_type: "boolean_operator", + optional_chain_type: None, + nesting_nodes: &[ + "if_statement", + "for_statement", + "while_statement", + "except_clause", + "conditional_expression", + ], + function_nodes: &["function_definition", "lambda"], + if_node_type: Some("if_statement"), + else_node_type: Some("else_clause"), + elif_node_type: Some("elif_clause"), + else_via_alternative: false, + switch_like_nodes: &["match_statement"], +}; + +pub static GO_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "for_statement", + "expression_switch_statement", + "type_switch_statement", + "select_statement", + ], + case_nodes: &[ + "expression_case", + "type_case", + "default_case", + "communication_case", + ], + logical_operators: &["&&", "||"], + logical_node_type: "binary_expression", + optional_chain_type: None, + nesting_nodes: &[ + "if_statement", + "for_statement", + "expression_switch_statement", + "type_switch_statement", + "select_statement", + ], + function_nodes: &[ + "function_declaration", + "method_declaration", + "func_literal", + ], + if_node_type: Some("if_statement"), + else_node_type: None, + elif_node_type: None, + else_via_alternative: true, + switch_like_nodes: &[ + "expression_switch_statement", + "type_switch_statement", + ], +}; + +pub static RUST_LANG_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_expression", + "else_clause", + "for_expression", + "while_expression", + "loop_expression", + "if_let_expression", + "while_let_expression", + "match_expression", + ], + case_nodes: &["match_arm"], + logical_operators: &["&&", "||"], + logical_node_type: "binary_expression", + optional_chain_type: None, + nesting_nodes: &[ + "if_expression", + "for_expression", + "while_expression", + "loop_expression", + "if_let_expression", + "while_let_expression", + "match_expression", + ], + function_nodes: &["function_item", "closure_expression"], + if_node_type: Some("if_expression"), + else_node_type: Some("else_clause"), + elif_node_type: None, + else_via_alternative: false, + switch_like_nodes: &["match_expression"], +}; + +pub static JAVA_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "for_statement", + "enhanced_for_statement", + "while_statement", + "do_statement", + "catch_clause", + "ternary_expression", + "switch_expression", + ], + case_nodes: &["switch_label"], + logical_operators: &["&&", "||"], + logical_node_type: "binary_expression", + optional_chain_type: None, + nesting_nodes: &[ + "if_statement", + "for_statement", + "enhanced_for_statement", + "while_statement", + "do_statement", + "catch_clause", + "ternary_expression", + ], + function_nodes: &[ + "method_declaration", + "constructor_declaration", + "lambda_expression", + ], + if_node_type: Some("if_statement"), + else_node_type: None, + elif_node_type: None, + else_via_alternative: true, + switch_like_nodes: &["switch_expression"], +}; + +pub static CSHARP_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "else_clause", + "for_statement", + "for_each_statement", + "while_statement", + "do_statement", + "catch_clause", + "conditional_expression", + "switch_statement", + ], + case_nodes: &["switch_section"], + logical_operators: &["&&", "||", "??"], + logical_node_type: "binary_expression", + optional_chain_type: Some("conditional_access_expression"), + nesting_nodes: &[ + "if_statement", + "for_statement", + "for_each_statement", + "while_statement", + "do_statement", + "catch_clause", + "conditional_expression", + "switch_statement", + ], + function_nodes: &[ + "method_declaration", + "constructor_declaration", + "lambda_expression", + "local_function_statement", + ], + if_node_type: Some("if_statement"), + else_node_type: Some("else_clause"), + elif_node_type: None, + else_via_alternative: false, + switch_like_nodes: &["switch_statement"], +}; + +pub static RUBY_RULES: LangRules = LangRules { + branch_nodes: &[ + "if", + "elsif", + "else", + "unless", + "case", + "for", + "while", + "until", + "rescue", + "conditional", + ], + case_nodes: &["when"], + logical_operators: &["and", "or", "&&", "||"], + logical_node_type: "binary", + optional_chain_type: None, + nesting_nodes: &[ + "if", + "unless", + "case", + "for", + "while", + "until", + "rescue", + "conditional", + ], + function_nodes: &["method", "singleton_method", "lambda", "do_block"], + if_node_type: Some("if"), + else_node_type: Some("else"), + elif_node_type: Some("elsif"), + else_via_alternative: false, + switch_like_nodes: &["case"], +}; + +pub static PHP_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "else_if_clause", + "else_clause", + "for_statement", + "foreach_statement", + "while_statement", + "do_statement", + "catch_clause", + "conditional_expression", + "switch_statement", + ], + case_nodes: &["case_statement", "default_statement"], + logical_operators: &["&&", "||", "and", "or", "??"], + logical_node_type: "binary_expression", + optional_chain_type: Some("nullsafe_member_access_expression"), + nesting_nodes: &[ + "if_statement", + "for_statement", + "foreach_statement", + "while_statement", + "do_statement", + "catch_clause", + "conditional_expression", + "switch_statement", + ], + function_nodes: &[ + "function_definition", + "method_declaration", + "anonymous_function_creation_expression", + "arrow_function", + ], + if_node_type: Some("if_statement"), + else_node_type: Some("else_clause"), + elif_node_type: Some("else_if_clause"), + else_via_alternative: false, + switch_like_nodes: &["switch_statement"], +}; + +/// Look up complexity rules by language ID (matches `COMPLEXITY_RULES` keys in JS). +pub fn lang_rules(lang_id: &str) -> Option<&'static LangRules> { + match lang_id { + "javascript" | "typescript" | "tsx" => Some(&JS_TS_RULES), + "python" => Some(&PYTHON_RULES), + "go" => Some(&GO_RULES), + "rust" => Some(&RUST_LANG_RULES), + "java" => Some(&JAVA_RULES), + "c_sharp" => Some(&CSHARP_RULES), + "ruby" => Some(&RUBY_RULES), + "php" => Some(&PHP_RULES), + _ => None, + } } // ─── Single-traversal DFS complexity computation ────────────────────────── @@ -80,7 +357,10 @@ fn is_case_node(kind: &str) -> bool { /// for a function's AST subtree in a single DFS walk. /// /// This is a faithful port of `computeFunctionComplexity()` from `src/complexity.js`. -pub fn compute_function_complexity(function_node: &Node) -> ComplexityMetrics { +pub fn compute_function_complexity( + function_node: &Node, + rules: &LangRules, +) -> ComplexityMetrics { let mut cognitive: u32 = 0; let mut cyclomatic: u32 = 1; // McCabe starts at 1 let mut max_nesting: u32 = 0; @@ -89,6 +369,7 @@ pub fn compute_function_complexity(function_node: &Node) -> ComplexityMetrics { function_node, 0, true, + rules, &mut cognitive, &mut cyclomatic, &mut max_nesting, @@ -101,10 +382,35 @@ pub fn compute_function_complexity(function_node: &Node) -> ComplexityMetrics { } } +fn walk_children( + node: &Node, + nesting_level: u32, + is_top_function: bool, + rules: &LangRules, + cognitive: &mut u32, + cyclomatic: &mut u32, + max_nesting: &mut u32, +) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk( + &child, + nesting_level, + is_top_function, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + } + } +} + fn walk( node: &Node, nesting_level: u32, is_top_function: bool, + rules: &LangRules, cognitive: &mut u32, cyclomatic: &mut u32, max_nesting: &mut u32, @@ -117,17 +423,17 @@ fn walk( } // Handle logical operators in binary expressions - if kind == "binary_expression" { + if kind == rules.logical_node_type { if let Some(op_node) = node.child(1) { let op = op_node.kind(); - if is_logical_operator(op) { + if rules.is_logical_op(op) { // Cyclomatic: +1 for every logical operator *cyclomatic += 1; // Cognitive: +1 only when operator changes from the previous sibling sequence let mut same_sequence = false; if let Some(parent) = node.parent() { - if parent.kind() == "binary_expression" { + if parent.kind() == rules.logical_node_type { if let Some(parent_op) = parent.child(1) { if parent_op.kind() == op { same_sequence = true; @@ -140,60 +446,123 @@ fn walk( } // Walk children manually to avoid double-counting - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); - } - } + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); return; } } } // Handle optional chaining (cyclomatic only) - if kind == "optional_chain_expression" { - *cyclomatic += 1; + if let Some(opt_type) = rules.optional_chain_type { + if kind == opt_type { + *cyclomatic += 1; + } } - // Handle branch/control flow nodes - if is_branch_node(kind) { - let is_else_if = kind == "if_statement" - && node - .parent() - .map_or(false, |p| p.kind() == "else_clause"); - - if kind == "else_clause" { - // else: +1 cognitive structural, no nesting increment, no cyclomatic - // But only if it's a plain else (not else-if) - let first_child = node.named_child(0); - if first_child.map_or(false, |c| c.kind() == "if_statement") { - // This is else-if: the if_statement child handles its own increment - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); - } + // Handle branch/control flow nodes (skip keyword leaf tokens — childCount > 0 guard) + if rules.is_branch(kind) && node.child_count() > 0 { + // Pattern A: else clause wraps if (JS/C#/Rust) + if let Some(else_type) = rules.else_node_type { + if kind == else_type { + let first_child = node.named_child(0); + if first_child.map_or(false, |c| { + rules.if_node_type.map_or(false, |if_t| c.kind() == if_t) + }) { + // else-if: the if_statement child handles its own increment + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + return; } + // Plain else + *cognitive += 1; + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); return; } - // Plain else - *cognitive += 1; - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if let Some(elif_type) = rules.elif_node_type { + if kind == elif_type { + *cognitive += 1; + *cyclomatic += 1; + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + return; + } + } + + // Detect else-if via Pattern A or C + let mut is_else_if = false; + if rules.if_node_type.map_or(false, |if_t| kind == if_t) { + if rules.else_via_alternative { + // Pattern C (Go/Java): if_statement is the alternative of parent if_statement + if let Some(parent) = node.parent() { + if rules + .if_node_type + .map_or(false, |if_t| parent.kind() == if_t) + { + if let Some(alt) = parent.child_by_field_name("alternative") { + if alt.id() == node.id() { + is_else_if = true; + } + } + } + } + } else if rules.else_node_type.is_some() { + // Pattern A (JS/C#/Rust): if_statement inside else_clause + if let Some(parent) = node.parent() { + if rules + .else_node_type + .map_or(false, |else_t| parent.kind() == else_t) + { + is_else_if = true; + } } } - return; } if is_else_if { - // else-if: +1 structural cognitive, +1 cyclomatic, NO nesting increment *cognitive += 1; *cyclomatic += 1; - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); - } - } + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); return; } @@ -201,56 +570,82 @@ fn walk( *cognitive += 1 + nesting_level; // structural + nesting *cyclomatic += 1; - // switch_statement doesn't add cyclomatic itself (cases do), but adds cognitive - if kind == "switch_statement" { + // Switch-like nodes don't add cyclomatic themselves (cases do) + if rules.is_switch_like(kind) { *cyclomatic -= 1; // Undo the ++ above; cases handle cyclomatic } - if is_nesting_node(kind) { - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk( - &child, - nesting_level + 1, - false, - cognitive, - cyclomatic, - max_nesting, - ); + if rules.is_nesting(kind) { + walk_children( + node, + nesting_level + 1, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + return; + } + } + + // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) + if rules.else_via_alternative { + if rules.if_node_type.map_or(false, |if_t| kind != if_t) { + if let Some(parent) = node.parent() { + if rules + .if_node_type + .map_or(false, |if_t| parent.kind() == if_t) + { + if let Some(alt) = parent.child_by_field_name("alternative") { + if alt.id() == node.id() { + *cognitive += 1; + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + return; + } + } } } - return; } } - // Handle case nodes (cyclomatic only) - if is_case_node(kind) { + // Handle case nodes (cyclomatic only, skip keyword leaves) + if rules.is_case(kind) && node.child_count() > 0 { *cyclomatic += 1; } // Handle nested function definitions (increase nesting) - if !is_top_function && is_function_node(kind) { - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk( - &child, - nesting_level + 1, - false, - cognitive, - cyclomatic, - max_nesting, - ); - } - } + if !is_top_function && rules.is_function(kind) { + walk_children( + node, + nesting_level + 1, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); return; } // Walk children - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); - } - } + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); } // ─── Tests ──────────────────────────────────────────────────────────────── @@ -266,27 +661,26 @@ mod tests { .set_language(&tree_sitter_javascript::LANGUAGE.into()) .unwrap(); let tree = parser.parse(code.as_bytes(), None).unwrap(); - // Find the first function node let root = tree.root_node(); - let func = find_first_function(&root).expect("no function found in test code"); - compute_function_complexity(&func) + let func = find_first_function(&root, &JS_TS_RULES).expect("no function found in test code"); + compute_function_complexity(&func, &JS_TS_RULES) } - fn find_first_function<'a>(node: &Node<'a>) -> Option> { - if is_function_node(node.kind()) { + fn find_first_function<'a>(node: &Node<'a>, rules: &LangRules) -> Option> { + if rules.is_function(node.kind()) { return Some(*node); } // For variable declarations with arrow functions if node.kind() == "variable_declarator" { if let Some(value) = node.child_by_field_name("value") { - if is_function_node(value.kind()) { + if rules.is_function(value.kind()) { return Some(value); } } } for i in 0..node.child_count() { if let Some(child) = node.child(i) { - if let Some(found) = find_first_function(&child) { + if let Some(found) = find_first_function(&child, rules) { return Some(found); } } @@ -429,21 +823,73 @@ mod tests { assert_eq!(m.max_nesting, 1); } + // ─── Python tests ───────────────────────────────────────────────────── + + fn compute_python(code: &str) -> ComplexityMetrics { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_python::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + let root = tree.root_node(); + let func = find_first_function(&root, &PYTHON_RULES).expect("no function found"); + compute_function_complexity(&func, &PYTHON_RULES) + } + #[test] - fn complexity_lang_from_extension() { - assert_eq!( - ComplexityLang::from_extension("foo.js"), - Some(ComplexityLang::JavaScript) - ); - assert_eq!( - ComplexityLang::from_extension("foo.ts"), - Some(ComplexityLang::TypeScript) - ); - assert_eq!( - ComplexityLang::from_extension("foo.tsx"), - Some(ComplexityLang::Tsx) - ); - assert_eq!(ComplexityLang::from_extension("foo.py"), None); - assert_eq!(ComplexityLang::from_extension("foo.go"), None); + fn python_empty_function() { + let m = compute_python("def f():\n pass"); + assert_eq!(m.cognitive, 0); + assert_eq!(m.cyclomatic, 1); + } + + #[test] + fn python_if_elif_else() { + let m = compute_python("def f(x):\n if x > 0:\n return 1\n elif x < 0:\n return -1\n else:\n return 0"); + // if: +1 cog, +1 cyc; elif: +1 cog, +1 cyc; else: +1 cog + assert_eq!(m.cognitive, 3); + assert_eq!(m.cyclomatic, 3); + } + + #[test] + fn python_for_loop() { + let m = compute_python("def f(xs):\n for x in xs:\n print(x)"); + assert_eq!(m.cognitive, 1); + assert_eq!(m.cyclomatic, 2); + } + + // ─── Go tests ───────────────────────────────────────────────────────── + + fn compute_go(code: &str) -> ComplexityMetrics { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_go::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + let root = tree.root_node(); + let func = find_first_function(&root, &GO_RULES).expect("no function found"); + compute_function_complexity(&func, &GO_RULES) + } + + #[test] + fn go_empty_function() { + let m = compute_go("package main\nfunc f() {}"); + assert_eq!(m.cognitive, 0); + assert_eq!(m.cyclomatic, 1); + } + + #[test] + fn go_if_else() { + let m = compute_go("package main\nfunc f(x int) int {\n if x > 0 {\n return 1\n } else {\n return 0\n }\n}"); + // if: +1 cog, +1 cyc; else (via alternative): +1 cog + assert_eq!(m.cognitive, 2); + assert_eq!(m.cyclomatic, 2); + } + + #[test] + fn go_for_loop() { + let m = compute_go("package main\nfunc f() {\n for i := 0; i < 10; i++ {\n println(i)\n }\n}"); + assert_eq!(m.cognitive, 1); + assert_eq!(m.cyclomatic, 2); } } diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index 5d9ac600..d1f2ba35 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -1,4 +1,5 @@ use tree_sitter::{Node, Tree}; +use crate::complexity::{compute_function_complexity, CSHARP_RULES}; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -103,7 +104,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(&child), end_line: Some(end_line(&child)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(&child, &CSHARP_RULES)), }); } } @@ -140,7 +141,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), }); } } @@ -159,7 +160,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), }); } } @@ -178,7 +179,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), }); } } diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index edf18c5b..d253189a 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -1,4 +1,5 @@ use tree_sitter::{Node, Tree}; +use crate::complexity::{compute_function_complexity, GO_RULES}; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -23,7 +24,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &GO_RULES)), }); } } @@ -59,7 +60,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &GO_RULES)), }); } } diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index 0f6c5679..111aeba4 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -1,4 +1,5 @@ use tree_sitter::{Node, Tree}; +use crate::complexity::{compute_function_complexity, JAVA_RULES}; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -109,7 +110,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(&child), end_line: Some(end_line(&child)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(&child, &JAVA_RULES)), }); } } @@ -146,7 +147,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &JAVA_RULES)), }); } } @@ -165,7 +166,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &JAVA_RULES)), }); } } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 61ada78e..63fe6d48 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::compute_function_complexity; +use crate::complexity::{compute_function_complexity, JS_TS_RULES}; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -24,7 +24,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node)), + complexity: Some(compute_function_complexity(node, &JS_TS_RULES)), }); } } @@ -80,7 +80,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node)), + complexity: Some(compute_function_complexity(node, &JS_TS_RULES)), }); } } @@ -138,7 +138,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(&value_n)), decorators: None, - complexity: Some(compute_function_complexity(&value_n)), + complexity: Some(compute_function_complexity(&value_n, &JS_TS_RULES)), }); } } @@ -562,7 +562,7 @@ fn extract_callback_definition(call_node: &Node, source: &[u8]) -> Option Option Option ({ file: f })) : parseChanges; + // ── Phase timing ──────────────────────────────────────────────────── + const _t = {}; + // ── Unified parse via parseFilesAuto ─────────────────────────────── const filePaths = filesToParse.map((item) => item.file); + _t.parse0 = performance.now(); const allSymbols = await parseFilesAuto(filePaths, rootDir, engineOpts); + _t.parseMs = performance.now() - _t.parse0; // Build a lookup from incremental data (changed items may carry pre-computed hashes + stats) const precomputedData = new Map(); @@ -627,7 +633,9 @@ export async function buildGraph(rootDir, opts = {}) { } } }); + _t.insert0 = performance.now(); insertAll(); + _t.insertMs = performance.now() - _t.insert0; const parsed = allSymbols.size; const skipped = filesToParse.length - parsed; @@ -643,6 +651,7 @@ export async function buildGraph(rootDir, opts = {}) { // ── Batch import resolution ──────────────────────────────────────── // Collect all (fromFile, importSource) pairs and resolve in one native call + _t.resolve0 = performance.now(); const batchInputs = []; for (const [relPath, symbols] of fileSymbols) { const absFile = path.join(rootDir, relPath); @@ -651,6 +660,7 @@ export async function buildGraph(rootDir, opts = {}) { } } const batchResolved = resolveImportsBatch(batchInputs, rootDir, aliases); + _t.resolveMs = performance.now() - _t.resolve0; function getResolved(absFile, importSource) { if (batchResolved) { @@ -738,6 +748,7 @@ export async function buildGraph(rootDir, opts = {}) { } // Second pass: build edges + _t.edges0 = performance.now(); let edgeCount = 0; const buildEdges = db.transaction(() => { for (const [relPath, symbols] of fileSymbols) { @@ -915,6 +926,7 @@ export async function buildGraph(rootDir, opts = {}) { } }); buildEdges(); + _t.edgesMs = performance.now() - _t.edges0; // Build line count map for structure metrics (prefer cached _lineCount from parser) const lineCountMap = new Map(); @@ -986,6 +998,7 @@ export async function buildGraph(rootDir, opts = {}) { } // Build directory structure, containment edges, and metrics + _t.structure0 = performance.now(); const relDirs = new Set(); for (const absDir of discoveredDirs) { relDirs.add(normalizePath(path.relative(rootDir, absDir))); @@ -996,8 +1009,10 @@ export async function buildGraph(rootDir, opts = {}) { } catch (err) { debug(`Structure analysis failed: ${err.message}`); } + _t.structureMs = performance.now() - _t.structure0; // Classify node roles (entry, core, utility, adapter, dead, leaf) + _t.roles0 = performance.now(); try { const { classifyNodeRoles } = await import('./structure.js'); const roleSummary = classifyNodeRoles(db); @@ -1009,14 +1024,17 @@ export async function buildGraph(rootDir, opts = {}) { } catch (err) { debug(`Role classification failed: ${err.message}`); } + _t.rolesMs = performance.now() - _t.roles0; // Compute per-function complexity metrics (cognitive, cyclomatic, nesting) + _t.complexity0 = performance.now(); try { const { buildComplexityMetrics } = await import('./complexity.js'); await buildComplexityMetrics(db, allSymbols, rootDir, engineOpts); } catch (err) { debug(`Complexity analysis failed: ${err.message}`); } + _t.complexityMs = performance.now() - _t.complexity0; // Release any remaining cached WASM trees for GC for (const [, symbols] of allSymbols) { @@ -1076,4 +1094,16 @@ export async function buildGraph(rootDir, opts = {}) { } } } + + return { + phases: { + parseMs: +_t.parseMs.toFixed(1), + insertMs: +_t.insertMs.toFixed(1), + resolveMs: +_t.resolveMs.toFixed(1), + edgesMs: +_t.edgesMs.toFixed(1), + structureMs: +_t.structureMs.toFixed(1), + rolesMs: +_t.rolesMs.toFixed(1), + complexityMs: +_t.complexityMs.toFixed(1), + }, + }; } diff --git a/src/complexity.js b/src/complexity.js index f85275b9..1ac81512 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -3,6 +3,7 @@ import path from 'node:path'; import { loadConfig } from './config.js'; import { openReadonlyOrFail } from './db.js'; import { info } from './logger.js'; +import { LANGUAGE_REGISTRY } from './parser.js'; import { isTestFile } from './queries.js'; // ─── Language-Specific Node Type Registry ───────────────────────────────── @@ -294,6 +295,14 @@ export const COMPLEXITY_RULES = new Map([ ['php', PHP_RULES], ]); +// Extensions whose language has complexity rules — used to skip needless WASM init +const COMPLEXITY_EXTENSIONS = new Set(); +for (const entry of LANGUAGE_REGISTRY) { + if (COMPLEXITY_RULES.has(entry.id)) { + for (const ext of entry.extensions) COMPLEXITY_EXTENSIONS.add(ext); + } +} + // ─── Halstead Operator/Operand Classification ──────────────────────────── const JS_TS_HALSTEAD = { @@ -1387,8 +1396,11 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp let parsers = null; let extToLang = null; let needsFallback = false; - for (const [, symbols] of fileSymbols) { + for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree) { + // Only consider files whose language actually has complexity rules + const ext = path.extname(relPath).toLowerCase(); + if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; // Check if all function/method defs have pre-computed complexity (native engine) const hasPrecomputed = symbols.definitions.every( (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, @@ -1400,7 +1412,7 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp } } if (needsFallback) { - const { createParsers, LANGUAGE_REGISTRY } = await import('./parser.js'); + const { createParsers } = await import('./parser.js'); parsers = await createParsers(); extToLang = new Map(); for (const entry of LANGUAGE_REGISTRY) { @@ -1440,8 +1452,9 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp // Only attempt WASM fallback if we actually need AST-based computation if (!allPrecomputed && !tree) { - if (!extToLang) continue; // No WASM parsers available const ext = path.extname(relPath).toLowerCase(); + if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; // Language has no complexity rules + if (!extToLang) continue; // No WASM parsers available langId = extToLang.get(ext); if (!langId) continue;