From a61905aa8e4057874df6f03ca26b9688df205e38 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 26 Feb 2026 22:01:51 -0700 Subject: [PATCH] perf: native complexity for all languages + phase breakdown benchmarks Eliminate WASM parser initialization (~200ms) during native engine builds by computing complexity metrics in Rust for all supported languages, not just JS/TS/TSX. Rust complexity module rewritten with configurable LangRules struct matching the JS-side COMPLEXITY_RULES. Full algorithm ported including three else/elif patterns (Pattern A: else wraps if for JS/C#/Rust, Pattern B: explicit elif for Python/Ruby/PHP, Pattern C: alternative field for Go/Java), switch-like node handling, and leaf token guards. All 8 extractors (JS, Python, Go, Rust, Java, C#, PHP, Ruby) now call compute_function_complexity with language-specific rules instead of returning complexity: None. JS-side safety net: COMPLEXITY_EXTENSIONS guard in buildComplexityMetrics skips needsFallback check for files whose language has no complexity rules. Build phase timing added to buildGraph (parse, insert, resolve, edges, structure, roles, complexity) with phase breakdown table in benchmark reports. Impact: 32 functions changed, 50 affected --- crates/codegraph-core/src/complexity.rs | 768 ++++++++++++++---- .../codegraph-core/src/extractors/csharp.rs | 9 +- crates/codegraph-core/src/extractors/go.rs | 5 +- crates/codegraph-core/src/extractors/java.rs | 7 +- .../src/extractors/javascript.rs | 14 +- crates/codegraph-core/src/extractors/php.rs | 7 +- .../codegraph-core/src/extractors/python.rs | 3 +- crates/codegraph-core/src/extractors/ruby.rs | 5 +- .../src/extractors/rust_lang.rs | 5 +- scripts/benchmark.js | 5 +- scripts/update-benchmark-report.js | 25 + src/builder.js | 30 + src/complexity.js | 19 +- 13 files changed, 713 insertions(+), 189 deletions(-) diff --git a/crates/codegraph-core/src/complexity.rs b/crates/codegraph-core/src/complexity.rs index 609047a7..39e43811 100644 --- a/crates/codegraph-core/src/complexity.rs +++ b/crates/codegraph-core/src/complexity.rs @@ -2,76 +2,353 @@ use tree_sitter::Node; use crate::types::ComplexityMetrics; -/// Language kind for complexity analysis (only JS/TS/TSX supported). -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ComplexityLang { - JavaScript, - TypeScript, - Tsx, +// ─── Language-Configurable Complexity Rules ─────────────────────────────── + +/// Language-specific AST node type rules for complexity analysis. +/// Mirrors `COMPLEXITY_RULES` from `src/complexity.js`. +pub struct LangRules { + pub branch_nodes: &'static [&'static str], + pub case_nodes: &'static [&'static str], + pub logical_operators: &'static [&'static str], + pub logical_node_type: &'static str, + pub optional_chain_type: Option<&'static str>, + pub nesting_nodes: &'static [&'static str], + pub function_nodes: &'static [&'static str], + pub if_node_type: Option<&'static str>, + pub else_node_type: Option<&'static str>, + pub elif_node_type: Option<&'static str>, + pub else_via_alternative: bool, + pub switch_like_nodes: &'static [&'static str], } -impl ComplexityLang { - /// Derive from file extension. Returns None for unsupported languages. - pub fn from_extension(path: &str) -> Option { - let ext = path.rsplit('.').next().unwrap_or(""); - match ext { - "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript), - "ts" => Some(Self::TypeScript), - "tsx" => Some(Self::Tsx), - _ => None, - } +impl LangRules { + fn is_branch(&self, kind: &str) -> bool { + self.branch_nodes.contains(&kind) + } + fn is_case(&self, kind: &str) -> bool { + self.case_nodes.contains(&kind) + } + fn is_logical_op(&self, kind: &str) -> bool { + self.logical_operators.contains(&kind) + } + fn is_nesting(&self, kind: &str) -> bool { + self.nesting_nodes.contains(&kind) + } + fn is_function(&self, kind: &str) -> bool { + self.function_nodes.contains(&kind) + } + fn is_switch_like(&self, kind: &str) -> bool { + self.switch_like_nodes.contains(&kind) } } -// ─── Node type sets (JS/TS/TSX share the same tree-sitter grammar structure) ── - -fn is_branch_node(kind: &str) -> bool { - matches!( - kind, - "if_statement" - | "else_clause" - | "switch_statement" - | "for_statement" - | "for_in_statement" - | "while_statement" - | "do_statement" - | "catch_clause" - | "ternary_expression" - ) -} - -fn is_nesting_node(kind: &str) -> bool { - matches!( - kind, - "if_statement" - | "switch_statement" - | "for_statement" - | "for_in_statement" - | "while_statement" - | "do_statement" - | "catch_clause" - | "ternary_expression" - ) -} - -fn is_function_node(kind: &str) -> bool { - matches!( - kind, - "function_declaration" - | "function_expression" - | "arrow_function" - | "method_definition" - | "generator_function" - | "generator_function_declaration" - ) -} - -fn is_logical_operator(kind: &str) -> bool { - matches!(kind, "&&" | "||" | "??") -} - -fn is_case_node(kind: &str) -> bool { - kind == "switch_case" +// ─── Per-Language Rules ─────────────────────────────────────────────────── + +pub static JS_TS_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "else_clause", + "switch_statement", + "for_statement", + "for_in_statement", + "while_statement", + "do_statement", + "catch_clause", + "ternary_expression", + ], + case_nodes: &["switch_case"], + logical_operators: &["&&", "||", "??"], + logical_node_type: "binary_expression", + optional_chain_type: Some("optional_chain_expression"), + nesting_nodes: &[ + "if_statement", + "switch_statement", + "for_statement", + "for_in_statement", + "while_statement", + "do_statement", + "catch_clause", + "ternary_expression", + ], + function_nodes: &[ + "function_declaration", + "function_expression", + "arrow_function", + "method_definition", + "generator_function", + "generator_function_declaration", + ], + if_node_type: Some("if_statement"), + else_node_type: Some("else_clause"), + elif_node_type: None, + else_via_alternative: false, + switch_like_nodes: &["switch_statement"], +}; + +pub static PYTHON_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "elif_clause", + "else_clause", + "for_statement", + "while_statement", + "except_clause", + "conditional_expression", + "match_statement", + ], + case_nodes: &["case_clause"], + logical_operators: &["and", "or"], + logical_node_type: "boolean_operator", + optional_chain_type: None, + nesting_nodes: &[ + "if_statement", + "for_statement", + "while_statement", + "except_clause", + "conditional_expression", + ], + function_nodes: &["function_definition", "lambda"], + if_node_type: Some("if_statement"), + else_node_type: Some("else_clause"), + elif_node_type: Some("elif_clause"), + else_via_alternative: false, + switch_like_nodes: &["match_statement"], +}; + +pub static GO_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "for_statement", + "expression_switch_statement", + "type_switch_statement", + "select_statement", + ], + case_nodes: &[ + "expression_case", + "type_case", + "default_case", + "communication_case", + ], + logical_operators: &["&&", "||"], + logical_node_type: "binary_expression", + optional_chain_type: None, + nesting_nodes: &[ + "if_statement", + "for_statement", + "expression_switch_statement", + "type_switch_statement", + "select_statement", + ], + function_nodes: &[ + "function_declaration", + "method_declaration", + "func_literal", + ], + if_node_type: Some("if_statement"), + else_node_type: None, + elif_node_type: None, + else_via_alternative: true, + switch_like_nodes: &[ + "expression_switch_statement", + "type_switch_statement", + ], +}; + +pub static RUST_LANG_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_expression", + "else_clause", + "for_expression", + "while_expression", + "loop_expression", + "if_let_expression", + "while_let_expression", + "match_expression", + ], + case_nodes: &["match_arm"], + logical_operators: &["&&", "||"], + logical_node_type: "binary_expression", + optional_chain_type: None, + nesting_nodes: &[ + "if_expression", + "for_expression", + "while_expression", + "loop_expression", + "if_let_expression", + "while_let_expression", + "match_expression", + ], + function_nodes: &["function_item", "closure_expression"], + if_node_type: Some("if_expression"), + else_node_type: Some("else_clause"), + elif_node_type: None, + else_via_alternative: false, + switch_like_nodes: &["match_expression"], +}; + +pub static JAVA_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "for_statement", + "enhanced_for_statement", + "while_statement", + "do_statement", + "catch_clause", + "ternary_expression", + "switch_expression", + ], + case_nodes: &["switch_label"], + logical_operators: &["&&", "||"], + logical_node_type: "binary_expression", + optional_chain_type: None, + nesting_nodes: &[ + "if_statement", + "for_statement", + "enhanced_for_statement", + "while_statement", + "do_statement", + "catch_clause", + "ternary_expression", + ], + function_nodes: &[ + "method_declaration", + "constructor_declaration", + "lambda_expression", + ], + if_node_type: Some("if_statement"), + else_node_type: None, + elif_node_type: None, + else_via_alternative: true, + switch_like_nodes: &["switch_expression"], +}; + +pub static CSHARP_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "else_clause", + "for_statement", + "for_each_statement", + "while_statement", + "do_statement", + "catch_clause", + "conditional_expression", + "switch_statement", + ], + case_nodes: &["switch_section"], + logical_operators: &["&&", "||", "??"], + logical_node_type: "binary_expression", + optional_chain_type: Some("conditional_access_expression"), + nesting_nodes: &[ + "if_statement", + "for_statement", + "for_each_statement", + "while_statement", + "do_statement", + "catch_clause", + "conditional_expression", + "switch_statement", + ], + function_nodes: &[ + "method_declaration", + "constructor_declaration", + "lambda_expression", + "local_function_statement", + ], + if_node_type: Some("if_statement"), + else_node_type: Some("else_clause"), + elif_node_type: None, + else_via_alternative: false, + switch_like_nodes: &["switch_statement"], +}; + +pub static RUBY_RULES: LangRules = LangRules { + branch_nodes: &[ + "if", + "elsif", + "else", + "unless", + "case", + "for", + "while", + "until", + "rescue", + "conditional", + ], + case_nodes: &["when"], + logical_operators: &["and", "or", "&&", "||"], + logical_node_type: "binary", + optional_chain_type: None, + nesting_nodes: &[ + "if", + "unless", + "case", + "for", + "while", + "until", + "rescue", + "conditional", + ], + function_nodes: &["method", "singleton_method", "lambda", "do_block"], + if_node_type: Some("if"), + else_node_type: Some("else"), + elif_node_type: Some("elsif"), + else_via_alternative: false, + switch_like_nodes: &["case"], +}; + +pub static PHP_RULES: LangRules = LangRules { + branch_nodes: &[ + "if_statement", + "else_if_clause", + "else_clause", + "for_statement", + "foreach_statement", + "while_statement", + "do_statement", + "catch_clause", + "conditional_expression", + "switch_statement", + ], + case_nodes: &["case_statement", "default_statement"], + logical_operators: &["&&", "||", "and", "or", "??"], + logical_node_type: "binary_expression", + optional_chain_type: Some("nullsafe_member_access_expression"), + nesting_nodes: &[ + "if_statement", + "for_statement", + "foreach_statement", + "while_statement", + "do_statement", + "catch_clause", + "conditional_expression", + "switch_statement", + ], + function_nodes: &[ + "function_definition", + "method_declaration", + "anonymous_function_creation_expression", + "arrow_function", + ], + if_node_type: Some("if_statement"), + else_node_type: Some("else_clause"), + elif_node_type: Some("else_if_clause"), + else_via_alternative: false, + switch_like_nodes: &["switch_statement"], +}; + +/// Look up complexity rules by language ID (matches `COMPLEXITY_RULES` keys in JS). +pub fn lang_rules(lang_id: &str) -> Option<&'static LangRules> { + match lang_id { + "javascript" | "typescript" | "tsx" => Some(&JS_TS_RULES), + "python" => Some(&PYTHON_RULES), + "go" => Some(&GO_RULES), + "rust" => Some(&RUST_LANG_RULES), + "java" => Some(&JAVA_RULES), + "c_sharp" => Some(&CSHARP_RULES), + "ruby" => Some(&RUBY_RULES), + "php" => Some(&PHP_RULES), + _ => None, + } } // ─── Single-traversal DFS complexity computation ────────────────────────── @@ -80,7 +357,10 @@ fn is_case_node(kind: &str) -> bool { /// for a function's AST subtree in a single DFS walk. /// /// This is a faithful port of `computeFunctionComplexity()` from `src/complexity.js`. -pub fn compute_function_complexity(function_node: &Node) -> ComplexityMetrics { +pub fn compute_function_complexity( + function_node: &Node, + rules: &LangRules, +) -> ComplexityMetrics { let mut cognitive: u32 = 0; let mut cyclomatic: u32 = 1; // McCabe starts at 1 let mut max_nesting: u32 = 0; @@ -89,6 +369,7 @@ pub fn compute_function_complexity(function_node: &Node) -> ComplexityMetrics { function_node, 0, true, + rules, &mut cognitive, &mut cyclomatic, &mut max_nesting, @@ -101,10 +382,35 @@ pub fn compute_function_complexity(function_node: &Node) -> ComplexityMetrics { } } +fn walk_children( + node: &Node, + nesting_level: u32, + is_top_function: bool, + rules: &LangRules, + cognitive: &mut u32, + cyclomatic: &mut u32, + max_nesting: &mut u32, +) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk( + &child, + nesting_level, + is_top_function, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + } + } +} + fn walk( node: &Node, nesting_level: u32, is_top_function: bool, + rules: &LangRules, cognitive: &mut u32, cyclomatic: &mut u32, max_nesting: &mut u32, @@ -117,17 +423,17 @@ fn walk( } // Handle logical operators in binary expressions - if kind == "binary_expression" { + if kind == rules.logical_node_type { if let Some(op_node) = node.child(1) { let op = op_node.kind(); - if is_logical_operator(op) { + if rules.is_logical_op(op) { // Cyclomatic: +1 for every logical operator *cyclomatic += 1; // Cognitive: +1 only when operator changes from the previous sibling sequence let mut same_sequence = false; if let Some(parent) = node.parent() { - if parent.kind() == "binary_expression" { + if parent.kind() == rules.logical_node_type { if let Some(parent_op) = parent.child(1) { if parent_op.kind() == op { same_sequence = true; @@ -140,60 +446,123 @@ fn walk( } // Walk children manually to avoid double-counting - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); - } - } + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); return; } } } // Handle optional chaining (cyclomatic only) - if kind == "optional_chain_expression" { - *cyclomatic += 1; + if let Some(opt_type) = rules.optional_chain_type { + if kind == opt_type { + *cyclomatic += 1; + } } - // Handle branch/control flow nodes - if is_branch_node(kind) { - let is_else_if = kind == "if_statement" - && node - .parent() - .map_or(false, |p| p.kind() == "else_clause"); - - if kind == "else_clause" { - // else: +1 cognitive structural, no nesting increment, no cyclomatic - // But only if it's a plain else (not else-if) - let first_child = node.named_child(0); - if first_child.map_or(false, |c| c.kind() == "if_statement") { - // This is else-if: the if_statement child handles its own increment - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); - } + // Handle branch/control flow nodes (skip keyword leaf tokens — childCount > 0 guard) + if rules.is_branch(kind) && node.child_count() > 0 { + // Pattern A: else clause wraps if (JS/C#/Rust) + if let Some(else_type) = rules.else_node_type { + if kind == else_type { + let first_child = node.named_child(0); + if first_child.map_or(false, |c| { + rules.if_node_type.map_or(false, |if_t| c.kind() == if_t) + }) { + // else-if: the if_statement child handles its own increment + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + return; } + // Plain else + *cognitive += 1; + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); return; } - // Plain else - *cognitive += 1; - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if let Some(elif_type) = rules.elif_node_type { + if kind == elif_type { + *cognitive += 1; + *cyclomatic += 1; + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + return; + } + } + + // Detect else-if via Pattern A or C + let mut is_else_if = false; + if rules.if_node_type.map_or(false, |if_t| kind == if_t) { + if rules.else_via_alternative { + // Pattern C (Go/Java): if_statement is the alternative of parent if_statement + if let Some(parent) = node.parent() { + if rules + .if_node_type + .map_or(false, |if_t| parent.kind() == if_t) + { + if let Some(alt) = parent.child_by_field_name("alternative") { + if alt.id() == node.id() { + is_else_if = true; + } + } + } + } + } else if rules.else_node_type.is_some() { + // Pattern A (JS/C#/Rust): if_statement inside else_clause + if let Some(parent) = node.parent() { + if rules + .else_node_type + .map_or(false, |else_t| parent.kind() == else_t) + { + is_else_if = true; + } } } - return; } if is_else_if { - // else-if: +1 structural cognitive, +1 cyclomatic, NO nesting increment *cognitive += 1; *cyclomatic += 1; - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); - } - } + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); return; } @@ -201,56 +570,82 @@ fn walk( *cognitive += 1 + nesting_level; // structural + nesting *cyclomatic += 1; - // switch_statement doesn't add cyclomatic itself (cases do), but adds cognitive - if kind == "switch_statement" { + // Switch-like nodes don't add cyclomatic themselves (cases do) + if rules.is_switch_like(kind) { *cyclomatic -= 1; // Undo the ++ above; cases handle cyclomatic } - if is_nesting_node(kind) { - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk( - &child, - nesting_level + 1, - false, - cognitive, - cyclomatic, - max_nesting, - ); + if rules.is_nesting(kind) { + walk_children( + node, + nesting_level + 1, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + return; + } + } + + // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) + if rules.else_via_alternative { + if rules.if_node_type.map_or(false, |if_t| kind != if_t) { + if let Some(parent) = node.parent() { + if rules + .if_node_type + .map_or(false, |if_t| parent.kind() == if_t) + { + if let Some(alt) = parent.child_by_field_name("alternative") { + if alt.id() == node.id() { + *cognitive += 1; + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); + return; + } + } } } - return; } } - // Handle case nodes (cyclomatic only) - if is_case_node(kind) { + // Handle case nodes (cyclomatic only, skip keyword leaves) + if rules.is_case(kind) && node.child_count() > 0 { *cyclomatic += 1; } // Handle nested function definitions (increase nesting) - if !is_top_function && is_function_node(kind) { - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk( - &child, - nesting_level + 1, - false, - cognitive, - cyclomatic, - max_nesting, - ); - } - } + if !is_top_function && rules.is_function(kind) { + walk_children( + node, + nesting_level + 1, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); return; } // Walk children - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - walk(&child, nesting_level, false, cognitive, cyclomatic, max_nesting); - } - } + walk_children( + node, + nesting_level, + false, + rules, + cognitive, + cyclomatic, + max_nesting, + ); } // ─── Tests ──────────────────────────────────────────────────────────────── @@ -266,27 +661,26 @@ mod tests { .set_language(&tree_sitter_javascript::LANGUAGE.into()) .unwrap(); let tree = parser.parse(code.as_bytes(), None).unwrap(); - // Find the first function node let root = tree.root_node(); - let func = find_first_function(&root).expect("no function found in test code"); - compute_function_complexity(&func) + let func = find_first_function(&root, &JS_TS_RULES).expect("no function found in test code"); + compute_function_complexity(&func, &JS_TS_RULES) } - fn find_first_function<'a>(node: &Node<'a>) -> Option> { - if is_function_node(node.kind()) { + fn find_first_function<'a>(node: &Node<'a>, rules: &LangRules) -> Option> { + if rules.is_function(node.kind()) { return Some(*node); } // For variable declarations with arrow functions if node.kind() == "variable_declarator" { if let Some(value) = node.child_by_field_name("value") { - if is_function_node(value.kind()) { + if rules.is_function(value.kind()) { return Some(value); } } } for i in 0..node.child_count() { if let Some(child) = node.child(i) { - if let Some(found) = find_first_function(&child) { + if let Some(found) = find_first_function(&child, rules) { return Some(found); } } @@ -429,21 +823,73 @@ mod tests { assert_eq!(m.max_nesting, 1); } + // ─── Python tests ───────────────────────────────────────────────────── + + fn compute_python(code: &str) -> ComplexityMetrics { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_python::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + let root = tree.root_node(); + let func = find_first_function(&root, &PYTHON_RULES).expect("no function found"); + compute_function_complexity(&func, &PYTHON_RULES) + } + #[test] - fn complexity_lang_from_extension() { - assert_eq!( - ComplexityLang::from_extension("foo.js"), - Some(ComplexityLang::JavaScript) - ); - assert_eq!( - ComplexityLang::from_extension("foo.ts"), - Some(ComplexityLang::TypeScript) - ); - assert_eq!( - ComplexityLang::from_extension("foo.tsx"), - Some(ComplexityLang::Tsx) - ); - assert_eq!(ComplexityLang::from_extension("foo.py"), None); - assert_eq!(ComplexityLang::from_extension("foo.go"), None); + fn python_empty_function() { + let m = compute_python("def f():\n pass"); + assert_eq!(m.cognitive, 0); + assert_eq!(m.cyclomatic, 1); + } + + #[test] + fn python_if_elif_else() { + let m = compute_python("def f(x):\n if x > 0:\n return 1\n elif x < 0:\n return -1\n else:\n return 0"); + // if: +1 cog, +1 cyc; elif: +1 cog, +1 cyc; else: +1 cog + assert_eq!(m.cognitive, 3); + assert_eq!(m.cyclomatic, 3); + } + + #[test] + fn python_for_loop() { + let m = compute_python("def f(xs):\n for x in xs:\n print(x)"); + assert_eq!(m.cognitive, 1); + assert_eq!(m.cyclomatic, 2); + } + + // ─── Go tests ───────────────────────────────────────────────────────── + + fn compute_go(code: &str) -> ComplexityMetrics { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_go::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + let root = tree.root_node(); + let func = find_first_function(&root, &GO_RULES).expect("no function found"); + compute_function_complexity(&func, &GO_RULES) + } + + #[test] + fn go_empty_function() { + let m = compute_go("package main\nfunc f() {}"); + assert_eq!(m.cognitive, 0); + assert_eq!(m.cyclomatic, 1); + } + + #[test] + fn go_if_else() { + let m = compute_go("package main\nfunc f(x int) int {\n if x > 0 {\n return 1\n } else {\n return 0\n }\n}"); + // if: +1 cog, +1 cyc; else (via alternative): +1 cog + assert_eq!(m.cognitive, 2); + assert_eq!(m.cyclomatic, 2); + } + + #[test] + fn go_for_loop() { + let m = compute_go("package main\nfunc f() {\n for i := 0; i < 10; i++ {\n println(i)\n }\n}"); + assert_eq!(m.cognitive, 1); + assert_eq!(m.cyclomatic, 2); } } diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index 5d9ac600..d1f2ba35 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -1,4 +1,5 @@ use tree_sitter::{Node, Tree}; +use crate::complexity::{compute_function_complexity, CSHARP_RULES}; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -103,7 +104,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(&child), end_line: Some(end_line(&child)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(&child, &CSHARP_RULES)), }); } } @@ -140,7 +141,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), }); } } @@ -159,7 +160,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), }); } } @@ -178,7 +179,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), }); } } diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index edf18c5b..d253189a 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -1,4 +1,5 @@ use tree_sitter::{Node, Tree}; +use crate::complexity::{compute_function_complexity, GO_RULES}; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -23,7 +24,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &GO_RULES)), }); } } @@ -59,7 +60,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &GO_RULES)), }); } } diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index 0f6c5679..111aeba4 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -1,4 +1,5 @@ use tree_sitter::{Node, Tree}; +use crate::complexity::{compute_function_complexity, JAVA_RULES}; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -109,7 +110,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(&child), end_line: Some(end_line(&child)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(&child, &JAVA_RULES)), }); } } @@ -146,7 +147,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &JAVA_RULES)), }); } } @@ -165,7 +166,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: None, + complexity: Some(compute_function_complexity(node, &JAVA_RULES)), }); } } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 61ada78e..63fe6d48 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::compute_function_complexity; +use crate::complexity::{compute_function_complexity, JS_TS_RULES}; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -24,7 +24,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node)), + complexity: Some(compute_function_complexity(node, &JS_TS_RULES)), }); } } @@ -80,7 +80,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node)), + complexity: Some(compute_function_complexity(node, &JS_TS_RULES)), }); } } @@ -138,7 +138,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(&value_n)), decorators: None, - complexity: Some(compute_function_complexity(&value_n)), + complexity: Some(compute_function_complexity(&value_n, &JS_TS_RULES)), }); } } @@ -562,7 +562,7 @@ fn extract_callback_definition(call_node: &Node, source: &[u8]) -> Option Option Option ({ file: f })) : parseChanges; + // ── Phase timing ──────────────────────────────────────────────────── + const _t = {}; + // ── Unified parse via parseFilesAuto ─────────────────────────────── const filePaths = filesToParse.map((item) => item.file); + _t.parse0 = performance.now(); const allSymbols = await parseFilesAuto(filePaths, rootDir, engineOpts); + _t.parseMs = performance.now() - _t.parse0; // Build a lookup from incremental data (changed items may carry pre-computed hashes + stats) const precomputedData = new Map(); @@ -627,7 +633,9 @@ export async function buildGraph(rootDir, opts = {}) { } } }); + _t.insert0 = performance.now(); insertAll(); + _t.insertMs = performance.now() - _t.insert0; const parsed = allSymbols.size; const skipped = filesToParse.length - parsed; @@ -643,6 +651,7 @@ export async function buildGraph(rootDir, opts = {}) { // ── Batch import resolution ──────────────────────────────────────── // Collect all (fromFile, importSource) pairs and resolve in one native call + _t.resolve0 = performance.now(); const batchInputs = []; for (const [relPath, symbols] of fileSymbols) { const absFile = path.join(rootDir, relPath); @@ -651,6 +660,7 @@ export async function buildGraph(rootDir, opts = {}) { } } const batchResolved = resolveImportsBatch(batchInputs, rootDir, aliases); + _t.resolveMs = performance.now() - _t.resolve0; function getResolved(absFile, importSource) { if (batchResolved) { @@ -738,6 +748,7 @@ export async function buildGraph(rootDir, opts = {}) { } // Second pass: build edges + _t.edges0 = performance.now(); let edgeCount = 0; const buildEdges = db.transaction(() => { for (const [relPath, symbols] of fileSymbols) { @@ -915,6 +926,7 @@ export async function buildGraph(rootDir, opts = {}) { } }); buildEdges(); + _t.edgesMs = performance.now() - _t.edges0; // Build line count map for structure metrics (prefer cached _lineCount from parser) const lineCountMap = new Map(); @@ -986,6 +998,7 @@ export async function buildGraph(rootDir, opts = {}) { } // Build directory structure, containment edges, and metrics + _t.structure0 = performance.now(); const relDirs = new Set(); for (const absDir of discoveredDirs) { relDirs.add(normalizePath(path.relative(rootDir, absDir))); @@ -996,8 +1009,10 @@ export async function buildGraph(rootDir, opts = {}) { } catch (err) { debug(`Structure analysis failed: ${err.message}`); } + _t.structureMs = performance.now() - _t.structure0; // Classify node roles (entry, core, utility, adapter, dead, leaf) + _t.roles0 = performance.now(); try { const { classifyNodeRoles } = await import('./structure.js'); const roleSummary = classifyNodeRoles(db); @@ -1009,14 +1024,17 @@ export async function buildGraph(rootDir, opts = {}) { } catch (err) { debug(`Role classification failed: ${err.message}`); } + _t.rolesMs = performance.now() - _t.roles0; // Compute per-function complexity metrics (cognitive, cyclomatic, nesting) + _t.complexity0 = performance.now(); try { const { buildComplexityMetrics } = await import('./complexity.js'); await buildComplexityMetrics(db, allSymbols, rootDir, engineOpts); } catch (err) { debug(`Complexity analysis failed: ${err.message}`); } + _t.complexityMs = performance.now() - _t.complexity0; // Release any remaining cached WASM trees for GC for (const [, symbols] of allSymbols) { @@ -1076,4 +1094,16 @@ export async function buildGraph(rootDir, opts = {}) { } } } + + return { + phases: { + parseMs: +_t.parseMs.toFixed(1), + insertMs: +_t.insertMs.toFixed(1), + resolveMs: +_t.resolveMs.toFixed(1), + edgesMs: +_t.edgesMs.toFixed(1), + structureMs: +_t.structureMs.toFixed(1), + rolesMs: +_t.rolesMs.toFixed(1), + complexityMs: +_t.complexityMs.toFixed(1), + }, + }; } diff --git a/src/complexity.js b/src/complexity.js index f85275b9..1ac81512 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -3,6 +3,7 @@ import path from 'node:path'; import { loadConfig } from './config.js'; import { openReadonlyOrFail } from './db.js'; import { info } from './logger.js'; +import { LANGUAGE_REGISTRY } from './parser.js'; import { isTestFile } from './queries.js'; // ─── Language-Specific Node Type Registry ───────────────────────────────── @@ -294,6 +295,14 @@ export const COMPLEXITY_RULES = new Map([ ['php', PHP_RULES], ]); +// Extensions whose language has complexity rules — used to skip needless WASM init +const COMPLEXITY_EXTENSIONS = new Set(); +for (const entry of LANGUAGE_REGISTRY) { + if (COMPLEXITY_RULES.has(entry.id)) { + for (const ext of entry.extensions) COMPLEXITY_EXTENSIONS.add(ext); + } +} + // ─── Halstead Operator/Operand Classification ──────────────────────────── const JS_TS_HALSTEAD = { @@ -1387,8 +1396,11 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp let parsers = null; let extToLang = null; let needsFallback = false; - for (const [, symbols] of fileSymbols) { + for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree) { + // Only consider files whose language actually has complexity rules + const ext = path.extname(relPath).toLowerCase(); + if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; // Check if all function/method defs have pre-computed complexity (native engine) const hasPrecomputed = symbols.definitions.every( (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, @@ -1400,7 +1412,7 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp } } if (needsFallback) { - const { createParsers, LANGUAGE_REGISTRY } = await import('./parser.js'); + const { createParsers } = await import('./parser.js'); parsers = await createParsers(); extToLang = new Map(); for (const entry of LANGUAGE_REGISTRY) { @@ -1440,8 +1452,9 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp // Only attempt WASM fallback if we actually need AST-based computation if (!allPrecomputed && !tree) { - if (!extToLang) continue; // No WASM parsers available const ext = path.extname(relPath).toLowerCase(); + if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; // Language has no complexity rules + if (!extToLang) continue; // No WASM parsers available langId = extToLang.get(ext); if (!langId) continue;