From ee86fd57a4d7cbc69af9a824963d4946a5d3c9ac Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 27 Feb 2026 17:48:00 -0700 Subject: [PATCH] feat: add Halstead, LOC, and MI metrics to Rust native engine The native engine previously computed only cognitive, cyclomatic, and maxNesting complexity. Halstead metrics, LOC, and Maintainability Index were only available via the WASM fallback path, leaving native users with incomplete (all-zero) data for those columns. This adds full-fidelity computation of all metrics in the Rust engine: - Add HalsteadMetrics and LocMetrics NAPI structs to types.rs - Extend ComplexityMetrics with optional halstead, loc, and MI fields - Add HalsteadRules struct with per-language classification tables for all 8 supported languages (JS/TS, Python, Go, Rust, Java, C#, Ruby, PHP), mirroring the JS HALSTEAD_RULES - Add compute_all_metrics() single-pass DFS that computes complexity + Halstead + LOC + MI in one tree walk - Update all 8 extractors to call compute_all_metrics - Update normalizeNativeSymbols in parser.js to pass through new fields - Update buildComplexityMetrics precomputed branch to use actual native values instead of hardcoded zeros Impact: 21 functions changed, 56 affected --- crates/codegraph-core/src/complexity.rs | 622 +++++++++++++++++- .../codegraph-core/src/extractors/csharp.rs | 10 +- crates/codegraph-core/src/extractors/go.rs | 6 +- crates/codegraph-core/src/extractors/java.rs | 8 +- .../src/extractors/javascript.rs | 14 +- crates/codegraph-core/src/extractors/php.rs | 8 +- .../codegraph-core/src/extractors/python.rs | 4 +- crates/codegraph-core/src/extractors/ruby.rs | 6 +- .../src/extractors/rust_lang.rs | 6 +- crates/codegraph-core/src/types.rs | 45 ++ src/complexity.js | 297 +++++++-- src/parser.js | 3 + 12 files changed, 953 insertions(+), 76 deletions(-) diff --git a/crates/codegraph-core/src/complexity.rs b/crates/codegraph-core/src/complexity.rs index 39e43811..4fbdc406 100644 --- a/crates/codegraph-core/src/complexity.rs +++ b/crates/codegraph-core/src/complexity.rs @@ -375,11 +375,7 @@ pub fn compute_function_complexity( &mut max_nesting, ); - ComplexityMetrics { - cognitive, - cyclomatic, - max_nesting, - } + ComplexityMetrics::basic(cognitive, cyclomatic, max_nesting) } fn walk_children( @@ -648,6 +644,622 @@ fn walk( ); } +// ─── Halstead Operator/Operand Classification ───────────────────────────── + +/// Language-specific Halstead classification rules. +pub struct HalsteadRules { + pub operator_leaf_types: &'static [&'static str], + pub operand_leaf_types: &'static [&'static str], + pub compound_operators: &'static [&'static str], + pub skip_types: &'static [&'static str], +} + +pub static JS_TS_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", "**", + "=", "+=", "-=", "*=", "/=", "%=", "**=", "<<=", ">>=", ">>>=", "&=", "|=", "^=", "&&=", "||=", "??=", + "==", "===", "!=", "!==", "<", ">", "<=", ">=", + "&&", "||", "!", "??", + "&", "|", "^", "~", "<<", ">>", ">>>", + "++", "--", + "typeof", "instanceof", "new", "return", "throw", "yield", "await", + "if", "else", "for", "while", "do", "switch", "case", "break", "continue", + "try", "catch", "finally", + "=>", "...", "?", ":", ".", "?.", + ",", ";", + ], + operand_leaf_types: &[ + "identifier", "property_identifier", "shorthand_property_identifier", "shorthand_property_identifier_pattern", + "number", "string_fragment", "regex_pattern", + "true", "false", "null", "undefined", "this", "super", + "private_property_identifier", + ], + compound_operators: &[ + "call_expression", "subscript_expression", "new_expression", "template_substitution", + ], + skip_types: &["type_annotation", "type_parameters", "return_type", "implements_clause"], +}; + +pub static PYTHON_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", "**", "//", + "=", "+=", "-=", "*=", "/=", "%=", "**=", "//=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", + "and", "or", "not", + "&", "|", "^", "~", "<<", ">>", + "if", "else", "elif", "for", "while", "with", "try", "except", "finally", + "raise", "return", "yield", "await", "pass", "break", "continue", + "import", "from", "as", "in", "is", "lambda", "del", + ".", ",", ":", "@", "->", + ], + operand_leaf_types: &[ + "identifier", "integer", "float", "string_content", + "true", "false", "none", + ], + compound_operators: &["call", "subscript", "attribute"], + skip_types: &[], +}; + +pub static GO_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", + "=", ":=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", + "&&", "||", "!", + "&", "|", "^", "~", "<<", ">>", "&^", + "++", "--", + "if", "else", "for", "switch", "select", "case", "default", + "return", "break", "continue", "goto", "fallthrough", + "go", "defer", "range", "chan", "func", "var", "const", "type", "struct", "interface", + ".", ",", ";", ":", "<-", + ], + operand_leaf_types: &[ + "identifier", "field_identifier", "package_identifier", "type_identifier", + "int_literal", "float_literal", "imaginary_literal", "rune_literal", + "interpreted_string_literal", "raw_string_literal", + "true", "false", "nil", "iota", + ], + compound_operators: &["call_expression", "index_expression", "selector_expression"], + skip_types: &[], +}; + +pub static RUST_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", + "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", + "&&", "||", "!", + "&", "|", "^", "<<", ">>", + "if", "else", "for", "while", "loop", "match", + "return", "break", "continue", + "let", "mut", "ref", "as", "in", "move", + "fn", "struct", "enum", "trait", "impl", "pub", "mod", "use", + ".", ",", ";", ":", "::", "=>", "->", "?", + ], + operand_leaf_types: &[ + "identifier", "field_identifier", "type_identifier", + "integer_literal", "float_literal", "string_content", "char_literal", + "true", "false", "self", "Self", + ], + compound_operators: &["call_expression", "index_expression", "field_expression"], + skip_types: &[], +}; + +pub static JAVA_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", + "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=", ">>>=", + "==", "!=", "<", ">", "<=", ">=", + "&&", "||", "!", + "&", "|", "^", "~", "<<", ">>", ">>>", + "++", "--", + "instanceof", "new", + "if", "else", "for", "while", "do", "switch", "case", + "return", "throw", "break", "continue", + "try", "catch", "finally", + ".", ",", ";", ":", "?", "->", + ], + operand_leaf_types: &[ + "identifier", "type_identifier", + "decimal_integer_literal", "hex_integer_literal", "octal_integer_literal", "binary_integer_literal", + "decimal_floating_point_literal", "hex_floating_point_literal", + "string_literal", "character_literal", + "true", "false", "null", "this", "super", + ], + compound_operators: &["method_invocation", "array_access", "object_creation_expression"], + skip_types: &["type_arguments", "type_parameters"], +}; + +pub static CSHARP_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", + "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", + "&&", "||", "!", "??", "??=", + "&", "|", "^", "~", "<<", ">>", + "++", "--", + "is", "as", "new", "typeof", "sizeof", "nameof", + "if", "else", "for", "foreach", "while", "do", "switch", "case", + "return", "throw", "break", "continue", + "try", "catch", "finally", "await", "yield", + ".", "?.", ",", ";", ":", "=>", "->", + ], + operand_leaf_types: &[ + "identifier", + "integer_literal", "real_literal", + "string_literal", "character_literal", "verbatim_string_literal", "interpolated_string_text", + "true", "false", "null", "this", "base", + ], + compound_operators: &["invocation_expression", "element_access_expression", "object_creation_expression"], + skip_types: &["type_argument_list", "type_parameter_list"], +}; + +pub static RUBY_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", "**", + "=", "+=", "-=", "*=", "/=", "%=", "**=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", "<=>", "===", "=~", "!~", + "&&", "||", "!", "and", "or", "not", + "&", "|", "^", "~", "<<", ">>", + "if", "else", "elsif", "unless", "case", "when", + "for", "while", "until", "do", "begin", "end", + "return", "raise", "break", "next", "redo", "retry", + "rescue", "ensure", "yield", "def", "class", "module", + ".", ",", ":", "::", "=>", "->", + ], + operand_leaf_types: &[ + "identifier", "constant", "instance_variable", "class_variable", "global_variable", + "integer", "float", "string_content", "symbol", + "true", "false", "nil", "self", + ], + compound_operators: &["call", "element_reference"], + skip_types: &[], +}; + +pub static PHP_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", "**", + "=", "+=", "-=", "*=", "/=", "%=", "**=", ".=", "&=", "|=", "^=", "<<=", ">>=", + "==", "===", "!=", "!==", "<", ">", "<=", ">=", "<=>", + "&&", "||", "!", "and", "or", "xor", "??", + "&", "|", "^", "~", "<<", ">>", + "++", "--", + "instanceof", "new", "clone", + "if", "else", "elseif", "for", "foreach", "while", "do", "switch", "case", + "return", "throw", "break", "continue", + "try", "catch", "finally", "echo", "print", "yield", + ".", "->", "?->", "::", ",", ";", ":", "?", "=>", + ], + operand_leaf_types: &[ + "name", "variable_name", + "integer", "float", "string_content", + "true", "false", "null", + ], + compound_operators: &[ + "function_call_expression", "member_call_expression", "scoped_call_expression", + "subscript_expression", "object_creation_expression", + ], + skip_types: &[], +}; + +/// Look up Halstead rules by language ID. +pub fn halstead_rules(lang_id: &str) -> Option<&'static HalsteadRules> { + match lang_id { + "javascript" | "typescript" | "tsx" => Some(&JS_TS_HALSTEAD), + "python" => Some(&PYTHON_HALSTEAD), + "go" => Some(&GO_HALSTEAD), + "rust" => Some(&RUST_HALSTEAD), + "java" => Some(&JAVA_HALSTEAD), + "c_sharp" => Some(&CSHARP_HALSTEAD), + "ruby" => Some(&RUBY_HALSTEAD), + "php" => Some(&PHP_HALSTEAD), + _ => None, + } +} + +/// Comment line prefixes per language, used for LOC metrics. +pub fn comment_prefixes(lang_id: &str) -> &'static [&'static str] { + match lang_id { + "javascript" | "typescript" | "tsx" | "go" | "rust" | "java" | "c_sharp" => { + &["//", "/*", "*", "*/"] + } + "python" | "ruby" => &["#"], + "php" => &["//", "#", "/*", "*", "*/"], + _ => &["//", "/*", "*", "*/"], + } +} + +// ─── Merged Single-Pass: Complexity + Halstead + LOC + MI ───────────────── + +use crate::types::{HalsteadMetrics, LocMetrics}; +use std::collections::HashMap; + +/// Compute all metrics (complexity + Halstead + LOC + MI) in a single DFS walk. +/// +/// This is the primary entry point for extractors. It merges complexity and +/// Halstead classification into one tree traversal, then computes LOC (text-based) +/// and Maintainability Index from the collected data. +/// +/// Returns `None` if no complexity rules exist for the given language. +pub fn compute_all_metrics( + function_node: &Node, + source: &[u8], + lang_id: &str, +) -> Option { + let c_rules = lang_rules(lang_id)?; + let h_rules = halstead_rules(lang_id); + + // ── Complexity state ── + let mut cognitive: u32 = 0; + let mut cyclomatic: u32 = 1; // McCabe starts at 1 + let mut max_nesting: u32 = 0; + + // ── Halstead state ── + let mut operators: HashMap = HashMap::new(); + let mut operands: HashMap = HashMap::new(); + + walk_all( + function_node, + source, + 0, + true, + false, + c_rules, + h_rules, + &mut cognitive, + &mut cyclomatic, + &mut max_nesting, + &mut operators, + &mut operands, + ); + + // ── Build Halstead metrics ── + let halstead = if h_rules.is_some() { + let n1 = operators.len() as u32; + let n2 = operands.len() as u32; + let big_n1: u32 = operators.values().sum(); + let big_n2: u32 = operands.values().sum(); + + let vocabulary = n1 + n2; + let length = big_n1 + big_n2; + let volume = if vocabulary > 0 { + (length as f64) * (vocabulary as f64).log2() + } else { + 0.0 + }; + let difficulty = if n2 > 0 { + (n1 as f64 / 2.0) * (big_n2 as f64 / n2 as f64) + } else { + 0.0 + }; + let effort = difficulty * volume; + let bugs = volume / 3000.0; + + Some(HalsteadMetrics { + n1, + n2, + big_n1, + big_n2, + vocabulary, + length, + volume: round_f64(volume, 2), + difficulty: round_f64(difficulty, 2), + effort: round_f64(effort, 2), + bugs: round_f64(bugs, 4), + }) + } else { + None + }; + + // ── LOC metrics (text-based) ── + let start = function_node.start_byte(); + let end = function_node.end_byte().min(source.len()); + let func_source = &source[start..end]; + let func_text = String::from_utf8_lossy(func_source); + let lines: Vec<&str> = func_text.split('\n').collect(); + let loc_total = lines.len() as u32; + let prefixes = comment_prefixes(lang_id); + + let mut comment_lines: u32 = 0; + let mut blank_lines: u32 = 0; + for line in &lines { + let trimmed = line.trim(); + if trimmed.is_empty() { + blank_lines += 1; + } else if prefixes.iter().any(|p| trimmed.starts_with(p)) { + comment_lines += 1; + } + } + let sloc = (loc_total.saturating_sub(blank_lines).saturating_sub(comment_lines)).max(1); + + let loc_metrics = LocMetrics { + loc: loc_total, + sloc, + comment_lines, + }; + + // ── Maintainability Index ── + let volume = halstead.as_ref().map_or(0.0, |h| h.volume); + let safe_volume = if volume > 1.0 { volume } else { 1.0 }; + let safe_sloc = if sloc > 1 { sloc as f64 } else { 1.0 }; + let comment_ratio = if loc_total > 0 { + comment_lines as f64 / loc_total as f64 + } else { + 0.0 + }; + + let mut mi = 171.0 - 5.2 * safe_volume.ln() - 0.23 * (cyclomatic as f64) - 16.2 * safe_sloc.ln(); + if comment_ratio > 0.0 { + mi += 50.0 * (2.4 * comment_ratio).sqrt().sin(); + } + let normalized = (mi * 100.0 / 171.0).clamp(0.0, 100.0); + let maintainability_index = round_f64(normalized, 1); + + Some(ComplexityMetrics { + cognitive, + cyclomatic, + max_nesting, + halstead: Some(halstead.unwrap_or(HalsteadMetrics { + n1: 0, n2: 0, big_n1: 0, big_n2: 0, + vocabulary: 0, length: 0, + volume: 0.0, difficulty: 0.0, effort: 0.0, bugs: 0.0, + })), + loc: Some(loc_metrics), + maintainability_index: Some(maintainability_index), + }) +} + +/// Round f64 to `decimals` decimal places. +fn round_f64(value: f64, decimals: u32) -> f64 { + let factor = 10_f64.powi(decimals as i32); + (value * factor).round() / factor +} + +#[allow(clippy::too_many_arguments)] +fn walk_all_children( + node: &Node, + source: &[u8], + nesting_level: u32, + is_top_function: bool, + halstead_skip: bool, + c_rules: &LangRules, + h_rules: Option<&HalsteadRules>, + cognitive: &mut u32, + cyclomatic: &mut u32, + max_nesting: &mut u32, + operators: &mut HashMap, + operands: &mut HashMap, +) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_all( + &child, + source, + nesting_level, + is_top_function, + halstead_skip, + c_rules, + h_rules, + cognitive, + cyclomatic, + max_nesting, + operators, + operands, + ); + } + } +} + +#[allow(clippy::too_many_arguments)] +fn walk_all( + node: &Node, + source: &[u8], + nesting_level: u32, + is_top_function: bool, + halstead_skip: bool, + c_rules: &LangRules, + h_rules: Option<&HalsteadRules>, + cognitive: &mut u32, + cyclomatic: &mut u32, + max_nesting: &mut u32, + operators: &mut HashMap, + operands: &mut HashMap, +) { + let kind = node.kind(); + + // ── Halstead classification ── + let skip_h = halstead_skip + || h_rules.map_or(false, |hr| hr.skip_types.contains(&kind)); + + if let Some(hr) = h_rules { + if !skip_h { + // Compound operators (non-leaf): count node type as operator + if hr.compound_operators.contains(&kind) { + *operators.entry(kind.to_string()).or_insert(0) += 1; + } + // Leaf nodes: classify as operator or operand + if node.child_count() == 0 { + if hr.operator_leaf_types.contains(&kind) { + *operators.entry(kind.to_string()).or_insert(0) += 1; + } else if hr.operand_leaf_types.contains(&kind) { + let start = node.start_byte(); + let end = node.end_byte().min(source.len()); + let text = String::from_utf8_lossy(&source[start..end]).to_string(); + *operands.entry(text).or_insert(0) += 1; + } + } + } + } + + // ── Complexity: track nesting depth ── + if nesting_level > *max_nesting { + *max_nesting = nesting_level; + } + + // Handle logical operators in binary expressions + if kind == c_rules.logical_node_type { + if let Some(op_node) = node.child(1) { + let op = op_node.kind(); + if c_rules.is_logical_op(op) { + *cyclomatic += 1; + + let mut same_sequence = false; + if let Some(parent) = node.parent() { + if parent.kind() == c_rules.logical_node_type { + if let Some(parent_op) = parent.child(1) { + if parent_op.kind() == op { + same_sequence = true; + } + } + } + } + if !same_sequence { + *cognitive += 1; + } + + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + } + + // Handle optional chaining (cyclomatic only) + if let Some(opt_type) = c_rules.optional_chain_type { + if kind == opt_type { + *cyclomatic += 1; + } + } + + // Handle branch/control flow nodes (skip keyword leaf tokens — childCount > 0 guard) + if c_rules.is_branch(kind) && node.child_count() > 0 { + // Pattern A: else clause wraps if (JS/C#/Rust) + if let Some(else_type) = c_rules.else_node_type { + if kind == else_type { + let first_child = node.named_child(0); + if first_child.map_or(false, |c| { + c_rules.if_node_type.map_or(false, |if_t| c.kind() == if_t) + }) { + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + *cognitive += 1; + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if let Some(elif_type) = c_rules.elif_node_type { + if kind == elif_type { + *cognitive += 1; + *cyclomatic += 1; + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + + // Detect else-if via Pattern A or C + let mut is_else_if = false; + if c_rules.if_node_type.map_or(false, |if_t| kind == if_t) { + if c_rules.else_via_alternative { + if let Some(parent) = node.parent() { + if c_rules.if_node_type.map_or(false, |if_t| parent.kind() == if_t) { + if let Some(alt) = parent.child_by_field_name("alternative") { + if alt.id() == node.id() { + is_else_if = true; + } + } + } + } + } else if c_rules.else_node_type.is_some() { + if let Some(parent) = node.parent() { + if c_rules.else_node_type.map_or(false, |else_t| parent.kind() == else_t) { + is_else_if = true; + } + } + } + } + + if is_else_if { + *cognitive += 1; + *cyclomatic += 1; + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + + // Regular branch node + *cognitive += 1 + nesting_level; + *cyclomatic += 1; + + if c_rules.is_switch_like(kind) { + *cyclomatic -= 1; + } + + if c_rules.is_nesting(kind) { + walk_all_children( + node, source, nesting_level + 1, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + + // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) + if c_rules.else_via_alternative { + if c_rules.if_node_type.map_or(false, |if_t| kind != if_t) { + if let Some(parent) = node.parent() { + if c_rules.if_node_type.map_or(false, |if_t| parent.kind() == if_t) { + if let Some(alt) = parent.child_by_field_name("alternative") { + if alt.id() == node.id() { + *cognitive += 1; + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + } + } + } + } + + // Handle case nodes (cyclomatic only, skip keyword leaves) + if c_rules.is_case(kind) && node.child_count() > 0 { + *cyclomatic += 1; + } + + // Handle nested function definitions (increase nesting) + if !is_top_function && c_rules.is_function(kind) { + walk_all_children( + node, source, nesting_level + 1, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + + // Walk children + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); +} + // ─── Tests ──────────────────────────────────────────────────────────────── #[cfg(test)] diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index d1f2ba35..c92b6b6f 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::{compute_function_complexity, CSHARP_RULES}; +use crate::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -104,7 +104,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(&child), end_line: Some(end_line(&child)), decorators: None, - complexity: Some(compute_function_complexity(&child, &CSHARP_RULES)), + complexity: compute_all_metrics(&child, source, "c_sharp"), }); } } @@ -141,7 +141,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), + complexity: compute_all_metrics(node, source, "c_sharp"), }); } } @@ -160,7 +160,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), + complexity: compute_all_metrics(node, source, "c_sharp"), }); } } @@ -179,7 +179,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), + complexity: compute_all_metrics(node, source, "c_sharp"), }); } } diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index d253189a..8d429e87 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::{compute_function_complexity, GO_RULES}; +use crate::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -24,7 +24,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &GO_RULES)), + complexity: compute_all_metrics(node, source, "go"), }); } } @@ -60,7 +60,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &GO_RULES)), + complexity: compute_all_metrics(node, source, "go"), }); } } diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index 111aeba4..829eb6f6 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::{compute_function_complexity, JAVA_RULES}; +use crate::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -110,7 +110,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(&child), end_line: Some(end_line(&child)), decorators: None, - complexity: Some(compute_function_complexity(&child, &JAVA_RULES)), + complexity: compute_all_metrics(&child, source, "java"), }); } } @@ -147,7 +147,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &JAVA_RULES)), + complexity: compute_all_metrics(node, source, "java"), }); } } @@ -166,7 +166,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &JAVA_RULES)), + complexity: compute_all_metrics(node, source, "java"), }); } } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 63fe6d48..f6451fe2 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::{compute_function_complexity, JS_TS_RULES}; +use crate::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -24,7 +24,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &JS_TS_RULES)), + complexity: compute_all_metrics(node, source, "javascript"), }); } } @@ -80,7 +80,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &JS_TS_RULES)), + complexity: compute_all_metrics(node, source, "javascript"), }); } } @@ -138,7 +138,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(&value_n)), decorators: None, - complexity: Some(compute_function_complexity(&value_n, &JS_TS_RULES)), + complexity: compute_all_metrics(&value_n, source, "javascript"), }); } } @@ -562,7 +562,7 @@ fn extract_callback_definition(call_node: &Node, source: &[u8]) -> Option Option Option, + pub loc: Option, + #[napi(js_name = "maintainabilityIndex")] + pub maintainability_index: Option, +} + +impl ComplexityMetrics { + /// Construct a basic metrics result with only cognitive/cyclomatic/maxNesting. + /// Used by `compute_function_complexity` and existing tests. + pub fn basic(cognitive: u32, cyclomatic: u32, max_nesting: u32) -> Self { + Self { + cognitive, + cyclomatic, + max_nesting, + halstead: None, + loc: None, + maintainability_index: None, + } + } } #[napi(object)] diff --git a/src/complexity.js b/src/complexity.js index 1ac81512..01ffee18 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -1348,6 +1348,226 @@ export function computeFunctionComplexity(functionNode, language) { return { cognitive, cyclomatic, maxNesting }; } +// ─── Merged Single-Pass Computation ─────────────────────────────────────── + +/** + * Compute all metrics (complexity + Halstead + LOC + MI) in a single DFS walk. + * Merges computeFunctionComplexity and computeHalsteadMetrics into one tree + * traversal, avoiding two separate DFS walks per function node at build time. + * LOC is text-based (not tree-based) and computed separately (very cheap). + * + * @param {object} functionNode - tree-sitter node for the function + * @param {string} langId - Language ID (e.g. 'javascript', 'python') + * @returns {{ cognitive: number, cyclomatic: number, maxNesting: number, halstead: object|null, loc: object, mi: number } | null} + */ +export function computeAllMetrics(functionNode, langId) { + const cRules = COMPLEXITY_RULES.get(langId); + if (!cRules) return null; + const hRules = HALSTEAD_RULES.get(langId); + + // ── Complexity state ── + let cognitive = 0; + let cyclomatic = 1; // McCabe starts at 1 + let maxNesting = 0; + + // ── Halstead state ── + const operators = hRules ? new Map() : null; + const operands = hRules ? new Map() : null; + + function walk(node, nestingLevel, isTopFunction, halsteadSkip) { + if (!node) return; + + const type = node.type; + + // ── Halstead classification ── + // Propagate skip through type-annotation subtrees (e.g. TS generics, Java type params) + const skipH = halsteadSkip || (hRules ? hRules.skipTypes.has(type) : false); + if (hRules && !skipH) { + // Compound operators (non-leaf): count node type as operator + if (hRules.compoundOperators.has(type)) { + operators.set(type, (operators.get(type) || 0) + 1); + } + // Leaf nodes: classify as operator or operand + if (node.childCount === 0) { + if (hRules.operatorLeafTypes.has(type)) { + operators.set(type, (operators.get(type) || 0) + 1); + } else if (hRules.operandLeafTypes.has(type)) { + const text = node.text; + operands.set(text, (operands.get(text) || 0) + 1); + } + } + } + + // ── Complexity: track nesting depth ── + if (nestingLevel > maxNesting) maxNesting = nestingLevel; + + // Handle logical operators in binary expressions + if (type === cRules.logicalNodeType) { + const op = node.child(1)?.type; + if (op && cRules.logicalOperators.has(op)) { + cyclomatic++; + const parent = node.parent; + let sameSequence = false; + if (parent && parent.type === cRules.logicalNodeType) { + const parentOp = parent.child(1)?.type; + if (parentOp === op) sameSequence = true; + } + if (!sameSequence) cognitive++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + } + + // Handle optional chaining (cyclomatic only) + if (type === cRules.optionalChainType) { + cyclomatic++; + } + + // Handle branch/control flow nodes (skip keyword leaf tokens like Ruby's `if`) + if (cRules.branchNodes.has(type) && node.childCount > 0) { + // Pattern A: else clause wraps if (JS/C#/Rust) + if (cRules.elseNodeType && type === cRules.elseNodeType) { + const firstChild = node.namedChild(0); + if (firstChild && firstChild.type === cRules.ifNodeType) { + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + cognitive++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if (cRules.elifNodeType && type === cRules.elifNodeType) { + cognitive++; + cyclomatic++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + + // Detect else-if via Pattern A or C + let isElseIf = false; + if (type === cRules.ifNodeType) { + if (cRules.elseViaAlternative) { + isElseIf = + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id; + } else if (cRules.elseNodeType) { + isElseIf = node.parent?.type === cRules.elseNodeType; + } + } + + if (isElseIf) { + cognitive++; + cyclomatic++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + + // Regular branch node + cognitive += 1 + nestingLevel; + cyclomatic++; + + // Switch-like nodes don't add cyclomatic themselves (cases do) + if (cRules.switchLikeNodes?.has(type)) { + cyclomatic--; + } + + if (cRules.nestingNodes.has(type)) { + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel + 1, false, skipH); + } + return; + } + } + + // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) + if ( + cRules.elseViaAlternative && + type !== cRules.ifNodeType && + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id + ) { + cognitive++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + + // Handle case nodes (cyclomatic only, skip keyword leaves) + if (cRules.caseNodes.has(type) && node.childCount > 0) { + cyclomatic++; + } + + // Handle nested function definitions (increase nesting) + if (!isTopFunction && cRules.functionNodes.has(type)) { + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel + 1, false, skipH); + } + return; + } + + // Walk children + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + } + + walk(functionNode, 0, true, false); + + // ── Compute Halstead derived metrics ── + let halstead = null; + if (hRules && operators && operands) { + const n1 = operators.size; + const n2 = operands.size; + let bigN1 = 0; + for (const c of operators.values()) bigN1 += c; + let bigN2 = 0; + for (const c of operands.values()) bigN2 += c; + + const vocabulary = n1 + n2; + const length = bigN1 + bigN2; + const volume = vocabulary > 0 ? length * Math.log2(vocabulary) : 0; + const difficulty = n2 > 0 ? (n1 / 2) * (bigN2 / n2) : 0; + const effort = difficulty * volume; + const bugs = volume / 3000; + + halstead = { + n1, + n2, + bigN1, + bigN2, + vocabulary, + length, + volume: +volume.toFixed(2), + difficulty: +difficulty.toFixed(2), + effort: +effort.toFixed(2), + bugs: +bugs.toFixed(4), + }; + } + + // ── LOC metrics (text-based, cheap) ── + const loc = computeLOCMetrics(functionNode, langId); + + // ── Maintainability Index ── + const volume = halstead ? halstead.volume : 0; + const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; + const mi = computeMaintainabilityIndex(volume, cyclomatic, loc.sloc, commentRatio); + + return { cognitive, cyclomatic, maxNesting, halstead, loc, mi }; +} + // ─── Build-Time: Compute Metrics for Changed Files ──────────────────────── /** @@ -1486,25 +1706,27 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp if (def.complexity) { const row = getNodeId.get(def.name, relPath, def.line); if (!row) continue; + const ch = def.complexity.halstead; + const cl = def.complexity.loc; upsert.run( row.id, def.complexity.cognitive, def.complexity.cyclomatic, def.complexity.maxNesting ?? 0, - 0, - 0, - 0, // loc, sloc, commentLines - 0, - 0, - 0, - 0, // halstead n1, n2, bigN1, bigN2 - 0, - 0, - 0, // vocabulary, length, volume - 0, - 0, - 0, // difficulty, effort, bugs - 0, // maintainabilityIndex + cl ? cl.loc : 0, + cl ? cl.sloc : 0, + cl ? cl.commentLines : 0, + ch ? ch.n1 : 0, + ch ? ch.n2 : 0, + ch ? ch.bigN1 : 0, + ch ? ch.bigN2 : 0, + ch ? ch.vocabulary : 0, + ch ? ch.length : 0, + ch ? ch.volume : 0, + ch ? ch.difficulty : 0, + ch ? ch.effort : 0, + ch ? ch.bugs : 0, + def.complexity.maintainabilityIndex ?? 0, ); analyzed++; continue; @@ -1516,38 +1738,33 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp const funcNode = findFunctionNode(tree.rootNode, def.line, def.endLine, rules); if (!funcNode) continue; - const result = computeFunctionComplexity(funcNode, langId); - if (!result) continue; - - const halstead = computeHalsteadMetrics(funcNode, langId); - const loc = computeLOCMetrics(funcNode, langId); - - const volume = halstead ? halstead.volume : 0; - const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; - const mi = computeMaintainabilityIndex(volume, result.cyclomatic, loc.sloc, commentRatio); + // Single-pass: complexity + Halstead + LOC + MI in one DFS walk + const metrics = computeAllMetrics(funcNode, langId); + if (!metrics) continue; const row = getNodeId.get(def.name, relPath, def.line); if (!row) continue; + const h = metrics.halstead; upsert.run( row.id, - result.cognitive, - result.cyclomatic, - result.maxNesting, - loc.loc, - loc.sloc, - loc.commentLines, - halstead ? halstead.n1 : 0, - halstead ? halstead.n2 : 0, - halstead ? halstead.bigN1 : 0, - halstead ? halstead.bigN2 : 0, - halstead ? halstead.vocabulary : 0, - halstead ? halstead.length : 0, - volume, - halstead ? halstead.difficulty : 0, - halstead ? halstead.effort : 0, - halstead ? halstead.bugs : 0, - mi, + metrics.cognitive, + metrics.cyclomatic, + metrics.maxNesting, + metrics.loc.loc, + metrics.loc.sloc, + metrics.loc.commentLines, + h ? h.n1 : 0, + h ? h.n2 : 0, + h ? h.bigN1 : 0, + h ? h.bigN2 : 0, + h ? h.vocabulary : 0, + h ? h.length : 0, + h ? h.volume : 0, + h ? h.difficulty : 0, + h ? h.effort : 0, + h ? h.bugs : 0, + metrics.mi, ); analyzed++; } diff --git a/src/parser.js b/src/parser.js index 1af5a527..f70e67c2 100644 --- a/src/parser.js +++ b/src/parser.js @@ -137,6 +137,9 @@ function normalizeNativeSymbols(result) { cognitive: d.complexity.cognitive, cyclomatic: d.complexity.cyclomatic, maxNesting: d.complexity.maxNesting, + halstead: d.complexity.halstead ?? null, + loc: d.complexity.loc ?? null, + maintainabilityIndex: d.complexity.maintainabilityIndex ?? null, } : null, })),