diff --git a/crates/codegraph-core/src/complexity.rs b/crates/codegraph-core/src/complexity.rs index 39e43811..4fbdc406 100644 --- a/crates/codegraph-core/src/complexity.rs +++ b/crates/codegraph-core/src/complexity.rs @@ -375,11 +375,7 @@ pub fn compute_function_complexity( &mut max_nesting, ); - ComplexityMetrics { - cognitive, - cyclomatic, - max_nesting, - } + ComplexityMetrics::basic(cognitive, cyclomatic, max_nesting) } fn walk_children( @@ -648,6 +644,622 @@ fn walk( ); } +// ─── Halstead Operator/Operand Classification ───────────────────────────── + +/// Language-specific Halstead classification rules. +pub struct HalsteadRules { + pub operator_leaf_types: &'static [&'static str], + pub operand_leaf_types: &'static [&'static str], + pub compound_operators: &'static [&'static str], + pub skip_types: &'static [&'static str], +} + +pub static JS_TS_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", "**", + "=", "+=", "-=", "*=", "/=", "%=", "**=", "<<=", ">>=", ">>>=", "&=", "|=", "^=", "&&=", "||=", "??=", + "==", "===", "!=", "!==", "<", ">", "<=", ">=", + "&&", "||", "!", "??", + "&", "|", "^", "~", "<<", ">>", ">>>", + "++", "--", + "typeof", "instanceof", "new", "return", "throw", "yield", "await", + "if", "else", "for", "while", "do", "switch", "case", "break", "continue", + "try", "catch", "finally", + "=>", "...", "?", ":", ".", "?.", + ",", ";", + ], + operand_leaf_types: &[ + "identifier", "property_identifier", "shorthand_property_identifier", "shorthand_property_identifier_pattern", + "number", "string_fragment", "regex_pattern", + "true", "false", "null", "undefined", "this", "super", + "private_property_identifier", + ], + compound_operators: &[ + "call_expression", "subscript_expression", "new_expression", "template_substitution", + ], + skip_types: &["type_annotation", "type_parameters", "return_type", "implements_clause"], +}; + +pub static PYTHON_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", "**", "//", + "=", "+=", "-=", "*=", "/=", "%=", "**=", "//=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", + "and", "or", "not", + "&", "|", "^", "~", "<<", ">>", + "if", "else", "elif", "for", "while", "with", "try", "except", "finally", + "raise", "return", "yield", "await", "pass", "break", "continue", + "import", "from", "as", "in", "is", "lambda", "del", + ".", ",", ":", "@", "->", + ], + operand_leaf_types: &[ + "identifier", "integer", "float", "string_content", + "true", "false", "none", + ], + compound_operators: &["call", "subscript", "attribute"], + skip_types: &[], +}; + +pub static GO_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", + "=", ":=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", + "&&", "||", "!", + "&", "|", "^", "~", "<<", ">>", "&^", + "++", "--", + "if", "else", "for", "switch", "select", "case", "default", + "return", "break", "continue", "goto", "fallthrough", + "go", "defer", "range", "chan", "func", "var", "const", "type", "struct", "interface", + ".", ",", ";", ":", "<-", + ], + operand_leaf_types: &[ + "identifier", "field_identifier", "package_identifier", "type_identifier", + "int_literal", "float_literal", "imaginary_literal", "rune_literal", + "interpreted_string_literal", "raw_string_literal", + "true", "false", "nil", "iota", + ], + compound_operators: &["call_expression", "index_expression", "selector_expression"], + skip_types: &[], +}; + +pub static RUST_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", + "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", + "&&", "||", "!", + "&", "|", "^", "<<", ">>", + "if", "else", "for", "while", "loop", "match", + "return", "break", "continue", + "let", "mut", "ref", "as", "in", "move", + "fn", "struct", "enum", "trait", "impl", "pub", "mod", "use", + ".", ",", ";", ":", "::", "=>", "->", "?", + ], + operand_leaf_types: &[ + "identifier", "field_identifier", "type_identifier", + "integer_literal", "float_literal", "string_content", "char_literal", + "true", "false", "self", "Self", + ], + compound_operators: &["call_expression", "index_expression", "field_expression"], + skip_types: &[], +}; + +pub static JAVA_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", + "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=", ">>>=", + "==", "!=", "<", ">", "<=", ">=", + "&&", "||", "!", + "&", "|", "^", "~", "<<", ">>", ">>>", + "++", "--", + "instanceof", "new", + "if", "else", "for", "while", "do", "switch", "case", + "return", "throw", "break", "continue", + "try", "catch", "finally", + ".", ",", ";", ":", "?", "->", + ], + operand_leaf_types: &[ + "identifier", "type_identifier", + "decimal_integer_literal", "hex_integer_literal", "octal_integer_literal", "binary_integer_literal", + "decimal_floating_point_literal", "hex_floating_point_literal", + "string_literal", "character_literal", + "true", "false", "null", "this", "super", + ], + compound_operators: &["method_invocation", "array_access", "object_creation_expression"], + skip_types: &["type_arguments", "type_parameters"], +}; + +pub static CSHARP_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", + "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", + "&&", "||", "!", "??", "??=", + "&", "|", "^", "~", "<<", ">>", + "++", "--", + "is", "as", "new", "typeof", "sizeof", "nameof", + "if", "else", "for", "foreach", "while", "do", "switch", "case", + "return", "throw", "break", "continue", + "try", "catch", "finally", "await", "yield", + ".", "?.", ",", ";", ":", "=>", "->", + ], + operand_leaf_types: &[ + "identifier", + "integer_literal", "real_literal", + "string_literal", "character_literal", "verbatim_string_literal", "interpolated_string_text", + "true", "false", "null", "this", "base", + ], + compound_operators: &["invocation_expression", "element_access_expression", "object_creation_expression"], + skip_types: &["type_argument_list", "type_parameter_list"], +}; + +pub static RUBY_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", "**", + "=", "+=", "-=", "*=", "/=", "%=", "**=", "&=", "|=", "^=", "<<=", ">>=", + "==", "!=", "<", ">", "<=", ">=", "<=>", "===", "=~", "!~", + "&&", "||", "!", "and", "or", "not", + "&", "|", "^", "~", "<<", ">>", + "if", "else", "elsif", "unless", "case", "when", + "for", "while", "until", "do", "begin", "end", + "return", "raise", "break", "next", "redo", "retry", + "rescue", "ensure", "yield", "def", "class", "module", + ".", ",", ":", "::", "=>", "->", + ], + operand_leaf_types: &[ + "identifier", "constant", "instance_variable", "class_variable", "global_variable", + "integer", "float", "string_content", "symbol", + "true", "false", "nil", "self", + ], + compound_operators: &["call", "element_reference"], + skip_types: &[], +}; + +pub static PHP_HALSTEAD: HalsteadRules = HalsteadRules { + operator_leaf_types: &[ + "+", "-", "*", "/", "%", "**", + "=", "+=", "-=", "*=", "/=", "%=", "**=", ".=", "&=", "|=", "^=", "<<=", ">>=", + "==", "===", "!=", "!==", "<", ">", "<=", ">=", "<=>", + "&&", "||", "!", "and", "or", "xor", "??", + "&", "|", "^", "~", "<<", ">>", + "++", "--", + "instanceof", "new", "clone", + "if", "else", "elseif", "for", "foreach", "while", "do", "switch", "case", + "return", "throw", "break", "continue", + "try", "catch", "finally", "echo", "print", "yield", + ".", "->", "?->", "::", ",", ";", ":", "?", "=>", + ], + operand_leaf_types: &[ + "name", "variable_name", + "integer", "float", "string_content", + "true", "false", "null", + ], + compound_operators: &[ + "function_call_expression", "member_call_expression", "scoped_call_expression", + "subscript_expression", "object_creation_expression", + ], + skip_types: &[], +}; + +/// Look up Halstead rules by language ID. +pub fn halstead_rules(lang_id: &str) -> Option<&'static HalsteadRules> { + match lang_id { + "javascript" | "typescript" | "tsx" => Some(&JS_TS_HALSTEAD), + "python" => Some(&PYTHON_HALSTEAD), + "go" => Some(&GO_HALSTEAD), + "rust" => Some(&RUST_HALSTEAD), + "java" => Some(&JAVA_HALSTEAD), + "c_sharp" => Some(&CSHARP_HALSTEAD), + "ruby" => Some(&RUBY_HALSTEAD), + "php" => Some(&PHP_HALSTEAD), + _ => None, + } +} + +/// Comment line prefixes per language, used for LOC metrics. +pub fn comment_prefixes(lang_id: &str) -> &'static [&'static str] { + match lang_id { + "javascript" | "typescript" | "tsx" | "go" | "rust" | "java" | "c_sharp" => { + &["//", "/*", "*", "*/"] + } + "python" | "ruby" => &["#"], + "php" => &["//", "#", "/*", "*", "*/"], + _ => &["//", "/*", "*", "*/"], + } +} + +// ─── Merged Single-Pass: Complexity + Halstead + LOC + MI ───────────────── + +use crate::types::{HalsteadMetrics, LocMetrics}; +use std::collections::HashMap; + +/// Compute all metrics (complexity + Halstead + LOC + MI) in a single DFS walk. +/// +/// This is the primary entry point for extractors. It merges complexity and +/// Halstead classification into one tree traversal, then computes LOC (text-based) +/// and Maintainability Index from the collected data. +/// +/// Returns `None` if no complexity rules exist for the given language. +pub fn compute_all_metrics( + function_node: &Node, + source: &[u8], + lang_id: &str, +) -> Option { + let c_rules = lang_rules(lang_id)?; + let h_rules = halstead_rules(lang_id); + + // ── Complexity state ── + let mut cognitive: u32 = 0; + let mut cyclomatic: u32 = 1; // McCabe starts at 1 + let mut max_nesting: u32 = 0; + + // ── Halstead state ── + let mut operators: HashMap = HashMap::new(); + let mut operands: HashMap = HashMap::new(); + + walk_all( + function_node, + source, + 0, + true, + false, + c_rules, + h_rules, + &mut cognitive, + &mut cyclomatic, + &mut max_nesting, + &mut operators, + &mut operands, + ); + + // ── Build Halstead metrics ── + let halstead = if h_rules.is_some() { + let n1 = operators.len() as u32; + let n2 = operands.len() as u32; + let big_n1: u32 = operators.values().sum(); + let big_n2: u32 = operands.values().sum(); + + let vocabulary = n1 + n2; + let length = big_n1 + big_n2; + let volume = if vocabulary > 0 { + (length as f64) * (vocabulary as f64).log2() + } else { + 0.0 + }; + let difficulty = if n2 > 0 { + (n1 as f64 / 2.0) * (big_n2 as f64 / n2 as f64) + } else { + 0.0 + }; + let effort = difficulty * volume; + let bugs = volume / 3000.0; + + Some(HalsteadMetrics { + n1, + n2, + big_n1, + big_n2, + vocabulary, + length, + volume: round_f64(volume, 2), + difficulty: round_f64(difficulty, 2), + effort: round_f64(effort, 2), + bugs: round_f64(bugs, 4), + }) + } else { + None + }; + + // ── LOC metrics (text-based) ── + let start = function_node.start_byte(); + let end = function_node.end_byte().min(source.len()); + let func_source = &source[start..end]; + let func_text = String::from_utf8_lossy(func_source); + let lines: Vec<&str> = func_text.split('\n').collect(); + let loc_total = lines.len() as u32; + let prefixes = comment_prefixes(lang_id); + + let mut comment_lines: u32 = 0; + let mut blank_lines: u32 = 0; + for line in &lines { + let trimmed = line.trim(); + if trimmed.is_empty() { + blank_lines += 1; + } else if prefixes.iter().any(|p| trimmed.starts_with(p)) { + comment_lines += 1; + } + } + let sloc = (loc_total.saturating_sub(blank_lines).saturating_sub(comment_lines)).max(1); + + let loc_metrics = LocMetrics { + loc: loc_total, + sloc, + comment_lines, + }; + + // ── Maintainability Index ── + let volume = halstead.as_ref().map_or(0.0, |h| h.volume); + let safe_volume = if volume > 1.0 { volume } else { 1.0 }; + let safe_sloc = if sloc > 1 { sloc as f64 } else { 1.0 }; + let comment_ratio = if loc_total > 0 { + comment_lines as f64 / loc_total as f64 + } else { + 0.0 + }; + + let mut mi = 171.0 - 5.2 * safe_volume.ln() - 0.23 * (cyclomatic as f64) - 16.2 * safe_sloc.ln(); + if comment_ratio > 0.0 { + mi += 50.0 * (2.4 * comment_ratio).sqrt().sin(); + } + let normalized = (mi * 100.0 / 171.0).clamp(0.0, 100.0); + let maintainability_index = round_f64(normalized, 1); + + Some(ComplexityMetrics { + cognitive, + cyclomatic, + max_nesting, + halstead: Some(halstead.unwrap_or(HalsteadMetrics { + n1: 0, n2: 0, big_n1: 0, big_n2: 0, + vocabulary: 0, length: 0, + volume: 0.0, difficulty: 0.0, effort: 0.0, bugs: 0.0, + })), + loc: Some(loc_metrics), + maintainability_index: Some(maintainability_index), + }) +} + +/// Round f64 to `decimals` decimal places. +fn round_f64(value: f64, decimals: u32) -> f64 { + let factor = 10_f64.powi(decimals as i32); + (value * factor).round() / factor +} + +#[allow(clippy::too_many_arguments)] +fn walk_all_children( + node: &Node, + source: &[u8], + nesting_level: u32, + is_top_function: bool, + halstead_skip: bool, + c_rules: &LangRules, + h_rules: Option<&HalsteadRules>, + cognitive: &mut u32, + cyclomatic: &mut u32, + max_nesting: &mut u32, + operators: &mut HashMap, + operands: &mut HashMap, +) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_all( + &child, + source, + nesting_level, + is_top_function, + halstead_skip, + c_rules, + h_rules, + cognitive, + cyclomatic, + max_nesting, + operators, + operands, + ); + } + } +} + +#[allow(clippy::too_many_arguments)] +fn walk_all( + node: &Node, + source: &[u8], + nesting_level: u32, + is_top_function: bool, + halstead_skip: bool, + c_rules: &LangRules, + h_rules: Option<&HalsteadRules>, + cognitive: &mut u32, + cyclomatic: &mut u32, + max_nesting: &mut u32, + operators: &mut HashMap, + operands: &mut HashMap, +) { + let kind = node.kind(); + + // ── Halstead classification ── + let skip_h = halstead_skip + || h_rules.map_or(false, |hr| hr.skip_types.contains(&kind)); + + if let Some(hr) = h_rules { + if !skip_h { + // Compound operators (non-leaf): count node type as operator + if hr.compound_operators.contains(&kind) { + *operators.entry(kind.to_string()).or_insert(0) += 1; + } + // Leaf nodes: classify as operator or operand + if node.child_count() == 0 { + if hr.operator_leaf_types.contains(&kind) { + *operators.entry(kind.to_string()).or_insert(0) += 1; + } else if hr.operand_leaf_types.contains(&kind) { + let start = node.start_byte(); + let end = node.end_byte().min(source.len()); + let text = String::from_utf8_lossy(&source[start..end]).to_string(); + *operands.entry(text).or_insert(0) += 1; + } + } + } + } + + // ── Complexity: track nesting depth ── + if nesting_level > *max_nesting { + *max_nesting = nesting_level; + } + + // Handle logical operators in binary expressions + if kind == c_rules.logical_node_type { + if let Some(op_node) = node.child(1) { + let op = op_node.kind(); + if c_rules.is_logical_op(op) { + *cyclomatic += 1; + + let mut same_sequence = false; + if let Some(parent) = node.parent() { + if parent.kind() == c_rules.logical_node_type { + if let Some(parent_op) = parent.child(1) { + if parent_op.kind() == op { + same_sequence = true; + } + } + } + } + if !same_sequence { + *cognitive += 1; + } + + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + } + + // Handle optional chaining (cyclomatic only) + if let Some(opt_type) = c_rules.optional_chain_type { + if kind == opt_type { + *cyclomatic += 1; + } + } + + // Handle branch/control flow nodes (skip keyword leaf tokens — childCount > 0 guard) + if c_rules.is_branch(kind) && node.child_count() > 0 { + // Pattern A: else clause wraps if (JS/C#/Rust) + if let Some(else_type) = c_rules.else_node_type { + if kind == else_type { + let first_child = node.named_child(0); + if first_child.map_or(false, |c| { + c_rules.if_node_type.map_or(false, |if_t| c.kind() == if_t) + }) { + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + *cognitive += 1; + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if let Some(elif_type) = c_rules.elif_node_type { + if kind == elif_type { + *cognitive += 1; + *cyclomatic += 1; + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + + // Detect else-if via Pattern A or C + let mut is_else_if = false; + if c_rules.if_node_type.map_or(false, |if_t| kind == if_t) { + if c_rules.else_via_alternative { + if let Some(parent) = node.parent() { + if c_rules.if_node_type.map_or(false, |if_t| parent.kind() == if_t) { + if let Some(alt) = parent.child_by_field_name("alternative") { + if alt.id() == node.id() { + is_else_if = true; + } + } + } + } + } else if c_rules.else_node_type.is_some() { + if let Some(parent) = node.parent() { + if c_rules.else_node_type.map_or(false, |else_t| parent.kind() == else_t) { + is_else_if = true; + } + } + } + } + + if is_else_if { + *cognitive += 1; + *cyclomatic += 1; + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + + // Regular branch node + *cognitive += 1 + nesting_level; + *cyclomatic += 1; + + if c_rules.is_switch_like(kind) { + *cyclomatic -= 1; + } + + if c_rules.is_nesting(kind) { + walk_all_children( + node, source, nesting_level + 1, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + + // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) + if c_rules.else_via_alternative { + if c_rules.if_node_type.map_or(false, |if_t| kind != if_t) { + if let Some(parent) = node.parent() { + if c_rules.if_node_type.map_or(false, |if_t| parent.kind() == if_t) { + if let Some(alt) = parent.child_by_field_name("alternative") { + if alt.id() == node.id() { + *cognitive += 1; + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + } + } + } + } + } + + // Handle case nodes (cyclomatic only, skip keyword leaves) + if c_rules.is_case(kind) && node.child_count() > 0 { + *cyclomatic += 1; + } + + // Handle nested function definitions (increase nesting) + if !is_top_function && c_rules.is_function(kind) { + walk_all_children( + node, source, nesting_level + 1, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; + } + + // Walk children + walk_all_children( + node, source, nesting_level, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); +} + // ─── Tests ──────────────────────────────────────────────────────────────── #[cfg(test)] diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index d1f2ba35..c92b6b6f 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::{compute_function_complexity, CSHARP_RULES}; +use crate::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -104,7 +104,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(&child), end_line: Some(end_line(&child)), decorators: None, - complexity: Some(compute_function_complexity(&child, &CSHARP_RULES)), + complexity: compute_all_metrics(&child, source, "c_sharp"), }); } } @@ -141,7 +141,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), + complexity: compute_all_metrics(node, source, "c_sharp"), }); } } @@ -160,7 +160,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), + complexity: compute_all_metrics(node, source, "c_sharp"), }); } } @@ -179,7 +179,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &CSHARP_RULES)), + complexity: compute_all_metrics(node, source, "c_sharp"), }); } } diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index d253189a..8d429e87 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::{compute_function_complexity, GO_RULES}; +use crate::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -24,7 +24,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &GO_RULES)), + complexity: compute_all_metrics(node, source, "go"), }); } } @@ -60,7 +60,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &GO_RULES)), + complexity: compute_all_metrics(node, source, "go"), }); } } diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index 111aeba4..829eb6f6 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::{compute_function_complexity, JAVA_RULES}; +use crate::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -110,7 +110,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(&child), end_line: Some(end_line(&child)), decorators: None, - complexity: Some(compute_function_complexity(&child, &JAVA_RULES)), + complexity: compute_all_metrics(&child, source, "java"), }); } } @@ -147,7 +147,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &JAVA_RULES)), + complexity: compute_all_metrics(node, source, "java"), }); } } @@ -166,7 +166,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &JAVA_RULES)), + complexity: compute_all_metrics(node, source, "java"), }); } } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 63fe6d48..f6451fe2 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -1,5 +1,5 @@ use tree_sitter::{Node, Tree}; -use crate::complexity::{compute_function_complexity, JS_TS_RULES}; +use crate::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -24,7 +24,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &JS_TS_RULES)), + complexity: compute_all_metrics(node, source, "javascript"), }); } } @@ -80,7 +80,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(node)), decorators: None, - complexity: Some(compute_function_complexity(node, &JS_TS_RULES)), + complexity: compute_all_metrics(node, source, "javascript"), }); } } @@ -138,7 +138,7 @@ fn walk_node(node: &Node, source: &[u8], symbols: &mut FileSymbols) { line: start_line(node), end_line: Some(end_line(&value_n)), decorators: None, - complexity: Some(compute_function_complexity(&value_n, &JS_TS_RULES)), + complexity: compute_all_metrics(&value_n, source, "javascript"), }); } } @@ -562,7 +562,7 @@ fn extract_callback_definition(call_node: &Node, source: &[u8]) -> Option Option Option, + pub loc: Option, + #[napi(js_name = "maintainabilityIndex")] + pub maintainability_index: Option, +} + +impl ComplexityMetrics { + /// Construct a basic metrics result with only cognitive/cyclomatic/maxNesting. + /// Used by `compute_function_complexity` and existing tests. + pub fn basic(cognitive: u32, cyclomatic: u32, max_nesting: u32) -> Self { + Self { + cognitive, + cyclomatic, + max_nesting, + halstead: None, + loc: None, + maintainability_index: None, + } + } } #[napi(object)] diff --git a/src/complexity.js b/src/complexity.js index 1ac81512..01ffee18 100644 --- a/src/complexity.js +++ b/src/complexity.js @@ -1348,6 +1348,226 @@ export function computeFunctionComplexity(functionNode, language) { return { cognitive, cyclomatic, maxNesting }; } +// ─── Merged Single-Pass Computation ─────────────────────────────────────── + +/** + * Compute all metrics (complexity + Halstead + LOC + MI) in a single DFS walk. + * Merges computeFunctionComplexity and computeHalsteadMetrics into one tree + * traversal, avoiding two separate DFS walks per function node at build time. + * LOC is text-based (not tree-based) and computed separately (very cheap). + * + * @param {object} functionNode - tree-sitter node for the function + * @param {string} langId - Language ID (e.g. 'javascript', 'python') + * @returns {{ cognitive: number, cyclomatic: number, maxNesting: number, halstead: object|null, loc: object, mi: number } | null} + */ +export function computeAllMetrics(functionNode, langId) { + const cRules = COMPLEXITY_RULES.get(langId); + if (!cRules) return null; + const hRules = HALSTEAD_RULES.get(langId); + + // ── Complexity state ── + let cognitive = 0; + let cyclomatic = 1; // McCabe starts at 1 + let maxNesting = 0; + + // ── Halstead state ── + const operators = hRules ? new Map() : null; + const operands = hRules ? new Map() : null; + + function walk(node, nestingLevel, isTopFunction, halsteadSkip) { + if (!node) return; + + const type = node.type; + + // ── Halstead classification ── + // Propagate skip through type-annotation subtrees (e.g. TS generics, Java type params) + const skipH = halsteadSkip || (hRules ? hRules.skipTypes.has(type) : false); + if (hRules && !skipH) { + // Compound operators (non-leaf): count node type as operator + if (hRules.compoundOperators.has(type)) { + operators.set(type, (operators.get(type) || 0) + 1); + } + // Leaf nodes: classify as operator or operand + if (node.childCount === 0) { + if (hRules.operatorLeafTypes.has(type)) { + operators.set(type, (operators.get(type) || 0) + 1); + } else if (hRules.operandLeafTypes.has(type)) { + const text = node.text; + operands.set(text, (operands.get(text) || 0) + 1); + } + } + } + + // ── Complexity: track nesting depth ── + if (nestingLevel > maxNesting) maxNesting = nestingLevel; + + // Handle logical operators in binary expressions + if (type === cRules.logicalNodeType) { + const op = node.child(1)?.type; + if (op && cRules.logicalOperators.has(op)) { + cyclomatic++; + const parent = node.parent; + let sameSequence = false; + if (parent && parent.type === cRules.logicalNodeType) { + const parentOp = parent.child(1)?.type; + if (parentOp === op) sameSequence = true; + } + if (!sameSequence) cognitive++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + } + + // Handle optional chaining (cyclomatic only) + if (type === cRules.optionalChainType) { + cyclomatic++; + } + + // Handle branch/control flow nodes (skip keyword leaf tokens like Ruby's `if`) + if (cRules.branchNodes.has(type) && node.childCount > 0) { + // Pattern A: else clause wraps if (JS/C#/Rust) + if (cRules.elseNodeType && type === cRules.elseNodeType) { + const firstChild = node.namedChild(0); + if (firstChild && firstChild.type === cRules.ifNodeType) { + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + cognitive++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if (cRules.elifNodeType && type === cRules.elifNodeType) { + cognitive++; + cyclomatic++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + + // Detect else-if via Pattern A or C + let isElseIf = false; + if (type === cRules.ifNodeType) { + if (cRules.elseViaAlternative) { + isElseIf = + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id; + } else if (cRules.elseNodeType) { + isElseIf = node.parent?.type === cRules.elseNodeType; + } + } + + if (isElseIf) { + cognitive++; + cyclomatic++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + + // Regular branch node + cognitive += 1 + nestingLevel; + cyclomatic++; + + // Switch-like nodes don't add cyclomatic themselves (cases do) + if (cRules.switchLikeNodes?.has(type)) { + cyclomatic--; + } + + if (cRules.nestingNodes.has(type)) { + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel + 1, false, skipH); + } + return; + } + } + + // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) + if ( + cRules.elseViaAlternative && + type !== cRules.ifNodeType && + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id + ) { + cognitive++; + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + return; + } + + // Handle case nodes (cyclomatic only, skip keyword leaves) + if (cRules.caseNodes.has(type) && node.childCount > 0) { + cyclomatic++; + } + + // Handle nested function definitions (increase nesting) + if (!isTopFunction && cRules.functionNodes.has(type)) { + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel + 1, false, skipH); + } + return; + } + + // Walk children + for (let i = 0; i < node.childCount; i++) { + walk(node.child(i), nestingLevel, false, skipH); + } + } + + walk(functionNode, 0, true, false); + + // ── Compute Halstead derived metrics ── + let halstead = null; + if (hRules && operators && operands) { + const n1 = operators.size; + const n2 = operands.size; + let bigN1 = 0; + for (const c of operators.values()) bigN1 += c; + let bigN2 = 0; + for (const c of operands.values()) bigN2 += c; + + const vocabulary = n1 + n2; + const length = bigN1 + bigN2; + const volume = vocabulary > 0 ? length * Math.log2(vocabulary) : 0; + const difficulty = n2 > 0 ? (n1 / 2) * (bigN2 / n2) : 0; + const effort = difficulty * volume; + const bugs = volume / 3000; + + halstead = { + n1, + n2, + bigN1, + bigN2, + vocabulary, + length, + volume: +volume.toFixed(2), + difficulty: +difficulty.toFixed(2), + effort: +effort.toFixed(2), + bugs: +bugs.toFixed(4), + }; + } + + // ── LOC metrics (text-based, cheap) ── + const loc = computeLOCMetrics(functionNode, langId); + + // ── Maintainability Index ── + const volume = halstead ? halstead.volume : 0; + const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; + const mi = computeMaintainabilityIndex(volume, cyclomatic, loc.sloc, commentRatio); + + return { cognitive, cyclomatic, maxNesting, halstead, loc, mi }; +} + // ─── Build-Time: Compute Metrics for Changed Files ──────────────────────── /** @@ -1486,25 +1706,27 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp if (def.complexity) { const row = getNodeId.get(def.name, relPath, def.line); if (!row) continue; + const ch = def.complexity.halstead; + const cl = def.complexity.loc; upsert.run( row.id, def.complexity.cognitive, def.complexity.cyclomatic, def.complexity.maxNesting ?? 0, - 0, - 0, - 0, // loc, sloc, commentLines - 0, - 0, - 0, - 0, // halstead n1, n2, bigN1, bigN2 - 0, - 0, - 0, // vocabulary, length, volume - 0, - 0, - 0, // difficulty, effort, bugs - 0, // maintainabilityIndex + cl ? cl.loc : 0, + cl ? cl.sloc : 0, + cl ? cl.commentLines : 0, + ch ? ch.n1 : 0, + ch ? ch.n2 : 0, + ch ? ch.bigN1 : 0, + ch ? ch.bigN2 : 0, + ch ? ch.vocabulary : 0, + ch ? ch.length : 0, + ch ? ch.volume : 0, + ch ? ch.difficulty : 0, + ch ? ch.effort : 0, + ch ? ch.bugs : 0, + def.complexity.maintainabilityIndex ?? 0, ); analyzed++; continue; @@ -1516,38 +1738,33 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp const funcNode = findFunctionNode(tree.rootNode, def.line, def.endLine, rules); if (!funcNode) continue; - const result = computeFunctionComplexity(funcNode, langId); - if (!result) continue; - - const halstead = computeHalsteadMetrics(funcNode, langId); - const loc = computeLOCMetrics(funcNode, langId); - - const volume = halstead ? halstead.volume : 0; - const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; - const mi = computeMaintainabilityIndex(volume, result.cyclomatic, loc.sloc, commentRatio); + // Single-pass: complexity + Halstead + LOC + MI in one DFS walk + const metrics = computeAllMetrics(funcNode, langId); + if (!metrics) continue; const row = getNodeId.get(def.name, relPath, def.line); if (!row) continue; + const h = metrics.halstead; upsert.run( row.id, - result.cognitive, - result.cyclomatic, - result.maxNesting, - loc.loc, - loc.sloc, - loc.commentLines, - halstead ? halstead.n1 : 0, - halstead ? halstead.n2 : 0, - halstead ? halstead.bigN1 : 0, - halstead ? halstead.bigN2 : 0, - halstead ? halstead.vocabulary : 0, - halstead ? halstead.length : 0, - volume, - halstead ? halstead.difficulty : 0, - halstead ? halstead.effort : 0, - halstead ? halstead.bugs : 0, - mi, + metrics.cognitive, + metrics.cyclomatic, + metrics.maxNesting, + metrics.loc.loc, + metrics.loc.sloc, + metrics.loc.commentLines, + h ? h.n1 : 0, + h ? h.n2 : 0, + h ? h.bigN1 : 0, + h ? h.bigN2 : 0, + h ? h.vocabulary : 0, + h ? h.length : 0, + h ? h.volume : 0, + h ? h.difficulty : 0, + h ? h.effort : 0, + h ? h.bugs : 0, + metrics.mi, ); analyzed++; } diff --git a/src/parser.js b/src/parser.js index 1af5a527..f70e67c2 100644 --- a/src/parser.js +++ b/src/parser.js @@ -137,6 +137,9 @@ function normalizeNativeSymbols(result) { cognitive: d.complexity.cognitive, cyclomatic: d.complexity.cyclomatic, maxNesting: d.complexity.maxNesting, + halstead: d.complexity.halstead ?? null, + loc: d.complexity.loc ?? null, + maintainabilityIndex: d.complexity.maintainabilityIndex ?? null, } : null, })),