diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a4b808..975b3a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable changes to AiDex will be documented in this file. +## [Unreleased] + +### Fixed +- **Type/method name detection across 4 languages**: + - **C**: Function names nested inside `function_declarator` now extracted (previously missed all C function definitions) + - **C++**: Method names now extracted via tree-sitter's `declarator` field instead of pattern-matching node types — handles signatures with `pointer_declarator` (`int* foo()`), `reference_declarator` (`A& foo()`), `array_declarator` (`int (*foo())[10]`), qualified names (`A::foo`), conversion operators (`operator bool()`), destructor names (`~A`), operator overloads (`operator=`), and qualified return types (`std::string foo()`) — no more confusing the return type for the function name + - **C++**: Removed `template_function` from method nodes — it was a workaround for the name-extraction gap and produced duplicate entries for template specializations (`template<> void A::foo<int>()`); now covered by `function_definition` + - **Go**: Method names on receivers (`field_identifier` node) now extracted + - **Ruby**: Class and module names now extracted, including namespaced ones via `scope_resolution` (`class Foo::Bar`, `module A::B`) + ## [1.18.0] - 2026-04-25 ### Added diff --git a/src/parser/extractor.ts b/src/parser/extractor.ts index a5573c6..2c03d72 100644 --- a/src/parser/extractor.ts +++ b/src/parser/extractor.ts @@ -259,6 +259,54 @@ function extractIdentifiersFromComment( } } +/** + * C/C++ declarator wrappers that may appear between `function_definition` + * and the actual `identifier` carrying the function/method name. + */ +const CPP_DECLARATOR_WRAPPERS = new Set([ + 'function_declarator', + 'pointer_declarator', // e.g. `int* foo()` + 'reference_declarator', // e.g. `A& foo()` + 'parenthesized_declarator', // e.g. `int (*foo)()` + 'array_declarator', // e.g. 
`int (*make_table())[10]` + 'attributed_declarator', // e.g. `int foo() [[nodiscard]] { ... }` +]); + +/** + * C/C++ leaf node types that carry a function/method name. + */ +const CPP_FUNCTION_NAME_LEAVES = new Set([ + 'identifier', + 'field_identifier', + 'qualified_identifier', // e.g. `A::foo`, also direct child for `Foo::operator bool()` + 'destructor_name', // e.g. `~A` + 'operator_name', // e.g. `operator=` + 'operator_cast', // e.g. `operator bool()` + 'template_function', // e.g. `foo<int>` (template specialization name) +]); + +/** + * Walk the declarator chain in C/C++ (`function_definition` → maybe wrappers → + * `function_declarator` → identifier-ish leaf) and return the function name. + * Accepts the input node itself as a leaf, so it also handles cases where the + * `qualified_identifier` / `operator_cast` is a direct child of `function_definition`. + */ +function findCppFunctionName(node: Parser.SyntaxNode): string | null { + if (CPP_FUNCTION_NAME_LEAVES.has(node.type)) { + return node.text; + } + for (const child of node.children) { + if (CPP_FUNCTION_NAME_LEAVES.has(child.type)) { + return child.text; + } + if (CPP_DECLARATOR_WRAPPERS.has(child.type)) { + const inner = findCppFunctionName(child); + if (inner) return inner; + } + } + return null; +} + /** * Extract type information from a type declaration node */ @@ -286,9 +334,11 @@ function extractTypeInfo(node: Parser.SyntaxNode, language: SupportedLanguage): } } - // Find the name child + // Find the name child. + // - 'constant' / 'scope_resolution': Ruby class/module names (incl. namespaced like `Foo::Bar`) const nameNode = node.children.find(c => c.type === 'identifier' || c.type === 'type_identifier' || c.type === 'name' + || c.type === 'constant' || c.type === 'scope_resolution' ); if (!nameNode) { @@ -335,8 +385,20 @@ function extractMethodInfo( if (lower === 'async') isAsync = true; } + // C/C++: extract the name from the `declarator` field to avoid mistaking + // a qualified return type (e.g. 
`std::string` in `std::string foo()`) for + // the function name. The declarator may be a `function_declarator`, a + // wrapper like `pointer_declarator`/`array_declarator`, or a `qualified_identifier` + // for conversion operators (`Foo::operator bool() const`). + if (!name && node.type === 'function_definition') { + const declarator = node.childForFieldName('declarator'); + if (declarator) name = findCppFunctionName(declarator); + } + for (const child of node.children) { - if (child.type === 'identifier' || child.type === 'property_identifier' || child.type === 'name') { + // 'field_identifier': Go uses this for method names on receivers. + if (child.type === 'identifier' || child.type === 'property_identifier' || child.type === 'name' + || child.type === 'field_identifier') { if (!name) name = child.text; } diff --git a/src/parser/languages/cpp.ts b/src/parser/languages/cpp.ts index 2eaabd1..2b5718e 100644 --- a/src/parser/languages/cpp.ts +++ b/src/parser/languages/cpp.ts @@ -81,7 +81,10 @@ export const CPP_COMMENT_NODES = new Set([ */ export const CPP_METHOD_NODES = new Set([ 'function_definition', - 'template_function', + // 'template_function' is intentionally not listed here — it always appears nested + // inside a `function_definition` (via `qualified_identifier`), so listing it would + // produce duplicate method entries for template specializations like + // `template<> void A::foo<int>() {}`. ]); /**