diff --git a/packages/core/package.json b/packages/core/package.json index 35bba09..592e0ab 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -42,7 +42,9 @@ "remark": "^15.0.1", "remark-parse": "^11.0.0", "remark-stringify": "^11.0.0", + "tree-sitter-wasms": "^0.1.13", "ts-morph": "^27.0.2", - "unified": "^11.0.5" + "unified": "^11.0.5", + "web-tree-sitter": "^0.25.10" } } diff --git a/packages/core/src/scanner/__tests__/fixtures/go/generated.go b/packages/core/src/scanner/__tests__/fixtures/go/generated.go new file mode 100644 index 0000000..1e52f9c --- /dev/null +++ b/packages/core/src/scanner/__tests__/fixtures/go/generated.go @@ -0,0 +1,16 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: example.proto + +package example + +// This file should be skipped by the scanner. +// Generated files add noise to the index. + +type GeneratedMessage struct { + Field1 string + Field2 int +} + +func NewGeneratedMessage() *GeneratedMessage { + return &GeneratedMessage{} +} diff --git a/packages/core/src/scanner/__tests__/fixtures/go/methods.go b/packages/core/src/scanner/__tests__/fixtures/go/methods.go new file mode 100644 index 0000000..dd579ab --- /dev/null +++ b/packages/core/src/scanner/__tests__/fixtures/go/methods.go @@ -0,0 +1,81 @@ +// Package example demonstrates methods with receivers. +package example + +import ( + "context" + "fmt" + "time" +) + +// ExpBackoff helps implement exponential backoff for retries. +// It is useful in distributed systems for retrying operations. +type ExpBackoff struct { + initialWait time.Duration + maxWait time.Duration + multiplier float64 + numFailures int +} + +// NewExpBackoff creates a new ExpBackoff instance. +func NewExpBackoff(initial, max time.Duration, mult float64) *ExpBackoff { + return &ExpBackoff{ + initialWait: initial, + maxWait: max, + multiplier: mult, + } +} + +// Success resets the backoff state after a successful operation. 
+func (e *ExpBackoff) Success() { + e.numFailures = 0 +} + +// MarkFailAndGetWait increments failure count and returns wait duration. +// This uses a pointer receiver because it modifies state. +func (e *ExpBackoff) MarkFailAndGetWait() time.Duration { + e.numFailures++ + return e.calculateWait() +} + +// calculateWait computes the wait time (unexported method). +func (e *ExpBackoff) calculateWait() time.Duration { + // Implementation details... + return e.initialWait +} + +// String returns a string representation (value receiver). +func (e ExpBackoff) String() string { + return fmt.Sprintf("ExpBackoff{failures: %d}", e.numFailures) +} + +// Connection represents a network connection. +type Connection struct { + host string + port int + timeout time.Duration + isActive bool +} + +// Connect establishes a connection to the remote host. +func (c *Connection) Connect(ctx context.Context) error { + c.isActive = true + return nil +} + +// Close terminates the connection. +func (c *Connection) Close() error { + c.isActive = false + return nil +} + +// IsActive returns whether the connection is currently active. +// Uses value receiver since it doesn't modify state. +func (c Connection) IsActive() bool { + return c.isActive +} + +// Host returns the connection host. +func (c Connection) Host() string { + return c.host +} + diff --git a/packages/core/src/scanner/__tests__/fixtures/go/simple.go b/packages/core/src/scanner/__tests__/fixtures/go/simple.go new file mode 100644 index 0000000..33c96d1 --- /dev/null +++ b/packages/core/src/scanner/__tests__/fixtures/go/simple.go @@ -0,0 +1,91 @@ +// Package example provides example Go code for scanner testing. +package example + +import ( + "context" + "fmt" +) + +// MaxRetries is the maximum number of retry attempts. +const MaxRetries = 3 + +// DefaultTimeout is the default timeout duration. +const DefaultTimeout = 30 + +// privateConst should not be extracted (unexported). 
+const privateConst = "hidden" + +// Config holds application configuration. +type Config struct { + Host string + Port int + Timeout int +} + +// Server represents a server instance. +// It handles incoming requests and manages connections. +type Server struct { + config *Config + running bool +} + +// Reader defines the interface for reading data. +type Reader interface { + // Read reads data into the provided buffer. + Read(p []byte) (n int, err error) +} + +// Writer defines the interface for writing data. +type Writer interface { + Write(p []byte) (n int, err error) +} + +// ReadWriter combines Reader and Writer interfaces. +type ReadWriter interface { + Reader + Writer +} + +// ID is a type alias for string identifiers. +type ID string + +// Handler is a function type for request handlers. +type Handler func(ctx context.Context, req Request) Response + +// Request represents an incoming request. +type Request struct { + ID ID + Payload []byte +} + +// Response represents an outgoing response. +type Response struct { + ID ID + Status int + Body []byte +} + +// NewServer creates a new server with the given configuration. +// It initializes all internal state and validates the config. +func NewServer(cfg *Config) *Server { + return &Server{ + config: cfg, + running: false, + } +} + +// Start begins the server and starts accepting connections. +func Start(ctx context.Context) error { + fmt.Println("Starting server...") + return nil +} + +// processRequest handles a single request. +// This is an unexported function. 
+func processRequest(req Request) Response { + return Response{ + ID: req.ID, + Status: 200, + Body: []byte("OK"), + } +} diff --git a/packages/core/src/scanner/__tests__/fixtures/go/simple_test.go b/packages/core/src/scanner/__tests__/fixtures/go/simple_test.go new file mode 100644 index 0000000..d94b486 --- /dev/null +++ b/packages/core/src/scanner/__tests__/fixtures/go/simple_test.go @@ -0,0 +1,29 @@ +// Package example contains tests for the example package. +package example + +import ( + "testing" +) + +// TestNewServer tests server creation. +func TestNewServer(t *testing.T) { + cfg := &Config{Host: "localhost", Port: 8080} + server := NewServer(cfg) + if server == nil { + t.Error("expected server to be created") + } +} + +// TestProcessRequest tests request processing. +func TestProcessRequest(t *testing.T) { + req := Request{ID: "test-1", Payload: []byte("hello")} + resp := processRequest(req) + if resp.Status != 200 { + t.Errorf("expected status 200, got %d", resp.Status) + } +} + +// helperFunction is a test helper (unexported). 
+func helperFunction() string { + return "helper" +} diff --git a/packages/core/src/scanner/__tests__/go.test.ts b/packages/core/src/scanner/__tests__/go.test.ts new file mode 100644 index 0000000..36377b9 --- /dev/null +++ b/packages/core/src/scanner/__tests__/go.test.ts @@ -0,0 +1,258 @@ +import * as path from 'node:path'; +import { beforeAll, describe, expect, it } from 'vitest'; +import { GoScanner } from '../go'; +import type { Document } from '../types'; + +describe('GoScanner', () => { + const scanner = new GoScanner(); + const fixturesDir = path.join(__dirname, 'fixtures', 'go'); + + describe('canHandle', () => { + it('should handle .go files', () => { + expect(scanner.canHandle('main.go')).toBe(true); + expect(scanner.canHandle('server.go')).toBe(true); + expect(scanner.canHandle('path/to/file.go')).toBe(true); + }); + + it('should not handle non-Go files', () => { + expect(scanner.canHandle('main.ts')).toBe(false); + expect(scanner.canHandle('main.py')).toBe(false); + expect(scanner.canHandle('main.go.bak')).toBe(false); + expect(scanner.canHandle('README.md')).toBe(false); + }); + + it('should handle case-insensitive extensions', () => { + expect(scanner.canHandle('main.GO')).toBe(true); + expect(scanner.canHandle('main.Go')).toBe(true); + }); + }); + + describe('capabilities', () => { + it('should have correct language', () => { + expect(scanner.language).toBe('go'); + }); + + it('should report syntax capability', () => { + expect(scanner.capabilities.syntax).toBe(true); + }); + + it('should report types capability', () => { + expect(scanner.capabilities.types).toBe(true); + }); + + it('should report documentation capability', () => { + expect(scanner.capabilities.documentation).toBe(true); + }); + }); + + describe('scan', () => { + let simpleDocuments: Document[]; + let methodsDocuments: Document[]; + let testDocuments: Document[]; + + beforeAll(async () => { + // Scan the simple.go fixture + simpleDocuments = await scanner.scan(['simple.go'], 
fixturesDir); + + // Scan the methods.go fixture + methodsDocuments = await scanner.scan(['methods.go'], fixturesDir); + + // Scan the test file fixture + testDocuments = await scanner.scan(['simple_test.go'], fixturesDir); + }); + + describe('functions', () => { + it('should extract exported functions', () => { + const newServer = simpleDocuments.find( + (d) => d.metadata.name === 'NewServer' && d.type === 'function' + ); + expect(newServer).toBeDefined(); + expect(newServer?.metadata.exported).toBe(true); + expect(newServer?.metadata.signature).toContain('func NewServer'); + expect(newServer?.metadata.docstring).toContain('creates a new server'); + }); + + it('should extract unexported functions', () => { + const processRequest = simpleDocuments.find( + (d) => d.metadata.name === 'processRequest' && d.type === 'function' + ); + expect(processRequest).toBeDefined(); + expect(processRequest?.metadata.exported).toBe(false); + }); + + it('should include function signature', () => { + const start = simpleDocuments.find( + (d) => d.metadata.name === 'Start' && d.type === 'function' + ); + expect(start).toBeDefined(); + expect(start?.metadata.signature).toContain('ctx context.Context'); + expect(start?.metadata.signature).toContain('error'); + }); + }); + + describe('structs', () => { + it('should extract struct declarations', () => { + const config = simpleDocuments.find( + (d) => d.metadata.name === 'Config' && d.type === 'class' + ); + expect(config).toBeDefined(); + expect(config?.language).toBe('go'); + expect(config?.metadata.exported).toBe(true); + }); + + it('should extract doc comments for structs', () => { + const server = simpleDocuments.find( + (d) => d.metadata.name === 'Server' && d.type === 'class' + ); + expect(server).toBeDefined(); + expect(server?.metadata.docstring).toContain('represents a server instance'); + }); + + it('should include struct snippet', () => { + const config = simpleDocuments.find( + (d) => d.metadata.name === 'Config' && d.type === 
'class' + ); + expect(config?.metadata.snippet).toContain('Host'); + expect(config?.metadata.snippet).toContain('Port'); + }); + }); + + describe('interfaces', () => { + it('should extract interface declarations', () => { + const reader = simpleDocuments.find( + (d) => d.metadata.name === 'Reader' && d.type === 'interface' + ); + expect(reader).toBeDefined(); + expect(reader?.metadata.exported).toBe(true); + expect(reader?.metadata.signature).toBe('type Reader interface'); + }); + + it('should extract doc comments for interfaces', () => { + const reader = simpleDocuments.find( + (d) => d.metadata.name === 'Reader' && d.type === 'interface' + ); + expect(reader?.metadata.docstring).toContain('reading data'); + }); + + it('should extract embedded interfaces', () => { + const readWriter = simpleDocuments.find( + (d) => d.metadata.name === 'ReadWriter' && d.type === 'interface' + ); + expect(readWriter).toBeDefined(); + expect(readWriter?.metadata.snippet).toContain('Reader'); + expect(readWriter?.metadata.snippet).toContain('Writer'); + }); + }); + + describe('type aliases', () => { + it('should extract type aliases', () => { + const id = simpleDocuments.find((d) => d.metadata.name === 'ID' && d.type === 'type'); + expect(id).toBeDefined(); + expect(id?.metadata.exported).toBe(true); + }); + + it('should extract function types', () => { + const handler = simpleDocuments.find( + (d) => d.metadata.name === 'Handler' && d.type === 'type' + ); + expect(handler).toBeDefined(); + expect(handler?.metadata.signature).toContain('func'); + }); + }); + + describe('constants', () => { + it('should extract exported constants', () => { + const maxRetries = simpleDocuments.find( + (d) => d.metadata.name === 'MaxRetries' && d.type === 'variable' + ); + expect(maxRetries).toBeDefined(); + expect(maxRetries?.metadata.exported).toBe(true); + expect(maxRetries?.metadata.custom?.isConstant).toBe(true); + }); + + it('should not extract unexported constants', () => { + const privateConst = 
simpleDocuments.find((d) => d.metadata.name === 'privateConst'); + expect(privateConst).toBeUndefined(); + }); + }); + + describe('methods', () => { + it('should extract methods with receivers', () => { + const success = methodsDocuments.find( + (d) => d.metadata.name === 'ExpBackoff.Success' && d.type === 'method' + ); + expect(success).toBeDefined(); + expect(success?.metadata.custom?.receiver).toBe('ExpBackoff'); + }); + + it('should detect pointer receivers', () => { + const markFail = methodsDocuments.find( + (d) => d.metadata.name === 'ExpBackoff.MarkFailAndGetWait' && d.type === 'method' + ); + expect(markFail).toBeDefined(); + expect(markFail?.metadata.custom?.receiverPointer).toBe(true); + }); + + it('should detect value receivers', () => { + const stringMethod = methodsDocuments.find( + (d) => d.metadata.name === 'ExpBackoff.String' && d.type === 'method' + ); + expect(stringMethod).toBeDefined(); + expect(stringMethod?.metadata.custom?.receiverPointer).toBe(false); + }); + + it('should extract method doc comments', () => { + const markFail = methodsDocuments.find( + (d) => d.metadata.name === 'ExpBackoff.MarkFailAndGetWait' && d.type === 'method' + ); + expect(markFail?.metadata.docstring).toContain('increments failure count'); + }); + + it('should handle unexported methods', () => { + const calculateWait = methodsDocuments.find( + (d) => d.metadata.name === 'ExpBackoff.calculateWait' && d.type === 'method' + ); + expect(calculateWait).toBeDefined(); + expect(calculateWait?.metadata.exported).toBe(false); + }); + }); + + describe('generated files', () => { + it('should skip generated files', async () => { + const generatedDocs = await scanner.scan(['generated.go'], fixturesDir); + expect(generatedDocs).toHaveLength(0); + }); + }); + + describe('test files', () => { + it('should mark test file documents with isTest flag', () => { + const testNewServer = testDocuments.find((d) => d.metadata.name === 'TestNewServer'); + expect(testNewServer).toBeDefined(); 
+ expect(testNewServer?.metadata.custom?.isTest).toBe(true); + }); + + it('should extract test functions', () => { + const testFunctions = testDocuments.filter( + (d) => d.type === 'function' && d.metadata.name?.startsWith('Test') + ); + expect(testFunctions.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('document IDs', () => { + it('should generate unique IDs in format file:name:line', () => { + const newServer = simpleDocuments.find((d) => d.metadata.name === 'NewServer'); + expect(newServer?.id).toMatch(/^simple\.go:NewServer:\d+$/); + }); + }); + + describe('embedding text', () => { + it('should build embedding text with type, name, signature, and docstring', () => { + const newServer = simpleDocuments.find((d) => d.metadata.name === 'NewServer'); + expect(newServer?.text).toContain('function NewServer'); + expect(newServer?.text).toContain('func NewServer'); + expect(newServer?.text).toContain('creates a new server'); + }); + }); + }); +}); diff --git a/packages/core/src/scanner/go.ts b/packages/core/src/scanner/go.ts new file mode 100644 index 0000000..1f935f9 --- /dev/null +++ b/packages/core/src/scanner/go.ts @@ -0,0 +1,529 @@ +/** + * Go language scanner using tree-sitter + * + * Extracts functions, methods, structs, interfaces, and type aliases from Go source files. + * Uses tree-sitter queries for declarative pattern matching (similar to Aider's approach). 
+ */ + +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import { extractGoDocComment, type ParsedTree, parseCode } from './tree-sitter'; +import type { Document, Scanner, ScannerCapabilities } from './types'; + +/** + * Tree-sitter queries for Go code extraction + * Based on tree-sitter-go grammar: https://github.com/tree-sitter/tree-sitter-go + */ +const GO_QUERIES = { + // Top-level function declarations + functions: ` + (function_declaration + name: (identifier) @name) @definition + `, + + // Method declarations with receivers + methods: ` + (method_declaration + receiver: (parameter_list + (parameter_declaration + name: (identifier)? @receiver_name + type: [ + (pointer_type (type_identifier) @receiver_type) + (type_identifier) @receiver_type + ])) @receiver + name: (field_identifier) @name) @definition + `, + + // Struct type declarations + structs: ` + (type_declaration + (type_spec + name: (type_identifier) @name + type: (struct_type) @struct_body)) @definition + `, + + // Interface type declarations + interfaces: ` + (type_declaration + (type_spec + name: (type_identifier) @name + type: (interface_type) @interface_body)) @definition + `, + + // Type alias declarations (non-struct, non-interface) + typeAliases: ` + (type_declaration + (type_spec + name: (type_identifier) @name + type: [ + (type_identifier) + (qualified_type) + (array_type) + (slice_type) + (map_type) + (channel_type) + (function_type) + ] @alias_type)) @definition + `, + + // Const declarations + constants: ` + (const_declaration + (const_spec + name: (identifier) @name + value: (_)? @value)) @definition + `, + + // Var declarations (package-level) + variables: ` + (var_declaration + (var_spec + name: (identifier) @name + value: (_)? 
@value)) @definition
+  `,
+
+  // Package declaration
+  package: `
+    (package_clause
+      (package_identifier) @name) @definition
+  `,
+};
+
+/**
+ * Go scanner using tree-sitter for parsing
+ */
+export class GoScanner implements Scanner {
+  readonly language = 'go';
+  readonly capabilities: ScannerCapabilities = {
+    syntax: true,
+    types: true,
+    documentation: true,
+  };
+
+  /** Maximum lines for code snippets */
+  private static readonly MAX_SNIPPET_LINES = 50;
+
+  /**
+   * Standard marker line for generated Go files, as defined by the Go project
+   * (https://go.dev/s/generatedcode).
+   */
+  private static readonly GENERATED_MARKER = /^\/\/ Code generated .* DO NOT EDIT\.$/;
+
+  /**
+   * Returns true when the path has a `.go` extension (compared case-insensitively).
+   */
+  canHandle(filePath: string): boolean {
+    const ext = path.extname(filePath).toLowerCase();
+    return ext === '.go';
+  }
+
+  /**
+   * Scan the given files (paths relative to `repoRoot`) and return every
+   * extracted document. Generated files are skipped. A failure on one file is
+   * logged and does not abort the scan of the remaining files.
+   */
+  async scan(files: string[], repoRoot: string): Promise<Document[]> {
+    const documents: Document[] = [];
+
+    for (const file of files) {
+      try {
+        const absolutePath = path.join(repoRoot, file);
+        const sourceText = fs.readFileSync(absolutePath, 'utf-8');
+
+        // Skip generated files
+        if (this.isGeneratedFile(sourceText)) {
+          continue;
+        }
+
+        const fileDocs = await this.extractFromFile(sourceText, file);
+        documents.push(...fileDocs);
+      } catch (error) {
+        // Log error but continue with other files
+        console.error(`Error scanning ${file}:`, error);
+      }
+    }
+
+    return documents;
+  }
+
+  /**
+   * Check if a file is generated (should be skipped).
+   *
+   * A file counts as generated when either:
+   *  - its first line mentions "Code generated" or "DO NOT EDIT" (the original
+   *    heuristic, kept so previously skipped files are still skipped), or
+   *  - any line above the package clause matches the standard Go marker
+   *    `// Code generated ... DO NOT EDIT.`. The marker is frequently preceded
+   *    by a license header or build constraints, so checking only the first
+   *    line misses those files.
+   */
+  private isGeneratedFile(sourceText: string): boolean {
+    const lines = sourceText.split('\n');
+
+    const firstLine = lines[0] || '';
+    if (firstLine.includes('Code generated') || firstLine.includes('DO NOT EDIT')) {
+      return true;
+    }
+
+    for (const rawLine of lines) {
+      // Tolerate CRLF line endings; the marker regex is anchored at line end.
+      const line = rawLine.replace(/\r$/, '');
+      if (GoScanner.GENERATED_MARKER.test(line)) return true;
+      // The marker is only meaningful above the package clause.
+      if (/^package\s/.test(line)) break;
+    }
+
+    return false;
+  }
+
+  /**
+   * Extract documents from a single Go file
+   */
+  private async extractFromFile(sourceText: string, relativeFile: string): Promise<Document[]> {
+    const documents: Document[] = [];
+    const tree = await parseCode(sourceText, 'go');
+    const isTestFile = relativeFile.endsWith('_test.go');
+
+    // Extract functions
+    documents.push(...this.extractFunctions(tree, sourceText, relativeFile, isTestFile));
+
+    // Extract methods
+    documents.push(...this.extractMethods(tree, sourceText, relativeFile, isTestFile));
+
+    // Extract structs
+
documents.push(...this.extractStructs(tree, sourceText, relativeFile, isTestFile)); + + // Extract interfaces + documents.push(...this.extractInterfaces(tree, sourceText, relativeFile, isTestFile)); + + // Extract type aliases + documents.push(...this.extractTypeAliases(tree, sourceText, relativeFile, isTestFile)); + + // Extract constants + documents.push(...this.extractConstants(tree, sourceText, relativeFile, isTestFile)); + + return documents; + } + + /** + * Extract function declarations + */ + private extractFunctions( + tree: ParsedTree, + sourceText: string, + file: string, + isTestFile: boolean + ): Document[] { + const documents: Document[] = []; + const matches = tree.query(GO_QUERIES.functions); + + for (const match of matches) { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + + if (!nameCapture || !defCapture) continue; + + const name = nameCapture.node.text; + const startLine = defCapture.node.startPosition.row + 1; // 1-based + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const signature = this.extractSignature(fullText); + const docstring = extractGoDocComment(sourceText, startLine); + const exported = this.isExported(name); + const snippet = this.truncateSnippet(fullText); + + documents.push({ + id: `${file}:${name}:${startLine}`, + text: this.buildEmbeddingText('function', name, signature, docstring), + type: 'function', + language: 'go', + metadata: { + file, + startLine, + endLine, + name, + signature, + exported, + docstring, + snippet, + custom: isTestFile ? 
{ isTest: true } : undefined, + }, + }); + } + + return documents; + } + + /** + * Extract method declarations (functions with receivers) + */ + private extractMethods( + tree: ParsedTree, + sourceText: string, + file: string, + isTestFile: boolean + ): Document[] { + const documents: Document[] = []; + const matches = tree.query(GO_QUERIES.methods); + + for (const match of matches) { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + const receiverTypeCapture = match.captures.find((c) => c.name === 'receiver_type'); + const receiverCapture = match.captures.find((c) => c.name === 'receiver'); + + if (!nameCapture || !defCapture) continue; + + const methodName = nameCapture.node.text; + const receiverType = receiverTypeCapture?.node.text || 'Unknown'; + const name = `${receiverType}.${methodName}`; + const startLine = defCapture.node.startPosition.row + 1; + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const signature = this.extractSignature(fullText); + const docstring = extractGoDocComment(sourceText, startLine); + const exported = this.isExported(methodName); + const snippet = this.truncateSnippet(fullText); + + // Check if receiver is a pointer + const receiverText = receiverCapture?.node.text || ''; + const receiverPointer = receiverText.includes('*'); + + documents.push({ + id: `${file}:${name}:${startLine}`, + text: this.buildEmbeddingText('method', name, signature, docstring), + type: 'method', + language: 'go', + metadata: { + file, + startLine, + endLine, + name, + signature, + exported, + docstring, + snippet, + custom: { + receiver: receiverType, + receiverPointer, + ...(isTestFile ? 
{ isTest: true } : {}), + }, + }, + }); + } + + return documents; + } + + /** + * Extract struct declarations + */ + private extractStructs( + tree: ParsedTree, + sourceText: string, + file: string, + isTestFile: boolean + ): Document[] { + const documents: Document[] = []; + const matches = tree.query(GO_QUERIES.structs); + + for (const match of matches) { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + + if (!nameCapture || !defCapture) continue; + + const name = nameCapture.node.text; + const startLine = defCapture.node.startPosition.row + 1; + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const signature = `type ${name} struct`; + const docstring = extractGoDocComment(sourceText, startLine); + const exported = this.isExported(name); + const snippet = this.truncateSnippet(fullText); + + documents.push({ + id: `${file}:${name}:${startLine}`, + text: this.buildEmbeddingText('struct', name, signature, docstring), + type: 'class', // Map struct to 'class' for consistency with other scanners + language: 'go', + metadata: { + file, + startLine, + endLine, + name, + signature, + exported, + docstring, + snippet, + custom: isTestFile ? 
{ isTest: true } : undefined, + }, + }); + } + + return documents; + } + + /** + * Extract interface declarations + */ + private extractInterfaces( + tree: ParsedTree, + sourceText: string, + file: string, + isTestFile: boolean + ): Document[] { + const documents: Document[] = []; + const matches = tree.query(GO_QUERIES.interfaces); + + for (const match of matches) { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + + if (!nameCapture || !defCapture) continue; + + const name = nameCapture.node.text; + const startLine = defCapture.node.startPosition.row + 1; + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const signature = `type ${name} interface`; + const docstring = extractGoDocComment(sourceText, startLine); + const exported = this.isExported(name); + const snippet = this.truncateSnippet(fullText); + + documents.push({ + id: `${file}:${name}:${startLine}`, + text: this.buildEmbeddingText('interface', name, signature, docstring), + type: 'interface', + language: 'go', + metadata: { + file, + startLine, + endLine, + name, + signature, + exported, + docstring, + snippet, + custom: isTestFile ? 
{ isTest: true } : undefined, + }, + }); + } + + return documents; + } + + /** + * Extract type alias declarations + */ + private extractTypeAliases( + tree: ParsedTree, + sourceText: string, + file: string, + isTestFile: boolean + ): Document[] { + const documents: Document[] = []; + const matches = tree.query(GO_QUERIES.typeAliases); + + for (const match of matches) { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + + if (!nameCapture || !defCapture) continue; + + const name = nameCapture.node.text; + const startLine = defCapture.node.startPosition.row + 1; + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const signature = fullText.trim(); + const docstring = extractGoDocComment(sourceText, startLine); + const exported = this.isExported(name); + const snippet = this.truncateSnippet(fullText); + + documents.push({ + id: `${file}:${name}:${startLine}`, + text: this.buildEmbeddingText('type', name, signature, docstring), + type: 'type', + language: 'go', + metadata: { + file, + startLine, + endLine, + name, + signature, + exported, + docstring, + snippet, + custom: isTestFile ? 
{ isTest: true } : undefined, + }, + }); + } + + return documents; + } + + /** + * Extract constant declarations + */ + private extractConstants( + tree: ParsedTree, + sourceText: string, + file: string, + isTestFile: boolean + ): Document[] { + const documents: Document[] = []; + const matches = tree.query(GO_QUERIES.constants); + + for (const match of matches) { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + + if (!nameCapture || !defCapture) continue; + + const name = nameCapture.node.text; + // Only extract exported constants + if (!this.isExported(name)) continue; + + const startLine = defCapture.node.startPosition.row + 1; + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const signature = fullText.trim(); + const docstring = extractGoDocComment(sourceText, startLine); + const snippet = this.truncateSnippet(fullText); + + documents.push({ + id: `${file}:${name}:${startLine}`, + text: this.buildEmbeddingText('constant', name, signature, docstring), + type: 'variable', + language: 'go', + metadata: { + file, + startLine, + endLine, + name, + signature, + exported: true, + docstring, + snippet, + custom: { + isConstant: true, + ...(isTestFile ? 
{ isTest: true } : {}),
+          },
+        },
+      });
+    }
+
+    return documents;
+  }
+
+  /**
+   * Check if a Go identifier is exported (starts with uppercase)
+   */
+  private isExported(name: string): boolean {
+    if (!name || name.length === 0) return false;
+    const firstChar = name.charAt(0);
+    return firstChar === firstChar.toUpperCase() && firstChar !== firstChar.toLowerCase();
+  }
+
+  /**
+   * Extract function/method signature: everything before the brace that opens
+   * the body.
+   *
+   * The body brace is the first `{` that is neither nested inside `(...)` /
+   * `[...]` nor the opening of an `interface{...}` / `struct{...}` type
+   * literal. Cutting at the very first `{` would truncate signatures such as
+   * `func Do(v interface{}) error` to `func Do(v interface`.
+   *
+   * Returns the whole trimmed text when no body brace is found.
+   * Braces inside comments placed within the signature are not recognised.
+   */
+  private extractSignature(fullText: string): string {
+    // Nesting depth across (), [] and type-literal braces seen so far.
+    let groupDepth = 0;
+
+    for (let i = 0; i < fullText.length; i++) {
+      const ch = fullText[i];
+      if (ch === '(' || ch === '[') {
+        groupDepth++;
+      } else if (ch === ')' || ch === ']' || ch === '}') {
+        groupDepth--;
+      } else if (ch === '{') {
+        // `interface {` / `struct {` start a type literal, not the body.
+        const opensTypeLiteral = /(?:^|\W)(?:interface|struct)\s*$/.test(fullText.slice(0, i));
+        if (groupDepth === 0 && !opensTypeLiteral) {
+          return fullText.slice(0, i).trim();
+        }
+        groupDepth++;
+      }
+    }
+
+    return fullText.trim();
+  }
+
+  /**
+   * Build embedding text for vector search
+   */
+  private buildEmbeddingText(
+    type: string,
+    name: string,
+    signature: string,
+    docstring?: string
+  ): string {
+    const parts = [`${type} ${name}`, signature];
+    if (docstring) {
+      parts.push(docstring);
+    }
+    return parts.join('\n');
+  }
+
+  /**
+   * Truncate code snippet to maximum lines
+   */
+  private truncateSnippet(text: string): string {
+    const lines = text.split('\n');
+    if (lines.length <= GoScanner.MAX_SNIPPET_LINES) {
+      return text;
+    }
+    const truncated = lines.slice(0, GoScanner.MAX_SNIPPET_LINES).join('\n');
+    const remaining = lines.length - GoScanner.MAX_SNIPPET_LINES;
+    return `${truncated}\n// ... 
${remaining} more lines`; + } +} diff --git a/packages/core/src/scanner/index.ts b/packages/core/src/scanner/index.ts index 9c56932..d8fe2af 100644 --- a/packages/core/src/scanner/index.ts +++ b/packages/core/src/scanner/index.ts @@ -1,5 +1,6 @@ // Export types +export { GoScanner } from './go'; export { MarkdownScanner } from './markdown'; export { ScannerRegistry } from './registry'; export type { @@ -18,8 +19,9 @@ export type { // Export scanner implementations export { TypeScriptScanner } from './typescript'; +import { GoScanner } from './go'; import { MarkdownScanner } from './markdown'; -// Create default scanner registry with TypeScript and Markdown +// Create default scanner registry with TypeScript, Markdown, and Go import { ScannerRegistry } from './registry'; import type { ScanOptions } from './types'; import { TypeScriptScanner } from './typescript'; @@ -36,6 +38,9 @@ export function createDefaultRegistry(): ScannerRegistry { // Register Markdown scanner registry.register(new MarkdownScanner()); + // Register Go scanner + registry.register(new GoScanner()); + return registry; } diff --git a/packages/core/src/scanner/tree-sitter.ts b/packages/core/src/scanner/tree-sitter.ts new file mode 100644 index 0000000..9d74540 --- /dev/null +++ b/packages/core/src/scanner/tree-sitter.ts @@ -0,0 +1,227 @@ +/** + * Tree-sitter utility module for multi-language parsing + * + * Provides WASM-based tree-sitter parsing with query support. + * Used by GoScanner and future language scanners (Python, Rust). 
+ */
+
+import * as path from 'node:path';
+
+// web-tree-sitter types
+type ParserType = import('web-tree-sitter').Parser;
+type ParserConstructor = typeof import('web-tree-sitter').Parser;
+type LanguageType = import('web-tree-sitter').Language;
+type LanguageConstructor = typeof import('web-tree-sitter').Language;
+type QueryConstructor = typeof import('web-tree-sitter').Query;
+
+// Cached classes after initialization
+let ParserClass: ParserConstructor | null = null;
+let LanguageClass: LanguageConstructor | null = null;
+let QueryClass: QueryConstructor | null = null;
+let parserInitialized = false;
+
+/**
+ * Supported languages for tree-sitter parsing
+ */
+export type TreeSitterLanguage = 'go' | 'python' | 'rust';
+
+/**
+ * Cache of loaded language grammars, keyed by language name.
+ * Typed explicitly so cache hits are returned as a Language rather than `any`.
+ */
+const languageCache = new Map<TreeSitterLanguage, LanguageType>();
+
+/**
+ * Initialize the tree-sitter parser (must be called before parsing)
+ * This is idempotent - safe to call multiple times
+ */
+export async function initTreeSitter(): Promise<void> {
+  if (parserInitialized && ParserClass && LanguageClass && QueryClass) return;
+
+  const TreeSitter = await import('web-tree-sitter');
+  ParserClass = TreeSitter.Parser;
+  LanguageClass = TreeSitter.Language;
+  QueryClass = TreeSitter.Query;
+
+  await ParserClass.init();
+  parserInitialized = true;
+}
+
+/**
+ * Get the WASM file path for a language from tree-sitter-wasms package
+ */
+function getWasmPath(language: TreeSitterLanguage): string {
+  // tree-sitter-wasms package structure: node_modules/tree-sitter-wasms/out/tree-sitter-{lang}.wasm
+  const wasmFileName = `tree-sitter-${language}.wasm`;
+
+  // Try to resolve from node_modules
+  try {
+    const packagePath = require.resolve('tree-sitter-wasms/package.json');
+    const packageDir = path.dirname(packagePath);
+    return path.join(packageDir, 'out', wasmFileName);
+  } catch {
+    // Fallback: assume it's in a standard location
+    return path.join(process.cwd(), 'node_modules', 'tree-sitter-wasms', 'out', wasmFileName);
+  }
+}
+
+/**
+ * Load a language grammar for tree-sitter
+ *
+ * Returns the cached grammar when one exists; otherwise initializes
+ * tree-sitter, loads the grammar WASM and stores it in the cache.
+ *
+ * @param language - grammar to load
+ * @throws Error if the tree-sitter classes are unavailable after initialization
+ */
+export async function loadLanguage(language: TreeSitterLanguage): Promise<LanguageType> {
+  // Return cached if available
+  const cached = languageCache.get(language);
+  if (cached) return cached;
+
+  // Ensure parser is initialized
+  await initTreeSitter();
+
+  if (!LanguageClass) {
+    throw new Error('Tree-sitter not initialized');
+  }
+
+  // Load the language WASM
+  const grammar = await LanguageClass.load(getWasmPath(language));
+
+  languageCache.set(language, grammar);
+  return grammar;
+}
+
+/**
+ * Create a new parser instance with a specific language
+ *
+ * The caller owns the returned parser and should call `delete()` on it when
+ * finished so its WASM-side memory is released.
+ *
+ * @param language - grammar the parser is configured with
+ * @throws Error if the tree-sitter classes are unavailable after initialization
+ */
+export async function createParser(language: TreeSitterLanguage): Promise<ParserType> {
+  await initTreeSitter();
+
+  if (!ParserClass) {
+    throw new Error('Tree-sitter not initialized');
+  }
+
+  // Load the grammar before allocating the parser so a failed load cannot
+  // strand a parser that nobody will delete
+  const grammar = await loadLanguage(language);
+  const parser = new ParserClass();
+  parser.setLanguage(grammar);
+
+  return parser;
+}
+
+/**
+ * Parsed syntax tree with query capabilities
+ */
+export interface ParsedTree {
+  /** The root node of the syntax tree */
+  rootNode: TreeSitterNode;
+  /** The source text that was parsed */
+  sourceText: string;
+  /** Execute a tree-sitter query and return matches */
+  query(queryString: string): QueryMatch[];
+  /**
+   * Release the underlying syntax tree. Nodes obtained from this tree must
+   * not be used afterwards. Optional so existing implementers stay valid.
+   */
+  dispose?(): void;
+}
+
+/**
+ * A node in the tree-sitter syntax tree
+ */
+export interface TreeSitterNode {
+  type: string;
+  text: string;
+  startPosition: { row: number; column: number };
+  endPosition: { row: number; column: number };
+  children: TreeSitterNode[];
+  namedChildren: TreeSitterNode[];
+  childForFieldName(name: string): TreeSitterNode | null;
+  parent: TreeSitterNode | null;
+}
+
+/**
+ * A match from a tree-sitter query
+ */
+export interface QueryMatch {
+  pattern: number;
+  captures: QueryCapture[];
+}
+
+/**
+ * A captured node from a query match
+ */
+export interface QueryCapture {
+  name: string;
+  node: TreeSitterNode;
+}
+
+/**
+ * Parse source code with tree-sitter
+ *
+ * The parser is deleted as soon as parsing finishes, and each compiled query
+ * is deleted after its matches have been converted. The tree itself stays
+ * alive until the caller invokes `dispose()` on the result.
+ *
+ * @param sourceText - code to parse
+ * @param language - grammar to parse it with
+ * @throws Error if parsing yields no tree or tree-sitter is not initialized
+ */
+export async function parseCode(
+  sourceText: string,
+  language: TreeSitterLanguage
+): Promise<ParsedTree> {
+  const parser = await createParser(language);
+
+  let parsed: ReturnType<ParserType['parse']>;
+  try {
+    parsed = parser.parse(sourceText);
+  } finally {
+    // Only the tree is needed from here on; free the parser right away
+    parser.delete();
+  }
+
+  if (!parsed) {
+    throw new Error(`Failed to parse ${language} code`);
+  }
+  // Fresh const so the non-null narrowing survives inside the closures below
+  const tree = parsed;
+
+  const lang = await loadLanguage(language);
+
+  if (!QueryClass) {
+    throw new Error('Tree-sitter not initialized');
+  }
+
+  // Cache the QueryClass reference for use in the closure
+  const QueryCls = QueryClass;
+
+  return {
+    rootNode: tree.rootNode as unknown as TreeSitterNode,
+    sourceText,
+    query(queryString: string): QueryMatch[] {
+      // Use new Query(language, source) instead of deprecated lang.query()
+      const query = new QueryCls(lang, queryString);
+      try {
+        // Convert web-tree-sitter matches to our QueryMatch format
+        return query.matches(tree.rootNode).map((match) => ({
+          pattern: match.pattern,
+          captures: match.captures.map((cap) => ({
+            name: cap.name,
+            node: cap.node as unknown as TreeSitterNode,
+          })),
+        }));
+      } finally {
+        // A Query is compiled per call; delete it so repeated calls do not leak
+        query.delete();
+      }
+    },
+    dispose(): void {
+      tree.delete();
+    },
+  };
+}
+
+/**
+ * Helper to get text from source by line numbers (1-based)
+ *
+ * @param sourceText - full source text
+ * @param startLine - first line to include (1-based); values below 1 are treated as 1
+ * @param endLine - last line to include (1-based, inclusive)
+ * @returns the selected lines joined with '\n', or '' when the range is empty
+ */
+export function getTextByLines(sourceText: string, startLine: number, endLine: number): string {
+  // Clamp both bounds: a negative index would make slice() count from the
+  // end of the array and return unrelated lines
+  const from = Math.max(startLine, 1) - 1;
+  const to = Math.max(endLine, 0);
+  return sourceText.split('\n').slice(from, to).join('\n');
+}
+
+/**
+ * Helper to extract doc comment preceding a node
+ * Go doc comments are single-line // comments immediately before declarations
+ *
+ * Only the unbroken run of // lines directly above the declaration is
+ * returned; a blank line or any other text ends the run, so a comment
+ * separated from the declaration by a blank line is not treated as its doc.
+ *
+ * @param sourceText - full source text
+ * @param nodeStartLine - 1-based line on which the declaration starts
+ * @returns the comment text with the // markers stripped, or undefined if none
+ */
+export function extractGoDocComment(sourceText: string, nodeStartLine: number): string | undefined {
+  const lines = sourceText.split('\n');
+
+  // 0-based index of the line directly above the declaration
+  const start = nodeStartLine - 2;
+  if (!Number.isInteger(start) || start >= lines.length) return undefined;
+
+  const docLines: string[] = [];
+
+  // Walk backwards while the lines are still // comments
+  for (let i = start; i >= 0; i--) {
+    const line = lines[i].trim();
+    if (!line.startsWith('//')) break;
+
+    // Remove the // prefix and trim
+    docLines.unshift(line.slice(2).trim());
+  }
+
+  return docLines.length > 0 ? docLines.join('\n') : undefined;
+}
diff --git a/packages/subagents/src/explorer/__tests__/index.test.ts b/packages/subagents/src/explorer/__tests__/index.test.ts index 6b9dbbc..86a23a3 100644 --- a/packages/subagents/src/explorer/__tests__/index.test.ts +++ b/packages/subagents/src/explorer/__tests__/index.test.ts @@ -624,6 +624,9 @@ describe('ExplorerAgent', () => { // Close the indexer to cause an error await indexer.close(); + // Mock the logger to suppress expected error output + const errorSpy = vi.spyOn(context.logger, 'error').mockImplementation(() => {}); + const message: Message = { id: 'msg-error-1', type: 'request', @@ -642,8 +645,10 @@ describe('ExplorerAgent', () => { expect(response).toBeDefined(); expect(response?.type).toBe('error'); expect(response?.payload).toHaveProperty('error'); + expect(errorSpy).toHaveBeenCalled(); - // Reinitialize for other tests + // Restore logger and reinitialize for other tests + errorSpy.mockRestore(); await indexer.initialize(); await indexer.index(); }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 811f47f..d9fa567 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -109,12 +109,18 @@ importers: remark-stringify: specifier: ^11.0.0 version: 11.0.0 + tree-sitter-wasms: + specifier: ^0.1.13 + version: 0.1.13 ts-morph: specifier: ^27.0.2 version: 27.0.2 unified: specifier: ^11.0.5 version: 11.0.5 + web-tree-sitter: + specifier: ^0.25.10 + version: 0.25.10 devDependencies: '@types/mdast': specifier: ^4.0.4 @@ -4565,6 +4571,10 @@ packages: hasBin: true dev: true + /tree-sitter-wasms@0.1.13: + resolution: {integrity: sha512-wT+cR6DwaIz80/vho3AvSF0N4txuNx/5bcRKoXouOfClpxh/qqrF4URNLQXbbt8MaAxeksZcZd1j8gcGjc+QxQ==} + dev: false + /trough@2.2.0: resolution: {integrity: 
sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==} dev: false @@ -5155,6 +5165,15 @@ packages: - yaml dev: true + /web-tree-sitter@0.25.10: + resolution: {integrity: sha512-Y09sF44/13XvgVKgO2cNDw5rGk6s26MgoZPXLESvMXeefBf7i6/73eFurre0IsTW6E14Y0ArIzhUMmjoc7xyzA==} + peerDependencies: + '@types/emscripten': ^1.40.0 + peerDependenciesMeta: + '@types/emscripten': + optional: true + dev: false + /which@2.0.2: resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} engines: {node: '>= 8'}