diff --git a/.clinerules b/.clinerules index c2f9622..c5c6fbd 100644 --- a/.clinerules +++ b/.clinerules @@ -38,6 +38,14 @@ CodeCritique is an AI-powered code review tool that: - Validate all user inputs - Log errors with meaningful context +### Logging + +- Use `verboseLog(options, ...)` from `src/utils/logging.js` for progress and informational diagnostics gated by verbose mode +- Use `debug(...)` only for developer-focused tracing gated by `DEBUG` +- Keep `console.warn(...)` and `console.error(...)` for warnings and errors that should always be shown +- Avoid raw `console.log(...)` in normal code paths +- Document logging-related options such as `verbose` in JSDoc and pass them through to downstream helpers + ### Key Commands ```bash diff --git a/.cursorrules b/.cursorrules index 952f688..002d488 100644 --- a/.cursorrules +++ b/.cursorrules @@ -45,6 +45,14 @@ CodeCritique is an AI-powered code review tool that: - Validate all user inputs - Log errors with meaningful context +### Logging + +- Use `verboseLog(options, ...)` from `src/utils/logging.js` for progress and informational diagnostics gated by verbose mode +- Use `debug(...)` only for developer-focused tracing gated by `DEBUG` +- Keep `console.warn(...)` and `console.error(...)` for warnings and errors that should always be shown +- Avoid raw `console.log(...)` in normal code paths +- Document logging-related options such as `verbose` in JSDoc and pass them through to downstream helpers + ### Key Commands ```bash diff --git a/.roo/rules/01-project-rules.md b/.roo/rules/01-project-rules.md index b7193b9..86adfea 100644 --- a/.roo/rules/01-project-rules.md +++ b/.roo/rules/01-project-rules.md @@ -35,6 +35,14 @@ For comprehensive guidelines, see **AGENTS.md** in the project root. 
- Validate all user inputs - Log errors with meaningful context +### Logging + +- Use `verboseLog(options, ...)` from `src/utils/logging.js` for progress and informational diagnostics gated by verbose mode +- Use `debug(...)` only for developer-focused tracing gated by `DEBUG` +- Keep `console.warn(...)` and `console.error(...)` for warnings and errors that should always be shown +- Avoid raw `console.log(...)` in normal code paths +- Document logging-related options such as `verbose` in JSDoc and pass them through to downstream helpers + ### Key Commands ```bash diff --git a/AGENTS.md b/AGENTS.md index 22c3244..5136abb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -356,6 +356,14 @@ import { Internal } from './internal.js'; - Handle errors gracefully with meaningful messages - Log errors with context +### Logging + +- Use `verboseLog(options, ...)` from `src/utils/logging.js` for progress and informational diagnostics that should only appear in verbose mode +- Use `debug(...)` only for developer-focused tracing that should require `DEBUG` +- Use `console.warn(...)` and `console.error(...)` for warnings and errors that should always be surfaced +- Avoid raw `console.log(...)` in normal code paths +- When a function accepts logging-related options such as `verbose`, document them in JSDoc and pass them through to downstream helpers + ### Async/Await - Prefer async/await over raw promises diff --git a/CLAUDE.md b/CLAUDE.md index dba03d4..f3017a7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -46,6 +46,14 @@ CodeCritique is an AI-powered code review tool that: - Validate all user inputs - Log errors with meaningful context +### Logging + +- Use `verboseLog(options, ...)` from `src/utils/logging.js` for progress and informational diagnostics gated by verbose mode +- Use `debug(...)` only for developer-focused tracing gated by `DEBUG` +- Keep `console.warn(...)` and `console.error(...)` for warnings and errors that should always be shown +- Avoid raw `console.log(...)` in normal 
code paths +- Document logging-related options such as `verbose` in JSDoc and pass them through to downstream helpers + ### Key Commands ```bash diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e3329b4..8168032 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -139,6 +139,14 @@ src/ - Handle errors gracefully with meaningful messages - Write self-documenting code with clear variable names +### Logging + +- Use `verboseLog(options, ...)` from `src/utils/logging.js` for progress and informational diagnostics gated by verbose mode +- Use `debug(...)` only for developer-focused tracing gated by `DEBUG` +- Keep `console.warn(...)` and `console.error(...)` for warnings and errors that should always be shown +- Avoid raw `console.log(...)` in normal code paths +- Document logging-related options such as `verbose` in JSDoc and pass them through to downstream helpers + --- ## Commit Conventions diff --git a/src/content-retrieval.js b/src/content-retrieval.js index 77ce2e5..c849b58 100644 --- a/src/content-retrieval.js +++ b/src/content-retrieval.js @@ -23,7 +23,7 @@ import { calculateCosineSimilarity, calculatePathSimilarity } from './embeddings import { inferContextFromDocumentContent } from './utils/context-inference.js'; import { isGenericDocument, getGenericDocumentContext } from './utils/document-detection.js'; import { isDocumentationFile } from './utils/file-validation.js'; -import { debug } from './utils/logging.js'; +import { debug, verboseLog } from './utils/logging.js'; const FILE_EMBEDDINGS_TABLE = TABLE_NAMES.FILE_EMBEDDINGS; const DOCUMENT_CHUNK_TABLE = TABLE_NAMES.DOCUMENT_CHUNK; @@ -79,7 +79,8 @@ export class ContentRetriever { return []; } - console.log( + verboseLog( + options, chalk.cyan(`Native hybrid documentation search - limit: ${limit}, threshold: ${similarityThreshold}, reranking: ${useReranking}`) ); @@ -91,7 +92,7 @@ export class ContentRetriever { return []; } - console.log(chalk.cyan('Performing native hybrid search for 
documentation...')); + verboseLog(options, chalk.cyan('Performing native hybrid search for documentation...')); let query = table.search(queryText).nearestToText(queryText); const resolvedProjectPath = path.resolve(projectPath); @@ -106,7 +107,7 @@ export class ContentRetriever { } const results = await query.limit(Math.max(limit * 3, 20)).toArray(); - console.log(chalk.green(`Native hybrid search returned ${results.length} documentation results`)); + verboseLog(options, chalk.green(`Native hybrid search returned ${results.length} documentation results`)); // OPTIMIZATION: Enhanced batch file existence checks with parallel processing const docsToCheck = []; @@ -168,7 +169,7 @@ export class ContentRetriever { // Filter results based on project match using the map const projectFilteredResults = results.filter((result, index) => docProjectMatchMap.get(index) === true); - console.log(chalk.blue(`Filtered to ${projectFilteredResults.length} documentation results from current project`)); + verboseLog(options, chalk.blue(`Filtered to ${projectFilteredResults.length} documentation results from current project`)); let finalResults = projectFilteredResults.map((result) => { let similarity; if (result._distance !== undefined) { @@ -197,7 +198,7 @@ export class ContentRetriever { let queryEmbedding = null; if (useReranking && queryContextForReranking && finalResults.length >= 3) { - console.log(chalk.cyan('Applying sophisticated contextual reranking to documentation...')); + verboseLog(options, chalk.cyan('Applying sophisticated contextual reranking to documentation...')); const WEIGHT_INITIAL_SIM = 0.3; const WEIGHT_H1_CHUNK_RERANK = 0.15; const HEAVY_BOOST_SAME_AREA = 0.4; @@ -416,7 +417,7 @@ export class ContentRetriever { finalResults = finalResults.slice(0, limit); } - console.log(chalk.green(`Returning ${finalResults.length} documentation results`)); + verboseLog(options, chalk.green(`Returning ${finalResults.length} documentation results`)); return finalResults; } catch 
(error) { @@ -443,7 +444,10 @@ export class ContentRetriever { precomputedQueryEmbedding = null, } = options; - console.log(chalk.cyan(`Native hybrid code search - limit: ${limit}, threshold: ${similarityThreshold}, isTestFile: ${isTestFile}`)); + verboseLog( + options, + chalk.cyan(`Native hybrid code search - limit: ${limit}, threshold: ${similarityThreshold}, isTestFile: ${isTestFile}`) + ); try { if (!queryText?.trim()) { @@ -460,7 +464,7 @@ export class ContentRetriever { } // Native hybrid search with automatic vector + FTS + RRF - console.log(chalk.cyan('Performing native hybrid search for code...')); + verboseLog(options, chalk.cyan('Performing native hybrid search for code...')); let query = table.search(queryText).nearestToText(queryText); // Add filtering conditions @@ -472,13 +476,13 @@ export class ContentRetriever { if (isTestFile) { // Only include test files conditions.push(`(path LIKE '%.test.%' OR path LIKE '%.spec.%' OR path LIKE '%_test.py' OR path LIKE 'test_%.py')`); - console.log(chalk.blue(`Filtering to include only test files.`)); + verboseLog(options, chalk.blue(`Filtering to include only test files.`)); } else { // Exclude test files conditions.push( `(path NOT LIKE '%.test.%' AND path NOT LIKE '%.spec.%' AND path NOT LIKE '%_test.py' AND path NOT LIKE 'test_%.py')` ); - console.log(chalk.blue(`Filtering to exclude test files.`)); + verboseLog(options, chalk.blue(`Filtering to exclude test files.`)); } } @@ -526,7 +530,7 @@ export class ContentRetriever { const results = await query.limit(Math.max(limit * 3, 20)).toArray(); - console.log(chalk.green(`Native hybrid search returned ${results.length} results`)); + verboseLog(options, chalk.green(`Native hybrid search returned ${results.length} results`)); // OPTIMIZATION: Batch file existence checks for better performance const resultsToCheck = []; @@ -595,7 +599,7 @@ export class ContentRetriever { // Filter results based on project match using the map const projectFilteredResults = 
results.filter((result, index) => projectMatchMap.get(index) === true); - console.log(chalk.blue(`Filtered to ${projectFilteredResults.length} results from current project`)); + verboseLog(options, chalk.blue(`Filtered to ${projectFilteredResults.length} results from current project`)); // Map results to expected format let finalResults = projectFilteredResults.map((result) => { @@ -683,7 +687,7 @@ export class ContentRetriever { finalResults = finalResults.slice(0, limit); } - console.log(chalk.green(`Returning ${finalResults.length} optimized hybrid search results`)); + verboseLog(options, chalk.green(`Returning ${finalResults.length} optimized hybrid search results`)); return finalResults; } catch (error) { console.error(chalk.red(`Error in optimized findSimilarCode: ${error.message}`), error); @@ -712,7 +716,7 @@ export class ContentRetriever { this.h1EmbeddingCache.clear(); this.documentContextCache.clear(); this.documentContextPromiseCache.clear(); - console.log(chalk.green('ContentRetriever caches cleared')); + verboseLog({}, chalk.green('ContentRetriever caches cleared')); } /** @@ -739,7 +743,7 @@ export class ContentRetriever { parallelRerankingTime: 0, }; - console.log(chalk.green('ContentRetriever cleanup complete')); + verboseLog({}, chalk.green('ContentRetriever cleanup complete')); } finally { this.cleaningUp = false; } diff --git a/src/custom-documents.js b/src/custom-documents.js index 50e0ed5..dec414b 100644 --- a/src/custom-documents.js +++ b/src/custom-documents.js @@ -18,7 +18,7 @@ import { CacheManager } from './embeddings/cache-manager.js'; import { EmbeddingError, ValidationError } from './embeddings/errors.js'; import { ModelManager } from './embeddings/model-manager.js'; import { calculateCosineSimilarity, calculatePathSimilarity } from './embeddings/similarity-calculator.js'; -import { debug } from './utils/logging.js'; +import { debug, verboseLog } from './utils/logging.js'; import { slugify } from './utils/string-utils.js'; /** @@ 
-140,7 +140,7 @@ export class CustomDocumentProcessor { this.performanceMetrics.averageChunkSize = chunks.reduce((sum, chunk) => sum + chunk.content.length, 0) / chunks.length; this.performanceMetrics.processingTime += Date.now() - startTime; - console.log(chalk.gray(` Chunked document "${documentTitle}" into ${chunks.length} chunks`)); + verboseLog({}, chalk.gray(` Chunked document "${documentTitle}" into ${chunks.length} chunks`)); return chunks; } catch (error) { console.error(chalk.red(`Error chunking document: ${error.message}`)); @@ -159,18 +159,18 @@ export class CustomDocumentProcessor { try { if (!customDocs || customDocs.length === 0) { - console.log(chalk.gray('No custom documents to process')); + verboseLog({}, chalk.gray('No custom documents to process')); return []; } - console.log(chalk.cyan(`Processing ${customDocs.length} custom documents into chunks...`)); + verboseLog({}, chalk.cyan(`Processing ${customDocs.length} custom documents into chunks...`)); const allChunks = []; let totalBatchAttempts = 0; let successfulBatches = 0; for (const doc of customDocs) { - console.log(chalk.gray(` Processing document: ${doc.title}`)); + verboseLog({}, chalk.gray(` Processing document: ${doc.title}`)); // Chunk the document const chunks = this.chunkDocument(doc); @@ -205,12 +205,12 @@ export class CustomDocumentProcessor { const validChunks = chunksWithEmbeddings.filter((chunk) => chunk !== null); allChunks.push(...validChunks); - console.log(chalk.gray(` Generated embeddings for ${validChunks.length}/${chunks.length} chunks`)); + verboseLog({}, chalk.gray(` Generated embeddings for ${validChunks.length}/${chunks.length} chunks`)); this.performanceMetrics.embeddingsCalculated += validChunks.length; } catch (error) { console.error(chalk.red(`Error in batch embedding generation for document ${doc.title}: ${error.message}`)); // Fallback to individual processing for this document - console.log(chalk.yellow(` Falling back to individual processing for 
${doc.title}`)); + verboseLog({}, chalk.yellow(` Falling back to individual processing for ${doc.title}`)); const chunksWithEmbeddings = await Promise.all( chunks.map(async (chunk) => { @@ -235,7 +235,7 @@ export class CustomDocumentProcessor { const validChunks = chunksWithEmbeddings.filter((chunk) => chunk !== null); allChunks.push(...validChunks); - console.log(chalk.gray(` Generated embeddings for ${validChunks.length}/${chunks.length} chunks (fallback)`)); + verboseLog({}, chalk.gray(` Generated embeddings for ${validChunks.length}/${chunks.length} chunks (fallback)`)); } } @@ -252,7 +252,7 @@ export class CustomDocumentProcessor { this.performanceMetrics.documentsProcessed += customDocs.length; this.performanceMetrics.processingTime += Date.now() - startTime; - console.log(chalk.green(`Successfully processed ${allChunks.length} custom document chunks (${Date.now() - startTime}ms)`)); + verboseLog({}, chalk.green(`Successfully processed ${allChunks.length} custom document chunks (${Date.now() - startTime}ms)`)); return allChunks; } catch (error) { console.error(chalk.red(`Error processing custom documents: ${error.message}`)); @@ -285,11 +285,11 @@ export class CustomDocumentProcessor { } if (!chunks || chunks.length === 0) { - console.log(chalk.gray('No custom document chunks available for search')); + verboseLog({}, chalk.gray('No custom document chunks available for search')); return []; } - console.log(chalk.cyan(`Searching ${chunks.length} custom document chunks...`)); + verboseLog({}, chalk.cyan(`Searching ${chunks.length} custom document chunks...`)); // OPTIMIZATION: Use pre-computed query embedding if available let queryEmbedding = precomputedQueryEmbedding; @@ -319,7 +319,7 @@ export class CustomDocumentProcessor { filteredResults = filteredResults.slice(0, limit); } - console.log(chalk.green(`Found ${filteredResults.length} relevant custom document chunks (${Date.now() - startTime}ms)`)); + verboseLog({}, chalk.green(`Found ${filteredResults.length} 
relevant custom document chunks (${Date.now() - startTime}ms)`)); // Log top results for debugging if (filteredResults.length > 0) { @@ -371,7 +371,7 @@ export class CustomDocumentProcessor { * @private */ async _applyParallelReranking(filteredResults, queryText, queryContextForReranking, queryFilePath, queryEmbedding) { - console.log(chalk.cyan('Applying optimized parallel contextual reranking to custom document chunks...')); + verboseLog({}, chalk.cyan('Applying optimized parallel contextual reranking to custom document chunks...')); const WEIGHT_INITIAL_SIM = 0.4; const WEIGHT_DOCUMENT_TITLE_MATCH = 0.2; @@ -466,7 +466,7 @@ export class CustomDocumentProcessor { // Wait for all reranking calculations to complete in parallel await Promise.all(rerankingPromises); - console.log(chalk.cyan(`Parallel reranking completed for ${filteredResults.length} chunks`)); + verboseLog({}, chalk.cyan(`Parallel reranking completed for ${filteredResults.length} chunks`)); // Log debug info for first few results for (let i = 0; i < Math.min(3, filteredResults.length); i++) { @@ -521,7 +521,7 @@ export class CustomDocumentProcessor { const resolvedProjectPath = path.resolve(projectPath); this.customDocumentChunks.delete(resolvedProjectPath); this.cacheManager.customDocumentChunks.delete(resolvedProjectPath); - console.log(chalk.green(`Cleared custom document chunks for project: ${resolvedProjectPath}`)); + verboseLog({}, chalk.green(`Cleared custom document chunks for project: ${resolvedProjectPath}`)); } catch (error) { console.error(chalk.red(`Error clearing project chunks: ${error.message}`)); } @@ -561,7 +561,7 @@ export class CustomDocumentProcessor { clearCaches() { this.h1EmbeddingCache.clear(); this.customDocumentChunks.clear(); - console.log(chalk.green('CustomDocumentProcessor caches cleared')); + verboseLog({}, chalk.green('CustomDocumentProcessor caches cleared')); } /** @@ -589,7 +589,7 @@ export class CustomDocumentProcessor { processingTime: 0, }; - 
console.log(chalk.green('CustomDocumentProcessor cleanup complete')); + verboseLog({}, chalk.green('CustomDocumentProcessor cleanup complete')); } finally { this.cleaningUp = false; } diff --git a/src/embeddings/cache-manager.js b/src/embeddings/cache-manager.js index 34a94a5..f97b20a 100644 --- a/src/embeddings/cache-manager.js +++ b/src/embeddings/cache-manager.js @@ -19,6 +19,7 @@ */ import chalk from 'chalk'; +import { verboseLog } from '../utils/logging.js'; import { MAX_EMBEDDING_CACHE_SIZE } from './constants.js'; // ============================================================================ @@ -234,7 +235,8 @@ export class CacheManager { this.stats.misses = 0; this.stats.evictions = 0; - console.log( + verboseLog( + {}, chalk.yellow( `[CACHE] Cleared all caches - Document contexts: ${docCacheSize}, Promise: ${promiseCacheSize}, H1 embeddings: ${h1CacheSize}, Embeddings: ${embeddingCacheSize}, Custom docs: ${customDocCacheSize}` ) @@ -250,7 +252,7 @@ export class CacheManager { if (cacheMap) { const size = cacheMap.size; cacheMap.clear(); - console.log(chalk.yellow(`[CACHE] Cleared ${cacheType} cache - ${size} items`)); + verboseLog({}, chalk.yellow(`[CACHE] Cleared ${cacheType} cache - ${size} items`)); } else { console.warn(chalk.yellow(`[CACHE] Unknown cache type: ${cacheType}`)); } @@ -315,7 +317,7 @@ export class CacheManager { try { this.clearAllCaches(); - console.log(chalk.green('[CACHE] Cache cleanup completed')); + verboseLog({}, chalk.green('[CACHE] Cache cleanup completed')); } finally { this.cleaningUp = false; } diff --git a/src/embeddings/database.js b/src/embeddings/database.js index f0f6c8b..3008843 100644 --- a/src/embeddings/database.js +++ b/src/embeddings/database.js @@ -23,7 +23,7 @@ import path from 'node:path'; import * as lancedb from '@lancedb/lancedb'; import { Field, FixedSizeList, Float32, Int32, Schema, Utf8 } from 'apache-arrow'; import chalk from 'chalk'; -import { debug } from '../utils/logging.js'; +import { debug, 
verboseLog } from '../utils/logging.js'; import { EMBEDDING_DIMENSIONS, TABLE_NAMES } from './constants.js'; import { LANCEDB_PATH } from './constants.js'; import { createDatabaseError, ERROR_CODES } from './errors.js'; @@ -69,12 +69,12 @@ export class DatabaseManager { */ async getDBConnection() { if (!this.dbConnection) { - console.log(chalk.blue(`Initializing DB connection. Target Path: ${this.dbPath}`)); + verboseLog({}, chalk.blue(`Initializing DB connection. Target Path: ${this.dbPath}`)); if (!fs.existsSync(this.dbPath)) { fs.mkdirSync(this.dbPath, { recursive: true }); } this.dbConnection = await lancedb.connect(this.dbPath); - console.log(chalk.green('LanceDB connected.')); + verboseLog({}, chalk.green('LanceDB connected.')); } return this.dbConnection; } @@ -104,12 +104,12 @@ export class DatabaseManager { */ async closeConnection() { if (this.dbConnection) { - console.log('Closing LanceDB connection...'); + verboseLog({}, 'Closing LanceDB connection...'); await this.dbConnection.close(); this.dbConnection = null; this.tablesInitialized = false; this.tableInitializationPromise = null; - console.log('LanceDB connection closed.'); + verboseLog({}, 'LanceDB connection closed.'); } } @@ -135,11 +135,11 @@ export class DatabaseManager { // Start initialization and store the promise this.tableInitializationPromise = (async () => { try { - console.log(chalk.blue('Initializing database tables and indices...')); + verboseLog({}, chalk.blue('Initializing database tables and indices...')); const db = await this.getDBConnection(); await this.ensureTablesExist(db); this.tablesInitialized = true; - console.log(chalk.green('Database tables and indices initialized successfully.')); + verboseLog({}, chalk.green('Database tables and indices initialized successfully.')); } catch (error) { this.tablesInitialized = false; console.error(chalk.red('Failed to initialize database tables:'), error); @@ -200,18 +200,18 @@ export class DatabaseManager { let fileTable, 
documentChunkTable, prCommentsTable; if (!tableNames.includes(this.fileEmbeddingsTable)) { - console.log(chalk.yellow(`Creating ${this.fileEmbeddingsTable} table with optimized schema...`)); + verboseLog({}, chalk.yellow(`Creating ${this.fileEmbeddingsTable} table with optimized schema...`)); fileTable = await db.createEmptyTable(this.fileEmbeddingsTable, fileSchema, { mode: 'create' }); - console.log(chalk.green(`Created ${this.fileEmbeddingsTable} table.`)); + verboseLog({}, chalk.green(`Created ${this.fileEmbeddingsTable} table.`)); } else { fileTable = await db.openTable(this.fileEmbeddingsTable); await this._checkSchemaCompatibility(fileTable, this.fileEmbeddingsTable, 'project_path'); } if (!tableNames.includes(this.documentChunkTable)) { - console.log(chalk.yellow(`Creating ${this.documentChunkTable} table with optimized schema...`)); + verboseLog({}, chalk.yellow(`Creating ${this.documentChunkTable} table with optimized schema...`)); documentChunkTable = await db.createEmptyTable(this.documentChunkTable, documentChunkSchema, { mode: 'create' }); - console.log(chalk.green(`Created ${this.documentChunkTable} table.`)); + verboseLog({}, chalk.green(`Created ${this.documentChunkTable} table.`)); } else { documentChunkTable = await db.openTable(this.documentChunkTable); await this._checkSchemaCompatibility(documentChunkTable, this.documentChunkTable, 'project_path'); @@ -219,9 +219,9 @@ export class DatabaseManager { // Create PR comments table if (!tableNames.includes(this.prCommentsTable)) { - console.log(chalk.yellow(`Creating ${this.prCommentsTable} table with optimized schema...`)); + verboseLog({}, chalk.yellow(`Creating ${this.prCommentsTable} table with optimized schema...`)); prCommentsTable = await db.createEmptyTable(this.prCommentsTable, prCommentsSchema, { mode: 'create' }); - console.log(chalk.green(`Created ${this.prCommentsTable} table.`)); + verboseLog({}, chalk.green(`Created ${this.prCommentsTable} table.`)); } else { prCommentsTable = await 
db.openTable(this.prCommentsTable); } @@ -331,17 +331,18 @@ export class DatabaseManager { async createAdaptiveVectorIndexes(table, tableName, vectorField = 'vector') { try { const rowCount = await table.countRows(); - console.log(chalk.blue(`[${tableName}] Row count: ${rowCount}`)); + verboseLog({}, chalk.blue(`[${tableName}] Row count: ${rowCount}`)); if (rowCount < 100) { - console.log(chalk.blue(`[${tableName}] Skipping indexing for small dataset (${rowCount} rows). Using exact search.`)); + verboseLog({}, chalk.blue(`[${tableName}] Skipping indexing for small dataset (${rowCount} rows). Using exact search.`)); return { indexType: 'exact', rowCount }; } else if (rowCount < 1000) { - console.log(chalk.blue(`[${tableName}] Using exact search for small dataset (${rowCount} rows) - no index needed`)); + verboseLog({}, chalk.blue(`[${tableName}] Using exact search for small dataset (${rowCount} rows) - no index needed`)); return { indexType: 'exact', rowCount }; } else if (rowCount < 10000) { const numPartitions = Math.max(Math.floor(Math.sqrt(rowCount / 50)), 2); - console.log( + verboseLog( + {}, chalk.blue(`[${tableName}] Creating/updating IVF-Flat index for medium dataset (${rowCount} rows, ${numPartitions} partitions)`) ); await table.createIndex(vectorField, { @@ -352,7 +353,8 @@ export class DatabaseManager { } else { const numPartitions = Math.max(Math.floor(Math.sqrt(rowCount / 100)), 8); const numSubVectors = Math.floor(this.embeddingDimensions / 4); - console.log( + verboseLog( + {}, chalk.blue(`[${tableName}] Creating/updating IVF-PQ index for large dataset (${rowCount} rows, ${numPartitions} partitions)`) ); await table.createIndex(vectorField, { @@ -367,7 +369,7 @@ export class DatabaseManager { } } catch (error) { if (error.message.includes('already exists')) { - console.log(chalk.green(`[${tableName}] Index already up-to-date.`)); + verboseLog({}, chalk.green(`[${tableName}] Index already up-to-date.`)); return { indexType: 'existing' }; } 
console.warn(chalk.yellow(`[${tableName}] Index creation/update failed: ${error.message}. Falling back to exact search.`)); @@ -391,7 +393,7 @@ export class DatabaseManager { try { await this.closeConnection(); - console.log(chalk.green('Database resources cleaned up.')); + verboseLog({}, chalk.green('Database resources cleaned up.')); } catch (error) { console.error(`Error during database cleanup: ${error.message}`); } finally { @@ -406,11 +408,11 @@ export class DatabaseManager { async clearAllEmbeddings() { let db = null; try { - console.log(chalk.cyan('Clearing ALL embeddings by dropping tables...')); - console.log(chalk.red('WARNING: This will affect all projects on this machine!')); + verboseLog({}, chalk.cyan('Clearing ALL embeddings by dropping tables...')); + verboseLog({}, chalk.red('WARNING: This will affect all projects on this machine!')); if (!fs.existsSync(this.dbPath)) { - console.log(chalk.yellow('LanceDB directory does not exist, nothing to clear.')); + verboseLog({}, chalk.yellow('LanceDB directory does not exist, nothing to clear.')); return true; } @@ -420,20 +422,20 @@ export class DatabaseManager { for (const tableName of [this.fileEmbeddingsTable, this.documentChunkTable, this.prCommentsTable]) { if (tableNames.includes(tableName)) { - console.log(chalk.yellow(`Dropping table ${tableName}...`)); + verboseLog({}, chalk.yellow(`Dropping table ${tableName}...`)); await db.dropTable(tableName); - console.log(chalk.green(`Table ${tableName} dropped.`)); + verboseLog({}, chalk.green(`Table ${tableName} dropped.`)); droppedCount++; } else { - console.log(chalk.yellow(`Table ${tableName} does not exist.`)); + verboseLog({}, chalk.yellow(`Table ${tableName} does not exist.`)); } } if (droppedCount > 0) { - console.log(chalk.green('All embedding tables have been dropped.')); - console.log(chalk.yellow('Run the embedding generation process again to recreate tables.')); + verboseLog({}, chalk.green('All embedding tables have been dropped.')); + 
verboseLog({}, chalk.yellow('Run the embedding generation process again to recreate tables.')); } else { - console.log(chalk.green('No embedding tables found to drop.')); + verboseLog({}, chalk.green('No embedding tables found to drop.')); } // Reset connection state @@ -478,10 +480,10 @@ export class DatabaseManager { throw new Error(`Project path too generic: ${resolvedProjectPath}. For safety, project must be at least 3 levels deep.`); } - console.log(chalk.cyan(`Clearing embeddings for project: ${resolvedProjectPath} (${projectName})`)); + verboseLog({}, chalk.cyan(`Clearing embeddings for project: ${resolvedProjectPath} (${projectName})`)); if (!fs.existsSync(this.dbPath)) { - console.log(chalk.yellow('LanceDB directory does not exist, nothing to clear.')); + verboseLog({}, chalk.yellow('LanceDB directory does not exist, nothing to clear.')); return true; } @@ -520,12 +522,12 @@ export class DatabaseManager { // This embeddings:clear command handles file embeddings, document embeddings, and project summaries if (deletedCount > 0) { - console.log(chalk.green(`Successfully cleared ${deletedCount} embeddings for project: ${resolvedProjectPath}`)); + verboseLog({}, chalk.green(`Successfully cleared ${deletedCount} embeddings for project: ${resolvedProjectPath}`)); // Optimize tables after cleanup to maintain performance await this._optimizeTablesAfterCleanup(db, tableNames); } else { - console.log(chalk.yellow(`No embeddings found for project: ${resolvedProjectPath}`)); + verboseLog({}, chalk.yellow(`No embeddings found for project: ${resolvedProjectPath}`)); } return true; @@ -560,8 +562,8 @@ export class DatabaseManager { if (currentSchema && currentSchema.fields) { const hasRequiredField = currentSchema.fields.some((field) => field.name === requiredField); if (!hasRequiredField) { - console.log(chalk.yellow(`Table ${tableName} has old schema without ${requiredField}. 
Migration needed.`)); - console.log(chalk.yellow(`Please clear embeddings and regenerate them to use the new schema with project isolation.`)); + console.warn(chalk.yellow(`Table ${tableName} has old schema without ${requiredField}. Migration needed.`)); + console.warn(chalk.yellow(`Please clear embeddings and regenerate them to use the new schema with project isolation.`)); } } } catch (schemaError) { @@ -586,14 +588,14 @@ export class DatabaseManager { `Table ${tableName} does not have project_path field. Cannot perform project-specific cleanup. Please regenerate embeddings to use the new schema with project isolation.` ); } - console.log(chalk.green(`✓ Table ${tableName} has project_path field for proper isolation`)); + verboseLog({}, chalk.green(`✓ Table ${tableName} has project_path field for proper isolation`)); } else { - console.log(chalk.yellow(`Table ${tableName} has no readable schema, skipping validation`)); + console.warn(chalk.yellow(`Table ${tableName} has no readable schema, skipping validation`)); } } catch (schemaError) { // If we can't read the schema, it might be because the table is empty or doesn't exist // In this case, we should just warn and continue - console.log(chalk.yellow(`Warning: Could not validate schema for ${tableName}: ${schemaError.message}`)); + console.warn(chalk.yellow(`Warning: Could not validate schema for ${tableName}: ${schemaError.message}`)); } } @@ -603,15 +605,15 @@ export class DatabaseManager { * @private */ async _createFTSIndexes(tableSpecs) { - console.log(chalk.blue('Creating native FTS indexes...')); + verboseLog({}, chalk.blue('Creating native FTS indexes...')); for (const [table, tableName, contentField] of tableSpecs) { try { await table.createIndex(contentField, { config: lancedb.Index.fts(), replace: false }); - console.log(chalk.green(`FTS index created/updated for ${tableName}`)); + verboseLog({}, chalk.green(`FTS index created/updated for ${tableName}`)); } catch (error) { if 
(error.message.toLowerCase().includes('already exists')) { - console.log(chalk.green(`FTS index already exists for ${tableName}.`)); + verboseLog({}, chalk.green(`FTS index already exists for ${tableName}.`)); } else { console.warn(chalk.yellow(`FTS index warning for ${tableName}: ${error.message}`)); } @@ -625,7 +627,7 @@ export class DatabaseManager { * @private */ async _createVectorIndexes(tableSpecs) { - console.log(chalk.blue('Creating adaptive vector indexes...')); + verboseLog({}, chalk.blue('Creating adaptive vector indexes...')); const indexResults = []; for (const [table, tableName, vectorField] of tableSpecs) { @@ -633,7 +635,7 @@ export class DatabaseManager { indexResults.push(indexInfo); } - console.log(chalk.green(`Indexing complete - ${JSON.stringify(indexResults)}`)); + verboseLog({}, chalk.green(`Indexing complete - ${JSON.stringify(indexResults)}`)); } /** @@ -642,13 +644,13 @@ export class DatabaseManager { * @private */ async _optimizeTables(tableSpecs) { - console.log(chalk.blue('Optimizing tables to sync indices with data...')); + verboseLog({}, chalk.blue('Optimizing tables to sync indices with data...')); for (const [table, tableName] of tableSpecs) { try { - console.log(chalk.blue(`Optimizing table: ${tableName}`)); + verboseLog({}, chalk.blue(`Optimizing table: ${tableName}`)); await table.optimize(); - console.log(chalk.green(`✓ Table ${tableName} optimized successfully`)); + verboseLog({}, chalk.green(`✓ Table ${tableName} optimized successfully`)); } catch (error) { // Handle legacy FTS index upgrade issues in v0.22.2 if (error.message && error.message.includes('legacy format')) { @@ -663,7 +665,7 @@ export class DatabaseManager { } } - console.log(chalk.green('Table optimization complete')); + verboseLog({}, chalk.green('Table optimization complete')); } /** @@ -704,7 +706,7 @@ export class DatabaseManager { }); if (projectRecords.length > 0) { - console.log(chalk.blue(`Found ${projectRecords.length} ${tableName} records for this 
project`)); + verboseLog({}, chalk.blue(`Found ${projectRecords.length} ${tableName} records for this project`)); let deletedCount = 0; for (const record of projectRecords) { @@ -716,10 +718,10 @@ export class DatabaseManager { } } - console.log(chalk.green(`Deleted ${deletedCount} ${tableName} records for this project`)); + verboseLog({}, chalk.green(`Deleted ${deletedCount} ${tableName} records for this project`)); return deletedCount; } else { - console.log(chalk.yellow(`No ${tableName} records found for this project`)); + verboseLog({}, chalk.yellow(`No ${tableName} records found for this project`)); return 0; } } @@ -732,11 +734,11 @@ export class DatabaseManager { try { const table = await this.getTable(this.prCommentsTable); if (table) { - console.log(chalk.blue(`Updating vector index for ${this.prCommentsTable}...`)); + verboseLog({}, chalk.blue(`Updating vector index for ${this.prCommentsTable}...`)); await this.createAdaptiveVectorIndexes(table, this.prCommentsTable, 'combined_embedding'); // Optimize table to sync indices with data (conditional due to legacy index issues) - console.log(chalk.blue(`Optimizing ${this.prCommentsTable} table...`)); + verboseLog({}, chalk.blue(`Optimizing ${this.prCommentsTable} table...`)); try { await table.optimize(); } catch (optimizeError) { @@ -747,7 +749,7 @@ export class DatabaseManager { } } - console.log(chalk.green(`Vector index for ${this.prCommentsTable} updated and optimized.`)); + verboseLog({}, chalk.green(`Vector index for ${this.prCommentsTable} updated and optimized.`)); } } catch (error) { console.error(chalk.red(`Error updating PR comments index: ${error.message}`)); @@ -807,7 +809,7 @@ export class DatabaseManager { // Optimize table to sync indices with data (conditional due to legacy index issues) try { await table.optimize(); - console.log(chalk.blue(`✓ Project summaries table optimized`)); + verboseLog({}, chalk.blue(`✓ Project summaries table optimized`)); } catch (optimizeError) { if 
(optimizeError.message && optimizeError.message.includes('legacy format')) { console.warn(chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`)); @@ -816,7 +818,7 @@ export class DatabaseManager { } } - console.log(chalk.green(`✅ Project summary stored for: ${resolvedProjectPath}`)); + verboseLog({}, chalk.green(`✅ Project summary stored for: ${resolvedProjectPath}`)); return true; } catch (error) { console.error(chalk.red(`Error storing project summary: ${error.message}`)); @@ -879,7 +881,7 @@ export class DatabaseManager { * @private */ async _optimizeTablesAfterCleanup(db, availableTableNames) { - console.log(chalk.blue('Optimizing tables after cleanup...')); + verboseLog({}, chalk.blue('Optimizing tables after cleanup...')); const tablesToOptimize = [ { name: this.fileEmbeddingsTable, displayName: 'File embeddings' }, @@ -892,9 +894,9 @@ export class DatabaseManager { if (availableTableNames.includes(name)) { try { const table = await db.openTable(name); - console.log(chalk.blue(`Optimizing ${displayName} table...`)); + verboseLog({}, chalk.blue(`Optimizing ${displayName} table...`)); await table.optimize(); - console.log(chalk.green(`✓ ${displayName} table optimized`)); + verboseLog({}, chalk.green(`✓ ${displayName} table optimized`)); } catch (error) { if (error.message && error.message.includes('legacy format')) { console.warn( @@ -909,7 +911,7 @@ export class DatabaseManager { } } - console.log(chalk.green('Post-cleanup table optimization complete')); + verboseLog({}, chalk.green('Post-cleanup table optimization complete')); } /** @@ -919,7 +921,7 @@ export class DatabaseManager { * @private */ async _createProjectSummariesTable(db) { - console.log(chalk.blue('Creating project summaries table...')); + verboseLog({}, chalk.blue('Creating project summaries table...')); const schema = new Schema([ new Field('id', new Utf8()), @@ -932,6 +934,6 @@ export class DatabaseManager { // Create table with 
empty initial data await db.createEmptyTable(PROJECT_SUMMARIES_TABLE, schema); - console.log(chalk.green(`✅ Project summaries table created: ${PROJECT_SUMMARIES_TABLE}`)); + verboseLog({}, chalk.green(`✅ Project summaries table created: ${PROJECT_SUMMARIES_TABLE}`)); } } diff --git a/src/embeddings/database.test.js b/src/embeddings/database.test.js index bb1d2d8..7513dc3 100644 --- a/src/embeddings/database.test.js +++ b/src/embeddings/database.test.js @@ -499,7 +499,7 @@ describe('DatabaseManager', () => { mockTable.schema = { fields: [{ name: 'other_field' }] }; mockDb.tableNames.mockResolvedValue(['file_embeddings']); await dbManager.initializeTables(); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('old schema')); + expect(console.warn).toHaveBeenCalledWith(expect.stringContaining('old schema')); }); it('should handle schema check errors', async () => { @@ -525,7 +525,11 @@ describe('DatabaseManager', () => { mockDb.tableNames.mockResolvedValue(['file_embeddings']); mockTable.query.mockReturnValue({ toArray: vi.fn().mockResolvedValue([]) }); await dbManager.clearProjectEmbeddings('/test/project/deep'); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining(expectedLog)); + if (expectedLog === 'has project_path field') { + expect(console.log).not.toHaveBeenCalledWith(expect.stringContaining(expectedLog)); + } else { + expect(console.warn).toHaveBeenCalledWith(expect.stringContaining(expectedLog)); + } }); }); @@ -542,7 +546,7 @@ describe('DatabaseManager', () => { mockDb.tableNames.mockResolvedValue([]); await dbManager.initializeTables(); if (error.includes('already')) { - expect(console.log).toHaveBeenCalledWith(expect.stringContaining(expectedLog)); + expect(console.log).not.toHaveBeenCalledWith(expect.stringContaining(expectedLog)); } else { expect(console.warn).toHaveBeenCalledWith(expect.stringContaining(expectedLog)); } diff --git a/src/embeddings/factory.js b/src/embeddings/factory.js index 9d40711..d730e0c 100644 --- 
a/src/embeddings/factory.js +++ b/src/embeddings/factory.js @@ -23,6 +23,7 @@ import chalk from 'chalk'; import { ContentRetriever } from '../content-retrieval.js'; import { CustomDocumentProcessor } from '../custom-documents.js'; +import { verboseLog } from '../utils/logging.js'; import { CacheManager } from './cache-manager.js'; import { EMBEDDING_DIMENSIONS, MODEL_NAME_STRING, MAX_RETRIES, LANCEDB_PATH, FASTEMBED_CACHE_DIR } from './constants.js'; import { DatabaseManager } from './database.js'; @@ -104,7 +105,7 @@ class EmbeddingsSystem { customDocumentProcessor: this.customDocumentProcessor, }; - console.log(chalk.green('[EmbeddingsSystem] System created with dependency injection')); + verboseLog({}, chalk.green('[EmbeddingsSystem] System created with dependency injection')); } // ============================================================================ @@ -131,7 +132,7 @@ class EmbeddingsSystem { await this.initializationPromise; this.initialized = true; this.initializing = false; - console.log(chalk.green('[EmbeddingsSystem] System initialized successfully')); + verboseLog({}, chalk.green('[EmbeddingsSystem] System initialized successfully')); } catch (error) { this.initializing = false; this.initializationPromise = null; @@ -144,7 +145,7 @@ class EmbeddingsSystem { * @private */ async _performInitialization() { - console.log(chalk.blue('[EmbeddingsSystem] Initializing embeddings system...')); + verboseLog({}, chalk.blue('[EmbeddingsSystem] Initializing embeddings system...')); try { // Initialize database and tables @@ -153,7 +154,7 @@ class EmbeddingsSystem { // Initialize the model await this.modelManager.initialize(); - console.log(chalk.green('[EmbeddingsSystem] All components initialized successfully')); + verboseLog({}, chalk.green('[EmbeddingsSystem] All components initialized successfully')); } catch (error) { console.error(chalk.red(`[EmbeddingsSystem] Initialization failed: ${error.message}`)); throw new EmbeddingError(`System initialization 
failed: ${error.message}`, 'SYSTEM_INITIALIZATION_FAILED', error); @@ -388,7 +389,7 @@ class EmbeddingsSystem { this.cleaningUp = true; try { - console.log(chalk.yellow('[EmbeddingsSystem] Cleaning up system resources...')); + verboseLog({}, chalk.yellow('[EmbeddingsSystem] Cleaning up system resources...')); // Cleanup all components await Promise.all([ @@ -405,7 +406,7 @@ class EmbeddingsSystem { this.initializing = false; this.initializationPromise = null; - console.log(chalk.green('[EmbeddingsSystem] System cleanup completed')); + verboseLog({}, chalk.green('[EmbeddingsSystem] System cleanup completed')); } catch (error) { console.error(chalk.red(`[EmbeddingsSystem] Error during cleanup: ${error.message}`)); throw error; diff --git a/src/embeddings/file-processor.js b/src/embeddings/file-processor.js index 114b14c..6e5c384 100644 --- a/src/embeddings/file-processor.js +++ b/src/embeddings/file-processor.js @@ -19,7 +19,7 @@ import path from 'node:path'; import chalk from 'chalk'; import { isDocumentationFile, shouldProcessFile as utilsShouldProcessFile, batchCheckGitignore } from '../utils/file-validation.js'; import { detectLanguageFromExtension } from '../utils/language-detection.js'; -import { debug } from '../utils/logging.js'; +import { debug, verboseLog } from '../utils/logging.js'; import { extractMarkdownChunks } from '../utils/markdown.js'; import { slugify } from '../utils/string-utils.js'; import { TABLE_NAMES, LANCEDB_DIR_NAME, FASTEMBED_CACHE_DIR_NAME } from './constants.js'; @@ -148,7 +148,7 @@ export class FileProcessor { * @returns {Promise} True if successful, false otherwise */ async generateDirectoryStructureEmbedding(options = {}) { - console.log(chalk.cyan('[generateDirEmb] Starting...')); // Log entry + verboseLog(options, chalk.cyan('[generateDirEmb] Starting...')); // Log entry if (!this.modelManager) { throw createFileProcessingError('ModelManager is required for directory structure embedding'); @@ -217,7 +217,7 @@ export class 
FileProcessor { debug('[generateDirEmb] Attempting table.add...'); try { await table.add([record]); - console.log(chalk.green('[generateDirEmb] Successfully added directory structure embedding.')); + verboseLog(options, chalk.green('[generateDirEmb] Successfully added directory structure embedding.')); return true; // Indicate success } catch (addError) { console.error(chalk.red(`[generateDirEmb] !!! Error during table.add: ${addError.message}`), addError.stack); @@ -265,10 +265,10 @@ export class FileProcessor { return { processed: 0, failed: filePaths.length, skipped: 0, excluded: 0, files: [], failedFiles: [...filePaths], excludedFiles: [] }; } - console.log(chalk.blue('Ensuring database tables exist before batch processing...')); + verboseLog(options, chalk.blue('Ensuring database tables exist before batch processing...')); try { await this.databaseManager.getDB(); - console.log(chalk.green('Database table check complete.')); + verboseLog(options, chalk.green('Database table check complete.')); } catch (dbError) { console.error(chalk.red(`Failed to initialize database or tables: ${dbError.message}. 
Aborting batch process.`)); return { processed: 0, failed: filePaths.length, skipped: 0, excluded: 0, files: [], failedFiles: [...filePaths], excludedFiles: [] }; @@ -278,7 +278,7 @@ export class FileProcessor { const exclusionOptions = { excludePatterns, respectGitignore, baseDir: resolvedCanonicalBaseDir }; this.processedFiles.clear(); this.progressTracker.reset(filePaths.length); - console.log(chalk.blue(`Starting batch processing of ${filePaths.length} files...`)); + verboseLog(options, chalk.blue(`Starting batch processing of ${filePaths.length} files...`)); // Generate directory structure embedding first try { @@ -303,12 +303,19 @@ export class FileProcessor { } // Process files in batches - console.log(chalk.cyan('--- Starting Phase 1: File Embeddings ---')); + verboseLog(options, chalk.cyan('--- Starting Phase 1: File Embeddings ---')); const BATCH_SIZE = 50; // Process files in smaller batches for better performance for (let i = 0; i < filePaths.length; i += BATCH_SIZE) { const batch = filePaths.slice(i, i + BATCH_SIZE); - const batchResults = await this._processBatch(batch, resolvedCanonicalBaseDir, exclusionOptions, onProgress, maxLines); + const batchResults = await this._processBatch( + batch, + resolvedCanonicalBaseDir, + exclusionOptions, + onProgress, + maxLines, + options.verbose + ); // Merge results results.processed += batchResults.processed; @@ -323,7 +330,7 @@ export class FileProcessor { // Process document chunks await this._processDocumentChunks(filePaths, resolvedCanonicalBaseDir, excludePatterns); - console.log(chalk.green(`Batch processing complete!`)); + verboseLog(options, chalk.green(`Batch processing complete!`)); // Update progress tracker counts for internal tracking this.progressTracker.processedCount = results.processed; @@ -346,7 +353,7 @@ export class FileProcessor { * @returns {Promise} Batch processing results * @private */ - async _processBatch(filePaths, baseDir, exclusionOptions, onProgress, maxLines = 1000) { + async 
_processBatch(filePaths, baseDir, exclusionOptions, onProgress, maxLines = 1000, verbose = false) { const results = { processed: 0, failed: 0, skipped: 0, excluded: 0, files: [], failedFiles: [], excludedFiles: [] }; // ============================================================================ @@ -354,12 +361,12 @@ export class FileProcessor { // ============================================================================ let gitignoreCache = new Map(); if (exclusionOptions.respectGitignore !== false) { - console.log(chalk.cyan(`Performing batch gitignore check for ${filePaths.length} files...`)); + verboseLog({ verbose }, chalk.cyan(`Performing batch gitignore check for ${filePaths.length} files...`)); const gitStartTime = Date.now(); const absoluteFilePaths = filePaths.map((fp) => (path.isAbsolute(fp) ? path.resolve(fp) : path.resolve(baseDir, fp))); - gitignoreCache = await batchCheckGitignore(absoluteFilePaths, baseDir); + gitignoreCache = await batchCheckGitignore(absoluteFilePaths, baseDir, { verbose }); const gitDuration = ((Date.now() - gitStartTime) / 1000).toFixed(2); - console.log(chalk.green(`✓ Batch gitignore check completed in ${gitDuration}s`)); + verboseLog({ verbose }, chalk.green(`✓ Batch gitignore check completed in ${gitDuration}s`)); } // ============================================================================ @@ -382,7 +389,7 @@ export class FileProcessor { existingFilesMap.get(record.path).push(record); } - console.log(chalk.cyan(`Found ${existingRecords.length} existing embeddings for comparison`)); + verboseLog({ verbose }, chalk.cyan(`Found ${existingRecords.length} existing embeddings for comparison`)); } catch (queryError) { console.warn(chalk.yellow(`Warning: Could not query existing embeddings: ${queryError.message}`)); } @@ -454,7 +461,7 @@ export class FileProcessor { } } - console.log(chalk.cyan(`Pre-filtered to ${candidateFiles.length} candidate files (excluded ${results.excluded})`)); + verboseLog({ verbose }, chalk.cyan(`Pre-filtered to 
${candidateFiles.length} candidate files (excluded ${results.excluded})`)); // ============================================================================ // PHASE 4: READ FILES AND CONTENT HASH CHECK @@ -557,7 +564,8 @@ export class FileProcessor { // Generate embeddings only for files that need processing if (filesToActuallyProcess.length > 0) { - console.log( + verboseLog( + {}, chalk.cyan( `Processing ${filesToActuallyProcess.length} new/changed files (skipped ${filesToProcess.length - filesToActuallyProcess.length} unchanged)` ) @@ -606,7 +614,7 @@ export class FileProcessor { await fileTable.optimize(); } catch (optimizeError) { if (optimizeError.message && optimizeError.message.includes('legacy format')) { - console.log( + console.warn( chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`) ); } else { @@ -651,7 +659,7 @@ export class FileProcessor { * @private */ async _processDocumentChunks(filePaths, baseDir) { - console.log(chalk.cyan('--- Starting Phase 2: Document Chunk Embeddings ---')); + verboseLog({}, chalk.cyan('--- Starting Phase 2: Document Chunk Embeddings ---')); const documentChunkTable = await this.databaseManager.getTable(this.documentChunkTable); if (!documentChunkTable) { console.warn(chalk.yellow(`Skipping Phase 2: Document Chunk Embeddings because table ${this.documentChunkTable} was not found.`)); @@ -674,7 +682,7 @@ export class FileProcessor { existingDocChunksMap.get(chunk.original_document_path).push(chunk); } - console.log(chalk.cyan(`Found ${existingChunks.length} existing document chunks for comparison`)); + verboseLog({}, chalk.cyan(`Found ${existingChunks.length} existing document chunks for comparison`)); } catch (queryError) { console.warn(chalk.yellow(`Warning: Could not query existing document chunks, will process all docs: ${queryError.message}`)); existingDocChunksMap = new Map(); @@ -754,11 +762,11 @@ export class FileProcessor { } if (skippedDocCount > 0) { - 
console.log(chalk.cyan(`Skipped ${skippedDocCount} unchanged documentation files`)); + verboseLog({}, chalk.cyan(`Skipped ${skippedDocCount} unchanged documentation files`)); } if (allDocChunksToEmbed.length > 0) { - console.log(chalk.blue(`Extracted ${allDocChunksToEmbed.length} total document chunks to process for embeddings.`)); + verboseLog({}, chalk.blue(`Extracted ${allDocChunksToEmbed.length} total document chunks to process for embeddings.`)); const chunkContentsForBatching = allDocChunksToEmbed.map((chunk) => chunk.content); const chunkEmbeddings = await this.modelManager.calculateEmbeddingBatch(chunkContentsForBatching); @@ -807,13 +815,14 @@ export class FileProcessor { await documentChunkTable.optimize(); } catch (optimizeError) { if (optimizeError.message && optimizeError.message.includes('legacy format')) { - console.log(chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`)); + console.warn(chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`)); } else { console.warn(chalk.yellow(`Warning: Failed to optimize document chunk table after adding records: ${optimizeError.message}`)); } } - console.log( + verboseLog( + {}, chalk.green(`Successfully added ${allDocChunkRecordsToAdd.length} document chunk embeddings to ${this.documentChunkTable}.`) ); } catch (addError) { @@ -821,7 +830,7 @@ export class FileProcessor { } } - console.log(chalk.green('--- Finished Phase 2: Document Chunk Embeddings ---')); + verboseLog({}, chalk.green('--- Finished Phase 2: Document Chunk Embeddings ---')); } // ============================================================================ @@ -841,7 +850,7 @@ export class FileProcessor { try { this.processedFiles.clear(); this.progressTracker.reset(0); - console.log(chalk.green('[FileProcessor] Resources cleaned up.')); + verboseLog({}, chalk.green('[FileProcessor] Resources cleaned up.')); } catch (error) { 
console.error(chalk.red(`[FileProcessor] Error during cleanup: ${error.message}`)); } finally { diff --git a/src/embeddings/file-processor.test.js b/src/embeddings/file-processor.test.js index b1c2052..0d06ad7 100644 --- a/src/embeddings/file-processor.test.js +++ b/src/embeddings/file-processor.test.js @@ -30,7 +30,7 @@ vi.mock('../utils/markdown.js', () => ({ extractMarkdownChunks: vi.fn().mockReturnValue({ chunks: [], documentH1: 'Test' }), })); -vi.mock('../utils/logging.js', () => ({ debug: vi.fn() })); +vi.mock('../utils/logging.js', () => ({ debug: vi.fn(), verboseLog: vi.fn() })); // ============================================================================ // Shared Setup @@ -430,7 +430,7 @@ describe('FileProcessor', () => { mockTable.optimize.mockRejectedValue(new Error(errorMsg)); const result = await processor.processBatchEmbeddings(['/test/file.js'], { baseDir: '/test' }); if (errorMsg === 'legacy format') { - expect(console.log).toHaveBeenCalledWith(expect.stringContaining(expectedLog)); + expect(console.warn).toHaveBeenCalledWith(expect.stringContaining(expectedLog)); } else { expect(console.warn).toHaveBeenCalledWith(expect.stringContaining(expectedLog)); } @@ -515,7 +515,6 @@ describe('FileProcessor', () => { mockModelManager.calculateEmbeddingBatch.mockResolvedValue([createMockEmbedding()]); await processor.processBatchEmbeddings(['/test/doc.md'], { baseDir: '/test' }); expect(docMockTable.add).toHaveBeenCalled(); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('Successfully added')); }); it('should handle document chunk optimize legacy format error', async () => { @@ -532,7 +531,7 @@ describe('FileProcessor', () => { setupFileSystemMocks('# Title\n\nContent'); mockModelManager.calculateEmbeddingBatch.mockResolvedValue([createMockEmbedding()]); await processor.processBatchEmbeddings(['/test/doc.md'], { baseDir: '/test' }); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('legacy index format')); + 
expect(console.warn).toHaveBeenCalledWith(expect.stringContaining('legacy index format')); }); it('should handle document chunk processing errors', async () => { diff --git a/src/embeddings/model-manager.js b/src/embeddings/model-manager.js index 2caccda..760f784 100644 --- a/src/embeddings/model-manager.js +++ b/src/embeddings/model-manager.js @@ -22,7 +22,7 @@ import fs from 'node:fs'; import chalk from 'chalk'; import dotenv from 'dotenv'; import { EmbeddingModel, FlagEmbedding } from 'fastembed'; -import { debug } from '../utils/logging.js'; +import { debug, verboseLog } from '../utils/logging.js'; import { EMBEDDING_DIMENSIONS, MODEL_NAME_STRING, MAX_RETRIES } from './constants.js'; import { FASTEMBED_CACHE_DIR } from './constants.js'; import { createModelInitializationError, createEmbeddingGenerationError } from './errors.js'; @@ -48,7 +48,7 @@ export class ModelManager { this.modelInitializationPromise = null; this.cleaningUp = false; - console.log(chalk.magenta(`[ModelManager] Using MODEL = ${this.modelNameString}, DIMENSIONS = ${this.embeddingDimensions}`)); + verboseLog({}, chalk.magenta(`[ModelManager] Using MODEL = ${this.modelNameString}, DIMENSIONS = ${this.embeddingDimensions}`)); } // ============================================================================ @@ -76,13 +76,13 @@ export class ModelManager { // Only print logs if we haven't initialized before if (!this.modelInitialized) { - console.log(chalk.blue(`Attempting to initialize fastembed model. Identifier: ${this.modelNameString}`)); - console.log(chalk.blue(`FastEmbed Cache Directory: ${this.cacheDir}`)); + verboseLog({}, chalk.blue(`Attempting to initialize fastembed model. 
Identifier: ${this.modelNameString}`)); + verboseLog({}, chalk.blue(`FastEmbed Cache Directory: ${this.cacheDir}`)); } try { if (!fs.existsSync(this.cacheDir)) { - console.log(chalk.yellow(`Creating fastembed cache directory: ${this.cacheDir}`)); + verboseLog({}, chalk.yellow(`Creating fastembed cache directory: ${this.cacheDir}`)); fs.mkdirSync(this.cacheDir, { recursive: true }); } @@ -96,7 +96,7 @@ export class ModelManager { // Only print success message if we haven't initialized before if (!this.modelInitialized) { - console.log(chalk.green('FastEmbed model initialized successfully.')); + verboseLog({}, chalk.green('FastEmbed model initialized successfully.')); this.modelInitialized = true; } break; // Exit loop on success @@ -327,7 +327,7 @@ export class ModelManager { this.cacheManager.clearCache('embedding'); } - console.log(chalk.green('[ModelManager] Model resources cleaned up.')); + verboseLog({}, chalk.green('[ModelManager] Model resources cleaned up.')); } catch (error) { console.error(chalk.red(`[ModelManager] Error during cleanup: ${error.message}`)); } finally { diff --git a/src/feedback-loader.js b/src/feedback-loader.js index 88c9096..b0f3765 100644 --- a/src/feedback-loader.js +++ b/src/feedback-loader.js @@ -15,39 +15,41 @@ import path from 'path'; import chalk from 'chalk'; import { getDefaultEmbeddingsSystem } from './embeddings/factory.js'; import { calculateCosineSimilarity } from './embeddings/similarity-calculator.js'; +import { verboseLog } from './utils/logging.js'; /** * Load feedback data from artifacts directory * * @param {string} feedbackPath - Path to feedback artifacts directory * @param {Object} options - Loading options + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @returns {Promise} Loaded feedback data */ export async function loadFeedbackData(feedbackPath, options = {}) { const { verbose = false } = options; if (!feedbackPath) { - if (verbose) console.log(chalk.gray('No feedback path 
provided')); + verboseLog({ verbose }, chalk.gray('No feedback path provided')); return {}; } try { if (!fs.existsSync(feedbackPath)) { - if (verbose) console.log(chalk.gray(`Feedback directory not found: ${feedbackPath}`)); + verboseLog({ verbose }, chalk.gray(`Feedback directory not found: ${feedbackPath}`)); return {}; } - if (verbose) console.log(chalk.cyan(`📁 Loading feedback from: ${feedbackPath}`)); + verboseLog({ verbose }, chalk.cyan(`📁 Loading feedback from: ${feedbackPath}`)); // Look for feedback files in the directory const feedbackFiles = fs.readdirSync(feedbackPath).filter((file) => file.startsWith('feedback-') && file.endsWith('.json')); if (feedbackFiles.length === 0) { - if (verbose) console.log(chalk.gray('No feedback files found')); + verboseLog({ verbose }, chalk.gray('No feedback files found')); return {}; } - if (verbose) console.log(chalk.cyan(`📥 Found ${feedbackFiles.length} feedback file(s)`)); + verboseLog({ verbose }, chalk.cyan(`📥 Found ${feedbackFiles.length} feedback file(s)`)); // Load and merge all feedback files const allFeedback = {}; @@ -64,25 +66,21 @@ export async function loadFeedbackData(feedbackPath, options = {}) { Object.assign(allFeedback, feedbackData.feedback); const itemCount = Object.keys(feedbackData.feedback).length; totalItems += itemCount; - if (verbose) { - console.log(chalk.cyan(`📋 Loaded feedback from ${file}: ${itemCount} items`)); - } + verboseLog({ verbose }, chalk.cyan(`📋 Loaded feedback from ${file}: ${itemCount} items`)); } } catch (parseError) { - console.log(chalk.yellow(`⚠️ Error parsing feedback file ${file}: ${parseError.message}`)); + console.warn(chalk.yellow(`⚠️ Error parsing feedback file ${file}: ${parseError.message}`)); } } if (totalItems > 0) { - if (verbose) { - console.log(chalk.green(`✅ Successfully loaded ${totalItems} feedback items total`)); - } + verboseLog({ verbose }, chalk.green(`✅ Successfully loaded ${totalItems} feedback items total`)); return allFeedback; } return {}; } catch (error) { - 
console.log(chalk.red(`❌ Error loading feedback data: ${error.message}`)); + console.error(chalk.red(`❌ Error loading feedback data: ${error.message}`)); return {}; } } @@ -113,9 +111,9 @@ export async function initializeSemanticSimilarity() { await embeddingsSystem.initialize(); semanticSimilarityInitialized = true; semanticSimilarityAvailable = true; - console.log(chalk.green('[FeedbackLoader] Semantic similarity initialized using embeddings system')); + verboseLog({}, chalk.green('[FeedbackLoader] Semantic similarity initialized using embeddings system')); } catch (error) { - console.log(chalk.yellow(`[FeedbackLoader] Semantic similarity initialization failed: ${error.message}`)); + console.warn(chalk.yellow(`[FeedbackLoader] Semantic similarity initialization failed: ${error.message}`)); semanticSimilarityAvailable = false; } } @@ -157,7 +155,7 @@ async function calculateSemanticSimilarity(text1, text2) { // Cosine similarity ranges from -1 to 1, normalize to 0-1 return (similarity + 1) / 2; } catch (error) { - console.log(chalk.yellow(`[FeedbackLoader] Semantic similarity calculation failed: ${error.message}`)); + console.warn(chalk.yellow(`[FeedbackLoader] Semantic similarity calculation failed: ${error.message}`)); return null; } } @@ -173,9 +171,9 @@ async function calculateSemanticSimilarity(text1, text2) { * @param {string} issueDescription - Description of the current issue * @param {Object} feedbackData - Loaded feedback data * @param {Object} options - Filtering options - * @param {number} options.similarityThreshold - Threshold for considering issues similar (default: 0.7) - * @param {boolean} options.verbose - Enable verbose logging - * @param {boolean} options.useSemanticSimilarity - Use semantic similarity when available (default: true) + * @param {number} [options.similarityThreshold=0.7] - Threshold for considering issues similar + * @param {boolean} [options.verbose=false] - Enable verbose progress logging + * @param {boolean} 
[options.useSemanticSimilarity=true] - Use semantic similarity when available * @returns {Promise} True if issue should be skipped */ export async function shouldSkipSimilarIssue(issueDescription, feedbackData, options = {}) { @@ -205,9 +203,7 @@ export async function shouldSkipSimilarIssue(issueDescription, feedbackData, opt // Determine if we should use semantic similarity const canUseSemanticSimilarity = useSemanticSimilarity && isSemanticSimilarityAvailable(); - if (verbose && canUseSemanticSimilarity) { - console.log(chalk.cyan('🔍 Using semantic similarity for issue comparison')); - } + verboseLog({ verbose: verbose && canUseSemanticSimilarity }, chalk.cyan('🔍 Using semantic similarity for issue comparison')); // Check similarity with dismissed issues for (const dismissed of dismissedIssues) { @@ -233,11 +229,12 @@ export async function shouldSkipSimilarIssue(issueDescription, feedbackData, opt } if (similarity > similarityThreshold) { - if (verbose) { - console.log(chalk.yellow(`⏭️ Skipping similar dismissed issue (${(similarity * 100).toFixed(1)}% ${similarityMethod} similarity)`)); - console.log(chalk.gray(` Current: ${issueDescription.substring(0, 80)}...`)); - console.log(chalk.gray(` Previous: ${dismissed.originalIssue.substring(0, 80)}...`)); - } + verboseLog( + { verbose }, + chalk.yellow(`⏭️ Skipping similar dismissed issue (${(similarity * 100).toFixed(1)}% ${similarityMethod} similarity)`) + ); + verboseLog({ verbose }, chalk.gray(` Current: ${issueDescription.substring(0, 80)}...`)); + verboseLog({ verbose }, chalk.gray(` Previous: ${dismissed.originalIssue.substring(0, 80)}...`)); return true; } } @@ -252,7 +249,7 @@ export async function shouldSkipSimilarIssue(issueDescription, feedbackData, opt * @param {string} text1 - First text * @param {string} text2 - Second text * @param {Object} options - Options - * @param {boolean} options.useSemanticSimilarity - Use semantic similarity when available (default: true) + * @param {boolean} [options.useSemanticSimilarity=true] - 
Use semantic similarity when available * @returns {Promise<{similarity: number, method: string}>} Similarity result with method used */ export async function calculateIssueSimilarity(text1, text2, options = {}) { @@ -327,6 +324,8 @@ export function calculateWordSimilarity(text1, text2) { * * @param {Object} feedbackData - Loaded feedback data * @param {Object} options - Extraction options + * @param {number} [options.maxPatterns=10] - Maximum number of dismissed patterns to include + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @returns {Array} Array of dismissed issue patterns */ export function extractDismissedPatterns(feedbackData, options = {}) { @@ -355,9 +354,10 @@ export function extractDismissedPatterns(feedbackData, options = {}) { })) .slice(0, maxPatterns); - if (verbose && dismissedIssues.length > 0) { - console.log(chalk.cyan(`📋 Extracted ${dismissedIssues.length} dismissed issue patterns for LLM context`)); - } + verboseLog( + { verbose: verbose && dismissedIssues.length > 0 }, + chalk.cyan(`📋 Extracted ${dismissedIssues.length} dismissed issue patterns for LLM context`) + ); return dismissedIssues; } diff --git a/src/index.js index a3a0d12..90b032b 100755 --- a/src/index.js +++ b/src/index.js @@ -30,6 +30,7 @@ import { ProjectAnalyzer } from './project-analyzer.js'; import { reviewFile, reviewFiles, reviewPullRequest } from './rag-review.js'; import { execGitSafe } from './utils/command.js'; import { ensureBranchExists, findBaseBranch } from './utils/git.js'; +import { verboseLog } from './utils/logging.js'; // Create a default embeddings system instance const embeddingsSystem = getDefaultEmbeddingsSystem(); @@ -406,9 +407,7 @@ async function runCodeReview(options) { const endTime = Date.now(); const duration = ((endTime - startTime) / 1000).toFixed(2); - if (options.verbose) { - console.log(chalk.blue(`Review process took ${duration} seconds.`)); - } + verboseLog(options, chalk.blue(`Review process took 
${duration} seconds.`)); // Process and output results if (reviewResult && reviewResult.success) { @@ -630,15 +629,14 @@ async function generateEmbeddings(options) { await embeddingsSystem.storeProjectSummary(projectDir, projectSummary); console.log(chalk.green('✅ Project analysis complete and stored')); - if (options.verbose) { - console.log(chalk.gray(` Project: ${projectSummary.projectName}`)); - console.log( - chalk.gray( - ` Technologies: ${projectSummary.technologies.slice(0, 5).join(', ')}${projectSummary.technologies.length > 5 ? '...' : ''}` - ) - ); - console.log(chalk.gray(` Key patterns: ${projectSummary.keyPatterns.length}`)); - } + verboseLog(options, chalk.gray(` Project: ${projectSummary.projectName}`)); + verboseLog( + options, + chalk.gray( + ` Technologies: ${projectSummary.technologies.slice(0, 5).join(', ')}${projectSummary.technologies.length > 5 ? '...' : ''}` + ) + ); + verboseLog(options, chalk.gray(` Key patterns: ${projectSummary.keyPatterns.length}`)); } catch (error) { console.error(chalk.red('⚠️ Project analysis failed but continuing:'), error.message); } @@ -756,10 +754,12 @@ async function showEmbeddingStats(options) { * * @param {string} directory - Directory to search * @param {object} options - Options from generateEmbeddings command + * @param {boolean} [options.verbose=false] - Enable verbose glob and filtering logs + * @param {string} [options.filePattern] - Optional override pattern instead of the default supported-file patterns + * @param {string[]} [options.excludePatterns] - Additional glob exclusion patterns * @returns {Promise<Array<string>>} Array of file paths */ async function findSupportedFiles(directory, options = {}) { - const verbose = options.verbose || false; const baseDir = path.resolve(directory); // Default patterns match common code files - adjust as needed @@ -835,19 +835,15 @@ async function findSupportedFiles(directory, options = {}) { // Instead, we rely on the shouldProcessFile check in embeddings.js which uses git
check-ignore globOptions.ignore = [...excludePatterns]; // Use only explicit excludes - if (verbose) { - console.log(chalk.cyan('Using async glob to find files...')); - console.log(chalk.gray(` Patterns: ${patternsToUse.join(', ')}`)); - console.log(chalk.gray(` Options:`), globOptions); - } + verboseLog(options, chalk.cyan('Using async glob to find files...')); + verboseLog(options, chalk.gray(` Patterns: ${patternsToUse.join(', ')}`)); + verboseLog(options, chalk.gray(` Options:`), globOptions); try { // Use asynchronous glob const files = await glob.glob(patternsToUse, globOptions); - if (verbose) { - console.log(chalk.green(`Glob found ${files.length} potential files.`)); - } + verboseLog(options, chalk.green(`Glob found ${files.length} potential files.`)); // Filter results to ensure they are actual files (glob with stat should mostly handle this) // And apply the final utilsShouldProcessFile check (e.g., for binary content if needed) @@ -864,9 +860,7 @@ async function findSupportedFiles(directory, options = {}) { // finalFiles.push(file); // } // } catch (statError) { - // if (verbose) { - // console.warn(chalk.yellow(`Skipping file due to stat error ${path.relative(baseDir, file)}: ${statError.message}`)); - // } + // console.warn(chalk.yellow(`Skipping file due to stat error ${path.relative(baseDir, file)}: ${statError.message}`)); // } // } @@ -874,9 +868,7 @@ async function findSupportedFiles(directory, options = {}) { const finalFiles = files; // Add log after the filtering loop (now just assignment) - if (verbose) { - console.log(chalk.green(`Finished filtering glob results. ${finalFiles.length} files remain.`)); - } + verboseLog(options, chalk.green(`Finished filtering glob results. 
${finalFiles.length} files remain.`)); return finalFiles; } catch (err) { if (err.name === 'AbortError') { @@ -981,13 +973,13 @@ function getChangedFiles(branch, workingDir = process.cwd()) { // REMOVED: checkBranchExists function - Moved to utils.js // --- Output Formatting Functions --- // -// These need to be adapted to the structure returned by cag-review.js functions +// These consume the normalized results returned by the RAG review pipeline /** * Output results in JSON format * - * @param {Array} reviewResults - Array of individual file review results from cag-review - * @param {Object} cliOptions - Command line options + * @param {Array} reviewResults - Array of individual file review results + * @param {Object} options - Command line options */ function outputJson(reviewResults, options) { // Structure the output to be informative @@ -1149,17 +1141,13 @@ function outputText(reviewResults, cliOptions) { return; } if (fileResult.skipped) { - if (cliOptions.verbose) { - console.log(chalk.yellow(`\nSkipped: ${fileResult.filePath}`)); - } + verboseLog(cliOptions, chalk.yellow(`\nSkipped: ${fileResult.filePath}`)); return; } if (!fileResult.results || !fileResult.results.issues?.length) { - if (cliOptions.verbose) { - console.log(chalk.green(`\nNo findings for: ${fileResult.filePath}`)); - if (fileResult.results?.summary) { - console.log(chalk.green(` Summary: ${fileResult.results.summary}`)); - } + verboseLog(cliOptions, chalk.green(`\nNo findings for: ${fileResult.filePath}`)); + if (fileResult.results?.summary) { + verboseLog(cliOptions, chalk.green(` Summary: ${fileResult.results.summary}`)); } return; } @@ -1287,6 +1275,7 @@ async function analyzePRHistory(options) { resume: options.resume, clearExisting: options.clear, projectPath, + verbose: options.verbose, onProgress: (progress) => displayProgress(progress, options.verbose), }; @@ -1305,9 +1294,7 @@ async function analyzePRHistory(options) { const endTime = Date.now(); const duration = ((endTime - 
startTime) / 1000).toFixed(2); console.error(chalk.red(`\nError during PR history analysis (${duration}s):`), error.message); - if (options.verbose) { - console.error(error.stack); - } + verboseLog(options, error.stack); process.exit(1); } } @@ -1406,9 +1393,7 @@ async function clearPRHistory(options) { } } catch (error) { console.error(chalk.red('Error clearing PR history data:'), error.message); - if (options.verbose) { - console.error(error.stack); - } + verboseLog(options, error.stack); process.exit(1); } } diff --git a/src/llm.js b/src/llm.js index 7b2fdb7..bc7b5b2 100644 --- a/src/llm.js +++ b/src/llm.js @@ -15,6 +15,7 @@ import { Anthropic } from '@anthropic-ai/sdk'; import chalk from 'chalk'; import dotenv from 'dotenv'; +import { verboseLog } from './utils/logging.js'; // Load env variables if present; do not enforce key at import time dotenv.config(); @@ -56,7 +57,7 @@ async function sendPromptToClaude(prompt, options = {}) { const { model = DEFAULT_MODEL, maxTokens = MAX_TOKENS, temperature = 0.7, system = '', jsonSchema = null, cacheTtl = '5m' } = options; try { - console.log(chalk.cyan('Sending prompt to Claude...')); + verboseLog(options, chalk.cyan('Sending prompt to Claude...')); const client = getAnthropicClient(); @@ -104,8 +105,8 @@ async function sendPromptToClaude(prompt, options = {}) { const response = await client.messages.create(requestParams); // Log response structure for debugging - console.log(chalk.gray(` Response stop_reason: ${response.stop_reason}`)); - console.log(chalk.gray(` Response content blocks: ${response.content?.length || 0}`)); + verboseLog(options, chalk.gray(` Response stop_reason: ${response.stop_reason}`)); + verboseLog(options, chalk.gray(` Response content blocks: ${response.content?.length || 0}`)); // Process response based on whether we used tool calling if (jsonSchema) { diff --git a/src/pr-history/analyzer.js b/src/pr-history/analyzer.js index 250cf7c..9613f66 100644 --- a/src/pr-history/analyzer.js +++ 
b/src/pr-history/analyzer.js @@ -6,6 +6,7 @@ */ import chalk from 'chalk'; +import { verboseLog } from '../utils/logging.js'; import { PRCommentProcessor } from './comment-processor.js'; import { clearPRComments, getPRCommentsStats, getProcessedPRDateRange, shouldSkipPR, storePRCommentsBatch } from './database.js'; import { GitHubAPIClient } from './github-client.js'; @@ -125,6 +126,14 @@ export class PRHistoryAnalyzer { * Analyze PR comment history for a repository * @param {string} repository - Repository in format "owner/repo" * @param {Object} options - Analysis options + * @param {string|null} [options.since=null] - Lower bound ISO date for PR selection + * @param {string|null} [options.until=null] - Upper bound ISO date for PR selection + * @param {number|null} [options.limit=null] - Maximum number of PRs to analyze + * @param {boolean} [options.resume=false] - Resume from saved analysis progress + * @param {boolean} [options.clearExisting=false] - Clear existing stored PR comments before analysis + * @param {Function|null} [options.onProgress=null] - Optional progress callback + * @param {string} [options.projectPath=process.cwd()] - Project path used for project-isolated storage + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @returns {Promise} Analysis results */ async analyzeRepository(repository, options = {}) { @@ -136,6 +145,7 @@ export class PRHistoryAnalyzer { clearExisting = false, onProgress = null, projectPath = process.cwd(), + verbose = false, } = options; // Initialize progress tracking @@ -145,14 +155,14 @@ export class PRHistoryAnalyzer { if (resume) { const loaded = await this.progress.load(); if (loaded && this.progress.progress.status === 'completed') { - console.log(chalk.green(`Analysis for ${repository} already completed.`)); + verboseLog(verbose, chalk.green(`Analysis for ${repository} already completed.`)); return await this.getAnalysisResults(repository, projectPath); } } // Clear existing data if 
requested if (clearExisting) { - console.log(chalk.yellow(`Clearing existing PR comments for ${repository}...`)); + verboseLog(verbose, chalk.yellow(`Clearing existing PR comments for ${repository}...`)); await clearPRComments(repository, projectPath); } @@ -160,30 +170,30 @@ export class PRHistoryAnalyzer { this.progress.setStatus('in_progress'); await this.progress.save(); - console.log(chalk.blue(`Starting PR comment analysis for ${repository}`)); - console.log(chalk.blue(`Options: concurrency=${this.options.concurrency}, batchSize=${this.options.batchSize}`)); + verboseLog(verbose, chalk.blue(`Starting PR comment analysis for ${repository}`)); + verboseLog(verbose, chalk.blue(`Options: concurrency=${this.options.concurrency}, batchSize=${this.options.batchSize}`)); // Step 1: Fetch all merged PRs - const prs = await this.fetchAllPRs(repository, { since, until, limit, resume, onProgress, projectPath }); + const prs = await this.fetchAllPRs(repository, { since, until, limit, resume, onProgress, projectPath, verbose }); if (prs.length === 0) { - console.log(chalk.yellow(`No merged PRs found for ${repository}`)); + verboseLog(verbose, chalk.yellow(`No merged PRs found for ${repository}`)); this.progress.setStatus('completed'); await this.progress.save(); return { repository, total_prs: 0, total_comments: 0, patterns: [] }; } - console.log(chalk.green(`Found ${prs.length} merged PRs to analyze`)); + verboseLog(verbose, chalk.green(`Found ${prs.length} merged PRs to analyze`)); this.progress.updatePRs(prs.length, 0); // Step 2: Process PR comments - const processedComments = await this.processPRComments(prs, { onProgress, projectPath }); + const processedComments = await this.processPRComments(prs, { onProgress, projectPath, verbose }); // Step 3: Store in database if (processedComments.length > 0) { - console.log(chalk.blue(`Storing ${processedComments.length} processed comments in database...`)); + verboseLog(verbose, chalk.blue(`Storing ${processedComments.length} 
processed comments in database...`)); const storedCount = await storePRCommentsBatch(processedComments, projectPath); - console.log(chalk.green(`Successfully stored ${storedCount} PR comments`)); + verboseLog(verbose, chalk.green(`Successfully stored ${storedCount} PR comments`)); } // Step 4: Generate final results @@ -192,8 +202,8 @@ export class PRHistoryAnalyzer { this.progress.setStatus('completed'); await this.progress.save(); - console.log(chalk.green(`Analysis completed for ${repository}`)); - console.log(chalk.green(`Processed ${results.total_prs} PRs with ${results.total_comments} comments`)); + verboseLog(verbose, chalk.green(`Analysis completed for ${repository}`)); + verboseLog(verbose, chalk.green(`Processed ${results.total_prs} PRs with ${results.total_comments} comments`)); return results; } catch (error) { @@ -210,13 +220,20 @@ export class PRHistoryAnalyzer { * @private * @param {string} repository - Repository in format "owner/repo" * @param {Object} options - Fetch options + * @param {string|null} [options.since=null] - Lower bound ISO date for PR selection + * @param {string|null} [options.until=null] - Upper bound ISO date for PR selection + * @param {number|null} [options.limit=null] - Maximum number of PRs to fetch + * @param {boolean} [options.resume=false] - Resume from the last saved fetched page + * @param {Function|null} [options.onProgress=null] - Optional progress callback + * @param {string} [options.projectPath=process.cwd()] - Project path used for incremental fetch context + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @returns {Promise} Array of PRs */ async fetchAllPRs(repository, options = {}) { - const { since, until, limit, resume, onProgress, projectPath = process.cwd() } = options; + const { since, until, limit, resume, onProgress, projectPath = process.cwd(), verbose = false } = options; const [owner, repo] = repository.split('/'); - console.log(chalk.blue(`Fetching merged PRs for 
${repository}...`)); + verboseLog(verbose, chalk.blue(`Fetching merged PRs for ${repository}...`)); try { const startPage = resume ? this.progress.progress.last_processed_page + 1 : 1; @@ -259,27 +276,30 @@ export class PRHistoryAnalyzer { * @private * @param {Array} prs - Array of PR objects * @param {Object} options - Processing options + * @param {Function|null} [options.onProgress=null] - Optional progress callback + * @param {string} [options.projectPath=process.cwd()] - Project path used for database filtering + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @returns {Promise} Array of processed comments */ async processPRComments(prs, options = {}) { - const { onProgress, projectPath = process.cwd() } = options; + const { onProgress, projectPath = process.cwd(), verbose = false } = options; const allProcessedComments = []; let totalComments = 0; let processedComments = 0; let failedComments = 0; - console.log(chalk.blue(`Processing comments for ${prs.length} PRs...`)); - console.log(chalk.cyan(`This may take several minutes for large repositories...`)); + verboseLog(verbose, chalk.blue(`Processing comments for ${prs.length} PRs...`)); + verboseLog(verbose, chalk.cyan(`This may take several minutes for large repositories...`)); // Get processed PR date range to skip already processed PRs - console.log(chalk.blue(`Checking for already processed PRs...`)); + verboseLog(verbose, chalk.blue(`Checking for already processed PRs...`)); const { oldestPR, newestPR } = await getProcessedPRDateRange(this.progress.repository, projectPath); let skippedPRs = 0; let prsToProcess = prs; if (oldestPR && newestPR) { - console.log(chalk.blue(`Found processed PR range: ${oldestPR} to ${newestPR}`)); + verboseLog(verbose, chalk.blue(`Found processed PR range: ${oldestPR} to ${newestPR}`)); prsToProcess = prs.filter((pr) => { const shouldSkip = shouldSkipPR(pr, oldestPR, newestPR); if (shouldSkip) { @@ -287,25 +307,25 @@ export class 
PRHistoryAnalyzer { } return !shouldSkip; }); - console.log(chalk.green(`Skipping ${skippedPRs} already processed PRs, processing ${prsToProcess.length} new PRs`)); + verboseLog(verbose, chalk.green(`Skipping ${skippedPRs} already processed PRs, processing ${prsToProcess.length} new PRs`)); } else { - console.log(chalk.blue(`No previously processed PRs found, processing all ${prs.length} PRs`)); + verboseLog(verbose, chalk.blue(`No previously processed PRs found, processing all ${prs.length} PRs`)); } if (prsToProcess.length === 0) { - console.log(chalk.yellow(`All PRs have already been processed!`)); + verboseLog(verbose, chalk.yellow(`All PRs have already been processed!`)); return allProcessedComments; } // First pass: count total comments for better progress tracking - console.log(chalk.blue(`Counting total comments across ${prsToProcess.length} PRs to process...`)); + verboseLog(verbose, chalk.blue(`Counting total comments across ${prsToProcess.length} PRs to process...`)); let estimatedComments = 0; for (let i = 0; i < Math.min(prsToProcess.length, 10); i++) { estimatedComments += (prsToProcess[i].comments || 0) + (prsToProcess[i].review_comments || 0); } const avgCommentsPerPR = estimatedComments / Math.min(prsToProcess.length, 10); const totalEstimatedComments = Math.floor(avgCommentsPerPR * prsToProcess.length); - console.log(chalk.blue(`Estimated ${totalEstimatedComments} total comments to process`)); + verboseLog(verbose, chalk.blue(`Estimated ${totalEstimatedComments} total comments to process`)); // Process PRs in batches for (let i = 0; i < prsToProcess.length; i += this.options.batchSize) { @@ -313,7 +333,8 @@ export class PRHistoryAnalyzer { const batchNumber = Math.floor(i / this.options.batchSize) + 1; const totalBatches = Math.ceil(prsToProcess.length / this.options.batchSize); - console.log( + verboseLog( + verbose, chalk.blue( `Processing PR batch ${batchNumber}/${totalBatches} (PRs ${i + 1}-${Math.min(i + this.options.batchSize, 
prsToProcess.length)})` ) @@ -362,7 +383,8 @@ export class PRHistoryAnalyzer { } const batchDuration = (Date.now() - batchStartTime) / 1000; - console.log( + verboseLog( + verbose, chalk.blue(`Batch ${batchNumber}/${totalBatches} completed: ${batchCommentCount} comments in ${batchDuration.toFixed(1)}s`) ); // Calculate progress percentage, handling case where totalEstimatedComments is 0 @@ -373,7 +395,7 @@ export class PRHistoryAnalyzer { ? `Progress: ${processedComments}/${totalEstimatedComments} comments processed (${progressPercentage}%)` : `Progress: ${processedComments} comments processed`; - console.log(chalk.blue(progressText)); + verboseLog(verbose, chalk.blue(progressText)); this.progress.updateComments(totalComments, processedComments, failedComments); await this.progress.save(); @@ -384,12 +406,12 @@ export class PRHistoryAnalyzer { } } - console.log(chalk.green(`Processed ${processedComments}/${totalComments} comments from ${prsToProcess.length} PRs`)); + verboseLog(verbose, chalk.green(`Processed ${processedComments}/${totalComments} comments from ${prsToProcess.length} PRs`)); if (skippedPRs > 0) { - console.log(chalk.blue(`Skipped ${skippedPRs} already processed PRs`)); + verboseLog(verbose, chalk.blue(`Skipped ${skippedPRs} already processed PRs`)); } if (failedComments > 0) { - console.log(chalk.yellow(`Failed to process ${failedComments} comments`)); + verboseLog(verbose, chalk.yellow(`Failed to process ${failedComments} comments`)); } return allProcessedComments; diff --git a/src/pr-history/cli-utils.js b/src/pr-history/cli-utils.js index 8afe265..e765b4b 100644 --- a/src/pr-history/cli-utils.js +++ b/src/pr-history/cli-utils.js @@ -9,6 +9,7 @@ import { execSync } from 'child_process'; import fs from 'node:fs'; import path from 'node:path'; import chalk from 'chalk'; +import { verboseLog } from '../utils/logging.js'; /** * Detect GitHub repository from git remote origin @@ -130,9 +131,7 @@ export function validateGitHubToken(options) { * @param 
{boolean} verbose - Whether to show verbose output */ export function displayProgress(progress, verbose) { - if (verbose) { - console.log(chalk.blue(`[${progress.stage}] ${progress.message} (${progress.current}/${progress.total})`)); - } + verboseLog(verbose, chalk.blue(`[${progress.stage}] ${progress.message} (${progress.current}/${progress.total})`)); } /** diff --git a/src/pr-history/database.js b/src/pr-history/database.js index 747280c..265da75 100644 --- a/src/pr-history/database.js +++ b/src/pr-history/database.js @@ -12,6 +12,7 @@ import chalk from 'chalk'; import stopwords from 'stopwords-iso/stopwords-iso.json' with { type: 'json' }; import { EMBEDDING_DIMENSIONS, TABLE_NAMES } from '../embeddings/constants.js'; import { getDefaultEmbeddingsSystem } from '../embeddings/factory.js'; +import { verboseLog } from '../utils/logging.js'; import { truncateToTokenLimit, cleanupTokenizer } from '../utils/mobilebert-tokenizer.js'; // Create embeddings system instance @@ -107,7 +108,8 @@ export async function storePRCommentsBatch(commentsData, projectPath = process.c await table.optimize(); } catch (optimizeError) { if (optimizeError.message && optimizeError.message.includes('legacy format')) { - console.log( + verboseLog( + {}, chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`) ); } else { @@ -115,7 +117,7 @@ export async function storePRCommentsBatch(commentsData, projectPath = process.c } } - console.log(chalk.green(`Stored batch of ${validRecords.length} PR comments`)); + verboseLog({}, chalk.green(`Stored batch of ${validRecords.length} PR comments`)); } catch (batchError) { console.error(chalk.red(`Error storing batch: ${batchError.message}`)); } @@ -151,7 +153,7 @@ export async function getPRCommentsStats(repository = null, projectPath = proces }; if (!table) { - console.log(chalk.yellow('PR comments table not found, returning empty stats')); + verboseLog({}, chalk.yellow('PR comments table not 
found, returning empty stats')); return defaultStats; } @@ -163,16 +165,16 @@ export async function getPRCommentsStats(repository = null, projectPath = proces } const whereClause = filters.join(' AND '); - console.log(chalk.blue(`Getting stats with filter: ${whereClause}`)); + verboseLog({}, chalk.blue(`Getting stats with filter: ${whereClause}`)); let totalCount = 0; try { totalCount = await table.countRows(whereClause); - console.log(chalk.blue(`Found ${totalCount} total comments matching filter`)); + verboseLog({}, chalk.blue(`Found ${totalCount} total comments matching filter`)); } catch (countError) { console.warn(chalk.yellow(`Error counting rows: ${countError.message}, trying without filter`)); totalCount = await table.countRows(); - console.log(chalk.blue(`Found ${totalCount} total comments in table`)); + verboseLog({}, chalk.blue(`Found ${totalCount} total comments in table`)); } let results = []; @@ -180,7 +182,7 @@ export async function getPRCommentsStats(repository = null, projectPath = proces try { // Use query() instead of search() for non-vector queries results = await table.query().where(whereClause).limit(10000).toArray(); - console.log(chalk.blue(`Retrieved ${results.length} comments for analysis`)); + verboseLog({}, chalk.blue(`Retrieved ${results.length} comments for analysis`)); } catch (queryError) { console.warn(chalk.yellow(`Error with filtered query: ${queryError.message}, trying without filter`)); try { @@ -192,7 +194,7 @@ export async function getPRCommentsStats(repository = null, projectPath = proces } else { results = results.filter((r) => r.project_path === resolvedProjectPath); } - console.log(chalk.blue(`Retrieved and filtered ${results.length} comments for analysis`)); + verboseLog({}, chalk.blue(`Retrieved and filtered ${results.length} comments for analysis`)); } catch (fallbackError) { console.error(chalk.red(`Fallback query also failed: ${fallbackError.message}`)); results = []; @@ -256,7 +258,7 @@ export async function 
getPRCommentsStats(repository = null, projectPath = proces latest: latestDate ? latestDate.toISOString().split('T')[0] : 'N/A', }; - console.log(chalk.green(`Stats generated: ${stats.totalComments} comments, ${stats.totalPRs} PRs, ${stats.uniqueAuthors} authors`)); + verboseLog({}, chalk.green(`Stats generated: ${stats.totalComments} comments, ${stats.totalPRs} PRs, ${stats.uniqueAuthors} authors`)); return stats; } catch (error) { console.error(chalk.red(`Error getting PR comments stats: ${error.message}`)); @@ -317,7 +319,7 @@ export async function getProcessedPRDateRange(repository, projectPath = process. const oldestPR = dates[0].toISOString(); const newestPR = dates[dates.length - 1].toISOString(); - console.log(chalk.blue(`Processed PR date range: ${oldestPR} to ${newestPR} (${prDates.size} PRs)`)); + verboseLog({}, chalk.blue(`Processed PR date range: ${oldestPR} to ${newestPR} (${prDates.size} PRs)`)); return { oldestPR, newestPR }; } catch (error) { console.error(chalk.red(`Error getting processed PR date range: ${error.message}`)); @@ -365,7 +367,7 @@ export async function clearPRComments(repository, projectPath = process.cwd()) { await table.delete(deleteQuery); - console.log(chalk.yellow(`Cleared ${countBefore} PR comments for repository ${repository}`)); + verboseLog({}, chalk.yellow(`Cleared ${countBefore} PR comments for repository ${repository}`)); return countBefore; } catch (error) { console.error(chalk.red(`Error clearing PR comments: ${error.message}`)); @@ -513,13 +515,13 @@ async function getClassifier() { async function _initializeClassifier() { try { - console.log(chalk.blue('Initializing MobileBERT classifier...')); + verboseLog({}, chalk.blue('Initializing MobileBERT classifier...')); const cls = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli', { quantized: true, dtype: 'fp32', device: 'cpu', }); - console.log(chalk.green('✓ Local MobileBERT classifier initialized successfully')); + verboseLog({}, chalk.green('✓ 
Local MobileBERT classifier initialized successfully')); return cls; } catch { console.warn(chalk.yellow('⚠ Failed to initialize MobileBERT, trying fallback model...')); @@ -529,7 +531,7 @@ async function _initializeClassifier() { dtype: 'fp32', device: 'cpu', }); - console.log(chalk.green('✓ Local DistilBERT classifier initialized successfully (fallback)')); + verboseLog({}, chalk.green('✓ Local DistilBERT classifier initialized successfully (fallback)')); return cls; } catch (fallbackError) { console.warn(chalk.yellow('⚠ Failed to initialize any local classifier:'), fallbackError.message); @@ -546,7 +548,7 @@ export async function cleanupClassifier() { try { await classifier.dispose(); classifier = null; - console.log(chalk.green('✓ Local classifier resources cleaned up')); + verboseLog({}, chalk.green('✓ Local classifier resources cleaned up')); } catch (error) { console.warn(chalk.yellow('⚠ Error cleaning up classifier:'), error.message); classifier = null; @@ -677,13 +679,17 @@ function preFilterWithKeywords(candidate) { * Find relevant PR comments using hybrid search with chunking strategy * @param {string} reviewFileContent - Content of the review file * @param {Object} options - Search options + * @param {number} [options.limit=10] - Maximum number of formatted comment matches to return + * @param {string} [options.projectPath=process.cwd()] - Project path used for project-isolated search + * @param {boolean} [options.isTestFile=false] - Whether the reviewed file is a test file + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @returns {Promise<Array<Object>>} Relevant PR comments with verification */ export async function findRelevantPRComments(reviewFileContent, options = {}) { const { limit = 10, projectPath = process.cwd(), isTestFile = false } = options; try { - console.log(chalk.cyan('🔍 Starting FORWARD Hybrid Search with LLM Verification')); + verboseLog(options, chalk.cyan('🔍 Starting FORWARD Hybrid Search with LLM Verification'));
if (!reviewFileContent) { console.warn(chalk.yellow('No review file content provided')); @@ -696,7 +702,7 @@ export async function findRelevantPRComments(reviewFileContent, options = {}) { console.warn(chalk.yellow('No valid chunks created from review file')); return []; } - console.log(chalk.blue(`📝 Created ${codeChunks.length} chunks from the review file.`)); + verboseLog(options, chalk.blue(`📝 Created ${codeChunks.length} chunks from the review file.`)); const chunkEmbeddings = await Promise.all( codeChunks.map(async (chunk) => ({ @@ -715,7 +721,7 @@ export async function findRelevantPRComments(reviewFileContent, options = {}) { const resolvedProjectPath = path.resolve(projectPath); const projectWhereClause = `project_path = '${resolvedProjectPath.replace(/'/g, "''")}'`; - console.log(chalk.blue(`🔒 Project isolation: filtering by project_path = '${resolvedProjectPath}'`)); + verboseLog(options, chalk.blue(`🔒 Project isolation: filtering by project_path = '${resolvedProjectPath}'`)); const searchPromises = chunkEmbeddings.map((chunk) => { if (!chunk.vector) return Promise.resolve([]); @@ -746,29 +752,35 @@ export async function findRelevantPRComments(reviewFileContent, options = {}) { } } - console.log(chalk.blue(`🎯 Found ${candidateMatches.size} unique candidate comments for verification.`)); + verboseLog(options, chalk.blue(`🎯 Found ${candidateMatches.size} unique candidate comments for verification.`)); // --- STEP 3: THE NEW PRE-FILTERING STEP --- const preFilteredCandidates = Array.from(candidateMatches.values()).filter(preFilterWithKeywords); - console.log(chalk.yellow(`⚡ After keyword pre-filtering, ${preFilteredCandidates.length} candidates remain for LLM verification.`)); + verboseLog( + options, + chalk.yellow(`⚡ After keyword pre-filtering, ${preFilteredCandidates.length} candidates remain for LLM verification.`) + ); // --- Step 4: LLM Verification --- const candidatesArray = preFilteredCandidates; const batchSize = 
HYBRID_SEARCH_CONFIG.LLM_BATCH_SIZE; const verifiedComments = []; - console.log(chalk.cyan(`🤖 Starting LLM verification of ${candidatesArray.length} candidates...`)); + verboseLog(options, chalk.cyan(`🤖 Starting LLM verification of ${candidatesArray.length} candidates...`)); for (let i = 0; i < candidatesArray.length; i += batchSize) { const batch = candidatesArray.slice(i, i + batchSize); const verifiedBatch = await verifyLocally(batch); // SINGLE batch call verifiedComments.push(...verifiedBatch); } - console.log(chalk.green(`✅ LLM verification complete: ${verifiedComments.length}/${candidatesArray.length} comments verified.`)); + verboseLog( + options, + chalk.green(`✅ LLM verification complete: ${verifiedComments.length}/${candidatesArray.length} comments verified.`) + ); // --- Step 4: Filtering and Formatting (same as before) --- let filteredComments = verifiedComments; if (isTestFile) { - console.log(chalk.blue('🧪 Applying test file filtering - prioritizing test-related comments')); + verboseLog(options, chalk.blue('🧪 Applying test file filtering - prioritizing test-related comments')); filteredComments = filteredComments.filter((comment) => { const filePath = comment.file_path || ''; const commentText = comment.comment_text || ''; @@ -780,7 +792,7 @@ export async function findRelevantPRComments(reviewFileContent, options = {}) { ); }); } else { - console.log(chalk.blue('📝 Applying non-test file filtering - excluding test-specific comments')); + verboseLog(options, chalk.blue('📝 Applying non-test file filtering - excluding test-specific comments')); filteredComments = filteredComments.filter((comment) => { const filePath = comment.file_path || ''; const commentText = comment.comment_text || ''; @@ -810,7 +822,7 @@ export async function findRelevantPRComments(reviewFileContent, options = {}) { contentVerified: true, })); - console.log(chalk.green.bold(`\n🎉 Final results: ${formattedResults.length} relevant comments found.`)); + verboseLog(options, 
chalk.green.bold(`\n🎉 Final results: ${formattedResults.length} relevant comments found.`)); return formattedResults; } catch (error) { console.error(chalk.red(`Error in reverse hybrid search: ${error.message}`)); diff --git a/src/pr-history/database.test.js b/src/pr-history/database.test.js index 1a06a38..755294c 100644 --- a/src/pr-history/database.test.js +++ b/src/pr-history/database.test.js @@ -196,7 +196,7 @@ describe('PR History Database', () => { const result = await storePRCommentsBatch([createValidComment()]); expect(result).toBe(expectedResult); if (errorMessage === 'legacy format') { - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('legacy index format')); + expect(console.warn).not.toHaveBeenCalledWith(expect.stringContaining('legacy index format')); } }); @@ -277,7 +277,7 @@ describe('PR History Database', () => { it('should filter by repository when provided', async () => { mockTable.countRows.mockResolvedValue(0); await getPRCommentsStats('owner/repo'); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('repository')); + expect(mockTable.countRows).toHaveBeenCalledWith(expect.stringContaining("repository = 'owner/repo'")); }); it.each([ diff --git a/src/project-analyzer.js b/src/project-analyzer.js index 1797217..a286941 100644 --- a/src/project-analyzer.js +++ b/src/project-analyzer.js @@ -13,6 +13,7 @@ import { getDefaultEmbeddingsSystem } from './embeddings/factory.js'; import * as llm from './llm.js'; import { FILE_SELECTION_SYSTEM_PROMPT, PROJECT_SUMMARY_SYSTEM_PROMPT } from './prompt-cache.js'; import { isDocumentationFile, isTestFile } from './utils/file-validation.js'; +import { verboseLog } from './utils/logging.js'; // Consolidated file classification configuration const FILE_PATTERNS = { @@ -198,9 +199,7 @@ export class ProjectAnalyzer { const { verbose = false, forceAnalysis = false } = options; try { - if (verbose) { - console.log(chalk.cyan('🔍 Starting project architecture analysis...')); - } + 
verboseLog(verbose, chalk.cyan('🔍 Starting project architecture analysis...')); // Initialize LLM client if (!this.llm) { @@ -212,16 +211,13 @@ export class ProjectAnalyzer { if (existingSummary && !forceAnalysis) { const currentHash = await this.calculateKeyFilesHash(existingSummary.keyFiles); if (existingSummary.keyFilesHash === currentHash) { - if (verbose) { - console.log(chalk.green('✅ Project analysis up-to-date (no key file changes detected)')); - } + verboseLog(verbose, chalk.green('✅ Project analysis up-to-date (no key file changes detected)')); return existingSummary; } - if (verbose) { - console.log(chalk.yellow('🔄 Key files changed, regenerating analysis...')); - } - } else if (verbose) { - console.log( + verboseLog(verbose, chalk.yellow('🔄 Key files changed, regenerating analysis...')); + } else { + verboseLog( + verbose, chalk.cyan( forceAnalysis ? '🔄 Force analysis requested - regenerating from scratch...' @@ -235,10 +231,8 @@ export class ProjectAnalyzer { ? await this.validateAndUpdateKeyFiles(existingSummary.keyFiles, projectPath) : await this.discoverKeyFilesWithLLM(projectPath); - if (verbose) { - console.log(chalk.gray(` Found ${keyFiles.length} key architectural files`)); - console.log(chalk.cyan('🧠 Generating LLM-based project analysis...')); - } + verboseLog(verbose, chalk.gray(` Found ${keyFiles.length} key architectural files`)); + verboseLog(verbose, chalk.cyan('🧠 Generating LLM-based project analysis...')); // Generate summary const projectSummary = await this.generateProjectSummary(keyFiles, projectPath); @@ -254,12 +248,10 @@ export class ProjectAnalyzer { this.keyFiles = keyFiles; this.lastAnalysisHash = currentHash; - if (verbose) { - console.log(chalk.green('✅ Project analysis complete')); - console.log(chalk.gray(` Technologies: ${(projectSummary.technologies || []).join(', ')}`)); - console.log(chalk.gray(` Key patterns: ${(projectSummary.keyPatterns || []).length} identified`)); - console.log(chalk.gray(` Key files tracked: 
${keyFiles.length}`)); - } + verboseLog(verbose, chalk.green('✅ Project analysis complete')); + verboseLog(verbose, chalk.gray(` Technologies: ${(projectSummary.technologies || []).join(', ')}`)); + verboseLog(verbose, chalk.gray(` Key patterns: ${(projectSummary.keyPatterns || []).length} identified`)); + verboseLog(verbose, chalk.gray(` Key files tracked: ${keyFiles.length}`)); return projectSummary; } catch (error) { @@ -300,7 +292,7 @@ export class ProjectAnalyzer { try { const embeddingsSystem = getDefaultEmbeddingsSystem(); await embeddingsSystem.storeProjectSummary(projectPath, projectSummary); - console.log(chalk.green('✅ Project analysis stored in database')); + verboseLog({}, chalk.green('✅ Project analysis stored in database')); } catch (error) { console.error(chalk.yellow('Warning: Could not store analysis:'), error.message); } @@ -328,7 +320,7 @@ export class ProjectAnalyzer { // If we lost more than 30% of key files, trigger fresh discovery if (validatedFiles.length < existingKeyFiles.length * 0.7) { - console.log(chalk.yellow('⚠️ Many key files missing, performing fresh discovery...')); + verboseLog({}, chalk.yellow('⚠️ Many key files missing, performing fresh discovery...')); return await this.discoverKeyFilesWithLLM(projectPath); } @@ -339,10 +331,10 @@ export class ProjectAnalyzer { * Discover key architectural files using LanceDB hybrid search */ async discoverKeyFilesWithLLM(projectPath) { - console.log(chalk.cyan('🔍 Mining codebase embeddings with LanceDB hybrid search...')); + verboseLog({}, chalk.cyan('🔍 Mining codebase embeddings with LanceDB hybrid search...')); const keyFilesByCategory = await this.mineKeyFilesFromEmbeddings(projectPath); - console.log(chalk.cyan(`🧠 LLM analyzing ${keyFilesByCategory.length} candidates from embedding search...`)); + verboseLog({}, chalk.cyan(`🧠 LLM analyzing ${keyFilesByCategory.length} candidates from embedding search...`)); const keyFiles = await this.selectFinalKeyFiles(keyFilesByCategory, projectPath); 
return keyFiles; @@ -362,7 +354,7 @@ export class ProjectAnalyzer { await table.optimize(); } catch (optimizeError) { if (optimizeError.message && optimizeError.message.includes('legacy format')) { - console.log(chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`)); + console.warn(chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`)); } else { console.warn(chalk.yellow(`Warning: Failed to optimize file embeddings table: ${optimizeError.message}`)); } @@ -371,7 +363,7 @@ export class ProjectAnalyzer { const keyFiles = new Map(); try { - console.log(chalk.gray(` 📊 Using LanceDB hybrid search for project: ${projectPath}`)); + verboseLog({}, chalk.gray(` 📊 Using LanceDB hybrid search for project: ${projectPath}`)); // Unified query function const queryFiles = async (config) => { @@ -414,17 +406,17 @@ export class ProjectAnalyzer { return await query.limit(config.limit || 30).toArray(); } catch (error) { - console.log(chalk.yellow(` ⚠️ Query failed for ${config.category}: ${error.message}`)); + verboseLog({}, chalk.yellow(` ⚠️ Query failed for ${config.category}: ${error.message}`)); return []; } }; // Execute all searches for (const config of DB_SEARCH_CONFIGS) { - console.log(chalk.gray(` 🔍 Searching for ${config.category} files...`)); + verboseLog({}, chalk.gray(` 🔍 Searching for ${config.category} files...`)); const results = await queryFiles(config); - console.log(chalk.gray(` 📦 Found ${results.length} ${config.category} file candidates`)); + verboseLog({}, chalk.gray(` 📦 Found ${results.length} ${config.category} file candidates`)); results.forEach((result) => { if (this.matchesFileType(result.path, result.name, config.matcher)) { @@ -438,7 +430,7 @@ export class ProjectAnalyzer { } const results = Array.from(keyFiles.values()); - console.log(chalk.cyan(`🗃️ Found ${results.length} key files from embeddings database`)); + verboseLog({}, 
chalk.cyan(`🗃️ Found ${results.length} key files from embeddings database`)); return results; } @@ -475,11 +467,11 @@ export class ProjectAnalyzer { */ async selectFinalKeyFiles(candidates, projectPath) { if (candidates.length === 0) { - console.log(chalk.yellow('⚠️ No candidates found from embeddings search')); + verboseLog({}, chalk.yellow('⚠️ No candidates found from embeddings search')); return []; } - console.log(chalk.cyan(`🤖 LLM analyzing ${candidates.length} candidates...`)); + verboseLog({}, chalk.cyan(`🤖 LLM analyzing ${candidates.length} candidates...`)); const candidatesSummary = candidates .map((file, index) => { @@ -520,7 +512,7 @@ Select files following the criteria in the system instructions.`; jsonSchema: fileSelectionSchema, }); - console.log(chalk.gray(' 📄 LLM Response preview:'), response.content.substring(0, 200)); + verboseLog({}, chalk.gray(' 📄 LLM Response preview:'), response.content.substring(0, 200)); const selectedPaths = response.json.selectedFiles; @@ -546,14 +538,14 @@ Select files following the criteria in the system instructions.`; }) .filter(Boolean); - console.log(chalk.cyan(`🎯 LLM selected ${keyFiles.length} final key files`)); + verboseLog({}, chalk.cyan(`🎯 LLM selected ${keyFiles.length} final key files`)); return keyFiles; } else { throw new Error(`Failed to extract valid JSON array from LLM response`); } } catch (error) { console.error(chalk.red('Error in LLM selection:'), error.message); - console.log(chalk.yellow(' 🔄 Falling back to automatic selection...')); + verboseLog({}, chalk.yellow(' 🔄 Falling back to automatic selection...')); return this.fallbackFileSelection(candidates, projectPath); } } @@ -588,7 +580,7 @@ Select files following the criteria in the system instructions.`; } } - console.log(chalk.yellow(`⚠️ Used fallback selection: ${fallbackFiles.length} files`)); + verboseLog({}, chalk.yellow(`⚠️ Used fallback selection: ${fallbackFiles.length} files`)); return fallbackFiles; } @@ -793,7 +785,7 @@ Follow the 
analysis guidelines from the system instructions to identify custom i } catch (error) { console.error(chalk.red('Error generating project summary:'), error.message); const fallback = this.createFallbackSummary(projectPath, keyFiles); - console.log(chalk.yellow('Using fallback summary with technologies:'), fallback.technologies); + verboseLog({}, chalk.yellow('Using fallback summary with technologies:'), fallback.technologies); return fallback; } } @@ -857,7 +849,8 @@ Follow the analysis guidelines from the system instructions to identify custom i }; } - console.log( + verboseLog( + {}, chalk.cyan( `✅ Project summary validated - Technologies: ${validatedSummary.technologies.length}, Frameworks: ${validatedSummary.mainFrameworks.length}` ) diff --git a/src/project-analyzer.test.js b/src/project-analyzer.test.js index f16da48..13a8eef 100644 --- a/src/project-analyzer.test.js +++ b/src/project-analyzer.test.js @@ -272,7 +272,6 @@ describe('ProjectAnalyzer', () => { await analyzer.storeAnalysis(mockProjectPath, summary); expect(mockEmbeddingsSystem.storeProjectSummary).toHaveBeenCalledWith(mockProjectPath, summary); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('Project analysis stored')); }); it('should handle storage errors gracefully', async () => { @@ -319,7 +318,7 @@ describe('ProjectAnalyzer', () => { await analyzer.validateAndUpdateKeyFiles(existingFiles, mockProjectPath); // With 1 of 3 files found (33%), it should trigger fresh discovery - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('Many key files missing')); + expect(mockEmbeddingsSystem.initialize).toHaveBeenCalled(); }); it('should filter out missing files and keep existing ones', async () => { @@ -366,7 +365,7 @@ describe('ProjectAnalyzer', () => { await analyzer.validateAndUpdateKeyFiles(existingFiles, mockProjectPath); // Should trigger discoverKeyFilesWithLLM - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('Many key files missing')); + 
expect(mockEmbeddingsSystem.initialize).toHaveBeenCalled(); }); }); @@ -410,7 +409,7 @@ describe('ProjectAnalyzer', () => { const result = await analyzer.mineKeyFilesFromEmbeddings(mockProjectPath); expect(result).toEqual([]); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('Skipping optimization')); + expect(console.warn).toHaveBeenCalledWith(expect.stringContaining('legacy index format')); }); it('should return empty array on query error', async () => { @@ -496,7 +495,6 @@ describe('ProjectAnalyzer', () => { await analyzer.selectFinalKeyFiles(candidates, mockProjectPath); expect(console.error).toHaveBeenCalled(); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('Falling back to automatic selection')); }); it('should fallback if LLM returns invalid response', async () => { diff --git a/src/rag-analyzer.js b/src/rag-analyzer.js index 7b758d8..5604b62 100644 --- a/src/rag-analyzer.js +++ b/src/rag-analyzer.js @@ -26,7 +26,7 @@ import { inferContextFromCodeContent, inferContextFromDocumentContent } from './ import { isGenericDocument, getGenericDocumentContext } from './utils/document-detection.js'; import { isTestFile, shouldProcessFile } from './utils/file-validation.js'; import { detectFileType, detectLanguageFromExtension } from './utils/language-detection.js'; -import { debug } from './utils/logging.js'; +import { debug, verboseLog } from './utils/logging.js'; import { addLineNumbers } from './utils/string-utils.js'; // Constants for content processing @@ -56,7 +56,7 @@ async function ensureSemanticSimilarityInitialized() { await initializeSemanticSimilarity(); semanticSimilarityInitialized = true; } catch (error) { - console.log(chalk.yellow(`⚠️ Could not initialize semantic similarity: ${error.message}`)); + console.warn(chalk.yellow(`⚠️ Could not initialize semantic similarity: ${error.message}`)); // Continue without semantic similarity - word-based fallback will be used } } @@ -216,7 +216,7 @@ ${ex.content} * @param 
{string} projectPath - Project path * @returns {Promise} Project summary or null */ -async function getProjectSummary(projectPath) { +async function getProjectSummary(projectPath, options = {}) { const resolvedPath = path.resolve(projectPath); try { @@ -224,7 +224,7 @@ async function getProjectSummary(projectPath) { const summary = await embeddingsSystem.getProjectSummary(resolvedPath); if (summary) { - console.log(chalk.cyan(`📋 Retrieved project summary for: ${path.basename(resolvedPath)}`)); + verboseLog(options, chalk.cyan(`📋 Retrieved project summary for: ${path.basename(resolvedPath)}`)); } return summary; @@ -430,16 +430,16 @@ async function runAnalysis(filePath, options = {}) { try { // Check if this is a holistic PR review if (options.isHolisticPRReview && filePath === 'PR_HOLISTIC_REVIEW') { - console.log(chalk.blue(`Performing holistic PR review for ${options.prFiles?.length || 0} files`)); + verboseLog(options, chalk.blue(`Performing holistic PR review for ${options.prFiles?.length || 0} files`)); return await performHolisticPRAnalysis(options); } - console.log(chalk.blue(`Analyzing file: ${filePath}`)); + verboseLog(options, chalk.blue(`Analyzing file: ${filePath}`)); // Load feedback data if feedback tracking is enabled let feedbackData = {}; if (options.trackFeedback && options.feedbackPath) { - console.log(chalk.cyan('--- Loading Feedback Data ---')); + verboseLog(options, chalk.cyan('--- Loading Feedback Data ---')); feedbackData = await loadFeedbackData(options.feedbackPath, { verbose: options.verbose }); } @@ -455,16 +455,16 @@ async function runAnalysis(filePath, options = {}) { content = options.diffContent; // For PR reviews, always read the full file content for context awareness fullFileContent = fs.existsSync(filePath) ? 
fs.readFileSync(filePath, 'utf8') : null; - console.log(chalk.blue(`Analyzing diff only for ${path.basename(filePath)}`)); + verboseLog(options, chalk.blue(`Analyzing diff only for ${path.basename(filePath)}`)); } else { content = fs.readFileSync(filePath, 'utf8'); fullFileContent = content; - console.log(chalk.blue(`Analyzing full file ${path.basename(filePath)}`)); + verboseLog(options, chalk.blue(`Analyzing full file ${path.basename(filePath)}`)); } // Check if file should be processed if (!shouldProcessFile(filePath, content)) { - console.log(chalk.yellow(`Skipping file based on exclusion patterns: ${filePath}`)); + verboseLog(options, chalk.yellow(`Skipping file based on exclusion patterns: ${filePath}`)); return { success: true, skipped: true, @@ -473,7 +473,7 @@ async function runAnalysis(filePath, options = {}) { } // --- Stage 1: CONTEXT RETRIEVAL --- - console.log(chalk.blue('--- Stage 1: Context Retrieval ---')); + verboseLog(options, chalk.blue('--- Stage 1: Context Retrieval ---')); const { language, isTestFile, @@ -485,49 +485,49 @@ async function runAnalysis(filePath, options = {}) { } = await getContextForFile(filePath, content, options); // --- Stage 1.5: PROJECT ARCHITECTURE CONTEXT --- - console.log(chalk.blue('--- Stage 1.5: Retrieving Project Architecture Context ---')); + verboseLog(options, chalk.blue('--- Stage 1.5: Retrieving Project Architecture Context ---')); const projectPath = options.projectPath || process.cwd(); - const projectSummary = await getProjectSummary(projectPath); + const projectSummary = await getProjectSummary(projectPath, options); // --- Stage 2: PREPARE CONTEXT FOR LLM --- - console.log(chalk.blue('--- Stage 2: Preparing Context for LLM ---')); + verboseLog(options, chalk.blue('--- Stage 2: Preparing Context for LLM ---')); // Format the lists that will be passed const formattedCodeExamples = formatContextItems(finalCodeExamples, 'code'); const formattedGuidelines = formatContextItems(finalGuidelineSnippets, 
'guideline'); // --- Log the context being sent to the LLM --- > - console.log(chalk.magenta('--- Guidelines Sent to LLM ---')); + verboseLog(options, chalk.magenta('--- Guidelines Sent to LLM ---')); if (formattedGuidelines.length > 0) { formattedGuidelines.forEach((g, i) => { - console.log(chalk.magenta(` [${i + 1}] Path: ${g.path} ${g.headingText ? `(Heading: "${g.headingText}")` : ''}`)); - console.log(chalk.gray(` Content: ${g.content.substring(0, 100).replace(/\\n/g, ' ')}...`)); + verboseLog(options, chalk.magenta(` [${i + 1}] Path: ${g.path} ${g.headingText ? `(Heading: "${g.headingText}")` : ''}`)); + verboseLog(options, chalk.gray(` Content: ${g.content.substring(0, 100).replace(/\\n/g, ' ')}...`)); }); } else { - console.log(chalk.magenta(' (None)')); + verboseLog(options, chalk.magenta(' (None)')); } - console.log(chalk.magenta('--- Code Examples Sent to LLM ---')); + verboseLog(options, chalk.magenta('--- Code Examples Sent to LLM ---')); if (finalCodeExamples.length > 0) { finalCodeExamples.forEach((ex, i) => { - console.log(chalk.magenta(` [${i + 1}] Path: ${ex.path} (Similarity: ${ex.similarity?.toFixed(3) || 'N/A'})`)); - console.log(chalk.gray(` Content: ${ex.content.substring(0, 100).replace(/\\n/g, ' ')}...`)); + verboseLog(options, chalk.magenta(` [${i + 1}] Path: ${ex.path} (Similarity: ${ex.similarity?.toFixed(3) || 'N/A'})`)); + verboseLog(options, chalk.gray(` Content: ${ex.content.substring(0, 100).replace(/\\n/g, ' ')}...`)); }); } else { - console.log(chalk.magenta(' (None)')); + verboseLog(options, chalk.magenta(' (None)')); } - console.log(chalk.magenta('--- Custom Document Chunks Sent to LLM ---')); + verboseLog(options, chalk.magenta('--- Custom Document Chunks Sent to LLM ---')); if (relevantCustomDocChunks && relevantCustomDocChunks.length > 0) { relevantCustomDocChunks.forEach((chunk, i) => { - console.log(chalk.magenta(` [${i + 1}] Document: "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`)); - 
console.log(chalk.magenta(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`)); - console.log(chalk.gray(` Content: ${chunk.content.substring(0, 100).replace(/\\n/g, ' ')}...`)); + verboseLog(options, chalk.magenta(` [${i + 1}] Document: "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`)); + verboseLog(options, chalk.magenta(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`)); + verboseLog(options, chalk.gray(` Content: ${chunk.content.substring(0, 100).replace(/\\n/g, ' ')}...`)); }); } else { - console.log(chalk.magenta(' (None)')); + verboseLog(options, chalk.magenta(' (None)')); } - console.log(chalk.magenta('---------------------------------')); + verboseLog(options, chalk.magenta('---------------------------------')); // --- End Logging ---> // Prepare context for LLM with the potentially reduced lists @@ -553,7 +553,7 @@ async function runAnalysis(filePath, options = {}) { // Post-process results to filter dismissed issues let filteredResults = lowSeverityFiltered; if (options.trackFeedback && feedbackData && Object.keys(feedbackData).length > 0) { - console.log(chalk.cyan('--- Filtering Results Based on Feedback ---')); + verboseLog(options, chalk.cyan('--- Filtering Results Based on Feedback ---')); filteredResults = await filterAnalysisResults(lowSeverityFiltered, feedbackData, { similarityThreshold: options.feedbackThreshold || 0.7, verbose: options.verbose, @@ -759,11 +759,10 @@ async function callLLMForAnalysis(context, options = {}) { cacheTtl: options.cacheTtl || '5m', // Pass cache TTL option (default: 5m, no extra cost) }); - console.log(chalk.blue('Received LLM response, attempting to parse...')); - - console.log(chalk.gray(`Response type: ${typeof llmResponse}`)); - console.log(chalk.gray(`Response has json: ${!!llmResponse?.json}`)); - console.log(chalk.gray(`Response content length: ${llmResponse?.content?.length || 0} characters`)); + verboseLog(options, chalk.blue('Received LLM response, attempting to parse...')); + 
verboseLog(options, chalk.gray(`Response type: ${typeof llmResponse}`)); + verboseLog(options, chalk.gray(`Response has json: ${!!llmResponse?.json}`)); + verboseLog(options, chalk.gray(`Response content length: ${llmResponse?.content?.length || 0} characters`)); // Parse the raw LLM response const analysisResponse = parseAnalysisResponse(llmResponse); @@ -780,7 +779,7 @@ async function callLLMForAnalysis(context, options = {}) { }; } - console.log(chalk.green('Successfully parsed LLM response with expected structure')); + verboseLog(options, chalk.green('Successfully parsed LLM response with expected structure')); return analysisResponse; } catch (error) { console.error(chalk.red(`Error calling LLM for analysis: ${error.message}`)); @@ -933,10 +932,10 @@ async function sendPromptToLLM(promptConfig, llmOptions) { }; // Debug: Log prompt structure - console.log(chalk.gray(` Prompt config has systemPrompt: ${!!promptConfig.systemPrompt}`)); - console.log(chalk.gray(` Prompt config has userPrompt: ${!!promptConfig.userPrompt}`)); - console.log(chalk.gray(` System prompt length: ${promptConfig.systemPrompt?.length || 0} chars`)); - console.log(chalk.gray(` User prompt length: ${promptConfig.userPrompt?.length || 0} chars`)); + verboseLog(llmOptions, chalk.gray(` Prompt config has systemPrompt: ${!!promptConfig.systemPrompt}`)); + verboseLog(llmOptions, chalk.gray(` Prompt config has userPrompt: ${!!promptConfig.userPrompt}`)); + verboseLog(llmOptions, chalk.gray(` System prompt length: ${promptConfig.systemPrompt?.length || 0} chars`)); + verboseLog(llmOptions, chalk.gray(` User prompt length: ${promptConfig.userPrompt?.length || 0} chars`)); // Send prompt with system (cached) and user (dynamic) content const response = await llm.sendPromptToClaude(promptConfig.userPrompt, { @@ -984,17 +983,17 @@ function generateAnalysisPrompt(context) { const { context: contextSections } = context; let prHistorySection = ''; - console.log(chalk.blue(`🔍 Checking for PR comments in 
prompt generation...`)); - console.log(chalk.gray(`Context sections available: ${contextSections ? contextSections.length : 0}`)); + verboseLog(context.options, chalk.blue(`🔍 Checking for PR comments in prompt generation...`)); + verboseLog(context.options, chalk.gray(`Context sections available: ${contextSections ? contextSections.length : 0}`)); if (contextSections && contextSections.length > 0) { contextSections.forEach((section, idx) => { - console.log(chalk.gray(` Section ${idx + 1}: ${section.title} (${section.items?.length || 0} items)`)); + verboseLog(context.options, chalk.gray(` Section ${idx + 1}: ${section.title} (${section.items?.length || 0} items)`)); }); const prComments = contextSections.find((section) => section.title === 'Historical Review Comments'); if (prComments && prComments.items.length > 0) { - console.log(chalk.green(`✅ Adding ${prComments.items.length} PR comments to LLM prompt`)); + verboseLog(context.options, chalk.green(`✅ Adding ${prComments.items.length} PR comments to LLM prompt`)); prHistorySection += ` CONTEXT C: HISTORICAL REVIEW COMMENTS @@ -1013,12 +1012,12 @@ Similar code patterns and issues identified by human reviewers in past PRs prHistorySection += `Use these historical patterns to identify DEFINITE issues in the current code. 
`; prHistorySection += `Only report issues that EXACTLY match historical patterns with SPECIFIC code fixes.\n\n`; - console.log(chalk.blue(`PR History section preview: ${prHistorySection.substring(0, 200)}...`)); + verboseLog(context.options, chalk.blue(`PR History section preview: ${prHistorySection.substring(0, 200)}...`)); } else { - console.log(chalk.yellow(`❌ No PR comments section found in context`)); + verboseLog(context.options, chalk.yellow(`❌ No PR comments section found in context`)); } } else { - console.log(chalk.yellow(`❌ No context sections available for PR comments`)); + verboseLog(context.options, chalk.yellow(`❌ No context sections available for PR comments`)); } // Detect if this is a diff review @@ -1471,7 +1470,7 @@ async function getPRCommentContext(filePath, options = {}) { let contentForSearch = ''; if (precomputedQueryEmbedding) { - console.log(chalk.blue(`🔍 Using pre-computed query embedding for PR comment search`)); + verboseLog(options, chalk.blue(`🔍 Using pre-computed query embedding for PR comment search`)); // We still need the file content for the search function, but not for embedding try { fileContent = fs.readFileSync(filePath, 'utf8'); @@ -1513,39 +1512,39 @@ async function getPRCommentContext(filePath, options = {}) { // Use semantic search to find similar PR comments let relevantComments = []; - console.log(chalk.blue(`🔍 Searching for PR comments with:`)); - - console.log(chalk.gray(` Project Path: ${projectPath}`)); - console.log(chalk.gray(` File: ${fileName}`)); - console.log(chalk.gray(` Similarity Threshold: ${similarityThreshold}`)); - console.log(chalk.gray(` Content Length: ${contentForSearch.length} chars`)); - console.log(chalk.gray(` Using Pre-computed Embedding: ${precomputedQueryEmbedding ? 
'Yes' : 'No'}`)); + verboseLog(options, chalk.blue(`🔍 Searching for PR comments with:`)); + verboseLog(options, chalk.gray(` Project Path: ${projectPath}`)); + verboseLog(options, chalk.gray(` File: ${fileName}`)); + verboseLog(options, chalk.gray(` Similarity Threshold: ${similarityThreshold}`)); + verboseLog(options, chalk.gray(` Content Length: ${contentForSearch.length} chars`)); + verboseLog(options, chalk.gray(` Using Pre-computed Embedding: ${precomputedQueryEmbedding ? 'Yes' : 'No'}`)); try { - console.log(chalk.blue(`🔍 Attempting hybrid search with chunking...`)); + verboseLog(options, chalk.blue(`🔍 Attempting hybrid search with chunking...`)); relevantComments = await findRelevantPRComments(contentForSearch, { projectPath, limit: maxComments, isTestFile: isTest, // Pass test file context for filtering precomputedQueryEmbedding: precomputedQueryEmbedding, // Pass pre-computed embedding if available }); - console.log(chalk.green(`✅ Hybrid search returned ${relevantComments.length} comments`)); + verboseLog(options, chalk.green(`✅ Hybrid search returned ${relevantComments.length} comments`)); if (relevantComments.length > 0) { - console.log(chalk.blue(`Top comment similarities:`)); + verboseLog(options, chalk.blue(`Top comment similarities:`)); relevantComments.slice(0, 3).forEach((comment, idx) => { - console.log( + verboseLog( + options, chalk.gray(` ${idx + 1}. 
Score: ${comment.similarity_score?.toFixed(3)} - ${comment.comment_text?.substring(0, 80)}...`) ); }); } } catch (dbError) { - console.log(chalk.yellow(`⚠️ Hybrid search failed: ${dbError.message}`)); + console.warn(chalk.yellow(`⚠️ Hybrid search failed: ${dbError.message}`)); debug(`[getPRCommentContext] Hybrid search failed: ${dbError.message}`); // No fallback needed - if hybrid search fails, we just return empty results relevantComments = []; } - console.log('Total relevant comments number:', relevantComments.length); + verboseLog(options, 'Total relevant comments number:', relevantComments.length); // Extract patterns and insights const patterns = extractCommentPatterns(relevantComments); @@ -1674,12 +1673,12 @@ async function performHolisticPRAnalysis(options) { try { const { prFiles, unifiedContext, customDocs } = options; - console.log(chalk.blue(`🔍 Performing holistic analysis of ${prFiles.length} files with unified context...`)); + verboseLog(options, chalk.blue(`🔍 Performing holistic analysis of ${prFiles.length} files with unified context...`)); // Retrieve project architecture summary - console.log(chalk.blue('--- Retrieving Project Architecture Context for Holistic PR Review ---')); + verboseLog(options, chalk.blue('--- Retrieving Project Architecture Context for Holistic PR Review ---')); const projectPath = options.projectPath || process.cwd(); - const projectSummary = await getProjectSummary(projectPath); + const projectSummary = await getProjectSummary(projectPath, options); // Create a synthetic file context for holistic analysis const holisticContext = { @@ -1727,56 +1726,58 @@ async function performHolisticPRAnalysis(options) { }; // Add verbose debug logging similar to individual file reviews - console.log(chalk.magenta('--- Holistic PR Review: Guidelines Sent to LLM ---')); + verboseLog(options, chalk.magenta('--- Holistic PR Review: Guidelines Sent to LLM ---')); if (unifiedContext.guidelines.length > 0) { unifiedContext.guidelines.slice(0, 
10).forEach((g, i) => { - console.log( + verboseLog( + options, chalk.magenta( ` [${i + 1}] Path: ${g.path} ${g.headingText || g.heading_text ? `(Heading: "${g.headingText || g.heading_text}")` : ''}` ) ); - console.log(chalk.gray(` Content: ${g.content.substring(0, 100).replace(/\n/g, ' ')}...`)); + verboseLog(options, chalk.gray(` Content: ${g.content.substring(0, 100).replace(/\n/g, ' ')}...`)); }); } else { - console.log(chalk.magenta(' (None)')); + verboseLog(options, chalk.magenta(' (None)')); } - console.log(chalk.magenta('--- Holistic PR Review: Code Examples Sent to LLM ---')); + verboseLog(options, chalk.magenta('--- Holistic PR Review: Code Examples Sent to LLM ---')); if (unifiedContext.codeExamples.length > 0) { unifiedContext.codeExamples.slice(0, 10).forEach((ex, i) => { - console.log(chalk.magenta(` [${i + 1}] Path: ${ex.path} (Similarity: ${ex.similarity?.toFixed(3) || 'N/A'})`)); - console.log(chalk.gray(` Content: ${ex.content.substring(0, 100).replace(/\\n/g, ' ')}...`)); + verboseLog(options, chalk.magenta(` [${i + 1}] Path: ${ex.path} (Similarity: ${ex.similarity?.toFixed(3) || 'N/A'})`)); + verboseLog(options, chalk.gray(` Content: ${ex.content.substring(0, 100).replace(/\\n/g, ' ')}...`)); }); } else { - console.log(chalk.magenta(' (None)')); + verboseLog(options, chalk.magenta(' (None)')); } - console.log(chalk.magenta('--- Holistic PR Review: Top Historic Comments Sent to LLM ---')); + verboseLog(options, chalk.magenta('--- Holistic PR Review: Top Historic Comments Sent to LLM ---')); if (unifiedContext.prComments.length > 0) { unifiedContext.prComments.slice(0, 5).forEach((comment, i) => { - console.log( + verboseLog( + options, chalk.magenta( ` [${i + 1}] PR #${comment.prNumber} by ${comment.author} (Relevance: ${(comment.relevanceScore * 100).toFixed(1)}%)` ) ); - console.log(chalk.gray(` File: ${comment.filePath}`)); - console.log(chalk.gray(` Comment: ${comment.body.substring(0, 100).replace(/\n/g, ' ')}...`)); + verboseLog(options, 
chalk.gray(` File: ${comment.filePath}`)); + verboseLog(options, chalk.gray(` Comment: ${comment.body.substring(0, 100).replace(/\n/g, ' ')}...`)); }); } else { - console.log(chalk.magenta(' (None)')); + verboseLog(options, chalk.magenta(' (None)')); } - console.log(chalk.magenta('--- Holistic PR Review: Custom Document Chunks Sent to LLM ---')); + verboseLog(options, chalk.magenta('--- Holistic PR Review: Custom Document Chunks Sent to LLM ---')); if (unifiedContext.customDocChunks && unifiedContext.customDocChunks.length > 0) { unifiedContext.customDocChunks.forEach((chunk, i) => { - console.log(chalk.magenta(` [${i + 1}] Document: "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`)); - console.log(chalk.gray(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`)); - console.log(chalk.gray(` Content: ${chunk.content.substring(0, 100).replace(/\n/g, ' ')}...`)); + verboseLog(options, chalk.magenta(` [${i + 1}] Document: "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`)); + verboseLog(options, chalk.gray(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`)); + verboseLog(options, chalk.gray(` Content: ${chunk.content.substring(0, 100).replace(/\n/g, ' ')}...`)); }); } else { - console.log(chalk.magenta(' (None)')); + verboseLog(options, chalk.magenta(' (None)')); } - console.log(chalk.magenta('--- Sending Holistic PR Analysis Prompt to LLM ---')); + verboseLog(options, chalk.magenta('--- Sending Holistic PR Analysis Prompt to LLM ---')); // Call the centralized analysis function const parsedResponse = await callLLMForAnalysis(holisticContext, { @@ -1785,11 +1786,11 @@ async function performHolisticPRAnalysis(options) { }); // Debug logging - console.log(chalk.blue(`🐛 Holistic analysis parsed response:`)); - console.log(chalk.gray(`Summary: ${parsedResponse.summary?.substring(0, 100)}...`)); - console.log(chalk.gray(`Cross-file issues: ${parsedResponse.crossFileIssues?.length || 0}`)); - console.log(chalk.gray(`File-specific issues keys: 
${Object.keys(parsedResponse.fileSpecificIssues || {}).join(', ')}`)); - console.log(chalk.gray(`Recommendations: ${parsedResponse.recommendations?.length || 0}`)); + verboseLog(options, chalk.blue(`🐛 Holistic analysis parsed response:`)); + verboseLog(options, chalk.gray(`Summary: ${parsedResponse.summary?.substring(0, 100)}...`)); + verboseLog(options, chalk.gray(`Cross-file issues: ${parsedResponse.crossFileIssues?.length || 0}`)); + verboseLog(options, chalk.gray(`File-specific issues keys: ${Object.keys(parsedResponse.fileSpecificIssues || {}).join(', ')}`)); + verboseLog(options, chalk.gray(`Recommendations: ${parsedResponse.recommendations?.length || 0}`)); // Filter out low severity issues (formatting/style concerns handled by linters) // Note: The LLM prompt instructs not to generate low severity issues, but this filter @@ -1881,12 +1882,15 @@ async function getContextForFile(filePath, content, options = {}) { guidelineQueryEmbedding = await embeddingsSystem.calculateQueryEmbedding(guidelineQuery); } - console.log(chalk.blue('� Starting parallel context retrieval...')); + verboseLog(options, chalk.blue('� Starting parallel context retrieval...')); // Helper function to process custom documents in parallel (with caching) const processCustomDocuments = async () => { // Check if preprocessed chunks are available (from PR-level processing) if (options.preprocessedCustomDocChunks && options.preprocessedCustomDocChunks.length > 0) { - console.log(chalk.blue(`📄 Using preprocessed custom document chunks (${options.preprocessedCustomDocChunks.length} available)`)); + verboseLog( + options, + chalk.blue(`📄 Using preprocessed custom document chunks (${options.preprocessedCustomDocChunks.length} available)`) + ); // Use the guideline query for finding relevant custom document chunks const relevantChunks = await embeddingsSystem.findRelevantCustomDocChunks(guidelineQuery, options.preprocessedCustomDocChunks, { @@ -1898,15 +1902,15 @@ async function 
getContextForFile(filePath, content, options = {}) { queryFilePath: filePath, }); - console.log(chalk.green(`📄 Found ${relevantChunks.length} relevant custom document chunks`)); + verboseLog(options, chalk.green(`📄 Found ${relevantChunks.length} relevant custom document chunks`)); // Log which chunks made the cut if (relevantChunks.length > 0) { - console.log(chalk.cyan('📋 Custom Document Chunks Selected:')); + verboseLog(options, chalk.cyan('📋 Custom Document Chunks Selected:')); relevantChunks.forEach((chunk, i) => { - console.log(chalk.cyan(` [${i + 1}] "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`)); - console.log(chalk.gray(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`)); - console.log(chalk.gray(` Content: ${chunk.content.substring(0, 80).replace(/\n/g, ' ')}...`)); + verboseLog(options, chalk.cyan(` [${i + 1}] "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`)); + verboseLog(options, chalk.gray(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`)); + verboseLog(options, chalk.gray(` Content: ${chunk.content.substring(0, 80).replace(/\n/g, ' ')}...`)); }); } @@ -1919,17 +1923,17 @@ async function getContextForFile(filePath, content, options = {}) { } try { - console.log(chalk.blue('📄 Processing custom documents for context...')); + verboseLog(options, chalk.blue('📄 Processing custom documents for context...')); // Check if custom documents are already processed for this project let processedChunks = await checkExistingCustomDocumentChunks(projectPath); if (!processedChunks || processedChunks.length === 0) { - console.log(chalk.cyan('📄 Custom documents not yet processed for this project, processing now...')); + verboseLog(options, chalk.cyan('📄 Custom documents not yet processed for this project, processing now...')); // Process custom documents into chunks (only if not already processed) processedChunks = await embeddingsSystem.processCustomDocumentsInMemory(options.customDocs, projectPath); } else { - 
console.log(chalk.green(`📄 Reusing ${processedChunks.length} already processed custom document chunks`)); + verboseLog(options, chalk.green(`📄 Reusing ${processedChunks.length} already processed custom document chunks`)); } if (processedChunks.length > 0) { @@ -1943,15 +1947,15 @@ async function getContextForFile(filePath, content, options = {}) { queryFilePath: filePath, }); - console.log(chalk.green(`📄 Found ${relevantChunks.length} relevant custom document chunks`)); + verboseLog(options, chalk.green(`📄 Found ${relevantChunks.length} relevant custom document chunks`)); // Log which chunks made the cut if (relevantChunks.length > 0) { - console.log(chalk.cyan('📋 Custom Document Chunks Selected:')); + verboseLog(options, chalk.cyan('📋 Custom Document Chunks Selected:')); relevantChunks.forEach((chunk, i) => { - console.log(chalk.cyan(` [${i + 1}] "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`)); - console.log(chalk.gray(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`)); - console.log(chalk.gray(` Content: ${chunk.content.substring(0, 80).replace(/\n/g, ' ')}...`)); + verboseLog(options, chalk.cyan(` [${i + 1}] "${chunk.document_title}" (Chunk ${chunk.chunk_index + 1})`)); + verboseLog(options, chalk.gray(` Similarity: ${chunk.similarity?.toFixed(3) || 'N/A'}`)); + verboseLog(options, chalk.gray(` Content: ${chunk.content.substring(0, 80).replace(/\n/g, ' ')}...`)); }); } @@ -1970,7 +1974,7 @@ async function getContextForFile(filePath, content, options = {}) { // Use the statically imported function return await embeddingsSystem.getExistingCustomDocumentChunks(projectPath); } catch { - console.log(chalk.gray('No existing custom document chunks found, will process from scratch')); + verboseLog(options, chalk.gray('No existing custom document chunks found, will process from scratch')); return []; } }; @@ -2030,7 +2034,7 @@ async function getContextForFile(filePath, content, options = {}) { const prCommentContext = prContextResult?.comments || []; 
const prContextAvailable = prCommentContext.length > 0; - console.log(chalk.green(`✅ Found ${prCommentContext.length} relevant PR comments`)); + verboseLog(options, chalk.green(`✅ Found ${prCommentContext.length} relevant PR comments`)); const documentChunks = Array.isArray(guidelineCandidates) ? guidelineCandidates.filter((c) => c.type === 'documentation-chunk') : []; const chunksByDocument = new Map(); @@ -2192,21 +2196,21 @@ async function gatherUnifiedContextForPR(prFiles, options = {}) { let globalCustomDocChunks = []; if (options.customDocs && options.customDocs.length > 0) { const projectPath = options.projectPath || process.cwd(); - console.log(chalk.blue('📄 Processing custom documents once for entire PR...')); + verboseLog(options, chalk.blue('📄 Processing custom documents once for entire PR...')); try { // Check if custom documents are already processed for this project let processedChunks = await embeddingsSystem.getExistingCustomDocumentChunks(projectPath); if (!processedChunks || processedChunks.length === 0) { - console.log(chalk.cyan('📄 Custom documents not yet processed for this project, processing now...')); + verboseLog(options, chalk.cyan('📄 Custom documents not yet processed for this project, processing now...')); processedChunks = await embeddingsSystem.processCustomDocumentsInMemory(options.customDocs, projectPath); } else { - console.log(chalk.green(`📄 Reusing ${processedChunks.length} already processed custom document chunks`)); + verboseLog(options, chalk.green(`📄 Reusing ${processedChunks.length} already processed custom document chunks`)); } globalCustomDocChunks = processedChunks; - console.log(chalk.green(`📄 Custom documents processed: ${globalCustomDocChunks.length} chunks available for PR analysis`)); + verboseLog(options, chalk.green(`📄 Custom documents processed: ${globalCustomDocChunks.length} chunks available for PR analysis`)); } catch (error) { console.error(chalk.red(`Error processing custom documents for PR: 
${error.message}`)); } @@ -2306,6 +2310,7 @@ async function gatherUnifiedContextForPR(prFiles, options = {}) { * * @param {Object} analysisResults - Analysis results from LLM * @param {Object} options - Filtering options + * @param {boolean} [options.verbose=false] - Enable verbose logging for filtered issues * @returns {Object} Filtered analysis results without low severity issues */ function filterLowSeverityIssues(analysisResults, options = {}) { @@ -2323,9 +2328,7 @@ function filterLowSeverityIssues(analysisResults, options = {}) { analysisResults.issues = analysisResults.issues.filter((issue) => { const severity = (issue.severity || '').toLowerCase(); if (severity === 'low') { - if (verbose) { - console.log(chalk.yellow(` Filtering low severity issue: "${(issue.description || '').substring(0, 50)}..."`)); - } + verboseLog(verbose, chalk.yellow(` Filtering low severity issue: "${(issue.description || '').substring(0, 50)}..."`)); return false; } return true; @@ -2339,11 +2342,10 @@ function filterLowSeverityIssues(analysisResults, options = {}) { analysisResults.crossFileIssues = analysisResults.crossFileIssues.filter((issue) => { const severity = (issue.severity || '').toLowerCase(); if (severity === 'low') { - if (verbose) { - console.log( - chalk.yellow(` Filtering low severity cross-file issue: "${(issue.message || issue.description || '').substring(0, 50)}..."`) - ); - } + verboseLog( + verbose, + chalk.yellow(` Filtering low severity cross-file issue: "${(issue.message || issue.description || '').substring(0, 50)}..."`) + ); return false; } return true; @@ -2360,11 +2362,10 @@ function filterLowSeverityIssues(analysisResults, options = {}) { analysisResults.fileSpecificIssues[filePath] = issues.filter((issue) => { const severity = (issue.severity || '').toLowerCase(); if (severity === 'low') { - if (verbose) { - console.log( - chalk.yellow(` Filtering low severity issue in ${filePath}: "${(issue.description || '').substring(0, 50)}..."`) - ); - } + 
verboseLog( + verbose, + chalk.yellow(` Filtering low severity issue in ${filePath}: "${(issue.description || '').substring(0, 50)}..."`) + ); return false; } return true; @@ -2374,9 +2375,10 @@ function filterLowSeverityIssues(analysisResults, options = {}) { } } - if (filteredCount > 0) { - console.log(chalk.cyan(`🔇 Filtered ${filteredCount} low severity issue(s) (formatting/style concerns handled by linters)`)); - } + verboseLog( + verbose && filteredCount > 0, + chalk.cyan(`🔇 Filtered ${filteredCount} low severity issue(s) (formatting/style concerns handled by linters)`) + ); return analysisResults; } @@ -2387,6 +2389,8 @@ function filterLowSeverityIssues(analysisResults, options = {}) { * @param {Object} analysisResults - Raw analysis results from LLM * @param {Object} feedbackData - Loaded feedback data * @param {Object} options - Filtering options + * @param {number} [options.similarityThreshold=0.7] - Threshold for considering issues similar to dismissed feedback + * @param {boolean} [options.verbose=false] - Enable verbose similarity and filtering logs * @returns {Promise} Filtered analysis results */ async function filterAnalysisResults(analysisResults, feedbackData, options = {}) { @@ -2402,12 +2406,11 @@ async function filterAnalysisResults(analysisResults, feedbackData, options = {} await ensureSemanticSimilarityInitialized(); // Log whether semantic similarity is available - if (verbose) { - const usingSemanticSimilarity = isSemanticSimilarityAvailable(); - console.log( - chalk.cyan(`🔍 Filtering issues using ${usingSemanticSimilarity ? 'semantic + word-based similarity' : 'word-based similarity only'}`) - ); - } + const usingSemanticSimilarity = isSemanticSimilarityAvailable(); + verboseLog( + verbose, + chalk.cyan(`🔍 Filtering issues using ${usingSemanticSimilarity ? 
'semantic + word-based similarity' : 'word-based similarity only'}`) + ); // Filter issues based on feedback (now async due to semantic similarity) const filterResults = await Promise.all( @@ -2418,9 +2421,7 @@ async function filterAnalysisResults(analysisResults, feedbackData, options = {} verbose, }); - if (shouldSkip && verbose) { - console.log(chalk.yellow(` Filtered issue ${index + 1}: "${issueDescription.substring(0, 50)}..."`)); - } + verboseLog(shouldSkip && verbose, chalk.yellow(` Filtered issue ${index + 1}: "${issueDescription.substring(0, 50)}..."`)); return { issue, shouldSkip }; }) @@ -2430,9 +2431,10 @@ async function filterAnalysisResults(analysisResults, feedbackData, options = {} const filteredCount = originalCount - filteredIssues.length; - if (verbose && filteredCount > 0) { - console.log(chalk.green(`✅ Filtered ${filteredCount} dismissed issues, ${filteredIssues.length} remaining`)); - } + verboseLog( + verbose && filteredCount > 0, + chalk.green(`✅ Filtered ${filteredCount} dismissed issues, ${filteredIssues.length} remaining`) + ); return { ...analysisResults, diff --git a/src/rag-analyzer.test.js b/src/rag-analyzer.test.js index bc1beda..929ba83 100644 --- a/src/rag-analyzer.test.js +++ b/src/rag-analyzer.test.js @@ -77,6 +77,12 @@ vi.mock('./utils/language-detection.js', () => ({ vi.mock('./utils/logging.js', () => ({ debug: vi.fn(), + verboseLog: vi.fn((options, ...args) => { + if (typeof options === 'boolean' ? 
options : Boolean(options?.verbose)) { + console.log(...args); + } + }), + isVerboseEnabled: vi.fn((options) => Boolean(options?.verbose)), })); vi.mock('./utils/context-inference.js', () => ({ @@ -820,6 +826,23 @@ describe('rag-analyzer', () => { // ========================================================================== describe('verbose logging paths', () => { + it('should suppress context dump logs when not verbose', async () => { + mockEmbeddingsSystem.findSimilarCode.mockResolvedValue([{ path: '/example.js', content: 'code', similarity: 0.9 }]); + mockEmbeddingsSystem.findRelevantDocs.mockResolvedValue([ + { path: '/docs/api.md', content: 'docs', similarity: 0.8, type: 'documentation-chunk', document_title: 'API' }, + ]); + findRelevantPRComments.mockResolvedValue([createMockPRComment()]); + setupSuccessfulLLMResponse(); + + const result = await runAnalysis('/test/file.js'); + + expect(result.success).toBe(true); + const loggedMessages = console.log.mock.calls.flat().join('\n'); + expect(loggedMessages).not.toContain('Guidelines Sent to LLM'); + expect(loggedMessages).not.toContain('Checking for PR comments in prompt generation'); + expect(loggedMessages).not.toContain('Received LLM response, attempting to parse'); + }); + it('should log context information when verbose', async () => { mockEmbeddingsSystem.findSimilarCode.mockResolvedValue([{ path: '/example.js', content: 'code', similarity: 0.9 }]); mockEmbeddingsSystem.findRelevantDocs.mockResolvedValue([ diff --git a/src/rag-review.js b/src/rag-review.js index 592432a..c5692e0 100644 --- a/src/rag-review.js +++ b/src/rag-review.js @@ -12,6 +12,7 @@ import { runAnalysis, gatherUnifiedContextForPR } from './rag-analyzer.js'; import { shouldProcessFile } from './utils/file-validation.js'; import { findBaseBranch, getChangedLinesInfo, getFileContentFromGit } from './utils/git.js'; import { detectFileType, detectLanguageFromExtension } from './utils/language-detection.js'; +import { verboseLog } from 
'./utils/logging.js'; import { shouldChunkPR, chunkPRFiles, combineChunkResults } from './utils/pr-chunking.js'; /** @@ -19,11 +20,12 @@ import { shouldChunkPR, chunkPRFiles, combineChunkResults } from './utils/pr-chu * * @param {string} filePath - Path to the file to review * @param {object} options - Review options + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @returns {Promise} Review result object */ async function reviewFile(filePath, options = {}) { try { - console.log(chalk.blue(`Reviewing file: ${filePath}`)); + verboseLog(options, chalk.blue(`Reviewing file: ${filePath}`)); // Analyze the file using the RAG analyzer const analyzeResult = await runAnalysis(filePath, options); @@ -48,7 +50,7 @@ async function reviewFile(filePath, options = {}) { // Convert object results to array format expected by the output functions if (analyzeResult.results && !Array.isArray(analyzeResult.results)) { - console.log(chalk.blue('Converting results object to array format')); + verboseLog(options, chalk.blue('Converting results object to array format')); // Create a new array with one entry containing the object results const resultArray = [ @@ -86,14 +88,14 @@ async function reviewFile(filePath, options = {}) { * * @param {Array} filePaths - Paths to the files to review * @param {Object} options - Review options (passed to each reviewFile call) + * @param {boolean} [options.verbose=false] - Enable verbose progress logging + * @param {number} [options.concurrency=3] - Maximum number of files to review in parallel * @returns {Promise} Aggregated review results { success: boolean, results: Array, message: string, error?: string } */ async function reviewFiles(filePaths, options = {}) { try { const verbose = options.verbose || false; - if (verbose) { - console.log(chalk.blue(`Reviewing ${filePaths.length} files...`)); - } + verboseLog(verbose, chalk.blue(`Reviewing ${filePaths.length} files...`)); // Review files concurrently const results = 
[]; @@ -103,15 +105,12 @@ async function reviewFiles(filePaths, options = {}) { for (let i = 0; i < filePaths.length; i += concurrency) { const batch = filePaths.slice(i, i + concurrency); - if (verbose) { - console.log( - chalk.blue( - `Processing review batch ${Math.floor(i / concurrency) + 1}/${Math.ceil(filePaths.length / concurrency)} (${ - batch.length - } files)` - ) - ); - } + verboseLog( + verbose, + chalk.blue( + `Processing review batch ${Math.floor(i / concurrency) + 1}/${Math.ceil(filePaths.length / concurrency)} (${batch.length} files)` + ) + ); // Pass options down to reviewFile const batchPromises = batch.map((filePath) => reviewFile(filePath, options)); @@ -137,7 +136,7 @@ async function reviewFiles(filePaths, options = {}) { let finalMessage = `Review completed for ${filePaths.length} files. `; finalMessage += `Success: ${successCount}, Skipped: ${skippedCount}, Errors: ${errorCount}.`; - console.log(chalk.green(finalMessage)); + verboseLog(options, chalk.green(finalMessage)); return { success: errorCount === 0, @@ -161,14 +160,13 @@ async function reviewFiles(filePaths, options = {}) { * * @param {Array} changedFilePaths - Array of file paths changed in the PR. * @param {Object} options - Review options (passed to reviewFiles). + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @returns {Promise} Aggregated review results. */ async function reviewPullRequest(changedFilePaths, options = {}) { try { const verbose = options.verbose || false; - if (verbose) { - console.log(chalk.blue(`Reviewing ${changedFilePaths.length} changed files from PR...`)); - } + verboseLog(verbose, chalk.blue(`Reviewing ${changedFilePaths.length} changed files from PR...`)); // No longer filter files here, as new files in a different branch won't exist locally. // The downstream functions are responsible for fetching content from git. 
@@ -176,7 +174,7 @@ async function reviewPullRequest(changedFilePaths, options = {}) { if (filesToReview.length === 0) { const message = 'No processable files found among the changed files provided for PR review.'; - console.log(chalk.yellow(message)); + verboseLog(options, chalk.yellow(message)); return { success: true, message: message, @@ -184,9 +182,7 @@ async function reviewPullRequest(changedFilePaths, options = {}) { }; } - if (verbose) { - console.log(chalk.green(`Reviewing ${filesToReview.length} existing and processable changed files`)); - } + verboseLog(verbose, chalk.green(`Reviewing ${filesToReview.length} existing and processable changed files`)); // Use enhanced PR review with cross-file context return await reviewPullRequestWithCrossFileContext(filesToReview, options); @@ -207,14 +203,18 @@ async function reviewPullRequest(changedFilePaths, options = {}) { * * @param {Array} filesToReview - Array of file paths to review * @param {Object} options - Review options + * @param {boolean} [options.verbose=false] - Enable verbose progress logging + * @param {number} [options.concurrency=3] - Maximum number of fallback per-file reviews to run in parallel + * @param {string} [options.directory] - Working directory for git operations + * @param {string} [options.diffWith='HEAD'] - Branch or ref to compare against + * @param {string} [options.actualBranch] - Actual target branch used for content retrieval + * @param {boolean} [options.skipChunking=false] - Skip PR chunking, used internally for chunk review recursion * @returns {Promise} Aggregated review results */ async function reviewPullRequestWithCrossFileContext(filesToReview, options = {}) { try { const verbose = options.verbose || false; - if (verbose) { - console.log(chalk.blue(`Starting enhanced PR review with cross-file context for ${filesToReview.length} files...`)); - } + verboseLog(verbose, chalk.blue(`Starting enhanced PR review with cross-file context for ${filesToReview.length} files...`)); // 
Step 1: Get the base branch and collect diff info for all files in the PR const workingDir = options.directory ? path.resolve(options.directory) : process.cwd(); @@ -224,18 +224,14 @@ async function reviewPullRequestWithCrossFileContext(filesToReview, options = {} // Get the actual branch name from options passed from index.js const actualTargetBranch = options.actualBranch || targetBranch; - if (verbose) { - console.log(chalk.gray(`Base branch: ${baseBranch}, Target branch: ${targetBranch}`)); - } + verboseLog(verbose, chalk.gray(`Base branch: ${baseBranch}, Target branch: ${targetBranch}`)); const prFiles = []; for (const filePath of filesToReview) { try { // Check if the file should be processed before fetching its content from git if (!shouldProcessFile(filePath, '', options)) { - if (verbose) { - console.log(chalk.yellow(`Skipping file due to exclusion rules: ${path.basename(filePath)}`)); - } + verboseLog(verbose, chalk.yellow(`Skipping file due to exclusion rules: ${path.basename(filePath)}`)); continue; } @@ -247,9 +243,7 @@ async function reviewPullRequestWithCrossFileContext(filesToReview, options = {} const diffInfo = getChangedLinesInfo(filePath, baseBranch, actualTargetBranch, workingDir); if (!diffInfo.hasChanges) { - if (verbose) { - console.log(chalk.yellow(`No changes detected in ${path.basename(filePath)}, skipping`)); - } + verboseLog(verbose, chalk.yellow(`No changes detected in ${path.basename(filePath)}, skipping`)); continue; } @@ -275,7 +269,7 @@ async function reviewPullRequestWithCrossFileContext(filesToReview, options = {} } if (prFiles.length === 0) { - console.log(chalk.yellow('No files with changes found for review')); + verboseLog(options, chalk.yellow('No files with changes found for review')); return { success: true, results: [], @@ -285,25 +279,22 @@ async function reviewPullRequestWithCrossFileContext(filesToReview, options = {} // Check if PR should be chunked based on size and complexity (skip if this is already a chunk) if 
(!options.skipChunking) { - const chunkingDecision = shouldChunkPR(prFiles); - if (verbose) { - console.log(chalk.blue(`PR size assessment: ${chunkingDecision.estimatedTokens} tokens, ${prFiles.length} files`)); - if (chunkingDecision.shouldChunk) { - console.log(chalk.yellow(`Large PR detected - will chunk into ~${chunkingDecision.recommendedChunks} chunks`)); - } - } + const chunkingDecision = shouldChunkPR(prFiles, options); + verboseLog(verbose, chalk.blue(`PR size assessment: ${chunkingDecision.estimatedTokens} tokens, ${prFiles.length} files`)); + verboseLog( + verbose && chunkingDecision.shouldChunk, + chalk.yellow(`Large PR detected - will chunk into ~${chunkingDecision.recommendedChunks} chunks`) + ); // If PR is too large, use chunked processing if (chunkingDecision.shouldChunk) { - console.log(chalk.blue(`🔄 Using chunked processing for large PR (${chunkingDecision.estimatedTokens} tokens)`)); + verboseLog(options, chalk.blue(`🔄 Using chunked processing for large PR (${chunkingDecision.estimatedTokens} tokens)`)); return await reviewLargePRInChunks(prFiles, options); } } // Step 2: Gather unified context for the entire PR (for regular-sized PRs) - if (verbose) { - console.log(chalk.blue(`Performing unified context retrieval for ${prFiles.length} PR files...`)); - } + verboseLog(verbose, chalk.blue(`Performing unified context retrieval for ${prFiles.length} PR files...`)); const { codeExamples: deduplicatedCodeExamples, guidelines: deduplicatedGuidelines, @@ -311,13 +302,12 @@ async function reviewPullRequestWithCrossFileContext(filesToReview, options = {} customDocChunks: deduplicatedCustomDocChunks, } = await gatherUnifiedContextForPR(prFiles, options); - if (verbose) { - console.log( - chalk.green( - `De-duplicated context: ${deduplicatedCodeExamples.length} code examples, ${deduplicatedGuidelines.length} guidelines, ${deduplicatedPRComments.length} PR comments, ${deduplicatedCustomDocChunks.length} custom doc chunks` - ) - ); - } + verboseLog( + 
verbose, + chalk.green( + `De-duplicated context: ${deduplicatedCodeExamples.length} code examples, ${deduplicatedGuidelines.length} guidelines, ${deduplicatedPRComments.length} PR comments, ${deduplicatedCustomDocChunks.length} custom doc chunks` + ) + ); // Step 3: Create PR context summary for LLM const prContext = { @@ -334,9 +324,7 @@ async function reviewPullRequestWithCrossFileContext(filesToReview, options = {} }; // Step 4: Perform holistic PR review with all files and unified context - if (verbose) { - console.log(chalk.blue(`Performing holistic PR review for all ${prFiles.length} files...`)); - } + verboseLog(verbose, chalk.blue(`Performing holistic PR review for all ${prFiles.length} files...`)); try { // Create a comprehensive review context with all files and their diffs @@ -391,15 +379,15 @@ async function reviewPullRequestWithCrossFileContext(filesToReview, options = {} for (const key of possibleKeys) { if (holisticResult?.results?.fileSpecificIssues?.[key]) { fileIssues = holisticResult.results.fileSpecificIssues[key]; - console.log(chalk.green(`✅ Found ${fileIssues.length} issues for ${baseName} using key: "${key}"`)); + verboseLog(options, chalk.green(`✅ Found ${fileIssues.length} issues for ${baseName} using key: "${key}"`)); break; } } - console.log(chalk.gray(`🔍 Mapping issues for ${file.filePath}:`)); - console.log(chalk.gray(` - Relative path: "${relativePath}"`)); - console.log(chalk.gray(` - Tried keys: ${possibleKeys.map((k) => `"${k}"`).join(', ')}`)); - console.log(chalk.gray(` - Final issues: ${fileIssues.length}`)); + verboseLog(options, chalk.gray(`🔍 Mapping issues for ${file.filePath}:`)); + verboseLog(options, chalk.gray(` - Relative path: "${relativePath}"`)); + verboseLog(options, chalk.gray(` - Tried keys: ${possibleKeys.map((k) => `"${k}"`).join(', ')}`)); + verboseLog(options, chalk.gray(` - Final issues: ${fileIssues.length}`)); return { success: true, @@ -445,9 +433,7 @@ async function 
reviewPullRequestWithCrossFileContext(filesToReview, options = {} console.error(chalk.red(`Error in holistic PR review: ${error.message}`)); // Fallback to individual file review if holistic review fails - if (verbose) { - console.log(chalk.yellow(`Falling back to individual file reviews...`)); - } + verboseLog(verbose, chalk.yellow(`Falling back to individual file reviews...`)); const results = []; const concurrency = options.concurrency || 3; @@ -524,26 +510,27 @@ async function reviewPullRequestWithCrossFileContext(filesToReview, options = {} * Reviews a large PR by splitting it into manageable chunks and processing them in parallel * @param {Array} prFiles - Array of PR files with diff content * @param {Object} options - Review options + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @returns {Promise} Combined review results */ async function reviewLargePRInChunks(prFiles, options) { - console.log(chalk.blue(`🔄 Large PR detected: ${prFiles.length} files. Splitting into chunks...`)); + verboseLog(options, chalk.blue(`🔄 Large PR detected: ${prFiles.length} files. 
Splitting into chunks...`)); // Step 1: Gather shared context once for all chunks - console.log(chalk.cyan('📚 Gathering shared context for entire PR...')); + verboseLog(options, chalk.cyan('📚 Gathering shared context for entire PR...')); const sharedContext = await gatherUnifiedContextForPR(prFiles, options); // Step 2: Split PR into manageable chunks // Each chunk includes both diff AND full file content, plus ~25k context overhead const chunks = chunkPRFiles(prFiles, 35000); // Conservative limit accounting for context overhead - console.log(chalk.green(`✂️ Split PR into ${chunks.length} chunks`)); + verboseLog(options, chalk.green(`✂️ Split PR into ${chunks.length} chunks`)); chunks.forEach((chunk, i) => { - console.log(chalk.gray(` Chunk ${i + 1}: ${chunk.files.length} files (~${chunk.totalTokens} tokens)`)); + verboseLog(options, chalk.gray(` Chunk ${i + 1}: ${chunk.files.length} files (~${chunk.totalTokens} tokens)`)); }); // Step 3: Process chunks in parallel - console.log(chalk.blue('🔄 Processing chunks in parallel...')); + verboseLog(options, chalk.blue('🔄 Processing chunks in parallel...')); const settledChunkResults = await Promise.allSettled( chunks.map((chunk, index) => reviewPRChunk(chunk, sharedContext, options, index + 1, chunks.length)) ); @@ -568,8 +555,8 @@ async function reviewLargePRInChunks(prFiles, options) { }); // Step 4: Combine results - console.log(chalk.blue('🔗 Combining chunk results...')); - return combineChunkResults(chunkResults, prFiles.length); + verboseLog(options, chalk.blue('🔗 Combining chunk results...')); + return combineChunkResults(chunkResults, prFiles.length, options); } /** @@ -577,12 +564,13 @@ async function reviewLargePRInChunks(prFiles, options) { * @param {Object} chunk - Chunk object with files array * @param {Object} sharedContext - Pre-gathered shared context * @param {Object} options - Review options + * @param {boolean} [options.verbose=false] - Enable verbose progress logging * @param {number} chunkNumber - 
Current chunk number * @param {number} totalChunks - Total number of chunks * @returns {Promise} Chunk review results */ async function reviewPRChunk(chunk, sharedContext, options, chunkNumber, totalChunks) { - console.log(chalk.cyan(`📝 Reviewing chunk ${chunkNumber}/${totalChunks} (${chunk.files.length} files)...`)); + verboseLog(options, chalk.cyan(`📝 Reviewing chunk ${chunkNumber}/${totalChunks} (${chunk.files.length} files)...`)); // Create chunk-specific options const chunkOptions = { diff --git a/src/rag-review.test.js b/src/rag-review.test.js index 3c64873..1a74c5e 100644 --- a/src/rag-review.test.js +++ b/src/rag-review.test.js @@ -322,7 +322,8 @@ describe('rag-review', () => { results: [], }), ]), - 2 + 2, + expect.objectContaining({ verbose: true }) ); expect(allSettledSpy).toHaveBeenCalled(); expect(result.success).toBe(true); diff --git a/src/utils/file-validation.js b/src/utils/file-validation.js index 77d2c5c..607474f 100644 --- a/src/utils/file-validation.js +++ b/src/utils/file-validation.js @@ -18,6 +18,7 @@ import { SKIP_FILENAMES, SKIP_FILE_PATTERNS, } from './constants.js'; +import { verboseLog } from './logging.js'; /** * Checks if a file path looks like a test file based on common patterns. 
@@ -201,9 +202,11 @@ export function shouldProcessFile(filePath, _, options = {}) { * * @param {string[]} filePaths - Array of file paths to check * @param {string} baseDir - Base directory for git operations + * @param {Object} options - Logging options + * @param {boolean} [options.verbose=false] - Enable verbose logging for ignored file summaries * @returns {Promise>} Map of relative paths to isIgnored boolean */ -export async function batchCheckGitignore(filePaths, baseDir = process.cwd()) { +export async function batchCheckGitignore(filePaths, baseDir = process.cwd(), options = {}) { const resultMap = new Map(); if (filePaths.length === 0) { @@ -242,11 +245,11 @@ export async function batchCheckGitignore(filePaths, baseDir = process.cwd()) { // Log ignored files if (ignoredFiles.length > 0) { - console.log(` ℹ️ Found ${ignoredFiles.length} gitignored files to exclude`); + verboseLog(options, ` ℹ️ Found ${ignoredFiles.length} gitignored files to exclude`); const ignoredSample = ignoredFiles.slice(0, 5); - ignoredSample.forEach((f) => console.log(` - ${f}`)); + ignoredSample.forEach((f) => verboseLog(options, ` - ${f}`)); if (ignoredFiles.length > 5) { - console.log(` ... and ${ignoredFiles.length - 5} more`); + verboseLog(options, ` ... 
and ${ignoredFiles.length - 5} more`); } } } catch (error) { diff --git a/src/utils/git.js b/src/utils/git.js index df2c26f..d21a725 100644 --- a/src/utils/git.js +++ b/src/utils/git.js @@ -9,6 +9,7 @@ import { execSync } from 'child_process'; import path from 'path'; import chalk from 'chalk'; import { execGitSafe } from './command.js'; +import { verboseLog } from './logging.js'; /** * Check if a git branch exists locally @@ -47,19 +48,19 @@ export function ensureBranchExists(branchName, workingDir = process.cwd()) { try { // Check if branch exists locally if (checkBranchExists(branchName, workingDir)) { - console.log(chalk.gray(`Branch '${branchName}' exists locally`)); + verboseLog({}, chalk.gray(`Branch '${branchName}' exists locally`)); return; } - console.log(chalk.yellow(`Branch '${branchName}' not found locally, attempting to fetch...`)); + verboseLog({}, chalk.yellow(`Branch '${branchName}' not found locally, attempting to fetch...`)); // Try to fetch the branch from origin try { execGitSafe('git fetch', ['origin', `${branchName}:${branchName}`], { stdio: 'pipe', cwd: workingDir }); - console.log(chalk.green(`Successfully fetched branch '${branchName}' from origin`)); + verboseLog({}, chalk.green(`Successfully fetched branch '${branchName}' from origin`)); } catch { // If direct fetch fails, try fetching all branches and then checking - console.log(chalk.yellow(`Direct fetch failed, trying to fetch all branches...`)); + verboseLog({}, chalk.yellow(`Direct fetch failed, trying to fetch all branches...`)); execSync('git fetch origin', { stdio: 'pipe', cwd: workingDir }); // Check if branch exists on remote @@ -67,7 +68,7 @@ export function ensureBranchExists(branchName, workingDir = process.cwd()) { execGitSafe('git show-ref', ['--verify', '--quiet', `refs/remotes/origin/${branchName}`], { cwd: workingDir }); // Create local tracking branch without switching working tree. 
execGitSafe('git branch', ['--track', branchName, `origin/${branchName}`], { stdio: 'pipe', cwd: workingDir }); - console.log(chalk.green(`Successfully created local branch '${branchName}' tracking origin/${branchName}`)); + verboseLog({}, chalk.green(`Successfully created local branch '${branchName}' tracking origin/${branchName}`)); } catch { throw new Error(`Branch '${branchName}' not found locally or on remote origin`); } diff --git a/src/utils/logging.js b/src/utils/logging.js index 760ceff..197a312 100644 --- a/src/utils/logging.js +++ b/src/utils/logging.js @@ -7,6 +7,40 @@ import chalk from 'chalk'; +/** + * Determine whether verbose logging is enabled. + * + * @param {Object|boolean} [options] - Options object or boolean verbose flag + * @returns {boolean} True when verbose logging should be emitted + */ +export function isVerboseEnabled(options = {}) { + const cliVerboseEnabled = process.env.VERBOSE === 'true' || process.argv.includes('--verbose'); + const optionVerboseEnabled = typeof options === 'boolean' ? options : Boolean(options?.verbose); + + return Boolean(cliVerboseEnabled || optionVerboseEnabled); +} + +/** + * Determine whether debug logging is enabled. + * + * @returns {boolean} True when debug logging should be emitted + */ +export function isDebugEnabled() { + return Boolean(process.env.DEBUG); +} + +/** + * Log only when verbose output is enabled. 
+ * + * @param {Object|boolean} options - Options object or boolean verbose flag + * @param {...any} args - Arguments to pass to console.log + */ +export function verboseLog(options, ...args) { + if (isVerboseEnabled(options)) { + console.log(...args); + } +} + /** * Debug function for conditional logging based on environment variables and command line arguments * @@ -14,11 +48,10 @@ import chalk from 'chalk'; * * @example * debug('Processing file: example.js'); - * // Only logs if DEBUG=true, VERBOSE=true, or --verbose flag is present + * // Only logs if DEBUG=true */ export function debug(message) { - const DEBUG = process.env.DEBUG || false; - if (DEBUG || process.env.VERBOSE === 'true' || process.argv.includes('--verbose')) { + if (isDebugEnabled()) { console.log(chalk.cyan(`[DEBUG] ${message}`)); } } diff --git a/src/utils/logging.test.js b/src/utils/logging.test.js index 7bb0963..39b23c5 100644 --- a/src/utils/logging.test.js +++ b/src/utils/logging.test.js @@ -1,4 +1,4 @@ -import { debug } from './logging.js'; +import { debug, verboseLog, isDebugEnabled, isVerboseEnabled } from './logging.js'; describe('logging', () => { let originalEnv; @@ -34,23 +34,23 @@ describe('logging', () => { expect(console.log).toHaveBeenCalledWith(expect.stringContaining('Debug message')); }); - it('should log when VERBOSE is true', () => { + it('should not log when VERBOSE is true without DEBUG', () => { delete process.env.DEBUG; process.env.VERBOSE = 'true'; debug('Verbose message'); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('Verbose message')); + expect(console.log).not.toHaveBeenCalled(); }); - it('should log when --verbose flag is present', () => { + it('should not log when --verbose flag is present without DEBUG', () => { delete process.env.DEBUG; delete process.env.VERBOSE; process.argv = [...process.argv, '--verbose']; debug('Verbose flag message'); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('Verbose flag message')); + 
expect(console.log).not.toHaveBeenCalled(); }); it('should include [DEBUG] prefix in message', () => { @@ -61,4 +61,74 @@ describe('logging', () => { expect(console.log).toHaveBeenCalledWith(expect.stringContaining('[DEBUG]')); }); }); + + describe('isVerboseEnabled', () => { + it('should be false by default', () => { + delete process.env.VERBOSE; + process.argv = process.argv.filter((arg) => arg !== '--verbose'); + + expect(isVerboseEnabled()).toBe(false); + }); + + it('should be true when VERBOSE is true', () => { + process.env.VERBOSE = 'true'; + + expect(isVerboseEnabled()).toBe(true); + }); + + it('should be true when --verbose flag is present', () => { + delete process.env.VERBOSE; + process.argv = [...process.argv, '--verbose']; + + expect(isVerboseEnabled()).toBe(true); + }); + + it('should be true when options.verbose is true', () => { + expect(isVerboseEnabled({ verbose: true })).toBe(true); + }); + + it('should ignore DEBUG when checking verbose mode', () => { + process.env.DEBUG = 'true'; + + expect(isVerboseEnabled()).toBe(false); + }); + }); + + describe('isDebugEnabled', () => { + it('should be false by default', () => { + delete process.env.DEBUG; + + expect(isDebugEnabled()).toBe(false); + }); + + it('should be true when DEBUG is set', () => { + process.env.DEBUG = 'true'; + + expect(isDebugEnabled()).toBe(true); + }); + }); + + describe('verboseLog', () => { + it('should log when VERBOSE is true', () => { + process.env.VERBOSE = 'true'; + + verboseLog({}, 'Verbose message'); + + expect(console.log).toHaveBeenCalledWith('Verbose message'); + }); + + it('should log when options.verbose is true', () => { + verboseLog({ verbose: true }, 'Option verbose message'); + + expect(console.log).toHaveBeenCalledWith('Option verbose message'); + }); + + it('should not log when only DEBUG is set', () => { + process.env.DEBUG = 'true'; + + verboseLog({}, 'Debug only message'); + + expect(console.log).not.toHaveBeenCalled(); + }); + }); }); diff --git 
a/src/utils/markdown.js b/src/utils/markdown.js index 1d465ea..a6cfd2e 100644 --- a/src/utils/markdown.js +++ b/src/utils/markdown.js @@ -6,6 +6,7 @@ */ import path from 'path'; +import { debug } from './logging.js'; /** * Extracts chunks from Markdown content based on H2 and H3 headings, @@ -52,19 +53,17 @@ export function extractMarkdownChunks(filePath, content, relativePath) { // Check for H1 heading in first few lines if (filePath.includes('README.md') || filePath.includes('RUNBOOK.md')) { // Log only for specific files to reduce noise - console.log(`[extractMarkdownChunks DEBUG] File: ${filePath}, Line ${i + 1} (trimmed): "${trimmedLine}", Attempting H1 match.`); + debug(`[extractMarkdownChunks] File: ${filePath}, Line ${i + 1} (trimmed): "${trimmedLine}", Attempting H1 match.`); } const h1Match = trimmedLine.match(h1Regex); if (h1Match) { documentH1 = h1Match[1].trim(); h1Found = true; - console.log(`[extractMarkdownChunks DEBUG] H1 FOUND for ${filePath}: "${documentH1}" on line ${i + 1}`); + debug(`[extractMarkdownChunks] H1 FOUND for ${filePath}: "${documentH1}" on line ${i + 1}`); } else if (filePath.includes('README.md') || filePath.includes('RUNBOOK.md')) { if (linesProcessedForH1 <= 5 && trimmedLine.startsWith('#')) { // If it starts with # but didn't match - console.log( - `[extractMarkdownChunks DEBUG] File: ${filePath}, Line ${i + 1}: Starts with # but H1Regex DID NOT match "${trimmedLine}"` - ); + debug(`[extractMarkdownChunks] File: ${filePath}, Line ${i + 1}: Starts with # but H1Regex DID NOT match "${trimmedLine}"`); } } } @@ -125,7 +124,7 @@ export function extractMarkdownChunks(filePath, content, relativePath) { if (!documentH1) { documentH1 = path.basename(filePath).replace(path.extname(filePath), ''); - console.log(`[extractMarkdownChunks DEBUG] H1 NOT FOUND for ${filePath}. Using fallback title: "${documentH1}"`); + debug(`[extractMarkdownChunks] H1 NOT FOUND for ${filePath}. 
Using fallback title: "${documentH1}"`); } return { chunks: chunks.filter((chunk) => chunk.content.length > 0), documentH1 }; diff --git a/src/utils/mobilebert-tokenizer.js b/src/utils/mobilebert-tokenizer.js index 12e558b..fce275f 100644 --- a/src/utils/mobilebert-tokenizer.js +++ b/src/utils/mobilebert-tokenizer.js @@ -7,6 +7,7 @@ import { AutoTokenizer } from '@huggingface/transformers'; import chalk from 'chalk'; +import { verboseLog } from './logging.js'; // Shared tokenizer instance and initialization state let tokenizer = null; @@ -45,9 +46,9 @@ async function getTokenizer() { */ async function _initializeTokenizer() { try { - console.log(chalk.blue('Initializing MobileBERT tokenizer...')); + verboseLog({}, chalk.blue('Initializing MobileBERT tokenizer...')); const tok = await AutoTokenizer.from_pretrained('Xenova/mobilebert-uncased-mnli'); - console.log(chalk.green('✓ MobileBERT tokenizer initialized successfully')); + verboseLog({}, chalk.green('✓ MobileBERT tokenizer initialized successfully')); return tok; } catch (error) { console.warn(chalk.yellow('⚠ Failed to initialize tokenizer, falling back to character estimation'), error.message); @@ -132,7 +133,7 @@ export async function cleanupTokenizer() { await tokenizer.dispose(); } tokenizer = null; - console.log(chalk.green('✓ MobileBERT tokenizer resources cleaned up')); + verboseLog({}, chalk.green('✓ MobileBERT tokenizer resources cleaned up')); } catch (error) { console.warn(chalk.yellow('⚠ Error cleaning up tokenizer:'), error.message); tokenizer = null; diff --git a/src/utils/mobilebert-tokenizer.test.js b/src/utils/mobilebert-tokenizer.test.js index 06135d8..df1422f 100644 --- a/src/utils/mobilebert-tokenizer.test.js +++ b/src/utils/mobilebert-tokenizer.test.js @@ -106,7 +106,6 @@ describe('mobilebert-tokenizer', () => { await cleanupTokenizer(); expect(mockTokenizer.dispose).toHaveBeenCalled(); - expect(console.log).toHaveBeenCalledWith(expect.stringContaining('cleaned up')); }); it('should handle 
missing dispose method gracefully', async () => { @@ -117,7 +116,7 @@ describe('mobilebert-tokenizer', () => { await cleanupTokenizer(); // Should not throw - expect(console.log).toHaveBeenCalled(); + expect(mockTokenizer.encode).toHaveBeenCalled(); }); it('should handle cleanup errors gracefully', async () => { diff --git a/src/utils/pr-chunking.js b/src/utils/pr-chunking.js index d1a21cd..e4794e1 100644 --- a/src/utils/pr-chunking.js +++ b/src/utils/pr-chunking.js @@ -1,11 +1,14 @@ import chalk from 'chalk'; +import { verboseLog } from './logging.js'; /** * Determines if a PR should be chunked based on estimated token usage * @param {Array} prFiles - Array of PR files with diffContent and content + * @param {Object} options - Logging options + * @param {boolean} [options.verbose=false] - Enable verbose token breakdown logging * @returns {Object} Decision object with shouldChunk flag and estimates */ -export function shouldChunkPR(prFiles) { +export function shouldChunkPR(prFiles, options = {}) { // IMPORTANT: The holistic PR prompt includes BOTH full file content AND diff content // for each file, plus context (code examples, guidelines, PR comments, custom docs) @@ -35,7 +38,8 @@ export function shouldChunkPR(prFiles) { const shouldChunk = totalEstimatedTokens > MAX_SINGLE_REVIEW_TOKENS || prFiles.length > 30; - console.log( + verboseLog( + options, chalk.gray( ` Token breakdown: ${diffTokens} diff + ${fullContentTokens} full content + ${CONTEXT_OVERHEAD_TOKENS} context overhead = ${totalEstimatedTokens} total` ) @@ -166,9 +170,11 @@ function getDirectoryDepth(filePath) { * Combines results from multiple chunk reviews into a single result * @param {Array} chunkResults - Array of chunk review results * @param {number} totalFiles - Total number of files in the PR + * @param {Object} options - Logging options + * @param {boolean} [options.verbose=false] - Enable verbose chunk combination logging * @returns {Object} Combined result object */ -export function 
combineChunkResults(chunkResults, totalFiles) { +export function combineChunkResults(chunkResults, totalFiles, options = {}) { const combinedResult = { success: true, results: [], @@ -202,7 +208,7 @@ export function combineChunkResults(chunkResults, totalFiles) { // Detect and merge cross-chunk issues combinedResult.crossChunkIssues = detectCrossChunkIssues(chunkResults); - console.log(chalk.green(`✅ Combined results from ${chunkResults.length} chunks: ${combinedResult.results.length} file reviews`)); + verboseLog(options, chalk.green(`✅ Combined results from ${chunkResults.length} chunks: ${combinedResult.results.length} file reviews`)); return combinedResult; } diff --git a/src/zero-shot-classifier-open.js b/src/zero-shot-classifier-open.js index bd7d42d..ceb7e70 100644 --- a/src/zero-shot-classifier-open.js +++ b/src/zero-shot-classifier-open.js @@ -10,6 +10,7 @@ import * as linguistLanguages from 'linguist-languages'; import { LRUCache } from 'lru-cache'; import stopwords from 'stopwords-iso/stopwords-iso.json' with { type: 'json' }; import techKeywords from './technology-keywords.json' with { type: 'json' }; +import { verboseLog } from './utils/logging.js'; import { truncateToTokenLimit } from './utils/mobilebert-tokenizer.js'; // Configure Transformers.js environment @@ -124,14 +125,14 @@ class OpenZeroShotClassifier { async _doInitialize() { try { - console.log('Initializing open-ended zero-shot classifier...'); + verboseLog({}, 'Initializing open-ended zero-shot classifier...'); this.classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli', { quantized: true, }); this.isInitialized = true; - console.log('✓ Open-ended zero-shot classifier initialized successfully'); + verboseLog({}, '✓ Open-ended zero-shot classifier initialized successfully'); } catch (error) { console.error('Error initializing classifier:', error); this.isInitialized = false;