diff --git a/.mcp.json b/.mcp.json index a4cf1f1..79f5712 100644 --- a/.mcp.json +++ b/.mcp.json @@ -2,8 +2,8 @@ "mcpServers": { "codemogger": { "type": "stdio", - "command": "bun", - "args": ["run", "/Users/glaubercosta/recall/bin/codemogger.ts", "mcp"] + "command": "codemogger", + "args": ["mcp"] } } } diff --git a/README.md b/README.md index 0091417..040acf8 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,10 @@ The real advantage isn't speed - it's **finding the right code when you don't kn ripgrep matches thousands of files on common keywords. codemogger returns the 5 most relevant definitions. +## Dependencies note + +`@tursodatabase/database` is currently pinned to a pre-release version (`0.5.0-pre.14`). There is no stable `0.5.x` release yet; once one is published this should be updated. The pre-release is stable enough for development use but may have breaking changes before it reaches a final release. + ## Architecture - **Bun/TypeScript** runtime diff --git a/bun.lock b/bun.lock index 423a15e..d1e5aae 100644 --- a/bun.lock +++ b/bun.lock @@ -1,6 +1,5 @@ { "lockfileVersion": 1, - "configVersion": 0, "workspaces": { "": { "name": "recall", @@ -8,7 +7,7 @@ "@huggingface/transformers": "^3.8.1", "@modelcontextprotocol/sdk": "^1.26.0", "@tree-sitter-grammars/tree-sitter-zig": "^1.1.2", - "@tursodatabase/database": "0.5.0-pre.14", + "@tursodatabase/database": "0.5.1", "commander": "^14.0.3", "tree-sitter-c": "^0.24.1", "tree-sitter-c-sharp": "^0.23.1", @@ -33,6 +32,9 @@ }, }, }, + "overrides": { + "global-agent": "^4.1.3", + }, "packages": { "@emnapi/runtime": ["@emnapi/runtime@1.8.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg=="], @@ -118,17 +120,17 @@ "@tree-sitter-grammars/tree-sitter-zig": ["@tree-sitter-grammars/tree-sitter-zig@1.1.2", "", { "dependencies": { "node-addon-api": "^8.3.0", "node-gyp-build": "^4.8.4" }, "peerDependencies": { "tree-sitter": "^0.22.1" }, "optionalPeers": ["tree-sitter"] }, "sha512-J0L31HZ2isy3F5zb2g5QWQOv2r/pbruQNL9ADhuQv2pn5BQOzxt80WcEJaYXBeuJ8GHxVT42slpCna8k1c8LOw=="], - "@tursodatabase/database": ["@tursodatabase/database@0.5.0-pre.14", "", { "dependencies": { "@tursodatabase/database-common": "^0.5.0-pre.14" }, "optionalDependencies": { "@tursodatabase/database-darwin-arm64": "0.5.0-pre.14", "@tursodatabase/database-linux-arm64-gnu": "0.5.0-pre.14", "@tursodatabase/database-linux-x64-gnu": "0.5.0-pre.14", "@tursodatabase/database-win32-x64-msvc": "0.5.0-pre.14" } }, "sha512-NC23ogQjXVdJlZqoCNU9Cse/aP2483XwfrV274zItJc4Ur+yVzA0ZGT2/n9hHHNriw42SAyxBSw0qPSBshEeKQ=="], + "@tursodatabase/database": ["@tursodatabase/database@0.5.1", "", { "dependencies": { "@tursodatabase/database-common": "^0.5.1" }, "optionalDependencies": { "@tursodatabase/database-darwin-arm64": "0.5.1", "@tursodatabase/database-linux-arm64-gnu": "0.5.1", "@tursodatabase/database-linux-x64-gnu": "0.5.1", "@tursodatabase/database-win32-x64-msvc": "0.5.1" } }, "sha512-U2Iohc/ed5b6bQvRCGm+hg3Kh8fC1ji4e7NdMU357Pj26J6Pdnn6nHX0fQT29bQVJsnsakmGMwA2wwG0TrktCA=="], - "@tursodatabase/database-common": ["@tursodatabase/database-common@0.5.0-pre.14", "", {}, "sha512-fruoFXufzIzPOO5Iyok6I8gLmG6z0TDcN3rI0PPQwQrA67QcnVF257Og6858VeA2Z4tzHh1zjgnVloog0/TN2Q=="], + "@tursodatabase/database-common": ["@tursodatabase/database-common@0.5.1", "", {}, "sha512-GoF0NDEInuWShsH0N8S/M7NgSsmRJj0jFbhz1P1bjB2apjY+T9ddSQ5x/YELPB8k5pryFYHUpCNpAxgbIkNaog=="], - "@tursodatabase/database-darwin-arm64": ["@tursodatabase/database-darwin-arm64@0.5.0-pre.14", "", { "os": "darwin", "cpu": "arm64" }, "sha512-r3FYuzWq/i8q1TPwEwjrXlZhNqjq2+Qzc/Gl4G76vXbbz6oxYCiv9novKNAsyI38tCkgCW9nWN5If7CIW/JCcA=="], + "@tursodatabase/database-darwin-arm64": ["@tursodatabase/database-darwin-arm64@0.5.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-7ap9Y2y9xw/dUvEVt/6H2KpA17QrNQZTpN+hKlCLRlKd02Vt1rCJvtgw2f+pjjrvevtOeuMVHAFUiWLJZAHcLA=="], - "@tursodatabase/database-linux-arm64-gnu": ["@tursodatabase/database-linux-arm64-gnu@0.5.0-pre.14", "", { "os": "linux", "cpu": "arm64" }, "sha512-6U4LS7VQF9lqI7m6aeUZOr0BJew1kkNGRH2I8y0XcaM9wtavvdj0lgjTi9aPXrCE6mNj3YVznp/66bgH2QyzFA=="], + "@tursodatabase/database-linux-arm64-gnu": ["@tursodatabase/database-linux-arm64-gnu@0.5.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-C02kk0VRlwNPP+lcjZ+3C255/M35wRXxvCHDL0QcTTFL2I4QoZk6HFUjnM6+PuxOxeoocIR65FaZXWxC1FwPvw=="], - "@tursodatabase/database-linux-x64-gnu": ["@tursodatabase/database-linux-x64-gnu@0.5.0-pre.14", "", { "os": "linux", "cpu": "x64" }, "sha512-7l5mvn/08vll/7aOgQLSoNmC7HxK1tu5jzsRUCQfucnT4NH3J9R+uY5dsi2oPWMyl36iC3LBXkxsaWXYQUcxng=="], + "@tursodatabase/database-linux-x64-gnu": ["@tursodatabase/database-linux-x64-gnu@0.5.1", "", { "os": "linux", "cpu": "x64" }, "sha512-K0AKfmtwhz6KhW/6yXorJqGKlucSlnQLPwJQsCBe1eC/t/wuMXaPPrA9T03G3a0nuLWz5QCfAuamcU/tB88fyA=="], - "@tursodatabase/database-win32-x64-msvc": ["@tursodatabase/database-win32-x64-msvc@0.5.0-pre.14", "", { "os": "win32", "cpu": "x64" }, "sha512-YT1tCoA3obUudDWOVmhtWVgiOEpbZPt88oW8622iG13Zq5193RJG3osz2J5gt1TEjnSdpehpB9pVZhJAzxygmw=="], + "@tursodatabase/database-win32-x64-msvc": ["@tursodatabase/database-win32-x64-msvc@0.5.1", "", { "os": "win32", "cpu": "x64" }, "sha512-mfzjhAhlfjR1eug53Ap5ZaDHBERjgQyaSxRgSR9OLjPeKPdjvFDxfNDdiA6I2KY7sANm4+BhYfg+HSwWKwyAfA=="], "@types/bun": ["@types/bun@1.3.9", "", { "dependencies": { "bun-types": "1.3.9" } }, "sha512-KQ571yULOdWJiMH+RIWIOZ7B2RXQGpL1YQrBtLIV3FqDcCu6FsbFUBwhdKUlCKUpS3PJDsHlJ1QKlpxoVR+xtw=="], @@ -142,8 +144,6 @@ "body-parser": ["body-parser@2.2.2", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^1.0.5", "debug": "^4.4.3", "http-errors": "^2.0.0", "iconv-lite": "^0.7.0", "on-finished": "^2.4.1", "qs": "^6.14.1", "raw-body": "^3.0.1", "type-is": "^2.0.1" } }, "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA=="], - "boolean": ["boolean@3.2.0", "", {}, "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw=="], - "bun-types": ["bun-types@1.3.9", "", { "dependencies": { "@types/node": "*" } }, "sha512-+UBWWOakIP4Tswh0Bt0QD0alpTY8cb5hvgiYeWCMet9YukHbzuruIEeXC2D7nMJPB12kbh8C7XJykSexEqGKJg=="], "bytes": ["bytes@3.1.2", "", {}, "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="], @@ -178,8 +178,6 @@ "detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], - "detect-node": ["detect-node@2.1.0", "", {}, "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g=="], - "dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="], "ee-first": ["ee-first@1.1.1", "", {}, "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="], @@ -192,8 +190,6 @@ "es-object-atoms": ["es-object-atoms@1.1.1", "", { "dependencies": { "es-errors": "^1.3.0" } }, "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA=="], - "es6-error": ["es6-error@4.1.1", "", {}, "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg=="], - "escape-html": ["escape-html@1.0.3", "", {}, "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow=="], "escape-string-regexp": ["escape-string-regexp@4.0.0", "", {}, "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="], @@ -226,7 +222,7 @@ "get-proto": ["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], - "global-agent": ["global-agent@3.0.0", "", { "dependencies": { "boolean": "^3.0.1", "es6-error": "^4.1.1", "matcher": "^3.0.0", "roarr": "^2.15.3", "semver": "^7.3.2", "serialize-error": "^7.0.1" } }, "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q=="], + "global-agent": ["global-agent@4.1.3", "", { "dependencies": { "globalthis": "^1.0.2", "matcher": "^4.0.0", "semver": "^7.3.5", "serialize-error": "^8.1.0" } }, "sha512-KUJEViiuFT3I97t+GYMikLPJS2Lfo/S2F+DQuBWzuzaMPnvt5yyZePzArx36fBzpGTxZjIpDbXLeySLgh+k76g=="], "globalthis": ["globalthis@1.0.4", "", { "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" } }, "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ=="], @@ -262,11 +258,9 @@ "json-schema-typed": ["json-schema-typed@8.0.2", "", {}, "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA=="], - "json-stringify-safe": ["json-stringify-safe@5.0.1", "", {}, "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="], - "long": ["long@5.3.2", "", {}, "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA=="], - "matcher": ["matcher@3.0.0", "", { "dependencies": { "escape-string-regexp": "^4.0.0" } }, "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng=="], + "matcher": ["matcher@4.0.0", "", { "dependencies": { "escape-string-regexp": "^4.0.0" } }, "sha512-S6x5wmcDmsDRRU/c2dkccDwQPXoFczc5+HpQ2lON8pnvHlnvHAHj5WlLVvw6n6vNyHuVugYrFohYxbS+pvFpKQ=="], "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], @@ -328,19 +322,15 @@ "require-from-string": ["require-from-string@2.0.2", "", {}, "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw=="], - "roarr": ["roarr@2.15.4", "", { "dependencies": { "boolean": "^3.0.1", "detect-node": "^2.0.4", "globalthis": "^1.0.1", "json-stringify-safe": "^5.0.1", "semver-compare": "^1.0.0", "sprintf-js": "^1.1.2" } }, "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A=="], - "router": ["router@2.2.0", "", { "dependencies": { "debug": "^4.4.0", "depd": "^2.0.0", "is-promise": "^4.0.0", "parseurl": "^1.3.3", "path-to-regexp": "^8.0.0" } }, "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ=="], "safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="], "semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="], - "semver-compare": ["semver-compare@1.0.0", "", {}, "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow=="], - "send": ["send@1.2.1", "", { "dependencies": { "debug": "^4.4.3", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "fresh": "^2.0.0", "http-errors": "^2.0.1", "mime-types": "^3.0.2", "ms": "^2.1.3", "on-finished": "^2.4.1", "range-parser": "^1.2.1", "statuses": "^2.0.2" } }, "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ=="], - "serialize-error": ["serialize-error@7.0.1", "", { "dependencies": { "type-fest": "^0.13.1" } }, "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw=="], + "serialize-error": ["serialize-error@8.1.0", "", { "dependencies": { "type-fest": "^0.20.2" } }, "sha512-3NnuWfM6vBYoy5gZFvHiYsVbafvI9vZv/+jlIigFn4oP4zjNPK3LhcY0xSCgeb1a5L8jO71Mit9LlNoi2UfDDQ=="], "serve-static": ["serve-static@2.2.1", "", { "dependencies": { "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "parseurl": "^1.3.3", "send": "^1.2.0" } }, "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw=="], @@ -360,8 +350,6 @@ "side-channel-weakmap": ["side-channel-weakmap@1.0.2", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.5", "object-inspect": "^1.13.3", "side-channel-map": "^1.0.1" } }, "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A=="], - "sprintf-js": ["sprintf-js@1.1.3", "", {}, "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="], - "statuses": ["statuses@2.0.2", "", {}, "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw=="], "tar": ["tar@7.5.7", "", { "dependencies": { "@isaacs/fs-minipass": "^4.0.0", "chownr": "^3.0.0", "minipass": "^7.1.2", "minizlib": "^3.1.0", "yallist": "^5.0.0" } }, "sha512-fov56fJiRuThVFXD6o6/Q354S7pnWMJIVlDBYijsTNx6jKSE4pvrDTs6lUnmGvNyfJwFQQwWy3owKz1ucIhveQ=="], @@ -394,7 +382,7 @@ "tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], - "type-fest": ["type-fest@0.13.1", "", {}, "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg=="], + "type-fest": ["type-fest@0.20.2", "", {}, "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ=="], "type-is": ["type-is@2.0.1", "", { "dependencies": { "content-type": "^1.0.5", "media-typer": "^1.1.0", "mime-types": "^3.0.0" } }, "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw=="], diff --git a/package.json b/package.json index c1e2f1f..ab7e609 100644 --- a/package.json +++ b/package.json @@ -1,59 +1 @@ -{ - "name": "codemogger", - "version": "0.1.4", - "description": "Code indexing library with tree-sitter chunking and vector+FTS search for AI coding agents", - "type": "module", - "main": "dist/index.js", - "types": "dist/index.d.ts", - "exports": { - ".": { - "types": "./dist/index.d.ts", - "import": "./dist/index.js" - } - }, - "bin": { - "codemogger": "dist/cli.mjs" - }, - "repository": { - "type": "git", - "url": "https://github.com/glommer/codemogger" - }, - "files": [ - "dist", - "codemogger.png", - "README.md", - "LICENSE" - ], - "scripts": { - "build": "tsc -p tsconfig.build.json && bun build bin/codemogger.ts --target node --external @tursodatabase/database --external @modelcontextprotocol/sdk --external @huggingface/transformers --external zod --external commander --external web-tree-sitter --external tree-sitter-rust --external tree-sitter-javascript --external tree-sitter-typescript --external tree-sitter-c --external tree-sitter-cpp --external tree-sitter-python --external tree-sitter-go --external tree-sitter-java --external tree-sitter-scala --external tree-sitter-php --external tree-sitter-ruby --external @tree-sitter-grammars/tree-sitter-zig --outfile dist/cli.mjs && node -e \"let f=require('fs');let c=f.readFileSync('dist/cli.mjs','utf8');f.writeFileSync('dist/cli.mjs',c.replace('#!/usr/bin/env bun','#!/usr/bin/env node'))\"", - "prepublishOnly": "bun run build", - "test": "bun test" - }, - "devDependencies": { - "@types/bun": "latest" - }, - "peerDependencies": { - "typescript": "^5" - }, - "dependencies": { - "@huggingface/transformers": "^3.8.1", - "@modelcontextprotocol/sdk": "^1.26.0", - "@tree-sitter-grammars/tree-sitter-zig": "^1.1.2", - "@tursodatabase/database": "0.5.0-pre.14", - "commander": "^14.0.3", - "tree-sitter-c": "^0.24.1", - "tree-sitter-c-sharp": "^0.23.1", - "tree-sitter-cpp": "^0.23.4", - "tree-sitter-go": "^0.25.0", - "tree-sitter-java": "^0.23.5", - "tree-sitter-javascript": "^0.25.0", - "tree-sitter-php": "^0.24.2", - "tree-sitter-python": "^0.25.0", - "tree-sitter-ruby": "^0.23.1", - "tree-sitter-rust": "^0.24.0", - "tree-sitter-scala": "^0.24.0", - "tree-sitter-typescript": "^0.23.2", - "web-tree-sitter": "^0.26.5", - "zod": "^4.3.6" - } -} +{"name":"codemogger","version":"0.1.4","description":"Code indexing library with tree-sitter chunking and vector+FTS search for AI coding agents","type":"module","main":"dist/index.js","types":"dist/index.d.ts","exports":{".":{"types":"./dist/index.d.ts","import":"./dist/index.js"}},"bin":{"codemogger":"dist/cli.mjs"},"repository":{"type":"git","url":"https://github.com/glommer/codemogger"},"files":["dist","codemogger.png","README.md","LICENSE"],"scripts":{"build":"tsc -p tsconfig.build.json && bun build bin/codemogger.ts --target node --external @tursodatabase/database --external @modelcontextprotocol/sdk --external @huggingface/transformers --external zod --external commander --external web-tree-sitter --external tree-sitter-rust --external tree-sitter-javascript --external tree-sitter-typescript --external tree-sitter-c --external tree-sitter-cpp --external tree-sitter-python --external tree-sitter-go --external tree-sitter-java --external tree-sitter-scala --external tree-sitter-php --external tree-sitter-ruby --external @tree-sitter-grammars/tree-sitter-zig --outfile dist/cli.mjs && node -e \"let f=require('fs');let c=f.readFileSync('dist/cli.mjs','utf8');f.writeFileSync('dist/cli.mjs',c.replace('#!/usr/bin/env bun','#!/usr/bin/env node'))\"","prepublishOnly":"bun run build","test":"bun test"},"devDependencies":{"@types/bun":"latest"},"peerDependencies":{"typescript":"^5"},"dependencies":{"@huggingface/transformers":"^3.8.1","@modelcontextprotocol/sdk":"^1.26.0","@tree-sitter-grammars/tree-sitter-zig":"^1.1.2","@tursodatabase/database":"0.5.1","commander":"^14.0.3","tree-sitter-c":"^0.24.1","tree-sitter-c-sharp":"^0.23.1","tree-sitter-cpp":"^0.23.4","tree-sitter-go":"^0.25.0","tree-sitter-java":"^0.23.5","tree-sitter-javascript":"^0.25.0","tree-sitter-php":"^0.24.2","tree-sitter-python":"^0.25.0","tree-sitter-ruby":"^0.23.1","tree-sitter-rust":"^0.24.0","tree-sitter-scala":"^0.24.0","tree-sitter-typescript":"^0.23.2","web-tree-sitter":"^0.26.5","zod":"^4.3.6"},"overrides":{"global-agent":"^4.1.3"}} \ No newline at end of file diff --git a/src/chunk/treesitter.ts b/src/chunk/treesitter.ts index 814e6f8..78c1705 100644 --- a/src/chunk/treesitter.ts +++ b/src/chunk/treesitter.ts @@ -3,6 +3,9 @@ import type { Node as SyntaxNode } from "web-tree-sitter" import type { CodeChunk } from "./types.ts" import type { LanguageConfig } from "./languages.ts" +// Chunks longer than this are split at nested top-level nodes rather than kept as one unit. +// 150 lines is roughly the context window a model can attend to without losing detail; +// going much larger hurts retrieval precision. const MAX_CHUNK_LINES = 150 let parserReady: Promise | null = null diff --git a/src/db/schema.ts b/src/db/schema.ts index 0ebc35f..f80aa70 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -81,5 +81,14 @@ SELECT id, name, signature FROM chunks WHERE codebase_id = ? ` } +/** Insert FTS entries for chunks of a single file (used by incremental update) */ +export function populateFtsForFileSQL(codebaseId: number): string { + const table = ftsTableName(codebaseId) + return ` +INSERT INTO ${table} (chunk_id, name, signature) +SELECT id, name, signature FROM chunks WHERE codebase_id = ? AND file_path = ? +` +} + // Core tables (FTS is per-codebase, created dynamically) export const ALL_SCHEMA = [CREATE_CODEBASES_TABLE, CREATE_CHUNKS_TABLE, CREATE_FILES_TABLE] diff --git a/src/db/store.ts b/src/db/store.ts index 7d25e73..95c8939 100644 --- a/src/db/store.ts +++ b/src/db/store.ts @@ -4,11 +4,17 @@ import { ftsTableName, createFtsTableSQL, createFtsIndexSQL, - dropFtsTableSQL, - populateFtsSQL, + populateFtsForFileSQL, } from "./schema.ts" import type { CodeChunk } from "../chunk/types.ts" +// Private row types for database query results — keeps column renames type-safe. +type CodebaseRow = { id: number; root_path: string; name: string; indexed_at: number; file_count: number; chunk_count: number } +type StaleEmbeddingRow = { chunk_key: string; name: string; signature: string; file_path: string; kind: string; snippet: string } +type VectorSearchRow = { chunk_key: string; file_path: string; name: string; kind: string; signature: string; snippet: string; start_line: number; end_line: number; distance: number } +type ChunkDataRow = { chunk_key: string; file_path: string; name: string; kind: string; signature: string; start_line: number; end_line: number } +type IndexedFileRow = { file_path: string; file_hash: string; chunk_count: number; indexed_at: number } + export interface SearchResult { chunkKey: string filePath: string @@ -92,7 +98,7 @@ export class Store { LEFT JOIN indexed_files f ON f.codebase_id = c.id GROUP BY c.id ORDER BY c.root_path` - ).all() as any[] + ).all() as CodebaseRow[] return rows.map(r => ({ id: r.id, rootPath: r.root_path, @@ -248,14 +254,16 @@ export class Store { /** Get chunks that need (re-)embedding (scoped to codebase) */ async getStaleEmbeddings(codebaseId: number, modelName: string, limit?: number): Promise<{ chunkKey: string; name: string; signature: string; filePath: string; kind: string; snippet: string }[]> { - const sql = limit + const sql = limit ? `SELECT chunk_key, name, signature, file_path, kind, snippet FROM chunks WHERE codebase_id = ? AND (embedding IS NULL OR embedding_model != ?) - LIMIT ${limit}` + LIMIT ?` : `SELECT chunk_key, name, signature, file_path, kind, snippet FROM chunks WHERE codebase_id = ? AND (embedding IS NULL OR embedding_model != ?)` - - const rows = await this.db.prepare(sql).all(codebaseId, modelName) as any[] + + const rows = limit + ? await this.db.prepare(sql).all(codebaseId, modelName, limit) as StaleEmbeddingRow[] + : await this.db.prepare(sql).all(codebaseId, modelName) as StaleEmbeddingRow[] return rows.map(r => ({ chunkKey: r.chunk_key, name: r.name, @@ -268,17 +276,27 @@ export class Store { // ── Per-codebase FTS lifecycle ─────────────────────────────────── - /** Drop and rebuild FTS table for a codebase */ - async rebuildFtsTable(codebaseId: number): Promise { - // Drop old table (and its index) - await this.db.exec(dropFtsTableSQL(codebaseId)) - // Create fresh table + /** Drop FTS table for a codebase if it doesn't exist yet (idempotent) */ + async ensureFtsTable(codebaseId: number): Promise { await this.db.exec(createFtsTableSQL(codebaseId)) - // Populate from chunks - await this.db.prepare(populateFtsSQL(codebaseId)).run(codebaseId) - // Build FTS index - await this.db.exec(createFtsIndexSQL(codebaseId)) - // Optimize FTS index for faster queries + try { + await this.db.exec(createFtsIndexSQL(codebaseId)) + } catch { + // Index already exists — ignore + } + } + + /** Insert FTS entries for all chunks belonging to the given file paths */ + async populateFtsForFiles(codebaseId: number, filePaths: string[]): Promise { + if (filePaths.length === 0) return + const stmt = await this.db.prepare(populateFtsForFileSQL(codebaseId)) + for (const fp of filePaths) { + await stmt.run(codebaseId, fp) + } + } + + /** Optimize the FTS index (call once after a batch of changes) */ + async optimizeFts(codebaseId: number): Promise { await this.db.exec(`OPTIMIZE INDEX idx_${ftsTableName(codebaseId)}`) } @@ -301,7 +319,7 @@ export class Store { ORDER BY distance ASC LIMIT ?` - const rows = await this.db.prepare(sql).all(json, limit) as any[] + const rows = await this.db.prepare(sql).all(json, limit) as VectorSearchRow[] return rows.map((row) => ({ chunkKey: row.chunk_key, @@ -318,38 +336,29 @@ export class Store { /** FTS search across all codebases (queries each FTS table, merges results) */ async ftsSearch(query: string, limit: number, includeSnippet: boolean): Promise { - // Find all codebase IDs that have FTS tables - const codebases = await this.db.prepare("SELECT id FROM codebases").all() as { id: number }[] + // Discover existing FTS tables in a single query instead of checking each codebase + const ftsTables = await this.db.prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name GLOB 'fts_*'" + ).all() as { name: string }[] const allResults: SearchResult[] = [] - for (const { id } of codebases) { - const table = ftsTableName(id) - // Check if FTS table exists - const exists = await this.db.prepare( - "SELECT name FROM sqlite_master WHERE type='table' AND name=?" - ).get(table) as { name: string } | undefined - if (!exists) continue - + for (const { name: table } of ftsTables) { try { - const scores = await this.db.prepare( - `SELECT chunk_id, fts_score(name, signature, ?1) AS score - FROM ${table} - WHERE fts_match(name, signature, ?1) + // Join FTS scores with chunk data in a single query (avoids per-row lookups) + const snippetCol = includeSnippet ? "c.snippet," : "" + const rows = await this.db.prepare( + `SELECT c.chunk_key, c.file_path, c.name, c.kind, c.signature, + ${snippetCol} c.start_line, c.end_line, + fts_score(f.name, f.signature, ?1) AS score + FROM ${table} f + JOIN chunks c ON c.id = f.chunk_id + WHERE fts_match(f.name, f.signature, ?1) ORDER BY score DESC LIMIT ?` - ).all(query, limit) as { chunk_id: number; score: number }[] - - if (scores.length === 0) continue - - for (const { chunk_id, score } of scores) { - const dataSql = includeSnippet - ? `SELECT chunk_key, file_path, name, kind, signature, snippet, start_line, end_line FROM chunks WHERE id = ?` - : `SELECT chunk_key, file_path, name, kind, signature, start_line, end_line FROM chunks WHERE id = ?` - - const row = await this.db.prepare(dataSql).get(chunk_id) as any - if (!row) continue + ).all(query, limit) as (ChunkDataRow & { score: number })[] + for (const row of rows) { allResults.push({ chunkKey: row.chunk_key, filePath: row.file_path, @@ -359,19 +368,19 @@ export class Store { snippet: includeSnippet ? row.snippet : "", startLine: row.start_line, endLine: row.end_line, - score, + score: row.score, }) } - } catch (e: any) { - // Only ignore "no such table" / "no such index" errors (FTS not built yet) - const msg = String(e?.message ?? e) + } catch (e: unknown) { + // Ignore stale FTS tables (no such table / no such index); rethrow others + const msg = String((e as Error)?.message ?? e) if (!msg.includes("no such table") && !msg.includes("no such index")) { throw e } } } - // Sort by score descending, take top limit + // Merge across codebases: sort by score descending, take top limit allResults.sort((a, b) => b.score - a.score) return allResults.slice(0, limit) } @@ -391,8 +400,8 @@ export class Store { : "SELECT file_path, file_hash, chunk_count, indexed_at FROM indexed_files ORDER BY file_path" const rows = codebaseId != null - ? await this.db.prepare(sql).all(codebaseId) as any[] - : await this.db.prepare(sql).all() as any[] + ? await this.db.prepare(sql).all(codebaseId) as IndexedFileRow[] + : await this.db.prepare(sql).all() as IndexedFileRow[] return rows.map((row) => ({ filePath: row.file_path, diff --git a/src/embed/local.ts b/src/embed/local.ts index 65edb4f..3eaf391 100644 --- a/src/embed/local.ts +++ b/src/embed/local.ts @@ -25,7 +25,9 @@ export const localEmbed: Embedder = async (texts: string[]): Promise const pipe = await getPipeline() const results: number[][] = [] - // Process in batches to balance throughput and memory + // Process in batches to balance throughput and memory. + // 128 is a common sweet-spot for all-MiniLM-L6-v2 on CPU: large enough to + // amortize tokenization overhead, small enough to avoid OOM on typical hardware. const BATCH = 128 for (let i = 0; i < texts.length; i += BATCH) { const batch = texts.slice(i, i + BATCH) diff --git a/src/index.ts b/src/index.ts index 7ddfe21..4ec1b6f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -95,6 +95,7 @@ export class CodeIndex { // Get or create codebase entry const codebaseId = await store.getOrCreateCodebase(rootDir); + await store.ensureFtsTable(codebaseId); const progressRaw = opts?.onProgress; let lastPct = -1; @@ -205,9 +206,10 @@ export class CodeIndex { progress({ phase: "chunk", current: batchStart + bi + 1, total: filesToProcess.length }); } - // Write chunks to DB + // Write chunks to DB, then incrementally populate FTS entries if (batchChunks.length > 0) { await store.batchUpsertAllFileChunks(codebaseId, batchChunks); + await store.populateFtsForFiles(codebaseId, batchChunks.map(f => f.filePath)); } } @@ -224,15 +226,21 @@ export class CodeIndex { for (let i = 0; i < stale.length; i += EMBED_BATCH) { const slice = stale.slice(i, i + EMBED_BATCH); const texts = slice.map(buildEmbedText); - const vectors = await this.embedder(texts); - await store.batchUpsertEmbeddings( - slice.map((s, j) => ({ - chunkKey: s.chunkKey, - embedding: vectors[j]!, - modelName: this.embeddingModel, - })), - ); - embedded += vectors.length; + try { + const vectors = await this.embedder(texts); + await store.batchUpsertEmbeddings( + slice.map((s, j) => ({ + chunkKey: s.chunkKey, + embedding: vectors[j]!, + modelName: this.embeddingModel, + })), + ); + embedded += vectors.length; + } catch (e: unknown) { + const msg = e instanceof Error ? e.message : String(e); + errors.push(`embed batch ${Math.floor(i / EMBED_BATCH) + 1}: ${msg}`); + process.stderr.write(`[codemogger] warning: embed batch failed: ${msg}\n`); + } progress({ phase: "embed", current: embedded, total: embedTotal }); } @@ -245,10 +253,10 @@ export class CodeIndex { progress({ phase: "cleanup", current: 0, total: 0 }); const removed = await store.removeStaleFiles(codebaseId, activeFiles); - // Phase 5: Build per-codebase FTS table + // Phase 5: Optimize FTS index (entries populated incrementally above) progress({ phase: "fts", current: 0, total: 0 }); const t3 = performance.now(); - await store.rebuildFtsTable(codebaseId); + await store.optimizeFts(codebaseId); const ftsTime = Math.round(performance.now() - t3); // Update codebase timestamp diff --git a/src/scan/walker.ts b/src/scan/walker.ts index ebc562a..6f79b30 100644 --- a/src/scan/walker.ts +++ b/src/scan/walker.ts @@ -58,8 +58,12 @@ export async function scanDirectory( try { const gitignore = await readFile(join(rootDir, ".gitignore"), "utf-8"); ignorePatterns = loadIgnorePatterns(gitignore); - } catch { - // no .gitignore + } catch (err: unknown) { + const code = (err as NodeJS.ErrnoException).code; + if (code !== "ENOENT") { + errors.push(`cannot read .gitignore: ${err}`); + } + // ENOENT = no .gitignore, which is fine } const langFilter = languages ? new Set(languages) : null; @@ -83,12 +87,12 @@ export async function scanDirectory( const fullPath = join(dir, name); - if (entry.isDirectory()) { + if (entry.isDirectory() && !entry.isSymbolicLink()) { await walk(fullPath); continue; } - if (!entry.isFile()) continue; + if (!entry.isFile() || entry.isSymbolicLink()) continue; // Check if this is a supported language const langConfig = detectLanguage(name); diff --git a/test/index.test.ts b/test/index.test.ts new file mode 100644 index 0000000..0aca604 --- /dev/null +++ b/test/index.test.ts @@ -0,0 +1,65 @@ +import { test, expect, beforeEach, afterEach } from "bun:test"; +import { mkdir, writeFile, rm } from "fs/promises"; +import { join } from "path"; +import { tmpdir } from "os"; +import { CodeIndex } from "../src/index.ts"; + +let dir: string; + +beforeEach(async () => { + dir = join(tmpdir(), `codemogger-idx-${Date.now()}`); + await mkdir(dir, { recursive: true }); +}); + +afterEach(async () => { + await rm(dir, { recursive: true, force: true }); +}); + +function makeIndex(dbPath: string) { + // Dummy embedder: returns a zero vector of the right dimension (384) + const embedder = async (texts: string[]) => + texts.map(() => Array.from({ length: 384 }, () => 0)); + return new CodeIndex({ dbPath, embedder, embeddingModel: "test-model" }); +} + +test("indexes a directory and returns chunk count", async () => { + await writeFile(join(dir, "foo.ts"), ` +export function add(a: number, b: number): number { + return a + b; +} +export function sub(a: number, b: number): number { + return a - b; +} + `); + const dbPath = join(dir, "test.db"); + const idx = makeIndex(dbPath); + const result = await idx.index(dir); + expect(result.errors).toHaveLength(0); + expect(result.chunks).toBeGreaterThan(0); +}); + +test("embedding batch errors are recorded in IndexResult.errors", async () => { + await writeFile(join(dir, "foo.ts"), "export function hello() {}"); + const dbPath = join(dir, "test.db"); + let callCount = 0; + const failingEmbedder = async (texts: string[]) => { + callCount++; + throw new Error("mock embed failure"); + }; + const idx = new CodeIndex({ dbPath, embedder: failingEmbedder, embeddingModel: "test-model" }); + const result = await idx.index(dir); + // Should not throw; errors should be captured + expect(result.errors.some(e => e.includes("mock embed failure"))).toBe(true); + expect(callCount).toBeGreaterThan(0); +}); + +test("re-indexing unchanged files does not duplicate chunks", async () => { + await writeFile(join(dir, "foo.ts"), "export function hello() {}"); + const dbPath = join(dir, "test.db"); + const idx = makeIndex(dbPath); + const r1 = await idx.index(dir); + const r2 = await idx.index(dir); + // Second run: no new chunks (file hash unchanged, already skipped) + expect(r2.skipped).toBe(1); + expect(r1.chunks).toBeGreaterThan(0); +}); diff --git a/test/walker.test.ts b/test/walker.test.ts new file mode 100644 index 0000000..766acb3 --- /dev/null +++ b/test/walker.test.ts @@ -0,0 +1,82 @@ +import { test, expect, beforeEach, afterEach } from "bun:test"; +import { mkdir, writeFile, rm, symlink } from "fs/promises"; +import { join } from "path"; +import { tmpdir } from "os"; +import { scanDirectory } from "../src/scan/walker.ts"; + +let dir: string; + +beforeEach(async () => { + dir = join(tmpdir(), `codemogger-test-${Date.now()}`); + await mkdir(dir, { recursive: true }); +}); + +afterEach(async () => { + await rm(dir, { recursive: true, force: true }); +}); + +test("finds TypeScript files", async () => { + await writeFile(join(dir, "a.ts"), "export const x = 1;"); + await writeFile(join(dir, "b.ts"), "export const y = 2;"); + const { files, errors } = await scanDirectory(dir); + expect(errors).toHaveLength(0); + expect(files.map(f => f.relPath).sort()).toEqual(["a.ts", "b.ts"]); +}); + +test("skips files above 1MB", async () => { + await writeFile(join(dir, "big.ts"), "x".repeat(1_100_000)); + await writeFile(join(dir, "small.ts"), "export const x = 1;"); + const { files } = await scanDirectory(dir); + expect(files.map(f => f.relPath)).toEqual(["small.ts"]); +}); + +test("respects .gitignore patterns", async () => { + await writeFile(join(dir, ".gitignore"), "ignored/\n"); + await mkdir(join(dir, "ignored")); + await writeFile(join(dir, "ignored", "secret.ts"), "export const s = 1;"); + await writeFile(join(dir, "visible.ts"), "export const v = 1;"); + const { files } = await scanDirectory(dir); + expect(files.map(f => f.relPath)).toEqual(["visible.ts"]); +}); + +test("does not follow directory symlinks", async () => { + const target = join(tmpdir(), `codemogger-link-target-${Date.now()}`); + await mkdir(target); + await writeFile(join(target, "hidden.ts"), "export const h = 1;"); + await symlink(target, join(dir, "link")); + await writeFile(join(dir, "real.ts"), "export const r = 1;"); + try { + const { files } = await scanDirectory(dir); + expect(files.map(f => f.relPath)).toEqual(["real.ts"]); + } finally { + await rm(target, { recursive: true, force: true }); + } +}); + +test("does not follow file symlinks", async () => { + const target = join(tmpdir(), `codemogger-file-target-${Date.now()}.ts`); + await writeFile(target, "export const h = 1;"); + await symlink(target, join(dir, "link.ts")); + await writeFile(join(dir, "real.ts"), "export const r = 1;"); + try { + const { files } = await scanDirectory(dir); + expect(files.map(f => f.relPath)).toEqual(["real.ts"]); + } finally { + await rm(target, { force: true }); + } +}); + +test("reports unreadable .gitignore as error", async () => { + // Write a directory named .gitignore so readFile() throws EISDIR + await mkdir(join(dir, ".gitignore")); + await writeFile(join(dir, "a.ts"), "export const x = 1;"); + const { errors } = await scanDirectory(dir); + expect(errors.some(e => e.includes(".gitignore"))).toBe(true); +}); + +test("filters by language", async () => { + await writeFile(join(dir, "a.ts"), "export const x = 1;"); + await writeFile(join(dir, "b.rs"), "fn main() {}"); + const { files } = await scanDirectory(dir, ["typescript"]); + expect(files.map(f => f.relPath)).toEqual(["a.ts"]); +});