From 5cd8fb00c59407cc3f7557ccdaf657ca05c71ec4 Mon Sep 17 00:00:00 2001 From: Harlan Wilton Date: Sat, 21 Mar 2026 16:12:53 +1100 Subject: [PATCH] feat(search): incremental index updates instead of all-or-nothing rebuild When the search DB already exists, indexResources now diffs incoming docs against the stored index and only processes the delta: new docs get chunked/embedded/stored, stale docs and their chunks get removed, unchanged docs are skipped entirely. Uses node:sqlite directly to query raw chunk-level IDs from the DB (bypassing retriv's parent-ID deduplication) so exact chunk IDs can be passed to remove(). Bumps retriv to 0.12.0 for listIds() support. Resolves #28 --- package.json | 2 +- pnpm-lock.yaml | 557 ++++++++++++++++++++++++++-------- pnpm-workspace.yaml | 4 +- src/commands/sync-shared.ts | 113 +++++-- src/retriv/index.ts | 42 ++- src/retriv/pool.ts | 3 +- src/retriv/worker.ts | 4 + test/unit/sync-shared.test.ts | 48 ++- 8 files changed, 609 insertions(+), 164 deletions(-) diff --git a/package.json b/package.json index 323afdf3..79bb5c3f 100644 --- a/package.json +++ b/package.json @@ -73,7 +73,7 @@ "oxc-parser": "catalog:deps", "p-limit": "catalog:deps", "pathe": "catalog:", - "retriv": "catalog:deps", + "retriv": "catalog:", "semver": "catalog:", "sqlite-vec": "catalog:deps", "std-env": "catalog:", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a5c68288..e44de1b4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -27,6 +27,9 @@ catalogs: pathe: specifier: ^2.0.3 version: 2.0.3 + retriv: + specifier: ^0.12.0 + version: 0.12.0 semver: specifier: ^7.7.4 version: 7.7.4 @@ -76,9 +79,6 @@ catalogs: p-limit: specifier: ^7.3.0 version: 7.3.0 - retriv: - specifier: ^0.11.0 - version: 0.11.0 sqlite-vec: specifier: ^0.1.7 version: 0.1.7 @@ -172,8 +172,8 @@ importers: specifier: 'catalog:' version: 2.0.3 retriv: - specifier: catalog:deps - version: 0.11.0(@huggingface/transformers@3.8.1)(ai@6.0.68(zod@4.3.6))(sqlite-vec@0.1.7)(typescript@6.0.0-beta) + specifier: 'catalog:' + version: 0.12.0(@huggingface/transformers@3.8.1)(ai@6.0.68(zod@4.3.6))(sqlite-vec@0.1.7)(typescript@6.0.0-beta) semver: specifier: 'catalog:' version: 7.7.4 @@ -313,28 +313,28 @@ packages: '@antfu/install-pkg@1.1.0': resolution: {integrity: sha512-MGQsmw10ZyI+EJo45CdSER4zEb+p31LpDAFp2Z3gkSd1yqVZGi0Ebx++YTEMonJy4oChEMLsxZ64j8FH6sSqtQ==} - '@apify/consts@2.51.0': - resolution: {integrity: sha512-pW9VxTP0H99Gn8JAk2DUdXynASja0vaB2JH8pmF8JbE7WThV21+p7KLPf5EwZ5gAgaw9OY/qDx0slI4iENgyjQ==} + '@apify/consts@2.51.1': + resolution: {integrity: sha512-QV16f41BjmE7uYQgB+JeS5bhbEdFvP8eF1R5LiKlvGkERckSlMl1JIIaW1b/XwJdp3bEBKBGPtNlvYa06wyhwg==} '@apify/datastructures@2.0.3': resolution: {integrity: sha512-E6yQyc/XZDqJopbaGmhzZXMJqwGf96ELtDANZa0t68jcOAJZS+pF7YUfQOLszXq6JQAdnRvTH2caotL6urX7HA==} - '@apify/log@2.5.32': - resolution: {integrity: sha512-4ojuqfsV3WCvqajcZUgHcln5bses0nz4MnnRR5i+If1jywSH2gRxYVNVHxKP5yesNx9I5v6iT498nYSpDlUqog==} + '@apify/log@2.5.33': + resolution: {integrity: sha512-rD+RY/Lvgy2ZAQD6QHbzoGHKvqILSXHZggTv2PN80ZZl7JMVQ22pYpoysYITHl4eGuievCiwrhkvdbNqTHqoPQ==} '@apify/ps-tree@1.2.0': resolution: {integrity: sha512-VHIswI7rD/R4bToeIDuJ9WJXt+qr5SdhfoZ9RzdjmCs9mgy7l0P4RugQEUCcU+WB4sfImbd4CKwzXcn0uYx1yw==} engines: {node: '>= 0.10'} hasBin: true - '@apify/pseudo_url@2.0.73': - resolution: {integrity: sha512-ckX7vTzIVPNnv5KbaPFHqEhCKO1KxLkqBB6QCDEp8b2Y1lA4jNRqCAJIgGOqvnjItSCke+Dn558ACzMjAx994A==} + '@apify/pseudo_url@2.0.74': + resolution: {integrity: sha512-iMa7MzKn/5dWwSmOj3jZ+33NCRUdbyKsOTZytlowQgblV3yL8YFLziWcA1GlH6spIHG8073gIQMOecXvQYpvNA==} '@apify/timeout@0.3.2': resolution: {integrity: sha512-JnOLIOpqfm366q7opKrA6HrL0iYRpYYDn8Mi77sMR2GZ1fPbwMWCVzN23LJWfJV7izetZbCMrqRUXsR1etZ7dA==} - '@apify/utilities@2.25.4': - resolution: {integrity: sha512-UvFM37FG2pLcVe1clXhdbks5Y8eSyx7LKmIc4Cx/JYpaet5RCV1OD9URoDyihp9RqD0TCs2dPOPIaXUknpg1rg==} + '@apify/utilities@2.25.5': + resolution: {integrity: sha512-I53XgSbNw2mYHPbPTIM7CjooHBHapWzvW6eKxpzt5IO9zB3OIzWOk2xRCodi1pAt3+A+BGiJJyddF/cQYGJenA==} '@asamuzakjp/css-color@3.2.0': resolution: {integrity: sha512-K1A6z8tS3XsmCMM86xoWdn7Fkdn9m6RSVtocUrJYIwZnFVkng/PvkEoWtOWmP+Scc6saYWHWZYbndEEXxl24jw==} @@ -364,6 +364,11 @@ packages: engines: {node: '>=6.0.0'} hasBin: true + '@babel/parser@7.29.2': + resolution: {integrity: sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA==} + engines: {node: '>=6.0.0'} + hasBin: true + '@babel/parser@8.0.0-rc.2': resolution: {integrity: sha512-29AhEtcq4x8Dp3T72qvUMZHx0OMXCj4Jy/TEReQa+KWLln524Cj1fWb3QFi0l/xSpptQBR6y9RNEXuxpFvwiUQ==} engines: {node: ^20.19.0 || >=22.12.0} @@ -545,156 +550,312 @@ packages: cpu: [ppc64] os: [aix] + '@esbuild/aix-ppc64@0.27.4': + resolution: {integrity: sha512-cQPwL2mp2nSmHHJlCyoXgHGhbEPMrEEU5xhkcy3Hs/O7nGZqEpZ2sUtLaL9MORLtDfRvVl2/3PAuEkYZH0Ty8Q==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + '@esbuild/android-arm64@0.27.3': resolution: {integrity: sha512-YdghPYUmj/FX2SYKJ0OZxf+iaKgMsKHVPF1MAq/P8WirnSpCStzKJFjOjzsW0QQ7oIAiccHdcqjbHmJxRb/dmg==} engines: {node: '>=18'} cpu: [arm64] os: [android] + '@esbuild/android-arm64@0.27.4': + resolution: {integrity: sha512-gdLscB7v75wRfu7QSm/zg6Rx29VLdy9eTr2t44sfTW7CxwAtQghZ4ZnqHk3/ogz7xao0QAgrkradbBzcqFPasw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + '@esbuild/android-arm@0.27.3': resolution: {integrity: sha512-i5D1hPY7GIQmXlXhs2w8AWHhenb00+GxjxRncS2ZM7YNVGNfaMxgzSGuO8o8SJzRc/oZwU2bcScvVERk03QhzA==} engines: {node: '>=18'} cpu: [arm] os: [android] + '@esbuild/android-arm@0.27.4': + resolution: {integrity: sha512-X9bUgvxiC8CHAGKYufLIHGXPJWnr0OCdR0anD2e21vdvgCI8lIfqFbnoeOz7lBjdrAGUhqLZLcQo6MLhTO2DKQ==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + '@esbuild/android-x64@0.27.3': resolution: {integrity: sha512-IN/0BNTkHtk8lkOM8JWAYFg4ORxBkZQf9zXiEOfERX/CzxW3Vg1ewAhU7QSWQpVIzTW+b8Xy+lGzdYXV6UZObQ==} engines: {node: '>=18'} cpu: [x64] os: [android] + '@esbuild/android-x64@0.27.4': + resolution: {integrity: sha512-PzPFnBNVF292sfpfhiyiXCGSn9HZg5BcAz+ivBuSsl6Rk4ga1oEXAamhOXRFyMcjwr2DVtm40G65N3GLeH1Lvw==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + '@esbuild/darwin-arm64@0.27.3': resolution: {integrity: sha512-Re491k7ByTVRy0t3EKWajdLIr0gz2kKKfzafkth4Q8A5n1xTHrkqZgLLjFEHVD+AXdUGgQMq+Godfq45mGpCKg==} engines: {node: '>=18'} cpu: [arm64] os: [darwin] + '@esbuild/darwin-arm64@0.27.4': + resolution: {integrity: sha512-b7xaGIwdJlht8ZFCvMkpDN6uiSmnxxK56N2GDTMYPr2/gzvfdQN8rTfBsvVKmIVY/X7EM+/hJKEIbbHs9oA4tQ==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + '@esbuild/darwin-x64@0.27.3': resolution: {integrity: sha512-vHk/hA7/1AckjGzRqi6wbo+jaShzRowYip6rt6q7VYEDX4LEy1pZfDpdxCBnGtl+A5zq8iXDcyuxwtv3hNtHFg==} engines: {node: '>=18'} cpu: [x64] os: [darwin] + '@esbuild/darwin-x64@0.27.4': + resolution: {integrity: sha512-sR+OiKLwd15nmCdqpXMnuJ9W2kpy0KigzqScqHI3Hqwr7IXxBp3Yva+yJwoqh7rE8V77tdoheRYataNKL4QrPw==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + '@esbuild/freebsd-arm64@0.27.3': resolution: {integrity: sha512-ipTYM2fjt3kQAYOvo6vcxJx3nBYAzPjgTCk7QEgZG8AUO3ydUhvelmhrbOheMnGOlaSFUoHXB6un+A7q4ygY9w==} engines: {node: '>=18'} cpu: [arm64] os: [freebsd] + '@esbuild/freebsd-arm64@0.27.4': + resolution: {integrity: sha512-jnfpKe+p79tCnm4GVav68A7tUFeKQwQyLgESwEAUzyxk/TJr4QdGog9sqWNcUbr/bZt/O/HXouspuQDd9JxFSw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + '@esbuild/freebsd-x64@0.27.3': resolution: {integrity: sha512-dDk0X87T7mI6U3K9VjWtHOXqwAMJBNN2r7bejDsc+j03SEjtD9HrOl8gVFByeM0aJksoUuUVU9TBaZa2rgj0oA==} engines: {node: '>=18'} cpu: [x64] os: [freebsd] + '@esbuild/freebsd-x64@0.27.4': + resolution: {integrity: sha512-2kb4ceA/CpfUrIcTUl1wrP/9ad9Atrp5J94Lq69w7UwOMolPIGrfLSvAKJp0RTvkPPyn6CIWrNy13kyLikZRZQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] + '@esbuild/linux-arm64@0.27.3': resolution: {integrity: sha512-sZOuFz/xWnZ4KH3YfFrKCf1WyPZHakVzTiqji3WDc0BCl2kBwiJLCXpzLzUBLgmp4veFZdvN5ChW4Eq/8Fc2Fg==} engines: {node: '>=18'} cpu: [arm64] os: [linux] + '@esbuild/linux-arm64@0.27.4': + resolution: {integrity: sha512-7nQOttdzVGth1iz57kxg9uCz57dxQLHWxopL6mYuYthohPKEK0vU0C3O21CcBK6KDlkYVcnDXY099HcCDXd9dA==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + '@esbuild/linux-arm@0.27.3': resolution: {integrity: sha512-s6nPv2QkSupJwLYyfS+gwdirm0ukyTFNl3KTgZEAiJDd+iHZcbTPPcWCcRYH+WlNbwChgH2QkE9NSlNrMT8Gfw==} engines: {node: '>=18'} cpu: [arm] os: [linux] + '@esbuild/linux-arm@0.27.4': + resolution: {integrity: sha512-aBYgcIxX/wd5n2ys0yESGeYMGF+pv6g0DhZr3G1ZG4jMfruU9Tl1i2Z+Wnj9/KjGz1lTLCcorqE2viePZqj4Eg==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + '@esbuild/linux-ia32@0.27.3': resolution: {integrity: sha512-yGlQYjdxtLdh0a3jHjuwOrxQjOZYD/C9PfdbgJJF3TIZWnm/tMd/RcNiLngiu4iwcBAOezdnSLAwQDPqTmtTYg==} engines: {node: '>=18'} cpu: [ia32] os: [linux] + '@esbuild/linux-ia32@0.27.4': + resolution: {integrity: sha512-oPtixtAIzgvzYcKBQM/qZ3R+9TEUd1aNJQu0HhGyqtx6oS7qTpvjheIWBbes4+qu1bNlo2V4cbkISr8q6gRBFA==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + '@esbuild/linux-loong64@0.27.3': resolution: {integrity: sha512-WO60Sn8ly3gtzhyjATDgieJNet/KqsDlX5nRC5Y3oTFcS1l0KWba+SEa9Ja1GfDqSF1z6hif/SkpQJbL63cgOA==} engines: {node: '>=18'} cpu: [loong64] os: [linux] + '@esbuild/linux-loong64@0.27.4': + resolution: {integrity: sha512-8mL/vh8qeCoRcFH2nM8wm5uJP+ZcVYGGayMavi8GmRJjuI3g1v6Z7Ni0JJKAJW+m0EtUuARb6Lmp4hMjzCBWzA==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + '@esbuild/linux-mips64el@0.27.3': resolution: {integrity: sha512-APsymYA6sGcZ4pD6k+UxbDjOFSvPWyZhjaiPyl/f79xKxwTnrn5QUnXR5prvetuaSMsb4jgeHewIDCIWljrSxw==} engines: {node: '>=18'} cpu: [mips64el] os: [linux] + '@esbuild/linux-mips64el@0.27.4': + resolution: {integrity: sha512-1RdrWFFiiLIW7LQq9Q2NES+HiD4NyT8Itj9AUeCl0IVCA459WnPhREKgwrpaIfTOe+/2rdntisegiPWn/r/aAw==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + '@esbuild/linux-ppc64@0.27.3': resolution: {integrity: sha512-eizBnTeBefojtDb9nSh4vvVQ3V9Qf9Df01PfawPcRzJH4gFSgrObw+LveUyDoKU3kxi5+9RJTCWlj4FjYXVPEA==} engines: {node: '>=18'} cpu: [ppc64] os: [linux] + '@esbuild/linux-ppc64@0.27.4': + resolution: {integrity: sha512-tLCwNG47l3sd9lpfyx9LAGEGItCUeRCWeAx6x2Jmbav65nAwoPXfewtAdtbtit/pJFLUWOhpv0FpS6GQAmPrHA==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + '@esbuild/linux-riscv64@0.27.3': resolution: {integrity: sha512-3Emwh0r5wmfm3ssTWRQSyVhbOHvqegUDRd0WhmXKX2mkHJe1SFCMJhagUleMq+Uci34wLSipf8Lagt4LlpRFWQ==} engines: {node: '>=18'} cpu: [riscv64] os: [linux] + '@esbuild/linux-riscv64@0.27.4': + resolution: {integrity: sha512-BnASypppbUWyqjd1KIpU4AUBiIhVr6YlHx/cnPgqEkNoVOhHg+YiSVxM1RLfiy4t9cAulbRGTNCKOcqHrEQLIw==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + '@esbuild/linux-s390x@0.27.3': resolution: {integrity: sha512-pBHUx9LzXWBc7MFIEEL0yD/ZVtNgLytvx60gES28GcWMqil8ElCYR4kvbV2BDqsHOvVDRrOxGySBM9Fcv744hw==} engines: {node: '>=18'} cpu: [s390x] os: [linux] + '@esbuild/linux-s390x@0.27.4': + resolution: {integrity: sha512-+eUqgb/Z7vxVLezG8bVB9SfBie89gMueS+I0xYh2tJdw3vqA/0ImZJ2ROeWwVJN59ihBeZ7Tu92dF/5dy5FttA==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + '@esbuild/linux-x64@0.27.3': resolution: {integrity: sha512-Czi8yzXUWIQYAtL/2y6vogER8pvcsOsk5cpwL4Gk5nJqH5UZiVByIY8Eorm5R13gq+DQKYg0+JyQoytLQas4dA==} engines: {node: '>=18'} cpu: [x64] os: [linux] + '@esbuild/linux-x64@0.27.4': + resolution: {integrity: sha512-S5qOXrKV8BQEzJPVxAwnryi2+Iq5pB40gTEIT69BQONqR7JH1EPIcQ/Uiv9mCnn05jff9umq/5nqzxlqTOg9NA==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + '@esbuild/netbsd-arm64@0.27.3': resolution: {integrity: sha512-sDpk0RgmTCR/5HguIZa9n9u+HVKf40fbEUt+iTzSnCaGvY9kFP0YKBWZtJaraonFnqef5SlJ8/TiPAxzyS+UoA==} engines: {node: '>=18'} cpu: [arm64] os: [netbsd] + '@esbuild/netbsd-arm64@0.27.4': + resolution: {integrity: sha512-xHT8X4sb0GS8qTqiwzHqpY00C95DPAq7nAwX35Ie/s+LO9830hrMd3oX0ZMKLvy7vsonee73x0lmcdOVXFzd6Q==} + engines: {node: '>=18'} + cpu: [arm64] + os: [netbsd] + '@esbuild/netbsd-x64@0.27.3': resolution: {integrity: sha512-P14lFKJl/DdaE00LItAukUdZO5iqNH7+PjoBm+fLQjtxfcfFE20Xf5CrLsmZdq5LFFZzb5JMZ9grUwvtVYzjiA==} engines: {node: '>=18'} cpu: [x64] os: [netbsd] + '@esbuild/netbsd-x64@0.27.4': + resolution: {integrity: sha512-RugOvOdXfdyi5Tyv40kgQnI0byv66BFgAqjdgtAKqHoZTbTF2QqfQrFwa7cHEORJf6X2ht+l9ABLMP0dnKYsgg==} + engines: {node: '>=18'} + cpu: [x64] + os: [netbsd] + '@esbuild/openbsd-arm64@0.27.3': resolution: {integrity: sha512-AIcMP77AvirGbRl/UZFTq5hjXK+2wC7qFRGoHSDrZ5v5b8DK/GYpXW3CPRL53NkvDqb9D+alBiC/dV0Fb7eJcw==} engines: {node: '>=18'} cpu: [arm64] os: [openbsd] + '@esbuild/openbsd-arm64@0.27.4': + resolution: {integrity: sha512-2MyL3IAaTX+1/qP0O1SwskwcwCoOI4kV2IBX1xYnDDqthmq5ArrW94qSIKCAuRraMgPOmG0RDTA74mzYNQA9ow==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + '@esbuild/openbsd-x64@0.27.3': resolution: {integrity: sha512-DnW2sRrBzA+YnE70LKqnM3P+z8vehfJWHXECbwBmH/CU51z6FiqTQTHFenPlHmo3a8UgpLyH3PT+87OViOh1AQ==} engines: {node: '>=18'} cpu: [x64] os: [openbsd] + '@esbuild/openbsd-x64@0.27.4': + resolution: {integrity: sha512-u8fg/jQ5aQDfsnIV6+KwLOf1CmJnfu1ShpwqdwC0uA7ZPwFws55Ngc12vBdeUdnuWoQYx/SOQLGDcdlfXhYmXQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + '@esbuild/openharmony-arm64@0.27.3': resolution: {integrity: sha512-NinAEgr/etERPTsZJ7aEZQvvg/A6IsZG/LgZy+81wON2huV7SrK3e63dU0XhyZP4RKGyTm7aOgmQk0bGp0fy2g==} engines: {node: '>=18'} cpu: [arm64] os: [openharmony] + '@esbuild/openharmony-arm64@0.27.4': + resolution: {integrity: sha512-JkTZrl6VbyO8lDQO3yv26nNr2RM2yZzNrNHEsj9bm6dOwwu9OYN28CjzZkH57bh4w0I2F7IodpQvUAEd1mbWXg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openharmony] + '@esbuild/sunos-x64@0.27.3': resolution: {integrity: sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA==} engines: {node: '>=18'} cpu: [x64] os: [sunos] + '@esbuild/sunos-x64@0.27.4': + resolution: {integrity: sha512-/gOzgaewZJfeJTlsWhvUEmUG4tWEY2Spp5M20INYRg2ZKl9QPO3QEEgPeRtLjEWSW8FilRNacPOg8R1uaYkA6g==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + '@esbuild/win32-arm64@0.27.3': resolution: {integrity: sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA==} engines: {node: '>=18'} cpu: [arm64] os: [win32] + '@esbuild/win32-arm64@0.27.4': + resolution: {integrity: sha512-Z9SExBg2y32smoDQdf1HRwHRt6vAHLXcxD2uGgO/v2jK7Y718Ix4ndsbNMU/+1Qiem9OiOdaqitioZwxivhXYg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + '@esbuild/win32-ia32@0.27.3': resolution: {integrity: sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q==} engines: {node: '>=18'} cpu: [ia32] os: [win32] + '@esbuild/win32-ia32@0.27.4': + resolution: {integrity: sha512-DAyGLS0Jz5G5iixEbMHi5KdiApqHBWMGzTtMiJ72ZOLhbu/bzxgAe8Ue8CTS3n3HbIUHQz/L51yMdGMeoxXNJw==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + '@esbuild/win32-x64@0.27.3': resolution: {integrity: sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA==} engines: {node: '>=18'} cpu: [x64] os: [win32] + '@esbuild/win32-x64@0.27.4': + resolution: {integrity: sha512-+knoa0BDoeXgkNvvV1vvbZX4+hizelrkwmGJBdT17t8FNPwG2lKemmuMZlmaNQ3ws3DKKCxpb4zRZEIp3UxFCg==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + '@eslint-community/eslint-plugin-eslint-comments@4.7.1': resolution: {integrity: sha512-Ql2nJFwA8wUGpILYGOQaT1glPsmvEwE0d+a+l7AALLzQvInqdbXJdx7aSu0DpUX9dB1wMVBMhm99/++S3MdEtQ==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} @@ -720,8 +881,8 @@ packages: eslint: optional: true - '@eslint/config-array@0.21.1': - resolution: {integrity: sha512-aw1gNayWpdI/jSYVgzN5pL0cfzU02GT3NBpeT/DXbx1/1x7ZKxFPd9bwrzygx/qiwIQiJ1sw/zD8qY/kRvlGHA==} + '@eslint/config-array@0.21.2': + resolution: {integrity: sha512-nJl2KGTlrf9GjLimgIru+V/mzgSK0ABCDQRvxw5BjURL7WfH5uoWmizbH7QB6MmnMBd8cIC9uceWnezL1VZWWw==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} '@eslint/config-helpers@0.4.2': @@ -740,8 +901,8 @@ packages: resolution: {integrity: sha512-QUPblTtE51/7/Zhfv8BDwO0qkkzQL7P/aWWbqcf4xWLEYn1oKjdO0gglQBB4GAsu7u6wjijbCmzsUTy6mnk6oQ==} engines: {node: ^20.19.0 || ^22.13.0 || >=24} - '@eslint/eslintrc@3.3.4': - resolution: {integrity: sha512-4h4MVF8pmBsncB60r0wSJiIeUKTSD4m7FmTFThG8RHlsg9ajqckLm9OraguFGZE4vVdpiI1Q4+hFnisopmG6gQ==} + '@eslint/eslintrc@3.3.5': + resolution: {integrity: sha512-4IlJx0X0qftVsN5E+/vGujTRIFtwuLbNsVUe7TO6zYPDR1O6nFwvwhIKEKSrl6dZchmYBITazxKoUYOjdtjlRg==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} '@eslint/js@9.39.2': @@ -2223,8 +2384,8 @@ packages: resolution: {integrity: sha512-2z+rWdzbbSZv6/rhtvzvqeZQHrBaqgogqt85sqFNbabZOuFbCVFb8kPeEtZjiKkbrm395irpNKiYeFeLiQnFPg==} engines: {node: '>=18'} - csv-stringify@6.6.0: - resolution: {integrity: sha512-YW32lKOmIBgbxtu3g5SaiqWNwa/9ISQt2EcgOq0+RAIFufFp9is6tqNnKahqE5kuKvrnYAzs28r+s6pXJR8Vcw==} + csv-stringify@6.7.0: + resolution: {integrity: sha512-UdtziYp5HuTz7e5j8Nvq+a/3HQo+2/aJZ9xntNTpmRRIg/3YYqDVgiS9fvAhtNbnyfbv2ZBe0bqCHqzhE7FqWQ==} data-urls@5.0.0: resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==} @@ -2292,8 +2453,8 @@ packages: devlop@1.1.0: resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==} - devtools-protocol@0.0.1585077: - resolution: {integrity: sha512-felwTo2l5VLTn+5mxuSDgr+2JCvcWun7H199YKV1cDhM/5IF7soujTI/VPOmlSgFSMSQGdsibjkqlyXg8uMUnQ==} + devtools-protocol@0.0.1602427: + resolution: {integrity: sha512-njq3w5kagNNrtv6xVsmI9b/Kvbz99GIlZ6qoDAQH8iWLWr77CuCQf9r6tnRgBmfBgeOO8meJJADXrh5zXFLh1A==} diff-sequences@29.6.3: resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==} @@ -2388,6 +2549,11 @@ packages: engines: {node: '>=18'} hasBin: true + esbuild@0.27.4: + resolution: {integrity: sha512-Rq4vbHnYkK5fws5NF7MYTU68FPRE1ajX7heQ/8QXXWqNgqqJ/GkmmyxIzUnf2Sr/bakf8l54716CcMGHYhMrrQ==} + engines: {node: '>=18'} + hasBin: true + escalade@3.2.0: resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==} engines: {node: '>=6'} @@ -2573,6 +2739,10 @@ packages: resolution: {integrity: sha512-A0XeIi7CXU7nPlfHS9loMYEKxUaONu/hTEzHTGba9Huu94Cq1hPivf+DE5erJozZOky0LfvXAyrV/tcswpLI0Q==} engines: {node: ^20.19.0 || ^22.13.0 || >=24} + eslint-visitor-keys@5.0.1: + resolution: {integrity: sha512-tD40eHxA35h0PEIZNeIjkHoDR4YjjJp34biM0mDvplBe//mB+IHCqHDGV7pxF+7MklTvighcCPPZC7ynWyjdTA==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} + eslint@9.39.2: resolution: {integrity: sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} @@ -2680,8 +2850,8 @@ packages: fflate@0.8.2: resolution: {integrity: sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==} - figlet@1.10.0: - resolution: {integrity: sha512-aktIwEZZ6Gp9AWdMXW4YCi0J2Ahuxo67fNJRUIWD81w8pQ0t9TS8FFpbl27ChlTLF06VkwjDesZSzEVzN75rzA==} + figlet@1.11.0: + resolution: {integrity: sha512-EEx3OS/l2bFqcUNN2NM9FPJp8vAMrgbCxsbl2hbcJNNxOEwVe3mEzrhan7TbJQViZa8mMqhihlbCaqD+LyYKTQ==} engines: {node: '>= 17.0.0'} hasBin: true @@ -2720,12 +2890,12 @@ packages: resolution: {integrity: sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==} engines: {node: '>=10'} - fingerprint-generator@2.1.80: - resolution: {integrity: sha512-uVNb9KdgqxtOjBh7FmSUz2FKfomQq9j2hicCdqAjGZ+Nooa1Nuj8gY1wL4cnmCFTlJy+Lz1gMIdsoSE6iqaoAg==} + fingerprint-generator@2.1.81: + resolution: {integrity: sha512-R8Cgnv9AhsTG8MN+DCuFolq2cJPdTNDKOM11EaRSCfRBnBGsPWTTm9e3INld1rzU+bMITvqAcghlCjXOVCrYUA==} engines: {node: '>=16.0.0'} - fingerprint-injector@2.1.80: - resolution: {integrity: sha512-wcCp9QVKAggyVqMAPtQurE9lr6tXZX0w99pMdQfCCLMDpjA0piiF2szoeDGhWEUnF2MN6Mn1ifq3TSnXxbn6Ug==} + fingerprint-injector@2.1.81: + resolution: {integrity: sha512-/HlE+pDTety9ygiYHdlh+7lDhrm5sxOB7ThWdhDwDVqSr7zI4D/Ruqhg7iDmxMLVWTcUCXsiA9h9tgQgSiPolw==} engines: {node: '>=16.0.0'} peerDependencies: playwright: ^1.22.2 @@ -2743,8 +2913,8 @@ packages: flatbuffers@25.9.23: resolution: {integrity: sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==} - flatted@3.3.4: - resolution: {integrity: sha512-3+mMldrTAPdta5kjX2G2J7iX4zxtnwpdA8Tr2ZSjkyPSanvbZAcy6flmtnXbEybHrDcU9641lxrMfFuUxVz9vA==} + flatted@3.4.2: + resolution: {integrity: sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==} foreground-child@3.3.1: resolution: {integrity: sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==} @@ -2764,8 +2934,8 @@ packages: fs-constants@1.0.0: resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==} - fs-extra@11.3.3: - resolution: {integrity: sha512-VWSRii4t0AFm6ixFFmLLx1t7wS1gh+ckoa84aOeapGum0h+EZd1EhEumSB+ZdDLnEPuucsVB9oB7cxJHap6Afg==} + fs-extra@11.3.4: + resolution: {integrity: sha512-CTXd6rk/M3/ULNQj8FBqBWHYBVYybQ3VPBw0xGKFe3tuH7ytT6ACnvzpIQ3UZtB8yvUKC2cXn1a+x+5EVQLovA==} engines: {node: '>=14.14'} fsevents@2.3.3: @@ -2773,8 +2943,8 @@ packages: engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} os: [darwin] - generative-bayesian-network@2.1.80: - resolution: {integrity: sha512-LyCc23TIFvZDkUJclZ3ixCZvd+dhktr9Aug1EKz5VrfJ2eA5J2HrprSwWRna3VObU2Wy8quXMUF8j2em0bJSLw==} + generative-bayesian-network@2.1.81: + resolution: {integrity: sha512-LrYK+CY5n21p437oahz8jRqTgw0i+S08H+ypag1sgZilfCj33k8Tp8kcFtPiWKsEEJ6niN9gRFP12+r06xB4rQ==} get-caller-file@2.0.5: resolution: {integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==} @@ -2850,8 +3020,8 @@ packages: resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} engines: {node: '>= 0.4'} - got-scraping@4.1.3: - resolution: {integrity: sha512-PTXcxbuWg631hbRNZRa7p0JKCTLDVAy5AMbJtrxkiNHLVb9Fkn5ghOELaxjNXU5axrriPhEhV4/N/omhaOWJeg==} + got-scraping@4.2.1: + resolution: {integrity: sha512-rhOlO1L4H4Cm31smHJqPtAaXOUrhSKsiTrbZSHKFQW1E/mkTDopnHHpRnXJpqzE0faj+zPsVQnyifIqO+K+cLQ==} engines: {node: '>=16'} got@14.6.6: @@ -2875,8 +3045,8 @@ packages: has-property-descriptors@1.0.2: resolution: {integrity: sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==} - header-generator@2.1.80: - resolution: {integrity: sha512-7gvv2Xm6Q0gNN3BzMD/D3sGvSJRcV1+k8XehPmBYTpTkBmKshwnYyi0jJJnpP3S6YP7vdOoEobeBV87aG9YTtQ==} + header-generator@2.1.81: + resolution: {integrity: sha512-6+27UuqCHFx4xrTWIgcSF/x2WJ+PuVLxziXfPaVLRXi1lXIbTkXO+ffHJefVrdRT5/XEeWfJHrSIE2m1hAdWxw==} engines: {node: '>=16.0.0'} hookable@5.5.3: @@ -3653,8 +3823,8 @@ packages: resolution: {integrity: sha512-orRsuYpJVw8LdAwqqLykBj9ecS5/cRHlI5+nvTo8LcCKmzDmqVORXtOIYEEQuL9D4BxtA1lm5isAqzQZCoQ6Eg==} engines: {node: '>=4'} - postcss@8.5.6: - resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==} + postcss@8.5.8: + resolution: {integrity: sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==} engines: {node: ^10 || ^12 || >=14} prebuild-install@7.1.3: @@ -3796,8 +3966,8 @@ packages: resolution: {integrity: sha512-I1XxrZSQ+oErkRR4jYbAyEEu2I0avBvvMM5JN+6EBprOGRCs63ENqZ3vjavq8fBw2+62G5LF5XelKwuJpcvcxw==} engines: {node: '>=10'} - retriv@0.11.0: - resolution: {integrity: sha512-2y+UzK0QIxQFp/TIlh6ZGrmaJjVFq0SW0yHwVf715nRWkHFbnlcNFgyuNffB6tu1iBlrDWHXx8QVVK2ED04aUg==} + retriv@0.12.0: + resolution: {integrity: sha512-YlRtm0sBFKvfOXoNXbVXhKCFMIGybTDg4CyTQjS4o1EB8oYgIe9jkTD41pXgzQ6/tgZK44Gyfn7dd24K/YyZ9g==} peerDependencies: '@ai-sdk/cohere': ^3.0.0 '@ai-sdk/google': ^3.0.0 @@ -3914,8 +4084,8 @@ packages: safer-buffer@2.1.2: resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} - sax@1.4.4: - resolution: {integrity: sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw==} + sax@1.6.0: + resolution: {integrity: sha512-6R3J5M4AcbtLUdZmRv2SygeVaM7IhrLXu9BmnOGmmACak8fiUtOsYNWUS4uK7upbmHIBbLBeFeI//477BKLBzA==} engines: {node: '>=11.0.0'} saxes@6.0.0: @@ -4199,24 +4369,24 @@ packages: tldts-core@6.1.86: resolution: {integrity: sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA==} - tldts-core@7.0.23: - resolution: {integrity: sha512-0g9vrtDQLrNIiCj22HSe9d4mLVG3g5ph5DZ8zCKBr4OtrspmNB6ss7hVyzArAeE88ceZocIEGkyW1Ime7fxPtQ==} - tldts-core@7.0.26: resolution: {integrity: sha512-5WJ2SqFsv4G2Dwi7ZFVRnz6b2H1od39QME1lc2y5Ew3eWiZMAeqOAfWpRP9jHvhUl881406QtZTODvjttJs+ew==} + tldts-core@7.0.27: + resolution: {integrity: sha512-YQ7uPjgWUibIK6DW5lrKujGwUKhLevU4hcGbP5O6TcIUb+oTjJYJVWPS4nZsIHrEEEG6myk/oqAJUEQmpZrHsg==} + tldts@6.1.86: resolution: {integrity: sha512-WMi/OQ2axVTf/ykqCQgXiIct+mSQDFdH2fkwhPwgEwvJ1kSzZRiinb0zF2Xb8u4+OqPChmyI6MEu4EezNJz+FQ==} hasBin: true - tldts@7.0.23: - resolution: {integrity: sha512-ASdhgQIBSay0R/eXggAkQ53G4nTJqTXqC2kbaBbdDwM7SkjyZyO0OaaN1/FH7U/yCeqOHDwFO5j8+Os/IS1dXw==} - hasBin: true - tldts@7.0.26: resolution: {integrity: sha512-WiGwQjr0qYdNNG8KpMKlSvpxz652lqa3Rd+/hSaDcY4Uo6SKWZq2LAF+hsAhUewTtYhXlorBKgNF3Kk8hnjGoQ==} hasBin: true + tldts@7.0.27: + resolution: {integrity: sha512-I4FZcVFcqCRuT0ph6dCDpPuO4Xgzvh+spkcTr1gK7peIvxWauoloVO0vuy1FQnijT63ss6AsHB6+OIM4aXHbPg==} + hasBin: true + to-valid-identifier@1.0.0: resolution: {integrity: sha512-41wJyvKep3yT2tyPqX/4blcfybknGB4D+oETKLs7Q76UiPqRpUJK3hr1nxelyYO0PHKVzJwlu0aCeEAsGI6rpw==} engines: {node: '>=20'} @@ -4241,8 +4411,8 @@ packages: resolution: {integrity: sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A==} engines: {node: '>=16'} - tough-cookie@6.0.0: - resolution: {integrity: sha512-kXuRi1mtaKMrsLUxz3sQYvVl37B0Ns6MzfrtV5DvJceE9bPyspOqk9xxv7XbZWcfLWbFmm997vl83qUWVJA64w==} + tough-cookie@6.0.1: + resolution: {integrity: sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==} engines: {node: '>=16'} tr46@5.1.1: @@ -4255,6 +4425,12 @@ packages: peerDependencies: typescript: '>=4.8.4' + ts-api-utils@2.5.0: + resolution: {integrity: sha512-OJ/ibxhPlqrMM0UiNHJ/0CKQkoKF243/AEmplt3qpRgkW8VG7IfOS41h7V8TjITqdByHzrjcS/2si+y4lIh8NA==} + engines: {node: '>=18.12'} + peerDependencies: + typescript: '>=4.8.4' + ts-declaration-location@1.0.7: resolution: {integrity: sha512-EDyGAwH1gO0Ausm9gV6T2nUvBgXT5kGoCMJPllOaooZ+4VvJiKBdZE7wK18N1deEowhcUptS+5GXZK8U/fvpwA==} peerDependencies: @@ -4700,15 +4876,15 @@ snapshots: package-manager-detector: 1.6.0 tinyexec: 1.0.2 - '@apify/consts@2.51.0': + '@apify/consts@2.51.1': optional: true '@apify/datastructures@2.0.3': optional: true - '@apify/log@2.5.32': + '@apify/log@2.5.33': dependencies: - '@apify/consts': 2.51.0 + '@apify/consts': 2.51.1 ansi-colors: 4.1.3 optional: true @@ -4717,18 +4893,18 @@ snapshots: event-stream: 3.3.4 optional: true - '@apify/pseudo_url@2.0.73': + '@apify/pseudo_url@2.0.74': dependencies: - '@apify/log': 2.5.32 + '@apify/log': 2.5.33 optional: true '@apify/timeout@0.3.2': optional: true - '@apify/utilities@2.25.4': + '@apify/utilities@2.25.5': dependencies: - '@apify/consts': 2.51.0 - '@apify/log': 2.5.32 + '@apify/consts': 2.51.1 + '@apify/log': 2.5.33 optional: true '@asamuzakjp/css-color@3.2.0': @@ -4761,6 +4937,10 @@ snapshots: dependencies: '@babel/types': 7.29.0 + '@babel/parser@7.29.2': + dependencies: + '@babel/types': 7.29.0 + '@babel/parser@8.0.0-rc.2': dependencies: '@babel/types': 8.0.0-rc.2 @@ -4790,17 +4970,17 @@ snapshots: '@crawlee/basic@3.16.0': dependencies: - '@apify/log': 2.5.32 + '@apify/log': 2.5.33 '@apify/timeout': 0.3.2 - '@apify/utilities': 2.25.4 + '@apify/utilities': 2.25.5 '@crawlee/core': 3.16.0 '@crawlee/types': 3.16.0 '@crawlee/utils': 3.16.0 - csv-stringify: 6.6.0 - fs-extra: 11.3.3 - got-scraping: 4.1.3 + csv-stringify: 6.7.0 + fs-extra: 11.3.4 + got-scraping: 4.2.1 ow: 0.28.2 - tldts: 7.0.23 + tldts: 7.0.27 tslib: 2.8.1 type-fest: 4.41.0 transitivePeerDependencies: @@ -4809,12 +4989,12 @@ snapshots: '@crawlee/browser-pool@3.16.0': dependencies: - '@apify/log': 2.5.32 + '@apify/log': 2.5.33 '@apify/timeout': 0.3.2 '@crawlee/core': 3.16.0 '@crawlee/types': 3.16.0 - fingerprint-generator: 2.1.80 - fingerprint-injector: 2.1.80 + fingerprint-generator: 2.1.81 + fingerprint-injector: 2.1.81 lodash.merge: 4.6.2 nanoid: 3.3.11 ow: 0.28.2 @@ -4857,7 +5037,7 @@ snapshots: dependencies: '@crawlee/templates': 3.16.0(@types/node@25.5.0) ansi-colors: 4.1.3 - fs-extra: 11.3.3 + fs-extra: 11.3.4 inquirer: 8.2.7(@types/node@25.5.0) tslib: 2.8.1 yargonaut: 1.1.4 @@ -4868,26 +5048,26 @@ snapshots: '@crawlee/core@3.16.0': dependencies: - '@apify/consts': 2.51.0 + '@apify/consts': 2.51.1 '@apify/datastructures': 2.0.3 - '@apify/log': 2.5.32 - '@apify/pseudo_url': 2.0.73 + '@apify/log': 2.5.33 + '@apify/pseudo_url': 2.0.74 '@apify/timeout': 0.3.2 - '@apify/utilities': 2.25.4 + '@apify/utilities': 2.25.5 '@crawlee/memory-storage': 3.16.0 '@crawlee/types': 3.16.0 '@crawlee/utils': 3.16.0 '@sapphire/async-queue': 1.5.5 '@vladfrangu/async_event_emitter': 2.4.7 - csv-stringify: 6.6.0 - fs-extra: 11.3.3 - got-scraping: 4.1.3 + csv-stringify: 6.7.0 + fs-extra: 11.3.4 + got-scraping: 4.2.1 json5: 2.2.3 minimatch: 9.0.9 ow: 0.28.2 stream-json: 1.9.1 - tldts: 7.0.23 - tough-cookie: 6.0.0 + tldts: 7.0.27 + tough-cookie: 6.0.1 tslib: 2.8.1 type-fest: 4.41.0 transitivePeerDependencies: @@ -4897,14 +5077,14 @@ snapshots: '@crawlee/http@3.16.0': dependencies: '@apify/timeout': 0.3.2 - '@apify/utilities': 2.25.4 + '@apify/utilities': 2.25.5 '@crawlee/basic': 3.16.0 '@crawlee/types': 3.16.0 '@crawlee/utils': 3.16.0 '@types/content-type': 1.1.9 cheerio: 1.0.0-rc.12 content-type: 1.0.5 - got-scraping: 4.1.3 + got-scraping: 4.2.1 iconv-lite: 0.7.2 mime-types: 2.1.35 ow: 0.28.2 @@ -4917,7 +5097,7 @@ snapshots: '@crawlee/jsdom@3.16.0': dependencies: '@apify/timeout': 0.3.2 - '@apify/utilities': 2.25.4 + '@apify/utilities': 2.25.5 '@crawlee/http': 3.16.0 '@crawlee/types': 3.16.0 '@crawlee/utils': 3.16.0 @@ -4936,7 +5116,7 @@ snapshots: '@crawlee/linkedom@3.16.0': dependencies: '@apify/timeout': 0.3.2 - '@apify/utilities': 2.25.4 + '@apify/utilities': 2.25.5 '@crawlee/http': 3.16.0 '@crawlee/types': 3.16.0 linkedom: 0.18.12 @@ -4949,12 +5129,12 @@ snapshots: '@crawlee/memory-storage@3.16.0': dependencies: - '@apify/log': 2.5.32 + '@apify/log': 2.5.33 '@crawlee/types': 3.16.0 '@sapphire/async-queue': 1.5.5 '@sapphire/shapeshift': 3.9.7 content-type: 1.0.5 - fs-extra: 11.3.3 + fs-extra: 11.3.4 json5: 2.2.3 mime-types: 2.1.35 proper-lockfile: 4.1.2 @@ -4964,7 +5144,7 @@ snapshots: '@crawlee/playwright@3.16.0': dependencies: '@apify/datastructures': 2.0.3 - '@apify/log': 2.5.32 + '@apify/log': 2.5.33 '@apify/timeout': 0.3.2 '@crawlee/browser': 3.16.0 '@crawlee/browser-pool': 3.16.0 @@ -4987,13 +5167,13 @@ snapshots: '@crawlee/puppeteer@3.16.0': dependencies: '@apify/datastructures': 2.0.3 - '@apify/log': 2.5.32 + '@apify/log': 2.5.33 '@crawlee/browser': 3.16.0 '@crawlee/browser-pool': 3.16.0 '@crawlee/types': 3.16.0 '@crawlee/utils': 3.16.0 cheerio: 1.0.0-rc.12 - devtools-protocol: 0.0.1585077 + devtools-protocol: 0.0.1602427 jquery: 3.7.1 ow: 0.28.2 tslib: 2.8.1 @@ -5020,16 +5200,16 @@ snapshots: '@crawlee/utils@3.16.0': dependencies: - '@apify/log': 2.5.32 + '@apify/log': 2.5.33 '@apify/ps-tree': 1.2.0 '@crawlee/types': 3.16.0 '@types/sax': 1.2.7 cheerio: 1.0.0-rc.12 file-type: 20.5.0 - got-scraping: 4.1.3 + got-scraping: 4.2.1 ow: 0.28.2 robots-parser: 3.0.1 - sax: 1.4.4 + sax: 1.6.0 tslib: 2.8.1 whatwg-mimetype: 4.0.0 transitivePeerDependencies: @@ -5096,81 +5276,159 @@ snapshots: '@esbuild/aix-ppc64@0.27.3': optional: true + '@esbuild/aix-ppc64@0.27.4': + optional: true + '@esbuild/android-arm64@0.27.3': optional: true + '@esbuild/android-arm64@0.27.4': + optional: true + '@esbuild/android-arm@0.27.3': optional: true + '@esbuild/android-arm@0.27.4': + optional: true + '@esbuild/android-x64@0.27.3': optional: true + '@esbuild/android-x64@0.27.4': + optional: true + '@esbuild/darwin-arm64@0.27.3': optional: true + '@esbuild/darwin-arm64@0.27.4': + optional: true + '@esbuild/darwin-x64@0.27.3': optional: true + '@esbuild/darwin-x64@0.27.4': + optional: true + '@esbuild/freebsd-arm64@0.27.3': optional: true + '@esbuild/freebsd-arm64@0.27.4': + optional: true + '@esbuild/freebsd-x64@0.27.3': optional: true + '@esbuild/freebsd-x64@0.27.4': + optional: true + '@esbuild/linux-arm64@0.27.3': optional: true + '@esbuild/linux-arm64@0.27.4': + optional: true + '@esbuild/linux-arm@0.27.3': optional: true + '@esbuild/linux-arm@0.27.4': + optional: true + '@esbuild/linux-ia32@0.27.3': optional: true + '@esbuild/linux-ia32@0.27.4': + optional: true + '@esbuild/linux-loong64@0.27.3': optional: true + '@esbuild/linux-loong64@0.27.4': + optional: true + '@esbuild/linux-mips64el@0.27.3': optional: true + '@esbuild/linux-mips64el@0.27.4': + optional: true + '@esbuild/linux-ppc64@0.27.3': optional: true + '@esbuild/linux-ppc64@0.27.4': + optional: true + '@esbuild/linux-riscv64@0.27.3': optional: true + '@esbuild/linux-riscv64@0.27.4': + optional: true + '@esbuild/linux-s390x@0.27.3': optional: true + '@esbuild/linux-s390x@0.27.4': + optional: true + '@esbuild/linux-x64@0.27.3': optional: true + '@esbuild/linux-x64@0.27.4': + optional: true + '@esbuild/netbsd-arm64@0.27.3': optional: true + '@esbuild/netbsd-arm64@0.27.4': + optional: true + '@esbuild/netbsd-x64@0.27.3': optional: true + '@esbuild/netbsd-x64@0.27.4': + optional: true + '@esbuild/openbsd-arm64@0.27.3': optional: true + '@esbuild/openbsd-arm64@0.27.4': + optional: true + '@esbuild/openbsd-x64@0.27.3': optional: true + '@esbuild/openbsd-x64@0.27.4': + optional: true + '@esbuild/openharmony-arm64@0.27.3': optional: true + '@esbuild/openharmony-arm64@0.27.4': + optional: true + '@esbuild/sunos-x64@0.27.3': optional: true + '@esbuild/sunos-x64@0.27.4': + optional: true + '@esbuild/win32-arm64@0.27.3': optional: true + '@esbuild/win32-arm64@0.27.4': + optional: true + '@esbuild/win32-ia32@0.27.3': optional: true + '@esbuild/win32-ia32@0.27.4': + optional: true + '@esbuild/win32-x64@0.27.3': optional: true + '@esbuild/win32-x64@0.27.4': + optional: true + '@eslint-community/eslint-plugin-eslint-comments@4.7.1(eslint@9.39.2(jiti@2.6.1))': dependencies: escape-string-regexp: 4.0.0 @@ -5190,7 +5448,7 @@ snapshots: optionalDependencies: eslint: 9.39.2(jiti@2.6.1) - '@eslint/config-array@0.21.1': + '@eslint/config-array@0.21.2': dependencies: '@eslint/object-schema': 2.1.7 debug: 4.4.3 @@ -5214,7 +5472,7 @@ snapshots: dependencies: '@types/json-schema': 7.0.15 - '@eslint/eslintrc@3.3.4': + '@eslint/eslintrc@3.3.5': dependencies: ajv: 6.14.0 debug: 4.4.3 @@ -5977,7 +6235,7 @@ snapshots: minimatch: 10.2.4 semver: 7.7.4 tinyglobby: 0.2.15 - ts-api-utils: 2.4.0(typescript@6.0.0-beta) + ts-api-utils: 2.5.0(typescript@6.0.0-beta) typescript: 6.0.0-beta transitivePeerDependencies: - supports-color @@ -6022,7 +6280,7 @@ snapshots: '@typescript-eslint/visitor-keys@8.56.1': dependencies: '@typescript-eslint/types': 8.56.1 - eslint-visitor-keys: 5.0.0 + eslint-visitor-keys: 5.0.1 '@typescript-eslint/visitor-keys@8.57.0': dependencies: @@ -6117,7 +6375,7 @@ snapshots: '@vue/compiler-core@3.5.27': dependencies: - '@babel/parser': 7.29.0 + '@babel/parser': 7.29.2 '@vue/shared': 3.5.27 entities: 7.0.1 estree-walker: 2.0.2 @@ -6130,14 +6388,14 @@ snapshots: '@vue/compiler-sfc@3.5.27': dependencies: - '@babel/parser': 7.29.0 + '@babel/parser': 7.29.2 '@vue/compiler-core': 3.5.27 '@vue/compiler-dom': 3.5.27 '@vue/compiler-ssr': 3.5.27 '@vue/shared': 3.5.27 estree-walker: 2.0.2 magic-string: 0.30.21 - postcss: 8.5.6 + postcss: 8.5.8 source-map-js: 1.2.1 '@vue/compiler-ssr@3.5.27': @@ -6562,7 +6820,7 @@ snapshots: rrweb-cssom: 0.8.0 optional: true - csv-stringify@6.6.0: + csv-stringify@6.7.0: optional: true data-urls@5.0.0: @@ -6626,7 +6884,7 @@ snapshots: dependencies: dequal: 2.0.3 - devtools-protocol@0.0.1585077: + devtools-protocol@0.0.1602427: optional: true diff-sequences@29.6.3: {} @@ -6737,6 +6995,35 @@ snapshots: '@esbuild/win32-ia32': 0.27.3 '@esbuild/win32-x64': 0.27.3 + esbuild@0.27.4: + optionalDependencies: + '@esbuild/aix-ppc64': 0.27.4 + '@esbuild/android-arm': 0.27.4 + '@esbuild/android-arm64': 0.27.4 + '@esbuild/android-x64': 0.27.4 + '@esbuild/darwin-arm64': 0.27.4 + '@esbuild/darwin-x64': 0.27.4 + '@esbuild/freebsd-arm64': 0.27.4 + '@esbuild/freebsd-x64': 0.27.4 + '@esbuild/linux-arm': 0.27.4 + '@esbuild/linux-arm64': 0.27.4 + '@esbuild/linux-ia32': 0.27.4 + '@esbuild/linux-loong64': 0.27.4 + '@esbuild/linux-mips64el': 0.27.4 + '@esbuild/linux-ppc64': 0.27.4 + '@esbuild/linux-riscv64': 0.27.4 + '@esbuild/linux-s390x': 0.27.4 + '@esbuild/linux-x64': 0.27.4 + '@esbuild/netbsd-arm64': 0.27.4 + '@esbuild/netbsd-x64': 0.27.4 + '@esbuild/openbsd-arm64': 0.27.4 + '@esbuild/openbsd-x64': 0.27.4 + '@esbuild/openharmony-arm64': 0.27.4 + '@esbuild/sunos-x64': 0.27.4 + '@esbuild/win32-arm64': 0.27.4 + '@esbuild/win32-ia32': 0.27.4 + '@esbuild/win32-x64': 0.27.4 + escalade@3.2.0: {} escape-html@1.0.3: {} @@ -6973,14 +7260,16 @@ snapshots: eslint-visitor-keys@5.0.0: {} + eslint-visitor-keys@5.0.1: {} + eslint@9.39.2(jiti@2.6.1): dependencies: '@eslint-community/eslint-utils': 4.9.1(eslint@9.39.2(jiti@2.6.1)) '@eslint-community/regexpp': 4.12.2 - '@eslint/config-array': 0.21.1 + '@eslint/config-array': 0.21.2 '@eslint/config-helpers': 0.4.2 '@eslint/core': 0.17.0 - '@eslint/eslintrc': 3.3.4 + '@eslint/eslintrc': 3.3.5 '@eslint/js': 9.39.2 '@eslint/plugin-kit': 0.4.1 '@humanfs/node': 0.16.7 @@ -7141,7 +7430,7 @@ snapshots: fflate@0.8.2: optional: true - figlet@1.10.0: + figlet@1.11.0: dependencies: commander: 14.0.3 optional: true @@ -7193,27 +7482,27 @@ snapshots: locate-path: 6.0.0 path-exists: 4.0.0 - fingerprint-generator@2.1.80: + fingerprint-generator@2.1.81: dependencies: - generative-bayesian-network: 2.1.80 - header-generator: 2.1.80 + generative-bayesian-network: 2.1.81 + header-generator: 2.1.81 tslib: 2.8.1 optional: true - fingerprint-injector@2.1.80: + fingerprint-injector@2.1.81: dependencies: - fingerprint-generator: 2.1.80 + fingerprint-generator: 2.1.81 tslib: 2.8.1 optional: true flat-cache@4.0.1: dependencies: - flatted: 3.3.4 + flatted: 3.4.2 keyv: 4.5.4 flatbuffers@25.9.23: {} - flatted@3.3.4: {} + flatted@3.4.2: {} foreground-child@3.3.1: dependencies: @@ -7230,7 +7519,7 @@ snapshots: fs-constants@1.0.0: {} - fs-extra@11.3.3: + fs-extra@11.3.4: dependencies: graceful-fs: 4.2.11 jsonfile: 6.2.0 @@ -7240,7 +7529,7 @@ snapshots: fsevents@2.3.3: optional: true - generative-bayesian-network@2.1.80: + generative-bayesian-network@2.1.81: dependencies: adm-zip: 0.5.16 tslib: 2.8.1 @@ -7314,10 +7603,10 @@ snapshots: gopd@1.2.0: {} - got-scraping@4.1.3: + got-scraping@4.2.1: dependencies: got: 14.6.6 - header-generator: 2.1.80 + header-generator: 2.1.81 http2-wrapper: 2.2.1 mimic-response: 4.0.0 ow: 1.1.1 @@ -7356,10 +7645,10 @@ snapshots: dependencies: es-define-property: 1.0.1 - header-generator@2.1.80: + header-generator@2.1.81: dependencies: browserslist: 4.28.1 - generative-bayesian-network: 2.1.80 + generative-bayesian-network: 2.1.81 ow: 0.28.2 tslib: 2.8.1 optional: true @@ -8467,7 +8756,7 @@ snapshots: cssesc: 3.0.0 util-deprecate: 1.0.2 - postcss@8.5.6: + postcss@8.5.8: dependencies: nanoid: 3.3.11 picocolors: 1.1.1 @@ -8628,7 +8917,7 @@ snapshots: ret@0.5.0: {} - retriv@0.11.0(@huggingface/transformers@3.8.1)(ai@6.0.68(zod@4.3.6))(sqlite-vec@0.1.7)(typescript@6.0.0-beta): + retriv@0.12.0(@huggingface/transformers@3.8.1)(ai@6.0.68(zod@4.3.6))(sqlite-vec@0.1.7)(typescript@6.0.0-beta): optionalDependencies: '@huggingface/transformers': 3.8.1 ai: 6.0.68(zod@4.3.6) @@ -8753,7 +9042,7 @@ snapshots: safer-buffer@2.1.2: optional: true - sax@1.4.4: + sax@1.6.0: optional: true saxes@6.0.0: @@ -9073,25 +9362,25 @@ snapshots: tldts-core@6.1.86: optional: true - tldts-core@7.0.23: - optional: true - tldts-core@7.0.26: {} - tldts@6.1.86: - dependencies: - tldts-core: 6.1.86 + tldts-core@7.0.27: optional: true - tldts@7.0.23: + tldts@6.1.86: dependencies: - tldts-core: 7.0.23 + tldts-core: 6.1.86 optional: true tldts@7.0.26: dependencies: tldts-core: 7.0.26 + tldts@7.0.27: + dependencies: + tldts-core: 7.0.27 + optional: true + to-valid-identifier@1.0.0: dependencies: '@sindresorhus/base62': 1.0.0 @@ -9116,9 +9405,9 @@ snapshots: tldts: 6.1.86 optional: true - tough-cookie@6.0.0: + tough-cookie@6.0.1: dependencies: - tldts: 7.0.23 + tldts: 7.0.27 optional: true tr46@5.1.1: @@ -9130,6 +9419,10 @@ snapshots: dependencies: typescript: 6.0.0-beta + ts-api-utils@2.5.0(typescript@6.0.0-beta): + dependencies: + typescript: 6.0.0-beta + ts-declaration-location@1.0.7(typescript@6.0.0-beta): dependencies: picomatch: 4.0.3 @@ -9239,10 +9532,10 @@ snapshots: vite@7.3.1(@types/node@25.5.0)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2): dependencies: - esbuild: 0.27.3 + esbuild: 0.27.4 fdir: 6.5.0(picomatch@4.0.3) picomatch: 4.0.3 - postcss: 8.5.6 + postcss: 8.5.8 rollup: 4.59.0 tinyglobby: 0.2.15 optionalDependencies: @@ -9378,7 +9671,7 @@ snapshots: yargonaut@1.1.4: dependencies: chalk: 1.1.3 - figlet: 1.10.0 + figlet: 1.11.0 parent-require: 1.0.0 optional: true diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 3c15123e..ecd0c1ce 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -5,7 +5,6 @@ shellEmulator: true trustPolicy: no-downgrade packages: - playground - overrides: global-agent: ^4.1.3 catalog: @@ -16,11 +15,11 @@ catalog: mdream: ^1.0.0 ofetch: ^1.5.1 pathe: ^2.0.3 + retriv: ^0.12.0 semver: ^7.7.4 std-env: ^4.0.0 tsx: ^4.21.0 unagent: ^0.0.8 - catalogs: deps: '@clack/prompts': ^1.1.0 @@ -35,7 +34,6 @@ catalogs: mlly: ^1.8.1 oxc-parser: ^0.121.0 p-limit: ^7.3.0 - retriv: ^0.11.0 sqlite-vec: ^0.1.7 tinyglobby: ^0.2.15 typescript: 6.0.0-beta diff --git a/src/commands/sync-shared.ts b/src/commands/sync-shared.ts index 4a11a245..14a451f4 100644 --- a/src/commands/sync-shared.ts +++ b/src/commands/sync-shared.ts @@ -41,7 +41,7 @@ import { parsePackages, readLock, writeLock } from '../core/lockfile.ts' import { parseFrontmatter } from '../core/markdown.ts' import { sanitizeMarkdown } from '../core/sanitize.ts' import { getSharedSkillsDir } from '../core/shared.ts' -import { createIndex, SearchDepsUnavailableError } from '../retriv/index.ts' +import { createIndex, listIndexIds, SearchDepsUnavailableError } from '../retriv/index.ts' import { downloadLlmsDocs, fetchBlogReleases, @@ -730,7 +730,32 @@ export async function fetchAndCacheResources(opts: { } } -/** Index all resources into the search database (single batch) */ +/** + * Extract the parent document ID from a chunk ID. + * Chunk IDs have the form "docId#chunk-N"; non-chunk IDs return as-is. + */ +function parentDocId(id: string): string { + const idx = id.indexOf('#chunk-') + return idx === -1 ? id : id.slice(0, idx) +} + +/** Cap and sort docs by type priority, mutates and truncates allDocs in place */ +function capDocs(allDocs: IndexDoc[], max: number, onProgress: (msg: string) => void): void { + if (allDocs.length <= max) + return + const TYPE_PRIORITY: Record = { doc: 0, issue: 1, discussion: 2, release: 3, source: 4, types: 5 } + allDocs.sort((a, b) => { + const ta = TYPE_PRIORITY[a.metadata?.type || 'doc'] ?? 3 + const tb = TYPE_PRIORITY[b.metadata?.type || 'doc'] ?? 3 + if (ta !== tb) + return ta - tb + return a.id.localeCompare(b.id) + }) + onProgress(`Indexing capped at ${max}/${allDocs.length} docs (prioritized by type)`) + allDocs.length = max +} + +/** Index all resources into the search database, with incremental support */ export async function indexResources(opts: { packageName: string version: string @@ -746,9 +771,7 @@ export async function indexResources(opts: { return const dbPath = getPackageDbPath(packageName, version) - - if (existsSync(dbPath)) - return + const dbExists = existsSync(dbPath) const allDocs = [...opts.docsToIndex] @@ -769,27 +792,77 @@ export async function indexResources(opts: { if (allDocs.length === 0) return - // Cap docs to prevent oversized indexes - if (allDocs.length > MAX_INDEX_DOCS) { - const TYPE_PRIORITY: Record = { doc: 0, issue: 1, discussion: 2, release: 3, source: 4, types: 5 } - allDocs.sort((a, b) => { - const ta = TYPE_PRIORITY[a.metadata?.type || 'doc'] ?? 3 - const tb = TYPE_PRIORITY[b.metadata?.type || 'doc'] ?? 3 - if (ta !== tb) - return ta - tb - return a.id.localeCompare(b.id) - }) - onProgress(`Indexing capped at ${MAX_INDEX_DOCS}/${allDocs.length} docs (prioritized by type)`) - allDocs.length = MAX_INDEX_DOCS + capDocs(allDocs, MAX_INDEX_DOCS, onProgress) + + // Full build when no existing DB + if (!dbExists) { + onProgress(`Building search index (${allDocs.length} docs)`) + try { + await createIndex(allDocs, { + dbPath, + onProgress: ({ phase, current, total }) => { + if (phase === 'storing') { + const d = allDocs[current - 1] + const type = d?.metadata?.type === 'source' || d?.metadata?.type === 'types' ? 'code' : (d?.metadata?.type || 'doc') + onProgress(`Storing ${type} (${current}/${total})`) + } + else if (phase === 'embedding') { + onProgress(`Creating embeddings (${current}/${total})`) + } + }, + }) + } + catch (err) { + if (err instanceof SearchDepsUnavailableError) + onProgress('Search indexing skipped (native deps unavailable)') + else + throw err + } + return + } + + // Incremental update: diff incoming docs against existing index + let existingIds: string[] + try { + existingIds = await listIndexIds({ dbPath }) } + catch (err) { + if (err instanceof SearchDepsUnavailableError) { + onProgress('Search indexing skipped (native deps unavailable)') + return + } + throw err + } + + // Group existing chunk IDs by parent doc ID + const existingParentIds = new Set(existingIds.map(parentDocId)) + const incomingIds = new Set(allDocs.map(d => d.id)) + + // Docs to add: in incoming but not in existing index + const newDocs = allDocs.filter(d => !existingParentIds.has(d.id)) + + // Chunk IDs to remove: their parent doc is no longer in incoming set + const removeIds = existingIds.filter(id => !incomingIds.has(parentDocId(id))) + + if (newDocs.length === 0 && removeIds.length === 0) { + onProgress('Search index up to date') + return + } + + const parts: string[] = [] + if (newDocs.length > 0) + parts.push(`+${newDocs.length} new`) + if (removeIds.length > 0) + parts.push(`-${removeIds.length} stale`) + onProgress(`Updating search index (${parts.join(', ')})`) - onProgress(`Building search index (${allDocs.length} docs)`) try { - await createIndex(allDocs, { + await createIndex(newDocs, { dbPath, + removeIds, onProgress: ({ phase, current, total }) => { if (phase === 'storing') { - const d = allDocs[current - 1] + const d = newDocs[current - 1] const type = d?.metadata?.type === 'source' || d?.metadata?.type === 'types' ? 'code' : (d?.metadata?.type || 'doc') onProgress(`Storing ${type} (${current}/${total})`) } diff --git a/src/retriv/index.ts b/src/retriv/index.ts index c2555dbc..dade6bf7 100644 --- a/src/retriv/index.ts +++ b/src/retriv/index.ts @@ -55,9 +55,11 @@ export async function getDb(config: Pick) { */ export async function createIndexDirect( documents: Document[], - config: IndexConfig, + config: IndexConfig & { removeIds?: string[] }, ): Promise { const db = await getDb(config) + if (config.removeIds?.length) + await db.remove?.(config.removeIds) await db.index(documents, { onProgress: config.onProgress }) await db.close?.() } @@ -68,13 +70,49 @@ export async function createIndexDirect( */ export async function createIndex( documents: Document[], - config: IndexConfig, + config: IndexConfig & { removeIds?: string[] }, ): Promise { // Dynamic import justified: search/searchSnippets shouldn't pull in worker_threads const { createIndexInWorker } = await import('./pool.ts') return createIndexInWorker(documents, config) } +/** + * List all raw document IDs in an existing index. + * Returns chunk IDs (e.g. "doc-id#chunk-0") for chunked docs. + * Queries sqlite directly to bypass createRetriv's parent-ID deduplication, + * so callers can use these IDs for exact removal and parent-ID grouping. + */ +export async function listIndexIds( + config: Pick, +): Promise { + const nodeSqlite = globalThis.process?.getBuiltinModule?.('node:sqlite') as typeof import('node:sqlite') | undefined + if (!nodeSqlite) + return [] + const db = new nodeSqlite.DatabaseSync(config.dbPath, { open: true, readOnly: true }) + try { + const rows = db.prepare('SELECT id FROM documents_meta').all() as Array<{ id: string }> + return rows.map(r => r.id) + } + finally { + db.close() + } +} + +/** + * Remove documents by ID from an existing index. + */ +export async function removeFromIndex( + ids: string[], + config: Pick, +): Promise { + if (ids.length === 0) + return + const db = await getDb(config) + await db.remove?.(ids) + await db.close?.() +} + export async function search( query: string, config: IndexConfig, diff --git a/src/retriv/pool.ts b/src/retriv/pool.ts index a4e611ea..539798d2 100644 --- a/src/retriv/pool.ts +++ b/src/retriv/pool.ts @@ -88,7 +88,7 @@ function drainQueue() { export async function createIndexInWorker( documents: RetrivDocument[], - config: IndexConfig, + config: IndexConfig & { removeIds?: string[] }, ): Promise { return new Promise((resolve, reject) => { const run = () => { @@ -126,6 +126,7 @@ export async function createIndexInWorker( id, documents, dbPath: config.dbPath, + removeIds: config.removeIds, } w.postMessage(msg) diff --git a/src/retriv/worker.ts b/src/retriv/worker.ts index 46a24c41..eaa1fb9d 100644 --- a/src/retriv/worker.ts +++ b/src/retriv/worker.ts @@ -6,6 +6,8 @@ export interface WorkerIndexMessage { id: number documents: RetrivDocument[] dbPath: string + /** Exact IDs (including chunk IDs) to remove before indexing */ + removeIds?: string[] } export interface WorkerShutdownMessage { @@ -54,6 +56,8 @@ if (parentPort) { const { getDb } = await import('./index.ts') const db = await getDb(config) + if (msg.removeIds?.length) + await db.remove?.(msg.removeIds) await db.index(documents, { onProgress: config.onProgress }) await db.close?.() diff --git a/test/unit/sync-shared.test.ts b/test/unit/sync-shared.test.ts index 9e796732..ac1e6e7e 100644 --- a/test/unit/sync-shared.test.ts +++ b/test/unit/sync-shared.test.ts @@ -90,7 +90,7 @@ vi.mock('../../src/core/lockfile', () => ({ vi.mock('../../src/retriv', async (importOriginal) => { const orig = await importOriginal() - return { ...orig, createIndex: vi.fn() } + return { ...orig, createIndex: vi.fn(), listIndexIds: vi.fn().mockResolvedValue([]) } }) vi.mock('../../src/agent', () => ({ @@ -104,7 +104,7 @@ const { getCacheDir, getPackageDbPath, readCachedDocs, writeToCache, writeToRepo const { fetchCrawledDocs, fetchGitDocs, fetchGitHubIssues, fetchGitHubDiscussions, fetchGitHubRaw, fetchLlmsTxt, fetchReadmeContent, fetchReleaseNotes, downloadLlmsDocs, isGhAvailable, isShallowGitDocs, resolveEntryFiles, resolveLocalPackageDocs } = await import('../../src/sources') const { registerProject } = await import('../../src/core/config') const { writeLock } = await import('../../src/core/lockfile') -const { createIndex } = await import('../../src/retriv') +const { createIndex, listIndexIds } = await import('../../src/retriv') const { getShippedSkills, linkShippedSkill, resolvePkgDir } = await import('../../src/cache') const { @@ -693,11 +693,49 @@ describe('sync-shared', () => { onProgress: vi.fn(), } - // 6a: db already exists → skips - it('skips when db exists', async () => { + // 6a: db already exists, no changes → reports up to date + it('reports up to date when db exists and no changes', async () => { vi.mocked(existsSync).mockReturnValue(true) - await indexResources({ ...baseOpts, docsToIndex: [{ id: 'a.md', content: 'x', metadata: {} }] }) + vi.mocked(listIndexIds).mockResolvedValue(['a.md']) + vi.mocked(resolvePkgDir).mockReturnValue(null) + const onProgress = vi.fn() + await indexResources({ ...baseOpts, docsToIndex: [{ id: 'a.md', content: 'x', metadata: {} }], onProgress }) expect(createIndex).not.toHaveBeenCalled() + expect(onProgress).toHaveBeenCalledWith('Search index up to date') + }) + + // 6a2: db exists with new docs → incremental index + it('incrementally indexes new docs when db exists', async () => { + vi.mocked(existsSync).mockReturnValue(true) + vi.mocked(listIndexIds).mockResolvedValue(['a.md']) + vi.mocked(resolvePkgDir).mockReturnValue(null) + const docs = [ + { id: 'a.md', content: 'existing', metadata: { type: 'doc' } }, + { id: 'b.md', content: 'new', metadata: { type: 'doc' } }, + ] + await indexResources({ ...baseOpts, docsToIndex: docs }) + expect(createIndex).toHaveBeenCalled() + const call = vi.mocked(createIndex).mock.calls[0] + // Only the new doc should be indexed + expect(call[0]).toHaveLength(1) + expect(call[0][0].id).toBe('b.md') + // No removals + expect(call[1].removeIds).toEqual([]) + }) + + // 6a3: db exists with stale docs → removes them + it('removes stale docs from existing index', async () => { + vi.mocked(existsSync).mockReturnValue(true) + vi.mocked(listIndexIds).mockResolvedValue(['a.md', 'old.md', 'old.md#chunk-0', 'old.md#chunk-1']) + vi.mocked(resolvePkgDir).mockReturnValue(null) + const docs = [{ id: 'a.md', content: 'content', metadata: { type: 'doc' } }] + await indexResources({ ...baseOpts, docsToIndex: docs }) + expect(createIndex).toHaveBeenCalled() + const call = vi.mocked(createIndex).mock.calls[0] + // No new docs (a.md already exists) + expect(call[0]).toHaveLength(0) + // Stale IDs removed (old.md and its chunks) + expect(call[1].removeIds).toEqual(['old.md', 'old.md#chunk-0', 'old.md#chunk-1']) }) // 6b: empty docs + no entry files → skips