diff --git a/.gitignore b/.gitignore index 9f567cb5..753a64b2 100644 --- a/.gitignore +++ b/.gitignore @@ -28,11 +28,16 @@ Thumbs.db # OpenCode .opencode/ -# Tests (local development only) -tests/ +# Generated prompt files (from scripts/generate-prompts.ts) +lib/prompts/**/*.generated.ts + +# Tests +tests/results/ notes/ test-update.ts # Documentation (local development only) docs/ SCHEMA_NOTES.md + +repomix-output.xml \ No newline at end of file diff --git a/.repomixignore b/.repomixignore new file mode 100644 index 00000000..6bc6e2ee --- /dev/null +++ b/.repomixignore @@ -0,0 +1,9 @@ +.github/ +.logs/ +.opencode/ +dist/ +.repomixignore +repomix-output.xml +bun.lock +package-lock.jsonc +LICENSE diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..d4e30979 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,26 @@ +# Contributing to DCP + +Thank you for your interest in contributing to Dynamic Context Pruning (DCP)! + +## License and Contributions + +This project uses the **GNU Affero General Public License v3.0 (AGPL-3.0)**. + +### Contribution Agreement + +By submitting a Pull Request to this project, you agree that: + +1. Your contributions are licensed under the **AGPL-3.0**. +2. You grant the project maintainer(s) a non-exclusive, perpetual, irrevocable, worldwide, royalty-free, transferable license to use, modify, and re-license your contributions under any terms they choose, including commercial or proprietary licenses. + +This arrangement ensures the project remains Open Source while providing a path for commercial sustainability. + +## Getting Started + +1. Fork the repository. +2. Create a feature branch. +3. Implement your changes and add tests if applicable. +4. Ensure all tests pass and the code is formatted. +5. Submit a Pull Request. + +We look forward to your contributions! 
diff --git a/LICENSE b/LICENSE index 46ae85da..ca9b0551 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,619 @@ -MIT License - -Copyright (c) 2025 tarquinen - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. 
+ + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. 
+ + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. 
If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS diff --git a/README.md b/README.md index 4d39d2cd..eef2f56f 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ # Dynamic Context Pruning Plugin +[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/dansmolsky) [![npm version](https://img.shields.io/npm/v/@tarquinen/opencode-dcp.svg)](https://www.npmjs.com/package/@tarquinen/opencode-dcp) -Automatically reduces token usage in OpenCode by removing obsolete tools from conversation history. +Automatically reduces token usage in OpenCode by removing obsolete content from conversation history. -![DCP in action](dcp-demo5.png) +![DCP in action](assets/images/dcp-demo5.png) ## Installation @@ -27,15 +28,17 @@ DCP uses multiple tools and strategies to reduce context size: ### Tools -**Discard** — Exposes a `discard` tool that the AI can call to remove completed or noisy tool content from context. +**Distill** — Exposes a `distill` tool that the AI can call to distill valuable context into concise summaries before removing the tool content. -**Extract** — Exposes an `extract` tool that the AI can call to distill valuable context into concise summaries before removing the tool content. +**Compress** — Exposes a `compress` tool that the AI can call to collapse a large section of conversation (messages and tools) into a single summary. + +**Prune** — Exposes a `prune` tool that the AI can call to remove completed or noisy tool content from context. 
### Strategies **Deduplication** — Identifies repeated tool calls (e.g., reading the same file multiple times) and keeps only the most recent output. Runs automatically on every request with zero LLM cost. -**Supersede Writes** — Prunes write tool inputs for files that have subsequently been read. When a file is written and later read, the original write content becomes redundant since the current file state is captured in the read result. Runs automatically on every request with zero LLM cost. +**Supersede Writes** — Removes write tool calls for files that have subsequently been read. When a file is written and later read, the original write content becomes redundant since the current file state is captured in the read result. Runs automatically on every request with zero LLM cost. **Purge Errors** — Prunes tool inputs for tools that returned errors after a configurable number of turns (default: 4). Error messages are preserved for context, but the potentially large input content is removed. Runs automatically on every request with zero LLM cost. @@ -47,9 +50,16 @@ LLM providers like Anthropic and OpenAI cache prompts based on exact prefix matc **Trade-off:** You lose some cache read benefits but gain larger token savings from reduced context size and performance improvements through reduced context poisoning. In most cases, the token savings outweigh the cache miss cost—especially in long sessions where context bloat becomes significant. -> **Note:** In testing, cache hit rates were approximately 65% with DCP enabled vs 85% without. +> **Note:** In testing, cache hit rates were approximately 80% with DCP enabled vs 85% without for most providers. + +**Best use case:** Providers that count usage in requests, such as Github Copilot and Google Antigravity, have no negative price impact. + +**Best use cases:** + +- **Request-based billing** — Providers that count usage in requests, such as Github Copilot and Google Antigravity, have no negative price impact. 
+- **Uniform token pricing** — Providers that bill cached tokens at the same rate as regular input tokens, such as Cerebras, see pure savings with no cache-miss penalty. -**Best use case:** Providers that count usage in requests, such as Github Copilot and Google Antigravity have no negative price impact. +**Claude Subscriptions:** Anthropic subscription users (who receive "free" caching) may experience faster limit depletion than hit-rate ratios suggest due to the higher relative cost of cache misses. See [Claude Cache Limits](https://she-llac.com/claude-limits) for details. ## Configuration @@ -57,80 +67,96 @@ DCP uses its own config file: - Global: `~/.config/opencode/dcp.jsonc` (or `dcp.json`), created automatically on first run - Custom config directory: `$OPENCODE_CONFIG_DIR/dcp.jsonc` (or `dcp.json`), if `OPENCODE_CONFIG_DIR` is set -- Project: `.opencode/dcp.jsonc` (or `dcp.json`) in your project’s `.opencode` directory - -
-Default Configuration (click to expand) - -```jsonc -{ - "$schema": "https://raw.githubusercontent.com/Opencode-DCP/opencode-dynamic-context-pruning/master/dcp.schema.json", - // Enable or disable the plugin - "enabled": true, - // Enable debug logging to ~/.config/opencode/logs/dcp/ - "debug": false, - // Notification display: "off", "minimal", or "detailed" - "pruneNotification": "detailed", - // Slash commands configuration - "commands": { - "enabled": true, - // Additional tools to protect from pruning via commands (e.g., /dcp sweep) - "protectedTools": [], - }, - // Protect from pruning for message turns - "turnProtection": { - "enabled": false, - "turns": 4, - }, - // Protect file operations from pruning via glob patterns - // Patterns match tool parameters.filePath (e.g. read/write/edit) - "protectedFilePatterns": [], - // LLM-driven context pruning tools - "tools": { - // Shared settings for all prune tools - "settings": { - // Nudge the LLM to use prune tools (every tool results) - "nudgeEnabled": true, - "nudgeFrequency": 10, - // Additional tools to protect from pruning - "protectedTools": [], - }, - // Removes tool content from context without preservation (for completed tasks or noise) - "discard": { - "enabled": true, - }, - // Distills key findings into preserved knowledge before removing raw content - "extract": { - "enabled": true, - // Show distillation content as an ignored message notification - "showDistillation": false, - }, - }, - // Automatic pruning strategies - "strategies": { - // Remove duplicate tool calls (same tool with same arguments) - "deduplication": { - "enabled": true, - // Additional tools to protect from pruning - "protectedTools": [], - }, - // Prune write tool inputs when the file has been subsequently read - "supersedeWrites": { - "enabled": false, - }, - // Prune tool inputs for errored tools after X turns - "purgeErrors": { - "enabled": true, - // Number of turns before errored tool inputs are pruned - "turns": 4, - // 
Additional tools to protect from pruning - "protectedTools": [], - }, - }, -} -``` - -
+- Project: `.opencode/dcp.jsonc` (or `dcp.json`) in your project's `.opencode` directory + +>
+> Default Configuration (click to expand) +> +> ```jsonc +> { +> "$schema": "https://raw.githubusercontent.com/Opencode-DCP/opencode-dynamic-context-pruning/master/dcp.schema.json", +> // Enable or disable the plugin +> "enabled": true, +> // Enable debug logging to ~/.config/opencode/logs/dcp/ +> "debug": false, +> // Notification display: "off", "minimal", or "detailed" +> "pruneNotification": "detailed", +> // Notification type: "chat" (in-conversation) or "toast" (system toast) +> "pruneNotificationType": "chat", +> // Slash commands configuration +> "commands": { +> "enabled": true, +> // Additional tools to protect from pruning via commands (e.g., /dcp sweep) +> "protectedTools": [], +> }, +> // Protect from pruning for message turns past tool invocation +> "turnProtection": { +> "enabled": false, +> "turns": 4, +> }, +> // Protect file operations from pruning via glob patterns +> // Patterns match tool parameters.filePath (e.g. read/write/edit) +> "protectedFilePatterns": [], +> // LLM-driven context pruning tools +> "tools": { +> // Shared settings for all prune tools +> "settings": { +> // Nudge the LLM to use prune tools (every tool results) +> "nudgeEnabled": true, +> "nudgeFrequency": 10, +> // Token limit at which the model begins actively +> // compressing session context. Best kept around 40% of +> // the model's context window to stay in the "smart zone". +> // Set to "model" to use the model's full context window. 
+> "contextLimit": 100000, +> // Additional tools to protect from pruning +> "protectedTools": [], +> }, +> // Distills key findings into preserved knowledge before removing raw content +> "distill": { +> // Permission mode: "allow" (no prompt), "ask" (prompt), "deny" (tool not registered) +> "permission": "allow", +> // Show distillation content as an ignored message notification +> "showDistillation": false, +> }, +> // Collapses a range of conversation content into a single summary +> "compress": { +> // Permission mode: "ask" (prompt), "allow" (no prompt), "deny" (tool not registered) +> "permission": "ask", +> // Show summary content as an ignored message notification +> "showCompression": false, +> }, +> // Removes tool content from context without preservation (for completed tasks or noise) +> "prune": { +> // Permission mode: "allow" (no prompt), "ask" (prompt), "deny" (tool not registered) +> "permission": "allow", +> }, +> }, +> // Automatic pruning strategies +> "strategies": { +> // Remove duplicate tool calls (same tool with same arguments) +> "deduplication": { +> "enabled": true, +> // Additional tools to protect from pruning +> "protectedTools": [], +> }, +> // Prune write tool inputs when the file has been subsequently read +> "supersedeWrites": { +> "enabled": true, +> }, +> // Prune tool inputs for errored tools after X turns +> "purgeErrors": { +> "enabled": true, +> // Number of turns before errored tool inputs are pruned +> "turns": 4, +> // Additional tools to protect from pruning +> "protectedTools": [], +> }, +> }, +> } +> ``` +> +>
### Commands @@ -141,14 +167,10 @@ DCP provides a `/dcp` slash command: - `/dcp stats` — Shows cumulative pruning statistics across all sessions. - `/dcp sweep` — Prunes all tools since the last user message. Accepts an optional count: `/dcp sweep 10` prunes the last 10 tools. Respects `commands.protectedTools`. -### Turn Protection - -When enabled, turn protection prevents tool outputs from being pruned for a configurable number of message turns. This gives the AI time to reference recent tool outputs before they become prunable. Applies to both `discard` and `extract` tools, as well as automatic strategies. - ### Protected Tools -By default, these tools are always protected from pruning across all strategies: -`task`, `todowrite`, `todoread`, `discard`, `extract`, `batch`, `write`, `edit` +By default, these tools are always protected from pruning: +`task`, `todowrite`, `todoread`, `distill`, `compress`, `prune`, `batch`, `plan_enter`, `plan_exit` The `protectedTools` arrays in each section add to this default list. @@ -166,4 +188,4 @@ Restart OpenCode after making config changes. 
## License -MIT +AGPL-3.0-or-later diff --git a/dcp-demo.png b/assets/images/dcp-demo.png similarity index 100% rename from dcp-demo.png rename to assets/images/dcp-demo.png diff --git a/dcp-demo2.png b/assets/images/dcp-demo2.png similarity index 100% rename from dcp-demo2.png rename to assets/images/dcp-demo2.png diff --git a/dcp-demo3.png b/assets/images/dcp-demo3.png similarity index 100% rename from dcp-demo3.png rename to assets/images/dcp-demo3.png diff --git a/dcp-demo4.png b/assets/images/dcp-demo4.png similarity index 100% rename from dcp-demo4.png rename to assets/images/dcp-demo4.png diff --git a/dcp-demo5.png b/assets/images/dcp-demo5.png similarity index 100% rename from dcp-demo5.png rename to assets/images/dcp-demo5.png diff --git a/dcp.schema.json b/dcp.schema.json index 91db1b3c..28019dd0 100644 --- a/dcp.schema.json +++ b/dcp.schema.json @@ -26,6 +26,12 @@ "default": "detailed", "description": "Level of notification shown when pruning occurs" }, + "pruneNotificationType": { + "type": "string", + "enum": ["chat", "toast"], + "default": "chat", + "description": "Where to display prune notifications (chat message or toast notification)" + }, "commands": { "type": "object", "description": "Configuration for DCP slash commands (/dcp)", @@ -100,46 +106,70 @@ "items": { "type": "string" }, - "default": [ - "task", - "todowrite", - "todoread", - "discard", - "extract", - "batch", - "write", - "edit" - ], + "default": [], "description": "Tool names that should be protected from automatic pruning" + }, + "contextLimit": { + "description": "When session tokens exceed this limit, a compress nudge is injected (\"model\" uses the active model's context limit)", + "default": 100000, + "oneOf": [ + { + "type": "number" + }, + { + "type": "string", + "enum": ["model"] + } + ] } } }, - "discard": { + "distill": { "type": "object", - "description": "Configuration for the discard tool", + "description": "Configuration for the distill tool", "additionalProperties": 
false, "properties": { - "enabled": { + "permission": { + "type": "string", + "enum": ["ask", "allow", "deny"], + "default": "allow", + "description": "Permission mode (deny disables the tool)" + }, + "showDistillation": { "type": "boolean", - "default": true, - "description": "Enable the discard tool" + "default": false, + "description": "Show distillation output in the UI" } } }, - "extract": { + "compress": { "type": "object", - "description": "Configuration for the extract tool", + "description": "Configuration for the compress tool", "additionalProperties": false, "properties": { - "enabled": { - "type": "boolean", - "default": true, - "description": "Enable the extract tool" + "permission": { + "type": "string", + "enum": ["ask", "allow", "deny"], + "default": "ask", + "description": "Permission mode (deny disables the tool)" }, - "showDistillation": { + "showCompression": { "type": "boolean", "default": false, - "description": "Show distillation output in the UI" + "description": "Show summary output in the UI" + } + } + }, + "prune": { + "type": "object", + "description": "Configuration for the prune tool", + "additionalProperties": false, + "properties": { + "permission": { + "type": "string", + "enum": ["ask", "allow", "deny"], + "default": "allow", + "description": "Permission mode (deny disables the tool)" } } } @@ -165,16 +195,7 @@ "items": { "type": "string" }, - "default": [ - "task", - "todowrite", - "todoread", - "discard", - "extract", - "batch", - "write", - "edit" - ], + "default": [], "description": "Tool names excluded from deduplication" } } @@ -186,7 +207,7 @@ "properties": { "enabled": { "type": "boolean", - "default": false, + "default": true, "description": "Enable supersede writes strategy" } } @@ -211,16 +232,7 @@ "items": { "type": "string" }, - "default": [ - "task", - "todowrite", - "todoread", - "discard", - "extract", - "batch", - "write", - "edit" - ], + "default": [], "description": "Tool names excluded from error purging" } } 
diff --git a/index.ts b/index.ts index 0c7ae2a7..60c1b3ec 100644 --- a/index.ts +++ b/index.ts @@ -2,12 +2,13 @@ import type { Plugin } from "@opencode-ai/plugin" import { getConfig } from "./lib/config" import { Logger } from "./lib/logger" import { createSessionState } from "./lib/state" -import { createDiscardTool, createExtractTool } from "./lib/strategies" +import { createPruneTool, createDistillTool, createCompressTool } from "./lib/strategies" import { createChatMessageTransformHandler, createCommandExecuteHandler, createSystemPromptHandler, } from "./lib/hooks" +import { configureClientAuth, isSecureMode } from "./lib/auth" const plugin: Plugin = (async (ctx) => { const config = getConfig(ctx) @@ -19,6 +20,11 @@ const plugin: Plugin = (async (ctx) => { const logger = new Logger(config.debug) const state = createSessionState() + if (isSecureMode()) { + configureClientAuth(ctx.client) + // logger.info("Secure mode detected, configured client authentication") + } + logger.info("DCP initialized", { strategies: config.strategies, }) @@ -31,7 +37,7 @@ const plugin: Plugin = (async (ctx) => { state, logger, config, - ), + ) as any, "chat.message": async ( input: { sessionID: string @@ -55,8 +61,8 @@ const plugin: Plugin = (async (ctx) => { ctx.directory, ), tool: { - ...(config.tools.discard.enabled && { - discard: createDiscardTool({ + ...(config.tools.distill.permission !== "deny" && { + distill: createDistillTool({ client: ctx.client, state, logger, @@ -64,8 +70,17 @@ const plugin: Plugin = (async (ctx) => { workingDirectory: ctx.directory, }), }), - ...(config.tools.extract.enabled && { - extract: createExtractTool({ + ...(config.tools.compress.permission !== "deny" && { + compress: createCompressTool({ + client: ctx.client, + state, + logger, + config, + workingDirectory: ctx.directory, + }), + }), + ...(config.tools.prune.permission !== "deny" && { + prune: createPruneTool({ client: ctx.client, state, logger, @@ -84,8 +99,9 @@ const plugin: Plugin = (async 
(ctx) => { } const toolsToAdd: string[] = [] - if (config.tools.discard.enabled) toolsToAdd.push("discard") - if (config.tools.extract.enabled) toolsToAdd.push("extract") + if (config.tools.distill.permission !== "deny") toolsToAdd.push("distill") + if (config.tools.compress.permission !== "deny") toolsToAdd.push("compress") + if (config.tools.prune.permission !== "deny") toolsToAdd.push("prune") if (toolsToAdd.length > 0) { const existingPrimaryTools = opencodeConfig.experimental?.primary_tools ?? [] @@ -97,6 +113,15 @@ const plugin: Plugin = (async (ctx) => { `Added ${toolsToAdd.map((t) => `'${t}'`).join(" and ")} to experimental.primary_tools via config mutation`, ) } + + // Set tool permissions from DCP config + const permission = opencodeConfig.permission ?? {} + opencodeConfig.permission = { + ...permission, + distill: config.tools.distill.permission, + compress: config.tools.compress.permission, + prune: config.tools.prune.permission, + } as typeof permission }, } }) satisfies Plugin diff --git a/lib/auth.ts b/lib/auth.ts new file mode 100644 index 00000000..8b7aa418 --- /dev/null +++ b/lib/auth.ts @@ -0,0 +1,37 @@ +export function isSecureMode(): boolean { + return !!process.env.OPENCODE_SERVER_PASSWORD +} + +export function getAuthorizationHeader(): string | undefined { + const password = process.env.OPENCODE_SERVER_PASSWORD + if (!password) return undefined + + const username = process.env.OPENCODE_SERVER_USERNAME ?? 
"opencode" + // Use Buffer for Node.js base64 encoding (btoa may not be available in all Node versions) + const credentials = Buffer.from(`${username}:${password}`).toString("base64") + return `Basic ${credentials}` +} + +export function configureClientAuth(client: any): any { + const authHeader = getAuthorizationHeader() + + if (!authHeader) { + return client + } + + // The SDK client has an internal client with request interceptors + // Access the underlying client to add the interceptor + const innerClient = client._client || client.client + + if (innerClient?.interceptors?.request) { + innerClient.interceptors.request.use((request: Request) => { + // Only add auth header if not already present + if (!request.headers.has("Authorization")) { + request.headers.set("Authorization", authHeader) + } + return request + }) + } + + return client +} diff --git a/lib/commands/context.ts b/lib/commands/context.ts index bd2e8661..15328692 100644 --- a/lib/commands/context.ts +++ b/lib/commands/context.ts @@ -61,7 +61,8 @@ interface TokenBreakdown { tools: number toolCount: number prunedTokens: number - prunedCount: number + prunedToolCount: number + prunedMessageCount: number total: number } @@ -73,7 +74,8 @@ function analyzeTokens(state: SessionState, messages: WithParts[]): TokenBreakdo tools: 0, toolCount: 0, prunedTokens: state.stats.totalPruneTokens, - prunedCount: state.prune.toolIds.length, + prunedToolCount: state.prune.toolIds.size, + prunedMessageCount: state.prune.messageIds.size, total: 0, } @@ -112,43 +114,55 @@ function analyzeTokens(state: SessionState, messages: WithParts[]): TokenBreakdo const toolOutputParts: string[] = [] let firstUserText = "" let foundFirstUser = false + const foundToolIds = new Set() for (const msg of messages) { - if (isMessageCompacted(state, msg)) continue - if (msg.info.role === "user" && isIgnoredUserMessage(msg)) continue - const parts = Array.isArray(msg.parts) ? 
msg.parts : [] + const isCompacted = isMessageCompacted(state, msg) + const isIgnoredUser = msg.info.role === "user" && isIgnoredUserMessage(msg) + for (const part of parts) { - if (part.type === "text" && msg.info.role === "user") { + if (part.type === "tool") { + const toolPart = part as ToolPart + if (toolPart.callID && !foundToolIds.has(toolPart.callID)) { + breakdown.toolCount++ + foundToolIds.add(toolPart.callID) + } + + const isPruned = toolPart.callID && state.prune.toolIds.has(toolPart.callID) + if (!isCompacted && !isPruned) { + if (toolPart.state?.input) { + const inputStr = + typeof toolPart.state.input === "string" + ? toolPart.state.input + : JSON.stringify(toolPart.state.input) + toolInputParts.push(inputStr) + } + + if (toolPart.state?.status === "completed" && toolPart.state?.output) { + const outputStr = + typeof toolPart.state.output === "string" + ? toolPart.state.output + : JSON.stringify(toolPart.state.output) + toolOutputParts.push(outputStr) + } + } + } else if ( + part.type === "text" && + msg.info.role === "user" && + !isCompacted && + !isIgnoredUser + ) { const textPart = part as TextPart const text = textPart.text || "" userTextParts.push(text) if (!foundFirstUser) { firstUserText += text } - } else if (part.type === "tool") { - const toolPart = part as ToolPart - breakdown.toolCount++ - - if (toolPart.state?.input) { - const inputStr = - typeof toolPart.state.input === "string" - ? toolPart.state.input - : JSON.stringify(toolPart.state.input) - toolInputParts.push(inputStr) - } - - if (toolPart.state?.status === "completed" && toolPart.state?.output) { - const outputStr = - typeof toolPart.state.output === "string" - ? 
toolPart.state.output - : JSON.stringify(toolPart.state.output) - toolOutputParts.push(outputStr) - } } } - if (msg.info.role === "user" && !isIgnoredUserMessage(msg) && !foundFirstUser) { + if (msg.info.role === "user" && !isIgnoredUser && !foundFirstUser) { foundFirstUser = true } } @@ -164,7 +178,7 @@ function analyzeTokens(state: SessionState, messages: WithParts[]): TokenBreakdo breakdown.system = Math.max(0, firstInput - firstUserTokens) } - breakdown.tools = Math.max(0, toolInputTokens + toolOutputTokens - breakdown.prunedTokens) + breakdown.tools = toolInputTokens + toolOutputTokens breakdown.assistant = Math.max( 0, breakdown.total - breakdown.system - breakdown.user - breakdown.tools, @@ -184,7 +198,7 @@ function formatContextMessage(breakdown: TokenBreakdown): string { const lines: string[] = [] const barWidth = 30 - const toolsInContext = breakdown.toolCount - breakdown.prunedCount + const toolsInContext = breakdown.toolCount - breakdown.prunedToolCount const toolsLabel = `Tools (${toolsInContext})` const categories = [ @@ -221,8 +235,12 @@ function formatContextMessage(breakdown: TokenBreakdown): string { if (breakdown.prunedTokens > 0) { const withoutPruning = breakdown.total + breakdown.prunedTokens + const pruned = [] + if (breakdown.prunedToolCount > 0) pruned.push(`${breakdown.prunedToolCount} tools`) + if (breakdown.prunedMessageCount > 0) + pruned.push(`${breakdown.prunedMessageCount} messages`) lines.push( - ` Pruned: ${breakdown.prunedCount} tools (~${formatTokenCount(breakdown.prunedTokens)})`, + ` Pruned: ${pruned.join(", ")} (~${formatTokenCount(breakdown.prunedTokens)})`, ) lines.push(` Current context: ~${formatTokenCount(breakdown.total)}`) lines.push(` Without DCP: ~${formatTokenCount(withoutPruning)}`) diff --git a/lib/commands/stats.ts b/lib/commands/stats.ts index 24635947..a554309d 100644 --- a/lib/commands/stats.ts +++ b/lib/commands/stats.ts @@ -21,6 +21,7 @@ export interface StatsCommandContext { function formatStatsMessage( 
sessionTokens: number, sessionTools: number, + sessionMessages: number, allTime: AggregatedStats, ): string { const lines: string[] = [] @@ -31,14 +32,16 @@ function formatStatsMessage( lines.push("") lines.push("Session:") lines.push("─".repeat(60)) - lines.push(` Tokens pruned: ~${formatTokenCount(sessionTokens)}`) - lines.push(` Tools pruned: ${sessionTools}`) + lines.push(` Tokens pruned: ~${formatTokenCount(sessionTokens)}`) + lines.push(` Tools pruned: ${sessionTools}`) + lines.push(` Messages pruned: ${sessionMessages}`) lines.push("") lines.push("All-time:") lines.push("─".repeat(60)) - lines.push(` Tokens saved: ~${formatTokenCount(allTime.totalTokens)}`) - lines.push(` Tools pruned: ${allTime.totalTools}`) - lines.push(` Sessions: ${allTime.sessionCount}`) + lines.push(` Tokens saved: ~${formatTokenCount(allTime.totalTokens)}`) + lines.push(` Tools pruned: ${allTime.totalTools}`) + lines.push(` Messages pruned: ${allTime.totalMessages}`) + lines.push(` Sessions: ${allTime.sessionCount}`) return lines.join("\n") } @@ -48,12 +51,13 @@ export async function handleStatsCommand(ctx: StatsCommandContext): Promise { - if (existingPrunedSet.has(id)) { + if (state.prune.toolIds.has(id)) { return false } const entry = state.toolParameters.get(id) @@ -173,9 +172,9 @@ export async function handleSweepCommand(ctx: SweepCommandContext): Promise): ValidationError[] { } } + if (config.pruneNotificationType !== undefined) { + const validValues = ["chat", "toast"] + if (!validValues.includes(config.pruneNotificationType)) { + errors.push({ + key: "pruneNotificationType", + expected: '"chat" | "toast"', + actual: JSON.stringify(config.pruneNotificationType), + }) + } + } + if (config.protectedFilePatterns !== undefined) { if (!Array.isArray(config.protectedFilePatterns)) { errors.push({ @@ -266,35 +289,75 @@ function validateConfigTypes(config: Record): ValidationError[] { actual: typeof tools.settings.protectedTools, }) } + if (tools.settings.contextLimit !== undefined) { 
+ if ( + typeof tools.settings.contextLimit !== "number" && + tools.settings.contextLimit !== "model" + ) { + errors.push({ + key: "tools.settings.contextLimit", + expected: 'number | "model"', + actual: JSON.stringify(tools.settings.contextLimit), + }) + } + } } - if (tools.discard) { - if (tools.discard.enabled !== undefined && typeof tools.discard.enabled !== "boolean") { + if (tools.distill) { + if (tools.distill.permission !== undefined) { + const validValues = ["ask", "allow", "deny"] + if (!validValues.includes(tools.distill.permission)) { + errors.push({ + key: "tools.distill.permission", + expected: '"ask" | "allow" | "deny"', + actual: JSON.stringify(tools.distill.permission), + }) + } + } + if ( + tools.distill.showDistillation !== undefined && + typeof tools.distill.showDistillation !== "boolean" + ) { errors.push({ - key: "tools.discard.enabled", + key: "tools.distill.showDistillation", expected: "boolean", - actual: typeof tools.discard.enabled, + actual: typeof tools.distill.showDistillation, }) } } - if (tools.extract) { - if (tools.extract.enabled !== undefined && typeof tools.extract.enabled !== "boolean") { - errors.push({ - key: "tools.extract.enabled", - expected: "boolean", - actual: typeof tools.extract.enabled, - }) + if (tools.compress) { + if (tools.compress.permission !== undefined) { + const validValues = ["ask", "allow", "deny"] + if (!validValues.includes(tools.compress.permission)) { + errors.push({ + key: "tools.compress.permission", + expected: '"ask" | "allow" | "deny"', + actual: JSON.stringify(tools.compress.permission), + }) + } } if ( - tools.extract.showDistillation !== undefined && - typeof tools.extract.showDistillation !== "boolean" + tools.compress.showCompression !== undefined && + typeof tools.compress.showCompression !== "boolean" ) { errors.push({ - key: "tools.extract.showDistillation", + key: "tools.compress.showCompression", expected: "boolean", - actual: typeof tools.extract.showDistillation, + actual: typeof 
tools.compress.showCompression, }) } } + if (tools.prune) { + if (tools.prune.permission !== undefined) { + const validValues = ["ask", "allow", "deny"] + if (!validValues.includes(tools.prune.permission)) { + errors.push({ + key: "tools.prune.permission", + expected: '"ask" | "allow" | "deny"', + actual: JSON.stringify(tools.prune.permission), + }) + } + } + } } // Strategies validators @@ -424,6 +487,7 @@ const defaultConfig: PluginConfig = { enabled: true, debug: false, pruneNotification: "detailed", + pruneNotificationType: "chat", commands: { enabled: true, protectedTools: [...DEFAULT_PROTECTED_TOOLS], @@ -438,14 +502,19 @@ const defaultConfig: PluginConfig = { nudgeEnabled: true, nudgeFrequency: 10, protectedTools: [...DEFAULT_PROTECTED_TOOLS], + contextLimit: 100000, }, - discard: { - enabled: true, - }, - extract: { - enabled: true, + distill: { + permission: "allow", showDistillation: false, }, + compress: { + permission: "ask", + showCompression: false, + }, + prune: { + permission: "allow", + }, }, strategies: { deduplication: { @@ -453,7 +522,7 @@ const defaultConfig: PluginConfig = { protectedTools: [], }, supersedeWrites: { - enabled: false, + enabled: true, }, purgeErrors: { enabled: true, @@ -463,7 +532,9 @@ const defaultConfig: PluginConfig = { }, } -const GLOBAL_CONFIG_DIR = join(homedir(), ".config", "opencode") +const GLOBAL_CONFIG_DIR = process.env.XDG_CONFIG_HOME + ? join(process.env.XDG_CONFIG_HOME, "opencode") + : join(homedir(), ".config", "opencode") const GLOBAL_CONFIG_PATH_JSONC = join(GLOBAL_CONFIG_DIR, "dcp.jsonc") const GLOBAL_CONFIG_PATH_JSON = join(GLOBAL_CONFIG_DIR, "dcp.json") @@ -610,13 +681,18 @@ function mergeTools( ...(override.settings?.protectedTools ?? []), ]), ], + contextLimit: override.settings?.contextLimit ?? base.settings.contextLimit, + }, + distill: { + permission: override.distill?.permission ?? base.distill.permission, + showDistillation: override.distill?.showDistillation ?? 
base.distill.showDistillation, }, - discard: { - enabled: override.discard?.enabled ?? base.discard.enabled, + compress: { + permission: override.compress?.permission ?? base.compress.permission, + showCompression: override.compress?.showCompression ?? base.compress.showCompression, }, - extract: { - enabled: override.extract?.enabled ?? base.extract.enabled, - showDistillation: override.extract?.showDistillation ?? base.extract.showDistillation, + prune: { + permission: override.prune?.permission ?? base.prune.permission, }, } } @@ -647,8 +723,9 @@ function deepCloneConfig(config: PluginConfig): PluginConfig { ...config.tools.settings, protectedTools: [...config.tools.settings.protectedTools], }, - discard: { ...config.tools.discard }, - extract: { ...config.tools.extract }, + distill: { ...config.tools.distill }, + compress: { ...config.tools.compress }, + prune: { ...config.tools.prune }, }, strategies: { deduplication: { @@ -693,6 +770,8 @@ export function getConfig(ctx: PluginInput): PluginConfig { enabled: result.data.enabled ?? config.enabled, debug: result.data.debug ?? config.debug, pruneNotification: result.data.pruneNotification ?? config.pruneNotification, + pruneNotificationType: + result.data.pruneNotificationType ?? config.pruneNotificationType, commands: mergeCommands(config.commands, result.data.commands as any), turnProtection: { enabled: result.data.turnProtection?.enabled ?? config.turnProtection.enabled, @@ -736,6 +815,8 @@ export function getConfig(ctx: PluginInput): PluginConfig { enabled: result.data.enabled ?? config.enabled, debug: result.data.debug ?? config.debug, pruneNotification: result.data.pruneNotification ?? config.pruneNotification, + pruneNotificationType: + result.data.pruneNotificationType ?? config.pruneNotificationType, commands: mergeCommands(config.commands, result.data.commands as any), turnProtection: { enabled: result.data.turnProtection?.enabled ?? 
config.turnProtection.enabled, @@ -776,6 +857,8 @@ export function getConfig(ctx: PluginInput): PluginConfig { enabled: result.data.enabled ?? config.enabled, debug: result.data.debug ?? config.debug, pruneNotification: result.data.pruneNotification ?? config.pruneNotification, + pruneNotificationType: + result.data.pruneNotificationType ?? config.pruneNotificationType, commands: mergeCommands(config.commands, result.data.commands as any), turnProtection: { enabled: result.data.turnProtection?.enabled ?? config.turnProtection.enabled, diff --git a/lib/hooks.ts b/lib/hooks.ts index aaf43883..83c74cc2 100644 --- a/lib/hooks.ts +++ b/lib/hooks.ts @@ -4,12 +4,14 @@ import type { PluginConfig } from "./config" import { syncToolCache } from "./state/tool-cache" import { deduplicate, supersedeWrites, purgeErrors } from "./strategies" import { prune, insertPruneToolContext } from "./messages" +import { buildToolIdList } from "./messages/utils" import { checkSession } from "./state" -import { loadPrompt } from "./prompts" +import { renderSystemPrompt } from "./prompts" import { handleStatsCommand } from "./commands/stats" import { handleContextCommand } from "./commands/context" import { handleHelpCommand } from "./commands/help" import { handleSweepCommand } from "./commands/sweep" +import { ensureSessionInitialized } from "./state/state" const INTERNAL_AGENT_SIGNATURES = [ "You are a title generator", @@ -22,7 +24,15 @@ export function createSystemPromptHandler( logger: Logger, config: PluginConfig, ) { - return async (_input: unknown, output: { system: string[] }) => { + return async ( + input: { sessionID?: string; model: { limit: { context: number } } }, + output: { system: string[] }, + ) => { + if (input.model?.limit?.context) { + state.modelContextLimit = input.model.limit.context + logger.debug("Cached model context limit", { limit: state.modelContextLimit }) + } + if (state.isSubAgent) { return } @@ -33,22 +43,17 @@ export function createSystemPromptHandler( 
return } - const discardEnabled = config.tools.discard.enabled - const extractEnabled = config.tools.extract.enabled - - let promptName: string - if (discardEnabled && extractEnabled) { - promptName = "system/system-prompt-both" - } else if (discardEnabled) { - promptName = "system/system-prompt-discard" - } else if (extractEnabled) { - promptName = "system/system-prompt-extract" - } else { + const flags = { + prune: config.tools.prune.permission !== "deny", + distill: config.tools.distill.permission !== "deny", + compress: config.tools.compress.permission !== "deny", + } + + if (!flags.prune && !flags.distill && !flags.compress) { return } - const syntheticPrompt = loadPrompt(promptName) - output.system.push(syntheticPrompt) + output.system.push(renderSystemPrompt(flags)) } } @@ -66,6 +71,7 @@ export function createChatMessageTransformHandler( } syncToolCache(state, config, logger, output.messages) + buildToolIdList(state, output.messages, logger) deduplicate(state, logger, config, output.messages) supersedeWrites(state, logger, config, output.messages) @@ -97,15 +103,17 @@ export function createCommandExecuteHandler( } if (input.command === "dcp") { - const args = (input.arguments || "").trim().split(/\s+/).filter(Boolean) - const subcommand = args[0]?.toLowerCase() || "" - const _subArgs = args.slice(1) - const messagesResponse = await client.session.messages({ path: { id: input.sessionID }, }) const messages = (messagesResponse.data || messagesResponse) as WithParts[] + await ensureSessionInitialized(client, state, input.sessionID, logger, messages) + + const args = (input.arguments || "").trim().split(/\s+/).filter(Boolean) + const subcommand = args[0]?.toLowerCase() || "" + const _subArgs = args.slice(1) + if (subcommand === "context") { await handleContextCommand({ client, diff --git a/lib/logger.ts b/lib/logger.ts index 972a1fb1..05852abc 100644 --- a/lib/logger.ts +++ b/lib/logger.ts @@ -9,8 +9,8 @@ export class Logger { constructor(enabled: boolean) { 
this.enabled = enabled - const opencodeConfigDir = join(homedir(), ".config", "opencode") - this.logDir = join(opencodeConfigDir, "logs", "dcp") + const configHome = process.env.XDG_CONFIG_HOME || join(homedir(), ".config") + this.logDir = join(configHome, "opencode", "logs", "dcp") } private async ensureLogDir() { diff --git a/lib/messages/inject.ts b/lib/messages/inject.ts index 491ecd6c..f1b56025 100644 --- a/lib/messages/inject.ts +++ b/lib/messages/inject.ts @@ -2,67 +2,119 @@ import type { SessionState, WithParts } from "../state" import type { Logger } from "../logger" import type { PluginConfig } from "../config" import type { UserMessage } from "@opencode-ai/sdk/v2" -import { loadPrompt } from "../prompts" +import { renderNudge, renderCompressNudge } from "../prompts" import { extractParameterKey, - buildToolIdList, - createSyntheticAssistantMessage, - createSyntheticUserMessage, + createSyntheticTextPart, createSyntheticToolPart, - isDeepSeekOrKimi, isIgnoredUserMessage, } from "./utils" -import { getFilePathFromParameters, isProtectedFilePath } from "../protected-file-patterns" -import { getLastUserMessage } from "../shared-utils" +import { getFilePathsFromParameters, isProtected } from "../protected-file-patterns" +import { getLastUserMessage, isMessageCompacted } from "../shared-utils" +import { getCurrentTokenUsage } from "../strategies/utils" -const getNudgeString = (config: PluginConfig): string => { - const discardEnabled = config.tools.discard.enabled - const extractEnabled = config.tools.extract.enabled - - if (discardEnabled && extractEnabled) { - return loadPrompt(`nudge/nudge-both`) - } else if (discardEnabled) { - return loadPrompt(`nudge/nudge-discard`) - } else if (extractEnabled) { - return loadPrompt(`nudge/nudge-extract`) - } - return "" -} - -const wrapPrunableTools = (content: string): string => ` -The following tools have been invoked and are available for pruning. This list does not mandate immediate action. 
Consider your current goals and the resources you need before discarding valuable tool inputs or outputs. Consolidate your prunes for efficiency; it is rarely worth pruning a single tiny tool output. Keep the context free of noise. +// XML wrappers +export const wrapPrunableTools = (content: string): string => { + return ` +The following tools have been invoked and are available for pruning. This list does not mandate immediate action. Consider your current goals and the resources you need before pruning valuable tool inputs or outputs. Consolidate your prunes for efficiency; it is rarely worth pruning a single tiny tool output. Keep the context free of noise. ${content} ` +} -const getCooldownMessage = (config: PluginConfig): string => { - const discardEnabled = config.tools.discard.enabled - const extractEnabled = config.tools.extract.enabled +export const wrapCompressContext = (messageCount: number): string => ` +Compress available. Conversation: ${messageCount} messages. +Compress collapses completed task sequences or exploration phases into summaries. +Uses text boundaries [startString, endString, topic, summary]. +` + +export const wrapCooldownMessage = (flags: { + prune: boolean + distill: boolean + compress: boolean +}): string => { + const enabledTools: string[] = [] + if (flags.distill) enabledTools.push("distill") + if (flags.compress) enabledTools.push("compress") + if (flags.prune) enabledTools.push("prune") let toolName: string - if (discardEnabled && extractEnabled) { - toolName = "discard or extract tools" - } else if (discardEnabled) { - toolName = "discard tool" + if (enabledTools.length === 0) { + toolName = "pruning tools" + } else if (enabledTools.length === 1) { + toolName = `${enabledTools[0]} tool` } else { - toolName = "extract tool" + const last = enabledTools.pop() + toolName = `${enabledTools.join(", ")} or ${last} tools` } - return ` -Context management was just performed. Do not use the ${toolName} again. 
A fresh list will be available after your next tool use. -` + return ` +Context management was just performed. Do NOT use the ${toolName} again. A fresh list will be available after your next tool use. +` +} + +const resolveContextLimit = (config: PluginConfig, state: SessionState): number | undefined => { + const configLimit = config.tools.settings.contextLimit + if (configLimit === "model") { + return state.modelContextLimit + } + return configLimit +} + +const shouldInjectCompressNudge = ( + config: PluginConfig, + state: SessionState, + messages: WithParts[], +): boolean => { + if (config.tools.compress.permission === "deny") { + return false + } + + const contextLimit = resolveContextLimit(config, state) + if (contextLimit === undefined) { + return false + } + + const currentTokens = getCurrentTokenUsage(messages) + return currentTokens > contextLimit +} + +const getNudgeString = (config: PluginConfig): string => { + const flags = { + prune: config.tools.prune.permission !== "deny", + distill: config.tools.distill.permission !== "deny", + compress: config.tools.compress.permission !== "deny", + } + + if (!flags.prune && !flags.distill && !flags.compress) { + return "" + } + + return renderNudge(flags) +} + +const getCooldownMessage = (config: PluginConfig): string => { + return wrapCooldownMessage({ + prune: config.tools.prune.permission !== "deny", + distill: config.tools.distill.permission !== "deny", + compress: config.tools.compress.permission !== "deny", + }) +} + +const buildCompressContext = (state: SessionState, messages: WithParts[]): string => { + const messageCount = messages.filter((msg) => !isMessageCompacted(state, msg)).length + return wrapCompressContext(messageCount) } const buildPrunableToolsList = ( state: SessionState, config: PluginConfig, logger: Logger, - messages: WithParts[], ): string => { const lines: string[] = [] - const toolIdList: string[] = buildToolIdList(state, messages, logger) + const toolIdList = state.toolIdList 
state.toolParameters.forEach((toolParameterEntry, toolCallId) => { - if (state.prune.toolIds.includes(toolCallId)) { + if (state.prune.toolIds.has(toolCallId)) { return } @@ -71,8 +123,11 @@ const buildPrunableToolsList = ( return } - const filePath = getFilePathFromParameters(toolParameterEntry.parameters) - if (isProtectedFilePath(filePath, config.protectedFilePatterns)) { + const filePaths = getFilePathsFromParameters( + toolParameterEntry.tool, + toolParameterEntry.parameters, + ) + if (isProtected(filePaths, config.protectedFilePatterns)) { return } @@ -88,7 +143,11 @@ const buildPrunableToolsList = ( const description = paramKey ? `${toolParameterEntry.tool}, ${paramKey}` : toolParameterEntry.tool - lines.push(`${numericId}: ${description}`) + const tokenSuffix = + toolParameterEntry.tokenCount !== undefined + ? ` (~${toolParameterEntry.tokenCount} tokens)` + : "" + lines.push(`${numericId}: ${description}${tokenSuffix}`) logger.debug( `Prunable tool found - ID: ${numericId}, Tool: ${toolParameterEntry.tool}, Call ID: ${toolCallId}`, ) @@ -107,65 +166,78 @@ export const insertPruneToolContext = ( logger: Logger, messages: WithParts[], ): void => { - if (!config.tools.discard.enabled && !config.tools.extract.enabled) { + const pruneEnabled = config.tools.prune.permission !== "deny" + const distillEnabled = config.tools.distill.permission !== "deny" + const compressEnabled = config.tools.compress.permission !== "deny" + + if (!pruneEnabled && !distillEnabled && !compressEnabled) { return } - let prunableToolsContent: string + const pruneOrDistillEnabled = pruneEnabled || distillEnabled + const contentParts: string[] = [] if (state.lastToolPrune) { logger.debug("Last tool was prune - injecting cooldown message") - prunableToolsContent = getCooldownMessage(config) + contentParts.push(getCooldownMessage(config)) } else { - const prunableToolsList = buildPrunableToolsList(state, config, logger, messages) - if (!prunableToolsList) { - return + if 
(pruneOrDistillEnabled) { + const prunableToolsList = buildPrunableToolsList(state, config, logger) + if (prunableToolsList) { + // logger.debug("prunable-tools: \n" + prunableToolsList) + contentParts.push(prunableToolsList) + } } - logger.debug("prunable-tools: \n" + prunableToolsList) + if (compressEnabled) { + const compressContext = buildCompressContext(state, messages) + // logger.debug("compress-context: \n" + compressContext) + contentParts.push(compressContext) + } - let nudgeString = "" - if ( + if (shouldInjectCompressNudge(config, state, messages)) { + logger.info("Inserting compress nudge - token usage exceeds contextLimit") + contentParts.push(renderCompressNudge()) + } else if ( config.tools.settings.nudgeEnabled && state.nudgeCounter >= config.tools.settings.nudgeFrequency ) { logger.info("Inserting prune nudge message") - nudgeString = "\n" + getNudgeString(config) + contentParts.push(getNudgeString(config)) } + } - prunableToolsContent = prunableToolsList + nudgeString + if (contentParts.length === 0) { + return } + const combinedContent = contentParts.join("\n") + const lastUserMessage = getLastUserMessage(messages) if (!lastUserMessage) { return } const userInfo = lastUserMessage.info as UserMessage - const variant = state.variant ?? 
userInfo.variant - - let lastNonIgnoredMessage: WithParts | undefined - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i] - if (!(msg.info.role === "user" && isIgnoredUserMessage(msg))) { - lastNonIgnoredMessage = msg - break - } + + const lastNonIgnoredMessage = messages.findLast( + (msg) => !(msg.info.role === "user" && isIgnoredUserMessage(msg)), + ) + + if (!lastNonIgnoredMessage) { + return } - if (!lastNonIgnoredMessage || lastNonIgnoredMessage.info.role === "user") { - messages.push(createSyntheticUserMessage(lastUserMessage, prunableToolsContent, variant)) + // When following a user message, append a synthetic text part since models like Claude + // expect assistant turns to start with reasoning parts which cannot be easily faked. + // For all other cases, append a synthetic tool part to the last message which works + // across all models without disrupting their behavior. + if (lastNonIgnoredMessage.info.role === "user") { + const textPart = createSyntheticTextPart(lastNonIgnoredMessage, combinedContent) + lastNonIgnoredMessage.parts.push(textPart) } else { - const providerID = userInfo.model?.providerID || "" const modelID = userInfo.model?.modelID || "" - - if (isDeepSeekOrKimi(providerID, modelID)) { - const toolPart = createSyntheticToolPart(lastNonIgnoredMessage, prunableToolsContent) - lastNonIgnoredMessage.parts.push(toolPart) - } else { - messages.push( - createSyntheticAssistantMessage(lastUserMessage, prunableToolsContent, variant), - ) - } + const toolPart = createSyntheticToolPart(lastNonIgnoredMessage, combinedContent, modelID) + lastNonIgnoredMessage.parts.push(toolPart) } } diff --git a/lib/messages/prune.ts b/lib/messages/prune.ts index fb86036e..09169700 100644 --- a/lib/messages/prune.ts +++ b/lib/messages/prune.ts @@ -1,7 +1,9 @@ import type { SessionState, WithParts } from "../state" import type { Logger } from "../logger" import type { PluginConfig } from "../config" -import { isMessageCompacted } from 
"../shared-utils" +import { isMessageCompacted, getLastUserMessage } from "../shared-utils" +import { createSyntheticUserMessage, COMPRESS_SUMMARY_PREFIX } from "./utils" +import type { UserMessage } from "@opencode-ai/sdk/v2" const PRUNED_TOOL_OUTPUT_REPLACEMENT = "[Output removed to save context - information superseded or no longer needed]" @@ -14,11 +16,58 @@ export const prune = ( config: PluginConfig, messages: WithParts[], ): void => { + filterCompressedRanges(state, logger, messages) + pruneFullTool(state, logger, messages) pruneToolOutputs(state, logger, messages) pruneToolInputs(state, logger, messages) pruneToolErrors(state, logger, messages) } +const pruneFullTool = (state: SessionState, logger: Logger, messages: WithParts[]): void => { + const messagesToRemove: string[] = [] + + for (const msg of messages) { + if (isMessageCompacted(state, msg)) { + continue + } + + const parts = Array.isArray(msg.parts) ? msg.parts : [] + const partsToRemove: string[] = [] + + for (const part of parts) { + if (part.type !== "tool") { + continue + } + if (!state.prune.toolIds.has(part.callID)) { + continue + } + if (part.tool !== "edit" && part.tool !== "write") { + continue + } + + partsToRemove.push(part.callID) + } + + if (partsToRemove.length === 0) { + continue + } + + msg.parts = parts.filter( + (part) => part.type !== "tool" || !partsToRemove.includes(part.callID), + ) + + if (msg.parts.length === 0) { + messagesToRemove.push(msg.info.id) + } + } + + if (messagesToRemove.length > 0) { + const result = messages.filter((msg) => !messagesToRemove.includes(msg.info.id)) + messages.length = 0 + messages.push(...result) + } +} + const pruneToolOutputs = (state: SessionState, logger: Logger, messages: WithParts[]): void => { for (const msg of messages) { if (isMessageCompacted(state, msg)) { @@ -30,13 +79,13 @@ const pruneToolOutputs = (state: SessionState, logger: Logger, messages: WithPar if (part.type !== "tool") { continue } - if 
(!state.prune.toolIds.includes(part.callID)) { + if (!state.prune.toolIds.has(part.callID)) { continue } if (part.state.status !== "completed") { continue } - if (part.tool === "question") { + if (part.tool === "question" || part.tool === "edit" || part.tool === "write") { continue } @@ -56,7 +105,7 @@ const pruneToolInputs = (state: SessionState, logger: Logger, messages: WithPart if (part.type !== "tool") { continue } - if (!state.prune.toolIds.includes(part.callID)) { + if (!state.prune.toolIds.has(part.callID)) { continue } if (part.state.status !== "completed") { @@ -84,7 +133,7 @@ const pruneToolErrors = (state: SessionState, logger: Logger, messages: WithPart if (part.type !== "tool") { continue } - if (!state.prune.toolIds.includes(part.callID)) { + if (!state.prune.toolIds.has(part.callID)) { continue } if (part.state.status !== "error") { @@ -103,3 +152,56 @@ const pruneToolErrors = (state: SessionState, logger: Logger, messages: WithPart } } } + +const filterCompressedRanges = ( + state: SessionState, + logger: Logger, + messages: WithParts[], +): void => { + if (!state.prune.messageIds?.size) { + return + } + + const result: WithParts[] = [] + + for (const msg of messages) { + const msgId = msg.info.id + + // Check if there's a summary to inject at this anchor point + const summary = state.compressSummaries?.find((s) => s.anchorMessageId === msgId) + if (summary) { + // Find user message for variant and as base for synthetic message + const msgIndex = messages.indexOf(msg) + const userMessage = getLastUserMessage(messages, msgIndex) + + if (userMessage) { + const userInfo = userMessage.info as UserMessage + const summaryContent = COMPRESS_SUMMARY_PREFIX + summary.summary + result.push( + createSyntheticUserMessage(userMessage, summaryContent, userInfo.variant), + ) + + logger.info("Injected compress summary", { + anchorMessageId: msgId, + summaryLength: summary.summary.length, + }) + } else { + logger.warn("No user message found for compress summary", { 
+ anchorMessageId: msgId, + }) + } + } + + // Skip messages that are in the prune list + if (state.prune.messageIds.has(msgId)) { + continue + } + + // Normal message, include it + result.push(msg) + } + + // Replace messages array contents + messages.length = 0 + messages.push(...result) +} diff --git a/lib/messages/utils.ts b/lib/messages/utils.ts index 406b6f42..a57d626e 100644 --- a/lib/messages/utils.ts +++ b/lib/messages/utils.ts @@ -1,21 +1,16 @@ -import { Logger } from "../logger" +import { ulid } from "ulid" import { isMessageCompacted } from "../shared-utils" +import { Logger } from "../logger" import type { SessionState, WithParts } from "../state" import type { UserMessage } from "@opencode-ai/sdk/v2" -const SYNTHETIC_MESSAGE_ID = "msg_01234567890123456789012345" -const SYNTHETIC_PART_ID = "prt_01234567890123456789012345" -const SYNTHETIC_CALL_ID = "call_01234567890123456789012345" +export const COMPRESS_SUMMARY_PREFIX = "[Compressed conversation block]\n\n" -export const isDeepSeekOrKimi = (providerID: string, modelID: string): boolean => { - const lowerProviderID = providerID.toLowerCase() +const generateUniqueId = (prefix: string): string => `${prefix}_${ulid()}` + +const isGeminiModel = (modelID: string): boolean => { const lowerModelID = modelID.toLowerCase() - return ( - lowerProviderID.includes("deepseek") || - lowerProviderID.includes("kimi") || - lowerModelID.includes("deepseek") || - lowerModelID.includes("kimi") - ) + return lowerModelID.includes("gemini") } export const createSyntheticUserMessage = ( @@ -25,85 +20,73 @@ export const createSyntheticUserMessage = ( ): WithParts => { const userInfo = baseMessage.info as UserMessage const now = Date.now() + const messageId = generateUniqueId("msg") + const partId = generateUniqueId("prt") return { info: { - id: SYNTHETIC_MESSAGE_ID, + id: messageId, sessionID: userInfo.sessionID, role: "user" as const, - agent: userInfo.agent || "code", + agent: userInfo.agent, model: userInfo.model, time: { 
created: now }, ...(variant !== undefined && { variant }), }, parts: [ { - id: SYNTHETIC_PART_ID, + id: partId, sessionID: userInfo.sessionID, - messageID: SYNTHETIC_MESSAGE_ID, - type: "text", + messageID: messageId, + type: "text" as const, text: content, }, ], } } -export const createSyntheticAssistantMessage = ( - baseMessage: WithParts, - content: string, - variant?: string, -): WithParts => { +export const createSyntheticTextPart = (baseMessage: WithParts, content: string) => { const userInfo = baseMessage.info as UserMessage - const now = Date.now() + const partId = generateUniqueId("prt") return { - info: { - id: SYNTHETIC_MESSAGE_ID, - sessionID: userInfo.sessionID, - role: "assistant" as const, - agent: userInfo.agent || "code", - parentID: userInfo.id, - modelID: userInfo.model.modelID, - providerID: userInfo.model.providerID, - mode: "default", - path: { - cwd: "/", - root: "/", - }, - time: { created: now, completed: now }, - cost: 0, - tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } }, - ...(variant !== undefined && { variant }), - }, - parts: [ - { - id: SYNTHETIC_PART_ID, - sessionID: userInfo.sessionID, - messageID: SYNTHETIC_MESSAGE_ID, - type: "text", - text: content, - }, - ], + id: partId, + sessionID: userInfo.sessionID, + messageID: userInfo.id, + type: "text" as const, + text: content, } } -export const createSyntheticToolPart = (baseMessage: WithParts, content: string) => { +export const createSyntheticToolPart = ( + baseMessage: WithParts, + content: string, + modelID: string, +) => { const userInfo = baseMessage.info as UserMessage const now = Date.now() + const partId = generateUniqueId("prt") + const callId = generateUniqueId("call") + + // Gemini requires thoughtSignature bypass to accept synthetic tool parts + const toolPartMetadata = isGeminiModel(modelID) + ? 
{ google: { thoughtSignature: "skip_thought_signature_validator" } } + : {} + return { - id: SYNTHETIC_PART_ID, + id: partId, sessionID: userInfo.sessionID, - messageID: baseMessage.info.id, + messageID: userInfo.id, type: "tool" as const, - callID: SYNTHETIC_CALL_ID, + callID: callId, tool: "context_info", state: { status: "completed" as const, input: {}, output: content, title: "Context Info", - metadata: {}, + metadata: toolPartMetadata, time: { start: now, end: now }, }, } @@ -129,11 +112,26 @@ export const extractParameterKey = (tool: string, parameters: any): string => { } return parameters.filePath } - if (tool === "write" && parameters.filePath) { + if ((tool === "write" || tool === "edit" || tool === "multiedit") && parameters.filePath) { return parameters.filePath } - if (tool === "edit" && parameters.filePath) { - return parameters.filePath + + if (tool === "apply_patch" && typeof parameters.patchText === "string") { + const pathRegex = /\*\*\* (?:Add|Delete|Update) File: ([^\n\r]+)/g + const paths: string[] = [] + let match + while ((match = pathRegex.exec(parameters.patchText)) !== null) { + paths.push(match[1].trim()) + } + if (paths.length > 0) { + const uniquePaths = [...new Set(paths)] + const count = uniquePaths.length + const plural = count > 1 ? 
"s" : "" + if (count === 1) return uniquePaths[0] + if (count === 2) return uniquePaths.join(", ") + return `${count} file${plural}: ${uniquePaths[0]}, ${uniquePaths[1]}...` + } + return "patch" } if (tool === "list") { @@ -247,6 +245,7 @@ export function buildToolIdList( } } } + state.toolIdList = toolIds return toolIds } @@ -264,3 +263,7 @@ export const isIgnoredUserMessage = (message: WithParts): boolean => { return true } + +export const findMessageIndex = (messages: WithParts[], messageId: string): number => { + return messages.findIndex((msg) => msg.info.id === messageId) +} diff --git a/lib/prompts/compress-nudge.md b/lib/prompts/compress-nudge.md new file mode 100644 index 00000000..b9ce567b --- /dev/null +++ b/lib/prompts/compress-nudge.md @@ -0,0 +1,10 @@ + +CRITICAL CONTEXT LIMIT +Your session context has exceeded the configured limit. Strict adherence to context compression is required. + +PROTOCOL +You should prioritize context management, but do not interrupt a critical atomic operation if one is in progress. Once the immediate step is done, you must perform context management. + +IMMEDIATE ACTION REQUIRED +PHASE COMPLETION: If a phase is complete, use the `compress` tool to condense the entire sequence into a detailed summary + diff --git a/lib/prompts/compress.md b/lib/prompts/compress.md new file mode 100644 index 00000000..6a083297 --- /dev/null +++ b/lib/prompts/compress.md @@ -0,0 +1,35 @@ +Use this tool to collapse a contiguous range of conversation into a preserved summary. + +THE PHILOSOPHY OF COMPRESS +`compress` transforms verbose conversation sequences into dense, high-fidelity summaries. This is not cleanup - it is crystallization. Your summary becomes the authoritative record of what transpired. + +Think of compression as phase transitions: raw exploration becomes refined understanding. The original context served its purpose; your summary now carries that understanding forward. + +THE SUMMARY +Your summary must be EXHAUSTIVE. 
Capture file paths, function signatures, decisions made, constraints discovered, key findings... EVERYTHING that maintains context integrity. This is not a brief note - it is an authoritative record so faithful that the original conversation adds no value. + +Yet be LEAN. Strip away the noise: failed attempts that led nowhere, verbose tool outputs, back-and-forth exploration. What remains should be pure signal - golden nuggets of detail that preserve full understanding with zero ambiguity. + +THE WAYS OF COMPRESS +`compress` when a chapter closes - when a phase of work is truly complete and the raw conversation has served its purpose: + +Research concluded and findings are clear +Implementation finished and verified +Exploration exhausted and patterns understood + +Do NOT compress when: +You may need exact code, error messages, or file contents from the range +Work in that area is still active or may resume +You're mid-sprint on related functionality + +Before compressing, ask: _"Is this chapter closed?"_ Compression is irreversible. The summary replaces everything in the range. + +BOUNDARY MATCHING +You specify boundaries by matching unique text strings in the conversation. CRITICAL: In code-centric conversations, strings repeat often. Provide sufficiently unique text to match exactly once. If a match fails (not found or found multiple times), the tool will error - extend your boundary string with more surrounding context in order to make SURE the tool does NOT error. 
+ +THE FORMAT OF COMPRESS +`topic`: Short label (3-5 words) for display - e.g., "Auth System Exploration" +`content`: Object containing: +`startString`: Unique text string marking the beginning of the range +`endString`: Unique text string marking the end of the range +`summary`: Complete technical summary replacing all content in the range diff --git a/lib/prompts/discard-tool-spec.ts b/lib/prompts/discard-tool-spec.ts deleted file mode 100644 index e5084212..00000000 --- a/lib/prompts/discard-tool-spec.ts +++ /dev/null @@ -1,40 +0,0 @@ -export const DISCARD_TOOL_SPEC = `Discards tool outputs from context to manage conversation size and reduce noise. - -## IMPORTANT: The Prunable List -A \`\` list is provided to you showing available tool outputs you can discard when there are tools available for pruning. Each line has the format \`ID: tool, parameter\` (e.g., \`20: read, /path/to/file.ts\`). You MUST only use numeric IDs that appear in this list to select which tools to discard. - -## When to Use This Tool - -Use \`discard\` for removing tool content that is no longer needed - -- **Noise:** Irrelevant, unhelpful, or superseded outputs that provide no value. -- **Task Completion:** Work is complete and there's no valuable information worth preserving. - -## When NOT to Use This Tool - -- **If the output contains useful information:** Keep it in context rather than discarding. -- **If you'll need the output later:** Don't discard files you plan to edit or context you'll need for implementation. - -## Best Practices -- **Strategic Batching:** Don't discard single small tool outputs (like short bash commands) unless they are pure noise. Wait until you have several items to perform high-impact discards. -- **Think ahead:** Before discarding, ask: "Will I need this output for an upcoming task?" If yes, keep it. 
- -## Format - -- \`ids\`: Array where the first element is the reason, followed by numeric IDs from the \`\` list - -Reasons: \`noise\` | \`completion\` - -## Example - - -Assistant: [Reads 'wrong_file.ts'] -This file isn't relevant to the auth system. I'll remove it to clear the context. -[Uses discard with ids: ["noise", "5"]] - - - -Assistant: [Runs tests, they pass] -The tests passed and I don't need to preserve any details. I'll clean up now. -[Uses discard with ids: ["completion", "20", "21"]] -` diff --git a/lib/prompts/distill.md b/lib/prompts/distill.md new file mode 100644 index 00000000..39a78cc9 --- /dev/null +++ b/lib/prompts/distill.md @@ -0,0 +1,28 @@ +Use this tool to distill relevant findings from a selection of raw tool outputs into preserved knowledge, in order to denoise key bits and parts of context. + +THE PRUNABLE TOOLS LIST +A will show in context when outputs are available for distillation (you don't need to look for it). Each entry follows the format `ID: tool, parameter (~token usage)` (e.g., `20: read, /path/to/file.ts (~1500 tokens)`). You MUST select outputs by their numeric ID. THESE ARE YOUR ONLY VALID TARGETS. + +THE PHILOSOPHY OF DISTILLATION +`distill` is your favored instrument for transforming raw tool outputs into preserved knowledge. This is not mere summarization; it is high-fidelity extraction that makes the original output obsolete. + +Your distillation must be COMPLETE. Capture function signatures, type definitions, business logic, constraints, configuration values... EVERYTHING essential. Think of it as creating a high signal technical substitute so faithful that re-fetching the original would yield no additional value. Be thorough; be comprehensive; leave no ambiguity, ensure that your distillation stands alone, and is designed for easy retrieval and comprehension. + +AIM FOR IMPACT. Distillation is most powerful when applied to outputs that contain signal buried in noise. 
A single line requires no distillation; a hundred lines of API documentation do. Make sure the distillation is meaningful. + +THE WAYS OF DISTILL +`distill` when you have extracted the essence from tool outputs and the raw form has served its purpose. +Here are some examples: +EXPLORATION: You've read extensively and grasp the architecture. The original file contents are no longer needed; your understanding, synthesized, is sufficient. +PRESERVATION: Valuable technical details (signatures, logic, constraints) coexist with noise. Preserve the former; discard the latter. + +Not everything should be distilled. Prefer keeping raw outputs when: +PRECISION MATTERS: You will edit the file, grep for exact strings, or need line-accurate references. Distillation sacrifices precision for essence. +UNCERTAINTY REMAINS: If you might need to re-examine the original, defer. Distillation is irreversible; be certain before you commit. + +Before distilling, ask yourself: _"Will I need the raw output for upcoming work?"_ If you plan to edit a file you just read, keep it intact. Distillation is for completed exploration, not active work. + +THE FORMAT OF DISTILL +`targets`: Array of objects, each containing: +`id`: Numeric ID (as string) from the `` list +`distillation`: Complete technical substitute for that tool output diff --git a/lib/prompts/extract-tool-spec.ts b/lib/prompts/extract-tool-spec.ts deleted file mode 100644 index 9324dc0c..00000000 --- a/lib/prompts/extract-tool-spec.ts +++ /dev/null @@ -1,47 +0,0 @@ -export const EXTRACT_TOOL_SPEC = `Extracts key findings from tool outputs into distilled knowledge, then removes the raw outputs from context. - -## IMPORTANT: The Prunable List -A \`\` list is provided to you showing available tool outputs you can extract from when there are tools available for pruning. Each line has the format \`ID: tool, parameter\` (e.g., \`20: read, /path/to/file.ts\`). 
You MUST only use numeric IDs that appear in this list to select which tools to extract. - -## When to Use This Tool - -Use \`extract\` when you have gathered useful information that you want to **preserve in distilled form** before removing the raw outputs: - -- **Task Completion:** You completed a unit of work and want to preserve key findings. -- **Knowledge Preservation:** You have context that contains valuable information, but also a lot of unnecessary detail - you only need to preserve some specifics. - -## When NOT to Use This Tool - -- **If you need precise syntax:** If you'll edit a file or grep for exact strings, keep the raw output. -- **If uncertain:** Prefer keeping over re-fetching. - - -## Best Practices -- **Strategic Batching:** Wait until you have several items or a few large outputs to extract, rather than doing tiny, frequent extractions. Aim for high-impact extractions that significantly reduce context size. -- **Think ahead:** Before extracting, ask: "Will I need the raw output for an upcoming task?" If you researched a file you'll later edit, do NOT extract it. - -## Format - -- \`ids\`: Array of numeric IDs as strings from the \`\` list -- \`distillation\`: Array of strings, one per ID (positional: distillation[0] is for ids[0], etc.) - -Each distillation string should capture the essential information you need to preserve - function signatures, logic, constraints, values, etc. Be as detailed as needed for your task. - -## Example - - -Assistant: [Reads auth service and user types] -I'll preserve the key details before extracting. -[Uses extract with: - ids: ["10", "11"], - distillation: [ - "auth.ts: validateToken(token: string) -> User|null checks cache first (5min TTL) then OIDC. hashPassword uses bcrypt 12 rounds. 
Tokens must be 128+ chars.", - "user.ts: interface User { id: string; email: string; permissions: ('read'|'write'|'admin')[]; status: 'active'|'suspended' }" - ] -] - - - -Assistant: [Reads 'auth.ts' to understand the login flow] -I've understood the auth flow. I'll need to modify this file to add the new validation, so I'm keeping this read in context rather than extracting. -` diff --git a/lib/prompts/index.ts b/lib/prompts/index.ts index bdfbc865..e764099a 100644 --- a/lib/prompts/index.ts +++ b/lib/prompts/index.ts @@ -1,26 +1,47 @@ -// Tool specs -import { DISCARD_TOOL_SPEC } from "./discard-tool-spec" -import { EXTRACT_TOOL_SPEC } from "./extract-tool-spec" +// Generated prompts (from .md files via scripts/generate-prompts.ts) +import { SYSTEM as SYSTEM_PROMPT } from "./_codegen/system.generated" +import { NUDGE } from "./_codegen/nudge.generated" +import { COMPRESS_NUDGE } from "./_codegen/compress-nudge.generated" +import { PRUNE as PRUNE_TOOL_SPEC } from "./_codegen/prune.generated" +import { DISTILL as DISTILL_TOOL_SPEC } from "./_codegen/distill.generated" +import { COMPRESS as COMPRESS_TOOL_SPEC } from "./_codegen/compress.generated" -// System prompts -import { SYSTEM_PROMPT_BOTH } from "./system/both" -import { SYSTEM_PROMPT_DISCARD } from "./system/discard" -import { SYSTEM_PROMPT_EXTRACT } from "./system/extract" +export interface ToolFlags { + distill: boolean + compress: boolean + prune: boolean +} -// Nudge prompts -import { NUDGE_BOTH } from "./nudge/both" -import { NUDGE_DISCARD } from "./nudge/discard" -import { NUDGE_EXTRACT } from "./nudge/extract" +function processConditionals(template: string, flags: ToolFlags): string { + const tools = ["distill", "compress", "prune"] as const + let result = template + // Strip comments: // ... 
// + result = result.replace(/\/\/.*?\/\//g, "") + // Process tool conditionals + for (const tool of tools) { + const regex = new RegExp(`<${tool}>([\\s\\S]*?)`, "g") + result = result.replace(regex, (_, content) => (flags[tool] ? content : "")) + } + // Collapse multiple blank/whitespace-only lines to single blank line + return result.replace(/\n([ \t]*\n)+/g, "\n\n").trim() +} + +export function renderSystemPrompt(flags: ToolFlags): string { + return processConditionals(SYSTEM_PROMPT, flags) +} + +export function renderNudge(flags: ToolFlags): string { + return processConditionals(NUDGE, flags) +} + +export function renderCompressNudge(): string { + return COMPRESS_NUDGE +} const PROMPTS: Record = { - "discard-tool-spec": DISCARD_TOOL_SPEC, - "extract-tool-spec": EXTRACT_TOOL_SPEC, - "system/system-prompt-both": SYSTEM_PROMPT_BOTH, - "system/system-prompt-discard": SYSTEM_PROMPT_DISCARD, - "system/system-prompt-extract": SYSTEM_PROMPT_EXTRACT, - "nudge/nudge-both": NUDGE_BOTH, - "nudge/nudge-discard": NUDGE_DISCARD, - "nudge/nudge-extract": NUDGE_EXTRACT, + "prune-tool-spec": PRUNE_TOOL_SPEC, + "distill-tool-spec": DISTILL_TOOL_SPEC, + "compress-tool-spec": COMPRESS_TOOL_SPEC, } export function loadPrompt(name: string, vars?: Record): string { diff --git a/lib/prompts/nudge.md b/lib/prompts/nudge.md new file mode 100644 index 00000000..4e4d8e4a --- /dev/null +++ b/lib/prompts/nudge.md @@ -0,0 +1,12 @@ + +CRITICAL CONTEXT WARNING +Your context window is filling with tool. Strict adherence to context hygiene is required. + +PROTOCOL +You should prioritize context management, but do not interrupt a critical atomic operation if one is in progress. Once the immediate step is done, you must perform context management. + +IMMEDIATE ACTION REQUIRED +KNOWLEDGE PRESERVATION: If holding valuable raw data you POTENTIALLY will need in your task, use the `distill` tool. 
Produce a high-fidelity distillation to preserve insights - be thorough +PHASE COMPLETION: If a phase is complete, use the `compress` tool to condense the entire sequence into a detailed summary +NOISE REMOVAL: If you read files or ran commands that yielded no value, use the `prune` tool to remove them. If newer tools supersede older ones, prune the old + diff --git a/lib/prompts/nudge/both.ts b/lib/prompts/nudge/both.ts deleted file mode 100644 index 50fc0a9d..00000000 --- a/lib/prompts/nudge/both.ts +++ /dev/null @@ -1,10 +0,0 @@ -export const NUDGE_BOTH = ` -**CRITICAL CONTEXT WARNING:** Your context window is filling with tool outputs. Strict adherence to context hygiene is required. - -**Immediate Actions Required:** -1. **Task Completion:** If a sub-task is complete, decide: use \`discard\` if no valuable context to preserve (default), or use \`extract\` if insights are worth keeping. -2. **Noise Removal:** If you read files or ran commands that yielded no value, use \`discard\` to remove them. -3. **Knowledge Preservation:** If you are holding valuable raw data you'll need to reference later, use \`extract\` to distill the insights and remove the raw entry. - -**Protocol:** You should prioritize this cleanup, but do not interrupt a critical atomic operation if one is in progress. Once the immediate step is done, you must perform context management. -` diff --git a/lib/prompts/nudge/discard.ts b/lib/prompts/nudge/discard.ts deleted file mode 100644 index 18e92504..00000000 --- a/lib/prompts/nudge/discard.ts +++ /dev/null @@ -1,9 +0,0 @@ -export const NUDGE_DISCARD = ` -**CRITICAL CONTEXT WARNING:** Your context window is filling with tool outputs. Strict adherence to context hygiene is required. - -**Immediate Actions Required:** -1. **Task Completion:** If a sub-task is complete, use the \`discard\` tool to remove the tools used. -2. **Noise Removal:** If you read files or ran commands that yielded no value, use the \`discard\` tool to remove them. 
- -**Protocol:** You should prioritize this cleanup, but do not interrupt a critical atomic operation if one is in progress. Once the immediate step is done, you must discard unneeded tool outputs. -` diff --git a/lib/prompts/nudge/extract.ts b/lib/prompts/nudge/extract.ts deleted file mode 100644 index 243f5855..00000000 --- a/lib/prompts/nudge/extract.ts +++ /dev/null @@ -1,9 +0,0 @@ -export const NUDGE_EXTRACT = ` -**CRITICAL CONTEXT WARNING:** Your context window is filling with tool outputs. Strict adherence to context hygiene is required. - -**Immediate Actions Required:** -1. **Task Completion:** If you have completed work, extract key findings from the tools used. Scale distillation depth to the value of the content. -2. **Knowledge Preservation:** If you are holding valuable raw data you'll need to reference later, use the \`extract\` tool with high-fidelity distillation to preserve the insights and remove the raw entry. - -**Protocol:** You should prioritize this cleanup, but do not interrupt a critical atomic operation if one is in progress. Once the immediate step is done, you must extract valuable findings from tool outputs. -` diff --git a/lib/prompts/prune.md b/lib/prompts/prune.md new file mode 100644 index 00000000..be18b009 --- /dev/null +++ b/lib/prompts/prune.md @@ -0,0 +1,18 @@ +Use this tool to remove tool outputs from context entirely. No preservation - pure deletion. + +THE PRUNABLE TOOLS LIST +A `` section surfaces in context showing outputs eligible for removal. Each line reads `ID: tool, parameter (~token usage)` (e.g., `20: read, /path/to/file.ts (~1500 tokens)`). Reference outputs by their numeric ID - these are your ONLY valid targets for pruning. + +THE WAYS OF PRUNE +`prune` is surgical excision - eliminating noise (irrelevant or unhelpful outputs), superseded information (older outputs replaced by newer data), or wrong targets (you accessed something that turned out to be irrelevant). Use it to keep your context lean and focused. 
+ +BATCH WISELY! Pruning is most effective when consolidated. Don't prune a single tiny output - accumulate several candidates before acting. + +Do NOT prune when: +NEEDED LATER: You plan to edit the file or reference this context for implementation. +UNCERTAINTY: If you might need to re-examine the original, keep it. + +Before pruning, ask: _"Is this noise, or will it serve me?"_ If the latter, keep it. Pruning that forces re-fetching is a net loss. + +THE FORMAT OF PRUNE +`ids`: Array of numeric IDs (as strings) from the `` list diff --git a/lib/prompts/system.md b/lib/prompts/system.md new file mode 100644 index 00000000..4ea2b6b1 --- /dev/null +++ b/lib/prompts/system.md @@ -0,0 +1,42 @@ + + +You operate in a context-constrained environment and MUST PROACTIVELY MANAGE IT TO AVOID CONTEXT ROT. Efficient context management is CRITICAL to maintaining performance and ensuring successful task completion. + +AVAILABLE TOOLS FOR CONTEXT MANAGEMENT +`distill`: condense key findings from tool calls into high-fidelity distillation to preserve gained insights. Use to extract valuable knowledge relevant to the user's request. BE THOROUGH, your distillation MUST be high-signal, low noise and complete +`compress`: squash a contiguous portion of the conversation and replace it with a low level technical summary. Use to filter noise from the conversation and retain purified understanding. Compress conversation phases ORGANICALLY as they get completed, think meso, not micro nor macro. Do not be cheap with that low level technical summary and BE MINDFUL of specifics that must be crystallized to retain UNAMBIGUOUS full picture. +`prune`: remove individual tool calls that are noise, irrelevant, or superseded. No preservation of content. DO NOT let irrelevant tool calls accumulate. DO NOT PRUNE TOOL OUTPUTS THAT YOU MAY NEED LATER + +THE DISTILL TOOL +`distill` is the favored way to target specific tools and crystallize their value into high-signal low-noise knowledge nuggets. 
Your distillation must be comprehensive, capturing technical details (symbols, signatures, logic, constraints) such that the raw output is no longer needed. THINK complete technical substitute. `distill` is typically best used when you are certain the raw information is not needed anymore, but the knowledge it contains is valuable to retain so you maintain context authenticity and understanding. Be conservative in your approach to distilling, but do NOT hesitate to distill when appropriate. + + +THE COMPRESS TOOL +`compress` is a sledgehammer and should be used accordingly. Its purpose is to reduce whole parts of the conversation to their essence and technical details in order to leave room for newer context. Your summary MUST be technical and specific enough to preserve FULL understanding of WHAT TRANSPIRED, such that NO AMBIGUITY remains about what was done, found, or decided. Your compress summary must be thorough and precise. `compress` will replace everything in the range you match, user and assistant messages, tool inputs and outputs. It is preferred to not compress preemptively, but rather wait for natural breakpoints in the conversation. Those breakpoints are to be inferred from user messages. You WILL NOT compress based on thinking that you are done with the task, wait for conversation cues that the user has moved on from current phase. + +This tool will typically be used at the end of a phase of work, when conversation starts to accumulate noise that would be better served summarized, or when you've done significant exploration and can FULLY synthesize your findings and understanding into a technical summary. + +Make sure to match enough of the context with start and end strings so you're not faced with an error calling the tool. Be VERY CAREFUL AND CONSERVATIVE when using `compress`. + + +THE PRUNE TOOL +`prune` is your last resort for context management. It is a blunt instrument that removes tool outputs entirely, without ANY preservation. 
It is best used to eliminate noise, irrelevant information, or superseded outputs that no longer add value to the conversation. You MUST NOT prune tool outputs that you may need later. Prune is a targeted nuke, not a general cleanup tool. + +Contemplate only pruning when you are certain that the tool output is irrelevant to the current task or has been superseded by more recent information. If in doubt, defer for when you are definitive. Evaluate WHAT SHOULD be pruned before jumping the gun. + + +TIMING +Prefer managing context at the START of a new agentic loop (after receiving a user message) rather than at the END of your previous turn. At turn start, you have fresh signal about what the user needs next - you can better judge what's still relevant versus noise from prior work. Managing at turn end means making retention decisions before knowing what comes next. + +EVALUATE YOUR CONTEXT AND MANAGE REGULARLY TO AVOID CONTEXT ROT. AVOID USING MANAGEMENT TOOLS AS THE ONLY TOOL CALLS IN YOUR RESPONSE, PARALLELIZE WITH OTHER RELEVANT TOOLS TO TASK CONTINUATION (read, edit, bash...). It is imperative you understand the value or lack thereof of the context you manage and make informed decisions to maintain a decluttered, high-quality and relevant context. + +The session is your responsibility, and effective context management is CRITICAL to your success. Be PROACTIVE, DELIBERATE, and STRATEGIC in your approach to context management. The session is your oyster - keep it clean, relevant, and high-quality to ensure optimal performance and successful task completion. + +Be respectful of the user's API usage, manage context methodically as you work through the task and avoid calling ONLY context management tools in your responses. + + + +This chat environment injects context information on your behalf in the form of a list to help you manage context effectively. Carefully read the list and use it to inform your management decisions. 
The list is automatically updated after each turn to reflect the current state of manageable tools and context usage. If no list is present, do NOT attempt to prune anything. +There may be tools in session context that do not appear in the list, this is expected, remember that you can ONLY prune what you see in list. + + diff --git a/lib/prompts/system/both.ts b/lib/prompts/system/both.ts deleted file mode 100644 index 9c53a748..00000000 --- a/lib/prompts/system/both.ts +++ /dev/null @@ -1,60 +0,0 @@ -export const SYSTEM_PROMPT_BOTH = ` - - -ENVIRONMENT -You are operating in a context-constrained environment and thus must proactively manage your context window using the \`discard\` and \`extract\` tools. The environment calls the \`context_info\` tool to provide an up-to-date list after each turn. Use this information when deciding what to prune. - -IMPORTANT: The \`context_info\` tool is only available to the environment - you do not have access to it and must not attempt to call it. - -TWO TOOLS FOR CONTEXT MANAGEMENT -- \`discard\`: Remove tool outputs that are no longer needed (completed tasks, noise, outdated info). No preservation of content. -- \`extract\`: Extract key findings into distilled knowledge before removing raw outputs. Use when you need to preserve information. - -CHOOSING THE RIGHT TOOL -Ask: "Do I need to preserve any information from this output?" -- **No** → \`discard\` (default for cleanup) -- **Yes** → \`extract\` (preserves distilled knowledge) -- **Uncertain** → \`extract\` (safer, preserves signal) - -Common scenarios: -- Task complete, no valuable context → \`discard\` -- Task complete, insights worth remembering → \`extract\` -- Noise, irrelevant, or superseded outputs → \`discard\` -- Valuable context needed later but raw output too large → \`extract\` - -PRUNE METHODICALLY - BATCH YOUR ACTIONS -Every tool call adds to your context debt. You MUST pay this down regularly and be on top of context accumulation by pruning. 
Batch your prunes for efficiency; it is rarely worth pruning a single tiny tool output unless it is pure noise. Evaluate what SHOULD be pruned before jumping the gun. - -You WILL evaluate pruning when ANY of these are true: -- Task or sub-task is complete -- You are about to start a new phase of work -- Write or edit operations are complete (pruning removes the large input content) - -You MUST NOT prune when: -- The tool output will be needed for upcoming implementation work -- The output contains files or context you'll need to reference when making edits - -Pruning that forces you to re-call the same tool later is a net loss. Only prune when you're confident the information won't be needed again. - -NOTES -When in doubt, keep it. Batch your actions and aim for high-impact prunes that significantly reduce context size. -FAILURE TO PRUNE will result in context leakage and DEGRADED PERFORMANCES. -There may be tools in session context that do not appear in the list, this is expected, you can ONLY prune what you see in . - - - - -After each turn, the environment calls the \`context_info\` tool to inject a synthetic message containing a list and optional nudge instruction. This tool is only available to the environment - you do not have access to it. - -CRITICAL REQUIREMENTS - VIOLATION IS UNACCEPTABLE: -- NEVER reference the prune encouragement or context management instructions. Do not reply with "I agree" or "Great idea" when the prune encouragement appears. 
-- NEVER acknowledge the list (e.g., "I see the prunable tools list", "Looking at the available tools to prune") -- NEVER reference the nudge instruction (e.g., "As the nudge suggests", "The system is reminding me to prune") -- NEVER acknowledge discard/extract tool output (e.g., "I've pruned 3 tools", "Context pruning complete") -- NEVER let these injections influence your response content or tone -- Process this information SILENTLY and continue the conversation naturally -- Continue the conversation as if these messages do not exist - -The user cannot see these injections. Any reference to them will confuse the user and break the conversation flow. - -` diff --git a/lib/prompts/system/discard.ts b/lib/prompts/system/discard.ts deleted file mode 100644 index e5cd77da..00000000 --- a/lib/prompts/system/discard.ts +++ /dev/null @@ -1,51 +0,0 @@ -export const SYSTEM_PROMPT_DISCARD = ` - - -ENVIRONMENT -You are operating in a context-constrained environment and thus must proactively manage your context window using the \`discard\` tool. The environment calls the \`context_info\` tool to provide an up-to-date list after each turn. Use this information when deciding what to discard. - -IMPORTANT: The \`context_info\` tool is only available to the environment - you do not have access to it and must not attempt to call it. - -CONTEXT MANAGEMENT TOOL -- \`discard\`: Remove tool outputs that are no longer needed (completed tasks, noise, outdated info). No preservation of content. - -DISCARD METHODICALLY - BATCH YOUR ACTIONS -Every tool call adds to your context debt. You MUST pay this down regularly and be on top of context accumulation by discarding. Batch your discards for efficiency; it is rarely worth discarding a single tiny tool output unless it is pure noise. Evaluate what SHOULD be discarded before jumping the gun. - -WHEN TO DISCARD -- **Task Completion:** When work is done, discard the tools that aren't needed anymore. 
-- **Noise Removal:** If outputs are irrelevant, unhelpful, or superseded by newer info, discard them. - -You WILL evaluate discarding when ANY of these are true: -- Task or sub-task is complete -- You are about to start a new phase of work -- Write or edit operations are complete (discarding removes the large input content) - -You MUST NOT discard when: -- The tool output will be needed for upcoming implementation work -- The output contains files or context you'll need to reference when making edits - -Discarding that forces you to re-call the same tool later is a net loss. Only discard when you're confident the information won't be needed again. - -NOTES -When in doubt, keep it. Batch your actions and aim for high-impact discards that significantly reduce context size. -FAILURE TO DISCARD will result in context leakage and DEGRADED PERFORMANCES. -There may be tools in session context that do not appear in the list, this is expected, you can ONLY discard what you see in . - - - - -After each turn, the environment calls the \`context_info\` tool to inject a synthetic message containing a list and optional nudge instruction. This tool is only available to the environment - you do not have access to it. - -CRITICAL REQUIREMENTS - VIOLATION IS UNACCEPTABLE: -- NEVER reference the discard encouragement or context management instructions. Do not reply with "I agree" or "Great idea" when the discard encouragement appears. 
-- NEVER acknowledge the list (e.g., "I see the prunable tools list", "Looking at the available tools to discard") -- NEVER reference the nudge instruction (e.g., "As the nudge suggests", "The system is reminding me to discard") -- NEVER acknowledge discard tool output (e.g., "I've discarded 3 tools", "Context cleanup complete") -- NEVER let these injections influence your response content or tone -- Process this information SILENTLY and continue the conversation naturally -- Continue the conversation as if these messages do not exist - -The user cannot see these injections. Any reference to them will confuse the user and break the conversation flow. - -` diff --git a/lib/prompts/system/extract.ts b/lib/prompts/system/extract.ts deleted file mode 100644 index 3f225e1e..00000000 --- a/lib/prompts/system/extract.ts +++ /dev/null @@ -1,51 +0,0 @@ -export const SYSTEM_PROMPT_EXTRACT = ` - - -ENVIRONMENT -You are operating in a context-constrained environment and thus must proactively manage your context window using the \`extract\` tool. The environment calls the \`context_info\` tool to provide an up-to-date list after each turn. Use this information when deciding what to extract. - -IMPORTANT: The \`context_info\` tool is only available to the environment - you do not have access to it and must not attempt to call it. - -CONTEXT MANAGEMENT TOOL -- \`extract\`: Extract key findings from tools into distilled knowledge before removing the raw content from context. Use this to preserve important information while reducing context size. - -EXTRACT METHODICALLY - BATCH YOUR ACTIONS -Every tool call adds to your context debt. You MUST pay this down regularly and be on top of context accumulation by extracting. Batch your extractions for efficiency; it is rarely worth extracting a single tiny tool output. Evaluate what SHOULD be extracted before jumping the gun. - -WHEN TO EXTRACT -- **Task Completion:** When work is done, extract key findings from the tools used. 
Scale distillation depth to the value of the content. -- **Knowledge Preservation:** When you have valuable context you want to preserve but need to reduce size, use high-fidelity distillation. Your distillation must be comprehensive, capturing technical details (signatures, logic, constraints) such that the raw output is no longer needed. THINK: high signal, complete technical substitute. - -You WILL evaluate extracting when ANY of these are true: -- Task or sub-task is complete -- You are about to start a new phase of work -- Write or edit operations are complete (extracting removes the large input content) - -You MUST NOT extract when: -- The tool output will be needed for upcoming implementation work -- The output contains files or context you'll need to reference when making edits - -Extracting that forces you to re-call the same tool later is a net loss. Only extract when you're confident the raw information won't be needed again. - -NOTES -When in doubt, keep it. Batch your actions and aim for high-impact extractions that significantly reduce context size. -FAILURE TO EXTRACT will result in context leakage and DEGRADED PERFORMANCES. -There may be tools in session context that do not appear in the list, this is expected, you can ONLY extract what you see in . - - - - -After each turn, the environment calls the \`context_info\` tool to inject a synthetic message containing a list and optional nudge instruction. This tool is only available to the environment - you do not have access to it. - -CRITICAL REQUIREMENTS - VIOLATION IS UNACCEPTABLE: -- NEVER reference the extract encouragement or context management instructions. Do not reply with "I agree" or "Great idea" when the extract encouragement appears. 
-- NEVER acknowledge the list (e.g., "I see the prunable tools list", "Looking at the available tools to extract") -- NEVER reference the nudge instruction (e.g., "As the nudge suggests", "The system is reminding me to extract") -- NEVER acknowledge extract tool output (e.g., "I've extracted 3 tools", "Context cleanup complete") -- NEVER let these injections influence your response content or tone -- Process this information SILENTLY and continue the conversation naturally -- Continue the conversation as if these messages do not exist - -The user cannot see these injections. Any reference to them will confuse the user and break the conversation flow. - -` diff --git a/lib/protected-file-patterns.ts b/lib/protected-file-patterns.ts index 3370e20b..ecd138f9 100644 --- a/lib/protected-file-patterns.ts +++ b/lib/protected-file-patterns.ts @@ -65,18 +65,49 @@ export function matchesGlob(inputPath: string, pattern: string): boolean { return new RegExp(regex).test(input) } -export function getFilePathFromParameters(parameters: unknown): string | undefined { +export function getFilePathsFromParameters(tool: string, parameters: unknown): string[] { if (typeof parameters !== "object" || parameters === null) { - return undefined + return [] } - const filePath = (parameters as Record).filePath - return typeof filePath === "string" && filePath.length > 0 ? filePath : undefined + const paths: string[] = [] + const params = parameters as Record + + // 1. apply_patch uses patchText with embedded paths + if (tool === "apply_patch" && typeof params.patchText === "string") { + const pathRegex = /\*\*\* (?:Add|Delete|Update) File: ([^\n\r]+)/g + let match + while ((match = pathRegex.exec(params.patchText)) !== null) { + paths.push(match[1].trim()) + } + } + + // 2. 
multiedit uses top-level filePath and nested edits array + if (tool === "multiedit") { + if (typeof params.filePath === "string") { + paths.push(params.filePath) + } + if (Array.isArray(params.edits)) { + for (const edit of params.edits) { + if (edit && typeof edit.filePath === "string") { + paths.push(edit.filePath) + } + } + } + } + + // 3. Default check for common filePath parameter (read, write, edit, etc) + if (typeof params.filePath === "string") { + paths.push(params.filePath) + } + + // Return unique non-empty paths + return [...new Set(paths)].filter((p) => p.length > 0) } -export function isProtectedFilePath(filePath: string | undefined, patterns: string[]): boolean { - if (!filePath) return false +export function isProtected(filePaths: string[], patterns: string[]): boolean { + if (!filePaths || filePaths.length === 0) return false if (!patterns || patterns.length === 0) return false - return patterns.some((pattern) => matchesGlob(filePath, pattern)) + return filePaths.some((path) => patterns.some((pattern) => matchesGlob(path, pattern))) } diff --git a/lib/shared-utils.ts b/lib/shared-utils.ts index 902ea403..0baab713 100644 --- a/lib/shared-utils.ts +++ b/lib/shared-utils.ts @@ -2,11 +2,21 @@ import { SessionState, WithParts } from "./state" import { isIgnoredUserMessage } from "./messages/utils" export const isMessageCompacted = (state: SessionState, msg: WithParts): boolean => { - return msg.info.time.created < state.lastCompaction + if (msg.info.time.created < state.lastCompaction) { + return true + } + if (state.prune.messageIds.has(msg.info.id)) { + return true + } + return false } -export const getLastUserMessage = (messages: WithParts[]): WithParts | null => { - for (let i = messages.length - 1; i >= 0; i--) { +export const getLastUserMessage = ( + messages: WithParts[], + startIndex?: number, +): WithParts | null => { + const start = startIndex ?? 
messages.length - 1 + for (let i = start; i >= 0; i--) { const msg = messages[i] if (msg.info.role === "user" && !isIgnoredUserMessage(msg)) { return msg diff --git a/lib/state/persistence.ts b/lib/state/persistence.ts index 172ff75f..0c368380 100644 --- a/lib/state/persistence.ts +++ b/lib/state/persistence.ts @@ -8,17 +8,30 @@ import * as fs from "fs/promises" import { existsSync } from "fs" import { homedir } from "os" import { join } from "path" -import type { SessionState, SessionStats, Prune } from "./types" +import type { SessionState, SessionStats, CompressSummary } from "./types" import type { Logger } from "../logger" +/** Prune state as stored on disk (arrays for JSON compatibility) */ +export interface PersistedPrune { + toolIds: string[] + messageIds: string[] +} + export interface PersistedSessionState { sessionName?: string - prune: Prune + prune: PersistedPrune + compressSummaries: CompressSummary[] stats: SessionStats lastUpdated: string } -const STORAGE_DIR = join(homedir(), ".local", "share", "opencode", "storage", "plugin", "dcp") +const STORAGE_DIR = join( + process.env.XDG_DATA_HOME || join(homedir(), ".local", "share"), + "opencode", + "storage", + "plugin", + "dcp", +) async function ensureStorageDir(): Promise { if (!existsSync(STORAGE_DIR)) { @@ -44,7 +57,11 @@ export async function saveSessionState( const state: PersistedSessionState = { sessionName: sessionName, - prune: sessionState.prune, + prune: { + toolIds: [...sessionState.prune.toolIds], + messageIds: [...sessionState.prune.messageIds], + }, + compressSummaries: sessionState.compressSummaries, stats: sessionState.stats, lastUpdated: new Date().toISOString(), } @@ -86,6 +103,26 @@ export async function loadSessionState( return null } + if (Array.isArray(state.compressSummaries)) { + const validSummaries = state.compressSummaries.filter( + (s): s is CompressSummary => + s !== null && + typeof s === "object" && + typeof s.anchorMessageId === "string" && + typeof s.summary === 
"string", + ) + if (validSummaries.length !== state.compressSummaries.length) { + logger.warn("Filtered out malformed compressSummaries entries", { + sessionId: sessionId, + original: state.compressSummaries.length, + valid: validSummaries.length, + }) + } + state.compressSummaries = validSummaries + } else { + state.compressSummaries = [] + } + logger.info("Loaded session state from disk", { sessionId: sessionId, }) @@ -103,6 +140,7 @@ export async function loadSessionState( export interface AggregatedStats { totalTokens: number totalTools: number + totalMessages: number sessionCount: number } @@ -110,6 +148,7 @@ export async function loadAllSessionStats(logger: Logger): Promise state.lastCompaction) { state.lastCompaction = lastCompactionTimestamp - state.toolParameters.clear() - state.prune.toolIds = [] - logger.info("Detected compaction from messages - cleared tool cache", { + resetOnCompaction(state) + logger.info("Detected compaction - reset stale state", { timestamp: lastCompactionTimestamp, }) } @@ -44,18 +48,22 @@ export function createSessionState(): SessionState { sessionId: null, isSubAgent: false, prune: { - toolIds: [], + toolIds: new Set(), + messageIds: new Set(), }, + compressSummaries: [], stats: { pruneTokenCounter: 0, totalPruneTokens: 0, }, toolParameters: new Map(), + toolIdList: [], nudgeCounter: 0, lastToolPrune: false, lastCompaction: 0, currentTurn: 0, variant: undefined, + modelContextLimit: undefined, } } @@ -63,18 +71,22 @@ export function resetSessionState(state: SessionState): void { state.sessionId = null state.isSubAgent = false state.prune = { - toolIds: [], + toolIds: new Set(), + messageIds: new Set(), } + state.compressSummaries = [] state.stats = { pruneTokenCounter: 0, totalPruneTokens: 0, } state.toolParameters.clear() + state.toolIdList = [] state.nudgeCounter = 0 state.lastToolPrune = false state.lastCompaction = 0 state.currentTurn = 0 state.variant = undefined + state.modelContextLimit = undefined } export async function 
ensureSessionInitialized( @@ -107,36 +119,12 @@ export async function ensureSessionInitialized( } state.prune = { - toolIds: persisted.prune.toolIds || [], + toolIds: new Set(persisted.prune.toolIds || []), + messageIds: new Set(persisted.prune.messageIds || []), } + state.compressSummaries = persisted.compressSummaries || [] state.stats = { pruneTokenCounter: persisted.stats?.pruneTokenCounter || 0, totalPruneTokens: persisted.stats?.totalPruneTokens || 0, } } - -function findLastCompactionTimestamp(messages: WithParts[]): number { - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i] - if (msg.info.role === "assistant" && msg.info.summary === true) { - return msg.info.time.created - } - } - return 0 -} - -export function countTurns(state: SessionState, messages: WithParts[]): number { - let turnCount = 0 - for (const msg of messages) { - if (isMessageCompacted(state, msg)) { - continue - } - const parts = Array.isArray(msg.parts) ? msg.parts : [] - for (const part of parts) { - if (part.type === "step-start") { - turnCount++ - } - } - } - return turnCount -} diff --git a/lib/state/tool-cache.ts b/lib/state/tool-cache.ts index 38d3b54b..a11d9bdd 100644 --- a/lib/state/tool-cache.ts +++ b/lib/state/tool-cache.ts @@ -2,6 +2,7 @@ import type { SessionState, ToolStatus, WithParts } from "./index" import type { Logger } from "../logger" import { PluginConfig } from "../config" import { isMessageCompacted } from "../shared-utils" +import { countToolTokens } from "../strategies/utils" const MAX_TOOL_CACHE_SIZE = 1000 @@ -43,16 +44,15 @@ export async function syncToolCache( turnProtectionTurns > 0 && state.currentTurn - turnCounter < turnProtectionTurns - state.lastToolPrune = - (part.tool === "discard" || part.tool === "extract") && - part.state.status === "completed" - - const allProtectedTools = config.tools.settings.protectedTools - - if (part.tool === "discard" || part.tool === "extract") { + if (part.tool === "distill" || part.tool === 
"compress" || part.tool === "prune") { state.nudgeCounter = 0 - } else if (!allProtectedTools.includes(part.tool) && !isProtectedByTurn) { - state.nudgeCounter++ + state.lastToolPrune = true + } else { + state.lastToolPrune = false + const allProtectedTools = config.tools.settings.protectedTools + if (!allProtectedTools.includes(part.tool) && !isProtectedByTurn) { + state.nudgeCounter++ + } } if (state.toolParameters.has(part.callID)) { @@ -63,14 +63,21 @@ export async function syncToolCache( continue } + const allProtectedTools = config.tools.settings.protectedTools + const isProtectedTool = allProtectedTools.includes(part.tool) + const tokenCount = isProtectedTool ? undefined : countToolTokens(part) + state.toolParameters.set(part.callID, { tool: part.tool, parameters: part.state?.input ?? {}, status: part.state.status as ToolStatus | undefined, error: part.state.status === "error" ? part.state.error : undefined, turn: turnCounter, + tokenCount, }) - logger.info(`Cached tool id: ${part.callID} (created on turn ${turnCounter})`) + logger.info( + `Cached tool id: ${part.callID} (turn ${turnCounter}${tokenCount !== undefined ? 
`, ~${tokenCount} tokens` : ""})`, + ) } } diff --git a/lib/state/types.ts b/lib/state/types.ts index 1e41170d..3aa41a88 100644 --- a/lib/state/types.ts +++ b/lib/state/types.ts @@ -13,6 +13,7 @@ export interface ToolParameterEntry { status?: ToolStatus error?: string turn: number + tokenCount?: number } export interface SessionStats { @@ -20,19 +21,28 @@ export interface SessionStats { totalPruneTokens: number } +export interface CompressSummary { + anchorMessageId: string + summary: string +} + export interface Prune { - toolIds: string[] + toolIds: Set + messageIds: Set } export interface SessionState { sessionId: string | null isSubAgent: boolean prune: Prune + compressSummaries: CompressSummary[] stats: SessionStats toolParameters: Map + toolIdList: string[] nudgeCounter: number lastToolPrune: boolean lastCompaction: number currentTurn: number variant: string | undefined + modelContextLimit: number | undefined } diff --git a/lib/state/utils.ts b/lib/state/utils.ts index 4cc10ce1..343a3574 100644 --- a/lib/state/utils.ts +++ b/lib/state/utils.ts @@ -1,3 +1,6 @@ +import type { SessionState, WithParts } from "./types" +import { isMessageCompacted } from "../shared-utils" + export async function isSubAgentSession(client: any, sessionID: string): Promise { try { const result = await client.session.get({ path: { id: sessionID } }) @@ -6,3 +9,38 @@ export async function isSubAgentSession(client: any, sessionID: string): Promise return false } } + +export function findLastCompactionTimestamp(messages: WithParts[]): number { + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i] + if (msg.info.role === "assistant" && msg.info.summary === true) { + return msg.info.time.created + } + } + return 0 +} + +export function countTurns(state: SessionState, messages: WithParts[]): number { + let turnCount = 0 + for (const msg of messages) { + if (isMessageCompacted(state, msg)) { + continue + } + const parts = Array.isArray(msg.parts) ? 
msg.parts : [] + for (const part of parts) { + if (part.type === "step-start") { + turnCount++ + } + } + } + return turnCount +} + +export function resetOnCompaction(state: SessionState): void { + state.toolParameters.clear() + state.prune.toolIds = new Set() + state.prune.messageIds = new Set() + state.compressSummaries = [] + state.nudgeCounter = 0 + state.lastToolPrune = false +} diff --git a/lib/strategies/deduplication.ts b/lib/strategies/deduplication.ts index fb6ce4ed..33c43a88 100644 --- a/lib/strategies/deduplication.ts +++ b/lib/strategies/deduplication.ts @@ -1,8 +1,7 @@ import { PluginConfig } from "../config" import { Logger } from "../logger" import type { SessionState, WithParts } from "../state" -import { buildToolIdList } from "../messages/utils" -import { getFilePathFromParameters, isProtectedFilePath } from "../protected-file-patterns" +import { getFilePathsFromParameters, isProtected } from "../protected-file-patterns" import { calculateTokensSaved } from "./utils" /** @@ -20,15 +19,13 @@ export const deduplicate = ( return } - // Build list of all tool call IDs from messages (chronological order) - const allToolIds = buildToolIdList(state, messages, logger) + const allToolIds = state.toolIdList if (allToolIds.length === 0) { return } // Filter out IDs already pruned - const alreadyPruned = new Set(state.prune.toolIds) - const unprunedIds = allToolIds.filter((id) => !alreadyPruned.has(id)) + const unprunedIds = allToolIds.filter((id) => !state.prune.toolIds.has(id)) if (unprunedIds.length === 0) { return @@ -51,8 +48,8 @@ export const deduplicate = ( continue } - const filePath = getFilePathFromParameters(metadata.parameters) - if (isProtectedFilePath(filePath, config.protectedFilePatterns)) { + const filePaths = getFilePathsFromParameters(metadata.tool, metadata.parameters) + if (isProtected(filePaths, config.protectedFilePatterns)) { continue } @@ -77,7 +74,9 @@ export const deduplicate = ( state.stats.totalPruneTokens += 
calculateTokensSaved(state, messages, newPruneIds) if (newPruneIds.length > 0) { - state.prune.toolIds.push(...newPruneIds) + for (const id of newPruneIds) { + state.prune.toolIds.add(id) + } logger.debug(`Marked ${newPruneIds.length} duplicate tool calls for pruning`) } } diff --git a/lib/strategies/index.ts b/lib/strategies/index.ts index 5444964c..e0680e6b 100644 --- a/lib/strategies/index.ts +++ b/lib/strategies/index.ts @@ -1,4 +1,4 @@ export { deduplicate } from "./deduplication" -export { createDiscardTool, createExtractTool } from "./tools" +export { createPruneTool, createDistillTool, createCompressTool } from "../tools" export { supersedeWrites } from "./supersede-writes" export { purgeErrors } from "./purge-errors" diff --git a/lib/strategies/purge-errors.ts b/lib/strategies/purge-errors.ts index c3debf69..65b43e35 100644 --- a/lib/strategies/purge-errors.ts +++ b/lib/strategies/purge-errors.ts @@ -1,8 +1,7 @@ import { PluginConfig } from "../config" import { Logger } from "../logger" import type { SessionState, WithParts } from "../state" -import { buildToolIdList } from "../messages/utils" -import { getFilePathFromParameters, isProtectedFilePath } from "../protected-file-patterns" +import { getFilePathsFromParameters, isProtected } from "../protected-file-patterns" import { calculateTokensSaved } from "./utils" /** @@ -23,15 +22,13 @@ export const purgeErrors = ( return } - // Build list of all tool call IDs from messages (chronological order) - const allToolIds = buildToolIdList(state, messages, logger) + const allToolIds = state.toolIdList if (allToolIds.length === 0) { return } // Filter out IDs already pruned - const alreadyPruned = new Set(state.prune.toolIds) - const unprunedIds = allToolIds.filter((id) => !alreadyPruned.has(id)) + const unprunedIds = allToolIds.filter((id) => !state.prune.toolIds.has(id)) if (unprunedIds.length === 0) { return @@ -53,8 +50,8 @@ export const purgeErrors = ( continue } - const filePath = 
getFilePathFromParameters(metadata.parameters) - if (isProtectedFilePath(filePath, config.protectedFilePatterns)) { + const filePaths = getFilePathsFromParameters(metadata.tool, metadata.parameters) + if (isProtected(filePaths, config.protectedFilePatterns)) { continue } @@ -72,7 +69,9 @@ export const purgeErrors = ( if (newPruneIds.length > 0) { state.stats.totalPruneTokens += calculateTokensSaved(state, messages, newPruneIds) - state.prune.toolIds.push(...newPruneIds) + for (const id of newPruneIds) { + state.prune.toolIds.add(id) + } logger.debug( `Marked ${newPruneIds.length} error tool calls for pruning (older than ${turnThreshold} turns)`, ) diff --git a/lib/strategies/supersede-writes.ts b/lib/strategies/supersede-writes.ts index ef765c42..66c90251 100644 --- a/lib/strategies/supersede-writes.ts +++ b/lib/strategies/supersede-writes.ts @@ -1,8 +1,7 @@ import { PluginConfig } from "../config" import { Logger } from "../logger" import type { SessionState, WithParts } from "../state" -import { buildToolIdList } from "../messages/utils" -import { getFilePathFromParameters, isProtectedFilePath } from "../protected-file-patterns" +import { getFilePathsFromParameters, isProtected } from "../protected-file-patterns" import { calculateTokensSaved } from "./utils" /** @@ -23,16 +22,13 @@ export const supersedeWrites = ( return } - // Build list of all tool call IDs from messages (chronological order) - const allToolIds = buildToolIdList(state, messages, logger) + const allToolIds = state.toolIdList if (allToolIds.length === 0) { return } // Filter out IDs already pruned - const alreadyPruned = new Set(state.prune.toolIds) - - const unprunedIds = allToolIds.filter((id) => !alreadyPruned.has(id)) + const unprunedIds = allToolIds.filter((id) => !state.prune.toolIds.has(id)) if (unprunedIds.length === 0) { return } @@ -51,12 +47,13 @@ export const supersedeWrites = ( continue } - const filePath = getFilePathFromParameters(metadata.parameters) - if (!filePath) { + const 
filePaths = getFilePathsFromParameters(metadata.tool, metadata.parameters) + if (filePaths.length === 0) { continue } + const filePath = filePaths[0] - if (isProtectedFilePath(filePath, config.protectedFilePatterns)) { + if (isProtected(filePaths, config.protectedFilePatterns)) { continue } @@ -85,7 +82,7 @@ export const supersedeWrites = ( // For each write, check if there's a read that comes after it for (const write of writes) { // Skip if already pruned - if (alreadyPruned.has(write.id)) { + if (state.prune.toolIds.has(write.id)) { continue } @@ -99,7 +96,9 @@ export const supersedeWrites = ( if (newPruneIds.length > 0) { state.stats.totalPruneTokens += calculateTokensSaved(state, messages, newPruneIds) - state.prune.toolIds.push(...newPruneIds) + for (const id of newPruneIds) { + state.prune.toolIds.add(id) + } logger.debug(`Marked ${newPruneIds.length} superseded write tool calls for pruning`) } } diff --git a/lib/strategies/tools.ts b/lib/strategies/tools.ts deleted file mode 100644 index 44f6742f..00000000 --- a/lib/strategies/tools.ts +++ /dev/null @@ -1,220 +0,0 @@ -import { tool } from "@opencode-ai/plugin" -import type { SessionState, ToolParameterEntry, WithParts } from "../state" -import type { PluginConfig } from "../config" -import { buildToolIdList } from "../messages/utils" -import { PruneReason, sendUnifiedNotification } from "../ui/notification" -import { formatPruningResultForTool } from "../ui/utils" -import { ensureSessionInitialized } from "../state" -import { saveSessionState } from "../state/persistence" -import type { Logger } from "../logger" -import { loadPrompt } from "../prompts" -import { calculateTokensSaved, getCurrentParams } from "./utils" -import { getFilePathFromParameters, isProtectedFilePath } from "../protected-file-patterns" - -const DISCARD_TOOL_DESCRIPTION = loadPrompt("discard-tool-spec") -const EXTRACT_TOOL_DESCRIPTION = loadPrompt("extract-tool-spec") - -export interface PruneToolContext { - client: any - state: 
SessionState - logger: Logger - config: PluginConfig - workingDirectory: string -} - -// Shared logic for executing prune operations. -async function executePruneOperation( - ctx: PruneToolContext, - toolCtx: { sessionID: string }, - ids: string[], - reason: PruneReason, - toolName: string, - distillation?: string[], -): Promise { - const { client, state, logger, config, workingDirectory } = ctx - const sessionId = toolCtx.sessionID - - logger.info(`${toolName} tool invoked`) - logger.info(JSON.stringify(reason ? { ids, reason } : { ids })) - - if (!ids || ids.length === 0) { - logger.debug(`${toolName} tool called but ids is empty or undefined`) - throw new Error( - `No IDs provided. Check the list for available IDs to ${toolName.toLowerCase()}.`, - ) - } - - const numericToolIds: number[] = ids - .map((id) => parseInt(id, 10)) - .filter((n): n is number => !isNaN(n)) - - if (numericToolIds.length === 0) { - logger.debug(`No numeric tool IDs provided for ${toolName}: ` + JSON.stringify(ids)) - throw new Error("No numeric IDs provided. Format: ids: [id1, id2, ...]") - } - - // Fetch messages to calculate tokens and find current agent - const messagesResponse = await client.session.messages({ - path: { id: sessionId }, - }) - const messages: WithParts[] = messagesResponse.data || messagesResponse - - await ensureSessionInitialized(ctx.client, state, sessionId, logger, messages) - - const currentParams = getCurrentParams(state, messages, logger) - const toolIdList: string[] = buildToolIdList(state, messages, logger) - - // Validate that all numeric IDs are within bounds - if (numericToolIds.some((id) => id < 0 || id >= toolIdList.length)) { - logger.debug("Invalid tool IDs provided: " + numericToolIds.join(", ")) - throw new Error( - "Invalid IDs provided. 
Only use numeric IDs from the list.", - ) - } - - // Validate that all IDs exist in cache and aren't protected - // (rejects hallucinated IDs and turn-protected tools not shown in ) - for (const index of numericToolIds) { - const id = toolIdList[index] - const metadata = state.toolParameters.get(id) - if (!metadata) { - logger.debug( - "Rejecting prune request - ID not in cache (turn-protected or hallucinated)", - { index, id }, - ) - throw new Error( - "Invalid IDs provided. Only use numeric IDs from the list.", - ) - } - const allProtectedTools = config.tools.settings.protectedTools - if (allProtectedTools.includes(metadata.tool)) { - logger.debug("Rejecting prune request - protected tool", { - index, - id, - tool: metadata.tool, - }) - throw new Error( - "Invalid IDs provided. Only use numeric IDs from the list.", - ) - } - - const filePath = getFilePathFromParameters(metadata.parameters) - if (isProtectedFilePath(filePath, config.protectedFilePatterns)) { - logger.debug("Rejecting prune request - protected file path", { - index, - id, - tool: metadata.tool, - filePath, - }) - throw new Error( - "Invalid IDs provided. 
Only use numeric IDs from the list.", - ) - } - } - - const pruneToolIds: string[] = numericToolIds.map((index) => toolIdList[index]) - state.prune.toolIds.push(...pruneToolIds) - - const toolMetadata = new Map() - for (const id of pruneToolIds) { - const toolParameters = state.toolParameters.get(id) - if (toolParameters) { - toolMetadata.set(id, toolParameters) - } else { - logger.debug("No metadata found for ID", { id }) - } - } - - state.stats.pruneTokenCounter += calculateTokensSaved(state, messages, pruneToolIds) - - await sendUnifiedNotification( - client, - logger, - config, - state, - sessionId, - pruneToolIds, - toolMetadata, - reason, - currentParams, - workingDirectory, - distillation, - ) - - state.stats.totalPruneTokens += state.stats.pruneTokenCounter - state.stats.pruneTokenCounter = 0 - state.nudgeCounter = 0 - - saveSessionState(state, logger).catch((err) => - logger.error("Failed to persist state", { error: err.message }), - ) - - return formatPruningResultForTool(pruneToolIds, toolMetadata, workingDirectory) -} - -export function createDiscardTool(ctx: PruneToolContext): ReturnType { - return tool({ - description: DISCARD_TOOL_DESCRIPTION, - args: { - ids: tool.schema - .array(tool.schema.string()) - .describe( - "First element is the reason ('completion' or 'noise'), followed by numeric IDs as strings to discard", - ), - }, - async execute(args, toolCtx) { - // Parse reason from first element, numeric IDs from the rest - const reason = args.ids?.[0] - const validReasons = ["completion", "noise"] as const - if (typeof reason !== "string" || !validReasons.includes(reason as any)) { - ctx.logger.debug("Invalid discard reason provided: " + reason) - throw new Error( - "No valid reason found. 
Use 'completion' or 'noise' as the first element.", - ) - } - - const numericIds = args.ids.slice(1) - - return executePruneOperation(ctx, toolCtx, numericIds, reason as PruneReason, "Discard") - }, - }) -} - -export function createExtractTool(ctx: PruneToolContext): ReturnType { - return tool({ - description: EXTRACT_TOOL_DESCRIPTION, - args: { - ids: tool.schema - .array(tool.schema.string()) - .describe("Numeric IDs as strings to extract from the list"), - distillation: tool.schema - .array(tool.schema.string()) - .describe( - "REQUIRED. Array of strings, one per ID (positional: distillation[0] is for ids[0], etc.)", - ), - }, - async execute(args, toolCtx) { - if (!args.distillation || args.distillation.length === 0) { - ctx.logger.debug( - "Extract tool called without distillation: " + JSON.stringify(args), - ) - throw new Error( - "Missing distillation. You must provide a distillation string for each ID.", - ) - } - - // Log the distillation for debugging/analysis - ctx.logger.info("Distillation data received:") - ctx.logger.info(JSON.stringify(args.distillation, null, 2)) - - return executePruneOperation( - ctx, - toolCtx, - args.ids, - "extraction" as PruneReason, - "Extract", - args.distillation, - ) - }, - }) -} diff --git a/lib/strategies/utils.ts b/lib/strategies/utils.ts index 7ae04154..d89bb730 100644 --- a/lib/strategies/utils.ts +++ b/lib/strategies/utils.ts @@ -1,9 +1,31 @@ import { SessionState, WithParts } from "../state" -import { UserMessage } from "@opencode-ai/sdk/v2" +import { AssistantMessage, UserMessage } from "@opencode-ai/sdk/v2" import { Logger } from "../logger" import { countTokens as anthropicCountTokens } from "@anthropic-ai/tokenizer" import { getLastUserMessage, isMessageCompacted } from "../shared-utils" +/** + * Get current token usage from the last assistant message. + * Returns total tokens (input + output + reasoning + cache). 
+ */ +export function getCurrentTokenUsage(messages: WithParts[]): number { + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i] + if (msg.info.role === "assistant") { + const assistantInfo = msg.info as AssistantMessage + if (assistantInfo.tokens?.output > 0) { + const input = assistantInfo.tokens?.input || 0 + const output = assistantInfo.tokens?.output || 0 + const reasoning = assistantInfo.tokens?.reasoning || 0 + const cacheRead = assistantInfo.tokens?.cache?.read || 0 + const cacheWrite = assistantInfo.tokens?.cache?.write || 0 + return input + output + reasoning + cacheRead + cacheWrite + } + } + } + return 0 +} + export function getCurrentParams( state: SessionState, messages: WithParts[], @@ -42,8 +64,53 @@ export function countTokens(text: string): number { } } -function estimateTokensBatch(texts: string[]): number[] { - return texts.map(countTokens) +export function estimateTokensBatch(texts: string[]): number { + if (texts.length === 0) return 0 + return countTokens(texts.join(" ")) +} + +export function extractToolContent(part: any): string[] { + const contents: string[] = [] + + if (part.tool === "question") { + const questions = part.state?.input?.questions + if (questions !== undefined) { + const content = typeof questions === "string" ? questions : JSON.stringify(questions) + contents.push(content) + } + return contents + } + + if (part.tool === "edit" || part.tool === "write") { + if (part.state?.input) { + const inputContent = + typeof part.state.input === "string" + ? part.state.input + : JSON.stringify(part.state.input) + contents.push(inputContent) + } + } + + if (part.state?.status === "completed" && part.state?.output) { + const content = + typeof part.state.output === "string" + ? part.state.output + : JSON.stringify(part.state.output) + contents.push(content) + } else if (part.state?.status === "error" && part.state?.error) { + const content = + typeof part.state.error === "string" + ? 
part.state.error + : JSON.stringify(part.state.error) + contents.push(content) + } + + return contents +} + +export function countToolTokens(part: any): number { + const contents = extractToolContent(part) + return estimateTokensBatch(contents) } export const calculateTokensSaved = ( @@ -62,32 +129,10 @@ export const calculateTokensSaved = ( if (part.type !== "tool" || !pruneToolIds.includes(part.callID)) { continue } - if (part.tool === "question") { - const questions = part.state.input?.questions - if (questions !== undefined) { - const content = - typeof questions === "string" ? questions : JSON.stringify(questions) - contents.push(content) - } - continue - } - if (part.state.status === "completed") { - const content = - typeof part.state.output === "string" - ? part.state.output - : JSON.stringify(part.state.output) - contents.push(content) - } else if (part.state.status === "error") { - const content = - typeof part.state.error === "string" - ? part.state.error - : JSON.stringify(part.state.error) - contents.push(content) - } + contents.push(...extractToolContent(part)) } } - const tokenCounts: number[] = estimateTokensBatch(contents) - return tokenCounts.reduce((sum, count) => sum + count, 0) + return estimateTokensBatch(contents) } catch (error: any) { return 0 } diff --git a/lib/tools/compress.ts b/lib/tools/compress.ts new file mode 100644 index 00000000..68ac5a56 --- /dev/null +++ b/lib/tools/compress.ts @@ -0,0 +1,190 @@ +import { tool } from "@opencode-ai/plugin" +import type { WithParts, CompressSummary } from "../state" +import type { PruneToolContext } from "./types" +import { ensureSessionInitialized } from "../state" +import { saveSessionState } from "../state/persistence" +import { loadPrompt } from "../prompts" +import { estimateTokensBatch, getCurrentParams } from "../strategies/utils" +import { + collectContentInRange, + findStringInMessages, + collectToolIdsInRange, + collectMessageIdsInRange, +} from "./utils" +import { 
sendCompressNotification } from "../ui/notification" + +const COMPRESS_TOOL_DESCRIPTION = loadPrompt("compress-tool-spec") + +export function createCompressTool(ctx: PruneToolContext): ReturnType { + return tool({ + description: COMPRESS_TOOL_DESCRIPTION, + args: { + topic: tool.schema + .string() + .describe("Short label (3-5 words) for display - e.g., 'Auth System Exploration'"), + content: tool.schema + .object({ + startString: tool.schema + .string() + .describe("Unique text from conversation marking the beginning of range"), + endString: tool.schema + .string() + .describe("Unique text marking the end of range"), + summary: tool.schema + .string() + .describe("Complete technical summary replacing all content in range"), + }) + .describe("The compression details: boundaries and replacement summary"), + }, + async execute(args, toolCtx) { + const { client, state, logger } = ctx + const sessionId = toolCtx.sessionID + + await toolCtx.ask({ + permission: "compress", + patterns: ["*"], + always: ["*"], + metadata: {}, + }) + + const { topic, content } = args + const { startString, endString, summary } = content || {} + + if (!topic || typeof topic !== "string") { + throw new Error("topic is required and must be a non-empty string") + } + if (!startString || typeof startString !== "string") { + throw new Error("content.startString is required and must be a non-empty string") + } + if (!endString || typeof endString !== "string") { + throw new Error("content.endString is required and must be a non-empty string") + } + if (!summary || typeof summary !== "string") { + throw new Error("content.summary is required and must be a non-empty string") + } + + logger.info("Compress tool invoked") + // logger.info( + // JSON.stringify({ + // startString: startString?.substring(0, 50) + "...", + // endString: endString?.substring(0, 50) + "...", + // topic: topic, + // summaryLength: summary?.length, + // }), + // ) + + const messagesResponse = await client.session.messages({ + 
path: { id: sessionId }, + }) + const messages: WithParts[] = messagesResponse.data || messagesResponse + + await ensureSessionInitialized(client, state, sessionId, logger, messages) + + const startResult = findStringInMessages( + messages, + startString, + logger, + state.compressSummaries, + "startString", + ) + const endResult = findStringInMessages( + messages, + endString, + logger, + state.compressSummaries, + "endString", + ) + + if (startResult.messageIndex > endResult.messageIndex) { + throw new Error( + `startString appears after endString in the conversation. Start must come before end.`, + ) + } + + const containedToolIds = collectToolIdsInRange( + messages, + startResult.messageIndex, + endResult.messageIndex, + ) + + const containedMessageIds = collectMessageIdsInRange( + messages, + startResult.messageIndex, + endResult.messageIndex, + ) + + for (const id of containedToolIds) { + state.prune.toolIds.add(id) + } + for (const id of containedMessageIds) { + state.prune.messageIds.add(id) + } + + // Remove any existing summaries whose anchors are now inside this range + // This prevents duplicate injections when a larger compress subsumes a smaller one + const removedSummaries = state.compressSummaries.filter((s) => + containedMessageIds.includes(s.anchorMessageId), + ) + if (removedSummaries.length > 0) { + // logger.info("Removing subsumed compress summaries", { + // count: removedSummaries.length, + // anchorIds: removedSummaries.map((s) => s.anchorMessageId), + // }) + state.compressSummaries = state.compressSummaries.filter( + (s) => !containedMessageIds.includes(s.anchorMessageId), + ) + } + + const compressSummary: CompressSummary = { + anchorMessageId: startResult.messageId, + summary: summary, + } + state.compressSummaries.push(compressSummary) + + const contentsToTokenize = collectContentInRange( + messages, + startResult.messageIndex, + endResult.messageIndex, + ) + const estimatedCompressedTokens = estimateTokensBatch(contentsToTokenize) + + 
state.stats.pruneTokenCounter += estimatedCompressedTokens + + const currentParams = getCurrentParams(state, messages, logger) + await sendCompressNotification( + client, + logger, + ctx.config, + state, + sessionId, + containedToolIds, + containedMessageIds, + topic, + summary, + startResult, + endResult, + messages.length, + currentParams, + ) + + state.stats.totalPruneTokens += state.stats.pruneTokenCounter + state.stats.pruneTokenCounter = 0 + state.nudgeCounter = 0 + + // logger.info("Compress range created", { + // startMessageId: startResult.messageId, + // endMessageId: endResult.messageId, + // toolIdsRemoved: containedToolIds.length, + // messagesInRange: containedMessageIds.length, + // estimatedTokens: estimatedCompressedTokens, + // }) + + saveSessionState(state, logger).catch((err) => + logger.error("Failed to persist state", { error: err.message }), + ) + + const messagesCompressed = endResult.messageIndex - startResult.messageIndex + 1 + return `Compressed ${messagesCompressed} messages (${containedToolIds.length} tool calls) into summary. 
The content will be replaced with your summary.` + }, + }) +} diff --git a/lib/tools/distill.ts b/lib/tools/distill.ts new file mode 100644 index 00000000..b67d56b4 --- /dev/null +++ b/lib/tools/distill.ts @@ -0,0 +1,60 @@ +import { tool } from "@opencode-ai/plugin" +import type { PruneToolContext } from "./types" +import { executePruneOperation } from "./prune-shared" +import { PruneReason } from "../ui/notification" +import { loadPrompt } from "../prompts" + +const DISTILL_TOOL_DESCRIPTION = loadPrompt("distill-tool-spec") + +export function createDistillTool(ctx: PruneToolContext): ReturnType { + return tool({ + description: DISTILL_TOOL_DESCRIPTION, + args: { + targets: tool.schema + .array( + tool.schema.object({ + id: tool.schema + .string() + .describe("Numeric ID from the list"), + distillation: tool.schema + .string() + .describe("Complete technical distillation for this tool output"), + }), + ) + .describe("Tool outputs to distill, each pairing an ID with its distillation"), + }, + async execute(args, toolCtx) { + if (!args.targets || !Array.isArray(args.targets) || args.targets.length === 0) { + ctx.logger.debug("Distill tool called without targets: " + JSON.stringify(args)) + throw new Error("Missing targets. 
Provide at least one { id, distillation } entry.") + } + + for (const target of args.targets) { + if (!target.id || typeof target.id !== "string" || target.id.trim() === "") { + ctx.logger.debug("Distill target missing id: " + JSON.stringify(target)) + throw new Error( + "Each target must have an id (numeric string from ).", + ) + } + if (!target.distillation || typeof target.distillation !== "string") { + ctx.logger.debug( + "Distill target missing distillation: " + JSON.stringify(target), + ) + throw new Error("Each target must have a distillation string.") + } + } + + const ids = args.targets.map((t) => t.id) + const distillations = args.targets.map((t) => t.distillation) + + return executePruneOperation( + ctx, + toolCtx, + ids, + "extraction" as PruneReason, + "Distill", + distillations, + ) + }, + }) +} diff --git a/lib/tools/index.ts b/lib/tools/index.ts new file mode 100644 index 00000000..32a5e9c8 --- /dev/null +++ b/lib/tools/index.ts @@ -0,0 +1,4 @@ +export { PruneToolContext } from "./types" +export { createPruneTool } from "./prune" +export { createDistillTool } from "./distill" +export { createCompressTool } from "./compress" diff --git a/lib/tools/prune-shared.ts b/lib/tools/prune-shared.ts new file mode 100644 index 00000000..c1253f76 --- /dev/null +++ b/lib/tools/prune-shared.ts @@ -0,0 +1,161 @@ +import type { SessionState, ToolParameterEntry, WithParts } from "../state" +import type { PluginConfig } from "../config" +import type { Logger } from "../logger" +import type { PruneToolContext } from "./types" +import { syncToolCache } from "../state/tool-cache" +import { PruneReason, sendUnifiedNotification } from "../ui/notification" +import { formatPruningResultForTool } from "../ui/utils" +import { ensureSessionInitialized } from "../state" +import { saveSessionState } from "../state/persistence" +import { calculateTokensSaved, getCurrentParams } from "../strategies/utils" +import { getFilePathsFromParameters, isProtected } from 
"../protected-file-patterns" + +// Shared logic for executing prune operations. +export async function executePruneOperation( + ctx: PruneToolContext, + toolCtx: { sessionID: string }, + ids: string[], + reason: PruneReason, + toolName: string, + distillation?: string[], +): Promise { + const { client, state, logger, config, workingDirectory } = ctx + const sessionId = toolCtx.sessionID + + logger.info(`${toolName} tool invoked`) + logger.info(JSON.stringify(reason ? { ids, reason } : { ids })) + + if (!ids || ids.length === 0) { + logger.debug(`${toolName} tool called but ids is empty or undefined`) + throw new Error( + `No IDs provided. Check the list for available IDs to ${toolName.toLowerCase()}.`, + ) + } + + const numericToolIds: number[] = ids + .map((id) => parseInt(id, 10)) + .filter((n): n is number => !isNaN(n)) + + if (numericToolIds.length === 0) { + logger.debug(`No numeric tool IDs provided for ${toolName}: ` + JSON.stringify(ids)) + throw new Error("No numeric IDs provided. 
Format: ids: [id1, id2, ...]") + } + + // Fetch messages to calculate tokens and find current agent + const messagesResponse = await client.session.messages({ + path: { id: sessionId }, + }) + const messages: WithParts[] = messagesResponse.data || messagesResponse + + await ensureSessionInitialized(ctx.client, state, sessionId, logger, messages) + await syncToolCache(state, config, logger, messages) + + const currentParams = getCurrentParams(state, messages, logger) + + const toolIdList = state.toolIdList + + const validNumericIds: number[] = [] + const skippedIds: string[] = [] + + // Validate and filter IDs + for (const index of numericToolIds) { + // Validate that index is within bounds + if (index < 0 || index >= toolIdList.length) { + logger.debug(`Rejecting prune request - index out of bounds: ${index}`) + skippedIds.push(index.toString()) + continue + } + + const id = toolIdList[index] + const metadata = state.toolParameters.get(id) + + // Validate that all IDs exist in cache and aren't protected + // (rejects hallucinated IDs and turn-protected tools not shown in ) + if (!metadata) { + logger.debug( + "Rejecting prune request - ID not in cache (turn-protected or hallucinated)", + { index, id }, + ) + skippedIds.push(index.toString()) + continue + } + + const allProtectedTools = config.tools.settings.protectedTools + if (allProtectedTools.includes(metadata.tool)) { + logger.debug("Rejecting prune request - protected tool", { + index, + id, + tool: metadata.tool, + }) + skippedIds.push(index.toString()) + continue + } + + const filePaths = getFilePathsFromParameters(metadata.tool, metadata.parameters) + if (isProtected(filePaths, config.protectedFilePatterns)) { + logger.debug("Rejecting prune request - protected file path", { + index, + id, + tool: metadata.tool, + filePaths, + }) + skippedIds.push(index.toString()) + continue + } + + validNumericIds.push(index) + } + + if (validNumericIds.length === 0) { + const errorMsg = + skippedIds.length > 0 + ? 
`Invalid IDs provided: [${skippedIds.join(", ")}]. Only use numeric IDs from the list.` + : `No valid IDs provided to ${toolName.toLowerCase()}.` + throw new Error(errorMsg) + } + + const pruneToolIds: string[] = validNumericIds.map((index) => toolIdList[index]) + for (const id of pruneToolIds) { + state.prune.toolIds.add(id) + } + + const toolMetadata = new Map() + for (const id of pruneToolIds) { + const toolParameters = state.toolParameters.get(id) + if (toolParameters) { + toolMetadata.set(id, toolParameters) + } else { + logger.debug("No metadata found for ID", { id }) + } + } + + state.stats.pruneTokenCounter += calculateTokensSaved(state, messages, pruneToolIds) + + await sendUnifiedNotification( + client, + logger, + config, + state, + sessionId, + pruneToolIds, + toolMetadata, + reason, + currentParams, + workingDirectory, + distillation, + ) + + state.stats.totalPruneTokens += state.stats.pruneTokenCounter + state.stats.pruneTokenCounter = 0 + state.nudgeCounter = 0 + + saveSessionState(state, logger).catch((err) => + logger.error("Failed to persist state", { error: err.message }), + ) + + let result = formatPruningResultForTool(pruneToolIds, toolMetadata, workingDirectory) + if (skippedIds.length > 0) { + result += `\n\nNote: ${skippedIds.length} IDs were skipped (invalid, protected, or missing metadata): ${skippedIds.join(", ")}` + } + return result +} diff --git a/lib/tools/prune.ts b/lib/tools/prune.ts new file mode 100644 index 00000000..17065aa9 --- /dev/null +++ b/lib/tools/prune.ts @@ -0,0 +1,36 @@ +import { tool } from "@opencode-ai/plugin" +import type { PruneToolContext } from "./types" +import { executePruneOperation } from "./prune-shared" +import { PruneReason } from "../ui/notification" +import { loadPrompt } from "../prompts" + +const PRUNE_TOOL_DESCRIPTION = loadPrompt("prune-tool-spec") + +export function createPruneTool(ctx: PruneToolContext): ReturnType { + return tool({ + description: PRUNE_TOOL_DESCRIPTION, + args: { + ids: 
tool.schema + .array(tool.schema.string()) + .describe("Numeric IDs as strings from the list to prune"), + }, + async execute(args, toolCtx) { + if (!args.ids || !Array.isArray(args.ids) || args.ids.length === 0) { + ctx.logger.debug("Prune tool called without ids: " + JSON.stringify(args)) + throw new Error("Missing ids. You must provide at least one ID to prune.") + } + + if (!args.ids.every((id) => typeof id === "string" && id.trim() !== "")) { + ctx.logger.debug("Prune tool called with invalid ids: " + JSON.stringify(args)) + throw new Error( + 'Invalid ids. All IDs must be numeric strings (e.g., "1", "23") from the list.', + ) + } + + const numericIds = args.ids + const reason = "noise" + + return executePruneOperation(ctx, toolCtx, numericIds, reason, "Prune") + }, + }) +} diff --git a/lib/tools/types.ts b/lib/tools/types.ts new file mode 100644 index 00000000..c4950e47 --- /dev/null +++ b/lib/tools/types.ts @@ -0,0 +1,11 @@ +import type { SessionState } from "../state" +import type { PluginConfig } from "../config" +import type { Logger } from "../logger" + +export interface PruneToolContext { + client: any + state: SessionState + logger: Logger + config: PluginConfig + workingDirectory: string +} diff --git a/lib/tools/utils.ts b/lib/tools/utils.ts new file mode 100644 index 00000000..8adec13e --- /dev/null +++ b/lib/tools/utils.ts @@ -0,0 +1,165 @@ +import type { WithParts, CompressSummary } from "../state" +import type { Logger } from "../logger" + +/** + * Searches messages for a string and returns the message ID where it's found. + * Searches in text parts, tool outputs, tool inputs, and other textual content. + * Also searches through existing compress summaries to enable chained compression. + * Throws an error if the string is not found or found more than once. 
+ */ +export function findStringInMessages( + messages: WithParts[], + searchString: string, + logger: Logger, + compressSummaries: CompressSummary[] = [], + stringType: "startString" | "endString", +): { messageId: string; messageIndex: number } { + const matches: { messageId: string; messageIndex: number }[] = [] + + // First, search through existing compress summaries + // This allows referencing text from previous compress operations + for (const summary of compressSummaries) { + if (summary.summary.includes(searchString)) { + const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId) + if (anchorIndex !== -1) { + matches.push({ + messageId: summary.anchorMessageId, + messageIndex: anchorIndex, + }) + } + } + } + + // Then search through raw messages + for (let i = 0; i < messages.length; i++) { + const msg = messages[i] + const parts = Array.isArray(msg.parts) ? msg.parts : [] + + for (const part of parts) { + let content = "" + + if (part.type === "text" && typeof part.text === "string") { + content = part.text + } else if (part.type === "tool" && part.state?.status === "completed") { + if (typeof part.state.output === "string") { + content = part.state.output + } + if (part.state.input) { + const inputStr = + typeof part.state.input === "string" + ? part.state.input + : JSON.stringify(part.state.input) + content += " " + inputStr + } + } + + if (content.includes(searchString)) { + matches.push({ messageId: msg.info.id, messageIndex: i }) + } + } + } + + if (matches.length === 0) { + throw new Error( + `${stringType} not found in conversation. Make sure the string exists and is spelled exactly as it appears.`, + ) + } + + if (matches.length > 1) { + throw new Error( + `Found multiple matches for ${stringType}. Provide more surrounding context to uniquely identify the intended match.`, + ) + } + + return matches[0] +} + +/** + * Collects all tool callIDs from messages between start and end indices (inclusive). 
+ */ +export function collectToolIdsInRange( + messages: WithParts[], + startIndex: number, + endIndex: number, +): string[] { + const toolIds: string[] = [] + + for (let i = startIndex; i <= endIndex; i++) { + const msg = messages[i] + const parts = Array.isArray(msg.parts) ? msg.parts : [] + + for (const part of parts) { + if (part.type === "tool" && part.callID) { + if (!toolIds.includes(part.callID)) { + toolIds.push(part.callID) + } + } + } + } + + return toolIds +} + +/** + * Collects all message IDs from messages between start and end indices (inclusive). + */ +export function collectMessageIdsInRange( + messages: WithParts[], + startIndex: number, + endIndex: number, +): string[] { + const messageIds: string[] = [] + + for (let i = startIndex; i <= endIndex; i++) { + const msgId = messages[i].info.id + if (!messageIds.includes(msgId)) { + messageIds.push(msgId) + } + } + + return messageIds +} + +/** + * Collects all textual content (text parts, tool inputs, and tool outputs) + * from a range of messages. Used for token estimation. + */ +export function collectContentInRange( + messages: WithParts[], + startIndex: number, + endIndex: number, +): string[] { + const contents: string[] = [] + for (let i = startIndex; i <= endIndex; i++) { + const msg = messages[i] + const parts = Array.isArray(msg.parts) ? msg.parts : [] + for (const part of parts) { + if (part.type === "text") { + contents.push(part.text) + } else if (part.type === "tool") { + const toolState = part.state as any + if (toolState?.input) { + contents.push( + typeof toolState.input === "string" + ? toolState.input + : JSON.stringify(toolState.input), + ) + } + if (toolState?.status === "completed" && toolState?.output) { + contents.push( + typeof toolState.output === "string" + ? toolState.output + : JSON.stringify(toolState.output), + ) + } else if (toolState?.status === "error" && toolState?.error) { + contents.push( + typeof toolState.error === "string" + ? 
toolState.error + : JSON.stringify(toolState.error), + ) + } + } + } + } + return contents +} diff --git a/lib/ui/notification.ts b/lib/ui/notification.ts index acb948cd..9d628175 100644 --- a/lib/ui/notification.ts +++ b/lib/ui/notification.ts @@ -6,6 +6,7 @@ import { formatPrunedItemsList, formatStatsHeader, formatTokenCount, + formatProgressBar, } from "./utils" import { ToolParameterEntry } from "../state" import { PluginConfig } from "../config" @@ -25,7 +26,7 @@ function buildMinimalMessage( ): string { const extractedTokens = countDistillationTokens(distillation) const extractedSuffix = - extractedTokens > 0 ? ` (extracted ${formatTokenCount(extractedTokens)})` : "" + extractedTokens > 0 ? ` (distilled ${formatTokenCount(extractedTokens)})` : "" const reasonSuffix = reason && extractedTokens === 0 ? ` — ${PRUNE_REASON_LABELS[reason]}` : "" let message = formatStatsHeader(state.stats.totalPruneTokens, state.stats.pruneTokenCounter) + @@ -50,7 +51,7 @@ function buildDetailedMessage( const pruneTokenCounterStr = `~${formatTokenCount(state.stats.pruneTokenCounter)}` const extractedTokens = countDistillationTokens(distillation) const extractedSuffix = - extractedTokens > 0 ? `, extracted ${formatTokenCount(extractedTokens)}` : "" + extractedTokens > 0 ? `, distilled ${formatTokenCount(extractedTokens)}` : "" const reasonLabel = reason && extractedTokens === 0 ? ` — ${PRUNE_REASON_LABELS[reason]}` : "" message += `\n\n▣ Pruning (${pruneTokenCounterStr}${extractedSuffix})${reasonLabel}` @@ -62,6 +63,42 @@ function buildDetailedMessage( return (message + formatExtracted(showDistillation ? 
distillation : undefined)).trim() } +const TOAST_BODY_MAX_LINES = 12 +const TOAST_SUMMARY_MAX_CHARS = 600 + +function truncateToastBody(body: string, maxLines: number = TOAST_BODY_MAX_LINES): string { + const lines = body.split("\n") + if (lines.length <= maxLines) { + return body + } + const kept = lines.slice(0, maxLines - 1) + const remaining = lines.length - maxLines + 1 + return kept.join("\n") + `\n... and ${remaining} more` +} + +function truncateToastSummary(summary: string, maxChars: number = TOAST_SUMMARY_MAX_CHARS): string { + if (summary.length <= maxChars) { + return summary + } + return summary.slice(0, maxChars - 3) + "..." +} + +function truncateExtractedSection( + message: string, + maxChars: number = TOAST_SUMMARY_MAX_CHARS, +): string { + const marker = "\n\n▣ Extracted" + const index = message.indexOf(marker) + if (index === -1) { + return message + } + const extracted = message.slice(index) + if (extracted.length <= maxChars) { + return message + } + return message.slice(0, index) + truncateToastSummary(extracted, maxChars) +} + export async function sendUnifiedNotification( client: any, logger: Logger, @@ -84,7 +121,7 @@ export async function sendUnifiedNotification( return false } - const showDistillation = config.tools.extract.showDistillation + const showDistillation = config.tools.distill.showDistillation const message = config.pruneNotification === "minimal" @@ -99,6 +136,97 @@ export async function sendUnifiedNotification( showDistillation, ) + if (config.pruneNotificationType === "toast") { + let toastMessage = truncateExtractedSection(message) + toastMessage = + config.pruneNotification === "minimal" ? 
toastMessage : truncateToastBody(toastMessage) + + await client.tui.showToast({ + body: { + title: "DCP: Prune Notification", + message: toastMessage, + variant: "info", + duration: 5000, + }, + }) + return true + } + + await sendIgnoredMessage(client, sessionId, message, params, logger) + return true +} + +export async function sendCompressNotification( + client: any, + logger: Logger, + config: PluginConfig, + state: SessionState, + sessionId: string, + toolIds: string[], + messageIds: string[], + topic: string, + summary: string, + startResult: any, + endResult: any, + totalMessages: number, + params: any, +): Promise { + if (config.pruneNotification === "off") { + return false + } + + let message: string + + if (config.pruneNotification === "minimal") { + message = formatStatsHeader(state.stats.totalPruneTokens, state.stats.pruneTokenCounter) + } else { + message = formatStatsHeader(state.stats.totalPruneTokens, state.stats.pruneTokenCounter) + + const pruneTokenCounterStr = `~${formatTokenCount(state.stats.pruneTokenCounter)}` + const progressBar = formatProgressBar( + totalMessages, + startResult.messageIndex, + endResult.messageIndex, + 25, + ) + message += `\n\n▣ Compressing (${pruneTokenCounterStr}) ${progressBar}` + message += `\n→ Topic: ${topic}` + message += `\n→ Items: ${messageIds.length} messages` + if (toolIds.length > 0) { + message += ` and ${toolIds.length} tools condensed` + } else { + message += ` condensed` + } + if (config.tools.compress.showCompression) { + message += `\n→ Compression: ${summary}` + } + } + + if (config.pruneNotificationType === "toast") { + let toastMessage = message + if (config.tools.compress.showCompression) { + const truncatedSummary = truncateToastSummary(summary) + if (truncatedSummary !== summary) { + toastMessage = toastMessage.replace( + `\n→ Compression: ${summary}`, + `\n→ Compression: ${truncatedSummary}`, + ) + } + } + toastMessage = + config.pruneNotification === "minimal" ? 
toastMessage : truncateToastBody(toastMessage) + + await client.tui.showToast({ + body: { + title: "DCP: Compress Notification", + message: toastMessage, + variant: "info", + duration: 5000, + }, + }) + return true + } + await sendIgnoredMessage(client, sessionId, message, params, logger) return true } diff --git a/lib/ui/utils.ts b/lib/ui/utils.ts index 9134a5cf..2f6fc754 100644 --- a/lib/ui/utils.ts +++ b/lib/ui/utils.ts @@ -35,6 +35,29 @@ export function truncate(str: string, maxLen: number = 60): string { return str.slice(0, maxLen - 3) + "..." } +export function formatProgressBar( + total: number, + start: number, + end: number, + width: number = 20, +): string { + if (total <= 0) return `│${" ".repeat(width)}│` + + const startIdx = Math.floor((start / total) * width) + const endIdx = Math.min(width - 1, Math.floor((end / total) * width)) + + let bar = "" + for (let i = 0; i < width; i++) { + if (i >= startIdx && i <= endIdx) { + bar += "░" + } else { + bar += "█" + } + } + + return `│${bar}│` +} + export function shortenPath(input: string, workingDirectory?: string): string { const inPathMatch = input.match(/^(.+) in (.+)$/) if (inPathMatch) { diff --git a/package-lock.json b/package-lock.json index 43517ddd..16116939 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,23 +1,24 @@ { "name": "@tarquinen/opencode-dcp", - "version": "1.2.8", + "version": "1.4.2-beta.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@tarquinen/opencode-dcp", - "version": "1.2.8", - "license": "MIT", + "version": "1.4.2-beta.0", + "license": "AGPL-3.0-or-later", "dependencies": { "@anthropic-ai/tokenizer": "^0.0.4", - "@opencode-ai/sdk": "^1.1.3", + "@opencode-ai/sdk": "^1.1.48", "jsonc-parser": "^3.3.1", - "zod": "^4.1.13" + "ulid": "^3.0.2", + "zod": "^4.3.6" }, "devDependencies": { - "@opencode-ai/plugin": "^1.0.143", - "@types/node": "^24.10.1", - "prettier": "^3.4.2", + "@opencode-ai/plugin": "^1.1.49", + "@types/node": "^25.1.0", + "prettier": 
"^3.8.1", "tsx": "^4.21.0", "typescript": "^5.9.3" }, @@ -493,21 +494,16 @@ } }, "node_modules/@opencode-ai/plugin": { - "version": "1.0.143", - "resolved": "https://registry.npmjs.org/@opencode-ai/plugin/-/plugin-1.0.143.tgz", - "integrity": "sha512-yzaCmdazVJMDADJLbMM8KGp1X+Hd/HVyIXMlNt9qcvz/fcs/ET4EwHJsJaQi/9m/jLJ+plwBJAeIW08BMrECPg==", + "version": "1.1.49", + "resolved": "https://registry.npmjs.org/@opencode-ai/plugin/-/plugin-1.1.49.tgz", + "integrity": "sha512-+FEE730fLJtoHCta5MXixOIzI9Cjos700QDNnAx6mA8YjFzO+kABnyqLQrCgZ9wUPJgiKH9bnHxT7AdRjWsNPw==", "dev": true, + "license": "MIT", "dependencies": { - "@opencode-ai/sdk": "1.0.143", + "@opencode-ai/sdk": "1.1.49", "zod": "4.1.8" } }, - "node_modules/@opencode-ai/plugin/node_modules/@opencode-ai/sdk": { - "version": "1.0.143", - "resolved": "https://registry.npmjs.org/@opencode-ai/sdk/-/sdk-1.0.143.tgz", - "integrity": "sha512-dtmkBfJ7IIAHzL6KCzAlwc9GybfJONVeCsF6ePYySpkuhslDbRkZBJYb5vqGd1H5zdsgjc6JjuvmOf0rPWUL6A==", - "dev": true - }, "node_modules/@opencode-ai/plugin/node_modules/zod": { "version": "4.1.8", "resolved": "https://registry.npmjs.org/zod/-/zod-4.1.8.tgz", @@ -519,15 +515,15 @@ } }, "node_modules/@opencode-ai/sdk": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/@opencode-ai/sdk/-/sdk-1.1.3.tgz", - "integrity": "sha512-P4ERbfuT7CilZYyB1l6J/DM6KD0i5V15O+xvsjUitxSS3S2Gr0YsA4bmXU+EsBQGHryUHc81bhJF49a8wSU+tw==", + "version": "1.1.49", + "resolved": "https://registry.npmjs.org/@opencode-ai/sdk/-/sdk-1.1.49.tgz", + "integrity": "sha512-F5ZkgiqOiV+z3U4zeBLvrmNZv5MwNFMTWM+HWhChD+/UEswIebQKk9UMz9lPX4fswexIJdFPwFI/TBdNyZfKMg==", "license": "MIT" }, "node_modules/@types/node": { - "version": "24.10.1", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz", - "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==", + "version": "25.1.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.1.0.tgz", + 
"integrity": "sha512-t7frlewr6+cbx+9Ohpl0NOTKXZNV9xHRmNOvql47BFJKcEG1CxtxlPEEe+gR9uhVWM4DwhnvTF110mIL4yP9RA==", "dev": true, "license": "MIT", "dependencies": { @@ -611,9 +607,9 @@ "license": "MIT" }, "node_modules/prettier": { - "version": "3.7.4", - "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.7.4.tgz", - "integrity": "sha512-v6UNi1+3hSlVvv8fSaoUbggEM5VErKmmpGA7Pl3HF8V6uKY7rvClBOJlH6yNwQtfTueNkGVpOv/mtWL9L4bgRA==", + "version": "3.8.1", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.8.1.tgz", + "integrity": "sha512-UOnG6LftzbdaHZcKoPFtOcCKztrQ57WkHDeRD9t/PTQtmT0NHSeWWepj6pS0z/N7+08BHFDQVUrfmfMRcZwbMg==", "dev": true, "license": "MIT", "bin": { @@ -676,6 +672,15 @@ "node": ">=14.17" } }, + "node_modules/ulid": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/ulid/-/ulid-3.0.2.tgz", + "integrity": "sha512-yu26mwteFYzBAot7KVMqFGCVpsF6g8wXfJzQUHvu1no3+rRRSFcSV2nKeYvNPLD2J4b08jYBDhHUjeH0ygIl9w==", + "license": "MIT", + "bin": { + "ulid": "dist/cli.js" + } + }, "node_modules/undici-types": { "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", @@ -684,9 +689,9 @@ "license": "MIT" }, "node_modules/zod": { - "version": "4.1.13", - "resolved": "https://registry.npmjs.org/zod/-/zod-4.1.13.tgz", - "integrity": "sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig==", + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "license": "MIT", "funding": { "url": "https://github.com/sponsors/colinhacks" diff --git a/package.json b/package.json index d35967c2..aca577cc 100644 --- a/package.json +++ b/package.json @@ -1,21 +1,23 @@ { "$schema": "https://json.schemastore.org/package.json", "name": "@tarquinen/opencode-dcp", - "version": "1.2.8", + "version": "1.4.2-beta.0", "type": "module", 
"description": "OpenCode plugin that optimizes token usage by pruning obsolete tool outputs from conversation context", "main": "./dist/index.js", "types": "./dist/index.d.ts", "scripts": { "clean": "rm -rf dist", + "generate:prompts": "tsx scripts/generate-prompts.ts", + "prebuild": "npm run generate:prompts", "build": "npm run clean && tsc", - "postbuild": "rm -rf dist/logs", "prepublishOnly": "npm run build", "dev": "opencode plugin dev", - "typecheck": "tsc --noEmit", + "typecheck": "npm run generate:prompts && tsc --noEmit", "test": "node --import tsx --test tests/*.test.ts", "format": "prettier --write .", - "format:check": "prettier --check ." + "format:check": "prettier --check .", + "dcp": "tsx scripts/print.ts" }, "keywords": [ "opencode", @@ -35,20 +37,21 @@ }, "homepage": "https://github.com/Tarquinen/opencode-dynamic-context-pruning#readme", "author": "tarquinen", - "license": "MIT", + "license": "AGPL-3.0-or-later", "peerDependencies": { "@opencode-ai/plugin": ">=0.13.7" }, "dependencies": { "@anthropic-ai/tokenizer": "^0.0.4", - "@opencode-ai/sdk": "^1.1.3", + "@opencode-ai/sdk": "^1.1.48", "jsonc-parser": "^3.3.1", - "zod": "^4.1.13" + "ulid": "^3.0.2", + "zod": "^4.3.6" }, "devDependencies": { - "@opencode-ai/plugin": "^1.0.143", - "@types/node": "^24.10.1", - "prettier": "^3.4.2", + "@opencode-ai/plugin": "^1.1.49", + "@types/node": "^25.1.0", + "prettier": "^3.8.1", "tsx": "^4.21.0", "typescript": "^5.9.3" }, diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 00000000..a99c256b --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,41 @@ +# DCP CLI + +Dev tool for previewing prompt outputs. Verify parsing works correctly and quickly check specific tool combinations. 
+ +## Usage + +```bash +bun run dcp [TYPE] [-p] [-d] [-c] +``` + +## Types + +| Flag | Description | +| -------------------- | --------------------------- | +| `--system` | System prompt | +| `--nudge` | Nudge prompt | +| `--prune-list` | Example prunable tools list | +| `--compress-context` | Example compress context | + +## Tool Flags + +| Flag | Description | +| ---------------- | -------------------- | +| `-d, --distill` | Enable distill tool | +| `-c, --compress` | Enable compress tool | +| `-p, --prune` | Enable prune tool | + +If no tool flags specified, all are enabled. + +## Examples + +```bash +bun run dcp --system -p -d -c # System prompt with all tools +bun run dcp --system -p # System prompt with prune only +bun run dcp --nudge -d -c # Nudge with distill and compress +bun run dcp --prune-list # Example prunable tools list +``` + +## Purpose + +This CLI does NOT ship with the plugin. It's purely for DX - iterate on prompt templates and verify the `` conditional parsing produces the expected output. diff --git a/scripts/generate-prompts.ts b/scripts/generate-prompts.ts new file mode 100644 index 00000000..e83ffe75 --- /dev/null +++ b/scripts/generate-prompts.ts @@ -0,0 +1,55 @@ +#!/usr/bin/env tsx +/** + * Prebuild script that generates TypeScript files from Markdown prompts. + * + * This solves the issue where readFileSync with __dirname fails when the + * package is bundled by Bun (see issue #222, PR #272, #327). + * + * The .md files are kept for convenient editing, and this script generates + * .ts files with exported string constants that bundle correctly. 
+ */ + +import { readFileSync, writeFileSync, readdirSync, mkdirSync, unlinkSync } from "node:fs" +import { dirname, join, basename } from "node:path" +import { fileURLToPath } from "node:url" + +const __dirname = dirname(fileURLToPath(import.meta.url)) +const PROMPTS_DIR = join(__dirname, "..", "lib", "prompts") +const CODEGEN_DIR = join(PROMPTS_DIR, "_codegen") + +// Ensure _codegen directory exists +mkdirSync(CODEGEN_DIR, { recursive: true }) + +// MIGRATION - Clean up old generated files from the prompts directory root (they're now in _codegen/) +const oldGeneratedFiles = readdirSync(PROMPTS_DIR).filter((f) => f.endsWith(".generated.ts")) +for (const file of oldGeneratedFiles) { + unlinkSync(join(PROMPTS_DIR, file)) + console.log(`Cleaned up old: ${file}`) +} + +// Find all .md files in the prompts directory +const mdFiles = readdirSync(PROMPTS_DIR).filter((f) => f.endsWith(".md")) + +for (const mdFile of mdFiles) { + const mdPath = join(PROMPTS_DIR, mdFile) + const baseName = basename(mdFile, ".md") + const constName = baseName.toUpperCase().replace(/-/g, "_") + const tsPath = join(CODEGEN_DIR, `${baseName}.generated.ts`) + + const content = readFileSync(mdPath, "utf-8") + + // Escape backticks and ${} template expressions for safe embedding in template literal + const escaped = content.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, "\\${") + + const tsContent = `// AUTO-GENERATED FILE - DO NOT EDIT +// Generated from ${mdFile} by scripts/generate-prompts.ts +// To modify, edit ${mdFile} and run \`npm run generate:prompts\` + +export const ${constName} = \`${escaped}\` +` + + writeFileSync(tsPath, tsContent) + console.log(`Generated: ${baseName}.generated.ts`) +} + +console.log(`Done! 
Generated ${mdFiles.length} TypeScript file(s) from Markdown prompts.`) diff --git a/scripts/opencode-dcp-stats b/scripts/opencode-dcp-stats new file mode 100755 index 00000000..ab0059d5 --- /dev/null +++ b/scripts/opencode-dcp-stats @@ -0,0 +1,489 @@ +#!/usr/bin/env python3 +""" +Analyze Dynamic Context Pruning (DCP) tool impact on cache efficiency. +Tracks cache hit rates and context size changes before/after DCP tool invocations. + +Usage: opencode-dcp-stats [--sessions N] [--min-messages M] [--json] [--verbose] +""" + +import json +import argparse +from pathlib import Path +from datetime import datetime +from collections import defaultdict +from typing import Optional + +# DCP tool names (across different plugin versions) +DCP_TOOLS = { + "prune", "discard", "extract", "context_pruning", + "squash", "compress", "consolidate", "distill" +} + +# Anthropic pricing: cache read is ~10% of input cost +CACHE_READ_COST_PER_1K = 0.00030 # $0.30 per 1M tokens +INPUT_COST_PER_1K = 0.003 # $3.00 per 1M tokens + + +def get_session_messages(storage: Path, session_id: str) -> list[dict]: + """Get all messages for a session, sorted by creation order.""" + message_dir = storage / "message" / session_id + if not message_dir.exists(): + return [] + + messages = [] + for msg_file in message_dir.glob("*.json"): + try: + msg = json.loads(msg_file.read_text()) + msg["_file"] = msg_file + msg["_id"] = msg_file.stem + messages.append(msg) + except (json.JSONDecodeError, IOError): + pass + + return sorted(messages, key=lambda m: m.get("_id", "")) + + +def get_message_parts(storage: Path, message_id: str) -> list[dict]: + """Get all parts for a message, sorted by creation order.""" + parts_dir = storage / "part" / message_id + if not parts_dir.exists(): + return [] + + parts = [] + for part_file in parts_dir.glob("*.json"): + try: + part = json.loads(part_file.read_text()) + part["_file"] = part_file + part["_id"] = part_file.stem + parts.append(part) + except (json.JSONDecodeError, 
IOError): + pass + + return sorted(parts, key=lambda p: p.get("_id", "")) + + +def is_ignored_message(message: dict, parts: list[dict]) -> bool: + """ + Check if a message should be ignored (DCP notification messages). + Returns True if message has no parts OR all parts have ignored=true. + Mirrors the isIgnoredUserMessage logic from the DCP plugin. + """ + if not parts: + return True + + # Check text parts for ignored flag + text_parts = [p for p in parts if p.get("type") == "text"] + if not text_parts: + return False + + for part in text_parts: + if not part.get("ignored", False): + return False + + return True + + +def count_real_user_messages(storage: Path, session_id: str) -> int: + """Count user messages that are not ignored (real user interactions).""" + messages = get_session_messages(storage, session_id) + count = 0 + + for msg in messages: + # Only count user role messages + if msg.get("role") != "user": + continue + + msg_id = msg.get("_id", "") + parts = get_message_parts(storage, msg_id) + + if not is_ignored_message(msg, parts): + count += 1 + + return count + + +def extract_step_finish(parts: list[dict]) -> Optional[dict]: + """Extract step-finish record from message parts.""" + for part in parts: + if part.get("type") == "step-finish" and "tokens" in part: + return part + return None + + +def extract_dcp_tools(parts: list[dict]) -> list[dict]: + """Extract all DCP tool calls from message parts.""" + dcp_calls = [] + for part in parts: + if part.get("type") == "tool": + tool_name = part.get("tool", "") + if tool_name in DCP_TOOLS: + dcp_calls.append({ + "tool": tool_name, + "state": part.get("state", {}), + "part_id": part.get("_id", "") + }) + return dcp_calls + + +def calc_cache_hit_rate(tokens: dict) -> float: + """Calculate cache hit rate from token dict.""" + input_tokens = tokens.get("input", 0) + cache = tokens.get("cache", {}) + cache_read = cache.get("read", 0) + total_context = input_tokens + cache_read + if total_context == 0: + return 0.0 
+ return (cache_read / total_context) * 100 + + +def analyze_session(storage: Path, session_id: str) -> dict: + """Analyze DCP impact for a single session.""" + messages = get_session_messages(storage, session_id) + + result = { + "session_id": session_id, + "dcp_events": [], + "total_dcp_calls": 0, + "total_steps": 0, + "by_tool": defaultdict(lambda: { + "calls": 0, + "hit_rate_before_sum": 0, + "hit_rate_after_sum": 0, + "context_before_sum": 0, + "context_after_sum": 0, + "input_before_sum": 0, + "input_after_sum": 0, + "cache_before_sum": 0, + "cache_after_sum": 0, + "events_with_data": 0 + }), + # Track hit rates by distance from last DCP call + "hit_rates_by_distance": defaultdict(list) + } + + prev_step = None + prev_dcp_tools = [] + steps_since_dcp = None # None = no DCP yet, 0 = just had DCP, 1+ = steps after + + for i, msg in enumerate(messages): + msg_id = msg.get("_id", "") + parts = get_message_parts(storage, msg_id) + + step_finish = extract_step_finish(parts) + dcp_tools = extract_dcp_tools(parts) + + if step_finish: + result["total_steps"] += 1 + tokens = step_finish.get("tokens", {}) + curr_hit_rate = calc_cache_hit_rate(tokens) + + # Track hit rate by distance from last DCP call + if steps_since_dcp is not None: + result["hit_rates_by_distance"][steps_since_dcp].append(curr_hit_rate) + steps_since_dcp += 1 + + # If previous step had DCP tools, measure impact + if prev_dcp_tools and prev_step is not None: + prev_tokens = prev_step.get("tokens", {}) + + prev_input = prev_tokens.get("input", 0) + prev_cache = prev_tokens.get("cache", {}).get("read", 0) + prev_context = prev_input + prev_cache + prev_hit_rate = calc_cache_hit_rate(prev_tokens) + + curr_input = tokens.get("input", 0) + curr_cache = tokens.get("cache", {}).get("read", 0) + curr_context = curr_input + curr_cache + curr_hit_rate = calc_cache_hit_rate(tokens) + + for dcp in prev_dcp_tools: + tool_name = dcp["tool"] + result["total_dcp_calls"] += 1 + + event = { + "tool": tool_name, + 
"input_before": prev_input, + "input_after": curr_input, + "cache_before": prev_cache, + "cache_after": curr_cache, + "context_before": prev_context, + "context_after": curr_context, + "hit_rate_before": round(prev_hit_rate, 1), + "hit_rate_after": round(curr_hit_rate, 1), + "hit_rate_delta": round(curr_hit_rate - prev_hit_rate, 1), + "context_delta": curr_context - prev_context, + "message_id": msg_id + } + result["dcp_events"].append(event) + + # Aggregate stats + stats = result["by_tool"][tool_name] + stats["calls"] += 1 + stats["hit_rate_before_sum"] += prev_hit_rate + stats["hit_rate_after_sum"] += curr_hit_rate + stats["context_before_sum"] += prev_context + stats["context_after_sum"] += curr_context + stats["input_before_sum"] += prev_input + stats["input_after_sum"] += curr_input + stats["cache_before_sum"] += prev_cache + stats["cache_after_sum"] += curr_cache + stats["events_with_data"] += 1 + + prev_step = step_finish + prev_dcp_tools = dcp_tools + + # Reset distance counter if this step had DCP tools + if dcp_tools: + steps_since_dcp = 0 + + return result + + +def analyze_sessions(num_sessions: int = 20, min_messages: int = 5, output_json: bool = False, verbose: bool = False, session_id: str = None): + """Analyze DCP impact across recent sessions.""" + storage = Path.home() / ".local/share/opencode/storage" + message_dir = storage / "message" + session_dir = storage / "session" + + if not message_dir.exists(): + print("Error: OpenCode storage not found at", storage) + return + + # Get sessions to analyze + if session_id: + # Analyze specific session + session_path = message_dir / session_id + if not session_path.exists(): + print(f"Error: Session {session_id} not found") + return + sessions = [session_path] + else: + sessions = sorted(message_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)[:num_sessions] + + all_results = [] + grand_totals = { + "sessions_analyzed": 0, + "sessions_with_dcp": 0, + "sessions_skipped_short": 0, + 
"total_dcp_calls": 0, + "total_steps": 0, + "min_messages_filter": min_messages, + "by_tool": defaultdict(lambda: { + "calls": 0, + "hit_rate_before_sum": 0, + "hit_rate_after_sum": 0, + "context_before_sum": 0, + "context_after_sum": 0, + "input_before_sum": 0, + "input_after_sum": 0, + "cache_before_sum": 0, + "cache_after_sum": 0, + "events_with_data": 0 + }), + "hit_rates_by_distance": defaultdict(list) + } + + for session_path in sessions: + session_id = session_path.name + + # Check minimum message count (excluding ignored messages) + real_user_messages = count_real_user_messages(storage, session_id) + if real_user_messages < min_messages: + grand_totals["sessions_skipped_short"] += 1 + continue + + result = analyze_session(storage, session_id) + result["user_messages"] = real_user_messages + + # Get session metadata + title = "Unknown" + for s_dir in session_dir.iterdir(): + s_file = s_dir / f"{session_id}.json" + if s_file.exists(): + try: + sess = json.loads(s_file.read_text()) + title = sess.get("title", "Untitled")[:50] + except (json.JSONDecodeError, IOError): + pass + break + + result["title"] = title + + if result["total_dcp_calls"] > 0: + all_results.append(result) + grand_totals["sessions_with_dcp"] += 1 + + grand_totals["sessions_analyzed"] += 1 + grand_totals["total_dcp_calls"] += result["total_dcp_calls"] + grand_totals["total_steps"] += result["total_steps"] + + for tool, stats in result["by_tool"].items(): + for key in stats: + grand_totals["by_tool"][tool][key] += stats[key] + + # Aggregate hit rates by distance + for dist, rates in result["hit_rates_by_distance"].items(): + grand_totals["hit_rates_by_distance"][dist].extend(rates) + + if output_json: + output = { + "sessions": all_results, + "totals": dict(grand_totals), + "generated_at": datetime.now().isoformat() + } + output["totals"]["by_tool"] = {k: dict(v) for k, v in grand_totals["by_tool"].items()} + print(json.dumps(output, indent=2, default=str)) + else: + print_summary(all_results, 
grand_totals, verbose) + + +def print_summary(results: list, totals: dict, verbose: bool = False): + """Print human-readable summary.""" + print("=" * 110) + print("DCP (Dynamic Context Pruning) Cache Impact Analysis") + print("=" * 110) + print() + + print("OVERVIEW") + print("-" * 50) + print(f" Min user messages filter: {totals['min_messages_filter']:>10,}") + print(f" Sessions scanned: {totals['sessions_analyzed'] + totals['sessions_skipped_short']:>10,}") + print(f" Sessions skipped (short): {totals['sessions_skipped_short']:>10,}") + print(f" Sessions analyzed: {totals['sessions_analyzed']:>10,}") + print(f" Sessions with DCP: {totals['sessions_with_dcp']:>10,}") + print(f" Total DCP tool calls: {totals['total_dcp_calls']:>10,}") + print(f" Total steps: {totals['total_steps']:>10,}") + print() + + # Per-tool breakdown with cache hit rate and context changes + if totals["by_tool"]: + print("PER-TOOL BREAKDOWN") + print("-" * 110) + print(f"{'Tool':<18} {'Calls':>7} {'Avg Hit% Before':>16} {'Avg Hit% After':>15} {'Delta':>8} {'Avg Ctx Before':>15} {'Avg Ctx After':>14}") + print("-" * 110) + + for tool, stats in sorted(totals["by_tool"].items(), key=lambda x: x[1]["calls"], reverse=True): + calls = stats["calls"] + if calls == 0: + continue + + avg_hit_before = stats["hit_rate_before_sum"] / calls + avg_hit_after = stats["hit_rate_after_sum"] / calls + hit_delta = avg_hit_after - avg_hit_before + avg_ctx_before = stats["context_before_sum"] / calls + avg_ctx_after = stats["context_after_sum"] / calls + + delta_str = f"{hit_delta:+.1f}%" + print(f"{tool:<18} {calls:>7,} {avg_hit_before:>15.1f}% {avg_hit_after:>14.1f}% {delta_str:>8} {avg_ctx_before:>14,.0f} {avg_ctx_after:>13,.0f}") + print() + + # Cost analysis + print("COST IMPACT ANALYSIS") + print("-" * 80) + total_input_before = sum(s["input_before_sum"] for s in totals["by_tool"].values()) + total_input_after = sum(s["input_after_sum"] for s in totals["by_tool"].values()) + total_cache_before = 
sum(s["cache_before_sum"] for s in totals["by_tool"].values()) + total_cache_after = sum(s["cache_after_sum"] for s in totals["by_tool"].values()) + + # Tokens moved from cache to input = cache lost + cache_preserved = total_cache_after + cache_lost_to_input = max(0, total_cache_before - total_cache_after) + + if totals["total_dcp_calls"] > 0: + # Average context reduction + avg_ctx_before = (total_input_before + total_cache_before) / totals["total_dcp_calls"] + avg_ctx_after = (total_input_after + total_cache_after) / totals["total_dcp_calls"] + ctx_reduction = avg_ctx_before - avg_ctx_after + + print(f" Total DCP events measured: {totals['total_dcp_calls']:>12,}") + print(f" Avg context before DCP: {avg_ctx_before:>12,.0f} tokens") + print(f" Avg context after DCP: {avg_ctx_after:>12,.0f} tokens") + print(f" Avg context change: {ctx_reduction:>+12,.0f} tokens") + print() + + # Cache efficiency + overall_hit_before = (total_cache_before / (total_input_before + total_cache_before) * 100) if (total_input_before + total_cache_before) > 0 else 0 + overall_hit_after = (total_cache_after / (total_input_after + total_cache_after) * 100) if (total_input_after + total_cache_after) > 0 else 0 + + print(f" Overall cache hit rate before: {overall_hit_before:>11.1f}%") + print(f" Overall cache hit rate after: {overall_hit_after:>11.1f}%") + print(f" Hit rate change: {overall_hit_after - overall_hit_before:>+11.1f}%") + print() + + # Cache recovery analysis - hit rates by distance from last DCP call + if totals["hit_rates_by_distance"]: + print("CACHE RECOVERY ANALYSIS (Hit Rate by Steps Since Last DCP Call)") + print("-" * 80) + print(f"{'Steps After DCP':<18} {'Samples':>10} {'Avg Hit%':>12} {'Min':>10} {'Max':>10}") + print("-" * 80) + + for dist in sorted(totals["hit_rates_by_distance"].keys())[:15]: + rates = totals["hit_rates_by_distance"][dist] + if rates: + avg_rate = sum(rates) / len(rates) + min_rate = min(rates) + max_rate = max(rates) + print(f"{dist:<18} 
{len(rates):>10,} {avg_rate:>11.1f}% {min_rate:>9.1f}% {max_rate:>9.1f}%") + print() + print(" (If cache fully recovers, later steps should approach 85%)") + print() + + # Per-session breakdown (if verbose or few sessions) + if verbose or len(results) <= 10: + if results: + print("PER-SESSION BREAKDOWN") + print("-" * 110) + print(f"{'Session':<25} {'Title':<30} {'User Msgs':>10} {'DCP Calls':>10} {'Avg Hit% Delta':>15}") + print("-" * 110) + + for r in results: + sid = r["session_id"][:24] + title = r["title"][:29] + user_msgs = r.get("user_messages", 0) + avg_delta = 0 + if r["dcp_events"]: + avg_delta = sum(e["hit_rate_delta"] for e in r["dcp_events"]) / len(r["dcp_events"]) + delta_str = f"{avg_delta:+.1f}%" + print(f"{sid:<25} {title:<30} {user_msgs:>10,} {r['total_dcp_calls']:>10,} {delta_str:>15}") + print() + + # Individual DCP events (only in verbose mode) + if verbose: + print("INDIVIDUAL DCP EVENTS") + print("-" * 110) + for r in results: + if r["dcp_events"]: + print(f"\n Session: {r['title'][:60]}") + for event in r["dcp_events"][:15]: + ctx_delta = f"{event['context_delta']:+,}" + hit_delta = f"{event['hit_rate_delta']:+.1f}%" + print(f" {event['tool']:<12} hit%: {event['hit_rate_before']:>5.1f} -> {event['hit_rate_after']:>5.1f} ({hit_delta:>7}) ctx: {event['context_before']:>8,} -> {event['context_after']:>8,} ({ctx_delta:>8})") + + print("=" * 110) + + +def main(): + parser = argparse.ArgumentParser(description="Analyze DCP tool cache impact") + parser.add_argument("--sessions", "-n", type=int, default=20, + help="Number of recent sessions to scan (default: 20)") + parser.add_argument("--session", "-s", type=str, default=None, + help="Analyze specific session ID") + parser.add_argument("--min-messages", "-m", type=int, default=5, + help="Minimum real user messages required (default: 5)") + parser.add_argument("--json", "-j", action="store_true", + help="Output as JSON") + parser.add_argument("--verbose", "-v", action="store_true", + help="Show 
detailed per-event breakdown") + args = parser.parse_args() + + analyze_sessions( + num_sessions=args.sessions, + min_messages=args.min_messages, + output_json=args.json, + verbose=args.verbose, + session_id=args.session + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/opencode-find-session b/scripts/opencode-find-session new file mode 100755 index 00000000..5b7e2087 --- /dev/null +++ b/scripts/opencode-find-session @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +""" +Find OpenCode session IDs by title search. +Returns matching session IDs ordered by last usage time. + +Usage: opencode-find-session [--exact] [--json] +""" + +import json +import argparse +from pathlib import Path +from datetime import datetime + + +def get_all_sessions(storage: Path) -> list[dict]: + """Get all sessions with their metadata.""" + session_dir = storage / "session" + message_dir = storage / "message" + + if not session_dir.exists(): + return [] + + sessions = [] + + for app_dir in session_dir.iterdir(): + if not app_dir.is_dir(): + continue + + for session_file in app_dir.glob("*.json"): + try: + session = json.loads(session_file.read_text()) + session_id = session_file.stem + + # Get last modified time from message directory + msg_path = message_dir / session_id + if msg_path.exists(): + mtime = msg_path.stat().st_mtime + else: + mtime = session_file.stat().st_mtime + + sessions.append({ + "id": session_id, + "title": session.get("title", "Untitled"), + "created_at": session.get("createdAt"), + "last_used": mtime, + "last_used_iso": datetime.fromtimestamp(mtime).isoformat() + }) + except (json.JSONDecodeError, IOError): + pass + + return sessions + + +def search_sessions(sessions: list[dict], search_term: str, exact: bool = False) -> list[dict]: + """Search sessions by title.""" + results = [] + search_lower = search_term.lower() + + for session in sessions: + title = session.get("title", "") + title_lower = title.lower() + + if exact: + if title_lower == search_lower: + 
results.append(session) + else: + if search_lower in title_lower: + results.append(session) + + # Sort by last used time, most recent first + results.sort(key=lambda s: s["last_used"], reverse=True) + + return results + + +def print_results(results: list[dict], search_term: str): + """Print search results.""" + if not results: + print(f"No sessions found matching: {search_term}") + return + + if len(results) == 1: + # Single result - just print the ID for easy piping + print(results[0]["id"]) + else: + # Multiple results - show a table + print(f"Found {len(results)} sessions matching: {search_term}") + print() + print(f"{'Session ID':<32} {'Last Used':<20} {'Title'}") + print("-" * 100) + + for r in results: + last_used = datetime.fromtimestamp(r["last_used"]).strftime("%Y-%m-%d %H:%M") + title = r["title"][:50] if len(r["title"]) > 50 else r["title"] + print(f"{r['id']:<32} {last_used:<20} {title}") + + +def main(): + parser = argparse.ArgumentParser( + description="Find OpenCode session IDs by title search" + ) + parser.add_argument( + "search_term", + type=str, + help="Text to search for in session titles" + ) + parser.add_argument( + "--exact", "-e", + action="store_true", + help="Require exact title match (case-insensitive)" + ) + parser.add_argument( + "--json", "-j", + action="store_true", + help="Output as JSON" + ) + parser.add_argument( + "--all", "-a", + action="store_true", + help="Show all sessions (ignore search term)" + ) + args = parser.parse_args() + + storage = Path.home() / ".local/share/opencode/storage" + + if not storage.exists(): + print("Error: OpenCode storage not found at", storage) + return 1 + + sessions = get_all_sessions(storage) + + if args.all: + results = sorted(sessions, key=lambda s: s["last_used"], reverse=True) + else: + results = search_sessions(sessions, args.search_term, args.exact) + + if args.json: + print(json.dumps(results, indent=2, default=str)) + else: + print_results(results, args.search_term if not args.all else 
"(all)") + + return 0 + + +if __name__ == "__main__": + exit(main()) diff --git a/scripts/opencode-session-timeline b/scripts/opencode-session-timeline new file mode 100755 index 00000000..a3683cea --- /dev/null +++ b/scripts/opencode-session-timeline @@ -0,0 +1,412 @@ +#!/usr/bin/env python3 +""" +Analyze token values at each step within a single OpenCode session. +Shows cache growth over time and highlights DCP tool usage that causes cache drops. + +Usage: opencode-session-timeline [--session ID] [--json] [--no-color] +""" + +import json +import argparse +from pathlib import Path +from typing import Optional +from datetime import datetime + +# DCP tool names (tools that prune context and reduce cache) +DCP_TOOLS = { + "prune", "discard", "extract", "context_pruning", + "squash", "compress", "consolidate", "distill" +} + +# ANSI colors +class Colors: + RESET = "\033[0m" + BOLD = "\033[1m" + DIM = "\033[2m" + RED = "\033[31m" + GREEN = "\033[32m" + YELLOW = "\033[33m" + BLUE = "\033[34m" + MAGENTA = "\033[35m" + CYAN = "\033[36m" + +NO_COLOR = Colors() +for attr in dir(NO_COLOR): + if not attr.startswith('_'): + setattr(NO_COLOR, attr, "") + + +def format_duration(ms: Optional[int], colors: Colors = None) -> str: + """Format milliseconds as human-readable duration.""" + if ms is None: + return "-" + + seconds = ms / 1000 + if seconds < 60: + return f"{seconds:.1f}s" + elif seconds < 3600: + minutes = int(seconds // 60) + secs = seconds % 60 + return f"{minutes}m{secs:.0f}s" + else: + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + return f"{hours}h{minutes}m" + + +def get_session_messages(storage: Path, session_id: str) -> list[dict]: + """Get all messages for a session, sorted by creation order.""" + message_dir = storage / "message" / session_id + if not message_dir.exists(): + return [] + + messages = [] + for msg_file in message_dir.glob("*.json"): + try: + msg = json.loads(msg_file.read_text()) + msg["_file"] = str(msg_file) + msg["_id"] 
= msg_file.stem + # Extract timing info + time_info = msg.get("time", {}) + msg["_created"] = time_info.get("created") + msg["_completed"] = time_info.get("completed") + messages.append(msg) + except (json.JSONDecodeError, IOError): + pass + + return sorted(messages, key=lambda m: m.get("_id", "")) + + +def get_message_parts(storage: Path, message_id: str) -> list[dict]: + """Get all parts for a message, sorted by creation order.""" + parts_dir = storage / "part" / message_id + if not parts_dir.exists(): + return [] + + parts = [] + for part_file in parts_dir.glob("*.json"): + try: + part = json.loads(part_file.read_text()) + part["_file"] = str(part_file) + part["_id"] = part_file.stem + parts.append(part) + except (json.JSONDecodeError, IOError): + pass + + return sorted(parts, key=lambda p: p.get("_id", "")) + + +def extract_step_data(parts: list[dict]) -> Optional[dict]: + """Extract step-finish data and tool calls from message parts.""" + step_finish = None + tools_used = [] + dcp_tools_used = [] + + for part in parts: + if part.get("type") == "step-finish" and "tokens" in part: + step_finish = part + elif part.get("type") == "tool": + tool_name = part.get("tool", "") + tools_used.append(tool_name) + if tool_name in DCP_TOOLS: + dcp_tools_used.append(tool_name) + + if step_finish is None: + return None + + tokens = step_finish.get("tokens", {}) + cache = tokens.get("cache", {}) + + return { + "input": tokens.get("input", 0), + "output": tokens.get("output", 0), + "reasoning": tokens.get("reasoning", 0), + "cache_read": cache.get("read", 0), + "cache_write": cache.get("write", 0), + "cost": step_finish.get("cost", 0), + "reason": step_finish.get("reason", "unknown"), + "tools_used": tools_used, + "dcp_tools_used": dcp_tools_used, + "has_dcp": len(dcp_tools_used) > 0 + } + + +def get_most_recent_session(storage: Path) -> Optional[str]: + """Get the most recent session ID.""" + message_dir = storage / "message" + if not message_dir.exists(): + return None + + 
sessions = sorted(message_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True) + return sessions[0].name if sessions else None + + +def get_session_title(storage: Path, session_id: str) -> str: + """Get session title from metadata.""" + session_dir = storage / "session" + if not session_dir.exists(): + return "Unknown" + + for s_dir in session_dir.iterdir(): + s_file = s_dir / f"{session_id}.json" + if s_file.exists(): + try: + sess = json.loads(s_file.read_text()) + return sess.get("title", "Untitled") + except (json.JSONDecodeError, IOError): + pass + return "Unknown" + + +def analyze_session(storage: Path, session_id: str) -> dict: + """Analyze a single session step by step.""" + messages = get_session_messages(storage, session_id) + title = get_session_title(storage, session_id) + + steps = [] + for msg in messages: + msg_id = msg.get("_id", "") + parts = get_message_parts(storage, msg_id) + step_data = extract_step_data(parts) + + if step_data: + step_data["message_id"] = msg_id + step_data["created"] = msg.get("_created") + step_data["completed"] = msg.get("_completed") + steps.append(step_data) + + # Calculate deltas + for i, step in enumerate(steps): + if i == 0: + step["cache_read_delta"] = step["cache_read"] + step["input_delta"] = step["input"] + else: + prev = steps[i - 1] + step["cache_read_delta"] = step["cache_read"] - prev["cache_read"] + step["input_delta"] = step["input"] - prev["input"] + + # Calculate cache hit rate + total_context = step["input"] + step["cache_read"] + step["cache_hit_rate"] = (step["cache_read"] / total_context * 100) if total_context > 0 else 0 + + # Calculate step duration and time since previous step + created = step.get("created") + completed = step.get("completed") + + if created and completed: + step["duration_ms"] = completed - created + else: + step["duration_ms"] = None + + if i == 0: + step["time_since_prev_ms"] = None + else: + prev_completed = steps[i - 1].get("completed") + if prev_completed and created: 
+ step["time_since_prev_ms"] = created - prev_completed + else: + step["time_since_prev_ms"] = None + + return { + "session_id": session_id, + "title": title, + "steps": steps, + "total_steps": len(steps) + } + + +def print_timeline(result: dict, colors: Colors): + """Print the step-by-step timeline.""" + c = colors + + print(f"{c.BOLD}{'=' * 130}{c.RESET}") + print(f"{c.BOLD}SESSION TIMELINE: Token Values at Each Step{c.RESET}") + print(f"{c.BOLD}{'=' * 130}{c.RESET}") + print() + print(f" Session: {c.CYAN}{result['session_id']}{c.RESET}") + print(f" Title: {result['title']}") + print(f" Steps: {result['total_steps']}") + print() + + if not result["steps"]: + print(" No steps found in this session.") + return + + # Header + print(f"{c.BOLD}{'Step':<6} {'Cache Read':>12} {'Δ Cache':>12} {'Input':>10} {'Output':>10} {'Cache %':>9} {'Duration':>10} {'Gap':>10} {'DCP Tools':<15} {'Reason':<12}{c.RESET}") + print("-" * 130) + + prev_cache = 0 + for i, step in enumerate(result["steps"], 1): + cache_read = step["cache_read"] + cache_delta = step["cache_read_delta"] + input_tokens = step["input"] + output_tokens = step["output"] + cache_pct = step["cache_hit_rate"] + has_dcp = step["has_dcp"] + dcp_tools = step["dcp_tools_used"] + reason = step["reason"] + + # Color the delta based on direction + if cache_delta > 0: + delta_str = f"{c.GREEN}+{cache_delta:,}{c.RESET}" + elif cache_delta < 0: + delta_str = f"{c.RED}{cache_delta:,}{c.RESET}" + else: + delta_str = f"{c.DIM}0{c.RESET}" + + # Pad delta string for alignment (accounting for color codes) + delta_display = f"{cache_delta:+,}" if cache_delta != 0 else "0" + delta_padded = f"{delta_str:>22}" if cache_delta != 0 else f"{c.DIM}{'0':>12}{c.RESET}" + + # Highlight DCP rows + if has_dcp: + row_prefix = f"{c.YELLOW}{c.BOLD}" + row_suffix = c.RESET + dcp_str = f"{c.YELLOW}{', '.join(dcp_tools)}{c.RESET}" + else: + row_prefix = "" + row_suffix = "" + dcp_str = f"{c.DIM}-{c.RESET}" + + # Cache percentage coloring + if 
cache_pct >= 80: + pct_str = f"{c.GREEN}{cache_pct:>8.1f}%{c.RESET}" + elif cache_pct >= 50: + pct_str = f"{c.YELLOW}{cache_pct:>8.1f}%{c.RESET}" + else: + pct_str = f"{c.RED}{cache_pct:>8.1f}%{c.RESET}" + + # Format delta with proper width + if cache_delta > 0: + delta_formatted = f"{c.GREEN}{'+' + f'{cache_delta:,}':>11}{c.RESET}" + elif cache_delta < 0: + delta_formatted = f"{c.RED}{f'{cache_delta:,}':>12}{c.RESET}" + else: + delta_formatted = f"{c.DIM}{'0':>12}{c.RESET}" + + print(f"{row_prefix}{i:<6}{row_suffix} {cache_read:>12,} {delta_formatted} {input_tokens:>10,} {output_tokens:>10,} {pct_str} {format_duration(step.get('duration_ms')):>10} {format_duration(step.get('time_since_prev_ms')):>10} {dcp_str:<15} {reason:<12}") + + prev_cache = cache_read + + print("-" * 130) + print() + + # Summary statistics + steps = result["steps"] + total_input = sum(s["input"] for s in steps) + total_output = sum(s["output"] for s in steps) + total_cache_read = sum(s["cache_read"] for s in steps) + + dcp_steps = [s for s in steps if s["has_dcp"]] + cache_increases = [s for s in steps if s["cache_read_delta"] > 0] + cache_decreases = [s for s in steps if s["cache_read_delta"] < 0] + + # Overall cache hit rate + total_context = total_input + total_cache_read + overall_cache_rate = (total_cache_read / total_context * 100) if total_context > 0 else 0 + + print(f"{c.BOLD}CACHE BEHAVIOR SUMMARY{c.RESET}") + print("-" * 50) + + # Overall cache hit rate with coloring + if overall_cache_rate >= 80: + rate_str = f"{c.GREEN}{overall_cache_rate:.1f}%{c.RESET}" + elif overall_cache_rate >= 50: + rate_str = f"{c.YELLOW}{overall_cache_rate:.1f}%{c.RESET}" + else: + rate_str = f"{c.RED}{overall_cache_rate:.1f}%{c.RESET}" + + print(f" {c.BOLD}Overall cache hit rate: {rate_str}{c.RESET}") + print(f" Total input tokens: {total_input:>12,}") + print(f" Total cache read tokens: {total_cache_read:>12,}") + print() + print(f" Steps with cache increase: 
{c.GREEN}{len(cache_increases):>5}{c.RESET}") + print(f" Steps with cache decrease: {c.RED}{len(cache_decreases):>5}{c.RESET}") + print(f" Steps with DCP tools: {c.YELLOW}{len(dcp_steps):>5}{c.RESET}") + print() + + if dcp_steps: + dcp_decreases = [s for s in dcp_steps if s["cache_read_delta"] < 0] + print(f" DCP steps with cache drop: {len(dcp_decreases)}/{len(dcp_steps)}") + if dcp_decreases: + avg_drop = sum(s["cache_read_delta"] for s in dcp_decreases) / len(dcp_decreases) + print(f" Avg cache drop on DCP: {c.RED}{avg_drop:,.0f}{c.RESET} tokens") + + print() + + # Cache growth verification + if len(steps) >= 2: + first_cache = steps[0]["cache_read"] + last_cache = steps[-1]["cache_read"] + max_cache = max(s["cache_read"] for s in steps) + + print(f"{c.BOLD}CACHE GROWTH VERIFICATION{c.RESET}") + print("-" * 50) + print(f" First step cache read: {first_cache:>12,}") + print(f" Last step cache read: {last_cache:>12,}") + print(f" Max cache read observed: {max_cache:>12,}") + + if last_cache > first_cache: + growth = last_cache - first_cache + print(f" Net cache growth: {c.GREEN}+{growth:>11,}{c.RESET}") + print(f"\n {c.GREEN}✓ Provider caching appears to be working{c.RESET}") + elif last_cache < first_cache: + loss = first_cache - last_cache + print(f" Net cache loss: {c.RED}-{loss:>11,}{c.RESET}") + if dcp_steps: + print(f"\n {c.YELLOW}⚠ Cache decreased (likely due to DCP pruning){c.RESET}") + else: + print(f"\n {c.RED}⚠ Cache decreased without DCP - investigate{c.RESET}") + else: + print(f"\n {c.DIM}Cache unchanged between first and last step{c.RESET}") + + print() + print(f"{c.BOLD}{'=' * 130}{c.RESET}") + + +def main(): + parser = argparse.ArgumentParser( + description="Analyze token values at each step within an OpenCode session" + ) + parser.add_argument( + "--session", "-s", type=str, default=None, + help="Session ID to analyze (default: most recent)" + ) + parser.add_argument( + "--json", "-j", action="store_true", + help="Output as JSON" + ) + 
parser.add_argument( + "--no-color", action="store_true", + help="Disable colored output" + ) + args = parser.parse_args() + + storage = Path.home() / ".local/share/opencode/storage" + + if not storage.exists(): + print("Error: OpenCode storage not found at", storage) + return 1 + + session_id = args.session + if session_id is None: + session_id = get_most_recent_session(storage) + if session_id is None: + print("Error: No sessions found") + return 1 + + result = analyze_session(storage, session_id) + + if args.json: + # Remove non-serializable fields + print(json.dumps(result, indent=2, default=str)) + else: + colors = NO_COLOR if args.no_color else Colors() + print_timeline(result, colors) + + return 0 + + +if __name__ == "__main__": + exit(main()) diff --git a/scripts/opencode-token-stats b/scripts/opencode-token-stats new file mode 100755 index 00000000..3a7d6dba --- /dev/null +++ b/scripts/opencode-token-stats @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 +""" +Analyze token usage across recent OpenCode sessions. 
+Usage: opencode-token-stats [--sessions N] [--json] +""" + +import json +import argparse +from pathlib import Path +from datetime import datetime + +def analyze_sessions(num_sessions=10, output_json=False, session_id=None): + storage = Path.home() / ".local/share/opencode/storage" + message_dir = storage / "message" + part_dir = storage / "part" + session_dir = storage / "session" + + if not message_dir.exists(): + print("Error: OpenCode storage not found at", storage) + return + + # Get sessions to analyze + if session_id: + # Analyze specific session + session_path = message_dir / session_id + if not session_path.exists(): + print(f"Error: Session {session_id} not found") + return + sessions = [session_path] + else: + # Get recent sessions sorted by modification time + sessions = sorted(message_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)[:num_sessions] + + results = [] + grand_totals = { + "input": 0, "output": 0, "reasoning": 0, + "cache_read": 0, "cache_write": 0, + "steps": 0, "sessions": 0, + "reasons": {"tool-calls": 0, "stop": 0, "other": 0} + } + + for session_path in sessions: + session_id = session_path.name + totals = { + "input": 0, "output": 0, "reasoning": 0, + "cache_read": 0, "cache_write": 0, + "cost": 0.0, "steps": 0, + "reasons": {"tool-calls": 0, "stop": 0, "other": 0} + } + + # Get messages for this session + msg_files = list(session_path.glob("*.json")) + + for msg_file in msg_files: + msg_id = msg_file.stem + parts_path = part_dir / msg_id + if parts_path.exists(): + for part_file in parts_path.glob("*.json"): + try: + part = json.loads(part_file.read_text()) + if part.get("type") == "step-finish" and "tokens" in part: + t = part["tokens"] + totals["input"] += t.get("input", 0) + totals["output"] += t.get("output", 0) + totals["reasoning"] += t.get("reasoning", 0) + cache = t.get("cache", {}) + totals["cache_read"] += cache.get("read", 0) + totals["cache_write"] += cache.get("write", 0) + totals["cost"] += part.get("cost", 
0) + totals["steps"] += 1 + + reason = part.get("reason", "other") + if reason in totals["reasons"]: + totals["reasons"][reason] += 1 + else: + totals["reasons"]["other"] += 1 + except (json.JSONDecodeError, KeyError): + pass + + # Get session metadata (title, timestamps) + title = "Unknown" + created = None + for s_dir in session_dir.iterdir(): + s_file = s_dir / f"{session_id}.json" + if s_file.exists(): + try: + sess = json.loads(s_file.read_text()) + title = sess.get("title", "Untitled")[:60] + created = sess.get("createdAt") + except (json.JSONDecodeError, KeyError): + pass + break + + # Calculate derived metrics + total_tokens = totals["input"] + totals["output"] + totals["cache_read"] + cache_hit_rate = (totals["cache_read"] / (totals["input"] + totals["cache_read"]) * 100) if (totals["input"] + totals["cache_read"]) > 0 else 0 + + session_result = { + "session_id": session_id, + "title": title, + "created": created, + "steps": totals["steps"], + "tokens": { + "input": totals["input"], + "output": totals["output"], + "reasoning": totals["reasoning"], + "cache_read": totals["cache_read"], + "cache_write": totals["cache_write"], + "total": total_tokens + }, + "cost": totals["cost"], + "cache_hit_rate": round(cache_hit_rate, 1), + "finish_reasons": totals["reasons"] + } + results.append(session_result) + + # Update grand totals + grand_totals["input"] += totals["input"] + grand_totals["output"] += totals["output"] + grand_totals["reasoning"] += totals["reasoning"] + grand_totals["cache_read"] += totals["cache_read"] + grand_totals["cache_write"] += totals["cache_write"] + grand_totals["steps"] += totals["steps"] + grand_totals["sessions"] += 1 + for reason, count in totals["reasons"].items(): + grand_totals["reasons"][reason] += count + + # Output + if output_json: + output = { + "sessions": results, + "totals": grand_totals, + "generated_at": datetime.now().isoformat() + } + print(json.dumps(output, indent=2)) + else: + print_summary(results, grand_totals) + 
+def print_summary(results, grand_totals): + print("=" * 120) + print("OPENCODE SESSION TOKEN ANALYSIS") + print("=" * 120) + print() + + # Per-session breakdown + print(f"{'Session':<25} {'Title':<30} {'Steps':>6} {'Input':>12} {'Output':>10} {'Reasoning':>10} {'Cache Read':>12} {'Cache Write':>12} {'Cache %':>8}") + print("-" * 120) + + for r in results: + t = r["tokens"] + print(f"{r['session_id'][:24]:<25} {r['title'][:29]:<30} {r['steps']:>6} {t['input']:>12,} {t['output']:>10,} {t['reasoning']:>10,} {t['cache_read']:>12,} {t['cache_write']:>12,} {r['cache_hit_rate']:>7.1f}%") + + print("-" * 120) + print() + + # Grand totals + total_all = grand_totals["input"] + grand_totals["output"] + grand_totals["cache_read"] + overall_cache_rate = (grand_totals["cache_read"] / (grand_totals["input"] + grand_totals["cache_read"]) * 100) if (grand_totals["input"] + grand_totals["cache_read"]) > 0 else 0 + + print("TOTALS ACROSS ALL SESSIONS") + print("-" * 50) + print(f" Sessions analyzed: {grand_totals['sessions']:>15,}") + print(f" Total steps: {grand_totals['steps']:>15,}") + avg_steps = grand_totals['steps'] / grand_totals['sessions'] if grand_totals['sessions'] > 0 else 0 + print(f" Avg steps/session: {avg_steps:>15.1f}") + print() + print(" TOKEN BREAKDOWN:") + print(f" Input tokens: {grand_totals['input']:>15,}") + print(f" Output tokens: {grand_totals['output']:>15,}") + print(f" Reasoning tokens: {grand_totals['reasoning']:>15,}") + print(f" Cache read: {grand_totals['cache_read']:>15,}") + print(f" Cache write: {grand_totals['cache_write']:>15,}") + print(f" ─────────────────────────────────────────────") + print(f" TOTAL: {total_all:>15,}") + print() + print(f" Overall cache hit rate: {overall_cache_rate:.1f}%") + print() + print(" STEP FINISH REASONS:") + for reason, count in grand_totals["reasons"].items(): + if count > 0: + print(f" {reason}: {count:>10,}") + print() + print("=" * 120) + +def main(): + parser = argparse.ArgumentParser(description="Analyze 
OpenCode session token usage") + parser.add_argument("--sessions", "-n", type=int, default=10, help="Number of recent sessions to analyze (default: 10)") + parser.add_argument("--session", "-s", type=str, default=None, help="Analyze specific session ID") + parser.add_argument("--json", "-j", action="store_true", help="Output as JSON instead of formatted text") + args = parser.parse_args() + + analyze_sessions(num_sessions=args.sessions, output_json=args.json, session_id=args.session) + +if __name__ == "__main__": + main() diff --git a/scripts/print.ts b/scripts/print.ts new file mode 100644 index 00000000..484bc023 --- /dev/null +++ b/scripts/print.ts @@ -0,0 +1,117 @@ +#!/usr/bin/env npx tsx + +import { renderSystemPrompt, renderNudge, type ToolFlags } from "../lib/prompts/index.js" +import { + wrapPrunableTools, + wrapCompressContext, + wrapCooldownMessage, +} from "../lib/messages/inject.js" + +const args = process.argv.slice(2) + +const flags: ToolFlags = { + distill: args.includes("-d") || args.includes("--distill"), + compress: args.includes("-c") || args.includes("--compress"), + prune: args.includes("-p") || args.includes("--prune"), +} + +// Default to all enabled if none specified +if (!flags.prune && !flags.distill && !flags.compress) { + flags.prune = true + flags.distill = true + flags.compress = true +} + +const showSystem = args.includes("--system") +const showNudge = args.includes("--nudge") +const showPruneList = args.includes("--prune-list") +const showCompressContext = args.includes("--compress-context") +const showCooldown = args.includes("--cooldown") +const showHelp = args.includes("--help") || args.includes("-h") + +if ( + showHelp || + (!showSystem && !showNudge && !showPruneList && !showCompressContext && !showCooldown) +) { + console.log(` +Usage: bun run dcp [TYPE] [-d] [-c] [-p] + +Types: + --system System prompt + --nudge Nudge prompt + --prune-list Example prunable tools list + --compress-context Example compress context + --cooldown 
Cooldown message after pruning + +Tool flags (for --system and --nudge): + -d, --distill Enable distill tool + -c, --compress Enable compress tool + -p, --prune Enable prune tool + +If no tool flags specified, all are enabled. + +Examples: + bun run dcp --system -d -c -p # System prompt with all tools + bun run dcp --system -p # System prompt with prune only + bun run dcp --nudge -d -c # Nudge with distill and compress + bun run dcp --prune-list # Example prunable tools list +`) + process.exit(0) +} + +const header = (title: string) => { + console.log() + console.log("─".repeat(60)) + console.log(title) + console.log("─".repeat(60)) +} + +if (showSystem) { + const enabled = [ + flags.distill && "distill", + flags.compress && "compress", + flags.prune && "prune", + ] + .filter(Boolean) + .join(", ") + header(`SYSTEM PROMPT (tools: ${enabled})`) + console.log(renderSystemPrompt(flags)) +} + +if (showNudge) { + const enabled = [ + flags.distill && "distill", + flags.compress && "compress", + flags.prune && "prune", + ] + .filter(Boolean) + .join(", ") + header(`NUDGE (tools: ${enabled})`) + console.log(renderNudge(flags)) +} + +if (showPruneList) { + header("PRUNABLE TOOLS LIST (mock example)") + const mockList = `5: read, /path/to/file.ts +8: bash, npm run build +12: glob, src/**/*.ts +15: read, /path/to/another-file.ts` + console.log(wrapPrunableTools(mockList)) +} + +if (showCompressContext) { + header("COMPRESS CONTEXT (mock example)") + console.log(wrapCompressContext(45)) +} + +if (showCooldown) { + const enabled = [ + flags.distill && "distill", + flags.compress && "compress", + flags.prune && "prune", + ] + .filter(Boolean) + .join(", ") + header(`COOLDOWN MESSAGE (tools: ${enabled})`) + console.log(wrapCooldownMessage(flags)) +} diff --git a/tests/test-dcp-cache.sh b/tests/test-dcp-cache.sh new file mode 100755 index 00000000..49e7dfac --- /dev/null +++ b/tests/test-dcp-cache.sh @@ -0,0 +1,364 @@ +#!/usr/bin/env bash +# +# DCP Token Cache Test Script +# Tests 
how Dynamic Context Pruning affects token caching across different providers/models +# +# Usage: +# ./test-dcp-cache.sh [OPTIONS] +# +# Options: +# --provider NAME Run test for specific provider only +# --dry-run Show what would be executed without running +# --results-dir DIR Custom results directory (default: ./results) +# --port PORT Port for server (default: 4096, enables TUI attach) +# --no-server Don't start a server, use standalone mode (no TUI attach) +# --help Show this help message +# +# To watch tests in real-time: +# Terminal 1: ./test-dcp-cache.sh --provider anthropic +# Terminal 2: opencode attach http://localhost:4096 +# + +set -euo pipefail + +# ============================================================================ +# CONFIGURATION - Modify these to change which models are tested +# ============================================================================ + +# Models to test: one per provider +# Format: ["provider-name"]="provider/model-id" +declare -A MODELS=( + ["opencode-kimi"]="opencode/kimi-k2.5-free" + ["kimi"]="kimi-for-coding/k2p5" + ["llm-proxy-cli-gemini"]="llm-proxy/cli_gemini-3-flash-high" + ["llm-proxy-ant-gemini"]="llm-proxy/ant_gemini-3-flash-high" + ["llm-proxy-opus"]="llm-proxy/claude-opus-4-5-thinking" + ["openai"]="openai/gpt-5.2-codex" + ["openrouter-haiku"]="openrouter/anthropic/claude-haiku-4.5" +) + +# Codebases to analyze (ordered from simple to complex) +# Format: "clone_command|description" +CODEBASES=( + "git clone --depth 1 https://github.com/sindresorhus/is-odd.git|is-odd: minimal npm package (~10 lines)" + "git clone --depth 1 https://github.com/chalk/chalk.git|chalk: small terminal styling utility" + "git clone --depth 1 https://github.com/tj/commander.js.git|commander: medium-complexity CLI framework" + "git clone --depth 1 https://github.com/yargs/yargs.git|yargs: medium-complex argument parser" + "cp -r ~/.config/opencode/opencode|opencode: full-featured coding assistant (local copy)" +) + +# Base prompt 
template - {CODEBASE_CMD} and {CODEBASE_DESC} will be replaced +PROMPT_TEMPLATE='Clone/copy {CODEBASE_DESC} to /tmp/{CODEBASE_NAME} and give me a comprehensive summary of what it does and how it works. Analyze the directory structure, key files, main functionality, and architecture. Do not use subagents.' + +# ============================================================================ +# SCRIPT LOGIC - Generally no need to modify below +# ============================================================================ + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SCRIPTS_DIR="${SCRIPT_DIR}/../scripts" +RESULTS_DIR="${SCRIPT_DIR}/results" +DRY_RUN=false +SPECIFIC_PROVIDER="" +SERVER_PORT="4096" +USE_SERVER=true +SERVER_PID="" + +cleanup() { + if [[ -n "$SERVER_PID" ]] && kill -0 "$SERVER_PID" 2>/dev/null; then + log "Stopping opencode server (PID: $SERVER_PID)..." + kill "$SERVER_PID" 2>/dev/null || true + wait "$SERVER_PID" 2>/dev/null || true + fi +} +trap cleanup EXIT + +usage() { + head -20 "$0" | tail -18 | sed 's/^# \?//' + echo "" + echo "Configured models:" + for provider in "${!MODELS[@]}"; do + echo " $provider: ${MODELS[$provider]}" + done +} + +log() { + echo "[$(date '+%H:%M:%S')] $*" +} + +log_section() { + echo "" + echo "============================================================================" + echo "$*" + echo "============================================================================" +} + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --provider) + SPECIFIC_PROVIDER="$2" + shift 2 + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --results-dir) + RESULTS_DIR="$2" + shift 2 + ;; + --port) + SERVER_PORT="$2" + shift 2 + ;; + --no-server) + USE_SERVER=false + shift + ;; + --help|-h) + usage + exit 0 + ;; + *) + echo "Unknown option: $1" + usage + exit 1 + ;; + esac +done + +# Create timestamp for this test run +TIMESTAMP=$(date '+%Y%m%d_%H%M%S') +RUN_DIR="${RESULTS_DIR}/${TIMESTAMP}" + +# Validate specific provider if 
given +if [[ -n "$SPECIFIC_PROVIDER" ]] && [[ -z "${MODELS[$SPECIFIC_PROVIDER]:-}" ]]; then + echo "Error: Unknown provider '$SPECIFIC_PROVIDER'" + echo "Available providers: ${!MODELS[*]}" + exit 1 +fi + +# Build list of providers to test +if [[ -n "$SPECIFIC_PROVIDER" ]]; then + PROVIDERS=("$SPECIFIC_PROVIDER") +else + PROVIDERS=("${!MODELS[@]}") +fi + +log_section "DCP Token Cache Test" +log "Results directory: ${RUN_DIR}" +log "Providers to test: ${PROVIDERS[*]}" +log "Codebases: ${#CODEBASES[@]}" +log "Dry run: ${DRY_RUN}" + +if [[ "$USE_SERVER" == "true" ]]; then + log "Server port: ${SERVER_PORT}" + log "" + log ">>> To watch in real-time, run in another terminal:" + log ">>> opencode attach http://localhost:${SERVER_PORT}" +else + log "Server mode: disabled (standalone runs, no TUI attach)" +fi + +if [[ "$DRY_RUN" == "true" ]]; then + log_section "DRY RUN - Commands that would be executed" +fi + +# Create results directory +if [[ "$DRY_RUN" == "false" ]]; then + mkdir -p "$RUN_DIR" + # Save test configuration (simple approach without complex jq) + { + echo "{" + echo " \"timestamp\": \"${TIMESTAMP}\"," + echo " \"providers\": [\"${PROVIDERS[*]// /\", \"}\"]," + echo " \"codebases\": ${#CODEBASES[@]}," + echo " \"server_port\": ${SERVER_PORT:-null}" + echo "}" + } > "${RUN_DIR}/config.json" +fi + +# Start server if requested +if [[ "$DRY_RUN" == "false" ]] && [[ "$USE_SERVER" == "true" ]]; then + log "" + + # Check if port is already in use + if lsof -i ":${SERVER_PORT}" &>/dev/null; then + log "Port ${SERVER_PORT} is already in use." + read -p "Kill existing process and continue? [y/N] " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + log "Killing process on port ${SERVER_PORT}..." + fuser -k "${SERVER_PORT}/tcp" 2>/dev/null || lsof -ti ":${SERVER_PORT}" | xargs -r kill -9 + sleep 1 + else + log "Aborting. Free port ${SERVER_PORT} or use --port to specify a different port." + exit 1 + fi + fi + + log "Starting opencode server on port ${SERVER_PORT}..." 
+ opencode serve --port "$SERVER_PORT" & + SERVER_PID=$! + + # Wait for server to be ready + sleep 2 + if ! kill -0 "$SERVER_PID" 2>/dev/null; then + log "Error: Server failed to start" + exit 1 + fi + log "Server started (PID: $SERVER_PID)" +fi + +# Build base command depending on server mode +if [[ "$USE_SERVER" == "true" ]]; then + BASE_CMD="opencode run --attach http://localhost:${SERVER_PORT}" +else + BASE_CMD="opencode run" +fi + +# Run tests for each provider +for provider in "${PROVIDERS[@]}"; do + model="${MODELS[$provider]}" + provider_dir="${RUN_DIR}/${provider}" + + log_section "Testing Provider: ${provider}" + log "Model: ${model}" + + if [[ "$DRY_RUN" == "false" ]]; then + mkdir -p "$provider_dir" + fi + + SESSION_ID="" + PROMPT_NUM=0 + + for codebase_entry in "${CODEBASES[@]}"; do + PROMPT_NUM=$((PROMPT_NUM + 1)) + + # Parse codebase entry + IFS='|' read -r clone_cmd codebase_desc <<< "$codebase_entry" + codebase_name=$(echo "$clone_cmd" | grep -oE '[^/]+\.git$' | sed 's/\.git$//' || echo "$clone_cmd" | awk '{print $NF}') + + # Build the prompt + prompt="${PROMPT_TEMPLATE}" + prompt="${prompt//\{CODEBASE_CMD\}/$clone_cmd}" + prompt="${prompt//\{CODEBASE_DESC\}/$codebase_desc}" + prompt="${prompt//\{CODEBASE_NAME\}/$codebase_name}" + + log "" + log "Prompt ${PROMPT_NUM}/${#CODEBASES[@]}: ${codebase_desc}" + + # Build opencode command (for display only, actual execution below) + if [[ -z "$SESSION_ID" ]]; then + # First prompt - create new session + display_cmd="${BASE_CMD} -m '${model}' --title 'DCP Test: ${provider}' ''" + else + # Subsequent prompts - continue session + display_cmd="${BASE_CMD} -m '${model}' --session '${SESSION_ID}' ''" + fi + + if [[ "$DRY_RUN" == "true" ]]; then + echo " $ $display_cmd" + # Simulate session ID for dry run + if [[ -z "$SESSION_ID" ]]; then + SESSION_ID="dry-run-session-id" + fi + else + log "Executing: $display_cmd" + + # Run opencode and capture output + 
output_file="${provider_dir}/prompt_${PROMPT_NUM}_output.txt" + json_file="${provider_dir}/prompt_${PROMPT_NUM}_events.json" + + if [[ -z "$SESSION_ID" ]]; then + # First run - use --format json to capture session ID from events + log "Using JSON format to capture session ID..." + if [[ "$USE_SERVER" == "true" ]]; then + opencode run --attach "http://localhost:${SERVER_PORT}" \ + -m "${model}" \ + --title "DCP Test: ${provider}" \ + --format json \ + "${prompt}" 2>&1 | tee "$json_file" + else + opencode run \ + -m "${model}" \ + --title "DCP Test: ${provider}" \ + --format json \ + "${prompt}" 2>&1 | tee "$json_file" + fi + + # Extract session ID from the first JSON event + SESSION_ID=$(head -1 "$json_file" | jq -r '.sessionID // empty' 2>/dev/null || echo "") + + if [[ -z "$SESSION_ID" ]]; then + log "Warning: Could not extract session ID from JSON output" + # Fallback to opencode-find-session + log "Falling back to session search..." + SESSION_ID=$("${SCRIPTS_DIR}/opencode-find-session" "DCP Test: ${provider}" 2>/dev/null | head -1 || echo "") + fi + + if [[ -z "$SESSION_ID" ]]; then + log "Error: Could not find session ID, cannot continue session" + log "Will create new sessions for each prompt (cache test will be less meaningful)" + fi + + log "Session ID: ${SESSION_ID:-unknown}" + echo "$SESSION_ID" > "${provider_dir}/session_id.txt" + else + # Subsequent prompts - continue session with normal output + if [[ "$USE_SERVER" == "true" ]]; then + opencode run --attach "http://localhost:${SERVER_PORT}" \ + -m "${model}" \ + --session "${SESSION_ID}" \ + "${prompt}" 2>&1 | tee "$output_file" + else + opencode run \ + -m "${model}" \ + --session "${SESSION_ID}" \ + "${prompt}" 2>&1 | tee "$output_file" + fi + fi + + log "Output saved to: ${output_file:-$json_file}" + fi + done + + # Collect analysis after all prompts for this provider + if [[ "$DRY_RUN" == "false" ]] && [[ -n "$SESSION_ID" ]]; then + log "" + log "Collecting cache analysis for session 
${SESSION_ID}..." + + # Session timeline + "${SCRIPTS_DIR}/opencode-session-timeline" --session "$SESSION_ID" --no-color > "${provider_dir}/session_timeline.txt" 2>&1 || true + "${SCRIPTS_DIR}/opencode-session-timeline" --session "$SESSION_ID" --json > "${provider_dir}/session_timeline.json" 2>&1 || true + + # Token stats + "${SCRIPTS_DIR}/opencode-token-stats" --session "$SESSION_ID" > "${provider_dir}/token_stats.txt" 2>&1 || true + "${SCRIPTS_DIR}/opencode-token-stats" --session "$SESSION_ID" --json > "${provider_dir}/token_stats.json" 2>&1 || true + + # DCP stats + "${SCRIPTS_DIR}/opencode-dcp-stats" --session "$SESSION_ID" > "${provider_dir}/dcp_stats.txt" 2>&1 || true + "${SCRIPTS_DIR}/opencode-dcp-stats" --session "$SESSION_ID" --json > "${provider_dir}/dcp_stats.json" 2>&1 || true + + log "Analysis saved to: ${provider_dir}/" + elif [[ "$DRY_RUN" == "true" ]]; then + echo "" + echo " # After session completes:" + echo " $ ${SCRIPTS_DIR}/opencode-session-timeline --session \$SESSION_ID > ${provider_dir}/session_timeline.txt" + echo " $ ${SCRIPTS_DIR}/opencode-token-stats --session \$SESSION_ID > ${provider_dir}/token_stats.txt" + echo " $ ${SCRIPTS_DIR}/opencode-dcp-stats --session \$SESSION_ID > ${provider_dir}/dcp_stats.txt" + fi +done + +log_section "Test Complete" +if [[ "$DRY_RUN" == "false" ]]; then + log "Results saved to: ${RUN_DIR}" + log "" + log "To view results:" + echo " ls -la ${RUN_DIR}/" + for provider in "${PROVIDERS[@]}"; do + echo " cat ${RUN_DIR}/${provider}/session_timeline.txt" + done +else + log "Dry run complete. Run without --dry-run to execute tests." +fi diff --git a/tsconfig.json b/tsconfig.json index b30286cf..c20d8a54 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -3,7 +3,7 @@ "compilerOptions": { "target": "ES2022", "module": "ESNext", - "lib": ["ES2022"], + "lib": ["ES2023"], "moduleResolution": "bundler", "resolveJsonModule": true, "allowJs": true,