-
Notifications
You must be signed in to change notification settings - Fork 7
Add @ocap/kernel-agents package
#668
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
199cbca
12efd46
b961390
42c8240
1ffd595
939345a
2b8f65b
4c10546
14c239c
0da9cd1
238b9c2
548907f
84cbf32
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| # Changelog | ||
|
|
||
| All notable changes to this project will be documented in this file. | ||
|
|
||
| The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), | ||
| and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). | ||
|
|
||
| ## [Unreleased] | ||
|
|
||
| [Unreleased]: https://github.com/MetaMask/ocap-kernel/ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| # `@ocap/kernel-agents` | ||
|
|
||
| Capability-enabled, language-model-flow-controlled programming. | ||
|
|
||
| ## Installation | ||
|
|
||
| `yarn add @ocap/kernel-agents` | ||
|
|
||
| or | ||
|
|
||
| `npm install @ocap/kernel-agents` | ||
|
|
||
| ## Contributing | ||
|
|
||
| This package is part of a monorepo. Instructions for contributing can be found in the [monorepo README](https://github.com/MetaMask/ocap-kernel#readme). | ||
|
|
||
| ## Running E2E Tests | ||
|
|
||
| The end-to-end tests assume an [Ollama](https://ollama.com/) server is running on `localhost:11434` and has the [DEFAULT_MODEL](./test/constants.ts) already pulled. | ||
|
|
||
| ### Pulling an Ollama model (CLI) | ||
|
|
||
| `ollama pull 'llama3.1:latest'` | ||
|
|
||
| ### Pulling an Ollama model (curl) | ||
|
|
||
| ```sh | ||
| curl -X POST http://localhost:11434/api/pull -d '{ | ||
| "name": "llama3.1:latest" | ||
| }' | ||
| ``` | ||
|
|
||
| ### Test Commands | ||
|
|
||
| To run the test suite, use the `yarn test:e2e` command. Ollama configuration errors will be detected by the [suite tests](./test/e2e/suite.test.ts). | ||
|
|
||
| To observe intermediate steps, including prompts provided to the agent, use the `--no-silent` flag. | ||
|
|
||
| ```sh | ||
| yarn test:e2e --no-silent | ||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| { | ||
| "name": "@ocap/kernel-agents", | ||
| "version": "0.0.0", | ||
| "private": true, | ||
| "description": "Capability-enabled, language-model-flow-controlled programming", | ||
| "homepage": "https://github.com/MetaMask/ocap-kernel/tree/main/packages/kernel-agents#readme", | ||
| "bugs": { | ||
| "url": "https://github.com/MetaMask/ocap-kernel/issues" | ||
| }, | ||
| "repository": { | ||
| "type": "git", | ||
| "url": "https://github.com/MetaMask/ocap-kernel.git" | ||
| }, | ||
| "type": "module", | ||
| "exports": { | ||
| ".": { | ||
| "import": { | ||
| "types": "./dist/index.d.mts", | ||
| "default": "./dist/index.mjs" | ||
| }, | ||
| "require": { | ||
| "types": "./dist/index.d.cts", | ||
| "default": "./dist/index.cjs" | ||
| } | ||
| }, | ||
| "./package.json": "./package.json" | ||
| }, | ||
| "files": [ | ||
| "dist/" | ||
| ], | ||
| "scripts": { | ||
| "build": "ts-bridge --project tsconfig.build.json --no-references --clean", | ||
| "build:docs": "typedoc", | ||
| "changelog:validate": "../../scripts/validate-changelog.sh @ocap/kernel-agents", | ||
| "clean": "rimraf --glob './*.tsbuildinfo' ./.eslintcache ./coverage ./dist", | ||
| "lint": "yarn lint:eslint && yarn lint:misc --check && yarn constraints && yarn lint:dependencies", | ||
| "lint:dependencies": "depcheck", | ||
| "lint:eslint": "eslint . --cache", | ||
| "lint:fix": "yarn lint:eslint --fix && yarn lint:misc --write && yarn constraints --fix && yarn lint:dependencies", | ||
| "lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore", | ||
| "publish:preview": "yarn npm publish --tag preview", | ||
| "test": "vitest run --config vitest.config.ts", | ||
| "test:e2e": "vitest run --config vitest.config.e2e.ts", | ||
| "test:clean": "yarn test --no-cache --coverage.clean", | ||
| "test:dev": "yarn test --mode development", | ||
| "test:verbose": "yarn test --reporter verbose", | ||
| "test:watch": "vitest --config vitest.config.ts" | ||
| }, | ||
| "devDependencies": { | ||
| "@arethetypeswrong/cli": "^0.17.4", | ||
| "@metamask/auto-changelog": "^5.0.1", | ||
| "@metamask/eslint-config": "^14.0.0", | ||
| "@metamask/eslint-config-nodejs": "^14.0.0", | ||
| "@metamask/eslint-config-typescript": "^14.0.0", | ||
| "@ocap/repo-tools": "workspace:^", | ||
| "@ts-bridge/cli": "^0.6.3", | ||
| "@ts-bridge/shims": "^0.1.1", | ||
| "@types/node": "^22.13.1", | ||
| "@typescript-eslint/eslint-plugin": "^8.29.0", | ||
| "@typescript-eslint/parser": "^8.29.0", | ||
| "@typescript-eslint/utils": "^8.29.0", | ||
| "@vitest/eslint-plugin": "^1.3.4", | ||
| "depcheck": "^1.4.7", | ||
| "eslint": "^9.23.0", | ||
| "eslint-config-prettier": "^10.1.1", | ||
| "eslint-import-resolver-typescript": "^4.3.1", | ||
| "eslint-plugin-import-x": "^4.10.0", | ||
| "eslint-plugin-jsdoc": "^50.6.9", | ||
| "eslint-plugin-n": "^17.17.0", | ||
| "eslint-plugin-prettier": "^5.2.6", | ||
| "eslint-plugin-promise": "^7.2.1", | ||
| "prettier": "^3.5.3", | ||
| "rimraf": "^6.0.1", | ||
| "turbo": "^2.5.6", | ||
| "typedoc": "^0.28.1", | ||
| "typescript": "~5.8.2", | ||
| "typescript-eslint": "^8.29.0", | ||
| "vite": "^7.1.2", | ||
| "vitest": "^3.2.4" | ||
| }, | ||
| "engines": { | ||
| "node": "^20.6 || >=22" | ||
| }, | ||
| "dependencies": { | ||
| "@metamask/kernel-utils": "workspace:^", | ||
| "@metamask/logger": "workspace:^", | ||
| "@ocap/kernel-language-model-service": "workspace:^" | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| import '@ocap/repo-tools/test-utils/mock-endoify'; | ||
|
|
||
| import type { Logger } from '@metamask/logger'; | ||
| import { vi, describe, it, expect } from 'vitest'; | ||
|
|
||
| import { makeAgent } from './agent.ts'; | ||
| import { capability } from './capability.ts'; | ||
| import { end } from './default-capabilities.ts'; | ||
| import { AssistantMessage, CapabilityResultMessage } from './messages.ts'; | ||
| import { makeChat } from './prompt.ts'; | ||
|
|
||
| const prompt = 'test prompt'; | ||
| const prefix = '{"messageType":"assistant","'; | ||
|
|
||
| vi.mock('./prompt.ts', () => ({ | ||
| makeChat: vi.fn(() => ({ | ||
| getPromptAndPrefix: vi.fn(() => ({ prompt, prefix })), | ||
| pushMessages: vi.fn(), | ||
| })), | ||
| })); | ||
|
|
||
| describe('makeAgent', () => { | ||
| const mockLlm = (...chunks: string[]) => ({ | ||
| getInfo: vi.fn(), | ||
| load: vi.fn(), | ||
| unload: vi.fn(), | ||
| sample: vi.fn().mockResolvedValue({ | ||
| stream: { | ||
| async *[Symbol.asyncIterator]() { | ||
| for (const chunk of chunks) { | ||
| yield { response: chunk }; | ||
| } | ||
| }, | ||
| }, | ||
| abort: vi.fn(), | ||
| }), | ||
| }); | ||
|
|
||
| it('makes an agent', () => { | ||
| const llm = mockLlm(); | ||
| const agent = makeAgent({ llm, capabilities: {} }); | ||
| expect(agent).toBeDefined(); | ||
| expect(agent).toHaveProperty('task'); | ||
| }); | ||
|
|
||
| it('endows the "end" capability by default', async () => { | ||
| const llm = mockLlm(); | ||
| const mockMergeDisjointRecordsSpy = vi.spyOn( | ||
| await import('@metamask/kernel-utils'), | ||
| 'mergeDisjointRecords', | ||
| ); | ||
| const capabilities = {}; | ||
| makeAgent({ llm, capabilities }); | ||
| expect(mockMergeDisjointRecordsSpy).toHaveBeenCalledWith( | ||
| { end }, | ||
| capabilities, | ||
| ); | ||
| }); | ||
|
|
||
| describe('task', () => { | ||
| it('invokes the LLM', async () => { | ||
| const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`); | ||
| const agent = makeAgent({ llm, capabilities: {} }); | ||
| const result = await agent.task(''); | ||
| expect(result).toBe('x'); | ||
| // Only checks that the mocked prompt was forwarded to llm.sample; the prompt contents are deliberately not tested here | ||
| expect(llm.sample).toHaveBeenCalledWith(prompt); | ||
| }); | ||
|
|
||
| it('throws if the LLM did not invoke a capability', async () => { | ||
| // Appended to `prefix`, this chunk forms valid JSON that has no `invoke` property | ||
| const llm = mockLlm(`content":""}`); | ||
| const agent = makeAgent({ llm, capabilities: {} }); | ||
| const task = agent.task(''); | ||
| await expect(task).rejects.toThrow('No invoke in result'); | ||
| }); | ||
|
|
||
| it('throws if invocation budget is exceeded', async () => { | ||
| const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`); | ||
| const agent = makeAgent({ llm, capabilities: {} }); | ||
| const task = agent.task('', { invocationBudget: 0 }); | ||
| await expect(task).rejects.toThrow('Invocation budget exceeded'); | ||
| }); | ||
|
|
||
| // XXX This test reflects a poor factorization of the agent: makeChat has to be re-mocked just to observe pushMessages. | ||
| it('pushes messages to the transcript', async () => { | ||
| const llm = mockLlm(`invoke":[{"name":"test","args":{}}]}`); | ||
| const pushMessages = vi.fn(); | ||
| vi.mocked(makeChat).mockReturnValue({ | ||
| getPromptAndPrefix: vi.fn(() => ({ prompt, prefix })), | ||
| pushMessages, | ||
| }); | ||
| const { makeAgent: makeAgent2 } = await import('./agent.ts'); | ||
| const agent = makeAgent2({ | ||
| llm, | ||
| capabilities: { | ||
| test: capability(async () => 'test', { | ||
| description: 'test', | ||
| args: {}, | ||
| returns: { type: 'string' }, | ||
| }), | ||
| }, | ||
| }); | ||
| const task = agent.task('test', { invocationBudget: 1 }); | ||
| await expect(task).rejects.toThrow('Invocation budget exceeded'); | ||
| expect(pushMessages).toHaveBeenCalledWith( | ||
| expect.any(AssistantMessage), | ||
| expect.any(CapabilityResultMessage), | ||
| ); | ||
| }); | ||
|
|
||
| it('logs to the provided logger', async () => { | ||
| const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`); | ||
| const logger = { | ||
| info: vi.fn(), | ||
| subLogger: vi.fn(() => logger), | ||
| } as unknown as Logger; | ||
| const agent = makeAgent({ llm, capabilities: {}, logger }); | ||
| await agent.task('test', { invocationBudget: 1 }); | ||
| expect(logger.info).toHaveBeenCalledWith('query:', 'test'); | ||
| expect(logger.subLogger).toHaveBeenCalledWith({ tags: ['t001'] }); | ||
| }); | ||
| }); | ||
| }); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| import { makeCounter, mergeDisjointRecords } from '@metamask/kernel-utils'; | ||
| import type { Logger } from '@metamask/logger'; | ||
| import type { LanguageModel } from '@ocap/kernel-language-model-service'; | ||
|
|
||
| import { invokeCapabilities } from './capability.ts'; | ||
| import { end } from './default-capabilities.ts'; | ||
| import { AssistantMessage, CapabilityResultMessage } from './messages.ts'; | ||
| import type { AssistantMessageJson } from './messages.ts'; | ||
| import { gatherStreamingResponse, makeIncrementalParser } from './parser.ts'; | ||
| import { makeChat } from './prompt.ts'; | ||
| import type { Agent, CapabilityRecord } from './types.ts'; | ||
|
|
||
| /** | ||
| * Make a capability-augmented agent. | ||
| * | ||
| * The supplied capabilities are merged with the default `end` capability | ||
| * using `mergeDisjointRecords`. | ||
| * | ||
| * @param args - The arguments to make the agent. | ||
| * @param args.llm - The language model to use for the agent | ||
| * @param args.capabilities - The agent's capabilities | ||
| * @param args.logger - The logger to use for the agent; each task logs | ||
| * through a sub-logger tagged with its task id. | ||
| * @returns A kernel agent | ||
| */ | ||
| export const makeAgent = ({ | ||
| llm, | ||
rekmarks marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| capabilities, | ||
| logger, | ||
| }: { | ||
| llm: LanguageModel<unknown, { response: string }>; | ||
| capabilities: CapabilityRecord; | ||
| logger?: Logger; | ||
| }): Agent => { | ||
| const agentCapabilities = mergeDisjointRecords( | ||
| { end }, | ||
| capabilities, | ||
| ) as CapabilityRecord; | ||
|
|
||
| const taskCounter = makeCounter(); | ||
|
|
||
| return { | ||
| /** | ||
| * Run a task: repeatedly sample the LLM and invoke the capabilities it | ||
| * requests until the `end` capability is invoked. | ||
| * | ||
| * @param query - The task query handed to the chat. | ||
| * @param options - Task options. | ||
| * @param options.invocationBudget - The number of sample-and-invoke | ||
| * rounds the agent may perform before the task throws. Defaults to 10. | ||
| * @returns The result of the `end` capability invocation. | ||
| * @throws If the assistant message has no `invoke` property, or if the | ||
| * invocation budget is exhausted before `end` is invoked. | ||
| */ | ||
| task: async ( | ||
| query: string, | ||
| { invocationBudget = 10 }: { invocationBudget?: number } = {}, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's this for?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's the number of times the agent can invoke capabilities before the task throws. You can imagine that by the 10th call to a tool without completing the task, this particular approach is probably not going to succeed. |
||
| ) => { | ||
| // XXX Tasks could be integrated deeper in the kernel | ||
| const taskId = `t${taskCounter().toString().padStart(3, '0')}`; | ||
| const taskLogger = logger?.subLogger({ tags: [taskId] }); | ||
| taskLogger?.info('query:', query); | ||
|
|
||
| const { getPromptAndPrefix, pushMessages } = makeChat( | ||
| agentCapabilities, | ||
| query, | ||
| ); | ||
|
|
||
| for (let invocation = 0; invocation < invocationBudget; invocation++) { | ||
| taskLogger?.info(`begin invocation ${invocation}/${invocationBudget}`); | ||
|
|
||
| const { prompt, prefix } = getPromptAndPrefix(); | ||
| const parse = makeIncrementalParser<AssistantMessageJson>({ | ||
| prefix, | ||
| ...(taskLogger ? { logger: taskLogger } : {}), | ||
| }); | ||
| taskLogger?.info('prompt:', prompt); | ||
|
|
||
| const { stream, abort } = await llm.sample(prompt); | ||
| let assistantMessage: AssistantMessageJson; | ||
| try { | ||
| assistantMessage = await gatherStreamingResponse({ | ||
| stream, | ||
| parse, | ||
| }); | ||
| } finally { | ||
| // Stop the LLM from generating anymore | ||
| await abort(); | ||
| } | ||
| taskLogger?.info('assistantMessage:', assistantMessage); | ||
grypez marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // TODO: this should already be validated by the parser | ||
| if (!assistantMessage.invoke) { | ||
| throw new Error('No invoke in result'); | ||
| } | ||
| const results = await invokeCapabilities( | ||
| assistantMessage.invoke, | ||
| agentCapabilities, | ||
| ); | ||
| // Log through the task-scoped logger so these messages carry the task tag | ||
| // like every other message emitted while running the task. | ||
| taskLogger?.info('results:', results); | ||
| const didEnd = results.find((capability) => capability.name === 'end'); | ||
| if (didEnd) { | ||
| taskLogger?.info('exit invocation with result:', didEnd.result); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| return didEnd.result; | ||
| } | ||
| // The task has not ended: record this round in the transcript before the next prompt is built. | ||
| pushMessages( | ||
| new AssistantMessage(assistantMessage), | ||
| new CapabilityResultMessage(results), | ||
| ); | ||
| } | ||
| throw new Error('Invocation budget exceeded'); | ||
| }, | ||
| }; | ||
| }; | ||
Uh oh!
There was an error while loading. Please reload this page.