Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions packages/kernel-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

[Unreleased]: https://github.com/MetaMask/ocap-kernel/
41 changes: 41 additions & 0 deletions packages/kernel-agents/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# `@ocap/kernel-agents`

Capability-enabled, language-model-flow-controlled programming.

## Installation

`yarn add @ocap/kernel-agents`

or

`npm install @ocap/kernel-agents`

## Contributing

This package is part of a monorepo. Instructions for contributing can be found in the [monorepo README](https://github.com/MetaMask/ocap-kernel#readme).

## Running E2E Tests

The end-to-end tests assume an [ollama](https://ollama.com/) server is running on `localhost:11434` and has the [DEFAULT_MODEL](./test/constants.ts) already pulled.

### Pulling an Ollama model (CLI)

`ollama pull 'llama3.1:latest'`

### Pulling an Ollama model (curl)

```sh
curl -X POST http://localhost:11434/api/pull -d '{
"name": "llama3.1:latest"
}'
```

### Test Commands

To run the test suite, use the `yarn test:e2e` command. Ollama configuration errors will be detected by the [suite tests](./test/e2e/suite.test.ts).

To observe intermediate steps, including prompts provided to the agent, use the `--no-silent` flag.

```sh
yarn test:e2e --no-silent
```
89 changes: 89 additions & 0 deletions packages/kernel-agents/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"name": "@ocap/kernel-agents",
"version": "0.0.0",
"private": true,
"description": "Capability-enabled, language-model-flow-controlled programming",
"homepage": "https://github.com/MetaMask/ocap-kernel/tree/main/packages/kernel-agents#readme",
"bugs": {
"url": "https://github.com/MetaMask/ocap-kernel/issues"
},
"repository": {
"type": "git",
"url": "https://github.com/MetaMask/ocap-kernel.git"
},
"type": "module",
"exports": {
".": {
"import": {
"types": "./dist/index.d.mts",
"default": "./dist/index.mjs"
},
"require": {
"types": "./dist/index.d.cts",
"default": "./dist/index.cjs"
}
},
"./package.json": "./package.json"
},
"files": [
"dist/"
],
"scripts": {
"build": "ts-bridge --project tsconfig.build.json --no-references --clean",
"build:docs": "typedoc",
"changelog:validate": "../../scripts/validate-changelog.sh @ocap/kernel-agents",
"clean": "rimraf --glob './*.tsbuildinfo' ./.eslintcache ./coverage ./dist",
"lint": "yarn lint:eslint && yarn lint:misc --check && yarn constraints && yarn lint:dependencies",
"lint:dependencies": "depcheck",
"lint:eslint": "eslint . --cache",
"lint:fix": "yarn lint:eslint --fix && yarn lint:misc --write && yarn constraints --fix && yarn lint:dependencies",
"lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore",
"publish:preview": "yarn npm publish --tag preview",
"test": "vitest run --config vitest.config.ts",
"test:e2e": "vitest run --config vitest.config.e2e.ts",
"test:clean": "yarn test --no-cache --coverage.clean",
"test:dev": "yarn test --mode development",
"test:verbose": "yarn test --reporter verbose",
"test:watch": "vitest --config vitest.config.ts"
},
"devDependencies": {
"@arethetypeswrong/cli": "^0.17.4",
"@metamask/auto-changelog": "^5.0.1",
"@metamask/eslint-config": "^14.0.0",
"@metamask/eslint-config-nodejs": "^14.0.0",
"@metamask/eslint-config-typescript": "^14.0.0",
"@ocap/repo-tools": "workspace:^",
"@ts-bridge/cli": "^0.6.3",
"@ts-bridge/shims": "^0.1.1",
"@types/node": "^22.13.1",
"@typescript-eslint/eslint-plugin": "^8.29.0",
"@typescript-eslint/parser": "^8.29.0",
"@typescript-eslint/utils": "^8.29.0",
"@vitest/eslint-plugin": "^1.3.4",
"depcheck": "^1.4.7",
"eslint": "^9.23.0",
"eslint-config-prettier": "^10.1.1",
"eslint-import-resolver-typescript": "^4.3.1",
"eslint-plugin-import-x": "^4.10.0",
"eslint-plugin-jsdoc": "^50.6.9",
"eslint-plugin-n": "^17.17.0",
"eslint-plugin-prettier": "^5.2.6",
"eslint-plugin-promise": "^7.2.1",
"prettier": "^3.5.3",
"rimraf": "^6.0.1",
"turbo": "^2.5.6",
"typedoc": "^0.28.1",
"typescript": "~5.8.2",
"typescript-eslint": "^8.29.0",
"vite": "^7.1.2",
"vitest": "^3.2.4"
},
"engines": {
"node": "^20.6 || >=22"
},
"dependencies": {
"@metamask/kernel-utils": "workspace:^",
"@metamask/logger": "workspace:^",
"@ocap/kernel-language-model-service": "workspace:^"
}
}
124 changes: 124 additions & 0 deletions packages/kernel-agents/src/agent.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import '@ocap/repo-tools/test-utils/mock-endoify';

import type { Logger } from '@metamask/logger';
import { vi, describe, it, expect } from 'vitest';

import { makeAgent } from './agent.ts';
import { capability } from './capability.ts';
import { end } from './default-capabilities.ts';
import { AssistantMessage, CapabilityResultMessage } from './messages.ts';
import { makeChat } from './prompt.ts';

// Canned prompt returned by the stubbed chat below; the tests assert that this
// exact value is what gets passed to `llm.sample`.
const prompt = 'test prompt';
// Opening fragment of an assistant message. The chunks fed to the mock LLM in
// the tests below are written as the remainder of this JSON object.
const prefix = '{"messageType":"assistant","';

// Replace `./prompt.ts` with a stub whose `makeChat` returns a chat that always
// yields the fixed `prompt`/`prefix` pair and records `pushMessages` calls.
vi.mock('./prompt.ts', () => ({
makeChat: vi.fn(() => ({
getPromptAndPrefix: vi.fn(() => ({ prompt, prefix })),
pushMessages: vi.fn(),
})),
}));

describe('makeAgent', () => {
// Build a stub language model. `sample` resolves to an async-iterable stream
// that yields each given chunk as `{ response: chunk }`, plus an `abort` mock.
const mockLlm = (...chunks: string[]) => ({
getInfo: vi.fn(),
load: vi.fn(),
unload: vi.fn(),
sample: vi.fn().mockResolvedValue({
stream: {
async *[Symbol.asyncIterator]() {
for (const chunk of chunks) {
yield { response: chunk };
}
},
},
abort: vi.fn(),
}),
});

// Smoke test: the factory returns an object exposing `task`.
it('makes an agent', () => {
const llm = mockLlm();
const agent = makeAgent({ llm, capabilities: {} });
expect(agent).toBeDefined();
expect(agent).toHaveProperty('task');
});

// Spies on `mergeDisjointRecords` to confirm that `{ end }` is merged with the
// caller-supplied capabilities when the agent is constructed.
it('endows the "end" capability by default', async () => {
const llm = mockLlm();
const mockMergeDisjointRecordsSpy = vi.spyOn(
await import('@metamask/kernel-utils'),
'mergeDisjointRecords',
);
const capabilities = {};
makeAgent({ llm, capabilities });
expect(mockMergeDisjointRecordsSpy).toHaveBeenCalledWith(
{ end },
capabilities,
);
});

describe('task', () => {
// The single chunk invokes `end` with `final: "x"`, and the task is expected
// to resolve to that value.
it('invokes the LLM', async () => {
const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`);
const agent = makeAgent({ llm, capabilities: {} });
const result = await agent.task('');
expect(result).toBe('x');
// This is a massive understatement, but we don't want to test the prompt
expect(llm.sample).toHaveBeenCalledWith(prompt);
});

it('throws if the LLM did not invoke a capability', async () => {
// LLM finishes valid JSON, but no invoke property
const llm = mockLlm(`content":""}`);
const agent = makeAgent({ llm, capabilities: {} });
const task = agent.task('');
await expect(task).rejects.toThrow('No invoke in result');
});

// With a budget of 0 the task is expected to reject without completing,
// even though the LLM output would have ended it.
it('throws if invocation budget is exceeded', async () => {
const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`);
const agent = makeAgent({ llm, capabilities: {} });
const task = agent.task('', { invocationBudget: 0 });
await expect(task).rejects.toThrow('Invocation budget exceeded');
});

// XXX This test reflects a poor factorization of the agent.
// The LLM invokes the non-terminating `test` capability, so with a budget of
// 1 the task performs one round, pushes that round's messages to the chat,
// and then rejects because the budget is spent.
it('pushes messages to the transcript', async () => {
const llm = mockLlm(`invoke":[{"name":"test","args":{}}]}`);
const pushMessages = vi.fn();
// Override the mocked chat so this test can observe `pushMessages` directly.
vi.mocked(makeChat).mockReturnValue({
getPromptAndPrefix: vi.fn(() => ({ prompt, prefix })),
pushMessages,
});
const { makeAgent: makeAgent2 } = await import('./agent.ts');
const agent = makeAgent2({
llm,
capabilities: {
test: capability(async () => 'test', {
description: 'test',
args: {},
returns: { type: 'string' },
}),
},
});
const task = agent.task('test', { invocationBudget: 1 });
await expect(task).rejects.toThrow('Invocation budget exceeded');
expect(pushMessages).toHaveBeenCalledWith(
expect.any(AssistantMessage),
expect.any(CapabilityResultMessage),
);
});

// `subLogger` returns the same mock, so messages written through the
// task-scoped logger are observable on `logger.info`.
it('logs to the provided logger', async () => {
const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`);
const logger = {
info: vi.fn(),
subLogger: vi.fn(() => logger),
} as unknown as Logger;
const agent = makeAgent({ llm, capabilities: {}, logger });
await agent.task('test', { invocationBudget: 1 });
expect(logger.info).toHaveBeenCalledWith('query:', 'test');
// 't001' is the tag expected for the first task of a fresh agent
// (presumably the task counter starts at 1 — confirm against makeCounter).
expect(logger.subLogger).toHaveBeenCalledWith({ tags: ['t001'] });
});
});
});
98 changes: 98 additions & 0 deletions packages/kernel-agents/src/agent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import { makeCounter, mergeDisjointRecords } from '@metamask/kernel-utils';
import type { Logger } from '@metamask/logger';
import type { LanguageModel } from '@ocap/kernel-language-model-service';

import { invokeCapabilities } from './capability.ts';
import { end } from './default-capabilities.ts';
import { AssistantMessage, CapabilityResultMessage } from './messages.ts';
import type { AssistantMessageJson } from './messages.ts';
import { gatherStreamingResponse, makeIncrementalParser } from './parser.ts';
import { makeChat } from './prompt.ts';
import type { Agent, CapabilityRecord } from './types.ts';

/**
 * Make a capability-augmented agent
 *
 * The default `end` capability is merged with the caller's capabilities via
 * `mergeDisjointRecords`; invoking `end` is how the model finishes a task.
 *
 * @param args - The arguments to make the agent.
 * @param args.llm - The language model to use for the agent
 * @param args.capabilities - The agent's capabilities
 * @param args.logger - The logger to use for the agent
 * @returns A kernel agent
 */
export const makeAgent = ({
  llm,
  capabilities,
  logger,
}: {
  llm: LanguageModel<unknown, { response: string }>;
  capabilities: CapabilityRecord;
  logger?: Logger;
}): Agent => {
  const agentCapabilities = mergeDisjointRecords(
    { end },
    capabilities,
  ) as CapabilityRecord;

  // Each task gets a sequential id, used to tag its log output.
  const taskCounter = makeCounter();

  return {
    /**
     * Run a single task to completion.
     *
     * @param query - The task description given to the agent.
     * @param options - Task options.
     * @param options.invocationBudget - The number of times the agent may
     * invoke capabilities before the task throws. If the task has not
     * completed within this many rounds, the current approach is unlikely to
     * succeed. Defaults to 10.
     * @returns The result of the `end` capability invocation.
     * @throws If the model's message has no `invoke` property, or if the
     * invocation budget is exhausted before `end` is invoked.
     */
    task: async (
      query: string,
      { invocationBudget = 10 }: { invocationBudget?: number } = {},
    ) => {
      // XXX Tasks could be integrated deeper in the kernel
      const taskId = `t${taskCounter().toString().padStart(3, '0')}`;
      const taskLogger = logger?.subLogger({ tags: [taskId] });
      taskLogger?.info('query:', query);

      const { getPromptAndPrefix, pushMessages } = makeChat(
        agentCapabilities,
        query,
      );

      for (let invocation = 0; invocation < invocationBudget; invocation++) {
        taskLogger?.info(`begin invocation ${invocation}/${invocationBudget}`);

        const { prompt, prefix } = getPromptAndPrefix();
        const parse = makeIncrementalParser<AssistantMessageJson>({
          prefix,
          // Only pass `logger` when one exists, rather than an explicit
          // `undefined` property.
          ...(taskLogger ? { logger: taskLogger } : {}),
        });
        taskLogger?.info('prompt:', prompt);

        const { stream, abort } = await llm.sample(prompt);
        let assistantMessage: AssistantMessageJson;
        try {
          assistantMessage = await gatherStreamingResponse({
            stream,
            parse,
          });
        } finally {
          // Stop the LLM from generating anymore
          await abort();
        }
        taskLogger?.info('assistantMessage:', assistantMessage);

        // TODO: this should already be validated by the parser
        if (!assistantMessage.invoke) {
          throw new Error('No invoke in result');
        }
        const results = await invokeCapabilities(
          assistantMessage.invoke,
          agentCapabilities,
        );
        // Log through the task-scoped logger so these messages keep the
        // task id tag, like every other message emitted during the task.
        taskLogger?.info('results:', results);
        const didEnd = results.find((capability) => capability.name === 'end');
        if (didEnd) {
          taskLogger?.info('exit invocation with result:', didEnd.result);
          return didEnd.result;
        }
        // The task is not finished: record this round in the chat so the next
        // prompt includes what the model asked for and what came back.
        pushMessages(
          new AssistantMessage(assistantMessage),
          new CapabilityResultMessage(results),
        );
      }
      throw new Error('Invocation budget exceeded');
    },
  };
};
Loading
Loading