MetaMask · grypez · Oct 15, 2025 · Oct 8, 2025 · Oct 8, 2025 · Sep 2, 2025
@@ -0,0 +1,10 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+[Unreleased]: https://github.com/MetaMask/ocap-kernel/
@@ -0,0 +1,41 @@
+# `@ocap/kernel-agents`
+
+Capability-enabled, language-model-flow-controlled programming.
+
+## Installation
+
+`yarn add @ocap/kernel-agents`
+
+or
+
+`npm install @ocap/kernel-agents`
+
+## Contributing
+
+This package is part of a monorepo. Instructions for contributing can be found in the [monorepo README](https://github.com/MetaMask/ocap-kernel#readme).
+
+## Running E2E Tests
+
+The end-to-end tests assume an [ollama](https://ollama.com/) server is running on `localhost:11434` and has the [DEFAULT_MODEL](./test/constants.ts) already pulled.
+
+### Pulling an Ollama model (CLI)
+
+`ollama pull 'llama3.1:latest'`
+
+### Pulling an Ollama model (curl)
+
+```sh
+curl -X POST http://localhost:11434/api/pull -d '{
+  "name": "llama3.1:latest"
+}'
+```
+
+### Test Commands
+
+To run the test suite, use the `yarn test:e2e` command. Ollama configuration errors will be detected by the [suite tests](./test/e2e/suite.test.ts).
+
+To observe intermediate steps, including prompts provided to the agent, use the `--no-silent` flag.
+
+```sh
+yarn test:e2e --no-silent
+```
@@ -0,0 +1,89 @@
+{
+  "name": "@ocap/kernel-agents",
+  "version": "0.0.0",
+  "private": true,
+  "description": "Capability-enabled, language-model-flow-controlled programming",
+  "homepage": "https://github.com/MetaMask/ocap-kernel/tree/main/packages/kernel-agents#readme",
+  "bugs": {
+    "url": "https://github.com/MetaMask/ocap-kernel/issues"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/MetaMask/ocap-kernel.git"
+  },
+  "type": "module",
+  "exports": {
+    ".": {
+      "import": {
+        "types": "./dist/index.d.mts",
+        "default": "./dist/index.mjs"
+      },
+      "require": {
+        "types": "./dist/index.d.cts",
+        "default": "./dist/index.cjs"
+      }
+    },
+    "./package.json": "./package.json"
+  },
+  "files": [
+    "dist/"
+  ],
+  "scripts": {
+    "build": "ts-bridge --project tsconfig.build.json --no-references --clean",
+    "build:docs": "typedoc",
+    "changelog:validate": "../../scripts/validate-changelog.sh @ocap/kernel-agents",
+    "clean": "rimraf --glob './*.tsbuildinfo' ./.eslintcache ./coverage ./dist",
+    "lint": "yarn lint:eslint && yarn lint:misc --check && yarn constraints && yarn lint:dependencies",
+    "lint:dependencies": "depcheck",
+    "lint:eslint": "eslint . --cache",
+    "lint:fix": "yarn lint:eslint --fix && yarn lint:misc --write && yarn constraints --fix && yarn lint:dependencies",
+    "lint:misc": "prettier --no-error-on-unmatched-pattern '**/*.json' '**/*.md' '**/*.html' '!**/CHANGELOG.old.md' '**/*.yml' '!.yarnrc.yml' '!merged-packages/**' --ignore-path ../../.gitignore",
+    "publish:preview": "yarn npm publish --tag preview",
+    "test": "vitest run --config vitest.config.ts",
+    "test:e2e": "vitest run --config vitest.config.e2e.ts",
+    "test:clean": "yarn test --no-cache --coverage.clean",
+    "test:dev": "yarn test --mode development",
+    "test:verbose": "yarn test --reporter verbose",
+    "test:watch": "vitest --config vitest.config.ts"
+  },
+  "devDependencies": {
+    "@arethetypeswrong/cli": "^0.17.4",
+    "@metamask/auto-changelog": "^5.0.1",
+    "@metamask/eslint-config": "^14.0.0",
+    "@metamask/eslint-config-nodejs": "^14.0.0",
+    "@metamask/eslint-config-typescript": "^14.0.0",
+    "@ocap/repo-tools": "workspace:^",
+    "@ts-bridge/cli": "^0.6.3",
+    "@ts-bridge/shims": "^0.1.1",
+    "@types/node": "^22.13.1",
+    "@typescript-eslint/eslint-plugin": "^8.29.0",
+    "@typescript-eslint/parser": "^8.29.0",
+    "@typescript-eslint/utils": "^8.29.0",
+    "@vitest/eslint-plugin": "^1.3.4",
+    "depcheck": "^1.4.7",
+    "eslint": "^9.23.0",
+    "eslint-config-prettier": "^10.1.1",
+    "eslint-import-resolver-typescript": "^4.3.1",
+    "eslint-plugin-import-x": "^4.10.0",
+    "eslint-plugin-jsdoc": "^50.6.9",
+    "eslint-plugin-n": "^17.17.0",
+    "eslint-plugin-prettier": "^5.2.6",
+    "eslint-plugin-promise": "^7.2.1",
+    "prettier": "^3.5.3",
+    "rimraf": "^6.0.1",
+    "turbo": "^2.5.6",
+    "typedoc": "^0.28.1",
+    "typescript": "~5.8.2",
+    "typescript-eslint": "^8.29.0",
+    "vite": "^7.1.2",
+    "vitest": "^3.2.4"
+  },
+  "engines": {
+    "node": "^20.6 || >=22"
+  },
+  "dependencies": {
+    "@metamask/kernel-utils": "workspace:^",
+    "@metamask/logger": "workspace:^",
+    "@ocap/kernel-language-model-service": "workspace:^"
+  }
+}
@@ -0,0 +1,124 @@
+import '@ocap/repo-tools/test-utils/mock-endoify';
+
+import type { Logger } from '@metamask/logger';
+import { vi, describe, it, expect } from 'vitest';
+
+import { makeAgent } from './agent.ts';
+import { capability } from './capability.ts';
+import { end } from './default-capabilities.ts';
+import { AssistantMessage, CapabilityResultMessage } from './messages.ts';
+import { makeChat } from './prompt.ts';
+
+// Fixed prompt/prefix pair returned by the mocked chat below. The prefix is the
+// opening of an assistant message JSON object; every mocked LLM response in
+// this file supplies the remainder of that object.
+const prompt = 'test prompt';
+const prefix = '{"messageType":"assistant","';
+
+// Stub out the chat so these tests do not depend on prompt construction.
+// The inner closures read `prompt` and `prefix` lazily (at call time), which
+// keeps this factory safe under vitest's hoisting of `vi.mock`.
+vi.mock('./prompt.ts', () => ({
+  makeChat: vi.fn(() => ({
+    getPromptAndPrefix: vi.fn(() => ({ prompt, prefix })),
+    pushMessages: vi.fn(),
+  })),
+}));
+
+describe('makeAgent', () => {
+  /**
+   * Build a stub language model whose `sample` resolves to a stream that
+   * yields one `{ response }` item per provided chunk, along with a mocked
+   * `abort` function.
+   *
+   * @param chunks - The response chunks to stream, in order.
+   * @returns The stub language model.
+   */
+  const mockLlm = (...chunks: string[]) => ({
+    getInfo: vi.fn(),
+    load: vi.fn(),
+    unload: vi.fn(),
+    sample: vi.fn().mockResolvedValue({
+      stream: {
+        async *[Symbol.asyncIterator]() {
+          for (const chunk of chunks) {
+            yield { response: chunk };
+          }
+        },
+      },
+      abort: vi.fn(),
+    }),
+  });
+
+  it('makes an agent', () => {
+    const llm = mockLlm();
+    const agent = makeAgent({ llm, capabilities: {} });
+    expect(agent).toBeDefined();
+    expect(agent).toHaveProperty('task');
+  });
+
+  it('endows the "end" capability by default', async () => {
+    const llm = mockLlm();
+    const mockMergeDisjointRecordsSpy = vi.spyOn(
+      await import('@metamask/kernel-utils'),
+      'mergeDisjointRecords',
+    );
+    const capabilities = {};
+    makeAgent({ llm, capabilities });
+    expect(mockMergeDisjointRecordsSpy).toHaveBeenCalledWith(
+      { end },
+      capabilities,
+    );
+  });
+
+  describe('task', () => {
+    it('invokes the LLM', async () => {
+      // Completes the prefix into an assistant message that invokes `end`.
+      const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`);
+      const agent = makeAgent({ llm, capabilities: {} });
+      const result = await agent.task('');
+      expect(result).toBe('x');
+      // This is a massive understatement, but we don't want to test the prompt
+      expect(llm.sample).toHaveBeenCalledWith(prompt);
+    });
+
+    it('throws if the LLM did not invoke a capability', async () => {
+      // LLM finishes valid JSON, but no invoke property
+      const llm = mockLlm(`content":""}`);
+      const agent = makeAgent({ llm, capabilities: {} });
+      const task = agent.task('');
+      await expect(task).rejects.toThrow('No invoke in result');
+    });
+
+    it('throws if invocation budget is exceeded', async () => {
+      const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`);
+      const agent = makeAgent({ llm, capabilities: {} });
+      // A budget of zero means the agent never samples the LLM at all.
+      const task = agent.task('', { invocationBudget: 0 });
+      await expect(task).rejects.toThrow('Invocation budget exceeded');
+    });
+
+    // XXX This test reflects a poor factorization of the agent.
+    it('pushes messages to the transcript', async () => {
+      const llm = mockLlm(`invoke":[{"name":"test","args":{}}]}`);
+      const pushMessages = vi.fn();
+      // Override the chat for this task only. `agent.task` calls `makeChat`
+      // exactly once, so a one-shot override is sufficient and does not leak
+      // into subsequent tests.
+      vi.mocked(makeChat).mockReturnValueOnce({
+        getPromptAndPrefix: vi.fn(() => ({ prompt, prefix })),
+        pushMessages,
+      });
+      const agent = makeAgent({
+        llm,
+        capabilities: {
+          test: capability(async () => 'test', {
+            description: 'test',
+            args: {},
+            returns: { type: 'string' },
+          }),
+        },
+      });
+      // The single budgeted invocation calls `test` rather than `end`, so the
+      // exchange is pushed to the transcript before the budget runs out.
+      const task = agent.task('test', { invocationBudget: 1 });
+      await expect(task).rejects.toThrow('Invocation budget exceeded');
+      expect(pushMessages).toHaveBeenCalledWith(
+        expect.any(AssistantMessage),
+        expect.any(CapabilityResultMessage),
+      );
+    });
+
+    it('logs to the provided logger', async () => {
+      const llm = mockLlm(`invoke":[{"name":"end","args":{"final":"x"}}]}`);
+      // `subLogger` returns the same object, so calls made through the
+      // task-scoped logger are observable on `logger.info`.
+      const logger = {
+        info: vi.fn(),
+        subLogger: vi.fn(() => logger),
+      } as unknown as Logger;
+      const agent = makeAgent({ llm, capabilities: {}, logger });
+      await agent.task('test', { invocationBudget: 1 });
+      expect(logger.info).toHaveBeenCalledWith('query:', 'test');
+      expect(logger.subLogger).toHaveBeenCalledWith({ tags: ['t001'] });
+    });
+  });
+});
@@ -0,0 +1,98 @@
+import { makeCounter, mergeDisjointRecords } from '@metamask/kernel-utils';
+import type { Logger } from '@metamask/logger';
+import type { LanguageModel } from '@ocap/kernel-language-model-service';
+
+import { invokeCapabilities } from './capability.ts';
+import { end } from './default-capabilities.ts';
+import { AssistantMessage, CapabilityResultMessage } from './messages.ts';
+import type { AssistantMessageJson } from './messages.ts';
+import { gatherStreamingResponse, makeIncrementalParser } from './parser.ts';
+import { makeChat } from './prompt.ts';
+import type { Agent, CapabilityRecord } from './types.ts';
+
+/**
+ * Make a capability-augmented agent.
+ *
+ * The provided capabilities are merged with the default `end` capability. Each
+ * call to the returned agent's `task` method repeatedly samples the language
+ * model and invokes the capabilities it requests, until `end` is invoked or
+ * the invocation budget runs out.
+ *
+ * @param args - The arguments to make the agent.
+ * @param args.llm - The language model to use for the agent
+ * @param args.capabilities - The agent's capabilities
+ * @param args.logger - The logger to use for the agent
+ * @returns A kernel agent
+ */
+export const makeAgent = ({
+  llm,
+  capabilities,
+  logger,
+}: {
+  llm: LanguageModel<unknown, { response: string }>;
+  capabilities: CapabilityRecord;
+  logger?: Logger;
+}): Agent => {
+  const agentCapabilities = mergeDisjointRecords(
+    { end },
+    capabilities,
+  ) as CapabilityRecord;
+
+  // Shared across all tasks of this agent so that each task gets a distinct id.
+  const taskCounter = makeCounter();
+
+  return {
+    /**
+     * Run a single task to completion.
+     *
+     * @param query - The query describing the task.
+     * @param options - The task options.
+     * @param options.invocationBudget - The maximum number of times the
+     * language model may be sampled for this task. Defaults to 10.
+     * @returns The result of the `end` capability invocation.
+     * @throws If the language model responds without an `invoke` property, or
+     * if the budget is exhausted before `end` is invoked.
+     */
+    task: async (
+      query: string,
+      { invocationBudget = 10 }: { invocationBudget?: number } = {},
+    ) => {
+      // XXX Tasks could be integrated deeper in the kernel
+      const taskId = `t${taskCounter().toString().padStart(3, '0')}`;
+      // All logging for this task goes through a sublogger tagged with its id.
+      const taskLogger = logger?.subLogger({ tags: [taskId] });
+      taskLogger?.info('query:', query);
+
+      const { getPromptAndPrefix, pushMessages } = makeChat(
+        agentCapabilities,
+        query,
+      );
+
+      for (let invocation = 0; invocation < invocationBudget; invocation++) {
+        taskLogger?.info(`begin invocation ${invocation}/${invocationBudget}`);
+
+        const { prompt, prefix } = getPromptAndPrefix();
+        const parse = makeIncrementalParser<AssistantMessageJson>({
+          prefix,
+          // Only pass the logger property when there is a logger to pass.
+          ...(taskLogger ? { logger: taskLogger } : {}),
+        });
+        taskLogger?.info('prompt:', prompt);
+
+        const { stream, abort } = await llm.sample(prompt);
+        let assistantMessage: AssistantMessageJson;
+        try {
+          assistantMessage = await gatherStreamingResponse({
+            stream,
+            parse,
+          });
+        } finally {
+          // Stop the LLM from generating anymore
+          await abort();
+        }
+        taskLogger?.info('assistantMessage:', assistantMessage);
+
+        // TODO: this should already be validated by the parser
+        if (!assistantMessage.invoke) {
+          throw new Error('No invoke in result');
+        }
+        const results = await invokeCapabilities(
+          assistantMessage.invoke,
+          agentCapabilities,
+        );
+        taskLogger?.info('results:', results);
+        // Invoking `end` terminates the task with that invocation's result.
+        const didEnd = results.find(({ name }) => name === 'end');
+        if (didEnd) {
+          taskLogger?.info('exit invocation with result:', didEnd.result);
+          return didEnd.result;
+        }
+        // Record the exchange so the next prompt includes it.
+        pushMessages(
+          new AssistantMessage(assistantMessage),
+          new CapabilityResultMessage(results),
+        );
+      }
+      throw new Error('Invocation budget exceeded');
+    },
+  };
+};