diff --git a/package-lock.json b/package-lock.json index 0b47f87..6d543b2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,16 +1,18 @@ { "name": "@automagik/rlmx", - "version": "0.260330.5", + "version": "0.260330.18", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@automagik/rlmx", - "version": "0.260330.5", + "version": "0.260330.18", "license": "MIT", "dependencies": { "@mariozechner/pi-ai": "0.64.0", - "js-yaml": "^4.1.1" + "js-yaml": "^4.1.1", + "pg": "^8.20.0", + "pgserve": "^1.1.6" }, "bin": { "rlmx": "dist/src/cli.js" @@ -20,6 +22,7 @@ "@commitlint/config-conventional": "^19.0.0", "@types/js-yaml": "^4.0.9", "@types/node": "^22.0.0", + "@types/pg": "^8.11.0", "husky": "^9.0.0", "typescript": "^5.7.0" }, @@ -1051,6 +1054,74 @@ "node": ">=v18" } }, + "node_modules/@embedded-postgres/darwin-arm64": { + "version": "18.2.0-beta.16", + "resolved": "https://registry.npmjs.org/@embedded-postgres/darwin-arm64/-/darwin-arm64-18.2.0-beta.16.tgz", + "integrity": "sha512-wnswaF+uDvGeitqajJ8v8xOG4ttFrzixElwKNe2MIxBXSLWPV3xhi6tBY0Sjw8Lmiu6UG9vNLFZSjHPrIeokBg==", + "cpu": [ + "arm64" + ], + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=16" + } + }, + "node_modules/@embedded-postgres/darwin-x64": { + "version": "18.2.0-beta.16", + "resolved": "https://registry.npmjs.org/@embedded-postgres/darwin-x64/-/darwin-x64-18.2.0-beta.16.tgz", + "integrity": "sha512-u9WtTPxRuO0uOny5IniXHSDaLmtOujwzDoExIV/jFT0Fu8SzpX7wdoPbsSPBLgyQWdr/nPA77K9QI4r6P1/fKA==", + "cpu": [ + "x64" + ], + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=16" + } + }, + "node_modules/@embedded-postgres/linux-x64": { + "version": "18.2.0-beta.16", + "resolved": "https://registry.npmjs.org/@embedded-postgres/linux-x64/-/linux-x64-18.2.0-beta.16.tgz", + "integrity": "sha512-BIt485ioL8/AwDgw37IcdraOfRgHNDOtGM6Hh63vnNaUAG4Z0qtJd5zXS5fr2wZTEsYHyC5PC60k7zkCRZXSzg==", + "cpu": [ + "x64" + ], + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=16" + } + }, + "node_modules/@embedded-postgres/windows-x64": { + "version": "18.2.0-beta.16", + "resolved": "https://registry.npmjs.org/@embedded-postgres/windows-x64/-/windows-x64-18.2.0-beta.16.tgz", + "integrity": "sha512-Sj6GhCZrvtMwchATEtWuEmexEBWpRNMHPTUHsqPuyDrHX/XgKfpIxz2/AMHa4sp7SZ0JOHGouH8AFIVsWQrQsQ==", + "cpu": [ + "x64" + ], + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=16" + } + }, "node_modules/@google/genai": { "version": "1.46.0", "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.46.0.tgz", @@ -1111,6 +1182,162 @@ "zod-to-json-schema": "^3.24.1" } }, + "node_modules/@oven/bun-darwin-aarch64": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-darwin-aarch64/-/bun-darwin-aarch64-1.3.11.tgz", + "integrity": "sha512-/8IzqSu4/OWGRs7Fs2ROzGVwJMFTBQkgAp6sAthkBYoN7OiM4rY/CpPVs2X9w9N1W61CHSkEdNKi8HrLZKfK3g==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@oven/bun-darwin-x64": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-darwin-x64/-/bun-darwin-x64-1.3.11.tgz", + "integrity": "sha512-TT7eUihnAzxM2tlZesusuC75PAOYKvUBgVU/Nm/lakZ/DpyuqhNkzUfcxSgmmK9IjVWzMmezLIGZl16XGCGJng==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + 
"os": [ + "darwin" + ] + }, + "node_modules/@oven/bun-darwin-x64-baseline": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-darwin-x64-baseline/-/bun-darwin-x64-baseline-1.3.11.tgz", + "integrity": "sha512-CYjIHWaQG7T4phfjErHr6BiXRs0K/9DqMeiohJmuYSBF+H2m56vFslOenLCguGYQL9jeiiCZBeoVCpwjxZrMgQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@oven/bun-linux-aarch64": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-linux-aarch64/-/bun-linux-aarch64-1.3.11.tgz", + "integrity": "sha512-8XMLyRNxHF4jfLajkWt+F8UDxsWbzysyxQVMZKUXwoeGvaxB0rVd07r3YbgDtG8U6khhRFM3oaGp+CQ0whwmdA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oven/bun-linux-aarch64-musl": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-linux-aarch64-musl/-/bun-linux-aarch64-musl-1.3.11.tgz", + "integrity": "sha512-jBwYCLG5Eb+PqtFrc3Wp2WMYlw1Id75gUcsdP+ApCOpf5oQhHxkFWCjZmcDoioDmEhMWAiM3wtwSrTlPg+sI6Q==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oven/bun-linux-x64": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-linux-x64/-/bun-linux-x64-1.3.11.tgz", + "integrity": "sha512-z3GFCk1UBzDOOiEBHL32lVP7Edi26BhOjKb6bIc0nRyabbRiyON4++GR0zmd/H5zM5S0+UcXFgCGnD+b8avTLw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oven/bun-linux-x64-baseline": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-linux-x64-baseline/-/bun-linux-x64-baseline-1.3.11.tgz", + "integrity": "sha512-KZlf1jKtf4jai8xiQv/0XRjxVVhHnw/HtUKtLdOeQpTOQ1fQFhLoz2FGGtVRd0LVa/yiRbSz9HlWIzWlmJClng==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oven/bun-linux-x64-musl": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-linux-x64-musl/-/bun-linux-x64-musl-1.3.11.tgz", + "integrity": "sha512-ADImD4yCHNpqZu718E2chWcCaAHvua90yhmpzzV6fF4zOhwkGGbPCgUWmKyJ83uz+DXaPdYxX0ttDvtolrzx3Q==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oven/bun-linux-x64-musl-baseline": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-linux-x64-musl-baseline/-/bun-linux-x64-musl-baseline-1.3.11.tgz", + "integrity": "sha512-J+qz4Al05PrNIOdj7xsWVTyx0c/gjUauG5nKV3Rrx0Q+5JO+1pPVlnfNmWbOF9pKG4f3IGad8KXJUfGMORld+Q==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oven/bun-windows-aarch64": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-windows-aarch64/-/bun-windows-aarch64-1.3.11.tgz", + "integrity": "sha512-UOdkwScHRkGPz+n9ZJU7sTkTvqV7rD1SLCLaru1xH8WRsV7tDorPqNCzEN1msOIiPRK825nvAtEm9UsomO1GsA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@oven/bun-windows-x64": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/@oven/bun-windows-x64/-/bun-windows-x64-1.3.11.tgz", + "integrity": "sha512-E51tyWDP1l0CbjZYhiUxhDGPaY8Hf5YBREx0PHBff1LM1/q3qsJ6ZvRUa8YbbOO0Ax9QP6GHjD9vf3n6bXZ7QA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@oven/bun-windows-x64-baseline": { + "version": "1.3.11", + "resolved": 
"https://registry.npmjs.org/@oven/bun-windows-x64-baseline/-/bun-windows-x64-baseline-1.3.11.tgz", + "integrity": "sha512-cCsXK9AQ9Zf18QlVnbrFu2IKfr4sf2sfbErkF2jfCzyCO9Bnhl0KRx63zlN+Ni1xU7gcBLAssgcui5R400N2eA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", @@ -1864,6 +2091,18 @@ "undici-types": "~6.21.0" } }, + "node_modules/@types/pg": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.20.0.tgz", + "integrity": "sha512-bEPFOaMAHTEP1EzpvHTbmwR8UsFyHSKsRisLIHVMXnpNefSbGA1bD6CVy+qKjGSqmZqNqBDV2azOBo8TgkcVow==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "pg-protocol": "*", + "pg-types": "^2.2.0" + } + }, "node_modules/@types/retry": { "version": "0.12.0", "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", @@ -2013,6 +2252,40 @@ "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", "license": "BSD-3-Clause" }, + "node_modules/bun": { + "version": "1.3.11", + "resolved": "https://registry.npmjs.org/bun/-/bun-1.3.11.tgz", + "integrity": "sha512-AvXWYFO6j/ZQ7bhGm4X6eilq2JHsDVC90ZM32k2B7/srhC2gs3Sdki1QTbwrdRCo8o7eT+167vcB1yzOvPdbjA==", + "cpu": [ + "arm64", + "x64" + ], + "hasInstallScript": true, + "license": "MIT", + "os": [ + "darwin", + "linux", + "win32" + ], + "bin": { + "bun": "bin/bun.exe", + "bunx": "bin/bunx.exe" + }, + "optionalDependencies": { + "@oven/bun-darwin-aarch64": "1.3.11", + "@oven/bun-darwin-x64": "1.3.11", + "@oven/bun-darwin-x64-baseline": "1.3.11", + "@oven/bun-linux-aarch64": "1.3.11", + "@oven/bun-linux-aarch64-musl": "1.3.11", + "@oven/bun-linux-x64": "1.3.11", + "@oven/bun-linux-x64-baseline": "1.3.11", + "@oven/bun-linux-x64-musl": "1.3.11", + "@oven/bun-linux-x64-musl-baseline": "1.3.11", + "@oven/bun-windows-aarch64": "1.3.11", + "@oven/bun-windows-x64": "1.3.11", + "@oven/bun-windows-x64-baseline": "1.3.11" + } + }, "node_modules/callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -3162,6 +3435,113 @@ "node": ">=14.0.0" } }, + "node_modules/pg": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/pg/-/pg-8.20.0.tgz", + "integrity": "sha512-ldhMxz2r8fl/6QkXnBD3CR9/xg694oT6DZQ2s6c/RI28OjtSOpxnPrUCGOBJ46RCUxcWdx3p6kw/xnDHjKvaRA==", + "license": "MIT", + "dependencies": { + "pg-connection-string": "^2.12.0", + "pg-pool": "^3.13.0", + "pg-protocol": "^1.13.0", + "pg-types": "2.2.0", + "pgpass": "1.0.5" + }, + "engines": { + "node": ">= 16.0.0" + }, + "optionalDependencies": { + "pg-cloudflare": "^1.3.0" + }, + "peerDependencies": { + "pg-native": ">=3.0.1" + }, + "peerDependenciesMeta": { + "pg-native": { + "optional": true + } + } + }, + "node_modules/pg-cloudflare": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/pg-cloudflare/-/pg-cloudflare-1.3.0.tgz", + "integrity": "sha512-6lswVVSztmHiRtD6I8hw4qP/nDm1EJbKMRhf3HCYaqud7frGysPv7FYJ5noZQdhQtN2xJnimfMtvQq21pdbzyQ==", + "license": "MIT", + "optional": true + }, + "node_modules/pg-connection-string": { + "version": "2.12.0", + "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.12.0.tgz", + "integrity": "sha512-U7qg+bpswf3Cs5xLzRqbXbQl85ng0mfSV/J0nnA31MCLgvEaAo7CIhmeyrmJpOr7o+zm0rXK+hNnT5l9RHkCkQ==", + "license": "MIT" + }, + "node_modules/pg-int8": { + 
"version": "1.0.1", + "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", + "integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==", + "license": "ISC", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/pg-pool": { + "version": "3.13.0", + "resolved": "https://registry.npmjs.org/pg-pool/-/pg-pool-3.13.0.tgz", + "integrity": "sha512-gB+R+Xud1gLFuRD/QgOIgGOBE2KCQPaPwkzBBGC9oG69pHTkhQeIuejVIk3/cnDyX39av2AxomQiyPT13WKHQA==", + "license": "MIT", + "peerDependencies": { + "pg": ">=8.0" + } + }, + "node_modules/pg-protocol": { + "version": "1.13.0", + "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.13.0.tgz", + "integrity": "sha512-zzdvXfS6v89r6v7OcFCHfHlyG/wvry1ALxZo4LqgUoy7W9xhBDMaqOuMiF3qEV45VqsN6rdlcehHrfDtlCPc8w==", + "license": "MIT" + }, + "node_modules/pg-types": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", + "integrity": "sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==", + "license": "MIT", + "dependencies": { + "pg-int8": "1.0.1", + "postgres-array": "~2.0.0", + "postgres-bytea": "~1.0.0", + "postgres-date": "~1.0.4", + "postgres-interval": "^1.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/pgpass": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/pgpass/-/pgpass-1.0.5.tgz", + "integrity": "sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==", + "license": "MIT", + "dependencies": { + "split2": "^4.1.0" + } + }, + "node_modules/pgserve": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/pgserve/-/pgserve-1.1.6.tgz", + "integrity": "sha512-qdgiU3q/o/k8lP+IPHLiZG5uTWmvCSgqM3erdP7nmgVeQYv89DTo0nSG5XB0ccVD7yG/BppLFHjRvvXcWVgKnQ==", + "license": "MIT", + "dependencies": { + "bun": "^1.3.4" + }, + "bin": { + "pgserve": "bin/pgserve-wrapper.cjs" + }, + "optionalDependencies": { + "@embedded-postgres/darwin-arm64": "18.2.0-beta.16", + "@embedded-postgres/darwin-x64": "18.2.0-beta.16", + "@embedded-postgres/linux-x64": "18.2.0-beta.16", + "@embedded-postgres/windows-x64": "18.2.0-beta.16" + } + }, "node_modules/picocolors": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", @@ -3169,6 +3549,45 @@ "dev": true, "license": "ISC" }, + "node_modules/postgres-array": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-2.0.0.tgz", + "integrity": "sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/postgres-bytea": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-1.0.1.tgz", + "integrity": "sha512-5+5HqXnsZPE65IJZSMkZtURARZelel2oXUEO8rH83VS/hxH5vv1uHquPg5wZs8yMAfdv971IU+kcPUczi7NVBQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-date": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.7.tgz", + "integrity": "sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-interval": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-1.2.0.tgz", + "integrity": 
"sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==", + "license": "MIT", + "dependencies": { + "xtend": "^4.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/protobufjs": { "version": "7.5.4", "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz", @@ -3341,7 +3760,6 @@ "version": "4.2.0", "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", - "dev": true, "license": "ISC", "engines": { "node": ">= 10.x" @@ -3519,6 +3937,15 @@ } } }, + "node_modules/xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "license": "MIT", + "engines": { + "node": ">=0.4" + } + }, "node_modules/y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", diff --git a/package.json b/package.json index d883d71..10ab02c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@automagik/rlmx", - "version": "0.260330.13", + "version": "0.260331.1", "description": "RLM algorithm CLI for coding agents — prompt externalization, Python REPL with symbolic recursion, code-driven navigation", "type": "module", "publishConfig": { @@ -17,7 +17,7 @@ "examples" ], "scripts": { - "build": "tsc", + "build": "tsc && cp src/benchmark-data.json dist/src/", "dev": "tsc --watch", "clean": "rm -rf dist", "test": "node --test dist/tests/*.test.js", @@ -27,13 +27,16 @@ }, "dependencies": { "@mariozechner/pi-ai": "0.64.0", - "js-yaml": "^4.1.1" + "js-yaml": "^4.1.1", + "pg": "^8.20.0", + "pgserve": "^1.1.6" }, "devDependencies": { "@commitlint/cli": "^19.0.0", "@commitlint/config-conventional": "^19.0.0", "@types/js-yaml": "^4.0.9", "@types/node": "^22.0.0", + "@types/pg": "^8.11.0", "husky": "^9.0.0", "typescript": "^5.7.0" }, diff --git a/python/gemini_batteries.py b/python/gemini_batteries.py index c584e22..6d42463 100644 --- a/python/gemini_batteries.py +++ b/python/gemini_batteries.py @@ -22,7 +22,7 @@ def web_search(query: str) -> str: Returns search results as text. Only available with provider: google. """ - results = llm_bridge._send_request("web_search", [query]) + results = llm_bridge.send_request("web_search", [query]) return results[0] if results else "Error: no response from web_search" @@ -31,7 +31,7 @@ def fetch_url(url: str) -> str: Returns page content as text. Only available with provider: google. """ - results = llm_bridge._send_request("fetch_url", [url]) + results = llm_bridge.send_request("fetch_url", [url]) return results[0] if results else "Error: no response from fetch_url" @@ -42,5 +42,5 @@ def generate_image(prompt: str, aspect_ratio: str = "16:9", size: str = "2K") -> Only available with provider: google. 
""" full_prompt = f"{prompt} [aspect_ratio={aspect_ratio}, size={size}]" - results = llm_bridge._send_request("generate_image", [full_prompt]) + results = llm_bridge.send_request("generate_image", [full_prompt]) return results[0] if results else "Error: no response from generate_image" diff --git a/python/llm_bridge.py b/python/llm_bridge.py index 70ee44d..2d002d0 100644 --- a/python/llm_bridge.py +++ b/python/llm_bridge.py @@ -50,6 +50,11 @@ def _send_request(request_type: str, prompts: list, model=None) -> list: return [f"Error: invalid JSON response: {e}"] * len(prompts) +def send_request(request_type: str, prompts: list, model=None) -> list: + """Public interface for sending IPC requests to Node.js parent process.""" + return _send_request(request_type, prompts, model) + + def llm_query(prompt: str, model=None) -> str: """Query the LLM with a single prompt. Returns the response string.""" results = _send_request("llm_query", [prompt], model) diff --git a/python/load_dataset.py b/python/load_dataset.py new file mode 100755 index 0000000..65450de --- /dev/null +++ b/python/load_dataset.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +"""Load Oolong Synth dataset from HuggingFace and output as JSON.""" +import json +import sys +from datasets import load_dataset + + +def main(): + samples = int(sys.argv[1]) if len(sys.argv) > 1 else 5 + idx = int(sys.argv[2]) if len(sys.argv) > 2 else -1 + + ds = load_dataset("oolongbench/oolong-synth", split="test") + + if idx >= 0: + items = [ds[idx]] + else: + items = list(ds.select(range(min(samples, len(ds))))) + + output = [] + for item in items: + # Oolong Synth uses "context_window_text" for the context field + context = item.get("context_window_text") or item.get("context", "") + answer = item.get("answer", "") + # answer can be a list — join if so + if isinstance(answer, list): + answer = ", ".join(str(a) for a in answer) + output.append({ + "id": f"oolong-{item.get('id', 'unknown')}", + "name": item.get("question", "")[:50], + "question": item["question"], + "context": context, + "expected": answer, + "category": item.get("task_group", "oolong"), + }) + + json.dump(output, sys.stdout) + + +if __name__ == "__main__": + main() diff --git a/python/pg_batteries.py b/python/pg_batteries.py new file mode 100644 index 0000000..379263e --- /dev/null +++ b/python/pg_batteries.py @@ -0,0 +1,158 @@ +""" +pg_batteries.py — PostgreSQL storage query functions for rlmx. + +Provides pg_search(), pg_slice(), pg_time(), pg_count(), pg_query() +that communicate with the Node.js PgStorage via the IPC bridge. + +Available when storage mode is active. +""" + +import json + +# IPC bridge — resolved at call time from REPL namespace globals +import llm_bridge + + +def _pg_request(request_type, params=None): + """Send a pg_* request to Node.js PgStorage and return parsed result.""" + payload = json.dumps(params) if params else "{}" + results = llm_bridge.send_request(request_type, [payload]) + if not results: + return None + raw = results[0] + if raw.startswith("Error:"): + raise RuntimeError(raw) + try: + return json.loads(raw) + except (json.JSONDecodeError, TypeError): + return raw + + +def _truncate_output(items, label="results"): + """Truncate output if > 2000 chars, showing first 5 items as stub.""" + if not isinstance(items, list): + text = str(items) + if len(text) <= 2000: + return text + return text[:2000] + "\n... 
[truncated]" + + full = json.dumps(items, indent=2) + if len(full) <= 2000: + return full + + n = len(items) + preview_items = items[:5] + lines = [f"[{n} {label}, showing first {min(5, n)}]"] + for item in preview_items: + if isinstance(item, dict): + content = item.get("content", "") + preview = content[:100] + "..." if len(content) > 100 else content + shown = {k: (preview if k == "content" else v) for k, v in item.items()} + lines.append(json.dumps(shown)) + else: + lines.append(str(item)[:100]) + lines.append("...") + if items and isinstance(items[0], dict) and "line_num" in items[0]: + first_ln = items[0]["line_num"] + lines.append(f'Use pg_slice({first_ln}, {first_ln + 10}) to see full content') + return "\n".join(lines) + + +def pg_search(pattern, limit=20): + """Full-text search in stored context. Returns matching records ranked by relevance. + + Args: + pattern: Search terms (words joined with AND) + limit: Max results (default 20) + + Returns: + List of {line_num, source, content, rank} dicts + """ + result = _pg_request("pg_search", {"pattern": pattern, "limit": limit}) + if isinstance(result, list): + return _truncate_output(result, "results") + return result + + +def pg_slice(start, end): + """Get context lines by range. Returns content for lines [start, end). + + Args: + start: Starting line number (inclusive) + end: Ending line number (exclusive) + + Returns: + String with source and content of the requested lines + """ + result = _pg_request("pg_slice", {"start": start, "end": end}) + if isinstance(result, list): + lines = [] + for r in result: + if isinstance(r, dict): + src = r.get("source", "") + content = r.get("content", "") + lines.append(f"[{src}] {content}" if src else content) + else: + lines.append(str(r)) + content = "\n".join(lines) + if len(content) > 2000: + return content[:2000] + f"\n... [truncated, {len(result)} lines total]" + return content + return result + + +def pg_sources(): + """List distinct source files in the stored context. + + Returns: + List of source file paths + """ + result = _pg_request("pg_query", { + "sql": "SELECT DISTINCT source FROM records WHERE source IS NOT NULL ORDER BY source" + }) + if isinstance(result, list): + return [r.get("source", "") if isinstance(r, dict) else str(r) for r in result] + return result + + +def pg_time(from_time, to_time): + """Filter context records by timestamp range. + + Args: + from_time: Start time (e.g. '01:00' or '2024-01-01T01:00:00') + to_time: End time + + Returns: + List of {line_num, timestamp, content} dicts + """ + result = _pg_request("pg_time", {"from": from_time, "to": to_time}) + if isinstance(result, list): + return _truncate_output(result, "results") + return result + + +def pg_count(): + """Count total records in stored context. + + Returns: + Integer count + """ + result = _pg_request("pg_count", {}) + if isinstance(result, dict): + return result.get("count", 0) + return result + + +def pg_query(sql): + """Execute raw SQL query (read-only) against the context database. 
+ + Args: + sql: SQL query string + + Returns: + List of result row dicts + """ + result = _pg_request("pg_query", {"sql": sql}) + if isinstance(result, list): + return _truncate_output(result, "rows") + return result diff --git a/src/batch.ts b/src/batch.ts index 131d330..85abb88 100644 --- a/src/batch.ts +++ b/src/batch.ts @@ -11,6 +11,7 @@ import { rlmLoop, type RLMOptions } from "./rlm.js"; import type { RlmxConfig } from "./config.js"; import type { LoadedContext } from "./context.js"; import { isGoogleProvider } from "./gemini.js"; +import { validateContextSize } from "./cache.js"; interface BatchResult { question: string; @@ -36,6 +37,8 @@ export interface BatchOptions extends Partial { parallel?: number; /** Use Gemini Batch API for 50% cost reduction. Requires provider: google. */ batchApi?: boolean; + /** When true, use pgserve storage for large context handling. */ + storageMode?: boolean; } /** @@ -96,13 +99,18 @@ export async function runBatch( break; } - // Run each question through rlmLoop with cache enabled + // Determine cache/storage mode for this batch + const useCache = options.cache ?? true; + const useStorage = options.storageMode ?? false; + + // Run each question through rlmLoop const result = await rlmLoop(question, context, config, { maxIterations: options.maxIterations, timeout: options.timeout, verbose: options.verbose, output: "text", // batch always captures text internally - cache: true, // batch always uses cache + cache: useCache, + storageMode: useStorage, }); totalCost += result.usage.totalCost; diff --git a/src/benchmark-data.json b/src/benchmark-data.json new file mode 100644 index 0000000..372362b --- /dev/null +++ b/src/benchmark-data.json @@ -0,0 +1,44 @@ +[ + { + "id": "cost-001", + "name": "API extraction", + "question": "What authentication methods does the API support?", + "context": "# Authentication API Reference\n\nThe Gateway API supports three authentication methods for securing client requests.\n\n## 1. API Key Authentication\n\nPass your API key via the `X-API-Key` header. Keys are generated in the Developer Portal under Settings > API Keys. Each key has configurable rate limits and scope restrictions. Keys can be rotated without downtime using the dual-key mechanism: generate a new key, update clients, then revoke the old key.\n\n## 2. OAuth 2.0 Bearer Tokens\n\nFor user-delegated access, the API supports OAuth 2.0 Authorization Code flow with PKCE. Request tokens from `POST /oauth/token` with grant_type=authorization_code. Access tokens expire after 3600 seconds. Refresh tokens are valid for 30 days and support automatic rotation.\n\n## 3. Mutual TLS (mTLS)\n\nFor service-to-service communication, mTLS provides certificate-based authentication. Upload your client certificate via the Admin Console. The API validates the certificate chain against the configured CA bundle. Certificate pinning is supported but optional.", + "category": "extraction" + }, + { + "id": "cost-002", + "name": "Architecture summary", + "question": "Summarize the key architectural decisions and their trade-offs.", + "context": "# System Architecture Document\n\n## Overview\n\nThe platform uses a microservices architecture deployed on Kubernetes, with an event-driven backbone for inter-service communication. This document captures the major architectural decisions made during the Q3 2025 redesign.\n\n## Decision 1: Event Sourcing for Order Management\n\nWe adopted event sourcing for the order management domain. 
Every state change is captured as an immutable event in an append-only log (Apache Kafka). The current state is derived by replaying events. Trade-off: increased storage requirements (~3x compared to state-based) and higher complexity for simple queries, but we gain complete audit trails, temporal queries, and the ability to rebuild read models.\n\n## Decision 2: CQRS for Read/Write Separation\n\nCommand and Query Responsibility Segregation separates the write model (event store) from read models (PostgreSQL materialized views). Writes go through the command bus, reads hit optimized projections. Trade-off: eventual consistency (typically <100ms lag) and operational complexity of maintaining projections, but read performance improved 15x and we can scale read/write independently.\n\n## Decision 3: Service Mesh with Istio\n\nAll inter-service communication flows through Istio sidecars. This provides mTLS between services, circuit breaking, retries with exponential backoff, and observability (distributed tracing via Jaeger). Trade-off: ~2ms latency overhead per hop and significant operational complexity, but unified security policy enforcement and deep traffic visibility.\n\n## Decision 4: GraphQL Federation\n\nThe API gateway uses Apollo Federation to compose a unified GraphQL schema from individual service schemas. Each team owns their subgraph. Trade-off: schema governance overhead and potential N+1 query issues, but teams can iterate independently and clients get a single endpoint with exactly the data they need.\n\n## Decision 5: Multi-Region Active-Active\n\nThe system operates in active-active mode across us-east-1 and eu-west-1. CockroachDB handles cross-region replication with serializable isolation. DNS-based routing directs users to the nearest region. Trade-off: significantly higher infrastructure cost (~2.3x) and complex conflict resolution for concurrent writes, but we achieve <50ms latency globally and survive full region failures.", + "category": "summarization" + }, + { + "id": "cost-003", + "name": "Migration reasoning", + "question": "Given the constraints described, should the team proceed with the database migration to PostgreSQL 16 during the holiday freeze, or wait until Q1? 
Explain your reasoning step by step.", + "context": "# Database Migration Planning: PostgreSQL 14 to 16\n\n## Current State\n- Running PostgreSQL 14.9 on AWS RDS Multi-AZ\n- 847 GB data across 23 databases\n- Peak load: 12,000 queries/second during business hours\n- End-of-life for PG 14 community support: November 2026\n\n## Migration Benefits (PG 16)\n- Logical replication improvements: parallel apply for large transactions\n- Performance: up to 30% improvement for bulk operations via SIMD JSON parsing\n- MERGE command support (SQL standard compliance)\n- pg_stat_io view for I/O monitoring\n- Security: libpq now supports Kerberos credential delegation\n\n## Holiday Freeze Context\n- Company holiday freeze: December 15 - January 5\n- All production changes require VP-level approval during freeze\n- On-call team reduced to 2 engineers (vs normal 6)\n- Customer traffic drops 40% during holidays (lower risk window)\n- Q1 has two major feature launches planned (weeks 3 and 7)\n\n## Risk Factors\n- Three extensions need compatibility verification: PostGIS 3.3, pg_partman 4.7, timescaledb 2.11\n- Application connection pooling (PgBouncer 1.19) needs config update for PG 16 protocol changes\n- ORM (Prisma 5.x) has known issue with PG 16 MERGE — workaround available but untested in production\n- Rollback requires full RDS snapshot restore (~45 minutes for 847 GB)\n- Two critical batch jobs run nightly at 02:00 UTC and cannot be interrupted\n\n## Team Assessment\n- DBA team confidence level: 7/10 (comfortable with process, concerned about extension compat)\n- Dev team readiness: 6/10 (integration tests pass, but limited PG 16 staging time)\n- Staging environment has been running PG 16 for 3 weeks with synthetic load\n- One P2 bug found in staging: connection timeout under high concurrent MERGE operations\n\n## Compliance Requirements\n- SOC 2 audit scheduled for February — auditors expect documented change management\n- GDPR data residency constraints require EU region migration to be completed within same maintenance window\n- PCI DSS requires change approval documentation with rollback evidence", + "category": "reasoning" + }, + { + "id": "cost-004", + "name": "Framework comparison", + "question": "Compare the three deployment strategies described and recommend which one is best suited for a team with limited DevOps experience.", + "context": "# Deployment Strategy Comparison\n\n## Strategy A: Blue-Green Deployment\n\nMaintain two identical production environments (blue and green). At any time, one serves live traffic while the other is idle. To deploy: push new code to idle environment, run smoke tests, then switch the load balancer. Rollback is instant — switch back to the previous environment.\n\nResource cost: 2x infrastructure at all times. Complexity: moderate — requires automated environment provisioning and LB switching. Downtime: zero during switchover. Database handling: challenging — requires backward-compatible schema migrations or database-per-environment.\n\nBest for: teams with budget for double infrastructure, need instant rollback, relatively stable database schemas.\n\n## Strategy B: Canary Deployment\n\nRoute a small percentage (1-5%) of traffic to the new version while 95-99% stays on the old version. Gradually increase the percentage while monitoring error rates, latency, and business metrics. If anomalies are detected, route all traffic back to the old version.\n\nResource cost: 1.05-1.1x infrastructure during rollout. 
Complexity: high — requires sophisticated traffic routing (service mesh or smart LB), real-time monitoring, automated rollback triggers. Downtime: zero. Database handling: both versions must work with same database, requiring backward-compatible migrations.\n\nBest for: high-traffic services where bugs in new versions could have significant blast radius, teams with strong observability.\n\n## Strategy C: Rolling Update\n\nReplace instances of the old version with the new version one at a time (or in small batches). Kubernetes does this natively with Deployment resources. At any point during rollout, both old and new versions handle traffic.\n\nResource cost: 1.1-1.25x infrastructure during rollout. Complexity: low — built into Kubernetes, minimal configuration (maxSurge, maxUnavailable). Downtime: zero if configured correctly. Database handling: same as canary — requires backward-compatible migrations.\n\nBest for: Kubernetes-native teams, services that handle mixed-version traffic well, teams wanting simplicity.", + "category": "comparison" + }, + { + "id": "cost-005", + "name": "Incident synthesis", + "question": "Synthesize the incident reports below into a unified root cause analysis and propose systemic improvements.", + "context": "# Incident Reports — Q3 2025\n\n## INC-2847: Payment Processing Outage (August 3, 14:22-15:47 UTC)\n\nSeverity: P1 | Duration: 85 minutes | Impact: 100% of payment transactions failed\n\nTimeline:\n- 14:22 — Monitoring alert: payment-service error rate >50%\n- 14:25 — On-call engineer begins investigation\n- 14:32 — Root cause identified: expired TLS certificate on payment gateway integration\n- 14:45 — Certificate renewal initiated via automated pipeline\n- 14:58 — New certificate deployed but service still failing\n- 15:12 — Discovery: PgBouncer connection pool exhausted due to retry storm from failed payments\n- 15:30 — PgBouncer restarted, connection pool drained\n- 15:47 — Full recovery confirmed\n\nRoot cause: TLS certificate auto-renewal cron job was disabled during June infrastructure migration and never re-enabled. Certificate expired after 90-day Let's Encrypt cycle.\n\nContributing factors: No monitoring on certificate expiry dates. Retry logic in payment-service uses fixed 1-second intervals (no exponential backoff), causing connection pool exhaustion.\n\n## INC-2901: Search Degradation (August 19, 09:15-10:30 UTC)\n\nSeverity: P2 | Duration: 75 minutes | Impact: Search latency increased from 200ms to 8s (p99)\n\nTimeline:\n- 09:15 — Latency alert triggered on search-service\n- 09:22 — Investigation reveals Elasticsearch cluster yellow status\n- 09:28 — One ES data node (es-data-04) unresponsive, triggering shard rebalancing\n- 09:45 — Node es-data-04 confirmed OOM-killed by Kubernetes (memory limit: 32GB, actual usage: 31.8GB)\n- 10:00 — Node restarted with increased memory limit (48GB)\n- 10:15 — Shard rebalancing complete\n- 10:30 — Latency returned to normal\n\nRoot cause: A bulk indexing job (product catalog refresh) was scheduled concurrently with peak search traffic. The indexing job consumed excessive heap memory on es-data-04 because it processed 50,000 documents per batch instead of the recommended 5,000.\n\nContributing factors: No resource isolation between indexing and search workloads. 
Bulk batch size was increased from 5,000 to 50,000 three months ago without load testing.\n\n## INC-2956: API Gateway Cascade Failure (September 2, 16:00-17:15 UTC)\n\nSeverity: P1 | Duration: 75 minutes | Impact: All API endpoints returned 503 for 60% of requests\n\nTimeline:\n- 16:00 — Marketing campaign launched, driving 3x normal traffic\n- 16:05 — User-service auto-scaling triggered (3 → 12 pods)\n- 16:08 — New user-service pods pulling container image (2.1GB) from registry\n- 16:12 — Container registry rate limit hit (DockerHub free tier: 100 pulls/6 hours)\n- 16:15 — 9 of 12 user-service pods stuck in ImagePullBackoff\n- 16:18 — Remaining 3 pods overwhelmed, latency spikes to 30s\n- 16:22 — Istio circuit breaker trips on user-service, returning 503\n- 16:25 — Cascade: order-service and cart-service depend on user-service, both start failing\n- 16:40 — Team switches to private ECR registry mirror\n- 16:55 — Images pulled from ECR, pods come up\n- 17:15 — Full recovery, circuit breakers reset\n\nRoot cause: Production Kubernetes cluster configured to pull container images from DockerHub free tier instead of private registry mirror. Combined with 2.1GB image size, auto-scaling events exhausted the rate limit.\n\nContributing factors: No pre-pulled images on nodes. Container image not optimized (2.1GB includes dev dependencies). Auto-scaling policy scales too aggressively (100% pod increase per step). No alerting on container registry rate limits.\n\n## INC-3012: Data Pipeline Delay (September 15, 03:00-11:00 UTC)\n\nSeverity: P2 | Duration: 8 hours | Impact: Analytics dashboards showed stale data (8+ hours old)\n\nTimeline:\n- 03:00 — Nightly ETL job starts, processing 2.3TB of event data\n- 03:45 — Spark executor OOM on largest partition (events from top-3 customers = 40% of data)\n- 04:00 — Spark retry policy kicks in (3 retries, same configuration)\n- 06:00 — All retries exhausted, job marked as failed\n- 08:30 — Morning shift notices stale dashboards\n- 09:00 — Engineer investigates, identifies skewed partition\n- 10:00 — Job rerun with increased executor memory (16GB → 32GB) and partition balancing\n- 11:00 — Backfill complete\n\nRoot cause: Data skew — three enterprise customers generated 40% of daily events, causing one Spark partition to exceed executor memory limits.\n\nContributing factors: No data skew monitoring. No alerting on ETL job failures (only dashboards stale alert exists). Retry policy retries with same config (insanity). No partition rebalancing strategy for skewed keys.", + "category": "synthesis" + }, + { + "id": "cost-006", + "name": "Config extraction", + "question": "Extract all environment variables and their default values from this configuration reference.", + "context": "# Service Configuration Reference\n\nThe worker service reads configuration from environment variables at startup. All variables are optional and have sensible defaults.\n\n## Server Settings\n\n`PORT` — HTTP server listen port. Default: `8080`. Must be between 1024 and 65535.\n\n`HOST` — Bind address. Default: `0.0.0.0`. Set to `127.0.0.1` for local-only access.\n\n`WORKER_CONCURRENCY` — Number of parallel worker threads. Default: `4`. Recommended: set to CPU core count.\n\n## Database\n\n`DATABASE_URL` — PostgreSQL connection string. Default: `postgresql://localhost:5432/worker_db`. Supports connection pooling parameters via query string.\n\n`DB_POOL_SIZE` — Maximum database connections. Default: `10`. 
Should not exceed `max_connections / number_of_instances`.\n\n`DB_TIMEOUT_MS` — Query timeout in milliseconds. Default: `30000`.\n\n## Cache\n\n`REDIS_URL` — Redis connection string. Default: `redis://localhost:6379/0`.\n\n`CACHE_TTL_SECONDS` — Default cache entry TTL. Default: `300`.\n\n`CACHE_PREFIX` — Key prefix for cache entries. Default: `worker:`.", + "category": "extraction" + } +] diff --git a/src/benchmark.ts b/src/benchmark.ts new file mode 100644 index 0000000..6f84830 --- /dev/null +++ b/src/benchmark.ts @@ -0,0 +1,431 @@ +/** + * Benchmark runner for rlmx — compares RLM vs direct LLM on cost/tokens/latency. + * + * Two modes: + * - cost: built-in curated dataset, measures cost savings + * - oolong: Oolong Synth from HuggingFace, measures accuracy + */ + +import { homedir } from "node:os"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; +import { mkdir, writeFile, readFile, stat } from "node:fs/promises"; +import { execFile } from "node:child_process"; +import { promisify } from "node:util"; + +import type { RlmxConfig } from "./config.js"; +import type { LoadedContext } from "./context.js"; +import { llmComplete, type ChatMessage } from "./llm.js"; +import { rlmLoop } from "./rlm.js"; + +const execFileAsync = promisify(execFile); + +// ─── Interfaces ────────────────────────────────────────── + +export interface BenchmarkQuestion { + id: string; + name: string; + question: string; + context: string; + category: string; + expected?: string; +} + +export interface BenchmarkRunResult { + questionId: string; + questionName: string; + direct: { + tokens_input: number; + tokens_output: number; + cost: number; + latency_ms: number; + answer: string; + }; + rlm: { + tokens_input: number; + tokens_output: number; + cost: number; + latency_ms: number; + iterations: number; + answer: string; + }; + savings: { + tokens_pct: number; + cost_pct: number; + }; +} + +export interface BenchmarkResults { + timestamp: string; + mode: "cost" | "oolong"; + model: string; + runs: BenchmarkRunResult[]; + totals: { + direct: { tokens: number; cost: number; latency_ms: number }; + rlm: { tokens: number; cost: number; latency_ms: number; avg_iterations: number }; + savings: { tokens_pct: number; cost_pct: number }; + }; +} + +// ─── Dataset Loading ───────────────────────────────────── + +async function loadBuiltinDataset(): Promise { + const thisDir = dirname(fileURLToPath(import.meta.url)); + const jsonPath = join(thisDir, "benchmark-data.json"); + const raw = await readFile(jsonPath, "utf-8"); + return JSON.parse(raw) as BenchmarkQuestion[]; +} + +// ─── Savings Calculation ───────────────────────────────── + +export function calculateSavings(directTokens: number, rlmTokens: number): number { + if (directTokens <= 0) return 0; + return ((directTokens - rlmTokens) / directTokens) * 100; +} + +export function calculateCostSavings(directCost: number, rlmCost: number): number { + if (directCost <= 0) return 0; + return ((directCost - rlmCost) / directCost) * 100; +} + +// ─── Cost Benchmark ────────────────────────────────────── + +export async function runCostBenchmark( + config: RlmxConfig, + options?: { outputFormat?: "table" | "json" } +): Promise { + const dataset = await loadBuiltinDataset(); + const runs: BenchmarkRunResult[] = []; + + for (const q of dataset) { + const ctx: LoadedContext = { + type: "string", + content: q.context, + metadata: `benchmark context for "${q.name}" (${q.context.length} chars)`, + }; + + // Direct LLM call + const directMessages: 
ChatMessage[] = [ + { + role: "user", + content: `Context:\n${q.context}\n\nQuestion: ${q.question}`, + }, + ]; + + const directStart = Date.now(); + const directResp = await llmComplete(directMessages, config.model); + const directLatency = Date.now() - directStart; + + // RLM call + const rlmStart = Date.now(); + const rlmResult = await rlmLoop(q.question, ctx, config, { + maxIterations: config.budget.maxTokens ? 5 : 10, + timeout: 120_000, + verbose: false, + output: "text", + cache: false, + }); + const rlmLatency = Date.now() - rlmStart; + + const directTotalTokens = directResp.usage.inputTokens + directResp.usage.outputTokens; + const rlmTotalTokens = rlmResult.usage.inputTokens + rlmResult.usage.outputTokens; + + runs.push({ + questionId: q.id, + questionName: q.name, + direct: { + tokens_input: directResp.usage.inputTokens, + tokens_output: directResp.usage.outputTokens, + cost: directResp.usage.totalCost, + latency_ms: directLatency, + answer: directResp.text, + }, + rlm: { + tokens_input: rlmResult.usage.inputTokens, + tokens_output: rlmResult.usage.outputTokens, + cost: rlmResult.usage.totalCost, + latency_ms: rlmLatency, + iterations: rlmResult.iterations, + answer: rlmResult.answer, + }, + savings: { + tokens_pct: calculateSavings(directTotalTokens, rlmTotalTokens), + cost_pct: calculateCostSavings(directResp.usage.totalCost, rlmResult.usage.totalCost), + }, + }); + + if (options?.outputFormat !== "json") { + process.stderr.write(` completed: ${q.name}\n`); + } + } + + const totals = aggregateTotals(runs); + + return { + timestamp: new Date().toISOString(), + mode: "cost", + model: `${config.model.provider}/${config.model.model}`, + runs, + totals, + }; +} + +// ─── Oolong Benchmark ──────────────────────────────────── + +async function ensureBenchVenv(): Promise { + const venvDir = join(homedir(), ".rlmx", ".bench-venv"); + const pythonBin = join(venvDir, "bin", "python"); + + try { + await stat(pythonBin); + return pythonBin; + } catch { + // Create venv and install datasets + process.stderr.write("rlmx benchmark: setting up Python venv for HuggingFace datasets...\n"); + await mkdir(join(homedir(), ".rlmx"), { recursive: true }); + + // Try uv first (preferred), fall back to python3 -m venv + pip + try { + await execFileAsync("uv", ["venv", venvDir]); + await execFileAsync("uv", ["pip", "install", "--python", pythonBin, "datasets"]); + } catch { + await execFileAsync("python3", ["-m", "venv", venvDir]); + await execFileAsync(join(venvDir, "bin", "pip"), ["install", "datasets"]); + } + + process.stderr.write("rlmx benchmark: Python venv ready.\n"); + return pythonBin; + } +} + +function findLoadDatasetScript(): string { + // The script is at python/load_dataset.py relative to package root + // From dist/src/benchmark.js, package root is ../../ + const thisDir = dirname(fileURLToPath(import.meta.url)); + return join(thisDir, "..", "..", "python", "load_dataset.py"); +} + +export async function runOolongBenchmark( + config: RlmxConfig, + options?: { samples?: number; idx?: number } +): Promise { + const samples = options?.samples ?? 5; + const idx = options?.idx; + + const pythonBin = await ensureBenchVenv(); + const scriptPath = findLoadDatasetScript(); + + // Load dataset via Python subprocess + const args = [scriptPath, String(samples)]; + if (idx !== undefined) { + args.push(String(idx)); + } + + process.stderr.write(`rlmx benchmark: loading Oolong Synth dataset (${idx !== undefined ? 
`idx=${idx}` : `${samples} samples`})...\n`); + const { stdout } = await execFileAsync(pythonBin, args, { maxBuffer: 50 * 1024 * 1024 }); + const dataset: BenchmarkQuestion[] = JSON.parse(stdout); + process.stderr.write(`rlmx benchmark: loaded ${dataset.length} samples.\n`); + + const runs: BenchmarkRunResult[] = []; + + for (const q of dataset) { + const ctx: LoadedContext = { + type: "string", + content: q.context, + metadata: `oolong context (${q.context.length} chars)`, + }; + + // Direct LLM call + const directMessages: ChatMessage[] = [ + { + role: "user", + content: `Context:\n${q.context}\n\nQuestion: ${q.question}`, + }, + ]; + + const directStart = Date.now(); + const directResp = await llmComplete(directMessages, config.model); + const directLatency = Date.now() - directStart; + + // RLM call + const rlmStart = Date.now(); + const rlmResult = await rlmLoop(q.question, ctx, config, { + maxIterations: 10, + timeout: 120_000, + verbose: false, + output: "text", + cache: false, + }); + const rlmLatency = Date.now() - rlmStart; + + const directTotalTokens = directResp.usage.inputTokens + directResp.usage.outputTokens; + const rlmTotalTokens = rlmResult.usage.inputTokens + rlmResult.usage.outputTokens; + + runs.push({ + questionId: q.id, + questionName: q.name, + direct: { + tokens_input: directResp.usage.inputTokens, + tokens_output: directResp.usage.outputTokens, + cost: directResp.usage.totalCost, + latency_ms: directLatency, + answer: directResp.text, + }, + rlm: { + tokens_input: rlmResult.usage.inputTokens, + tokens_output: rlmResult.usage.outputTokens, + cost: rlmResult.usage.totalCost, + latency_ms: rlmLatency, + iterations: rlmResult.iterations, + answer: rlmResult.answer, + }, + savings: { + tokens_pct: calculateSavings(directTotalTokens, rlmTotalTokens), + cost_pct: calculateCostSavings(directResp.usage.totalCost, rlmResult.usage.totalCost), + }, + }); + + process.stderr.write(` completed: ${q.name}\n`); + } + + const totals = aggregateTotals(runs); + + return { + timestamp: new Date().toISOString(), + mode: "oolong", + model: `${config.model.provider}/${config.model.model}`, + runs, + totals, + }; +} + +// ─── Aggregation ───────────────────────────────────────── + +export function aggregateTotals(runs: BenchmarkRunResult[]): BenchmarkResults["totals"] { + if (runs.length === 0) { + return { + direct: { tokens: 0, cost: 0, latency_ms: 0 }, + rlm: { tokens: 0, cost: 0, latency_ms: 0, avg_iterations: 0 }, + savings: { tokens_pct: 0, cost_pct: 0 }, + }; + } + + const directTokens = runs.reduce((sum, r) => sum + r.direct.tokens_input + r.direct.tokens_output, 0); + const directCost = runs.reduce((sum, r) => sum + r.direct.cost, 0); + const directLatency = runs.reduce((sum, r) => sum + r.direct.latency_ms, 0); + + const rlmTokens = runs.reduce((sum, r) => sum + r.rlm.tokens_input + r.rlm.tokens_output, 0); + const rlmCost = runs.reduce((sum, r) => sum + r.rlm.cost, 0); + const rlmLatency = runs.reduce((sum, r) => sum + r.rlm.latency_ms, 0); + const avgIterations = runs.reduce((sum, r) => sum + r.rlm.iterations, 0) / runs.length; + + return { + direct: { tokens: directTokens, cost: directCost, latency_ms: directLatency }, + rlm: { tokens: rlmTokens, cost: rlmCost, latency_ms: rlmLatency, avg_iterations: avgIterations }, + savings: { + tokens_pct: calculateSavings(directTokens, rlmTokens), + cost_pct: calculateCostSavings(directCost, rlmCost), + }, + }; +} + +// ─── Table Formatting ──────────────────────────────────── + +function padRight(str: string, len: number): string { + if 
(str.length >= len) return str.slice(0, len); + return str + " ".repeat(len - str.length); +} + +function padLeft(str: string, len: number): string { + if (str.length >= len) return str.slice(0, len); + return " ".repeat(len - str.length) + str; +} + +function formatTokens(n: number): string { + return n.toLocaleString("en-US"); +} + +function formatCost(n: number): string { + return `$${n.toFixed(4)}`; +} + +function formatLatency(ms: number): string { + if (ms < 1000) return `${ms}ms`; + return `${(ms / 1000).toFixed(1)}s`; +} + +function formatPct(n: number): string { + const sign = n > 0 ? "" : ""; + return `${sign}${n.toFixed(1)}%`; +} + +export function formatBenchmarkTable(results: BenchmarkResults): string { + const colW = { name: 22, mode: 10, tokens: 12, cost: 10, latency: 10, iters: 8 }; + + const lines: string[] = []; + + const modeLabel = results.mode === "cost" ? "cost comparison" : "oolong accuracy"; + lines.push(`rlmx benchmark — ${modeLabel} (RLM vs Direct LLM)`); + lines.push(""); + + // Header + const hr = "─"; + lines.push( + `┌${hr.repeat(colW.name)}┬${hr.repeat(colW.mode)}┬${hr.repeat(colW.tokens)}┬${hr.repeat(colW.cost)}┬${hr.repeat(colW.latency)}┬${hr.repeat(colW.iters)}┐` + ); + lines.push( + `│${padRight(" Question", colW.name)}│${padRight(" Mode", colW.mode)}│${padRight(" Tokens", colW.tokens)}│${padRight(" Cost", colW.cost)}│${padRight(" Latency", colW.latency)}│${padRight(" Iters", colW.iters)}│` + ); + lines.push( + `├${hr.repeat(colW.name)}┼${hr.repeat(colW.mode)}┼${hr.repeat(colW.tokens)}┼${hr.repeat(colW.cost)}┼${hr.repeat(colW.latency)}┼${hr.repeat(colW.iters)}┤` + ); + + // Rows + for (const run of results.runs) { + const directTokens = run.direct.tokens_input + run.direct.tokens_output; + const rlmTokens = run.rlm.tokens_input + run.rlm.tokens_output; + + lines.push( + `│${padRight(` ${run.questionName}`, colW.name)}│${padRight(" Direct", colW.mode)}│${padLeft(formatTokens(directTokens) + " ", colW.tokens)}│${padLeft(formatCost(run.direct.cost) + " ", colW.cost)}│${padLeft(formatLatency(run.direct.latency_ms) + " ", colW.latency)}│${padRight(" -", colW.iters)}│` + ); + lines.push( + `│${padRight("", colW.name)}│${padRight(" RLM", colW.mode)}│${padLeft(formatTokens(rlmTokens) + " ", colW.tokens)}│${padLeft(formatCost(run.rlm.cost) + " ", colW.cost)}│${padLeft(formatLatency(run.rlm.latency_ms) + " ", colW.latency)}│${padLeft(String(run.rlm.iterations) + " ", colW.iters)}│` + ); + lines.push( + `│${padRight("", colW.name)}│${padRight(" Savings", colW.mode)}│${padLeft(formatPct(run.savings.tokens_pct) + " ", colW.tokens)}│${padLeft(formatPct(run.savings.cost_pct) + " ", colW.cost)}│${padRight(" -", colW.latency)}│${padRight("", colW.iters)}│` + ); + } + + // Footer with totals + lines.push( + `├${hr.repeat(colW.name)}┼${hr.repeat(colW.mode)}┼${hr.repeat(colW.tokens)}┼${hr.repeat(colW.cost)}┼${hr.repeat(colW.latency)}┼${hr.repeat(colW.iters)}┤` + ); + lines.push( + `│${padRight(" TOTALS", colW.name)}│${padRight(" Direct", colW.mode)}│${padLeft(formatTokens(results.totals.direct.tokens) + " ", colW.tokens)}│${padLeft(formatCost(results.totals.direct.cost) + " ", colW.cost)}│${padLeft(formatLatency(results.totals.direct.latency_ms) + " ", colW.latency)}│${padRight(" -", colW.iters)}│` + ); + lines.push( + `│${padRight("", colW.name)}│${padRight(" RLM", colW.mode)}│${padLeft(formatTokens(results.totals.rlm.tokens) + " ", colW.tokens)}│${padLeft(formatCost(results.totals.rlm.cost) + " ", colW.cost)}│${padLeft(formatLatency(results.totals.rlm.latency_ms) + " ", 
colW.latency)}│${padLeft(results.totals.rlm.avg_iterations.toFixed(1) + " ", colW.iters)}│` + ); + lines.push( + `│${padRight("", colW.name)}│${padRight(" Savings", colW.mode)}│${padLeft(formatPct(results.totals.savings.tokens_pct) + " ", colW.tokens)}│${padLeft(formatPct(results.totals.savings.cost_pct) + " ", colW.cost)}│${padRight(" -", colW.latency)}│${padRight("", colW.iters)}│` + ); + lines.push( + `└${hr.repeat(colW.name)}┴${hr.repeat(colW.mode)}┴${hr.repeat(colW.tokens)}┴${hr.repeat(colW.cost)}┴${hr.repeat(colW.latency)}┴${hr.repeat(colW.iters)}┘` + ); + + return lines.join("\n"); +} + +// ─── Results Persistence ───────────────────────────────── + +export async function saveBenchmarkResults(results: BenchmarkResults): Promise { + const benchDir = join(homedir(), ".rlmx", "benchmarks"); + await mkdir(benchDir, { recursive: true }); + + const ts = results.timestamp.replace(/[:.]/g, "-").replace("T", "_").replace("Z", ""); + const filename = `benchmark-${results.mode}-${ts}.json`; + const filepath = join(benchDir, filename); + + await writeFile(filepath, JSON.stringify(results, null, 2), "utf-8"); + return filepath; +} diff --git a/src/cache.ts b/src/cache.ts index d7f7634..0fba5ce 100644 --- a/src/cache.ts +++ b/src/cache.ts @@ -12,7 +12,7 @@ import type { RlmxConfig, CacheConfig } from "./config.js"; import type { LoadedContext, ContextItem } from "./context.js"; // Provider context window limits (approximate token counts) -const PROVIDER_LIMITS: Record = { +export const PROVIDER_LIMITS: Record = { anthropic: 200000, openai: 128000, google: 1000000, // Gemini supports 1M+ diff --git a/src/cli.ts b/src/cli.ts index 5fa045c..1064a94 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -22,6 +22,8 @@ Usage: rlmx init [--dir ] Scaffold rlmx.yaml config rlmx cache [options] Pre-warm cache or estimate context size rlmx batch [options] Bulk interrogation from questions file + rlmx benchmark [options] Run benchmarks (cost or oolong) + rlmx stats [options] Query run history and cost breakdowns Options: --context Path to context (directory or file) @@ -42,6 +44,7 @@ Options: --ext File extensions for context dirs (comma-separated) --thinking Thinking level: minimal, low, medium, high (Gemini 3) --cache Enable cache mode (full context in system prompt for provider caching) + --no-session Disable auto-save of session data --estimate Show context size and cost estimate without caching (cache command) --parallel Concurrent questions for batch command (default: 1) --batch-api Use Gemini Batch API for 50% cost reduction (batch command) @@ -72,7 +75,7 @@ Examples: interface CliOptions { query: string | null; - command: "query" | "init" | "help" | "version" | "cache" | "batch" | "config"; + command: "query" | "init" | "help" | "version" | "cache" | "batch" | "config" | "benchmark" | "stats"; context: string | null; output: "text" | "json" | "stream"; verbose: boolean; @@ -92,6 +95,7 @@ interface CliOptions { batchFile: string | null; parallel: number; batchApi: boolean; + noSession: boolean; } function parseCliArgs(args: string[]): CliOptions { @@ -118,6 +122,7 @@ function parseCliArgs(args: string[]): CliOptions { estimate: { type: "boolean", default: false }, parallel: { type: "string", default: "1" }, "batch-api": { type: "boolean", default: false }, + "no-session": { type: "boolean", default: false }, }, allowPositionals: true, strict: false, @@ -129,7 +134,7 @@ function parseCliArgs(args: string[]): CliOptions { verbose: false, maxIterations: 30, timeout: 300000, dir: process.cwd(), stats: false, log: 
null, tools: null, maxCost: null, maxTokens: null, maxDepth: null, ext: null, thinking: null, cache: false, estimate: false, - batchFile: null, parallel: 1, batchApi: false, + batchFile: null, parallel: 1, batchApi: false, noSession: false, }; } @@ -139,7 +144,7 @@ function parseCliArgs(args: string[]): CliOptions { verbose: false, maxIterations: 30, timeout: 300000, dir: process.cwd(), stats: false, log: null, tools: null, maxCost: null, maxTokens: null, maxDepth: null, ext: null, thinking: null, cache: false, estimate: false, - batchFile: null, parallel: 1, batchApi: false, + batchFile: null, parallel: 1, batchApi: false, noSession: false, }; } @@ -147,6 +152,8 @@ function parseCliArgs(args: string[]): CliOptions { : positionals[0] === "cache" ? "cache" : positionals[0] === "batch" ? "batch" : positionals[0] === "config" ? "config" + : positionals[0] === "benchmark" ? "benchmark" + : positionals[0] === "stats" ? "stats" : "query"; const query = command === "query" ? positionals[0] ?? null : null; const batchFile = command === "batch" ? positionals[1] ?? null : null; @@ -200,6 +207,7 @@ function parseCliArgs(args: string[]): CliOptions { batchFile, parallel: parseInt(values.parallel as string, 10) || 1, batchApi: values["batch-api"] as boolean, + noSession: values["no-session"] as boolean, }; } @@ -297,6 +305,36 @@ async function runQuery(opts: CliOptions): Promise { } } + // Validate context size and auto-adjust cache/storage modes + let storageMode = false; + if (context) { + const validation = validateContextSize(context, config.model.provider); + if (!validation.valid) { + // Context exceeds model limit — disable cache mode if it was enabled + if (opts.cache || config.cache.enabled) { + console.error( + `rlmx: context exceeds model limit (~${validation.estimatedTokens.toLocaleString()} tokens > ${validation.limit.toLocaleString()}), disabling cache mode` + ); + opts.cache = false; + config.cache.enabled = false; + } + // Signal storage mode when enabled is 'auto' or 'always' + if (config.storage.enabled === "auto" || config.storage.enabled === "always") { + storageMode = true; + console.error( + `rlmx: storage mode activated for large context (~${validation.estimatedTokens.toLocaleString()} tokens)` + ); + } + } + } + // Force storage mode when explicitly set to 'always' + if (config.storage.enabled === "always" && !storageMode) { + storageMode = true; + if (opts.verbose) { + console.error("rlmx: storage mode forced (storage.enabled: always)"); + } + } + // Read query from stdin if not provided as argument let query = opts.query; if (!query && !process.stdin.isTTY) { @@ -316,6 +354,7 @@ async function runQuery(opts: CliOptions): Promise { verbose: opts.verbose, output: opts.output, cache: opts.cache, + storageMode, logger, }); @@ -359,6 +398,33 @@ async function runQuery(opts: CliOptions): Promise { outputResult(result, opts.output); } + // Save session (unless --no-session) + if (!opts.noSession) { + try { + const { saveSession } = await import("./session.js"); + await saveSession({ + runId: logger.runId, + query: opts.query ?? 
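// Storage-mode gating in the block above reduces to a small rule: "never" opts out,
// "always" forces it, and "auto" switches it on only when the context no longer fits
// the model's window. A condensed sketch of that rule (not the literal implementation):
function shouldUseStorageMode(
  enabled: "auto" | "always" | "never",
  contextFitsModelWindow: boolean
): boolean {
  if (enabled === "never") return false;
  if (enabled === "always") return true;
  return !contextFitsModelWindow; // "auto"
}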
"(stdin)", + contextPath: opts.context, + model: `${config.model.provider}/${config.model.model}`, + answer: result.answer, + usage: { + inputTokens: result.usage.inputTokens, + outputTokens: result.usage.outputTokens, + cachedTokens: result.usage.cacheReadTokens, + totalCost: result.usage.totalCost, + iterations: result.iterations, + timeMs: timeMs, + model: `${config.model.provider}/${config.model.model}`, + }, + config: config as unknown as Record, + logPath: opts.log, + }); + } catch (err: unknown) { + console.error(`rlmx: session save failed: ${err instanceof Error ? err.message : String(err)}`); + } + } + // Exit with non-zero code on empty response abort (issue #14) if (result.budgetHit === "empty_responses") { process.exit(1); @@ -498,6 +564,30 @@ async function runBatchCommand(opts: CliOptions): Promise { } } + // Validate context size and auto-adjust cache/storage mode for batch + let batchCache = true; + let batchStorageMode = false; + if (context) { + const validation = validateContextSize(context, config.model.provider); + if (!validation.valid) { + console.error( + `rlmx: context exceeds model limit (~${validation.estimatedTokens.toLocaleString()} tokens > ${validation.limit.toLocaleString()}), disabling cache mode` + ); + batchCache = false; + config.cache.enabled = false; + if (config.storage.enabled === "auto" || config.storage.enabled === "always") { + batchStorageMode = true; + console.error( + `rlmx: storage mode activated for batch (~${validation.estimatedTokens.toLocaleString()} tokens)` + ); + } + } + } + // Force storage mode when explicitly set to 'always' + if (config.storage.enabled === "always" && !batchStorageMode) { + batchStorageMode = true; + } + if (opts.verbose) { console.error(`rlmx batch: processing ${opts.batchFile}`); } @@ -506,7 +596,8 @@ async function runBatchCommand(opts: CliOptions): Promise { maxIterations: opts.maxIterations, timeout: opts.timeout, verbose: opts.verbose, - cache: true, + cache: batchCache, + storageMode: batchStorageMode, maxCost: opts.maxCost ?? undefined, parallel: opts.parallel, }); @@ -602,6 +693,41 @@ async function runConfig(args: string[]): Promise { } } +async function runBenchmarkCommand(opts: CliOptions, args: string[]): Promise { + const mode = args[0]; + const configDir = process.cwd(); + const config = await loadConfig(configDir); + + if (opts.tools) config.toolsLevel = opts.tools; + + if (mode === "cost") { + const { runCostBenchmark, formatBenchmarkTable, saveBenchmarkResults } = await import("./benchmark.js"); + const outputIdx = args.indexOf("--output"); + const outputFormat = outputIdx >= 0 && args[outputIdx + 1] === "json" ? "json" as const : "table" as const; + const results = await runCostBenchmark(config, { outputFormat }); + if (outputFormat === "json") { + console.log(JSON.stringify(results, null, 2)); + } else { + console.error(formatBenchmarkTable(results)); + } + const savedPath = await saveBenchmarkResults(results); + console.error(`Results saved to ${savedPath}`); + } else if (mode === "oolong") { + const samplesIdx = args.indexOf("--samples"); + const samples = samplesIdx >= 0 ? parseInt(args[samplesIdx + 1], 10) : 5; + const idxArgIdx = args.indexOf("--idx"); + const idx = idxArgIdx >= 0 ? 
parseInt(args[idxArgIdx + 1], 10) : undefined; + + const { runOolongBenchmark, formatBenchmarkTable, saveBenchmarkResults } = await import("./benchmark.js"); + const results = await runOolongBenchmark(config, { samples, idx }); + console.error(formatBenchmarkTable(results)); + const savedPath = await saveBenchmarkResults(results); + console.error(`Results saved to ${savedPath}`); + } else { + console.log(`rlmx benchmark — compare RLM vs direct LLM\n\nUsage:\n rlmx benchmark cost Run cost benchmark with built-in dataset\n rlmx benchmark cost --output json Output results as JSON\n rlmx benchmark oolong Run Oolong Synth (auto-installs HF datasets)\n rlmx benchmark oolong --samples 5 Run N samples (default 5)\n rlmx benchmark oolong --idx 42 Run specific sample by index`); + } +} + async function main(): Promise { const opts = parseCliArgs(process.argv.slice(2)); @@ -639,6 +765,16 @@ async function main(): Promise { await runConfig(process.argv.slice(3)); break; + case "benchmark": + await runBenchmarkCommand(opts, process.argv.slice(3)); + break; + + case "stats": { + const { runStatsCommand } = await import("./stats.js"); + await runStatsCommand(process.argv.slice(3)); + break; + } + case "query": if (!opts.query && process.stdin.isTTY) { console.log(HELP); diff --git a/src/config.ts b/src/config.ts index 5391973..3f51a60 100644 --- a/src/config.ts +++ b/src/config.ts @@ -65,6 +65,17 @@ export interface OutputConfig { schema: Record | null; } +/** Storage configuration for pgserve-backed large context handling */ +export interface StorageConfig { + enabled: "auto" | "always" | "never"; + mode: "persistent" | "memory"; + dataDir: string; + port: number; + chunkSize: number | null; + chunkUtilization: number; + charsPerToken: number; +} + /** Tool level — controls which functions are available in the REPL */ export type ToolsLevel = "core" | "standard" | "full"; @@ -88,6 +99,8 @@ export interface RlmxConfig { gemini: GeminiConfig; /** Structured output configuration */ output: OutputConfig; + /** Storage configuration for pgserve */ + storage: StorageConfig; /** Config source: "yaml" | "md" | "defaults" */ configSource: "yaml" | "md" | "defaults"; } @@ -131,6 +144,16 @@ const DEFAULT_OUTPUT_CONFIG: OutputConfig = { schema: null, }; +export const DEFAULT_STORAGE_CONFIG: StorageConfig = { + enabled: "auto", + mode: "persistent", + dataDir: "~/.rlmx/data", + port: 0, + chunkSize: null, + chunkUtilization: 0.6, + charsPerToken: 4, +}; + // ─── YAML Schema ───────────────────────────────────────── /** Shape of rlmx.yaml on disk */ @@ -178,6 +201,15 @@ interface RawYamlConfig { ttl?: number; "expire-time"?: string; }; + storage?: { + enabled?: string; + mode?: string; + "data-dir"?: string; + port?: number; + "chunk-size"?: number | null; + "chunk-utilization"?: number; + "chars-per-token"?: number; + }; } // ─── YAML Loading ──────────────────────────────────────── @@ -256,13 +288,18 @@ function parseYamlConfig(content: string, dir: string): RlmxConfig { exclude: cfg.context?.exclude ?? 
DEFAULT_CONTEXT_CONFIG.exclude, }; - // Validate extensions format - for (const ext of contextConfig.extensions) { + // Validate and normalize extensions format + for (let i = 0; i < contextConfig.extensions.length; i++) { + const ext = contextConfig.extensions[i]; if (typeof ext !== "string") { throw new Error( `Invalid extension in context.extensions: expected string, got ${typeof ext}.` ); } + // Ensure leading dot: "mdx" → ".mdx" + if (ext.length > 0 && !ext.startsWith(".")) { + contextConfig.extensions[i] = `.${ext}`; + } } // Parse budget @@ -366,6 +403,53 @@ function parseYamlConfig(content: string, dir: string): RlmxConfig { ); } + // Parse storage config + const rawEnabled = cfg.storage?.enabled ?? DEFAULT_STORAGE_CONFIG.enabled; + if (!["auto", "always", "never"].includes(rawEnabled)) { + throw new Error( + `Invalid storage.enabled "${rawEnabled}" in rlmx.yaml. Must be one of: auto, always, never.` + ); + } + const rawMode = cfg.storage?.mode ?? DEFAULT_STORAGE_CONFIG.mode; + if (!["persistent", "memory"].includes(rawMode)) { + throw new Error( + `Invalid storage.mode "${rawMode}" in rlmx.yaml. Must be one of: persistent, memory.` + ); + } + const storagePort = cfg.storage?.port ?? DEFAULT_STORAGE_CONFIG.port; + if (typeof storagePort !== "number" || storagePort < 0 || !Number.isInteger(storagePort)) { + throw new Error( + `Invalid storage.port in rlmx.yaml: must be a non-negative integer, got ${storagePort}.` + ); + } + const chunkSize = cfg.storage?.["chunk-size"] ?? DEFAULT_STORAGE_CONFIG.chunkSize; + if (chunkSize !== null && (typeof chunkSize !== "number" || chunkSize <= 0)) { + throw new Error( + `Invalid storage.chunk-size in rlmx.yaml: must be a positive number or null, got ${chunkSize}.` + ); + } + const chunkUtilization = cfg.storage?.["chunk-utilization"] ?? DEFAULT_STORAGE_CONFIG.chunkUtilization; + if (typeof chunkUtilization !== "number" || chunkUtilization <= 0 || chunkUtilization > 1) { + throw new Error( + `Invalid storage.chunk-utilization in rlmx.yaml: must be a number between 0 (exclusive) and 1 (inclusive), got ${chunkUtilization}.` + ); + } + const charsPerToken = cfg.storage?.["chars-per-token"] ?? DEFAULT_STORAGE_CONFIG.charsPerToken; + if (typeof charsPerToken !== "number" || charsPerToken <= 0) { + throw new Error( + `Invalid storage.chars-per-token in rlmx.yaml: must be a positive number, got ${charsPerToken}.` + ); + } + const storage: StorageConfig = { + enabled: rawEnabled as StorageConfig["enabled"], + mode: rawMode as StorageConfig["mode"], + dataDir: cfg.storage?.["data-dir"] ?? 
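# Hypothetical rlmx.yaml storage block using the kebab-case keys parsed above; the
# values shown simply mirror DEFAULT_STORAGE_CONFIG and the validation rules
# (enabled: auto|always|never, mode: persistent|memory, chunk-utilization in (0, 1]).
storage:
  enabled: auto
  mode: persistent
  data-dir: ~/.rlmx/data
  port: 0                  # 0 = pick a free port at startup
  chunk-size: null         # null = derive from the provider's context limit
  chunk-utilization: 0.6
  chars-per-token: 4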
DEFAULT_STORAGE_CONFIG.dataDir, + port: storagePort, + chunkSize, + chunkUtilization, + charsPerToken, + }; + return { system, tools, @@ -378,6 +462,7 @@ function parseYamlConfig(content: string, dir: string): RlmxConfig { cache, gemini, output, + storage, configSource: "yaml", }; } @@ -494,6 +579,7 @@ async function loadConfigFromMd(dir: string): Promise { cache: { ...DEFAULT_CACHE_CONFIG }, gemini: { ...DEFAULT_GEMINI_CONFIG }, output: { ...DEFAULT_OUTPUT_CONFIG }, + storage: { ...DEFAULT_STORAGE_CONFIG }, configSource: "md", }; } @@ -514,6 +600,7 @@ function defaultConfig(dir: string): RlmxConfig { cache: { ...DEFAULT_CACHE_CONFIG }, gemini: { ...DEFAULT_GEMINI_CONFIG }, output: { ...DEFAULT_OUTPUT_CONFIG }, + storage: { ...DEFAULT_STORAGE_CONFIG }, configSource: "defaults", }; } diff --git a/src/context.ts b/src/context.ts index 4faecac..4168d49 100644 --- a/src/context.ts +++ b/src/context.ts @@ -140,8 +140,13 @@ export async function loadContextFromDir( dirPath: string, options?: Partial ): Promise { + // Normalize extensions: ensure each has a leading dot ("mdx" → ".mdx") + const rawExtensions = options?.extensions ?? DEFAULT_COLLECT_OPTIONS.extensions; + const normalizedExtensions = rawExtensions.map((ext) => + ext.startsWith(".") ? ext : `.${ext}` + ); const merged: CollectOptions = { - extensions: options?.extensions ?? DEFAULT_COLLECT_OPTIONS.extensions, + extensions: normalizedExtensions, exclude: options?.exclude ?? DEFAULT_COLLECT_OPTIONS.exclude, }; const items = await collectFiles(dirPath, dirPath, merged); diff --git a/src/ipc.ts b/src/ipc.ts index f710c76..b49e9ae 100644 --- a/src/ipc.ts +++ b/src/ipc.ts @@ -60,7 +60,12 @@ export interface LLMRequest { | "rlm_query_batched" | "web_search" | "fetch_url" - | "generate_image"; + | "generate_image" + | "pg_search" + | "pg_slice" + | "pg_time" + | "pg_count" + | "pg_query"; prompts: string[]; model?: string; } diff --git a/src/llm.ts b/src/llm.ts index ecd35fe..f6a9be7 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -11,6 +11,7 @@ import { spawn } from "node:child_process"; import type { RlmxConfig, ModelConfig, GeminiConfig } from "./config.js"; import type { LLMRequest } from "./ipc.js"; import type { Logger } from "./logger.js"; +import type { PgStorage } from "./storage.js"; import { buildGeminiOnPayload, isGoogleProvider, type ThinkingLevel } from "./gemini.js"; /** Token usage tracking. */ @@ -384,7 +385,8 @@ export async function handleLLMRequest( config: RlmxConfig, usage: UsageStats, signal?: AbortSignal, - geminiCounts?: GeminiCallCounts + geminiCounts?: GeminiCallCounts, + storage?: PgStorage ): Promise { const subCallModel: ModelConfig = config.model.subCallModel ? 
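// With the normalization above, context extensions may be passed with or without a
// leading dot. A minimal usage sketch (the directory path is hypothetical):
import { loadContextFromDir } from "./context.js";

const ctx = await loadContextFromDir("./docs", {
  extensions: ["md", ".mdx", "txt"], // normalized to [".md", ".mdx", ".txt"]
});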
{ ...config.model, model: config.model.subCallModel } @@ -487,6 +489,40 @@ export async function handleLLMRequest( return [igResp.text]; } + case "pg_search": { + if (!storage) return [`Error: storage not available`]; + const params = JSON.parse(request.prompts[0]); + const rows = await storage.search(params.pattern, params.limit); + return [JSON.stringify(rows)]; + } + + case "pg_slice": { + if (!storage) return [`Error: storage not available`]; + const params = JSON.parse(request.prompts[0]); + const rows = await storage.slice(params.start, params.end); + return [JSON.stringify(rows)]; + } + + case "pg_time": { + if (!storage) return [`Error: storage not available`]; + const params = JSON.parse(request.prompts[0]); + const rows = await storage.timeRange(params.from, params.to); + return [JSON.stringify(rows)]; + } + + case "pg_count": { + if (!storage) return [`Error: storage not available`]; + const cnt = await storage.count(); + return [JSON.stringify({ count: cnt })]; + } + + case "pg_query": { + if (!storage) return [`Error: storage not available`]; + const params = JSON.parse(request.prompts[0]); + const rows = await storage.query(params.sql); + return [JSON.stringify(rows)]; + } + default: return request.prompts.map( () => `Error: unknown request type "${request.request_type}"` diff --git a/src/observe.ts b/src/observe.ts new file mode 100644 index 0000000..4a3b928 --- /dev/null +++ b/src/observe.ts @@ -0,0 +1,186 @@ +/** + * ObservabilityRecorder — fire-and-forget event recording to pgserve. + * + * Records every LLM call, REPL execution, and sub-call into rlmx_sessions + * and rlmx_events tables. All methods are fire-and-forget: errors are logged + * to stderr but never thrown or block the main RLM loop. + */ + +import type { PgStorage } from "./storage.js"; + +/** Usage info for recording an LLM call. */ +export interface LLMCallUsage { + inputTokens: number; + outputTokens: number; + cost: number; +} + +/** Total usage for recording session completion. */ +export interface TotalUsage { + inputTokens: number; + outputTokens: number; + cachedTokens?: number; + totalCost: number; +} + +export class ObservabilityRecorder { + private storage: PgStorage; + private sessionId: string | null = null; + + constructor(storage: PgStorage) { + this.storage = storage; + } + + /** + * Create a new session record. + */ + startSession( + runId: string, + query: string, + model: string, + provider: string, + contextPath?: string, + config?: Record + ): void { + this.sessionId = runId; + this.fire(async () => { + const client = this.storage.getClient(); + if (!client) return; + await client.query( + `INSERT INTO rlmx_sessions (id, query, context_path, model, provider, config) + VALUES ($1, $2, $3, $4, $5, $6) + ON CONFLICT (id) DO NOTHING`, + [runId, query, contextPath ?? null, model, provider, config ? JSON.stringify(config) : null] + ); + }); + } + + /** + * Record an LLM call event. + */ + recordLLMCall( + iteration: number, + usage: LLMCallUsage, + model: string, + durationMs: number + ): void { + this.fire(async () => { + const client = this.storage.getClient(); + if (!client || !this.sessionId) return; + await client.query( + `INSERT INTO rlmx_events (session_id, iteration, kind, input_tokens, output_tokens, cost, model, duration_ms) + VALUES ($1, $2, 'llm_call', $3, $4, $5, $6, $7)`, + [this.sessionId, iteration, usage.inputTokens, usage.outputTokens, usage.cost, model, durationMs] + ); + }); + } + + /** + * Record a REPL execution event. 
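// Once storage is threaded through, a pg_* request carries a JSON-encoded parameter
// object in prompts[0] and is routed to the matching PgStorage method. A sketch with
// hypothetical values (config, usage, and a started storage assumed in scope):
import { handleLLMRequest } from "./llm.js";
import type { LLMRequest } from "./ipc.js";

const req: LLMRequest = {
  request_type: "pg_search",
  prompts: [JSON.stringify({ pattern: "connection timeout", limit: 10 })],
};
const [resultJson] = await handleLLMRequest(req, config, usage, undefined, undefined, storage);
const matches = JSON.parse(resultJson); // rows returned by storage.search()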
+ */ + recordReplExec( + iteration: number, + code: string, + stdout: string, + stderr: string, + durationMs: number, + isError?: boolean + ): void { + this.fire(async () => { + const client = this.storage.getClient(); + if (!client || !this.sessionId) return; + await client.query( + `INSERT INTO rlmx_events (session_id, iteration, kind, code, stdout, stderr, duration_ms, is_error, error_message) + VALUES ($1, $2, 'repl_exec', $3, $4, $5, $6, $7, $8)`, + [ + this.sessionId, iteration, + code.slice(0, 10000), stdout.slice(0, 10000), stderr.slice(0, 5000), + durationMs, isError ?? false, isError ? stderr.slice(0, 1000) : null, + ] + ); + }); + } + + /** + * Record a sub-call event (pg_search, llm_query from REPL, etc.). + */ + recordSubCall( + iteration: number, + requestType: string, + promptPreview: string, + durationMs: number, + isError?: boolean, + errorMessage?: string + ): void { + this.fire(async () => { + const client = this.storage.getClient(); + if (!client || !this.sessionId) return; + await client.query( + `INSERT INTO rlmx_events (session_id, iteration, kind, request_type, prompt_preview, duration_ms, is_error, error_message) + VALUES ($1, $2, 'sub_call', $3, $4, $5, $6, $7)`, + [ + this.sessionId, iteration, + requestType, promptPreview.slice(0, 500), + durationMs, isError ?? false, errorMessage?.slice(0, 1000) ?? null, + ] + ); + }); + } + + /** + * Record session completion with final answer and totals. + */ + recordFinal( + answer: string, + iterations: number, + totalUsage: TotalUsage + ): void { + this.fire(async () => { + const client = this.storage.getClient(); + if (!client || !this.sessionId) return; + await client.query( + `UPDATE rlmx_sessions SET + status = 'completed', + ended_at = now(), + iterations = $2, + input_tokens = $3, + output_tokens = $4, + cached_tokens = $5, + total_cost = $6, + answer_length = $7 + WHERE id = $1`, + [ + this.sessionId, iterations, + totalUsage.inputTokens, totalUsage.outputTokens, + totalUsage.cachedTokens ?? 0, totalUsage.totalCost, + answer.length, + ] + ); + }); + } + + /** + * Record session failure. + */ + recordError(errorMessage: string): void { + this.fire(async () => { + const client = this.storage.getClient(); + if (!client || !this.sessionId) return; + await client.query( + `UPDATE rlmx_sessions SET status = 'failed', ended_at = now(), budget_hit = $2 WHERE id = $1`, + [this.sessionId, errorMessage.slice(0, 500)] + ); + }); + } + + /** + * Fire-and-forget: run an async operation, swallow errors. + */ + private fire(fn: () => Promise): void { + fn().catch((err) => { + process.stderr.write( + `rlmx: observability recording error: ${err instanceof Error ? err.message : String(err)}\n` + ); + }); + } +} diff --git a/src/repl.ts b/src/repl.ts index 16efef4..deab5f1 100644 --- a/src/repl.ts +++ b/src/repl.ts @@ -30,6 +30,7 @@ const __dirname = dirname(__filename); const REPL_SERVER_PATH = join(__dirname, "..", "..", "python", "repl_server.py"); const BATTERIES_PATH = join(__dirname, "..", "..", "python", "batteries.py"); const GEMINI_BATTERIES_PATH = join(__dirname, "..", "..", "python", "gemini_batteries.py"); +const PG_BATTERIES_PATH = join(__dirname, "..", "..", "python", "pg_batteries.py"); /** Battery function names — tracked for stats. */ const BATTERY_FUNCTION_NAMES = [ @@ -60,6 +61,8 @@ export interface REPLStartOptions { toolsLevel?: ToolsLevel; /** Whether to load Gemini batteries (web_search, fetch_url, generate_image). */ loadGeminiBatteries?: boolean; + /** Whether to load pg_batteries (pg_search, pg_slice, etc.) 
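// Minimal sketch of driving the recorder around one LLM call; the run id, model name,
// and numbers are hypothetical, and every record* call is fire-and-forget:
import { ObservabilityRecorder } from "./observe.js";

const recorder = new ObservabilityRecorder(storage); // storage: a started PgStorage
recorder.startSession(runId, "summarize the incident logs", "google/gemini-2.0-flash", "google");
recorder.recordLLMCall(0, { inputTokens: 1200, outputTokens: 300, cost: 0.0004 }, "google/gemini-2.0-flash", 950);
recorder.recordFinal("done", 1, { inputTokens: 1200, outputTokens: 300, totalCost: 0.0004 });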
for storage mode. */ + loadPgBatteries?: boolean; /** Python executable path (default: "python3"). */ pythonPath?: string; /** Path to repl_server.py (auto-detected). */ @@ -167,6 +170,11 @@ export class REPL { await this._loadGeminiBatteries(); } } + + // Load pg_batteries when storage mode is active (independent of tools level) + if (options.loadPgBatteries) { + await this._loadPgBatteries(); + } } /** Execute Python code in the REPL and return the result. */ @@ -272,6 +280,13 @@ export class REPL { this._skipTracking = false; } + private async _loadPgBatteries(): Promise { + const code = await readFile(PG_BATTERIES_PATH, "utf-8"); + this._skipTracking = true; + await this.execute(code); + this._skipTracking = false; + } + /** Track which battery functions appear in executed code. */ private _trackBatteryUsage(code: string): void { if (this._skipTracking) return; diff --git a/src/rlm.ts b/src/rlm.ts index 92fefdb..29e8283 100644 --- a/src/rlm.ts +++ b/src/rlm.ts @@ -9,10 +9,13 @@ * - Recursive sub-calls via llm_query/rlm_query */ +import { randomUUID } from "node:crypto"; import type { RlmxConfig, ToolDef } from "./config.js"; import type { LoadedContext, ContextItem } from "./context.js"; import { buildCachedSystemPrompt, computeContentHash, buildSessionId, estimateTokens } from "./cache.js"; import { REPL } from "./repl.js"; +import { PgStorage } from "./storage.js"; +import { ObservabilityRecorder } from "./observe.js"; import { llmComplete, handleLLMRequest, @@ -42,6 +45,8 @@ export interface RLMOptions { verbose: boolean; output: "text" | "json" | "stream"; cache: boolean; + /** When true, route context through pgserve storage instead of REPL variable. */ + storageMode?: boolean; logger?: Logger; } @@ -66,7 +71,8 @@ function isStructuredOutputMode(config: RlmxConfig): boolean { */ function buildSystemPrompt( config: RlmxConfig, - _context: LoadedContext | null + _context: LoadedContext | null, + storageRecordCount?: number ): string { // Use SYSTEM.md content or paper default (from scaffold) let system = config.system ?? ""; @@ -86,6 +92,19 @@ function buildSystemPrompt( config.criteria; } + // Append storage mode instructions when context is in PostgreSQL + if (storageRecordCount !== undefined) { + system += + `\n\n## Context Storage\n\n` + + `Context is stored in PostgreSQL (~${storageRecordCount.toLocaleString()} records). 
Use these tools to query it:\n` + + `- pg_search("pattern") — full-text search\n` + + `- pg_slice(start, end) — get lines by range\n` + + `- pg_time("HH:MM", "HH:MM") — filter by timestamp\n` + + `- pg_count() — total records\n` + + `- pg_query("SQL") — raw SQL (read-only)\n` + + `Do NOT try to access the \`context\` variable directly — it is not loaded in memory.`; + } + return system; } @@ -175,11 +194,45 @@ export async function rlmLoop( const geminiCounts = createGeminiCallCounts(); const budget = new BudgetTracker(config.budget); - // Build system prompt — cache mode embeds full context, normal mode uses metadata only + // ── Storage mode setup ────────────────────────────────── + let storage: PgStorage | undefined; + let recorder: ObservabilityRecorder | undefined; + let storageRecordCount: number | undefined; + const runId = randomUUID(); + + if (opts.storageMode) { + storage = new PgStorage(); + await storage.start(config.storage); + + // Ingest context into Postgres + if (context) { + storageRecordCount = await storage.ingest(context); + if (opts.verbose) { + process.stderr.write(`rlmx: ingested ${storageRecordCount} records into pgserve storage\n`); + } + } + + // Set up observability recorder + recorder = new ObservabilityRecorder(storage); + recorder.startSession( + runId, + query, + `${config.model.provider}/${config.model.model}`, + config.model.provider, + undefined, + config as unknown as Record + ); + } + + // Build system prompt — cache mode embeds full context, storage mode adds pg_* tools, normal mode uses metadata only const systemPrompt = opts.cache ? buildCachedSystemPrompt(config, context) - : buildSystemPrompt(config, context); - const contextMetadata = buildContextMetadata(context); + : buildSystemPrompt(config, context, storageRecordCount); + + // In storage mode, override context metadata to describe storage + const contextMetadata = opts.storageMode && storageRecordCount !== undefined + ? `Context is stored in PostgreSQL (~${storageRecordCount.toLocaleString()} records). Use pg_search(), pg_slice(), pg_time(), pg_count(), pg_query() to query it.` + : buildContextMetadata(context); // Build cache config for LLM calls (passed through to pi/ai completeSimple) let cacheConfig: CacheLLMConfig | undefined; @@ -211,8 +264,8 @@ export async function rlmLoop( const repl = new REPL(); try { - // Start REPL with context and custom tools - const replContext = prepareReplContext(context); + // Start REPL — in storage mode, skip raw context injection and load pg_batteries + const replContext = opts.storageMode ? undefined : prepareReplContext(context); const toolsMap: Record = {}; for (const tool of config.tools) { toolsMap[tool.name] = tool.code; @@ -222,24 +275,47 @@ export async function rlmLoop( context: replContext as string | string[] | Record, tools: Object.keys(toolsMap).length > 0 ? 
toolsMap : undefined, loadGeminiBatteries: isGoogleProvider(config.model.provider) && (config.toolsLevel === "standard" || config.toolsLevel === "full"), + loadPgBatteries: !!opts.storageMode, toolsLevel: config.toolsLevel, }); - // Set up LLM request handler for REPL IPC + // Set up LLM request handler for REPL IPC — pass storage for pg_* routes repl.onLLMRequest(async (request) => { - return handleLLMRequest( + const startMs = Date.now(); + const results = await handleLLMRequest( request, config, usage, abortController.signal, - geminiCounts + geminiCounts, + storage ); + // Record sub-calls to observability + if (recorder && request.request_type !== "llm_query" && request.request_type !== "llm_query_batched") { + recorder.recordSubCall( + 0, // iteration not available here; will be approximate + request.request_type, + request.prompts[0]?.slice(0, 200) ?? "", + Date.now() - startMs + ); + } + return results; }); - /** Cleanup timeout/REPL and build the final result. */ + /** Cleanup timeout/REPL/storage and build the final result. */ const finalize = async (answer: string, iterations: number): Promise => { clearTimeout(timeoutHandle); + // Record final observability event + if (recorder) { + recorder.recordFinal(answer, iterations, { + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens, + cachedTokens: usage.cacheReadTokens, + totalCost: usage.totalCost, + }); + } await repl.stop(); + if (storage) await storage.stop(); return buildResult(answer, usage, iterations, config, budget.getState().budgetHit, geminiCounts, repl.getGeminiBatteriesUsed()); }; @@ -275,6 +351,7 @@ export async function rlmLoop( } // Call LLM + const llmStartMs = Date.now(); const response = await llmComplete(messages, config.model, { signal: abortController.signal, cacheConfig, @@ -282,9 +359,20 @@ export async function rlmLoop( outputSchema: config.output.schema, geminiConfig: config.gemini, }); + const llmDurationMs = Date.now() - llmStartMs; mergeUsage(usage, response.usage); budget.record(response.usage.inputTokens, response.usage.outputTokens, response.usage.totalCost); + // Record LLM call to observability + if (recorder) { + recorder.recordLLMCall( + iteration, + { inputTokens: response.usage.inputTokens, outputTokens: response.usage.outputTokens, cost: response.usage.totalCost }, + `${config.model.provider}/${config.model.model}`, + llmDurationMs + ); + } + // Track thought signatures for Gemini stats if (response.thoughtSignatureCount) { geminiCounts.thoughtSignatures += response.thoughtSignatureCount; @@ -355,7 +443,9 @@ export async function rlmLoop( logVerbose(iteration, `executing code (${block.code.length} chars)`); } + const execStartMs = Date.now(); const execResult = await repl.execute(block.code); + const execDurationMs = Date.now() - execStartMs; executions.push({ code: block.code, @@ -365,6 +455,14 @@ export async function rlmLoop( error: execResult.error, }); + // Record REPL execution to observability + if (recorder) { + recorder.recordReplExec( + iteration, block.code, execResult.stdout, execResult.stderr ?? "", + execDurationMs, !!execResult.error + ); + } + if (execResult.final) { return finalize(execResult.final.value, iteration + 1); } @@ -470,7 +568,9 @@ export async function rlmLoop( `rlmx: 3 consecutive empty LLM responses — aborting. 
Context may exceed API limits.\n` ); clearTimeout(timeoutHandle); + if (recorder) recorder.recordError("empty_responses"); await repl.stop(); + if (storage) await storage.stop(); return buildResult( "Error: aborted after 3 consecutive empty LLM responses. Context may exceed API token limits.", @@ -493,7 +593,9 @@ export async function rlmLoop( return finalize(forcedResult, actualIterations); } catch (err: unknown) { clearTimeout(timeoutHandle); + if (recorder) recorder.recordError(err instanceof Error ? err.message : String(err)); await repl.stop().catch(() => {}); + if (storage) await storage.stop().catch(() => {}); if ((err instanceof Error && err.name === "AbortError") || abortController.signal.aborted) { return buildResult( diff --git a/src/session.ts b/src/session.ts new file mode 100644 index 0000000..84ab26c --- /dev/null +++ b/src/session.ts @@ -0,0 +1,91 @@ +/** + * Session persistence — auto-save every rlmx run to ~/.rlmx/sessions//. + * + * Each session directory contains: + * meta.json — run metadata (runId, query, context, timestamp, version) + * usage.json — token usage and cost statistics + * answer.txt — final answer text + * config.yaml — snapshot of the RlmxConfig used + * trajectory.jsonl — copy of the JSONL log (if --log was specified) + */ + +import { mkdir, writeFile, copyFile, stat } from "node:fs/promises"; +import { join } from "node:path"; +import { homedir } from "node:os"; +import { createRequire } from "node:module"; +import yaml from "js-yaml"; + +/** Data required to persist a session. */ +export interface SessionData { + runId: string; + query: string; + contextPath: string | null; + model: string; + answer: string; + usage: { + inputTokens: number; + outputTokens: number; + cachedTokens: number; + totalCost: number; + iterations: number; + timeMs: number; + model: string; + }; + config: Record; + logPath: string | null; +} + +/** + * Save session artifacts to ~/.rlmx/sessions//. + * Returns the session directory path. + */ +export async function saveSession(data: SessionData): Promise { + const sessionsDir = join(homedir(), ".rlmx", "sessions", data.runId); + await mkdir(sessionsDir, { recursive: true }); + + // Read package version + let version = "unknown"; + try { + const require = createRequire(import.meta.url); + const pkg = require("../../package.json"); + version = pkg.version ?? "unknown"; + } catch { + // If we can't read the package, continue with "unknown" + } + + // 1. meta.json + const meta = { + runId: data.runId, + query: data.query, + contextPath: data.contextPath, + timestamp: new Date().toISOString(), + version, + }; + await writeFile(join(sessionsDir, "meta.json"), JSON.stringify(meta, null, 2) + "\n"); + + // 2. usage.json + await writeFile(join(sessionsDir, "usage.json"), JSON.stringify(data.usage, null, 2) + "\n"); + + // 3. answer.txt + await writeFile(join(sessionsDir, "answer.txt"), data.answer); + + // 4. config.yaml + const configYaml = yaml.dump(data.config, { lineWidth: 120, noRefs: true }); + await writeFile(join(sessionsDir, "config.yaml"), configYaml); + + // 5. 
trajectory.jsonl — copy from logPath if it exists, otherwise write empty file + const trajectoryPath = join(sessionsDir, "trajectory.jsonl"); + if (data.logPath) { + try { + await stat(data.logPath); + await copyFile(data.logPath, trajectoryPath); + } catch { + // Log file doesn't exist or can't be read — write empty + await writeFile(trajectoryPath, ""); + } + } else { + await writeFile(trajectoryPath, ""); + } + + return sessionsDir; +} diff --git a/src/stats.ts b/src/stats.ts new file mode 100644 index 0000000..f21c6e6 --- /dev/null +++ b/src/stats.ts @@ -0,0 +1,408 @@ +/** + * Stats query functions for rlmx observability data. + * + * Connects to the persistent pgserve data at ~/.rlmx/data to query + * rlmx_sessions and rlmx_events tables. Starts pgserve temporarily, + * queries, then stops. + */ + +import { existsSync } from "node:fs"; +import { homedir } from "node:os"; +import { PgStorage } from "./storage.js"; +import { loadConfig } from "./config.js"; + +/** A session row from rlmx_sessions */ +export interface SessionRow { + id: string; + query: string; + model: string; + provider: string; + status: string; + iterations: number | null; + input_tokens: number; + output_tokens: number; + total_cost: number; + started_at: string; + ended_at: string | null; + duration_ms: number | null; +} + +/** An event row from rlmx_events */ +export interface EventRow { + id: number; + iteration: number | null; + kind: string; + input_tokens: number | null; + output_tokens: number | null; + cost: number | null; + model: string | null; + code: string | null; + stdout: string | null; + stderr: string | null; + request_type: string | null; + prompt_preview: string | null; + duration_ms: number | null; + is_error: boolean; + error_message: string | null; + created_at: string; +} + +/** Cost breakdown row from v_cost_breakdown */ +export interface CostRow { + session_id: string; + model: string; + calls: number; + total_input: number; + total_output: number; + total_cost: number; + avg_duration_ms: number; +} + +/** Tool usage row from v_repl_usage */ +export interface ToolRow { + session_id: string; + request_type: string; + calls: number; + errors: number; + avg_duration_ms: number; +} + +/** + * Check if persistent data directory exists. + */ +export async function hasStatsData(): Promise { + const config = await loadConfig(process.cwd()); + const dataDir = config.storage.dataDir.replace(/^~/, homedir()); + return existsSync(dataDir); +} + +/** + * Parse a "since" duration string (e.g., "24h", "7d", "30m") into a JS Date cutoff. + */ +function parseSinceCutoff(since: string): Date { + const match = since.match(/^(\d+)([mhd])$/); + if (!match) throw new Error(`Invalid --since format "${since}". Use Nh, Nd, or Nm (e.g., 24h, 7d, 30m).`); + const num = parseInt(match[1], 10); + const unit = match[2]; + const msMap: Record = { m: 60_000, h: 3_600_000, d: 86_400_000 }; + return new Date(Date.now() - num * msMap[unit]); +} + +/** + * Create a temporary PgStorage connected to the configured data directory for querying stats. + */ +async function createStatsStorage(): Promise { + const config = await loadConfig(process.cwd()); + const storage = new PgStorage(); + await storage.start({ + ...config.storage, + mode: "persistent", + enabled: "always", + }); + return storage; +} + +/** + * List recent runs. 
+ */ +export async function listRuns(storage: PgStorage, limit = 20): Promise { + const rows = await storage.query( + `SELECT id, query, model, provider, status, iterations, + input_tokens::int, output_tokens::int, total_cost::float, + started_at::text, ended_at::text, + EXTRACT(EPOCH FROM (COALESCE(ended_at, now()) - started_at))::int * 1000 AS duration_ms + FROM rlmx_sessions + ORDER BY started_at DESC + LIMIT $1`, + [limit] + ); + return rows as SessionRow[]; +} + +/** + * Get events for a specific run. + */ +export async function getRun(storage: PgStorage, runId: string): Promise { + const rows = await storage.query( + `SELECT id, iteration, kind, input_tokens, output_tokens, cost::float, + model, code, stdout, stderr, request_type, prompt_preview, + duration_ms, is_error, error_message, created_at::text + FROM rlmx_events + WHERE session_id = $1 + ORDER BY id`, + [runId] + ); + return rows as EventRow[]; +} + +/** + * Get cost breakdown by model. + */ +export async function costBreakdown(storage: PgStorage, since?: string): Promise { + if (since) { + const cutoff = parseSinceCutoff(since); + const rows = await storage.query( + `SELECT e.session_id, e.model, + COUNT(*)::int AS calls, + SUM(e.input_tokens)::int AS total_input, + SUM(e.output_tokens)::int AS total_output, + SUM(e.cost)::float AS total_cost, + AVG(e.duration_ms)::int AS avg_duration_ms + FROM rlmx_events e + WHERE e.created_at >= $1 AND e.kind = 'llm_call' + GROUP BY e.session_id, e.model + ORDER BY total_cost DESC`, + [cutoff.toISOString()] + ); + return rows as CostRow[]; + } + const rows = await storage.query( + `SELECT e.session_id, e.model, + COUNT(*)::int AS calls, + SUM(e.input_tokens)::int AS total_input, + SUM(e.output_tokens)::int AS total_output, + SUM(e.cost)::float AS total_cost, + AVG(e.duration_ms)::int AS avg_duration_ms + FROM rlmx_events e + WHERE e.kind = 'llm_call' + GROUP BY e.session_id, e.model + ORDER BY total_cost DESC` + ); + return rows as CostRow[]; +} + +/** + * Get tool/sub-call usage. + */ +export async function toolUsage(storage: PgStorage, since?: string): Promise { + if (since) { + const cutoff = parseSinceCutoff(since); + const rows = await storage.query( + `SELECT e.session_id, e.request_type, + COUNT(*)::int AS calls, + SUM(CASE WHEN e.is_error THEN 1 ELSE 0 END)::int AS errors, + AVG(e.duration_ms)::int AS avg_duration_ms + FROM rlmx_events e + WHERE e.created_at >= $1 AND e.kind IN ('sub_call', 'pg_query', 'repl_exec') + GROUP BY e.session_id, e.request_type + ORDER BY calls DESC`, + [cutoff.toISOString()] + ); + return rows as ToolRow[]; + } + const rows = await storage.query( + `SELECT e.session_id, e.request_type, + COUNT(*)::int AS calls, + SUM(CASE WHEN e.is_error THEN 1 ELSE 0 END)::int AS errors, + AVG(e.duration_ms)::int AS avg_duration_ms + FROM rlmx_events e + WHERE e.kind IN ('sub_call', 'pg_query', 'repl_exec') + GROUP BY e.session_id, e.request_type + ORDER BY calls DESC` + ); + return rows as ToolRow[]; +} + +// ─── Formatting ───────────────────────────────────────── + +function pad(s: string, len: number, right = false): string { + return right ? s.padStart(len) : s.padEnd(len); +} + +function truncate(s: string, max: number): string { + return s.length > max ? s.slice(0, max - 1) + "…" : s; +} + +function formatCost(n: number): string { + return n < 0.01 ? 
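// The exported query helpers can also be driven programmatically against a started
// PgStorage pointed at the persistent data directory (storage assumed in scope):
import { listRuns, costBreakdown, formatRunsTable, formatCostTable } from "./stats.js";

const runs = await listRuns(storage, 10);         // ten most recent sessions
console.log(formatRunsTable(runs));

const costs = await costBreakdown(storage, "7d"); // per-session/model costs, last 7 days
console.log(formatCostTable(costs));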
`$${n.toFixed(6)}` : `$${n.toFixed(4)}`; +} + +function formatDuration(ms: number | null): string { + if (ms === null) return "-"; + if (ms < 1000) return `${ms}ms`; + return `${(ms / 1000).toFixed(1)}s`; +} + +/** + * Format session rows as a terminal table. + */ +export function formatRunsTable(rows: SessionRow[]): string { + if (rows.length === 0) return "No runs found."; + + const header = [ + pad("ID", 10), + pad("Query", 30), + pad("Model", 25), + pad("Iter", 5, true), + pad("Cost", 10, true), + pad("Status", 10), + pad("Duration", 10, true), + ].join(" "); + + const sep = "-".repeat(header.length); + + const lines = rows.map((r) => [ + pad(r.id.slice(0, 8) + "..", 10), + pad(truncate(r.query, 30), 30), + pad(truncate(r.model, 25), 25), + pad(r.iterations !== null ? String(r.iterations) : "-", 5, true), + pad(formatCost(r.total_cost), 10, true), + pad(r.status, 10), + pad(formatDuration(r.duration_ms), 10, true), + ].join(" ")); + + return [header, sep, ...lines].join("\n"); +} + +/** + * Format event rows as a terminal table. + */ +export function formatEventsTable(rows: EventRow[]): string { + if (rows.length === 0) return "No events found for this run."; + + const header = [ + pad("#", 4, true), + pad("Iter", 5, true), + pad("Kind", 12), + pad("In", 8, true), + pad("Out", 8, true), + pad("Cost", 10, true), + pad("Time", 8, true), + pad("Detail", 40), + ].join(" "); + + const sep = "-".repeat(header.length); + + const lines = rows.map((r) => { + let detail = ""; + if (r.kind === "llm_call") detail = r.model ?? ""; + else if (r.kind === "repl_exec") detail = truncate(r.code ?? "", 40); + else if (r.kind === "sub_call") detail = r.request_type ?? ""; + + return [ + pad(String(r.id), 4, true), + pad(r.iteration !== null ? String(r.iteration) : "-", 5, true), + pad(r.kind, 12), + pad(r.input_tokens !== null ? String(r.input_tokens) : "-", 8, true), + pad(r.output_tokens !== null ? String(r.output_tokens) : "-", 8, true), + pad(r.cost !== null ? formatCost(r.cost) : "-", 10, true), + pad(formatDuration(r.duration_ms), 8, true), + pad(truncate(detail, 40), 40), + ].join(" "); + }); + + return [header, sep, ...lines].join("\n"); +} + +/** + * Format cost breakdown as a terminal table. + */ +export function formatCostTable(rows: CostRow[]): string { + if (rows.length === 0) return "No cost data found."; + + const header = [ + pad("Model", 30), + pad("Calls", 6, true), + pad("Input", 10, true), + pad("Output", 10, true), + pad("Cost", 12, true), + pad("Avg Time", 10, true), + ].join(" "); + + const sep = "-".repeat(header.length); + + const lines = rows.map((r) => [ + pad(truncate(r.model, 30), 30), + pad(String(r.calls), 6, true), + pad(r.total_input.toLocaleString(), 10, true), + pad(r.total_output.toLocaleString(), 10, true), + pad(formatCost(r.total_cost), 12, true), + pad(formatDuration(r.avg_duration_ms), 10, true), + ].join(" ")); + + return [header, sep, ...lines].join("\n"); +} + +/** + * Format tool usage as a terminal table. + */ +export function formatToolTable(rows: ToolRow[]): string { + if (rows.length === 0) return "No tool usage data found."; + + const header = [ + pad("Type", 20), + pad("Calls", 6, true), + pad("Errors", 7, true), + pad("Avg Time", 10, true), + ].join(" "); + + const sep = "-".repeat(header.length); + + const lines = rows.map((r) => [ + pad(r.request_type ?? 
"-", 20), + pad(String(r.calls), 6, true), + pad(String(r.errors), 7, true), + pad(formatDuration(r.avg_duration_ms), 10, true), + ].join(" ")); + + return [header, sep, ...lines].join("\n"); +} + +/** + * Run the stats command with parsed args. + */ +export async function runStatsCommand(args: string[]): Promise { + // Check for data + if (!(await hasStatsData())) { + console.log("No stats yet. Run a query first."); + return; + } + + // Parse args + const runIdx = args.indexOf("--run"); + const runId = runIdx >= 0 ? args[runIdx + 1] : undefined; + const costsFlag = args.includes("--costs"); + const toolsFlag = args.includes("--tools"); + const sinceIdx = args.indexOf("--since"); + const since = sinceIdx >= 0 ? args[sinceIdx + 1] : undefined; + const outputIdx = args.indexOf("--output"); + const jsonOutput = outputIdx >= 0 && args[outputIdx + 1] === "json"; + + // Start temporary pgserve for querying + let storage: PgStorage | undefined; + try { + storage = await createStatsStorage(); + + if (runId) { + const events = await getRun(storage, runId); + if (jsonOutput) { + console.log(JSON.stringify(events, null, 2)); + } else { + console.log(formatEventsTable(events)); + } + } else if (costsFlag) { + const costs = await costBreakdown(storage, since); + if (jsonOutput) { + console.log(JSON.stringify(costs, null, 2)); + } else { + console.log(formatCostTable(costs)); + } + } else if (toolsFlag) { + const tools = await toolUsage(storage, since); + if (jsonOutput) { + console.log(JSON.stringify(tools, null, 2)); + } else { + console.log(formatToolTable(tools)); + } + } else { + const runs = await listRuns(storage); + if (jsonOutput) { + console.log(JSON.stringify(runs, null, 2)); + } else { + console.log(formatRunsTable(runs)); + } + } + } finally { + if (storage) await storage.stop(); + } +} diff --git a/src/storage.ts b/src/storage.ts new file mode 100644 index 0000000..bf07086 --- /dev/null +++ b/src/storage.ts @@ -0,0 +1,573 @@ +/** + * PgStorage — embedded pgserve lifecycle, context ingestion, and query interface. + * + * Spawns pgserve as a child process (Bun-based TCP proxy over embedded PostgreSQL), + * connects via node-postgres, and provides methods for context storage and retrieval. 
+ */ + +import { spawn, type ChildProcess } from "node:child_process"; +import { createServer } from "node:net"; +import { resolve, join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; +import { homedir } from "node:os"; +import { mkdirSync, existsSync } from "node:fs"; +import pg from "pg"; +import type { StorageConfig } from "./config.js"; +import type { LoadedContext, ContextItem } from "./context.js"; +import { PROVIDER_LIMITS } from "./cache.js"; + +const { Client } = pg; + +/** Database name used for rlmx context storage */ +const RLMX_DB = "rlmx"; + +/** Schema DDL for the records table */ +const SCHEMA_DDL = ` +CREATE TABLE IF NOT EXISTS records ( + line_num INT PRIMARY KEY, + timestamp TIMESTAMPTZ, + type TEXT, + source TEXT, + session_id TEXT, + content TEXT NOT NULL, + content_tsvector TSVECTOR GENERATED ALWAYS AS (to_tsvector('english', content)) STORED +); + +CREATE INDEX IF NOT EXISTS idx_records_tsvector ON records USING GIN (content_tsvector); +CREATE INDEX IF NOT EXISTS idx_records_timestamp ON records (timestamp) WHERE timestamp IS NOT NULL; +`; + +/** Schema DDL for observability tables */ +const OBSERVABILITY_DDL = ` +CREATE TABLE IF NOT EXISTS rlmx_sessions ( + id TEXT PRIMARY KEY, + query TEXT NOT NULL, + context_path TEXT, + model TEXT NOT NULL, + provider TEXT NOT NULL, + status TEXT DEFAULT 'running', + config JSONB, + started_at TIMESTAMPTZ DEFAULT now(), + ended_at TIMESTAMPTZ, + iterations INT, + input_tokens BIGINT DEFAULT 0, + output_tokens BIGINT DEFAULT 0, + cached_tokens BIGINT DEFAULT 0, + total_cost NUMERIC(10,6) DEFAULT 0, + answer_length INT, + budget_hit TEXT +); + +CREATE TABLE IF NOT EXISTS rlmx_events ( + id BIGSERIAL PRIMARY KEY, + session_id TEXT REFERENCES rlmx_sessions(id), + iteration INT, + kind TEXT NOT NULL, + input_tokens INT, + output_tokens INT, + cost NUMERIC(10,6), + model TEXT, + code TEXT, + stdout TEXT, + stderr TEXT, + request_type TEXT, + prompt_preview TEXT, + duration_ms INT, + is_error BOOLEAN DEFAULT false, + error_message TEXT, + data JSONB, + created_at TIMESTAMPTZ DEFAULT now() +); + +CREATE OR REPLACE VIEW v_cost_breakdown AS +SELECT session_id, model, + COUNT(*) AS calls, + SUM(input_tokens) AS total_input, + SUM(output_tokens) AS total_output, + SUM(cost) AS total_cost, + AVG(duration_ms)::INT AS avg_duration_ms +FROM rlmx_events WHERE kind = 'llm_call' +GROUP BY session_id, model; + +CREATE OR REPLACE VIEW v_repl_usage AS +SELECT session_id, request_type, + COUNT(*) AS calls, + SUM(CASE WHEN is_error THEN 1 ELSE 0 END) AS errors, + AVG(duration_ms)::INT AS avg_duration_ms +FROM rlmx_events WHERE kind IN ('sub_call', 'pg_query', 'repl_exec') +GROUP BY session_id, request_type; +`; + +/** Resolve ~ in paths to the user's home directory */ +function expandHome(p: string): string { + if (p.startsWith("~/") || p === "~") { + return join(homedir(), p.slice(1)); + } + return p; +} + +/** + * Find a free TCP port by binding to port 0 and reading the assigned port. + */ +async function findFreePort(): Promise { + return new Promise((resolve, reject) => { + const srv = createServer(); + srv.listen(0, "127.0.0.1", () => { + const addr = srv.address(); + if (addr && typeof addr === "object") { + const port = addr.port; + srv.close(() => resolve(port)); + } else { + srv.close(() => reject(new Error("Failed to get free port"))); + } + }); + srv.on("error", reject); + }); +} + +/** + * Compute adaptive chunk size based on provider limits and storage config. 
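// The two views defined above can be read back through PgStorage.query(); a sketch
// with a hypothetical session id (storage assumed started):
const breakdown = await storage.query(
  "SELECT model, calls, total_cost FROM v_cost_breakdown WHERE session_id = $1",
  [runId]
);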
+ */ +export function getChunkSize(provider: string, config: StorageConfig): number { + if (config.chunkSize) return config.chunkSize; + const limit = PROVIDER_LIMITS[provider] ?? 128000; + return Math.floor(limit * config.chunkUtilization * config.charsPerToken); +} + +/** + * PgStorage manages an embedded pgserve instance for large context handling. + */ +export class PgStorage { + private process: ChildProcess | null = null; + private client: InstanceType | null = null; + private port = 0; + private stopping = false; + private cleanupRegistered = false; + + /** Connection string for the running pgserve instance */ + get connectionString(): string { + return `postgresql://localhost:${this.port}/${RLMX_DB}`; + } + + /** Get the underlying pg Client (for observability recorder). */ + getClient(): InstanceType | null { + return this.client; + } + + /** + * Start pgserve and connect to it. + * Returns the connection string once ready. + */ + async start(config: StorageConfig): Promise { + // Resolve pgserve binary path + const pgserveBin = this.findPgserveBin(); + + // Build CLI args + const args: string[] = []; + + // Port: 0 means auto-assign a free port; otherwise use the specified port + const requestedPort = config.port === 0 ? await findFreePort() : config.port; + args.push("--port", String(requestedPort)); + + // Mode: persistent uses dataDir, memory uses temp + if (config.mode === "persistent") { + const dataDir = expandHome(config.dataDir); + mkdirSync(dataDir, { recursive: true }); + args.push("--data", dataDir); + } + // memory mode: no --data flag = in-memory (pgserve default) + + // Quiet output for embedded use + args.push("--log", "error"); + args.push("--no-cluster"); + args.push("--no-stats"); + + // Spawn pgserve + this.process = spawn(pgserveBin, args, { + stdio: ["ignore", "pipe", "pipe"], + detached: false, + }); + + // Register cleanup handlers (only once per process) + if (!this.cleanupRegistered) { + this.registerCleanup(); + this.cleanupRegistered = true; + } + + // Wait for pgserve to be ready by polling for connection + this.port = requestedPort; + await this.waitForReady(); + + // Connect pg client and create schema + this.client = new Client({ connectionString: this.connectionString }); + await this.client.connect(); + await this.client.query(SCHEMA_DDL); + await this.client.query(OBSERVABILITY_DDL); + + return this.connectionString; + } + + /** + * Ingest a loaded context into the records table. + * For JSONL: parses each line as JSON, extracts timestamp/type fields. + * For other text: one record per line. 
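// Worked example of the adaptive chunk size: with the default utilization (0.6) and
// chars-per-token (4), an Anthropic-sized window (200,000 tokens in PROVIDER_LIMITS)
// yields floor(200000 * 0.6 * 4) = 480,000 characters per chunk.
import { getChunkSize } from "./storage.js";
import { DEFAULT_STORAGE_CONFIG } from "./config.js";

const chunkChars = getChunkSize("anthropic", DEFAULT_STORAGE_CONFIG); // 480000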
+ */ + async ingest(context: LoadedContext, sessionId?: string): Promise { + if (!this.client) throw new Error("PgStorage not started"); + + // Clear stale records so re-runs with different context don't keep old data + await this.client.query("TRUNCATE records"); + + // Collect all lines from context, tracking source file when available + let lines: Array<{ text: string; source: string | null }>; + if (context.type === "list") { + const items = context.content as ContextItem[]; + lines = items.flatMap((item) => + item.content.split("\n").map((text) => ({ text, source: item.path })) + ); + } else { + lines = (context.content as string) + .split("\n") + .map((text) => ({ text, source: null })); + } + + // Batch insert using multi-row VALUES + const BATCH_SIZE = 500; + let ingested = 0; + + for (let i = 0; i < lines.length; i += BATCH_SIZE) { + const batch = lines.slice(i, i + BATCH_SIZE); + const values: unknown[] = []; + const placeholders: string[] = []; + + for (let j = 0; j < batch.length; j++) { + const lineNum = i + j; + const { text, source } = batch[j]; + if (text.trim() === "") continue; + + const { timestamp, type, content } = parseLine(text); + const idx = values.length; + placeholders.push( + `($${idx + 1}, $${idx + 2}, $${idx + 3}, $${idx + 4}, $${idx + 5}, $${idx + 6})` + ); + values.push(lineNum, timestamp, type, source, sessionId ?? null, content); + } + + if (placeholders.length > 0) { + await this.client.query( + `INSERT INTO records (line_num, timestamp, type, source, session_id, content) + VALUES ${placeholders.join(", ")}`, + values + ); + ingested += placeholders.length; + } + } + + return ingested; + } + + /** + * Full-text search via tsvector. + */ + async search( + pattern: string, + limit = 20 + ): Promise> { + if (!this.client) throw new Error("PgStorage not started"); + + // Convert pattern to tsquery: split words and join with & + const tsquery = pattern + .trim() + .split(/\s+/) + .map((w) => w.replace(/[^a-zA-Z0-9]/g, "")) + .filter(Boolean) + .join(" & "); + + if (!tsquery) return []; + + const result = await this.client.query( + `SELECT line_num, source, content, ts_rank(content_tsvector, to_tsquery('english', $1)) AS rank + FROM records + WHERE content_tsvector @@ to_tsquery('english', $1) + ORDER BY rank DESC + LIMIT $2`, + [tsquery, limit] + ); + + return result.rows.map((r) => ({ + line_num: r.line_num, + source: r.source, + content: r.content, + rank: parseFloat(r.rank), + })); + } + + /** + * Get records by line number range (inclusive). + */ + async slice( + start: number, + end: number + ): Promise> { + if (!this.client) throw new Error("PgStorage not started"); + + const result = await this.client.query( + `SELECT line_num, source, content FROM records + WHERE line_num >= $1 AND line_num < $2 + ORDER BY line_num`, + [start, end] + ); + + return result.rows; + } + + /** + * Filter records by timestamp range. + */ + async timeRange( + from: string, + to: string + ): Promise< + Array<{ line_num: number; timestamp: string; content: string }> + > { + if (!this.client) throw new Error("PgStorage not started"); + + const result = await this.client.query( + `SELECT line_num, timestamp, content FROM records + WHERE timestamp >= $1::timestamptz AND timestamp <= $2::timestamptz + ORDER BY timestamp`, + [from, to] + ); + + return result.rows; + } + + /** + * Count total records. 
+ */ + async count(): Promise { + if (!this.client) throw new Error("PgStorage not started"); + const result = await this.client.query("SELECT COUNT(*)::int AS cnt FROM records"); + return result.rows[0].cnt; + } + + /** + * Execute raw SQL (read-only). Supports parameterized queries. + */ + async query(sql: string, params?: unknown[]): Promise { + if (!this.client) throw new Error("PgStorage not started"); + + // Wrap in read-only transaction for safety + await this.client.query("BEGIN TRANSACTION READ ONLY"); + try { + const result = params + ? await this.client.query(sql, params) + : await this.client.query(sql); + await this.client.query("COMMIT"); + return result.rows; + } catch (err) { + await this.client.query("ROLLBACK").catch(() => {}); + throw err; + } + } + + /** + * Stop pgserve: graceful 3s timeout, then SIGKILL. + */ + async stop(): Promise { + if (this.stopping) return; + this.stopping = true; + + // Close pg client + if (this.client) { + try { + await this.client.end(); + } catch { + // Ignore client close errors + } + this.client = null; + } + + // Kill pgserve process + const proc = this.process; + if (proc && proc.pid && !proc.killed) { + await new Promise((resolve) => { + const forceKillTimer = setTimeout(() => { + try { + proc.kill("SIGKILL"); + } catch { + // Already dead + } + resolve(); + }, 3000); + + proc.once("exit", () => { + clearTimeout(forceKillTimer); + resolve(); + }); + + try { + proc.kill("SIGTERM"); + } catch { + clearTimeout(forceKillTimer); + resolve(); + } + }); + } + + this.process = null; + this.stopping = false; + } + + // ─── Private helpers ────────────────────────────────────── + + /** Find the pgserve CLI binary in node_modules */ + private findPgserveBin(): string { + // Try package-relative first (works for global installs), then cwd fallback + const __dirname = dirname(fileURLToPath(import.meta.url)); + const pkgBin = join(__dirname, "..", "..", "node_modules", ".bin", "pgserve"); + if (existsSync(pkgBin)) return pkgBin; + + // Fallback: resolve from cwd (works for local dev) + return resolve("node_modules", ".bin", "pgserve"); + } + + /** + * Wait for pgserve to accept connections (poll with backoff). + * Throws if not ready within 10 seconds. + */ + private async waitForReady(): Promise { + const deadline = Date.now() + 10_000; + let lastError: Error | null = null; + + // Also capture process errors + const procError = new Promise((_, reject) => { + if (!this.process) return; + this.process.once("exit", (code) => { + if (code !== null && code !== 0) { + reject(new Error(`pgserve exited with code ${code}`)); + } + }); + this.process.once("error", (err) => { + reject(new Error(`pgserve spawn error: ${err.message}`)); + }); + }); + + while (Date.now() < deadline) { + // Fail fast if the process already exited + if (this.process && typeof this.process.exitCode === 'number') { + throw new Error(`pgserve exited with code ${this.process.exitCode} before becoming ready`); + } + + try { + const testClient = new Client({ + connectionString: this.connectionString, + connectionTimeoutMillis: 1000, + }); + await Promise.race([testClient.connect(), procError]); + await testClient.end(); + return; + } catch (err) { + lastError = err instanceof Error ? err : new Error(String(err)); + await new Promise((r) => setTimeout(r, 200)); + } + } + + throw new Error( + `pgserve did not become ready within 10s: ${lastError?.message ?? 
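// End-to-end lifecycle sketch for PgStorage: start an in-memory instance, ingest a
// loaded context, query it, and shut down. `context` is assumed to come from
// loadContextFromDir; the search pattern is hypothetical.
import { PgStorage } from "./storage.js";
import { DEFAULT_STORAGE_CONFIG } from "./config.js";

const storage = new PgStorage();
await storage.start({ ...DEFAULT_STORAGE_CONFIG, mode: "memory" });

const records = await storage.ingest(context);              // one row per non-empty line
const hits = await storage.search("deployment failed", 5);  // tsvector full-text search
const head = await storage.slice(0, 50);                    // rows with line_num 0..49
const total = await storage.count();

await storage.stop();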
"unknown error"}` + ); + } + + /** Register process exit handlers for cleanup */ + private registerCleanup(): void { + const cleanup = () => { + if (this.process && this.process.pid && !this.process.killed) { + try { + this.process.kill("SIGKILL"); + } catch { + // Best effort + } + } + }; + + process.once("exit", cleanup); + process.once("SIGTERM", () => { + cleanup(); + process.exit(128 + 15); + }); + process.once("SIGINT", () => { + cleanup(); + process.exit(128 + 2); + }); + process.once("uncaughtException", (err) => { + console.error("rlmx: uncaught exception, cleaning up pgserve:", err.message); + cleanup(); + process.exit(1); + }); + } +} + +// ─── Line parsing ─────────────────────────────────────── + +/** Common timestamp field names in JSONL data */ +const TIMESTAMP_FIELDS = [ + "timestamp", + "ts", + "time", + "datetime", + "date", + "created_at", + "createdAt", + "@timestamp", +]; + +/** Common type/category field names in JSONL data */ +const TYPE_FIELDS = ["type", "kind", "level", "severity", "category", "event"]; + +/** + * Parse a single line of context data. + * Tries JSON first (JSONL), falls back to plain text. + */ +function parseLine(line: string): { + timestamp: string | null; + type: string | null; + content: string; +} { + const trimmed = line.trim(); + if (!trimmed.startsWith("{")) { + return { timestamp: null, type: null, content: trimmed }; + } + + try { + const obj = JSON.parse(trimmed); + if (typeof obj !== "object" || obj === null) { + return { timestamp: null, type: null, content: trimmed }; + } + + // Extract timestamp + let timestamp: string | null = null; + for (const field of TIMESTAMP_FIELDS) { + if (obj[field] !== undefined && obj[field] !== null) { + timestamp = String(obj[field]); + break; + } + } + + // Extract type + let type: string | null = null; + for (const field of TYPE_FIELDS) { + if (obj[field] !== undefined && obj[field] !== null) { + type = String(obj[field]); + break; + } + } + + return { timestamp, type, content: trimmed }; + } catch { + // Malformed JSON — log warning and treat as plain text + process.stderr.write( + `rlmx: warning: skipping malformed JSONL line: ${trimmed.slice(0, 80)}...\n` + ); + return { timestamp: null, type: null, content: trimmed }; + } +} diff --git a/src/version.ts b/src/version.ts index aa3187d..db853f1 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '0.260330.13'; +export const VERSION = '0.260331.1'; diff --git a/tests/benchmark.test.ts b/tests/benchmark.test.ts new file mode 100644 index 0000000..b3f604b --- /dev/null +++ b/tests/benchmark.test.ts @@ -0,0 +1,302 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; + +import { + formatBenchmarkTable, + saveBenchmarkResults, + aggregateTotals, + calculateSavings, + calculateCostSavings, + type BenchmarkQuestion, + type BenchmarkRunResult, + type BenchmarkResults, +} from "../src/benchmark.js"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +async function loadDataset(): Promise { + // From dist/tests/ -> ../../src/benchmark-data.json + const jsonPath = join(__dirname, "..", "..", "src", "benchmark-data.json"); + const raw = await readFile(jsonPath, "utf-8"); + return JSON.parse(raw) as BenchmarkQuestion[]; +} + +// ─── Dataset Validation ────────────────────────────────── + 
+describe("benchmark-data.json", () => { + it("has valid structure for all entries", async () => { + const dataset = await loadDataset(); + + assert.ok(Array.isArray(dataset), "dataset should be an array"); + assert.ok(dataset.length >= 5, `dataset should have at least 5 entries, got ${dataset.length}`); + + for (const entry of dataset) { + assert.ok(typeof entry.id === "string" && entry.id.length > 0, `entry must have non-empty id`); + assert.ok(typeof entry.name === "string" && entry.name.length > 0, `entry ${entry.id} must have non-empty name`); + assert.ok(typeof entry.question === "string" && entry.question.length > 0, `entry ${entry.id} must have non-empty question`); + assert.ok(typeof entry.context === "string" && entry.context.length > 0, `entry ${entry.id} must have non-empty context`); + assert.ok(typeof entry.category === "string" && entry.category.length > 0, `entry ${entry.id} must have non-empty category`); + } + }); + + it("covers required categories", async () => { + const dataset = await loadDataset(); + + const categories = new Set(dataset.map((d) => d.category)); + assert.ok(categories.has("extraction"), "should have extraction category"); + assert.ok(categories.has("summarization"), "should have summarization category"); + assert.ok(categories.has("reasoning"), "should have reasoning category"); + assert.ok(categories.has("comparison"), "should have comparison category"); + assert.ok(categories.has("synthesis"), "should have synthesis category"); + }); + + it("has unique ids", async () => { + const dataset = await loadDataset(); + + const ids = dataset.map((d) => d.id); + const uniqueIds = new Set(ids); + assert.equal(ids.length, uniqueIds.size, "all ids must be unique"); + }); +}); + +// ─── Savings Calculation ───────────────────────────────── + +describe("calculateSavings", () => { + it("computes correct percentage", () => { + assert.equal(calculateSavings(100, 70), 30); + assert.equal(calculateSavings(200, 100), 50); + }); + + it("returns 0 for zero direct tokens", () => { + assert.equal(calculateSavings(0, 100), 0); + }); + + it("handles equal tokens", () => { + assert.equal(calculateSavings(100, 100), 0); + }); + + it("handles RLM using more tokens (negative savings)", () => { + const result = calculateSavings(100, 150); + assert.ok(result < 0, "savings should be negative when RLM uses more"); + assert.equal(result, -50); + }); +}); + +describe("calculateCostSavings", () => { + it("computes correct percentage", () => { + const result = calculateCostSavings(1.0, 0.7); + assert.ok(Math.abs(result - 30) < 0.001, `expected ~30, got ${result}`); + }); + + it("returns 0 for zero direct cost", () => { + assert.equal(calculateCostSavings(0, 0.5), 0); + }); + + it("handles equal cost", () => { + assert.equal(calculateCostSavings(0.5, 0.5), 0); + }); +}); + +// ─── Table Formatting ──────────────────────────────────── + +function makeMockResults(): BenchmarkResults { + return { + timestamp: "2025-01-15T10:30:00.000Z", + mode: "cost", + model: "google/gemini-2.0-flash", + runs: [ + { + questionId: "cost-001", + questionName: "API extraction", + direct: { + tokens_input: 10000, + tokens_output: 2450, + cost: 0.0012, + latency_ms: 2300, + answer: "Direct answer", + }, + rlm: { + tokens_input: 6000, + tokens_output: 2200, + cost: 0.0008, + latency_ms: 4100, + iterations: 3, + answer: "RLM answer", + }, + savings: { + tokens_pct: 34.1, + cost_pct: 33.3, + }, + }, + { + questionId: "cost-002", + questionName: "Summary test", + direct: { + tokens_input: 20000, + tokens_output: 
5000, + cost: 0.0025, + latency_ms: 3500, + answer: "Direct summary", + }, + rlm: { + tokens_input: 12000, + tokens_output: 4000, + cost: 0.0016, + latency_ms: 6200, + iterations: 5, + answer: "RLM summary", + }, + savings: { + tokens_pct: 36.0, + cost_pct: 36.0, + }, + }, + ], + totals: { + direct: { tokens: 37450, cost: 0.0037, latency_ms: 5800 }, + rlm: { tokens: 24200, cost: 0.0024, latency_ms: 10300, avg_iterations: 4.0 }, + savings: { tokens_pct: 35.4, cost_pct: 35.1 }, + }, + }; +} + +describe("formatBenchmarkTable", () => { + it("contains expected header", () => { + const table = formatBenchmarkTable(makeMockResults()); + assert.ok(table.includes("rlmx benchmark"), "should contain header"); + assert.ok(table.includes("cost comparison"), "should contain mode label"); + assert.ok(table.includes("Question"), "should contain Question column"); + assert.ok(table.includes("Mode"), "should contain Mode column"); + assert.ok(table.includes("Tokens"), "should contain Tokens column"); + assert.ok(table.includes("Cost"), "should contain Cost column"); + assert.ok(table.includes("Latency"), "should contain Latency column"); + assert.ok(table.includes("Iters"), "should contain Iters column"); + }); + + it("contains row data for each run", () => { + const table = formatBenchmarkTable(makeMockResults()); + assert.ok(table.includes("API extraction"), "should contain first question name"); + assert.ok(table.includes("Summary test"), "should contain second question name"); + assert.ok(table.includes("Direct"), "should contain Direct mode"); + assert.ok(table.includes("RLM"), "should contain RLM mode"); + assert.ok(table.includes("Savings"), "should contain Savings mode"); + }); + + it("contains box-drawing characters", () => { + const table = formatBenchmarkTable(makeMockResults()); + assert.ok(table.includes("┌"), "should contain top-left corner"); + assert.ok(table.includes("┐"), "should contain top-right corner"); + assert.ok(table.includes("└"), "should contain bottom-left corner"); + assert.ok(table.includes("┘"), "should contain bottom-right corner"); + assert.ok(table.includes("│"), "should contain vertical bar"); + assert.ok(table.includes("─"), "should contain horizontal bar"); + assert.ok(table.includes("├"), "should contain left tee"); + assert.ok(table.includes("┤"), "should contain right tee"); + }); + + it("contains TOTALS row", () => { + const table = formatBenchmarkTable(makeMockResults()); + assert.ok(table.includes("TOTALS"), "should contain TOTALS label"); + }); + + it("uses oolong label for oolong mode", () => { + const results = makeMockResults(); + results.mode = "oolong"; + const table = formatBenchmarkTable(results); + assert.ok(table.includes("oolong accuracy"), "should contain oolong mode label"); + }); +}); + +// ─── Aggregation ───────────────────────────────────────── + +describe("aggregateTotals", () => { + it("sums tokens and costs correctly", () => { + const runs: BenchmarkRunResult[] = [ + { + questionId: "q1", + questionName: "Q1", + direct: { tokens_input: 100, tokens_output: 50, cost: 0.01, latency_ms: 1000, answer: "" }, + rlm: { tokens_input: 60, tokens_output: 30, cost: 0.006, latency_ms: 2000, iterations: 2, answer: "" }, + savings: { tokens_pct: 40, cost_pct: 40 }, + }, + { + questionId: "q2", + questionName: "Q2", + direct: { tokens_input: 200, tokens_output: 100, cost: 0.02, latency_ms: 1500, answer: "" }, + rlm: { tokens_input: 120, tokens_output: 80, cost: 0.012, latency_ms: 3000, iterations: 4, answer: "" }, + savings: { tokens_pct: 33.3, cost_pct: 40 }, 
+      },
+    ];
+
+    const totals = aggregateTotals(runs);
+
+    assert.equal(totals.direct.tokens, 450, "direct tokens: 100+50+200+100");
+    assert.equal(totals.rlm.tokens, 290, "rlm tokens: 60+30+120+80");
+    assert.equal(totals.direct.latency_ms, 2500, "direct latency sum");
+    assert.equal(totals.rlm.latency_ms, 5000, "rlm latency sum");
+    assert.equal(totals.rlm.avg_iterations, 3, "avg iterations: (2+4)/2");
+
+    // Check cost sums (approximate comparison due to floating point)
+    assert.ok(Math.abs(totals.direct.cost - 0.03) < 0.0001, "direct cost should be ~0.03");
+    assert.ok(Math.abs(totals.rlm.cost - 0.018) < 0.0001, "rlm cost should be ~0.018");
+  });
+
+  it("handles empty runs", () => {
+    const totals = aggregateTotals([]);
+    assert.equal(totals.direct.tokens, 0);
+    assert.equal(totals.rlm.tokens, 0);
+    assert.equal(totals.direct.cost, 0);
+    assert.equal(totals.rlm.cost, 0);
+    assert.equal(totals.rlm.avg_iterations, 0);
+    assert.equal(totals.savings.tokens_pct, 0);
+    assert.equal(totals.savings.cost_pct, 0);
+  });
+
+  it("computes savings percentages from aggregated totals", () => {
+    const runs: BenchmarkRunResult[] = [
+      {
+        questionId: "q1",
+        questionName: "Q1",
+        direct: { tokens_input: 100, tokens_output: 0, cost: 0.10, latency_ms: 100, answer: "" },
+        rlm: { tokens_input: 50, tokens_output: 0, cost: 0.05, latency_ms: 200, iterations: 1, answer: "" },
+        savings: { tokens_pct: 50, cost_pct: 50 },
+      },
+    ];
+
+    const totals = aggregateTotals(runs);
+    assert.equal(totals.savings.tokens_pct, 50);
+    assert.equal(totals.savings.cost_pct, 50);
+  });
+});
+
+// ─── Results Saving ──────────────────────────────────────
+
+describe("saveBenchmarkResults", () => {
+  it("writes results as parseable JSON file", async () => {
+    const tempDir = await mkdtemp(join(tmpdir(), "rlmx-bench-test-"));
+    // Override homedir by setting env for the save call
+    const origHome = process.env.HOME;
+    process.env.HOME = tempDir;
+
+    try {
+      const results = makeMockResults();
+      const savedPath = await saveBenchmarkResults(results);
+
+      assert.ok(savedPath.endsWith(".json"), "saved file should be .json");
+      assert.ok(savedPath.includes("benchmark-cost"), "saved file should include mode");
+
+      const content = await readFile(savedPath, "utf-8");
+      const parsed = JSON.parse(content);
+      assert.equal(parsed.mode, "cost");
+      assert.equal(parsed.runs.length, 2);
+      assert.equal(parsed.model, "google/gemini-2.0-flash");
+    } finally {
+      process.env.HOME = origHome;
+      await rm(tempDir, { recursive: true, force: true });
+    }
+  });
+});
diff --git a/tests/cache.test.ts b/tests/cache.test.ts
index b4848c9..8a845b0 100644
--- a/tests/cache.test.ts
+++ b/tests/cache.test.ts
@@ -42,6 +42,7 @@ function makeConfig(overrides: Partial<RlmxConfig> = {}): RlmxConfig {
     configSource: overrides.configSource ?? "yaml",
     gemini: overrides.gemini ?? { thinkingLevel: null, googleSearch: false, urlContext: false, codeExecution: false, computerUse: false, mapsGrounding: false, fileSearch: false, mediaResolution: null },
     output: overrides.output ?? { schema: null },
+    storage: overrides.storage ?? { enabled: "auto", mode: "persistent", dataDir: "~/.rlmx/data", port: 0, chunkSize: null, chunkUtilization: 0.6, charsPerToken: 4 },
   };
 }
diff --git a/tests/context.test.ts b/tests/context.test.ts
index 2cf0b48..bcd8d14 100644
--- a/tests/context.test.ts
+++ b/tests/context.test.ts
@@ -5,6 +5,7 @@ import { join } from "node:path";
 import { tmpdir } from "node:os";
 import { loadContext, loadContextFromDir, loadContextFromFile } from "../src/context.js";
 import type { ContextItem } from "../src/context.js";
+import { loadConfig } from "../src/config.js";
 
 describe("context loading", () => {
   let dir: string;
@@ -105,4 +106,66 @@ describe("context loading", () => {
     assert.equal(items.length, 0);
     await rm(dir, { recursive: true });
   });
+
+  it("extensions filter loads only matching types (regression #28)", async () => {
+    dir = await mkdtemp(join(tmpdir(), "rlmx-ctx-"));
+    await writeFile(join(dir, "readme.md"), "markdown");
+    await writeFile(join(dir, "app.ts"), "typescript");
+    await writeFile(join(dir, "doc.mdx"), "mdx content");
+    await writeFile(join(dir, "data.json"), '{"key": "value"}');
+    await writeFile(join(dir, "style.css"), "body {}");
+
+    const ctx = await loadContextFromDir(dir, { extensions: [".mdx", ".json"] });
+    const items = ctx.content as ContextItem[];
+    const paths = items.map((i) => i.path).sort();
+
+    assert.equal(items.length, 2, `Expected 2 files but got ${items.length}: ${paths.join(", ")}`);
+    assert.deepEqual(paths, ["data.json", "doc.mdx"]);
+    await rm(dir, { recursive: true });
+  });
+
+  it("extensions without leading dots are normalized (regression #28)", async () => {
+    dir = await mkdtemp(join(tmpdir(), "rlmx-ctx-"));
+    await writeFile(join(dir, "readme.md"), "markdown");
+    await writeFile(join(dir, "app.ts"), "typescript");
+    await writeFile(join(dir, "doc.mdx"), "mdx content");
+    await writeFile(join(dir, "data.json"), '{"key": "value"}');
+
+    // Pass extensions WITHOUT leading dots — should still work
+    const ctx = await loadContextFromDir(dir, { extensions: ["mdx", "json"] });
+    const items = ctx.content as ContextItem[];
+    const paths = items.map((i) => i.path).sort();
+
+    assert.equal(items.length, 2, `Expected 2 files but got ${items.length}: ${paths.join(", ")}`);
+    assert.deepEqual(paths, ["data.json", "doc.mdx"]);
+    await rm(dir, { recursive: true });
+  });
+
+  it("yaml context.extensions propagates to config correctly (regression #28)", async () => {
+    dir = await mkdtemp(join(tmpdir(), "rlmx-ctx-"));
+    // Create an rlmx.yaml with custom extensions (without dots, as users often write)
+    await writeFile(
+      join(dir, "rlmx.yaml"),
+      "context:\n extensions: [mdx, json]\n"
+    );
+
+    const config = await loadConfig(dir);
+    // Extensions should be normalized to have leading dots
+    assert.deepEqual(config.contextConfig.extensions, [".mdx", ".json"]);
+    // Exclude should fall back to defaults
+    assert.ok(config.contextConfig.exclude.includes("node_modules"));
+    await rm(dir, { recursive: true });
+  });
+
+  it("yaml context.extensions with dots propagates correctly (regression #28)", async () => {
+    dir = await mkdtemp(join(tmpdir(), "rlmx-ctx-"));
+    await writeFile(
+      join(dir, "rlmx.yaml"),
+      "context:\n extensions: [.mdx, .json]\n"
+    );
+
+    const config = await loadConfig(dir);
+    assert.deepEqual(config.contextConfig.extensions, [".mdx", ".json"]);
+    await rm(dir, { recursive: true });
+  });
 });
diff --git a/tests/session.test.ts b/tests/session.test.ts
new file mode 100644
index 0000000..a05d786
--- /dev/null
+++ b/tests/session.test.ts
@@ -0,0 +1,156 @@
+import { describe, it, before, after } from "node:test";
+import assert from "node:assert/strict";
+import { rm, mkdir, readFile, writeFile, stat } from "node:fs/promises";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import yaml from "js-yaml";
+import { saveSession, type SessionData } from "../src/session.js";
+
+function makeSessionData(overrides?: Partial<SessionData>): SessionData {
+  return {
+    runId: "test-run-id-1234",
+    query: "What is the meaning of life?",
+    contextPath: "/tmp/context",
+    model: "google/gemini-2.0-flash",
+    answer: "The answer is 42.",
+    usage: {
+      inputTokens: 1000,
+      outputTokens: 500,
+      cachedTokens: 200,
+      totalCost: 0.0015,
+      iterations: 3,
+      timeMs: 4500,
+      model: "google/gemini-2.0-flash",
+    },
+    config: {
+      model: { provider: "google", model: "gemini-2.0-flash" },
+      toolsLevel: "core",
+      budget: { maxCost: 1.0, maxTokens: 100000 },
+    },
+    logPath: null,
+    ...overrides,
+  };
+}
+
+describe("saveSession", () => {
+  const testDir = join(tmpdir(), `rlmx-session-test-${Date.now()}`);
+  const originalHome = process.env.HOME;
+
+  before(async () => {
+    await mkdir(testDir, { recursive: true });
+    process.env.HOME = testDir;
+  });
+
+  after(async () => {
+    process.env.HOME = originalHome;
+    await rm(testDir, { recursive: true, force: true });
+  });
+
+  it("creates session directory and returns its path", async () => {
+    const data = makeSessionData();
+    const sessionDir = await saveSession(data);
+    assert.ok(sessionDir.includes(data.runId));
+    const dirStat = await stat(sessionDir);
+    assert.ok(dirStat.isDirectory());
+  });
+
+  it("writes all 5 expected files", async () => {
+    const data = makeSessionData({ runId: "test-all-files" });
+    const sessionDir = await saveSession(data);
+
+    const files = ["meta.json", "usage.json", "answer.txt", "config.yaml", "trajectory.jsonl"];
+    for (const file of files) {
+      const fileStat = await stat(join(sessionDir, file));
+      assert.ok(fileStat.isFile(), `Expected ${file} to exist`);
+    }
+  });
+
+  it("meta.json has correct fields", async () => {
+    const data = makeSessionData({ runId: "test-meta-fields" });
+    const sessionDir = await saveSession(data);
+
+    const raw = await readFile(join(sessionDir, "meta.json"), "utf-8");
+    const meta = JSON.parse(raw);
+    assert.equal(meta.runId, "test-meta-fields");
+    assert.equal(meta.query, data.query);
+    assert.equal(meta.contextPath, data.contextPath);
+    assert.ok(typeof meta.timestamp === "string");
+    assert.ok(meta.timestamp.includes("T")); // ISO string
+    assert.ok(typeof meta.version === "string");
+  });
+
+  it("usage.json has correct token counts", async () => {
+    const data = makeSessionData({ runId: "test-usage-fields" });
+    const sessionDir = await saveSession(data);
+
+    const raw = await readFile(join(sessionDir, "usage.json"), "utf-8");
+    const usage = JSON.parse(raw);
+    assert.equal(usage.inputTokens, 1000);
+    assert.equal(usage.outputTokens, 500);
+    assert.equal(usage.cachedTokens, 200);
+    assert.equal(usage.totalCost, 0.0015);
+    assert.equal(usage.iterations, 3);
+    assert.equal(usage.timeMs, 4500);
+    assert.equal(usage.model, "google/gemini-2.0-flash");
+  });
+
+  it("answer.txt contains the answer text", async () => {
+    const data = makeSessionData({ runId: "test-answer-txt" });
+    const sessionDir = await saveSession(data);
+
+    const answer = await readFile(join(sessionDir, "answer.txt"), "utf-8");
+    assert.equal(answer, "The answer is 42.");
+  });
+
+  it("config.yaml is valid YAML matching the config snapshot", async () => {
+    const data = makeSessionData({ runId: "test-config-yaml" });
+    const sessionDir = await saveSession(data);
+
+    const raw = await readFile(join(sessionDir, "config.yaml"), "utf-8");
+    const parsed = yaml.load(raw) as Record<string, unknown>;
+    assert.ok(typeof parsed === "object");
+    assert.deepEqual(parsed, data.config);
+  });
+
+  it("copies trajectory when logPath is provided", async () => {
+    const logDir = join(testDir, "logs");
+    await mkdir(logDir, { recursive: true });
+    const logPath = join(logDir, "test-run.jsonl");
+    const logContent = '{"event":"run_start","run_id":"abc"}\n{"event":"run_end","run_id":"abc"}\n';
+    await writeFile(logPath, logContent);
+
+    const data = makeSessionData({ runId: "test-trajectory-copy", logPath });
+    const sessionDir = await saveSession(data);
+
+    const trajectory = await readFile(join(sessionDir, "trajectory.jsonl"), "utf-8");
+    assert.equal(trajectory, logContent);
+  });
+
+  it("writes empty trajectory when logPath is null", async () => {
+    const data = makeSessionData({ runId: "test-trajectory-null", logPath: null });
+    const sessionDir = await saveSession(data);
+
+    const trajectory = await readFile(join(sessionDir, "trajectory.jsonl"), "utf-8");
+    assert.equal(trajectory, "");
+  });
+
+  it("writes empty trajectory when logPath points to non-existent file", async () => {
+    const data = makeSessionData({
+      runId: "test-trajectory-missing",
+      logPath: "/tmp/does-not-exist-rlmx-test.jsonl",
+    });
+    const sessionDir = await saveSession(data);
+
+    const trajectory = await readFile(join(sessionDir, "trajectory.jsonl"), "utf-8");
+    assert.equal(trajectory, "");
+  });
+
+  it("handles null contextPath in meta.json", async () => {
+    const data = makeSessionData({ runId: "test-null-context", contextPath: null });
+    const sessionDir = await saveSession(data);
+
+    const raw = await readFile(join(sessionDir, "meta.json"), "utf-8");
+    const meta = JSON.parse(raw);
+    assert.equal(meta.contextPath, null);
+  });
+});