diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d585202 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,88 @@ +name: ci + +on: + push: + branches: [main] + pull_request: + +# Least privilege: CI only needs to read the repository. +permissions: + contents: read + +jobs: + rust: + name: rust crate + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: install rust toolchain + run: | + rustup update stable + rustup default stable + rustup target add wasm32-unknown-unknown + + - name: cargo test (native) + run: cargo test --manifest-path crates/dhamaka-runtime/Cargo.toml + + - name: build wasm + run: crates/dhamaka-runtime/build.sh + + - name: upload wasm artifact + uses: actions/upload-artifact@v4 + with: + name: dhamaka-runtime-wasm + path: packages/hub/public/runtime/dhamaka-runtime.wasm + if-no-files-found: error + + js: + name: js (node ${{ matrix.node }}) + runs-on: ubuntu-latest + needs: rust + strategy: + fail-fast: false + matrix: + node: ["20", "22"] + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node }} + + - name: download wasm artifact + uses: actions/download-artifact@v4 + with: + name: dhamaka-runtime-wasm + path: packages/hub/public/runtime + + - name: syntax check + run: | + find packages -name '*.js' -not -path '*/node_modules/*' -print0 \ + | xargs -0 -n1 node --check + + - name: run tests + run: npm test + + - name: smoke test dev server + run: | + node packages/playground/server.js & + SERVER_PID=$!
+ for attempt in $(seq 1 30); do curl -sf -o /dev/null "http://localhost:5174/" && curl -sf -o /dev/null "http://localhost:5173/" && break; sleep 0.5; done # poll up to ~15 s for both listeners instead of a fixed 2 s sleep + for url in \ + "http://localhost:5174/" \ + "http://localhost:5174/hub.js" \ + "http://localhost:5174/manifest.json" \ + "http://localhost:5174/runtime/dhamaka-runtime.wasm" \ + "http://localhost:5173/" \ + "http://localhost:5173/sdk/index.js" \ + "http://localhost:5173/runtime/index.js"; do + code=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$code" != "200" ]; then + echo "FAIL: $url returned $code" + kill $SERVER_PID 2>/dev/null || true + exit 1 + fi + echo "OK: $url" + done + kill $SERVER_PID 2>/dev/null || true diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 0000000..ee2f3e8 --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,57 @@ +name: pages + +on: + push: + branches: [main] + paths: + - "packages/**" + - "crates/**" + - "docs/**" + - ".github/workflows/pages.yml" + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: true + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deploy.outputs.page_url }} + steps: + - uses: actions/checkout@v4 + + - name: configure pages + uses: actions/configure-pages@v5 + + - name: install rust toolchain + run: | + rustup update stable + rustup default stable + rustup target add wasm32-unknown-unknown + + - name: build wasm + run: crates/dhamaka-runtime/build.sh + + - uses: actions/setup-node@v4 + with: + node-version: "22" + + - name: assemble site + run: node packages/playground/build-site.mjs + + - name: upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: packages/playground/_site + + - name: deploy to github pages + id: deploy + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..15d6a20 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,109 @@ +name: release + +on: + push: + tags: + -
"v*" + +permissions: + contents: write # needed to create the GitHub release + upload assets + id-token: write # OIDC token; required by `npm publish --provenance` + +jobs: + release: + name: build, test, stage, and release ${{ github.ref_name }} + runs-on: ubuntu-latest + env: + # Hoisting NPM_TOKEN to job level so the conditional `if` checks in + # the publish steps below can actually read it. + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + steps: + - uses: actions/checkout@v4 + + # ─── Rust toolchain + wasm build ──────────────────────────────────── + - name: install rust toolchain + run: | + rustup update stable + rustup default stable + rustup target add wasm32-unknown-unknown + + - name: cargo test (native) + run: cargo test --manifest-path crates/dhamaka-runtime/Cargo.toml + + - name: build wasm + run: crates/dhamaka-runtime/build.sh + + # ─── Node toolchain + JS tests ────────────────────────────────────── + - uses: actions/setup-node@v4 + with: + node-version: "22" + registry-url: "https://registry.npmjs.org" + + - name: run js tests + run: npm test + + # ─── Stage the publishable package ────────────────────────────────── + - name: stage publish + run: node scripts/prepare-publish.mjs + + - name: inspect staged package + run: | + cd packages/sdk/_staging + npm pack --dry-run + npm pack + ls -lh *.tgz + + # ─── Verify the tag matches the package version ───────────────────── + - name: verify tag matches package version + run: | + TAG="${GITHUB_REF_NAME#v}" + PKG=$(node -p "require('./packages/sdk/_staging/package.json').version") + if [ "$TAG" != "$PKG" ]; then + echo "FAIL: tag $TAG does not match package version $PKG" + exit 1 + fi + echo "OK: tag $TAG matches package version $PKG" + + # ─── Publish to npm (only if NPM_TOKEN is set) ────────────────────── + - name: publish to npm + if: env.NPM_TOKEN != '' + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + cd packages/sdk/_staging + npm publish --access public --provenance + + - name: skip npm publish (no NPM_TOKEN) + if:
env.NPM_TOKEN == '' + env: + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + echo "NPM_TOKEN not set — skipping npm publish." + echo "To enable automated publishing: Settings → Secrets → Actions → new secret 'NPM_TOKEN'." + + # ─── Create the GitHub release with the wasm + tarball attached ───── + - name: extract release notes from changelog + id: notes + run: | + VERSION="${GITHUB_REF_NAME#v}" + # Everything between "## [VERSION]" and the next "## [" header (VERSION is matched literally, not as a regex). + awk -v ver="$VERSION" ' + index($0, "## [" ver "]") == 1 { found = 1; next } + found && $0 ~ "^## \\[" { exit } + found { print } + ' CHANGELOG.md > release_notes.md + if [ ! -s release_notes.md ]; then + echo "no changelog entry for $VERSION, using tag message" > release_notes.md + fi + echo "notes_file=release_notes.md" >> "$GITHUB_OUTPUT" + + - name: create github release + uses: softprops/action-gh-release@v2 + with: + name: Dhamaka ${{ github.ref_name }} + body_path: ${{ steps.notes.outputs.notes_file }} + draft: false + prerelease: ${{ contains(github.ref_name, '-') }} + files: | + packages/sdk/_staging/dhamaka-*.tgz + packages/hub/public/runtime/dhamaka-runtime.wasm diff --git a/.gitignore b/.gitignore index 8738cb1..b091ec1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,8 +7,27 @@ build/ .env .env.local coverage/ +package-lock.json *.wasm.map models/*.bin models/*.onnx models/*.gguf !models/manifest.json + +# Rust build output. The compiled .wasm is staged into +# packages/hub/public/runtime/ by build.sh and *is* committed so users +# without a Rust toolchain can run the dev stack. The target/ dir is not. +crates/*/target/ +Cargo.lock + +# npm publish staging directory, rebuilt from scratch by +# scripts/prepare-publish.mjs on every release. +packages/sdk/_staging/ +packages/sdk/*.tgz + +# GitHub Pages build output, rebuilt from scratch by +# packages/playground/build-site.mjs on every deploy.
+packages/playground/_site/ + +# Playwright +test-results/ diff --git a/BENCHMARKS.md b/BENCHMARKS.md new file mode 100644 index 0000000..6e6f820 --- /dev/null +++ b/BENCHMARKS.md @@ -0,0 +1,120 @@ +# Benchmarks + +> Generated 2026-04-13 on Apple Silicon (darwin arm64), Node v25.2.1, +> headless Chromium via Playwright. All numbers are from the rules-first +> fast path — no model involved. + +## Run them yourself + +```bash +npm run bench # all three suites +npm run bench:tasks # task pipeline only +npm run bench:wasm # WASM runtime only +npm run bench:browser # real browser via Playwright +``` + +--- + +## Task pipeline (rules-first fast path) + +The hot path. Every keystroke in a `SmartField` runs through these +functions synchronously. The goal is **< 1 ms per call** — ideally +microseconds. + +| benchmark | p50 | p95 | p99 | mean | +|---|---:|---:|---:|---:| +| **city-to-state:** exact match ("San Francisco") | 0.3 ns | 0.4 ns | 1.0 µs | 0.3 ns | +| **city-to-state:** alias ("sf") | 0.2 ns | 0.2 ns | 0.3 ns | 0.2 ns | +| **city-to-state:** case-insensitive ("SAN FRANCISCO") | 0.2 ns | 0.3 ns | 0.3 ns | 0.2 ns | +| **city-to-state:** fuzzy match ("San Francsico") | 10.9 µs | 13.9 µs | 18.6 µs | 11.1 µs | +| **city-to-state:** miss ("xyzzy") | 10.9 µs | 13.0 µs | 17.2 µs | 11.2 µs | +| **spellcheck:** homophone ("see you their") | 0.5 ns | 0.7 ns | 0.9 ns | 0.5 ns | +| **spellcheck:** misspelling ("recieve") | 0.4 ns | 0.7 ns | 0.7 ns | 0.4 ns | +| **spellcheck:** clean text (no issues) | 0.7 ns | 0.8 ns | 0.8 ns | 0.7 ns | +| **spellcheck:** multiple errors | 0.7 ns | 0.9 ns | 1.0 ns | 0.7 ns | +| **paste-extract:** full contact blob (7 lines) | 1.5 µs | 2.1 µs | 2.2 µs | 1.6 µs | +| **paste-extract:** email-only blob | 0.9 ns | 1.2 µs | 1.5 µs | 1.0 ns | + +10,000 iterations per benchmark. **All p99 latencies are under 20 µs** — +well within the < 1 ms budget, let alone the 50 ms keystroke budget. 
+ +**Key insight:** Exact gazetteer lookups and spellcheck rules resolve in +nanoseconds. Fuzzy matching (Levenshtein distance on ~100 cities) is the +slowest path at ~11 µs — still 5,000× faster than the 50 ms budget. + +--- + +## WASM runtime (Rust → wasm32) + +The fallback inference engine — real transformer math (matmul, RMSNorm, +softmax, RoPE, KV-cache, sampling) compiled from Rust to a 55 KB `.wasm`. + +| metric | value | +|---|---| +| **WASM binary size** | 55.1 KB | +| **Cold start** (instantiate + init) | 0.54 ms median, 0.37 ms min | +| **Tokens in 50 ms budget** | ~64 tokens | + +### Warm inference (8 tokens generated) + +| prompt | median | p95 | tok/s | +|---|---:|---:|---:| +| "hello" | 0.19 ms | 0.25 ms | 41,630/s | +| "The quick brown fox" | 0.34 ms | 0.38 ms | 23,674/s | +| "San Francisco is a city in" | 0.43 ms | 0.45 ms | 18,783/s | +| "function fibonacci(n) {" | 0.39 ms | 0.41 ms | 20,581/s | + +50 iterations per prompt. These are random-init demo weights (32-dim) so +the output isn't coherent — but the math is real. Throughput scales with +model dimension; real SmolLM2-360M Q4 weights will be slower but the +architecture is proven. + +--- + +## Browser end-to-end (headless Chromium) + +Real page loads, real DOM events, real import maps. Measured via Playwright. + +| scenario | time | +|---|---:| +| **Page load** (autofill demo) | 27 ms | +| **Type "San Francisco" → state filled** | 16 ms | +| SDK self-reported task latency | 0.20 ms | +| **10 sequential city lookups** | 34 ms total, **3.4 ms avg** | +| **Spellcheck: type → suggestion visible** | 113 ms (includes 80 ms debounce) | +| **Spellcheck: click fix → text corrected** | 17 ms | +| **Paste blob → 6 fields populated** | 16 ms | +| **External network requests** | **0** | + +### Budget check vs. goals + +The [GOALS.md](docs/GOALS.md) target is **< 50 ms per keystroke**. 
+ +``` + ✔ autofill resolve: 0.20 ms (250× under budget) + ✔ 10-lookup average: 3.4 ms (15× under budget) + ✔ spellcheck: ~33 ms (after subtracting 80 ms debounce) + ✔ paste extraction: 16 ms (3× under budget) + ✔ cold start (wasm): 0.54 ms (93× under budget) + ✔ network requests: 0 (nothing leaves the device) +``` + +--- + +## Asset sizes + +| asset | size | +|---|---:| +| WASM runtime binary | 55.1 KB | +| SDK source (all JS) | ~83 KB (unminified) | +| City gazetteer | ~100 entries, 255 lines | + +--- + +## Test suite + +| suite | tests | time | +|---|---:|---:| +| Node unit tests (`npm test`) | 75 | ~580 ms | +| Playwright e2e (`npm run test:e2e`) | 18 | ~1.7 s | +| **Total** | **93** | **~2.3 s** | diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..35c5762 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,170 @@ +# Changelog + +All notable changes to Dhamaka are documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +- **The thesis.** `docs/GOALS.md` and `README.md` now lead with the + manifesto: *stop sending the data to the model; ship the model to the + data.* Every architectural decision in the project is documented as a + consequence of that one inversion. +- **Transform family.** The second of four planned capability families. + - `Transform` class: generic `run({ task, input, instruction, context })` + one-shot AI call plus `.formula()` / `.explain()` / `.debug()` + shortcuts. Routes through the task registry, normalises TaskResult + into a TransformResult, falls back to a generic instruction-over-input + prompt when no task is specified. 
+ - `formula-transform` task with 10 structural rewrite patterns shipping + at launch: percent-discount, percent-tax, round to N decimals, + multiply/divide by N, IFERROR wrapping, null-safe wrapping, currency + conversion, negate, absolute value. LLM fallback for anything the + patterns can't match. + - `formula-explain` task with a 30-function gloss table plus + arithmetic-tree detection for pure expressions. + - `formula-debug` task with an advice table for every standard + error code (#DIV/0!, #N/A, #REF!, #VALUE!, #NAME?, #NUM!, #NULL!, + #SPILL!), plus static detection of divide-by-cell risk. +- **erp.ai as the hero case study.** Formula editing in [erp.ai](https://erp.ai) + is the flagship Transform integration. Every ERP formula edit, explain, + and debug call runs locally — formulas contain the most sensitive data + a company owns (pricing, margins, payroll, commission tiers) so shipping + them to a remote AI provider is a non-starter, which makes local + inference uniquely viable for this category. + +### Positioning + +The previous pivot framed Dhamaka as a reflex layer for input fields. That +framing was too narrow. Dhamaka is a local AI capability layer for web apps +— SmartField is one family of capabilities (Reflex), Transform is a +second (shipping now), Search and Agent are the other two (planned). The +README, GOALS.md, and CHANGELOG all lead with the four-family framing +now. + +### Notes + +- An intermediate rename to "Locus" was considered and applied in one + commit (`c04ca5a`), then reverted in the next once the `dhamaka.dev` + domain purchase confirmed Dhamaka stays. No consumer-facing code + shipped under the Locus name. + +## [0.1.0] — 2026-04-11 + +The first cut. End-to-end browser-native LLM stack with a real Rust inference +runtime compiled to WebAssembly, a cross-site model cache, and a JS SDK that +drives it all. + +### Runtime (Rust → WebAssembly) + +- New crate `crates/dhamaka-runtime` written in pure Rust, zero dependencies.
+- Tensor primitives: `matmul`, `rmsnorm`, numerically stable `softmax`, + `silu`, in-place `add` / `mul`, and rotary position embeddings (`rope`). +- Sampler: one-pass temperature + top-k + top-p + greedy with a deterministic + xorshift64* RNG seeded from prompt bytes. +- Transformer kernel: Llama-style block (RMSNorm → Q/K/V → RoPE → + KV-cached self-attention → output projection → RMSNorm → SwiGLU FFN → + residual) with `MAX_CTX = 512`. +- Tiny random-weights v0.1 model (32-dim hidden, 2 layers, 1 head, 64-entry + vocab) so the whole pipeline exercises real f32 math end-to-end. +- `#[no_mangle] extern "C"` ABI exposed to WebAssembly: + `dhamaka_version`, `dhamaka_alloc`, `dhamaka_free`, `dhamaka_init`, + `dhamaka_destroy`, `dhamaka_reset`, `dhamaka_set_sampling`, + `dhamaka_feed_prompt`, `dhamaka_next_token`. +- `build.sh` helper that installs the `wasm32-unknown-unknown` target on + demand, compiles `release` with fat LTO, and stages the resulting 56 KB + `.wasm` into `packages/hub/public/runtime/`. +- 27 native `cargo test` cases covering every primitive, the sampler laws, + forward-pass determinism, and position sensitivity via RoPE + KV cache. + +### SDK (`dhamaka`) + +- `Dhamaka.load(modelId, options)` fetches a model through the hub, loads + the compiled WASM runtime, and returns an instance with `complete`, + `stream`, `chat`, `info`, `evict`, `localModels`, and `unload`. +- `Chat` class with system prompts, streaming, reset, and per-turn history. +- `HubClient` that speaks a typed `postMessage` protocol with the hub iframe + and falls back to per-origin IndexedDB when the iframe is unreachable or + to an in-memory store when running in Node. +- Tiered storage mode reporting — `shared`, `storage-access`, `partitioned`, + `site-local`, `extension` — with `requestStorageAccess()` for a one-click + user-gated opt-in to unpartitioned storage. 
+- Auto-detection of the Dhamaka browser extension; when present the SDK + routes all hub messages through it to sidestep storage partitioning. +- OpenAI-compatible `/v1/chat/completions` shim with streaming + non-streaming + that robustly parses `string` / `Blob` / `ArrayBuffer` / `TypedArray` bodies. + +### Runtime adapter (`@dhamaka/runtime`) + +- `Engine` abstract interface. +- `WasmEngine` — loads the compiled Rust `.wasm`, verifies the ABI version, + writes prompt bytes into WASM linear memory via `dhamaka_alloc`, drives + `dhamaka_feed_prompt` + `dhamaka_next_token` in a loop, decodes UTF-8, and + yields tokens. Honors `AbortSignal`. +- `MockEngine` — dependency-free stand-in for development when the real + runtime isn't available. Streams canned responses at ~45 tok/s. +- `createEngine({ backend })` that prefers `WasmEngine` in browsers and + `MockEngine` in Node. + +### Hub (`@dhamaka/hub`) + +- Static site that runs in a hidden iframe embedded by every Dhamaka-powered + consumer. Stores models in IndexedDB and streams `ArrayBuffer`s back over + `postMessage` using transferables (zero-copy). +- SHA-256 content-addressed integrity checks on every artifact. +- Storage Access API integration so strict browsers can still get + unpartitioned storage on a user gesture. +- Serves the compiled `dhamaka-runtime.wasm` alongside model artifacts. +- JSON Schema draft-07 for the manifest format. + +### Browser extension (`@dhamaka/extension`) + +- Manifest V3 skeleton with a background service worker that stores models in + the extension's own origin — shared across every site on the machine, + sidestepping storage partitioning entirely. +- Content script bridge (`postMessage` ↔ `chrome.runtime.sendMessage`). +- SDK detects the extension via an injected `window.__dhamaka_extension__` + marker and prefers it over the iframe hub. +- Options page listing cached models with one-click eviction. 
+ +### Playground (`@dhamaka/playground`) + +- Zero-dependency Node dev server that runs the hub on `:5174` and the + playground on `:5173`, serving the compiled WASM with the right MIME and + CORS headers. +- Live UI with a model picker, progress bar, live telemetry (cache hit, + load ms, tokens/sec, backend, memory), stateful chat, abort/stop button, + history reset, and eviction controls. +- Importmap-based module wiring — no bundler, no build step for JS edits. + +### Tests, CI, and infrastructure + +- **45 JS tests** (`node --test`, zero dependencies) covering the SDK, the + hub, the OpenAI shim, all engine adapters, and four end-to-end integration + tests that load the real compiled `.wasm` in Node and drive it through the + full ABI. +- **27 Rust tests** (`cargo test`) covering every primitive. +- **CI** (`.github/workflows/ci.yml`) with two jobs: `rust` compiles the + crate, runs cargo tests, and uploads the wasm artifact; `js` downloads the + artifact and runs `node --test` on Node 20 and Node 22, plus a smoke test + that `curl`s every dev-server endpoint. +- Animated SVG banner at the top of the README (rainbow gradient + pulsing + spotlight + drifting scanline) served from `docs/banner.svg`. + +### Known limitations for v0.1.0 + +- The v0.1 model is a 32-dim / 2-layer random-weights transformer, so output + is a stream of tokens, not coherent English. When the SmolLM2-360M Q4 + artifacts arrive they'll plug into the same `dhamaka_init` entry point + without SDK changes. +- No SIMD128 build of the runtime yet (`-C target-feature=+simd128` is a + one-line change; it's gated on having a baseline benchmark). +- No WebGPU fast path. +- The other models in the registry (`dhamaka-code`, `dhamaka-sql`, + `dhamaka-json`, `dhamaka-summarize`, `dhamaka-embed`) are listed as + `status: planned`.
+ +[0.1.0]: https://github.com/protosphinx/dhamaka/releases/tag/v0.1.0 diff --git a/README.md b/README.md index 83c8ce8..29f289b 100644 --- a/README.md +++ b/README.md @@ -6,144 +6,322 @@