diff --git a/.autoloop/programs/tsb-perf-evolve/code/README.md b/.autoloop/programs/tsb-perf-evolve/code/README.md new file mode 100644 index 00000000..6f0861b8 --- /dev/null +++ b/.autoloop/programs/tsb-perf-evolve/code/README.md @@ -0,0 +1,11 @@ +# tsb-perf-evolve — code/ + +This directory holds the **fixed inputs** for the program: the benchmark scripts and a small config. The autoloop iterations should rarely touch these files. The thing that *evolves* is `src/core/series.ts` (specifically the `sortValues` method) — see `../program.md` for the full picture. + +## Files + +- `config.yaml` — tunables read by the AlphaEvolve playbook (`exploitation_ratio`, `num_islands`, `population_size`, `archive_size`, dataset size). +- `benchmark.ts` — tsb-side benchmark. Builds a Series of `dataset_size` random floats with ~5% NaN, calls `sortValues` in a tight loop, prints `{"function": "Series.sortValues", "mean_ms": …, "iterations": …, "total_ms": …}`. +- `benchmark.py` — pandas-side benchmark. Builds an equivalent `pd.Series`, calls `.sort_values()` in the same loop structure, prints the same JSON shape. + +The two benchmarks must stay aligned: same dataset size, same NaN ratio, same warm-up + measured iteration counts. If you tweak one, tweak the other. diff --git a/.autoloop/programs/tsb-perf-evolve/code/benchmark.py b/.autoloop/programs/tsb-perf-evolve/code/benchmark.py new file mode 100644 index 00000000..165f387e --- /dev/null +++ b/.autoloop/programs/tsb-perf-evolve/code/benchmark.py @@ -0,0 +1,60 @@ +"""pandas-side benchmark for Series.sort_values. + +Output: a single JSON line on stdout with the shape + {"function": "Series.sort_values", "mean_ms": , + "iterations": , "total_ms": } + +Dataset shape and iteration counts mirror ./benchmark.ts — keep the two in +lockstep. Fixed seed for reproducibility across runs. 
+""" + +from __future__ import annotations + +import json +import sys +import time + +import numpy as np +import pandas as pd + +# Inlined from config.yaml (kept in sync with benchmark.ts). +DATASET_SIZE = 100_000 +NAN_RATIO = 0.05 +WARMUP_ITERATIONS = 5 +MEASURED_ITERATIONS = 50 +RANDOM_SEED = 42 + + +def build_data() -> pd.Series: + rng = np.random.default_rng(RANDOM_SEED) + values = rng.uniform(-500_000.0, 500_000.0, size=DATASET_SIZE) + nan_mask = rng.random(size=DATASET_SIZE) < NAN_RATIO + values[nan_mask] = np.nan + return pd.Series(values, dtype="float64") + + +def main() -> None: + series = build_data() + + # Warm-up. + for _ in range(WARMUP_ITERATIONS): + series.sort_values() + + start = time.perf_counter() + for _ in range(MEASURED_ITERATIONS): + series.sort_values() + total_s = time.perf_counter() - start + total_ms = total_s * 1000.0 + mean_ms = total_ms / MEASURED_ITERATIONS + + result = { + "function": "Series.sort_values", + "mean_ms": mean_ms, + "iterations": MEASURED_ITERATIONS, + "total_ms": total_ms, + } + sys.stdout.write(json.dumps(result) + "\n") + + +if __name__ == "__main__": + main() diff --git a/.autoloop/programs/tsb-perf-evolve/code/benchmark.ts b/.autoloop/programs/tsb-perf-evolve/code/benchmark.ts new file mode 100644 index 00000000..fe6635cb --- /dev/null +++ b/.autoloop/programs/tsb-perf-evolve/code/benchmark.ts @@ -0,0 +1,75 @@ +// tsb-side benchmark for Series.sortValues. +// Output: a single JSON line on stdout with the shape +// {"function": "Series.sortValues", "mean_ms": , "iterations": , "total_ms": } +// +// Dataset shape and iteration counts come from ./config.yaml — keep this file +// and ./benchmark.py in lockstep. + +import { Series } from "../../../../src/index.ts"; + +// Inlined from config.yaml — the autoloop agent should keep these in sync. +// (No YAML parser dependency to keep this benchmark hermetic.) 
+const DATASET_SIZE = 100_000; +const NAN_RATIO = 0.05; +const WARMUP_ITERATIONS = 5; +const MEASURED_ITERATIONS = 50; +const RANDOM_SEED = 42; + +// A tiny deterministic PRNG (mulberry32). Note: this is *not* the same +// algorithm as numpy's default_rng on the Python side, so for any given seed +// the two benchmarks will see different concrete values. They will still see +// the same *distribution* (uniform over [-500_000, 500_000) with the same NaN +// fraction), and that is what matters for a sorting micro-benchmark — the +// dataset shape, not the exact bit pattern. If you ever need byte-identical +// inputs across the two sides, swap mulberry32 for a portable PRNG that has a +// matching numpy implementation (e.g. PCG64). +function mulberry32(seed: number): () => number { + let a = seed >>> 0; + return () => { + a = (a + 0x6d2b79f5) >>> 0; + let t = a; + t = Math.imul(t ^ (t >>> 15), t | 1); + t ^= t + Math.imul(t ^ (t >>> 7), t | 61); + return ((t ^ (t >>> 14)) >>> 0) / 4294967296; + }; +} + +function buildData(): readonly (number | null)[] { + const rng = mulberry32(RANDOM_SEED); + const out: (number | null)[] = new Array(DATASET_SIZE); + for (let i = 0; i < DATASET_SIZE; i++) { + out[i] = rng() < NAN_RATIO ? null : rng() * 1_000_000 - 500_000; + } + return out; +} + +function nowMs(): number { + return performance.now(); +} + +function main(): void { + const data = buildData(); + const series = new Series({ data, dtype: "float64" }); + + // Warm-up — let the JIT specialize. 
+ for (let i = 0; i < WARMUP_ITERATIONS; i++) { + series.sortValues(); + } + + const start = nowMs(); + for (let i = 0; i < MEASURED_ITERATIONS; i++) { + series.sortValues(); + } + const totalMs = nowMs() - start; + const meanMs = totalMs / MEASURED_ITERATIONS; + + const result = { + function: "Series.sortValues", + mean_ms: meanMs, + iterations: MEASURED_ITERATIONS, + total_ms: totalMs, + }; + process.stdout.write(`${JSON.stringify(result)}\n`); +} + +main(); diff --git a/.autoloop/programs/tsb-perf-evolve/code/config.yaml b/.autoloop/programs/tsb-perf-evolve/code/config.yaml new file mode 100644 index 00000000..6d0faaaa --- /dev/null +++ b/.autoloop/programs/tsb-perf-evolve/code/config.yaml @@ -0,0 +1,22 @@ +# AlphaEvolve tunables — read by strategy/alphaevolve.md every iteration. + +# Operator weights. Must sum to 1.0. Defaults bias toward exploitation. +exploitation_ratio: 0.50 +exploration_ratio: 0.30 +crossover_ratio: 0.15 +migration_ratio: 0.05 + +# Island count. Should match the number of islands enumerated in +# strategy/alphaevolve.md's "Pick parent(s)" section. +num_islands: 5 + +# MAP-Elites population caps. +population_size: 40 +archive_size: 10 + +# Benchmark dataset shape. Both benchmark.ts and benchmark.py read this. +dataset_size: 100000 +nan_ratio: 0.05 +warmup_iterations: 5 +measured_iterations: 50 +random_seed: 42 diff --git a/.autoloop/programs/tsb-perf-evolve/program.md b/.autoloop/programs/tsb-perf-evolve/program.md new file mode 100644 index 00000000..a8555bc6 --- /dev/null +++ b/.autoloop/programs/tsb-perf-evolve/program.md @@ -0,0 +1,80 @@ +--- +schedule: every 6h +--- + +# tsb perf evolve — Series.sortValues vs pandas Series.sort_values + +## Goal + +Evolve the implementation of `Series.sortValues` (`src/core/series.ts`) so that, on the synthetic benchmark in `code/benchmark.ts`, tsb runs **at least as fast as pandas** on the equivalent `Series.sort_values` call (`code/benchmark.py`). 
+ +Concretely, we minimize the **ratio** + + fitness = mean_ms_tsb / mean_ms_pandas + +`fitness < 1.0` means tsb is faster than pandas; lower is better. We will keep iterating as long as fitness keeps improving. + +This is a **performance-evolution program** — there is one self-contained artifact (`Series.sortValues`), one scalar fitness (the ratio), and many plausible algorithmic families to try (comparison sort, typed-array indirect sort, dtype-dispatched non-comparison sort, batched/SoA, etc.). It is the canonical case for the AlphaEvolve strategy. + +### Validity invariants + +A candidate is valid iff: + +1. The existing test suite for `sortValues` passes: `bun test tests/core/series.sortValues.test.ts` (and any property tests that exercise it). +2. The function signature is unchanged: `sortValues(ascending = true, naPosition: "first" | "last" = "last"): Series`. +3. No new runtime dependencies (devDependencies for benchmarking are fine). +4. TypeScript strict mode is satisfied — no `any`, no `as` casts, no `@ts-ignore`. +5. Behaviour is identical to the current implementation for: numeric (with NaN), string, mixed dtypes, ascending and descending, both `naPosition` values, and an empty Series. + +The evaluator runs the test suite and the benchmark; if either fails, the candidate is rejected. + +## Target + +Only modify these files: +- `src/core/series.ts` — the `sortValues` method body (and any small private helpers inside `series.ts` that it calls). Keep the public signature unchanged. +- `.autoloop/programs/tsb-perf-evolve/code/**` — benchmark scripts and config. (You will rarely need to touch these — the evaluator is fixed; the benchmark dataset is fixed; only tweak if a candidate genuinely needs a new bench scenario.) + +Do NOT modify: +- `tests/**` — test files (they are the validity oracle; do not weaken them). +- `README.md` — read-only. +- `.autoloop/programs/**` other than this program's `code/` dir. 
+- `.github/workflows/autoloop*` — autoloop workflow files. +- Any `src/**` file other than `src/core/series.ts`. + +## Evolution Strategy + +This program uses the **AlphaEvolve** strategy. On every iteration, read `strategy/alphaevolve.md` and follow it literally — it supersedes the generic analyze/accept/reject steps in the default autoloop loop. + +Support files: +- `strategy/alphaevolve.md` — the runtime playbook (operators, parent selection, population rules). +- `strategy/prompts/mutation.md` — framing for exploitation and exploration operators. +- `strategy/prompts/crossover.md` — framing for crossover and migration operators. + +Population state lives in the state file on the `memory/autoloop` branch under the `## 🧬 Population` subsection (see the playbook for the schema). + +## Evaluation + +```bash +set -euo pipefail + +# 1. Validity — existing tests for sortValues must still pass. +bun test tests/core/series.sortValues.test.ts >/tmp/perf-evolve-tests.log 2>&1 || { + echo '{"fitness": null, "rejected_reason": "tests failed"}' + exit 0 +} + +# 2. Benchmark — tsb side. +tsb_ms=$(bun run .autoloop/programs/tsb-perf-evolve/code/benchmark.ts | python3 -c "import json,sys; print(json.load(sys.stdin)['mean_ms'])") + +# 3. Benchmark — pandas side. Skip gracefully if pandas isn't available. +if ! python3 -c 'import pandas' 2>/dev/null; then + pip3 install pandas --quiet 2>/dev/null || true +fi +pd_ms=$(python3 .autoloop/programs/tsb-perf-evolve/code/benchmark.py | python3 -c "import json,sys; print(json.load(sys.stdin)['mean_ms'])") + +# 4. Fitness = ratio. Lower is better. +ratio=$(python3 -c "print(${tsb_ms} / ${pd_ms})") +echo "{\"fitness\": ${ratio}, \"tsb_mean_ms\": ${tsb_ms}, \"pandas_mean_ms\": ${pd_ms}}" +``` + +The metric is `fitness` (= `tsb_mean_ms / pandas_mean_ms`). **Lower is better.** A value below `1.0` means tsb is now faster than pandas on this workload. 
diff --git a/.autoloop/programs/tsb-perf-evolve/strategy/alphaevolve.md b/.autoloop/programs/tsb-perf-evolve/strategy/alphaevolve.md new file mode 100644 index 00000000..aaa8b77a --- /dev/null +++ b/.autoloop/programs/tsb-perf-evolve/strategy/alphaevolve.md @@ -0,0 +1,142 @@ +# AlphaEvolve Strategy — tsb-perf-evolve + +This file is the **runtime playbook** for this program. The autoloop agent reads it at the start of every iteration and follows it literally. It supersedes the generic "Analyze and Propose" / "Accept or Reject" steps in the default autoloop iteration loop — all other steps (state read, branch management, state file updates) still apply. + +## Problem framing + +The target artifact is the body of `Series.sortValues` in `src/core/series.ts`. Fitness is the ratio `tsb_mean_ms / pandas_mean_ms` measured on the fixed benchmark in `code/benchmark.ts` (and its pandas mirror `code/benchmark.py`); **lower is better**, with `< 1.0` meaning tsb is faster than pandas. A candidate is valid iff the existing tests for `sortValues` pass, the public signature is unchanged, no new runtime dependencies are added, TypeScript strict mode is satisfied, and behaviour matches the reference for numeric/string/mixed dtypes, both ascending values, and both `naPosition` settings. + +## Per-iteration loop + +### Step 1. Load state + +1. Read `program.md` — Goal, Target, Evaluation. +2. Read the program's state file from the repo-memory folder (`tsb-perf-evolve.md`). Locate the `## 🧬 Population` subsection. If it does not exist, create it using the schema in [Population schema](#population-schema). +3. Read `code/config.yaml` for tunables (`exploitation_ratio`, `num_islands`, `population_size`, `archive_size`, `dataset_size`, etc.). Do not hard-code values you can read from config — the maintainer may have tuned them. +4. Read both prompt templates in `strategy/prompts/`. These frame how you reason about mutations and crossovers for sorting code. + +### Step 2. 
Pick operator + +Sample one operator using these weights (tuned for a perf problem with a small handful of plausible algorithmic families — exploitation-heavy because once an island has a working candidate, refinement usually pays): + +| Operator | Default weight | When it fires | +|---|---|---| +| Exploitation | 0.50 | Refine one of the elites — the current best or a near-best. | +| Exploration | 0.30 | Generate a candidate from an **under-represented island** or a novel family. | +| Crossover | 0.15 | Combine ideas from two parents on different islands. | +| Migration | 0.05 | Take a technique that works on island A and port it into a solution on island B. | + +Deterministic overrides (apply *before* sampling): + +- If the population is empty or has one member → **Exploration** (seed diversity). +- If the last 3 statuses in `recent_statuses` are all `rejected` → force **Exploration** with a previously-unused island. +- If the last 5 statuses are all `rejected` → force **Migration** or a radically new island; also revisit any domain knowledge in `prompts/mutation.md` that has not yet been applied. + +Record your chosen operator in the iteration's reasoning — the state file's Iteration History entry must include it. + +### Step 3. Pick parent(s) + +**Islands** for this program (algorithmic families for sorting a 1-D numeric Series with NaN): + +- **Island 0 — Comparison sort (objects)**: the current implementation — `Array.prototype.sort` over `{v, i}` pairs with a comparator that handles NaN. +- **Island 1 — Indirect typed-array sort**: copy values into a `Float64Array`, sort an index `Uint32Array` by that, then gather. NaN handled by partition. +- **Island 2 — Decorate-sort-undecorate with packed keys**: encode `(value, index)` into a single sortable representation (e.g. pack into a `BigInt64Array` or use parallel typed arrays), sort once, gather. 
+- **Island 3 — Non-comparison / radix**: dispatch on dtype; for finite floats, transform to a sortable unsigned representation and run an LSD radix sort, then untransform. +- **Island 4 — Hybrid**: small-input fast path (Array.prototype.sort) + large-input dispatch into one of the above families based on `dataset_size` and dtype. + +Parent selection by operator: + +- **Exploitation** — pick the best scorer; break ties by picking the most recent. +- **Exploration** — pick the island with the fewest members (or a brand-new island number if all are full), then either start from its best member or from scratch. +- **Crossover** — pick two parents on **different islands**. Bias toward one elite (top quartile) and one diverse (any island with a distinct feature-cell — see [Feature dimensions](#feature-dimensions)). +- **Migration** — pick one donor island (the source of the technique) and one recipient island (where the technique will be grafted in). The parent you actually edit is on the recipient island. + +### Step 4. Apply the operator + +Frame your reasoning using the matching prompt template: + +- Exploitation or Exploration → `strategy/prompts/mutation.md` +- Crossover or Migration → `strategy/prompts/crossover.md` + +Before writing any code, state (in your visible reasoning): + +1. Chosen operator + why. +2. Parent(s) picked — their IDs, island, score, and a one-line summary of each parent's approach. +3. What specifically you're changing, and your hypothesis for *why* it should improve the fitness. +4. Validity pre-check — walk through why the proposed candidate will satisfy each invariant: + - Existing tests for `sortValues` will pass (numeric + NaN, string, ascending/descending, both `naPosition` values, empty Series). + - Public signature unchanged: `sortValues(ascending = true, naPosition: "first" | "last" = "last"): Series`. + - No new runtime dependency added to `package.json`. + - No `any`, no `as`, no `@ts-ignore`. 
+ - Index alignment preserved — every output value is paired with the original index of the input row it came from. +5. Novelty check: confirm this is not a near-duplicate of an existing population member or of anything in the state file's 🚧 Foreclosed Avenues. + +### Step 5. Implement + +Edit only the files listed in `program.md`'s Target section. The diff style for this program is **minimal diff** — `series.ts` is a large file and only the body of `sortValues` (plus, occasionally, a small private helper added immediately above it) should change. Do not reformat unrelated parts of the file. + +### Step 6. Evaluate + +Run the evaluation command from `program.md`. Parse the `fitness` field from the JSON output (along with `tsb_mean_ms` and `pandas_mean_ms` for the population entry). + +### Step 7. Update the population + +Regardless of whether the iteration is accepted or rejected at the branch level, the candidate has been tried and should be recorded in the population — the population is a memory of what's been explored, not just what's been kept. + +Append a new entry to the `## 🧬 Population` subsection in the state file using the schema below. Then enforce these caps: + +- **Population cap**: `population_size` from `code/config.yaml` (default 40). If exceeded, evict the *worst* member in the most-crowded feature cell (MAP-Elites style — never evict the best of any cell). +- **Elite archive**: the top `archive_size` from `code/config.yaml` (default 10) by fitness are always preserved regardless of cell crowding. + +### Step 8. Fold through to the default loop + +Continue with the normal autoloop Step 5 (Accept or Reject → commit / discard, update state file's Machine State, Iteration History, Lessons Learned, etc.) as defined in the workflow. The only additional requirements from AlphaEvolve are: + +- The Iteration History entry must include `operator`, `parent_id(s)`, `island`, and `fitness` fields (in addition to the normal status/change/metric/notes). 
+- Lessons Learned additions should be phrased as *transferable heuristics* about the problem space, not as reports of what this iteration did. (E.g. "Indirect sort over `Uint32Array` indices beats object-pair sort above n≈10k" — not "Iteration 17 tried indirect sort.") + +## Feature dimensions + +MAP-Elites partitions the population into **feature cells**. Each candidate is described by a small tuple of qualitative features, and the population keeps the best candidate per cell — this is what creates diversity pressure even when many candidates have similar fitness. + +For this program, use these feature dimensions: + +- **Dimension 1 — Storage**: `boxed-pairs` / `parallel-typed-arrays` / `packed-typed-array` / `wasm-buffer` +- **Dimension 2 — Algorithm class**: `comparison` / `non-comparison` / `hybrid` + +When evaluating a candidate, classify it into one cell per dimension. The combined `(storage, algorithm)` tuple is its **feature cell**. Record the cell in the population entry (see schema). + +## Population schema + +The population lives in the state file `tsb-perf-evolve.md` on the `memory/autoloop` branch as a subsection. Use this exact layout so maintainers can read and edit it: + +```markdown +## 🧬 Population + +> 🤖 *Managed by the AlphaEvolve strategy. One entry per candidate that has been evaluated (accepted or rejected). Newest first.* + +### Candidate · island · fitness · gen + +- **Operator**: exploitation / exploration / crossover / migration +- **Parent(s)**: [, ] +- **Feature cell**: · +- **Approach**: +- **Status**: ✅ accepted / ❌ rejected +- **Notes**: + +Code: + +\`\`\`typescript + +\`\`\` + +--- +``` + +Identifiers: +- `` is `c{NNN}` zero-padded, monotonically increasing across the program's lifetime. +- `` is the island number (0-indexed, 0..4 for this program). +- `` is the raw `fitness` (the tsb/pandas ms ratio). +- `` is the iteration number from the Machine State table. 
+ +When evicting members under the population cap, **never** delete an entry — instead, prepend a strikethrough header (`### ~~Candidate c042~~ (evicted, gen 87)`) and remove the entire `Code:` block (both the `Code:` label and the surrounding triple-backtick `typescript` code fence) to keep the file size bounded. The metadata stays so future iterations can see what was tried. diff --git a/.autoloop/programs/tsb-perf-evolve/strategy/prompts/crossover.md b/.autoloop/programs/tsb-perf-evolve/strategy/prompts/crossover.md new file mode 100644 index 00000000..74d548f2 --- /dev/null +++ b/.autoloop/programs/tsb-perf-evolve/strategy/prompts/crossover.md @@ -0,0 +1,53 @@ +# Crossover & Migration prompt — tsb-perf-evolve + +You are about to apply a **two-parent operator** — either crossover (combine ideas from parents on different islands) or migration (graft a technique that works on one island into a solution on another). This file frames how to reason about that change. Use it together with `strategy/alphaevolve.md`. + +## What these operators are for + +- **Crossover** — both parents are valid, working candidates from different islands. The goal is a child that takes a *good idea* from each. Crossover that is just "average the two" almost never wins; structural composition does. +- **Migration** — one parent (the **donor**) is on island A, where some technique works particularly well. The other parent (the **recipient**) is on island B, where the technique has not been tried. The goal is to graft the technique from A into a candidate on B, *without* breaking what makes B's island distinctive. + +The agent must be able to clearly say: "the *X* in this child came from parent A; the *Y* came from parent B." + +## Combination patterns + +How "combining" looks for `Series.sortValues`: + +- **Storage × algorithm**: take parent A's storage layout (e.g. parallel typed arrays from the indirect-sort island) and parent B's algorithm (e.g. 
radix sort from the non-comparison island). Produces "radix sort over typed-array storage", which may live in a third island. +- **NaN handling × hot path**: take parent A's NaN pre-partition strategy (clean separation of finite and NaN slices) and parent B's hot-path code (whatever it does with the finite slice). Useful when parent A is slow but has clean NaN handling, and parent B is fast on finite data only. +- **Dispatch × kernel**: take parent A's dtype-dispatch (e.g. one path for `float64`, one for `string`, one for object) and parent B's per-dtype kernel for the dtype where parent B excels. +- **Small-input fast path × large-input core**: take parent A's small-input branch (often the boring boxed-pair sort, which is fastest at `n < 64`) and parent B's large-input core. Produces a hybrid that wins across the whole size range. +- **Comparator × indirection**: take parent A's monomorphic comparator and graft it into parent B's index-sort indirection scheme. + +If none of the patterns above fits the two parents you've picked, that's a signal those parents are not a good crossover pair. Pick different parents — don't force a bad combination. + +## Migration patterns + +Worked examples for "porting a technique from island A to island B": + +- **Typed-array gather → comparison-sort island**: the indirect-sort island uses a `Float64Array` to avoid the boxed-number tax. Port that allocation pattern into the comparison-sort island's gather step (after the boxed sort), keeping the boxed sort itself but materializing the output through a typed array. +- **Radix dtype-dispatch → hybrid island**: the radix island already dispatches on dtype to pick `Uint32Array` vs `Float64Array` paths. Port the dispatch into the hybrid island so the hybrid's large-input branch gets dtype-aware acceleration. +- **NaN pre-partition → typed-array island**: the comparison-sort island handles NaN inside the comparator. 
Port the *pre-partition* approach (separate finite from NaN once at the top) into the typed-array island, where it gives a much cleaner contiguous finite slice for `Float64Array.prototype.sort`. + +## Reasoning template + +Before writing any code, fill in (in your visible reasoning): + +1. **Operator**: crossover or migration. Why this one (or were you forced into it by the deterministic overrides in the playbook). +2. **Parent A** (donor for migration): id, island, fitness, the *specific technique* you're taking. +3. **Parent B** (recipient for migration): id, island, fitness, what you're keeping. +4. **The graft**: which combination/migration pattern from above. Be precise about what comes from where. +5. **Hypothesis**: why the combined / grafted result should outperform either parent alone. The mechanism must reference *both* parents' contributions. +6. **Recipient island integrity**: for migration only — does the resulting candidate still belong to the recipient island, or has the graft pushed it into a third island? If it's now in a different island, that's fine — but record it accurately in the population entry. +7. **Predicted feature cell**: which `(storage, algorithm)` cell the child lands in. Crossovers often land in a *new* cell — that's a feature, not a bug. +8. **Validity pre-check**: walk through the cheap invariants from the playbook (signature, no `any`, NaN handling, index alignment). Pay extra attention here — grafts are the most common source of "compiles but breaks an invariant" candidates, especially around NaN placement. + +Only after all eight are written should you start editing code. + +## Anti-patterns + +- ❌ **Naive average**: literally averaging two configs / two algorithms. Always loses to either parent. +- ❌ **Same-island crossover**: picking two parents on the same island. That's exploitation with extra steps. +- ❌ **Whole-parent swap**: producing a child that is identical to one of the parents (you "combined" by ignoring one). 
If you can't name a contribution from each parent, you haven't done crossover. +- ❌ **Migration that demolishes the recipient**: the graft replaces so much of the recipient that the result is just the donor on a different island label. The point of migration is to *enrich*, not overwrite. +- ❌ **Breaking NaN semantics on the seam**: the most common failure mode is the donor's storage and the recipient's NaN handling not agreeing on where NaN lives. Walk through one ascending+`naPosition: "first"` example by hand before committing. diff --git a/.autoloop/programs/tsb-perf-evolve/strategy/prompts/mutation.md b/.autoloop/programs/tsb-perf-evolve/strategy/prompts/mutation.md new file mode 100644 index 00000000..65183a3f --- /dev/null +++ b/.autoloop/programs/tsb-perf-evolve/strategy/prompts/mutation.md @@ -0,0 +1,59 @@ +# Mutation prompt — tsb-perf-evolve + +You are about to apply a **single-parent operator** — either exploitation (refine an elite) or exploration (try something new in an under-represented island). This file frames how to reason about that change. Use it together with `strategy/alphaevolve.md`. + +## What this operator is for + +- **Exploitation** — you have a parent that works well. Make a *small, principled* change that you have a clear reason to believe will improve fitness. One change at a time. If you change five things at once and fitness moves, you will not know which thing did it. +- **Exploration** — you are seeding diversity in an island that is under-represented (or has never been tried). It is fine — desirable, even — to produce a candidate with worse fitness than the current best, as long as it lands in a *different feature cell*. Diversity has value. + +## Mutation vocabulary + +These are the moves available for `Series.sortValues`. They map roughly onto the islands enumerated in the playbook, but any move is legal in any island as long as the resulting candidate still belongs there. 
+ +- **Replace boxed pairs with parallel typed arrays**: instead of `[{v, i}, …]`, allocate a `Float64Array` for values and a `Uint32Array` for indices, sort one by reference to the other. +- **Indirect index sort**: sort a `Uint32Array` of indices `0..n-1` using a comparator that reads the source values; gather output at the end. Avoids touching the value array during the comparator. +- **Pack into a single typed array**: encode `(value, index)` into one `BigInt64Array` cell or two adjacent `Float64Array` cells; sort a single contiguous buffer. +- **Hoist NaN handling**: pre-partition NaN to the start or end (depending on `naPosition`) and sort only the finite slice. Eliminates a NaN check from the comparator. +- **Comparator monomorphization**: extract the comparator into a small monomorphic function so Bun's JIT can inline it. Avoid closing over `ascending`/`naPosition` — pass via dispatch to one of four pre-defined comparators. +- **Dtype dispatch**: branch on `this.dtype` before sorting, picking a specialized path per dtype (numeric → typed-array; string → string-comparator; object → boxed-pair fallback). +- **Radix / counting sort for finite floats**: transform `Float64` to a sortable `Uint32`/`BigUint64` representation (flip sign bit + flip negatives), LSD radix sort, untransform on gather. +- **Small-input fast path**: if `n < threshold` (e.g. 64), use the existing implementation; the typed-array overhead doesn't pay below that. +- **Preallocate output buffers**: avoid `Array.prototype.map` for the gather step; preallocate the output array(s) with `new Array(n)` or a typed array of the right size. +- **Avoid `Index.take` allocation**: if the index is a default `RangeIndex`, materialize directly without going through `take`; only call `take` for non-trivial indexes. + +For **exploitation**, prefer small moves from the top of this list. 
For **exploration**, prefer larger structural moves from further down — or invent something not on the list and add it for future iterations. + +## Domain knowledge + +Things to keep in mind about this specific problem: + +- Bun's JIT inlines monomorphic function calls aggressively — keep the comparator and the gather function call sites monomorphic. Avoid passing comparators that close over varying booleans; prefer dispatching to one of four pre-defined comparators. +- `Array.prototype.sort` in V8/JSC uses TimSort and is *very* good. Beating it requires either (a) avoiding the per-element object allocation, or (b) escaping comparison sort entirely (radix on transformed floats). +- Typed arrays bypass the JS GC, but allocating one inside a hot loop still costs. Allocate once, outside the measured region. (The benchmark runs `sortValues` `MEASURED_ITERATIONS` times — every per-call allocation matters.) +- The current implementation allocates `n` boxed `{v, i}` objects, then `n` more arrays for `pairs.map(...)` × 2, then a new Series. The allocation pressure dominates at `n = 100_000`. +- pandas `sort_values` is NumPy `argsort` under the hood, with a C-implemented quicksort/mergesort and zero per-element JS-style allocation. To beat it, exploit something JS has but NumPy doesn't (e.g. monomorphic JIT inlining of small specialized comparators) or avoid comparison entirely. +- NaN handling is *not* free in the comparator. Branch-prediction-friendly patterns: sort the finite slice and prepend/append NaN, rather than testing for NaN in every comparison. +- `Float64Array.prototype.sort` puts NaNs at the *end* by IEEE-754 ordering, not at the *position* requested by `naPosition`. You will need to partition NaN before/after the typed-array sort. +- Avoid `eval` / `new Function` — codegen overhead dominates at the iteration counts we measure. + +## Reasoning template + +Before writing any code, fill in (in your visible reasoning): + +1. 
**Operator**: exploitation or exploration. Why this one (you may have been forced into it by the deterministic overrides in the playbook — say so). +2. **Parent**: candidate id, island, fitness, one-line approach summary. +3. **The move**: which mutation from the vocabulary above (or a novel one you are inventing — describe it). +4. **Hypothesis**: why this should improve fitness. Be specific. "Should be faster" is not a hypothesis. "Removes one allocation per row in the inner loop, which dominates the profile at n=100k" is a hypothesis. +5. **Predicted feature cell**: which `(storage, algorithm)` cell will this candidate land in? If it's the same cell as an existing elite with worse fitness, you should already be at higher confidence than usual. +6. **Validity pre-check**: walk through the cheap invariants from the playbook (signature, no `any`, NaN handling, index alignment). + +Only after all six are written should you start editing code. + +## Anti-patterns + +- ❌ **Multi-mutation**: changing several unrelated things in one candidate. Split into separate iterations. +- ❌ **Re-discovering**: proposing a candidate whose approach already exists in the population. Always check the population first. +- ❌ **Vague hypothesis**: "this looks cleaner" or "should be more efficient" with no mechanism. If you can't name the mechanism, you don't have a hypothesis. +- ❌ **Ignoring rejected lessons**: if a similar mutation was rejected in a recent iteration *and* the Lessons Learned says why, do not retry it without a new angle. +- ❌ **Breaking NaN semantics**: silently changing where NaN ends up because the typed-array sort path makes it convenient. NaN placement is part of the contract. 
diff --git a/.autoloop/strategies/alphaevolve/CUSTOMIZE.md b/.autoloop/strategies/alphaevolve/CUSTOMIZE.md new file mode 100644 index 00000000..1de1179b --- /dev/null +++ b/.autoloop/strategies/alphaevolve/CUSTOMIZE.md @@ -0,0 +1,101 @@ +# Adopting the AlphaEvolve strategy for a new program + +This file is a **creator-time guide** — it is read by the maintainer (or a "create program" agent) **once**, when authoring a new program that wants to use AlphaEvolve. It is **not** copied into the program's `strategy/` directory and is **not** read by the iteration agent at runtime. + +If you are an iteration agent and have somehow ended up here: stop, go back to `strategy/alphaevolve.md` in the program directory, and follow that. + +## When to pick AlphaEvolve + +AlphaEvolve is the right strategy when **all** of the following are true: + +- The target is a **self-contained artifact** — a single function, a single file, a config blob — that can be replaced atomically each iteration. +- Fitness is a **scalar metric** the evaluator can produce in a few seconds to a few minutes (lower or higher is better — pick one). +- There are **multiple plausible algorithmic families**, not just one obvious approach with knobs to tune. AlphaEvolve's island model is wasted if everything collapses to one family. +- Iterations are **independent** — a candidate's fitness does not depend on the previous candidate's state. (If you need to *accumulate* changes, use the default loop, not AlphaEvolve.) + +If the program is "add another test", "port another feature", or any kind of coverage / accumulation task — **do not use AlphaEvolve**. Use the default loop. + +## Steps to adopt + +1. Create `.autoloop/programs//` with the usual layout: a `program.md` and a `code/` directory containing the target artifact and the evaluator. +2. 
Copy the strategy template into the program: + + ```bash + mkdir -p .autoloop/programs/<program-name>/strategy/prompts + cp .autoloop/strategies/alphaevolve/strategy.md \ + .autoloop/programs/<program-name>/strategy/alphaevolve.md + cp .autoloop/strategies/alphaevolve/prompts/mutation.md \ + .autoloop/programs/<program-name>/strategy/prompts/mutation.md + cp .autoloop/strategies/alphaevolve/prompts/crossover.md \ + .autoloop/programs/<program-name>/strategy/prompts/crossover.md + ``` + +3. Resolve every `<...>` marker in `strategy/alphaevolve.md` and the two prompt files. See the marker-by-marker guidance below. +4. Add the `## Evolution Strategy` pointer block to `program.md` (template below). +5. Sanity-check: `grep -R "<" .autoloop/programs/<program-name>/strategy/` should return **nothing** — no unresolved `<...>` markers may remain. + +## The pointer block for `program.md` + +Replace (or add) `program.md`'s `## Evolution Strategy` section with exactly this: + +```markdown +## Evolution Strategy + +This program uses the **AlphaEvolve** strategy. On every iteration, read `strategy/alphaevolve.md` and follow it literally — it supersedes the generic analyze/accept/reject steps in the default autoloop loop. + +Support files: +- `strategy/alphaevolve.md` — the runtime playbook (operators, parent selection, population rules). +- `strategy/prompts/mutation.md` — framing for exploitation and exploration operators. +- `strategy/prompts/crossover.md` — framing for crossover and migration operators. + +Population state lives in the state file on the `memory/autoloop` branch under the `## 🧬 Population` subsection (see the playbook for the schema). +``` + +## Marker-by-marker guidance + +### `strategy.md` markers + +- **`# AlphaEvolve Strategy — <program-name>`** — the program name as it appears in the file path. +- **`## Problem framing`** — 2–4 sentences. State the artifact, the fitness function, and the validity invariants. The agent reads this every iteration; make it dense. +- **Operator weight table** — only change defaults if you have a strong prior. 
The defaults bias toward exploitation, which is right for most perf problems. +- **Islands** — the most important thing to get right. Pick 3–6 **algorithmic families** that span the design space. Examples: + - For a numeric optimization: gradient-based, gradient-free local, evolutionary, hybrid. + - For a layout problem: grid, hex, force-directed, hierarchical. + - For a tsb perf evolve: column scan, iterator pipeline, gather/scatter, WASM, SoA batched. + Give each island a one-line description that is concrete enough that the agent can tell which island a new candidate belongs to. +- **Validity pre-check invariants** — list the *cheap* checks. Things the agent can verify by reading the candidate, before running the full evaluator. (E.g. "no `any`", "no new dependencies", "exported function signature unchanged".) +- **Diff style** — "full rewrite" if the artifact is a single small function; "minimal diff" if it is a larger file where most of the surface is fixed. +- **`population_size`, `archive_size`** — tune to your problem's scale. Defaults (40 / 10) are reasonable for most cases. Smaller populations converge faster but lose diversity; larger ones explore more but the per-iteration parent-selection cost grows. +- **Feature dimensions** — pick 2–3 *qualitative* dimensions that distinguish meaningfully-different solutions. Avoid using fitness as a dimension (that defeats the point). Good examples: "memory layout (AoS / SoA / typed-array)", "algorithm (sort-then-scan / hash / bitmap)". Bad examples: "fast / medium / slow". +- **Population schema language tag** — the `` in the code fence (e.g. `typescript`, `python`, `yaml`). + +### `prompts/mutation.md` markers + +This prompt frames how the agent reasons about *single-parent* changes (exploitation refining the best, exploration trying something new in an under-represented island). Customize: + +- **Mutation vocabulary** — list 5–10 concrete mutation moves that make sense for this problem. (E.g. 
"replace `Array.prototype.map` with a preallocated typed array", "split a hot loop into chunks of 64".) These act as a menu the agent can sample from. +- **Domain knowledge** — anything you, the maintainer, know about the problem space that the agent might not derive on its own. Keep it short (10–20 bullets max) — the agent reads this every iteration. + +### `prompts/crossover.md` markers + +This prompt frames *two-parent* operations (crossover combines, migration grafts). Customize: + +- **Combination patterns** — what does "combining two solutions" look like for this problem? (For code: "take the data structure from parent A and the loop body from parent B". For configs: "merge non-conflicting keys, agent picks for conflicts".) +- **Migration patterns** — what does "porting a technique from island A to island B" mean concretely? Spell out one or two worked examples. + +## A tiny worked example + +Suppose you are creating `tsb-perf-evolve` to make `Series.sort_values` faster than pandas. Filled-in islands might be: + +- **Island 0 — Comparison sort**: `Array.prototype.sort` with custom comparator. +- **Island 1 — Typed array sort**: copy into `Float64Array`, sort in place, gather indices. +- **Island 2 — Radix / counting sort**: dispatch on dtype, use a non-comparison sort where applicable. +- **Island 3 — WASM**: call a tiny WASM module compiled from Zig/Rust. +- **Island 4 — SoA batched**: sort multiple columns together in a single pass. + +Feature dimensions: + +- **Memory layout**: AoS / SoA / typed-array +- **Algorithm class**: comparison / non-comparison / hybrid + +That's the kind of fill-in to aim for — concrete, distinguishable, exhaustive enough that interesting candidates land in different cells. 
diff --git a/.autoloop/strategies/alphaevolve/prompts/crossover.md b/.autoloop/strategies/alphaevolve/prompts/crossover.md new file mode 100644 index 00000000..b66a4e92 --- /dev/null +++ b/.autoloop/strategies/alphaevolve/prompts/crossover.md @@ -0,0 +1,50 @@ +# Crossover & Migration prompt — + +You are about to apply a **two-parent operator** — either crossover (combine ideas from parents on different islands) or migration (graft a technique that works on one island into a solution on another). This file frames how to reason about that change. Use it together with `strategy/alphaevolve.md`. + +## What these operators are for + +- **Crossover** — both parents are valid, working candidates from different islands. The goal is a child that takes a *good idea* from each. Crossover that is just "average the two" almost never wins; structural composition does. +- **Migration** — one parent (the **donor**) is on island A, where some technique works particularly well. The other parent (the **recipient**) is on island B, where the technique has not been tried. The goal is to graft the technique from A into a candidate on B, *without* breaking what makes B's island distinctive. + +The agent must be able to clearly say: "the *X* in this child came from parent A; the *Y* came from parent B." + +## Combination patterns + +How "combining" looks for this problem (): + +- +- +- +- + +If none of the patterns above fits the two parents you've picked, that's a signal those parents are not a good crossover pair. Pick different parents — don't force a bad combination. + +## Migration patterns + +Worked examples for "porting a technique from island A to island B" in this problem (): + +- +- + +## Reasoning template + +Before writing any code, fill in (in your visible reasoning): + +1. **Operator**: crossover or migration. Why this one (or were you forced into it by the deterministic overrides in the playbook). +2. 
**Parent A** (donor for migration): id, island, fitness, the *specific technique* you're taking. +3. **Parent B** (recipient for migration): id, island, fitness, what you're keeping. +4. **The graft**: which combination/migration pattern from above. Be precise about what comes from where. +5. **Hypothesis**: why the combined / grafted result should outperform either parent alone. The mechanism must reference *both* parents' contributions. +6. **Recipient island integrity**: for migration only — does the resulting candidate still belong to the recipient island, or has the graft pushed it into a third island? If it's now in a different island, that's fine — but record it accurately in the population entry. +7. **Predicted feature cell**: which `(dim1, dim2)` cell the child lands in. Crossovers often land in a *new* cell — that's a feature, not a bug. +8. **Validity pre-check**: walk through the cheap invariants from the playbook. Pay extra attention here — grafts are the most common source of "compiles but breaks an invariant" candidates. + +Only after all eight are written should you start editing code. + +## Anti-patterns + +- ❌ **Naive average**: literally averaging two configs / two algorithms. Always loses to either parent. +- ❌ **Same-island crossover**: picking two parents on the same island. That's exploitation with extra steps. +- ❌ **Whole-parent swap**: producing a child that is identical to one of the parents (you "combined" by ignoring one). If you can't name a contribution from each parent, you haven't done crossover. +- ❌ **Migration that demolishes the recipient**: the graft replaces so much of the recipient that the result is just the donor on a different island label. The point of migration is to *enrich*, not overwrite. 
diff --git a/.autoloop/strategies/alphaevolve/prompts/mutation.md b/.autoloop/strategies/alphaevolve/prompts/mutation.md new file mode 100644 index 00000000..0990acab --- /dev/null +++ b/.autoloop/strategies/alphaevolve/prompts/mutation.md @@ -0,0 +1,51 @@ +# Mutation prompt — + +You are about to apply a **single-parent operator** — either exploitation (refine an elite) or exploration (try something new in an under-represented island). This file frames how to reason about that change. Use it together with `strategy/alphaevolve.md`. + +## What this operator is for + +- **Exploitation** — you have a parent that works well. Make a *small, principled* change that you have a clear reason to believe will improve fitness. One change at a time. If you change five things at once and fitness moves, you will not know which thing did it. +- **Exploration** — you are seeding diversity in an island that is under-represented (or has never been tried). It is fine — desirable, even — to produce a candidate with worse fitness than the current best, as long as it lands in a *different feature cell*. Diversity has value. + +## Mutation vocabulary + +These are the moves available for this problem (): + +- +- +- +- +- +- +- + +For **exploitation**, prefer small moves from the top of this list. For **exploration**, prefer larger structural moves from further down — or invent something not on the list and add it for future iterations. + +## Domain knowledge + +Things you, the agent, should keep in mind about this specific problem (): + +- +- +- +- + +## Reasoning template + +Before writing any code, fill in (in your visible reasoning): + +1. **Operator**: exploitation or exploration. Why this one (you may have been forced into it by the deterministic overrides in the playbook — say so). +2. **Parent**: candidate id, island, fitness, one-line approach summary. +3. **The move**: which mutation from the vocabulary above (or a novel one you are inventing — describe it). +4. 
**Hypothesis**: why this should improve fitness. Be specific. "Should be faster" is not a hypothesis. "Removes one allocation per row in the inner loop, which dominates the profile at n=100k" is a hypothesis. +5. **Predicted feature cell**: which `(dim1, dim2)` cell will this candidate land in? If it's the same cell as an existing elite with worse fitness, you should already be at higher confidence than usual. +6. **Validity pre-check**: walk through the cheap invariants from the playbook. + +Only after all six are written should you start editing code. + +## Anti-patterns + +- ❌ **Multi-mutation**: changing several unrelated things in one candidate. Split into separate iterations. +- ❌ **Re-discovering**: proposing a candidate whose approach already exists in the population. Always check the population first. +- ❌ **Vague hypothesis**: "this looks cleaner" or "should be more efficient" with no mechanism. If you can't name the mechanism, you don't have a hypothesis. +- ❌ **Ignoring rejected lessons**: if a similar mutation was rejected in a recent iteration *and* the Lessons Learned says why, do not retry it without a new angle. diff --git a/.autoloop/strategies/alphaevolve/strategy.md b/.autoloop/strategies/alphaevolve/strategy.md new file mode 100644 index 00000000..e54461b3 --- /dev/null +++ b/.autoloop/strategies/alphaevolve/strategy.md @@ -0,0 +1,136 @@ +# AlphaEvolve Strategy — + +This file is the **runtime playbook** for this program. The autoloop agent reads it at the start of every iteration and follows it literally. It supersedes the generic "Analyze and Propose" / "Accept or Reject" steps in the default autoloop iteration loop — all other steps (state read, branch management, state file updates) still apply. + +## Problem framing + + + +## Per-iteration loop + +### Step 1. Load state + +1. Read `program.md` — Goal, Target, Evaluation. +2. Read the program's state file from the repo-memory folder (`{program-name}.md`). 
Locate the `## 🧬 Population` subsection. If it does not exist, create it using the schema in [Population schema](#population-schema). +3. Read any config the program exposes (e.g. `code/config.yaml`) for tunables like `exploitation_ratio`, `num_islands`. Do not hard-code values you can read from config — the maintainer may have tuned them. +4. Read both prompt templates in `strategy/prompts/`. These frame how you reason about mutations and crossovers for this specific problem. + +### Step 2. Pick operator + +Sample one operator using these weights (): + +| Operator | Default weight | When it fires | +|---|---|---| +| Exploitation | 0.50 | Refine one of the elites — the current best or a near-best. | +| Exploration | 0.30 | Generate a candidate from an **under-represented island** or a novel family. | +| Crossover | 0.15 | Combine ideas from two parents on different islands. | +| Migration | 0.05 | Take a technique that works on island A and port it into a solution on island B. | + +Deterministic overrides (apply *before* sampling): + +- If the population is empty or has one member → **Exploration** (seed diversity). +- If the last 3 statuses in `recent_statuses` are all `rejected` → force **Exploration** with a previously-unused island. +- If the last 5 statuses are all `rejected` → force **Migration** or a radically new island; also revisit any domain knowledge in `prompts/mutation.md` that has not yet been applied. + +Record your chosen operator in the iteration's reasoning — the state file's Iteration History entry must include it. + +### Step 3. Pick parent(s) + +**Islands** for this program (): + +- **Island 0 — **: +- **Island 1 — **: +- **Island 2 — **: +- **Island 3 — **: + +Parent selection by operator: + +- **Exploitation** — pick the best scorer; break ties by picking the most recent. +- **Exploration** — pick the island with the fewest members (or a brand-new island number if all are full), then either start from its best member or from scratch. 
+- **Crossover** — pick two parents on **different islands**. Bias toward one elite (top quartile) and one diverse (any island with a distinct feature-cell — see [Feature dimensions](#feature-dimensions)). +- **Migration** — pick one donor island (the source of the technique) and one recipient island (where the technique will be grafted in). The parent you actually edit is on the recipient island. + +### Step 4. Apply the operator + +Frame your reasoning using the matching prompt template: + +- Exploitation or Exploration → `strategy/prompts/mutation.md` +- Crossover or Migration → `strategy/prompts/crossover.md` + +Before writing any code, state (in your visible reasoning): + +1. Chosen operator + why. +2. Parent(s) picked — their IDs, island, score, and a one-line summary of each parent's approach. +3. What specifically you're changing, and your hypothesis for *why* it should improve the fitness. +4. Validity pre-check (): walk through why the proposed candidate will satisfy each invariant. +5. Novelty check: confirm this is not a near-duplicate of an existing population member or of anything in the state file's 🚧 Foreclosed Avenues. + +### Step 5. Implement + +Edit only the files listed in `program.md`'s Target section. The diff style for this program is: . + +### Step 6. Evaluate + +Run the evaluation command from `program.md`. Parse the metric. + +### Step 7. Update the population + +Regardless of whether the iteration is accepted or rejected at the branch level, the candidate has been tried and should be recorded in the population — the population is a memory of what's been explored, not just what's been kept. + +Append a new entry to the `## 🧬 Population` subsection in the state file using the schema below. Then enforce these caps: + +- **Population cap**: . If exceeded, evict the *worst* member in the most-crowded feature cell (MAP-Elites style — never evict the best of any cell). 
+- **Elite archive**: the top by fitness are always preserved regardless of cell crowding. + +### Step 8. Fold through to the default loop + +Continue with the normal autoloop Step 5 (Accept or Reject → commit / discard, update state file's Machine State, Iteration History, Lessons Learned, etc.) as defined in the workflow. The only additional requirements from AlphaEvolve are: + +- The Iteration History entry must include `operator`, `parent_id(s)`, `island`, and `fitness` fields (in addition to the normal status/change/metric/notes). +- Lessons Learned additions should be phrased as *transferable heuristics* about the problem space, not as reports of what this iteration did. (E.g. "Hex layouts dominate grid layouts above n=20" — not "Iteration 17 tried a hex layout.") + +## Feature dimensions + +MAP-Elites partitions the population into **feature cells**. Each candidate is described by a small tuple of qualitative features, and the population keeps the best candidate per cell — this is what creates diversity pressure even when many candidates have similar fitness. + +For this program, use these feature dimensions (): + +- **Dimension 1 — **: +- **Dimension 2 — **: + +When evaluating a candidate, classify it into one cell per dimension. The combined `(dim1, dim2, …)` tuple is its **feature cell**. Record the cell in the population entry (see schema). + +## Population schema + +The population lives in the state file `{program-name}.md` on the `memory/autoloop` branch as a subsection. Use this exact layout so maintainers can read and edit it: + +```markdown +## 🧬 Population + +> 🤖 *Managed by the AlphaEvolve strategy. One entry per candidate that has been evaluated (accepted or rejected). 
Newest first.* + +### Candidate <id> · island <island> · fitness <fitness> · gen <gen> + +- **Operator**: exploitation / exploration / crossover / migration +- **Parent(s)**: [<id>, <id>] +- **Feature cell**: <dim1> · <dim2> +- **Approach**: <one-line summary of the approach> +- **Status**: ✅ accepted / ❌ rejected +- **Notes**: <free-form observations> + +Code: + +\`\`\`<lang> +<the candidate's code> +\`\`\` + +--- +``` + +Identifiers: +- `<id>` is `c{NNN}` zero-padded, monotonically increasing across the program's lifetime. +- `<island>` is the island number (0-indexed). +- `<fitness>` is the raw fitness from the evaluator. +- `<gen>` is the iteration number from the Machine State table. + +When evicting members under the population cap, **never** delete an entry — instead, prepend a strikethrough header (`### ~~Candidate c042~~ (evicted, gen 87)`) and remove the entire `Code:` block (both the `Code:` label and the surrounding triple-backtick code fence with its language identifier) to keep the file size bounded. The metadata stays so future iterations can see what was tried. diff --git a/.github/workflows/autoloop.md b/.github/workflows/autoloop.md index 04857054..0c2d9c3e 100644 --- a/.github/workflows/autoloop.md +++ b/.github/workflows/autoloop.md @@ -344,6 +344,19 @@ All three reference each other. For file-based programs, the program issue is au Each run executes **one iteration for the single selected program**: +### Strategy discovery + +Before executing the generic iteration loop below, check whether this program has opted into a specialized strategy: + +1. Read `<program-dir>/program.md` and look for a `## Evolution Strategy` section. +2. If that section points to a strategy file — e.g., "This program uses the **AlphaEvolve** strategy. Read `strategy/alphaevolve.md` at the start of every iteration and follow it literally." — read the referenced file and follow it. +3. The strategy playbook **supersedes** the generic "Step 2: Analyze" through "Step 5: Accept or Reject" steps below. The other steps (state read, branch management, state file updates, CI gating) still apply. +4. 
If `## Evolution Strategy` is absent, contains only prose, or points to a file that does not exist, fall back to the default iteration flow below. + +Strategy files live under `<program-dir>/strategy/`. Program-specific prompts (e.g., `strategy/prompts/mutation.md`) are read by the strategy playbook at the appropriate step — do not read them pre-emptively, the playbook will tell you when. + +Reusable strategy templates (with `<...>` markers) live in `.autoloop/strategies/`. To author a new strategy-based program, copy the chosen strategy's files into `<program-dir>/strategy/`, resolve the markers, and add the matching `## Evolution Strategy` pointer block to `program.md` (see the strategy's `CUSTOMIZE.md` for guidance). + ### Step 1: Read State 1. Read the program file to understand the goal, targets, and evaluation method.