diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..d9fd424d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,61 @@ +name: CI + +on: + push: + branches: + - main + - "autoloop/**" + pull_request: + branches: + - main + +permissions: + contents: read + +jobs: + test: + name: Test & Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install + + - name: Type check + run: bun run typecheck + + - name: Lint + run: bun run lint + + - name: Test + run: bun test --coverage + + build: + name: Build + runs-on: ubuntu-latest + needs: test + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install + + - name: Build library + run: bun build ./src/index.ts --outdir ./dist --target browser --minify + + - name: Upload dist artifact + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 00000000..5b9009b0 --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,54 @@ +name: Deploy Playground to Pages + +on: + push: + branches: + - main + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: false + +jobs: + build: + name: Build Playground + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install + + - name: Build library for browser + run: bun build ./src/index.ts --outdir ./playground/dist --target browser --minify + + - name: Setup Pages + uses: actions/configure-pages@v5 + + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v3 + with: + path: playground/ + + deploy: + name: Deploy to Pages + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..4088a0f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +node_modules/ +dist/ +*.tsbuildinfo +package-lock.json +*.tgz diff --git a/.vscode/mcp.json b/.vscode/mcp.json index 96e7285c..01021df6 100644 --- a/.vscode/mcp.json +++ b/.vscode/mcp.json @@ -2,10 +2,7 @@ "servers": { "github-agentic-workflows": { "command": "gh", - "args": [ - "aw", - "mcp-server" - ] + "args": ["aw", "mcp-server"] } } -} \ No newline at end of file +} diff --git a/.vscode/settings.json b/.vscode/settings.json index dbd4bd79..11d9bacd 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,5 @@ { - "github.copilot.enable": { - "markdown": true - } -} \ No newline at end of file + "github.copilot.enable": { + "markdown": true + } +} diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..44c24676 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,65 @@ +# Agent Instructions (AGENTS.md) + +This file provides project-specific conventions for AI coding agents working in this repository. + +## Project Overview + +**tsb** is a TypeScript port of [pandas](https://pandas.pydata.org/), built from first principles. +- Package name: `tsb` — all imports use `tsb` +- Runtime: Bun +- Language: TypeScript (strictest mode) + +## Key Rules + +1. **Never modify `README.md`** — it is read-only, the source of truth for project parameters. +2. **Never modify `.autoloop/programs/**`** or autoloop workflow files. +3. **Strict TypeScript only** — no `any`, no `as` casts, no `@ts-ignore`, no escape hatches. +4. **Zero core dependencies** — implement everything from scratch. +5. **100% test coverage** required — unit + property-based (fast-check) + fuzz where applicable. +6. **Every feature gets a playground page** in `playground/`. +7. **One feature per commit** — keep changes small and targeted. + +## Project Structure + +``` +src/ + index.ts — package entry point, re-exports all features + types.ts — shared type definitions + core/ — core data structures (Series, DataFrame, Index, Dtype) + io/ — I/O utilities (read_csv, read_json, etc.) + groupby/ — groupby and aggregation + reshape/ — pivot, melt, stack, unstack + merge/ — merge, join, concat + window/ — rolling, expanding, ewm + stats/ — statistical functions +tests/ + setup.ts — global test setup (loaded via bunfig.toml) + *.test.ts — mirrors src/ structure +playground/ + index.html — landing page + *.html — one page per feature +``` + +## Adding a New Feature + +1. Create `src/{module}/{feature}.ts` with the implementation. +2. Export from `src/index.ts`. +3. Create `tests/{module}/{feature}.test.ts` with full coverage. +4. Create `playground/{feature}.html` with an interactive tutorial. +5. Update `playground/index.html` to mark the feature as complete. + +## Running Locally + +```bash +bun install # install devDependencies +bun test # run all tests +bun run lint # check linting +bun run typecheck # TypeScript strict check +``` + +## Autoloop Coordination + +This project is built by [Autoloop](https://github.com/githubnext/autoloop), an iterative optimization agent. +- Long-running branch: `autoloop/build-tsb-pandas-typescript-migration` +- State file: `build-tsb-pandas-typescript-migration.md` on `memory/autoloop` branch +- Issue #1 is the program definition — do not modify it. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..adf094b4 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,27 @@ +--- +description: Coding preferences for Claude when working on tsb. +--- + +# Claude Code Configuration (CLAUDE.md) + +## Behavior + +- Always read `AGENTS.md` first for project conventions. +- Read `README.md` to understand the project requirements — treat it as read-only. +- Read the state file in `.autoloop/memory/` for current migration progress. + +## Code Style + +- TypeScript strict mode — no `any`, no `as`, no `@ts-ignore` +- Biome formatting (spaces, 100-col lines, double quotes, trailing commas) +- JSDoc for all exported symbols +- Unit tests with `bun:test` + property tests with `fast-check` + +## Commands + +```bash +bun install # install deps +bun test # run tests +bun run lint # Biome lint +bun run typecheck # tsc --noEmit +``` diff --git a/biome.json b/biome.json new file mode 100644 index 00000000..4c06c454 --- /dev/null +++ b/biome.json @@ -0,0 +1,67 @@ +{ + "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", + "vcs": { + "enabled": true, + "clientKind": "git", + "useIgnoreFile": true + }, + "files": { + "ignoreUnknown": false, + "ignore": ["dist/**", "node_modules/**", "*.d.ts"] + }, + "formatter": { + "enabled": true, + "indentStyle": "space", + "indentWidth": 2, + "lineWidth": 100 + }, + "linter": { + "enabled": true, + "rules": { + "recommended": true, + "complexity": { + "all": true + }, + "correctness": { + "all": true + }, + "nursery": { + "all": true + }, + "performance": { + "all": true, + "noBarrelFile": "off" + }, + "security": { + "all": true + }, + "style": { + "all": true, + "noDefaultExport": "off", + "useNamingConvention": "off" + }, + "suspicious": { + "all": true + } + } + }, + "javascript": { + "formatter": { + "quoteStyle": "double", + "trailingCommas": "all", + "semicolons": "always" + } + }, + "overrides": [ + { + "include": ["**/*.ts", "**/*.tsx"], + "javascript": { + "formatter": { + "quoteStyle": "double", + "trailingCommas": "all", + "semicolons": "always" + } + } + } + ] +} diff --git a/bunfig.toml b/bunfig.toml new file mode 100644 index 00000000..8f9aee13 --- /dev/null +++ b/bunfig.toml @@ -0,0 +1,6 @@ +[test] +preload = ["./tests/setup.ts"] +coverage = true + +[install] +exact = true diff --git a/package.json b/package.json new file mode 100644 index 00000000..418b15e0 --- /dev/null +++ b/package.json @@ -0,0 +1,31 @@ +{ + "name": "tsb", + "version": "0.0.1", + "description": "A TypeScript port of pandas, built from first principles", + "type": "module", + "main": "./src/index.ts", + "module": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": { + "import": "./src/index.ts", + "types": "./src/index.ts" + } + }, + "scripts": { + "test": "bun test", + "lint": "biome check .", + "lint:fix": "biome check --write .", + "typecheck": "tsc --noEmit", + "build": "bun build ./src/index.ts --outdir ./dist --target browser", + "playground": "bun run playground/serve.ts" + }, + "devDependencies": { + "@biomejs/biome": "^1.9.4", + "fast-check": "^3.22.0", + "@types/bun": "^1.1.14" + }, + "peerDependencies": { + "typescript": "^5.7.0" + } +} diff --git a/playground/index-playground.html b/playground/index-playground.html new file mode 100644 index 00000000..3f1e5345 --- /dev/null +++ b/playground/index-playground.html @@ -0,0 +1,203 @@ + + + + + + tsb — Index & RangeIndex Playground + + + + ← Back to roadmap +

🏷️ Index & RangeIndex

+

+ The Index type is the immutable, ordered sequence of labels + that underpins both Series (row axis) and DataFrame + (row + column axes). RangeIndex is a memory-efficient subclass + for integer ranges. +

+ +
+

Creating an Index

+
+import { Index, RangeIndex } from "tsb";
+
+// String labels
+const labels = new Index(["a", "b", "c", "d"], "letters");
+// → Index([a, b, c, d], name='letters')
+
+// Numeric labels
+const nums = new Index([10, 20, 30]);
+// → Index([10, 20, 30])
+
+// RangeIndex (memory-efficient integer range)
+const range = new RangeIndex(5);
+// → RangeIndex(start=0, stop=5, step=1)  →  [0, 1, 2, 3, 4]
+
+const stepped = new RangeIndex(0, 10, 2);
+// → RangeIndex(start=0, stop=10, step=2)  →  [0, 2, 4, 6, 8]
+    
+
+ +
+

Properties

+
+const idx = new Index(["x", "y", "z"], "axis");
+
+idx.size            // 3
+idx.shape           // [3]
+idx.ndim            // 1
+idx.empty           // false
+idx.name            // "axis"
+idx.isUnique        // true
+idx.hasDuplicates   // false
+idx.isMonotonicIncreasing  // true (x < y < z)
+    
+
+ +
+

Label Look-up

+
+const idx = new Index(["a", "b", "c", "a"]);
+
+idx.getLoc("b")     // 1         (unique → single int)
+idx.getLoc("a")     // [0, 3]    (duplicated → array)
+idx.contains("c")   // true
+idx.isin(["a", "c"]) // [true, false, true, true]
+    
+
+ +
+

Set Operations

+
+const a = new Index([1, 2, 3]);
+const b = new Index([2, 3, 4]);
+
+a.union(b)                // Index([1, 2, 3, 4])
+a.intersection(b)         // Index([2, 3])
+a.difference(b)           // Index([1])
+a.symmetricDifference(b)  // Index([1, 4])
+    
+
+ +
+

Sorting & Aggregation

+
+const idx = new Index([30, 10, 20]);
+
+idx.sortValues()     // Index([10, 20, 30])
+idx.argsort()        // [1, 2, 0]
+idx.min()            // 10
+idx.max()            // 30
+idx.argmin()         // 1
+idx.argmax()         // 0
+    
+
+ +
+

Manipulation (immutable — always returns new Index)

+
+const idx = new Index(["a", "b", "c"]);
+
+idx.append(new Index(["d", "e"]))  // Index([a, b, c, d, e])
+idx.insert(1, "x")                 // Index([a, x, b, c])
+idx.delete(0)                      // Index([b, c])
+idx.drop(["b"])                    // Index([a, c])
+idx.rename("new_name")             // Index([a, b, c], name='new_name')
+    
+
+ +
+

Missing Values

+
+const idx = new Index([1, null, 3]);
+
+idx.isna()    // [false, true, false]
+idx.notna()   // [true, false, true]
+idx.dropna()  // Index([1, 3])
+idx.fillna(0) // Index([1, 0, 3])
+    
+
+ +
+

RangeIndex — Memory Efficient

+
+// Only stores start/stop/step — values computed on the fly
+const r = new RangeIndex(0, 1_000_000);
+r.size    // 1000000
+r.at(500) // 500
+
+// Negative step
+const desc = new RangeIndex(10, 0, -2);
+desc.toArray()  // [10, 8, 6, 4, 2]
+
+// Slicing preserves RangeIndex type
+r.slice(10, 20)  // RangeIndex(start=10, stop=20, step=1)
+    
+
+ + + + diff --git a/playground/index.html b/playground/index.html new file mode 100644 index 00000000..1be4bfdc --- /dev/null +++ b/playground/index.html @@ -0,0 +1,170 @@ + + + + + + tsb — TypeScript pandas | Interactive Playground + + + +
+
+

tsb

+

A TypeScript port of pandas, built from first principles

+
+
+ +
+
+ 🚧 Under Construction — Foundation Phase +

pandas for TypeScript

+

+ tsb is a ground-up TypeScript implementation of the pandas data + manipulation library, with full API parity, strict types, and an interactive + playground for every feature. +

+
+ +
+

Feature Roadmap

+
+
+

📐 Project Foundation

+

Bun, TypeScript (strict), Biome linting, CI, Pages deployment, type system.

+
✅ Complete
+
+
+

📊 Series

+

1-D labeled array — the core building block of tsb data structures.

+
⏳ Planned
+
+
+

🗃️ DataFrame

+

2-D labeled table with heterogeneous columns, the heart of pandas.

+
⏳ Planned
+
+
+

🏷️ Index

+

Immutable labeled axis — Index<T>, RangeIndex.

+
✅ Complete
+
+
+

🔢 Dtypes

+

Rich dtype system: int/float/bool/string/datetime/category.

+
⏳ Planned
+
+
+

📥 I/O

+

read_csv, read_json, read_parquet, to_csv, to_json.

+
⏳ Planned
+
+
+
+
+ + + + diff --git a/src/core/base-index.ts b/src/core/base-index.ts new file mode 100644 index 00000000..efb7018c --- /dev/null +++ b/src/core/base-index.ts @@ -0,0 +1,568 @@ +/** + * Generic Index — the immutable, labeled axis for Series and DataFrame. + * + * Mirrors pandas.Index: stores an ordered sequence of labels, + * supports set operations, duplicate detection, look-up by label, and more. + */ + +import type { Label } from "../types.ts"; + +/** Options accepted by the Index constructor. */ +export interface IndexOptions { + readonly data: readonly T[]; + readonly name?: string | null; +} + +/** + * An immutable, ordered sequence of labels. + * + * `Index` is the TypeScript equivalent of `pandas.Index`. + * It underpins both `Series` (as the row axis) and `DataFrame` + * (as the row axis *and* column axis). + */ +export class Index { + /** Internal storage — never exposed mutably. */ + protected readonly _values: readonly T[]; + + /** Optional human-readable label for this axis. */ + readonly name: string | null; + + // ─── construction ─────────────────────────────────────────────── + + constructor(data: readonly T[], name?: string | null) { + this._values = Object.freeze([...data]); + this.name = name ?? null; + } + + /** + * Factory that accepts the `IndexOptions` bag. + * Useful when forwarding options from higher-level constructors. + */ + static from(opts: IndexOptions): Index { + return new Index(opts.data, opts.name); + } + + // ─── properties ───────────────────────────────────────────────── + + /** Number of elements. */ + get size(): number { + return this._values.length; + } + + /** Shape tuple (always 1-D). */ + get shape(): [number] { + return [this._values.length]; + } + + /** Number of dimensions (always 1). */ + get ndim(): 1 { + return 1; + } + + /** True when the index has zero elements. */ + get empty(): boolean { + return this._values.length === 0; + } + + /** Snapshot of the underlying values as a plain array. */ + get values(): readonly T[] { + return this._values; + } + + /** True when every label appears exactly once. */ + get isUnique(): boolean { + return new Set(this._values).size === this._values.length; + } + + /** True when any label appears more than once. */ + get hasDuplicates(): boolean { + return !this.isUnique; + } + + /** True when values are weakly ascending. */ + get isMonotonicIncreasing(): boolean { + for (let i = 1; i < this._values.length; i++) { + const prev = this._values[i - 1]; + const curr = this._values[i]; + if (prev === undefined || curr === undefined || prev === null || curr === null) { + return false; + } + if (prev > curr) { + return false; + } + } + return true; + } + + /** True when values are weakly descending. */ + get isMonotonicDecreasing(): boolean { + for (let i = 1; i < this._values.length; i++) { + const prev = this._values[i - 1]; + const curr = this._values[i]; + if (prev === undefined || curr === undefined || prev === null || curr === null) { + return false; + } + if (prev < curr) { + return false; + } + } + return true; + } + + // ─── element access ───────────────────────────────────────────── + + /** Return the label at positional index `i`. */ + at(i: number): T { + const len = this._values.length; + const idx = i < 0 ? len + i : i; + if (idx < 0 || idx >= len) { + throw new RangeError(`Index ${i} is out of bounds for axis of size ${len}`); + } + return this._values[idx] as T; + } + + /** Return a new Index from a positional slice [start, end). */ + slice(start?: number, end?: number): Index { + return new Index(this._values.slice(start, end), this.name); + } + + /** + * Fancy-index: return a new Index by picking positions from `indices`. + */ + take(indices: readonly number[]): Index { + const out: T[] = []; + for (const i of indices) { + out.push(this.at(i)); + } + return new Index(out, this.name); + } + + // ─── look-up ──────────────────────────────────────────────────── + + /** + * Return the integer position of `key`. + * + * - If `key` appears exactly once, returns a single `number`. + * - If `key` appears more than once, returns an array of positions. + * - If `key` is absent, throws. + */ + getLoc(key: Label): number | readonly number[] { + const positions: number[] = []; + for (let i = 0; i < this._values.length; i++) { + if (this._values[i] === key) { + positions.push(i); + } + } + if (positions.length === 0) { + throw new Error(`KeyError: ${String(key)}`); + } + if (positions.length === 1) { + return positions[0] as number; + } + return positions; + } + + /** + * Compute an indexer array for `target` against this index. + * Each position in the returned array corresponds to a label in `target`: + * - its position in `this`, or + * - `-1` if not found. + */ + getIndexer(target: Index