From bce2c69b4a333ae22377c95b6056d2fe84136b77 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Apr 2026 00:50:08 +0000 Subject: [PATCH] =?UTF-8?q?Iteration=20135:=20Add=20insertColumn/popColumn?= =?UTF-8?q?=20=E2=80=94=20DataFrame=20column=20insertion=20and=20removal?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `src/core/insert_pop.ts` implementing four column manipulation utilities that mirror pandas' DataFrame.insert() and DataFrame.pop(): - `insertColumn(df, loc, col, values)` — inserts a new column at integer position `loc`, rebuilding the ordered column Map; raises RangeError on duplicate names (unless allowDuplicates=true), out-of-range loc, or wrong value length - `popColumn(df, col)` — removes and returns `{ series, df }` (immutable style) - `reorderColumns(df, order)` — reorders/subsets columns (mirrors df[order]) - `moveColumn(df, col, newLoc)` — convenience wrapper: pop then re-insert All operations are non-mutating (return new DataFrames). 40+ unit + 3 property-based tests. Interactive playground page: insert_pop.html. Run: https://github.com/githubnext/tsessebe/actions/runs/24165728899 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- playground/index.html | 5 + playground/insert_pop.html | 172 ++++++++++++++++++++ src/core/index.ts | 2 + src/core/insert_pop.ts | 214 +++++++++++++++++++++++++ src/index.ts | 3 + tests/core/insert_pop.test.ts | 286 ++++++++++++++++++++++++++++++++++ 6 files changed, 682 insertions(+) create mode 100644 playground/insert_pop.html create mode 100644 src/core/insert_pop.ts create mode 100644 tests/core/insert_pop.test.ts diff --git a/playground/index.html b/playground/index.html index 48bfbcb9..e7d1046a 100644 --- a/playground/index.html +++ b/playground/index.html @@ -264,6 +264,11 @@

✅ Complete +
+

📥 insertColumn / popColumn

+

Insert and remove DataFrame columns at precise positions. insertColumn(df, loc, col, values) inserts a column at an integer position; popColumn(df, col) removes a column and returns { series, df }. Also includes reorderColumns and moveColumn. Mirrors pandas.DataFrame.insert() and .pop().

+
✅ Complete
+
diff --git a/playground/insert_pop.html b/playground/insert_pop.html new file mode 100644 index 00000000..8b724566 --- /dev/null +++ b/playground/insert_pop.html @@ -0,0 +1,172 @@ + + + + + + tsb — insertColumn / popColumn + + + +

← tsb playground

+ +

insertColumn / popColumn

+

+ Column insertion and removal for DataFrames — mirrors + + pandas.DataFrame.insert() and + + pandas.DataFrame.pop(). +

+

+ Because tsb DataFrames are immutable, both functions return a new DataFrame + rather than mutating the original. popColumn returns both the extracted + Series and the resulting DataFrame. +

+ +

API summary

+ + + + + + + + + + + + + + + + + + + + + + + + +
Function | Pandas equivalent | Description
insertColumn(df, loc, col, values) | df.insert(loc, col, value) | Insert a new column at integer position loc
popColumn(df, col) | df.pop(col) | Remove a column; returns { series, df }
reorderColumns(df, order) | df[order] | Reorder (and optionally subset) columns
moveColumn(df, col, newLoc) | — | Move an existing column to a new integer position
+ +

Example 1 — insertColumn

+
import { DataFrame, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name: ["Alice", "Bob", "Carol"],
+  age:  [30, 25, 35],
+});
+// columns: ["name", "age"]
+
+// Insert "city" between "name" and "age"
+const df2 = insertColumn(df, 1, "city", ["NY", "LA", "SF"]);
+// df2.columns.values → ["name", "city", "age"]
+// df2.col("city").values → ["NY", "LA", "SF"]
+
+// Original is unchanged
+// df.columns.values → ["name", "age"]
+
+ +

Example 2 — Insert with a Series

+
import { DataFrame, Series, insertColumn } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+const salary = new Series({ data: [100_000, 90_000, 120_000], name: "salary" });
+
+const df2 = insertColumn(df, 0, "salary", salary);
+// df2.columns.values → ["salary", "a", "b"]
+
+ +

Example 3 — popColumn

+
import { DataFrame, popColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+  id:   [1, 2, 3],
+  name: ["Alice", "Bob", "Carol"],
+  age:  [30, 25, 35],
+});
+
+// Remove "age" and keep the Series
+const { series: ageSeries, df: df2 } = popColumn(df, "age");
+// ageSeries.values       → [30, 25, 35]
+// df2.columns.values     → ["id", "name"]
+// df.columns.values      → ["id", "name", "age"]  ← original unchanged
+
+ +

Example 4 — reorderColumns

+
import { DataFrame, reorderColumns } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1], b: [2], c: [3], d: [4] });
+
+// Reverse the column order
+const df2 = reorderColumns(df, ["d", "c", "b", "a"]);
+// df2.columns.values → ["d", "c", "b", "a"]
+
+// Select a subset (drops columns not listed)
+const df3 = reorderColumns(df, ["a", "c"]);
+// df3.columns.values → ["a", "c"]   (b and d are dropped)
+
+ +

Example 5 — moveColumn

+
import { DataFrame, moveColumn } from "tsb";
+
+const df = DataFrame.fromColumns({
+  year:  [2020, 2021, 2022],
+  value: [10, 20, 30],
+  label: ["a", "b", "c"],
+});
+// columns: ["year", "value", "label"]
+
+// Move "label" to the front
+const df2 = moveColumn(df, "label", 0);
+// df2.columns.values → ["label", "year", "value"]
+
+ +

Error cases

+
// Duplicate column name (default: not allowed)
+insertColumn(df, 1, "a", [1, 2, 3]);
+// → RangeError: Column "a" already exists. Use allowDuplicates=true to permit...
+
+// Out-of-range loc
+insertColumn(df, 99, "x", [1, 2, 3]);
+// → RangeError: loc=99 is out of range [0, 2].
+
+// Wrong number of values
+insertColumn(df, 0, "x", [1]);  // df has 3 rows
+// → RangeError: values length 1 does not match DataFrame row count 3.
+
+// Column not found
+popColumn(df, "missing");
+// → RangeError: Column "missing" not found in DataFrame.
+
+ +
+ Immutability: Like all tsb DataFrame operations, these functions never + mutate the original DataFrame. Always assign the return value to a new variable. +
+ +

pandas equivalence table

+ + + + + + + + + +
pandas | tsb
df.insert(1, "x", [1,2,3]) (mutates) | insertColumn(df, 1, "x", [1,2,3])
series = df.pop("col") (mutates) | const { series, df: df2 } = popColumn(df, "col")
df[["c","a","b"]] | reorderColumns(df, ["c","a","b"])
+ + diff --git a/src/core/index.ts b/src/core/index.ts index ada43b65..3d80d8f8 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -15,3 +15,5 @@ export { CategoricalAccessor } from "./cat_accessor.ts"; export type { CatSeriesLike } from "./cat_accessor.ts"; export { MultiIndex } from "./multi_index.ts"; export type { MultiIndexOptions } from "./multi_index.ts"; +export { insertColumn, popColumn, reorderColumns, moveColumn, dataFrameFromPairs } from "./insert_pop.ts"; +export type { PopResult } from "./insert_pop.ts"; diff --git a/src/core/insert_pop.ts b/src/core/insert_pop.ts new file mode 100644 index 00000000..d56c42bc --- /dev/null +++ b/src/core/insert_pop.ts @@ -0,0 +1,214 @@ +/** + * DataFrame.insert() and DataFrame.pop() — column insertion and removal. + * + * Mirrors `pandas.DataFrame.insert(loc, column, value)` and + * `pandas.DataFrame.pop(item)`. + * + * Since `DataFrame` in tsb is immutable, both operations return a new DataFrame. + * `popColumn` returns both the extracted `Series` and the resulting DataFrame. + * + * @example + * ```ts + * import { DataFrame, insertColumn, popColumn } from "tsb"; + * + * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] }); + * + * // Insert column "x" at position 1 (between "a" and "b") + * const df2 = insertColumn(df, 1, "x", [10, 20]); + * // df2.columns.values → ["a", "x", "b"] + * + * // Pop column "a" out of df2 + * const { series, df: df3 } = popColumn(df2, "a"); + * // series.values → [1, 2] + * // df3.columns.values → ["x", "b"] + * ``` + * + * @packageDocumentation + */ + +import type { Label, Scalar } from "../types.ts"; +import { Index } from "./base-index.ts"; +import { DataFrame } from "./frame.ts"; +import { Series } from "./series.ts"; + +// ─── insertColumn ───────────────────────────────────────────────────────────── + +/** + * Insert a new column into `df` at integer column position `loc`. + * + * Mirrors `pandas.DataFrame.insert(loc, column, value, allow_duplicates=False)`. 
+ * Raises a `RangeError` if:
+ * - `column` already exists in `df` (unless `allowDuplicates` is `true`)
+ * - `loc` is not an integer, or is out of range (must be 0 ≤ loc ≤ df.shape[1])
+ * - `values` length does not match the number of rows
+ *
+ * A `Series` passed as `values` is used as-is by this function (its own name
+ * and index are kept); only a plain array is wrapped in a new `Series` built
+ * with `df.index` and `column` as its name.
+ *
+ * @param df Source DataFrame (not mutated).
+ * @param loc Zero-based integer position at which to insert the column.
+ * @param column Name of the new column.
+ * @param values Column data as an array of scalars or a `Series`.
+ * @param allowDuplicates When `true`, skip the duplicate-name check. Default `false`.
+ *   NOTE(review): the columns are rebuilt in a `Map` keyed by name, so re-using
+ *   an existing name cannot add a second entry to that map — confirm whether
+ *   `DataFrame` is meant to support duplicate column names at all.
+ * @returns A new DataFrame with the column inserted.
+ * @throws RangeError On any of the conditions listed above.
+ */
+export function insertColumn(
+  df: DataFrame,
+  loc: number,
+  column: string,
+  values: readonly Scalar[] | Series,
+  allowDuplicates = false,
+): DataFrame {
+  const nCols = df.shape[1];
+  const nRows = df.shape[0];
+
+  if (!allowDuplicates && df.has(column)) {
+    throw new RangeError(
+      `Column "${column}" already exists. Use allowDuplicates=true to permit duplicate names.`,
+    );
+  }
+
+  // NaN and fractional values slip through the range comparison below but
+  // never equal a column position, which would silently drop the new column.
+  if (!Number.isInteger(loc)) {
+    throw new RangeError(`loc=${loc} must be an integer.`);
+  }
+
+  if (loc < 0 || loc > nCols) {
+    throw new RangeError(`loc=${loc} is out of range [0, ${nCols}].`);
+  }
+
+  // A Series is taken as-is; a plain array is wrapped using df's row index.
+  const series: Series =
+    values instanceof Series
+      ? values
+      : new Series({ data: values, index: df.index, name: column });
+
+  if (series.size !== nRows) {
+    throw new RangeError(
+      `values length ${series.size} does not match DataFrame row count ${nRows}.`,
+    );
+  }
+
+  // Rebuild the column map in order, slotting the new column in just before
+  // the existing column that currently sits at position `loc`.
+  const colMap = new Map<string, Series<Scalar>>();
+  let idx = 0;
+
+  for (const colName of df.columns.values) {
+    if (idx === loc) {
+      colMap.set(column, series);
+    }
+    colMap.set(colName, df.col(colName));
+    idx++;
+  }
+
+  // Insertion at the end (loc === nCols) is never reached inside the loop.
+  if (loc === nCols) {
+    colMap.set(column, series);
+  }
+
+  return new DataFrame(colMap, df.index);
+}
+
+// ─── popColumn ────────────────────────────────────────────────────────────────
+
+/** Return type of {@link popColumn}.
*/ +export interface PopResult { + /** The extracted column as a Series. */ + readonly series: Series; + /** The DataFrame with the column removed. */ + readonly df: DataFrame; +} + +/** + * Remove a column from `df` and return both the extracted `Series` and the + * resulting DataFrame. + * + * Mirrors `pandas.DataFrame.pop(item)`, but because tsb DataFrames are + * immutable this function returns the removed Series *and* the new DataFrame + * (rather than mutating in place). + * + * Raises a `RangeError` if `col` does not exist in `df`. + * + * @param df Source DataFrame (not mutated). + * @param col Name of the column to remove. + * @returns `{ series, df }` — the extracted column and the remaining DataFrame. + * + * @example + * ```ts + * const { series, df: remaining } = popColumn(df, "age"); + * // series contains the "age" column; remaining has all other columns + * ``` + */ +export function popColumn(df: DataFrame, col: string): PopResult { + const series = df.get(col); + if (series === undefined) { + throw new RangeError(`Column "${col}" not found in DataFrame.`); + } + + const colMap = new Map>(); + for (const colName of df.columns.values) { + if (colName !== col) { + colMap.set(colName, df.col(colName)); + } + } + + return { + series, + df: new DataFrame(colMap, df.index), + }; +} + +// ─── reorderColumns ────────────────────────────────────────────────────────── + +/** + * Reorder the columns of `df` to match `order`. + * + * Mirrors `df[order]` in pandas. All names in `order` must be present in `df`; + * extra names in `df` not listed in `order` are dropped. + * + * @param df Source DataFrame. + * @param order New column order (subset of `df.columns.values`). + * @returns A new DataFrame with columns in the specified order. 
+ */ +export function reorderColumns(df: DataFrame, order: readonly string[]): DataFrame { + const colMap = new Map>(); + for (const name of order) { + const s = df.get(name); + if (s === undefined) { + throw new RangeError(`Column "${name}" not found in DataFrame.`); + } + colMap.set(name, s); + } + return new DataFrame(colMap, df.index); +} + +// ─── moveColumn ────────────────────────────────────────────────────────────── + +/** + * Move an existing column to a new integer position. + * + * This is a convenience wrapper combining {@link popColumn} and + * {@link insertColumn}: it removes the column from its current position and + * re-inserts it at `newLoc` in the resulting DataFrame. + * + * @param df Source DataFrame. + * @param col Name of the column to move. + * @param newLoc Target position (0 ≤ newLoc ≤ df.shape[1] − 1). + * @returns A new DataFrame with the column at the new position. + */ +export function moveColumn(df: DataFrame, col: string, newLoc: number): DataFrame { + const { series, df: without } = popColumn(df, col); + return insertColumn(without, newLoc, col, series); +} + +// ─── internal re-export helper (used by DataFrame constructor access) ───────── + +/** + * Build a new DataFrame from an ordered iterable of `[name, Series]` pairs and + * a row index. Exported for use by other tsb modules that need to construct + * DataFrames without going through the public factory methods. + * + * @internal + */ +export function dataFrameFromPairs( + pairs: Iterable]>, + index: Index