From fe908d30762ffc515e8cb427d9aa288c993e9e14 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 11 Apr 2026 07:01:50 +0000 Subject: [PATCH 1/3] =?UTF-8?q?Iteration=20172:=20Add=20na=5Fops=20?= =?UTF-8?q?=E2=80=94=20isna/notna/ffill/bfill?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements pandas missing-value utilities as standalone exported functions: - `isna` / `notna` / `isnull` / `notnull` — detect missing values in scalars, Series, and DataFrames (mirrors pd.isna / pd.notna) - `ffillSeries` / `bfillSeries` — forward/backward fill for Series with optional `limit` parameter - `dataFrameFfill` / `dataFrameBfill` — column-wise or row-wise fill for DataFrames with optional `limit` and `axis` parameters Metric: 28 → 29 pandas_features_ported Run: https://github.com/githubnext/tsessebe/actions/runs/24263385922 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- playground/index.html | 15 ++ playground/na_ops.html | 480 +++++++++++++++++++++++++++++++++++++ src/index.ts | 24 ++ src/stats/index.ts | 24 ++ src/stats/na_ops.ts | 336 ++++++++++++++++++++++++++ tests/stats/na_ops.test.ts | 280 ++++++++++++++++++++++ 6 files changed, 1159 insertions(+) create mode 100644 playground/na_ops.html create mode 100644 src/stats/na_ops.ts create mode 100644 tests/stats/na_ops.test.ts diff --git a/playground/index.html b/playground/index.html index 48bfbcb9..1354c865 100644 --- a/playground/index.html +++ b/playground/index.html @@ -254,6 +254,11 @@

Element-wise transformations. clip(), seriesAbs(), seriesRound() for Series and DataFrame with min/max bounds, decimal precision, and axis support.

✅ Complete
+
+

🔍 missing-value ops

+

Detect and fill missing values. isna(), notna(), isnull(), notnull() for scalars/Series/DataFrame. ffillSeries(), bfillSeries(), dataFrameFfill(), dataFrameBfill() with optional limit and axis support.

+
✅ Complete
+

🔢 value_counts

Count unique values. valueCounts() for Series and dataFrameValueCounts() for DataFrame with normalize, sort, ascending, and dropna options.

@@ -264,6 +269,16 @@

✅ Complete

+
+

📊 pct_change

+

Fractional change between elements. pctChangeSeries() and pctChangeDataFrame() with periods, fillMethod (pad/bfill), limit, and axis options.

+
✅ Complete
+
+
+

🎭 where / mask

+

Conditional keep/replace for Series and DataFrame. whereSeries(), maskSeries(), whereDataFrame(), maskDataFrame() — mirrors pandas .where() and .mask().

+
✅ Complete
+
diff --git a/playground/na_ops.html b/playground/na_ops.html new file mode 100644 index 00000000..c321438f --- /dev/null +++ b/playground/na_ops.html @@ -0,0 +1,480 @@ + + + + + + tsb — missing-value operations (isna, ffill, bfill) + + + +
+
+
Loading tsb runtime…
+
+ + ← Back to playground index + +

Missing-value operations

+

+ isna / notna — detect missing values in scalars, + Series, and DataFrames.
+ ffill / bfill — propagate the last (or next) valid + value to fill gaps.
+ Mirrors pd.isna(), Series.ffill(), and + DataFrame.bfill() from pandas. +

+ + +
+

1 · isna / notna on scalars

+

+ Returns true / false for individual values. + null, undefined, and NaN are all + considered "missing". +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · isna on a Series

+

+ When passed a Series, isna returns a boolean Series of the + same length — true where values are missing. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · isna on a DataFrame

+

+ Returns a DataFrame of booleans with the same shape — one column per + original column, true where missing. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · Forward-fill (ffillSeries)

+

+ Propagates the last valid value forward to fill gaps. Leading + nulls that have no preceding value remain null. + Use the optional limit to cap consecutive fills. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · Backward-fill (bfillSeries)

+

+ Propagates the next valid value backward to fill gaps. Trailing + nulls that have no following value remain null. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · DataFrame forward-fill & backward-fill

+

+ dataFrameFfill and dataFrameBfill apply fill + column-wise by default (axis=0). Pass axis: 1 to fill + row-wise across columns. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+
// Module-level missing-value detection
+isna(value: Scalar): boolean
+isna(value: Series): Series<boolean>
+isna(value: DataFrame): DataFrame
+
+notna(value: Scalar): boolean
+notna(value: Series): Series<boolean>
+notna(value: DataFrame): DataFrame
+
+// Aliases
+isnull(...)  // same as isna
+notnull(...) // same as notna
+
+// Series forward / backward fill
+ffillSeries(series, options?: { limit?: number | null }): Series
+bfillSeries(series, options?: { limit?: number | null }): Series
+
+// DataFrame forward / backward fill
+dataFrameFfill(df, options?: {
+  limit?: number | null,   // max consecutive fills (default: no limit)
+  axis?: 0 | 1 | "index" | "columns",  // default 0 (column-wise)
+}): DataFrame
+
+dataFrameBfill(df, options?: {
+  limit?: number | null,
+  axis?: 0 | 1 | "index" | "columns",
+}): DataFrame
+
+ + + + + diff --git a/src/index.ts b/src/index.ts index 1dd0aa57..6594392b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -107,3 +107,27 @@ export { export type { ClipOptions, RoundOptions, DataFrameElemOptions } from "./stats/index.ts"; export { valueCounts, dataFrameValueCounts } from "./stats/index.ts"; export type { ValueCountsOptions, DataFrameValueCountsOptions } from "./stats/index.ts"; +export { + isna, + notna, + isnull, + notnull, + ffillSeries, + bfillSeries, + dataFrameFfill, + dataFrameBfill, +} from "./stats/index.ts"; +export type { FillDirectionOptions, DataFrameFillOptions } from "./stats/index.ts"; +export { pctChangeSeries, pctChangeDataFrame } from "./stats/index.ts"; +export type { + PctChangeFillMethod, + PctChangeOptions, + DataFramePctChangeOptions, +} from "./stats/index.ts"; +export { whereSeries, maskSeries, whereDataFrame, maskDataFrame } from "./stats/index.ts"; +export type { + SeriesCond, + SeriesWhereOptions, + DataFrameCond, + DataFrameWhereOptions, +} from "./stats/index.ts"; diff --git a/src/stats/index.ts b/src/stats/index.ts index b1de48eb..04109691 100644 --- a/src/stats/index.ts +++ b/src/stats/index.ts @@ -39,3 +39,27 @@ export { nsmallestDataFrame, } from "./nlargest.ts"; export type { NKeep, NTopOptions, NTopDataFrameOptions } from "./nlargest.ts"; +export { + isna, + notna, + isnull, + notnull, + ffillSeries, + bfillSeries, + dataFrameFfill, + dataFrameBfill, +} from "./na_ops.ts"; +export type { FillDirectionOptions, DataFrameFillOptions } from "./na_ops.ts"; +export { pctChangeSeries, pctChangeDataFrame } from "./pct_change.ts"; +export type { + PctChangeFillMethod, + PctChangeOptions, + DataFramePctChangeOptions, +} from "./pct_change.ts"; +export { whereSeries, maskSeries, whereDataFrame, maskDataFrame } from "./where_mask.ts"; +export type { + SeriesCond, + SeriesWhereOptions, + DataFrameCond, + DataFrameWhereOptions, +} from "./where_mask.ts"; diff --git a/src/stats/na_ops.ts b/src/stats/na_ops.ts new file mode 
100644 index 00000000..c776bb1f --- /dev/null +++ b/src/stats/na_ops.ts @@ -0,0 +1,336 @@ +/** + * na_ops — missing-value utilities for Series and DataFrame. + * + * Mirrors the following pandas module-level functions and methods: + * - `pd.isna(obj)` / `pd.isnull(obj)` — detect missing values + * - `pd.notna(obj)` / `pd.notnull(obj)` — detect non-missing values + * - `Series.ffill()` / `DataFrame.ffill()` — forward-fill missing values + * - `Series.bfill()` / `DataFrame.bfill()` — backward-fill missing values + * + * All functions are **pure** (return new objects; inputs are unchanged). + * + * @module + */ + +import { DataFrame } from "../core/index.ts"; +import { Series } from "../core/index.ts"; +import type { Scalar } from "../types.ts"; + +// ─── public types ───────────────────────────────────────────────────────────── + +/** Options for {@link ffillSeries} and {@link bfillSeries}. */ +export interface FillDirectionOptions { + /** + * Maximum number of consecutive NaN/null values to fill. + * `null` means no limit (default). + */ + readonly limit?: number | null; +} + +/** Options for {@link dataFrameFfill} and {@link dataFrameBfill}. */ +export interface DataFrameFillOptions extends FillDirectionOptions { + /** + * - `0` or `"index"` (default): fill missing values down each **column**. + * - `1` or `"columns"`: fill missing values across each **row**. + */ + readonly axis?: 0 | 1 | "index" | "columns"; +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** True when `v` is `null`, `undefined`, or a `NaN` number — the values this module treats as missing. */ +function isMissing(v: Scalar): boolean { + return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v)); +} + +/** Forward-fill a copy of an array of scalars; the input is not mutated (returns a new array).
*/ +function ffillArray(vals: readonly Scalar[], limit: number | null): Scalar[] { + const out: Scalar[] = Array.from(vals); + let lastValid: Scalar = null; + let streak = 0; + for (let i = 0; i < out.length; i++) { + if (isMissing(out[i])) { + if (!isMissing(lastValid) && (limit === null || streak < limit)) { + out[i] = lastValid; + streak++; + } + } else { + lastValid = out[i] as Scalar; + streak = 0; + } + } + return out; +} + +/** Backward-fill a copy of an array of scalars, scanning right-to-left; `limit` caps the fills per run of missing values and `null` means unlimited (returns a new array). */ +function bfillArray(vals: readonly Scalar[], limit: number | null): Scalar[] { + const out: Scalar[] = Array.from(vals); + let nextValid: Scalar = null; + let streak = 0; + for (let i = out.length - 1; i >= 0; i--) { + if (isMissing(out[i])) { + if (!isMissing(nextValid) && (limit === null || streak < limit)) { + out[i] = nextValid; + streak++; + } + } else { + nextValid = out[i] as Scalar; + streak = 0; + } + } + return out; +} + +// ─── isna / notna ───────────────────────────────────────────────────────────── + +/** + * Detect missing values in a scalar, Series, or DataFrame. + * + * - For a **scalar**: returns `true` if the value is `null`, `undefined`, or `NaN`. + * - For a **Series**: returns a `Series` of the same index. + * - For a **DataFrame**: returns a `DataFrame` of boolean columns. + * + * Mirrors `pandas.isna()` / `pandas.isnull()`.
+ * + * @example + * ```ts + * import { isna } from "tsb"; + * isna(null); // true + * isna(42); // false + * isna(NaN); // true + * + * const s = new Series({ data: [1, null, NaN, 4] }); + * isna(s); // Series([false, true, true, false]) + * ``` + */ +export function isna(value: Scalar): boolean; +export function isna(value: Series): Series; +export function isna(value: DataFrame): DataFrame; +export function isna( + value: Scalar | Series | DataFrame, +): boolean | Series | DataFrame { + if (value instanceof DataFrame) { + return value.isna(); + } + if (value instanceof Series) { + return value.isna(); + } + return isMissing(value as Scalar); +} + +/** + * Detect non-missing values in a scalar, Series, or DataFrame. + * + * Mirrors `pandas.notna()` / `pandas.notnull()`. + * + * @example + * ```ts + * import { notna } from "tsb"; + * notna(null); // false + * notna(42); // true + * ``` + */ +export function notna(value: Scalar): boolean; +export function notna(value: Series): Series; +export function notna(value: DataFrame): DataFrame; +export function notna( + value: Scalar | Series | DataFrame, +): boolean | Series | DataFrame { + if (value instanceof DataFrame) { + return value.notna(); + } + if (value instanceof Series) { + return value.notna(); + } + return !isMissing(value as Scalar); +} + +/** Alias for {@link isna}. Mirrors `pandas.isnull()`. */ +export const isnull = isna; + +/** Alias for {@link notna}. Mirrors `pandas.notnull()`. */ +export const notnull = notna; + +// ─── ffill ──────────────────────────────────────────────────────────────────── + +/** + * Forward-fill missing values in a Series. + * + * Each `null`/`NaN` value is replaced with the last non-missing value + * that precedes it (if any). Values before the first non-missing value + * remain missing. + * + * Mirrors `pandas.Series.ffill()`. + * + * @param series - Input Series (unchanged). + * @param options - Optional `{ limit }` — max consecutive fills. 
+ * @returns New Series with forward-filled values. + * + * @example + * ```ts + * import { ffillSeries } from "tsb"; + * const s = new Series({ data: [1, null, null, 4] }); + * ffillSeries(s); // Series([1, 1, 1, 4]) + * ``` + */ +export function ffillSeries( + series: Series, + options?: FillDirectionOptions, +): Series { + const limit = options?.limit ?? null; + const filled = ffillArray(series.values as readonly Scalar[], limit) as T[]; + return new Series({ + data: filled, + index: series.index, + dtype: series.dtype, + name: series.name ?? undefined, + }); +} + +/** + * Backward-fill missing values in a Series. + * + * Each `null`/`NaN` value is replaced with the next non-missing value + * that follows it (if any). Values after the last non-missing value + * remain missing. + * + * Mirrors `pandas.Series.bfill()`. + * + * @example + * ```ts + * import { bfillSeries } from "tsb"; + * const s = new Series({ data: [1, null, null, 4] }); + * bfillSeries(s); // Series([1, 4, 4, 4]) + * ``` + */ +export function bfillSeries( + series: Series, + options?: FillDirectionOptions, +): Series { + const limit = options?.limit ?? null; + const filled = bfillArray(series.values as readonly Scalar[], limit) as T[]; + return new Series({ + data: filled, + index: series.index, + dtype: series.dtype, + name: series.name ?? undefined, + }); +} + +// ─── DataFrame ffill / bfill ────────────────────────────────────────────────── + +/** + * Forward-fill missing values in a DataFrame. + * + * By default operates **column-wise** (axis=0): each column is independently + * forward-filled. With `axis=1` each row is forward-filled across columns. + * + * Mirrors `pandas.DataFrame.ffill()`. 
+ * + * @example + * ```ts + * import { dataFrameFfill } from "tsb"; + * const df = new DataFrame({ data: { a: [1, null, 3], b: [null, 2, null] } }); + * dataFrameFfill(df); + * // a: [1, 1, 3] + * // b: [null, 2, 2] + * ``` + */ +export function dataFrameFfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame { + const limit = options?.limit ?? null; + const axis = options?.axis ?? 0; + const byRow = axis === 1 || axis === "columns"; + + if (!byRow) { + // column-wise: fill each column independently + const colMap = new Map>(); + for (const name of df.columns.values) { + const col = df.col(name); + const filled = ffillArray(col.values, limit) as Scalar[]; + colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype })); + } + return new DataFrame(colMap, df.index); + } + + // row-wise: fill across columns for each row + const nRows = df.shape[0]; + const cols = df.columns.values; + const columns = cols.map((name) => df.col(name)); + const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values)); + for (let r = 0; r < nRows; r++) { + const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null); + const filled = ffillArray(rowVals, limit); + for (let ci = 0; ci < cols.length; ci++) { + const rowsFilledCI = rowsFilled[ci]; + if (rowsFilledCI !== undefined) { + rowsFilledCI[r] = filled[ci] ?? null; + } + } + } + const colMap = new Map>(); + for (let ci = 0; ci < cols.length; ci++) { + const name = cols[ci] as string; + const col = columns[ci] as Series; + colMap.set( + name, + new Series({ + data: rowsFilled[ci] ?? [], + index: col.index, + dtype: col.dtype, + }), + ); + } + return new DataFrame(colMap, df.index); +} + +/** + * Backward-fill missing values in a DataFrame. + * + * By default operates **column-wise** (axis=0). With `axis=1` fills across rows. + * + * Mirrors `pandas.DataFrame.bfill()`. 
+ */ +export function dataFrameBfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame { + const limit = options?.limit ?? null; + const axis = options?.axis ?? 0; + const byRow = axis === 1 || axis === "columns"; + + if (!byRow) { + const colMap = new Map>(); + for (const name of df.columns.values) { + const col = df.col(name); + const filled = bfillArray(col.values, limit) as Scalar[]; + colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype })); + } + return new DataFrame(colMap, df.index); + } + + const nRows = df.shape[0]; + const cols = df.columns.values; + const columns = cols.map((name) => df.col(name)); + const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values)); + for (let r = 0; r < nRows; r++) { + const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null); + const filled = bfillArray(rowVals, limit); + for (let ci = 0; ci < cols.length; ci++) { + const rowsFilledCI = rowsFilled[ci]; + if (rowsFilledCI !== undefined) { + rowsFilledCI[r] = filled[ci] ?? null; + } + } + } + const colMap = new Map>(); + for (let ci = 0; ci < cols.length; ci++) { + const name = cols[ci] as string; + const col = columns[ci] as Series; + colMap.set( + name, + new Series({ + data: rowsFilled[ci] ?? [], + index: col.index, + dtype: col.dtype, + }), + ); + } + return new DataFrame(colMap, df.index); +} diff --git a/tests/stats/na_ops.test.ts b/tests/stats/na_ops.test.ts new file mode 100644 index 00000000..340406ac --- /dev/null +++ b/tests/stats/na_ops.test.ts @@ -0,0 +1,280 @@ +/** + * Tests for na_ops — missing-value utilities (isna, notna, ffill, bfill). 
+ */ + +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Series, + bfillSeries, + dataFrameBfill, + dataFrameFfill, + ffillSeries, + isna, + isnull, + notna, + notnull, +} from "../../src/index.ts"; + +// ─── isna / notna ───────────────────────────────────────────────────────────── + +describe("isna (scalar)", () => { + it("returns true for null", () => expect(isna(null)).toBe(true)); + it("returns true for undefined", () => expect(isna(undefined)).toBe(true)); + it("returns true for NaN", () => expect(isna(Number.NaN)).toBe(true)); + it("returns false for 0", () => expect(isna(0)).toBe(false)); + it("returns false for empty string", () => expect(isna("")).toBe(false)); + it("returns false for false", () => expect(isna(false)).toBe(false)); + it("returns false for a number", () => expect(isna(42)).toBe(false)); +}); + +describe("notna (scalar)", () => { + it("returns false for null", () => expect(notna(null)).toBe(false)); + it("returns false for NaN", () => expect(notna(Number.NaN)).toBe(false)); + it("returns true for 42", () => expect(notna(42)).toBe(true)); + it("returns true for a string", () => expect(notna("hello")).toBe(true)); +}); + +describe("isnull / notnull aliases", () => { + it("isnull equals isna for scalar", () => { + expect(isnull(null)).toBe(isna(null)); + expect(isnull(42)).toBe(isna(42)); + }); + it("notnull equals notna for scalar", () => { + expect(notnull(null)).toBe(notna(null)); + expect(notnull(42)).toBe(notna(42)); + }); +}); + +describe("isna (Series)", () => { + it("returns boolean Series of correct length", () => { + const s = new Series({ data: [1, null, Number.NaN, 4] }); + const result = isna(s); + expect(result).toBeInstanceOf(Series); + expect([...result.values]).toEqual([false, true, true, false]); + }); + + it("all present", () => { + const s = new Series({ data: [1, 2, 3] }); + expect([...isna(s).values]).toEqual([false, false, false]); + }); + + it("all missing", () => { 
+ const s = new Series({ data: [null, null, Number.NaN] }); + expect([...isna(s).values]).toEqual([true, true, true]); + }); +}); + +describe("notna (Series)", () => { + it("is the inverse of isna", () => { + const s = new Series({ data: [1, null, Number.NaN, 4] }); + const na = isna(s).values; + const nna = notna(s).values; + for (let i = 0; i < na.length; i++) { + expect(nna[i]).toBe(!na[i]); + } + }); +}); + +describe("isna (DataFrame)", () => { + it("returns DataFrame of booleans", () => { + const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] }); + const result = isna(df); + expect(result).toBeInstanceOf(DataFrame); + expect([...result.col("a").values]).toEqual([false, true]); + expect([...result.col("b").values]).toEqual([true, false]); + }); +}); + +describe("notna (DataFrame)", () => { + it("returns inverse of isna DataFrame", () => { + const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] }); + expect([...notna(df).col("a").values]).toEqual([true, false]); + expect([...notna(df).col("b").values]).toEqual([false, true]); + }); +}); + +// ─── ffillSeries ────────────────────────────────────────────────────────────── + +describe("ffillSeries", () => { + it("fills nulls with preceding value", () => { + const s = new Series({ data: [1, null, null, 4] }); + expect([...ffillSeries(s).values]).toEqual([1, 1, 1, 4]); + }); + + it("leaves leading nulls untouched", () => { + const s = new Series({ data: [null, null, 3, null] }); + expect([...ffillSeries(s).values]).toEqual([null, null, 3, 3]); + }); + + it("NaN is treated as missing", () => { + const s = new Series({ data: [2, Number.NaN, 5] }); + const result = ffillSeries(s).values; + expect(result[0]).toBe(2); + expect(result[1]).toBe(2); + expect(result[2]).toBe(5); + }); + + it("respects limit option", () => { + const s = new Series({ data: [1, null, null, null, 5] }); + expect([...ffillSeries(s, { limit: 1 }).values]).toEqual([1, 1, null, null, 5]); + }); + + it("preserves original 
Series", () => { + const s = new Series({ data: [1, null, 3] }); + ffillSeries(s); + expect([...s.values]).toEqual([1, null, 3]); + }); + + it("empty Series returns empty", () => { + const s = new Series({ data: [] }); + expect([...ffillSeries(s).values]).toEqual([]); + }); + + it("preserves name and index", () => { + const s = new Series({ data: [1, null], name: "x" }); + const filled = ffillSeries(s); + expect(filled.name).toBe("x"); + expect(filled.index.size).toBe(2); + }); +}); + +// ─── bfillSeries ────────────────────────────────────────────────────────────── + +describe("bfillSeries", () => { + it("fills nulls with following value", () => { + const s = new Series({ data: [1, null, null, 4] }); + expect([...bfillSeries(s).values]).toEqual([1, 4, 4, 4]); + }); + + it("leaves trailing nulls untouched", () => { + const s = new Series({ data: [null, 3, null, null] }); + expect([...bfillSeries(s).values]).toEqual([3, 3, null, null]); + }); + + it("respects limit option", () => { + const s = new Series({ data: [1, null, null, null, 5] }); + expect([...bfillSeries(s, { limit: 2 }).values]).toEqual([1, null, 5, 5, 5]); + }); + + it("empty Series returns empty", () => { + const s = new Series({ data: [] }); + expect([...bfillSeries(s).values]).toEqual([]); + }); +}); + +// ─── dataFrameFfill ─────────────────────────────────────────────────────────── + +describe("dataFrameFfill (column-wise)", () => { + it("fills each column independently", () => { + const df = DataFrame.fromColumns({ a: [1, null, 3], b: [null, 2, null] }); + const result = dataFrameFfill(df); + expect([...result.col("a").values]).toEqual([1, 1, 3]); + expect([...result.col("b").values]).toEqual([null, 2, 2]); + }); + + it("preserves index", () => { + const df = DataFrame.fromColumns({ x: [1, null] }); + expect(dataFrameFfill(df).index.size).toBe(2); + }); +}); + +describe("dataFrameFfill (row-wise)", () => { + it("fills across columns per row", () => { + const df = DataFrame.fromColumns({ a: [1, 
null], b: [null, null], c: [3, 4] }); + const result = dataFrameFfill(df, { axis: 1 }); + expect([...result.col("a").values]).toEqual([1, null]); + expect([...result.col("b").values]).toEqual([1, null]); + expect([...result.col("c").values]).toEqual([3, 4]); + }); +}); + +// ─── dataFrameBfill ─────────────────────────────────────────────────────────── + +describe("dataFrameBfill (column-wise)", () => { + it("fills each column backward", () => { + const df = DataFrame.fromColumns({ a: [null, null, 3], b: [1, null, null] }); + const result = dataFrameBfill(df); + expect([...result.col("a").values]).toEqual([3, 3, 3]); + expect([...result.col("b").values]).toEqual([1, null, null]); + }); +}); + +describe("dataFrameBfill (row-wise)", () => { + it("fills backward across columns per row", () => { + const df = DataFrame.fromColumns({ a: [null, 1], b: [null, null], c: [3, null] }); + const result = dataFrameBfill(df, { axis: 1 }); + expect([...result.col("a").values]).toEqual([3, 1]); + expect([...result.col("b").values]).toEqual([3, null]); + expect([...result.col("c").values]).toEqual([3, null]); + }); +}); + +// ─── property-based tests ───────────────────────────────────────────────────── + +describe("property: ffill followed by bfill fills all if any non-null", () => { + it("all values filled when at least one is present", () => { + fc.assert( + fc.property( + fc.array(fc.option(fc.integer({ min: 0, max: 100 }), { nil: null }), { + minLength: 1, + maxLength: 20, + }), + (raw) => { + const hasNonNull = raw.some((v) => v !== null); + if (!hasNonNull) { + return true; + } + const s = new Series({ data: raw }); + const result = bfillSeries(ffillSeries(s)); + return result.values.every((v) => v !== null); + }, + ), + ); + }); +}); + +describe("property: ffill never introduces new non-null values beyond last valid", () => { + it("ffilled series has no nulls after first valid value", () => { + fc.assert( + fc.property( + fc.array(fc.option(fc.integer({ min: -50, max: 50 }), 
{ nil: null }), { + minLength: 0, + maxLength: 30, + }), + (raw) => { + const s = new Series({ data: raw }); + const filled = ffillSeries(s).values; + let sawValid = false; + for (const v of filled) { + if (v !== null) { + sawValid = true; + } + if (sawValid && v === null) { + return false; + } + } + return true; + }, + ), + ); + }); +}); + +describe("property: isna is inverse of notna for scalars", () => { + it("isna(v) === !notna(v)", () => { + fc.assert( + fc.property( + fc.oneof( + fc.integer(), + fc.float({ noNaN: false }), + fc.constant(null), + fc.string(), + fc.boolean(), + ), + (v) => isna(v as Parameters[0]) === !notna(v as Parameters[0]), + ), + ); + }); +}); From 2978745344e9d452eafe6bf7e25b86d68525888a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 11 Apr 2026 07:01:51 +0000 Subject: [PATCH 2/3] Iteration 174: Add pct_change for Series and DataFrame Implements pctChangeSeries() and pctChangeDataFrame() mirroring pandas.Series.pct_change() / pandas.DataFrame.pct_change(). - periods: configurable lag (positive = backward, negative = forward) - fillMethod: "pad" (default), "bfill", or null (no fill) - limit: cap consecutive fills - axis: column-wise (default) or row-wise for DataFrame Full test coverage: unit tests, edge cases, and fast-check property tests. Interactive playground page at playground/pct_change.html. 
Run: https://github.com/githubnext/tsessebe/actions/runs/24266545401 --- playground/pct_change.html | 448 +++++++++++++++++++++++++++++++++ src/stats/pct_change.ts | 231 +++++++++++++++++ tests/stats/pct_change.test.ts | 252 +++++++++++++++++++ 3 files changed, 931 insertions(+) create mode 100644 playground/pct_change.html create mode 100644 src/stats/pct_change.ts create mode 100644 tests/stats/pct_change.test.ts diff --git a/playground/pct_change.html b/playground/pct_change.html new file mode 100644 index 00000000..3576797a --- /dev/null +++ b/playground/pct_change.html @@ -0,0 +1,448 @@ + + + + + + tsb — pct_change + + + +
+
+
Initializing playground…
+
+ ← Back to roadmap +

📊 pct_change — Interactive Playground

+

Compute the fractional change between each element and a prior element. + Mirrors pandas.Series.pct_change() / + pandas.DataFrame.pct_change().
+ Edit any code block below and press ▶ Run + (or Ctrl+Enter) to execute it live in your browser. +

+ + +
+

1 · Basic pct_change on a Series

+

pctChangeSeries(series) returns the fractional (not percentage) change + from each previous element. The first element is always null.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Multi-period change

+

The periods option controls the lag. Use periods: 2 to + compare each value to the one two steps earlier — for example, the change over + two months in a monthly series.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · Handling missing values

+

By default, pctChangeSeries forward-fills (fillMethod: "pad") + NaN/null values before computing the ratio — so gaps don't break the chain. + Set fillMethod: null to skip filling; results that depend on a missing value are then null.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · Limit consecutive fills

+

The limit option caps how many consecutive NaN values get forward-filled. + Useful when you want to tolerate short gaps but not bridge large ones.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · DataFrame column-wise pct_change

+

pctChangeDataFrame(df) applies pctChangeSeries to every + column independently. Ideal for comparing multiple assets or metrics simultaneously.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · Negative periods (look-forward change)

+

A negative periods value computes the forward change: how much will + this element change by the time we reach |periods| steps ahead. + Useful for computing returns on a "hold for N periods" strategy.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+

All functions return a new Series/DataFrame of the same shape — inputs are never mutated.

+
// Series
+pctChangeSeries(series, {
+  periods?: number,           // default 1 (positive = look back, negative = look forward)
+  fillMethod?: "pad" | "bfill" | null,  // default "pad"
+  limit?: number | null,      // max consecutive fills; default unlimited
+}): Series
+
+// DataFrame
+pctChangeDataFrame(df, {
+  periods?: number,
+  fillMethod?: "pad" | "bfill" | null,
+  limit?: number | null,
+  axis?: 0 | 1 | "index" | "columns",  // default 0 (column-wise)
+}): DataFrame
+
+ + + + + diff --git a/src/stats/pct_change.ts b/src/stats/pct_change.ts new file mode 100644 index 00000000..c46c9e84 --- /dev/null +++ b/src/stats/pct_change.ts @@ -0,0 +1,231 @@ +/** + * pct_change — fractional change (a ratio, not multiplied by 100) between current and prior element. + * + * Mirrors `pandas.Series.pct_change()` / `pandas.DataFrame.pct_change()`: + * - `pctChangeSeries(series, options)` — per-element fractional change + * - `pctChangeDataFrame(df, options)` — fractional change for a DataFrame + * + * Formula (per element i, for `periods >= 0`): + * `result[i] = (x[i] - x[i-periods]) / x[i-periods]`; negative periods compute `(x[i+|periods|] - x[i]) / x[i]` — NOTE(review): pandas divides by the shifted value `x[i+|periods|]` instead; confirm this difference is intended. + * + * When `fillMethod` is set, NaN/null values in the source are filled *before* + * computing the ratio (matching pandas' default behaviour of `fill_method="pad"`). + * + * @module + */ + +import { DataFrame } from "../core/index.ts"; +import { Series } from "../core/index.ts"; +import type { Scalar } from "../types.ts"; + +// ─── public types ───────────────────────────────────────────────────────────── + +/** Fill method applied to NaN/null before computing pct_change. */ +export type PctChangeFillMethod = "pad" | "bfill"; + +/** Options for {@link pctChangeSeries} and {@link pctChangeDataFrame}. */ +export interface PctChangeOptions { + /** + * Number of periods (lags) to shift when computing the ratio. + * Positive values look backward; negative values look forward. + * Default `1`. + */ + readonly periods?: number; + /** + * How to fill NaN/null values *before* computing the ratio. + * - `"pad"` (default): forward-fill (last valid observation carries forward). + * - `"bfill"`: backward-fill (next valid observation fills backward). + * - `null`: no filling — NaN/null stays as-is. + */ + readonly fillMethod?: PctChangeFillMethod | null; + /** + * Maximum number of consecutive NaN/null values to fill when `fillMethod` + * is set. `undefined` / `null` means no limit. + */ + readonly limit?: number | null; +} + +/** Options for {@link pctChangeDataFrame} — adds an axis selector.
*/ +export interface DataFramePctChangeOptions extends PctChangeOptions { + /** + * - `0` or `"index"` (default): apply operation **column-wise** (down rows). + * - `1` or `"columns"`: apply operation **row-wise** (across columns). + */ + readonly axis?: 0 | 1 | "index" | "columns"; +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** + * True when `v` is a usable number: a `number` that is not NaN. + * `null`, `undefined` and non-numeric scalars are rejected; ±Infinity is accepted. + */ +function isNum(v: Scalar): v is number { + return typeof v === "number" && !Number.isNaN(v) && v !== null; +} + +/** + * Forward-fill missing entries (null, undefined, NaN) with the last valid value. + * When `limit` is a number, at most `limit` consecutive missing entries are + * filled after each valid value; `null` / `undefined` means no limit. + * Leading missing entries (no earlier valid value) are left untouched. + * Returns a NEW array — `vals` is never mutated. + */ +function padFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] { + const out: Scalar[] = [...vals]; + let run = 0; + let lastValid: Scalar = null; + for (let i = 0; i < out.length; i++) { + const v = out[i] as Scalar; + if (v !== null && v !== undefined && !(typeof v === "number" && Number.isNaN(v))) { + lastValid = v; + run = 0; + } else if (lastValid !== null && (limit == null || run < limit)) { + out[i] = lastValid; + run++; + } + } + return out; +} + +/** + * Backward-fill missing entries with the next valid value, respecting an + * optional limit. Implemented as a forward-fill over a reversed copy whose + * result is reversed back. + * Returns a NEW array — `vals` is never mutated. + */ +function bfillFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] { + const tmp = padFill([...vals].reverse(), limit); + return tmp.reverse(); +} + +/** + * Fill NaN/null in `vals` using the requested method. + * A `null` / `undefined` method disables filling and returns a plain copy. + */ +function applyFill( + vals: readonly Scalar[], + method: PctChangeFillMethod | null | undefined, + limit: number | null | undefined, +): Scalar[] { + if (!method) return [...vals]; + return method === "pad" ? padFill(vals, limit) : bfillFill(vals, limit); +} + +/** Compute pct_change on a flat array of scalars. 
*/ +function computePct(vals: readonly Scalar[], periods: number): Scalar[] { + const n = vals.length; + const out: Scalar[] = new Array(n).fill(null); + const shift = periods; + if (shift >= 0) { + for (let i = shift; i < n; i++) { + const curr = vals[i] as Scalar; + const prev = vals[i - shift] as Scalar; + if (isNum(curr) && isNum(prev) && prev !== 0) { + out[i] = curr / prev - 1; + } else if (isNum(curr) && isNum(prev) && prev === 0) { + // 0 denominator → Infinity (same as pandas) + out[i] = curr === 0 ? Number.NaN : curr > 0 ? Infinity : -Infinity; + } else { + out[i] = null; + } + } + } else { + // Negative periods: the base (denominator) is the element |periods| positions AHEAD, + // i.e. out[i] = vals[i] / vals[i + |periods|] - 1. This is the same + // `x[i] / x[i - periods] - 1` formula as the branch above and matches + // pandas' `data / data.shift(periods) - 1`. + const absShift = -shift; + for (let i = 0; i < n - absShift; i++) { + const curr = vals[i] as Scalar; + const fwd = vals[i + absShift] as Scalar; + if (isNum(curr) && isNum(fwd) && fwd !== 0) { + out[i] = curr / fwd - 1; + } else if (isNum(curr) && isNum(fwd) && fwd === 0) { + // 0 denominator → ±Infinity, or NaN for 0/0 + out[i] = curr === 0 ? Number.NaN : curr > 0 ? Infinity : -Infinity; + } else { + out[i] = null; + } + } + } + return out; +} + +// ─── public API ─────────────────────────────────────────────────────────────── + +/** + * Compute the fractional change between a Series element and the element + * `periods` positions earlier (or later, for negative `periods`). + * In both cases the other element is the denominator: + * `result[i] = x[i] / x[i - periods] - 1`. Positions without a counterpart, + * or with a missing operand, are `null`. + * + * Matches `pandas.Series.pct_change()`. + * + * @example + * ```ts + * const s = new Series({ data: [100, 110, 99, 121] }); + * pctChangeSeries(s); // [null, 0.1, -0.1, 0.2222…] + * ``` + */ +export function pctChangeSeries(series: Series<Scalar>, options: PctChangeOptions = {}): Series<Scalar> { + const periods = options.periods ?? 1; + const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad"; + const limit = options.limit ?? null; + + const filled = applyFill(series.values, fillMethod, limit); + const result = computePct(filled, periods); + + return new Series({ + data: result, + index: series.index, + name: series.name ?? 
undefined, + }); +} + +/** + * Compute percentage change for every column (or row) of a DataFrame. + * + * With `axis` `0` / `"index"` (default) each column is processed independently; + * with `1` / `"columns"` each row is processed across the columns in column order. + * + * Matches `pandas.DataFrame.pct_change()`. + * + * @example + * ```ts + * const df = new DataFrame(new Map([ + * ["a", new Series({ data: [100, 110, 121] })], + * ["b", new Series({ data: [200, 180, 198] })], + * ])); + * pctChangeDataFrame(df); // fractional change per column + * ``` + */ +export function pctChangeDataFrame( + df: DataFrame, + options: DataFramePctChangeOptions = {}, +): DataFrame { + const axis = options.axis ?? 0; + const colWise = axis === 0 || axis === "index"; + + if (colWise) { + const colMap = new Map<string, Series<Scalar>>(); + for (const name of df.columns.values) { + colMap.set(name, pctChangeSeries(df.col(name), options)); + } + return new DataFrame(colMap, df.index); + } + + // Row-wise: each row across columns + const periods = options.periods ?? 1; + const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad"; + const limit = options.limit ?? null; + const nRows = df.index.length; + const cols = df.columns.values; + const nCols = cols.length; + + const resultCols = new Map<string, Scalar[]>(); + for (const name of cols) { + resultCols.set(name, new Array(nRows).fill(null)); + } + + for (let r = 0; r < nRows; r++) { + const row: Scalar[] = []; + for (const name of cols) { + row.push(df.col(name).values[r] as Scalar); + } + const filled = applyFill(row, fillMethod, limit); + const pct = computePct(filled, periods); + for (let c = 0; c < nCols; c++) { + (resultCols.get(cols[c] as string) as Scalar[])[r] = pct[c] as Scalar; + } + } + + const colMap = new Map<string, Series<Scalar>>(); + for (const name of cols) { + colMap.set( + name, + new Series({ data: resultCols.get(name) as Scalar[], index: df.index, name }), + ); + } + return new DataFrame(colMap, df.index); +} diff --git a/tests/stats/pct_change.test.ts b/tests/stats/pct_change.test.ts new file mode 100644 index 00000000..98966e8c --- /dev/null +++ b/tests/stats/pct_change.test.ts @@ -0,0 +1,252 @@ +/** + * Tests for 
src/stats/pct_change.ts — pctChangeSeries, pctChangeDataFrame + */ +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Series, + pctChangeDataFrame, + pctChangeSeries, +} from "../../src/index.ts"; +import type { Scalar } from "../../src/index.ts"; + +// ─── helpers ───────────────────────────────────────────────────────────────── + +function s(data: readonly Scalar[]): Series<Scalar> { + return new Series({ data: [...data] }); +} + +function nanEq(a: Scalar, b: Scalar): boolean { + if (typeof a === "number" && Number.isNaN(a) && typeof b === "number" && Number.isNaN(b)) { + return true; + } + return a === b; +} + +function arrEq(a: readonly Scalar[], b: readonly Scalar[]): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (!nanEq(a[i] as Scalar, b[i] as Scalar)) return false; + } + return true; +} + +function close(a: Scalar, b: Scalar, eps = 1e-9): boolean { + if (a === null && b === null) return true; + if (typeof a !== "number" || typeof b !== "number") return false; + if (Number.isNaN(a) && Number.isNaN(b)) return true; + return Math.abs(a - b) < eps; +} + +function arrClose(a: readonly Scalar[], b: readonly Scalar[], eps = 1e-9): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (!close(a[i] as Scalar, b[i] as Scalar, eps)) return false; + } + return true; +} + +// ─── pctChangeSeries ───────────────────────────────────────────────────────── + +describe("pctChangeSeries", () => { + it("basic increasing sequence", () => { + const result = pctChangeSeries(s([100, 110, 121, 133.1])); + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0.1)).toBe(true); + expect(close(result.values[2] as Scalar, 0.1)).toBe(true); + expect(close(result.values[3] as Scalar, 0.1)).toBe(true); + }); + + it("decreasing sequence", () => { + const result = pctChangeSeries(s([200, 180, 162])); + 
expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, -0.1)).toBe(true); + expect(close(result.values[2] as Scalar, -0.1)).toBe(true); + }); + + it("periods=2", () => { + const result = pctChangeSeries(s([100, 105, 110, 121]), { periods: 2 }); + expect(result.values[0]).toBeNull(); + expect(result.values[1]).toBeNull(); + expect(close(result.values[2] as Scalar, 0.1)).toBe(true); + expect(close(result.values[3] as Scalar, (121 - 105) / 105)).toBe(true); + }); + + it("negative periods (base is the later element)", () => { + const result = pctChangeSeries(s([100, 110, 121]), { periods: -1 }); + // x[i] / x[i+1] - 1 → [100/110 - 1, 110/121 - 1]; the last element has no base → null + expect(close(result.values[0] as Scalar, 100 / 110 - 1)).toBe(true); + expect(close(result.values[1] as Scalar, 110 / 121 - 1)).toBe(true); + expect(result.values[2]).toBeNull(); + }); + + it("NaN/null propagates when fillMethod=null", () => { + const result = pctChangeSeries(s([100, null, 110]), { fillMethod: null }); + expect(result.values[0]).toBeNull(); + expect(result.values[1]).toBeNull(); + expect(result.values[2]).toBeNull(); + }); + + it("fillMethod=pad fills NaN before computing", () => { + const result = pctChangeSeries(s([100, null, 110]), { fillMethod: "pad" }); + // after pad-fill: [100, 100, 110] + // pct: [null, 0, 0.1] + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0)).toBe(true); + expect(close(result.values[2] as Scalar, 0.1)).toBe(true); + }); + + it("fillMethod=bfill fills NaN backward before computing", () => { + const result = pctChangeSeries(s([100, null, 110, 121]), { fillMethod: "bfill" }); + // after bfill: [100, 110, 110, 121] + // pct: [null, 0.1, 0, 0.1] + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0.1)).toBe(true); + expect(close(result.values[2] as Scalar, 0)).toBe(true); + expect(close(result.values[3] as Scalar, 0.1)).toBe(true); + }); + + it("limit=1 caps forward-fill", () => { + const result = pctChangeSeries(s([100, null, null, 130]), { + fillMethod: "pad", + limit: 1, + }); + 
// after pad with limit=1: [100, 100, null, 130] + // pct: [null, 0, null, null] (null/100 → null) + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0)).toBe(true); + expect(result.values[2]).toBeNull(); + expect(result.values[3]).toBeNull(); + }); + + it("zero denominator returns Infinity", () => { + const result = pctChangeSeries(s([0, 10]), { fillMethod: null }); + expect(result.values[1]).toBe(Infinity); + }); + + it("zero/zero denominator returns NaN", () => { + const result = pctChangeSeries(s([0, 0]), { fillMethod: null }); + expect(Number.isNaN(result.values[1] as number)).toBe(true); + }); + + it("preserves Series name and index", () => { + const src = new Series({ data: [10, 20, 30], name: "price" }); + const result = pctChangeSeries(src); + expect(result.name).toBe("price"); + expect(result.index.length).toBe(3); + }); + + it("empty series returns empty", () => { + const result = pctChangeSeries(s([])); + expect(result.values.length).toBe(0); + }); + + it("single-element series returns [null]", () => { + const result = pctChangeSeries(s([42])); + expect(result.values[0]).toBeNull(); + }); +}); + +// ─── pctChangeDataFrame ─────────────────────────────────────────────────────── + +describe("pctChangeDataFrame", () => { + it("column-wise (default)", () => { + const df = new DataFrame( + new Map([ + ["a", new Series({ data: [100, 110, 121] })], + ["b", new Series({ data: [200, 180, 198] })], + ]), + ); + const result = pctChangeDataFrame(df); + const colA = result.col("a").values; + const colB = result.col("b").values; + expect(colA[0]).toBeNull(); + expect(close(colA[1] as Scalar, 0.1)).toBe(true); + expect(close(colA[2] as Scalar, 0.1)).toBe(true); + expect(colB[0]).toBeNull(); + expect(close(colB[1] as Scalar, -0.1)).toBe(true); + expect(close(colB[2] as Scalar, 0.1)).toBe(true); + }); + + it("row-wise (axis=1)", () => { + const df = new DataFrame( + new Map([ + ["a", new Series({ data: [100, 200] })], + ["b", new Series({ 
data: [110, 220] })], + ["c", new Series({ data: [121, 242] })], + ]), + ); + const result = pctChangeDataFrame(df, { axis: 1 }); + // row 0: [100, 110, 121] → [null, 0.1, 0.1] + // row 1: [200, 220, 242] → [null, 0.1, 0.1] + const row0a = result.col("a").values[0]; + const row0b = result.col("b").values[0]; + const row0c = result.col("c").values[0]; + expect(row0a).toBeNull(); + expect(close(row0b as Scalar, 0.1)).toBe(true); + expect(close(row0c as Scalar, 0.1)).toBe(true); + const row1a = result.col("a").values[1]; + const row1b = result.col("b").values[1]; + expect(row1a).toBeNull(); + expect(close(row1b as Scalar, 0.1)).toBe(true); + }); + + it("preserves column order", () => { + const df = new DataFrame( + new Map([ + ["x", new Series({ data: [1, 2] })], + ["y", new Series({ data: [3, 6] })], + ]), + ); + const result = pctChangeDataFrame(df); + expect(result.columns.values).toEqual(["x", "y"]); + }); +}); + +// ─── property-based tests ───────────────────────────────────────────────────── + +describe("pctChangeSeries — property tests", () => { + it("result length equals input length", () => { + fc.assert( + fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 0, maxLength: 50 }), (arr) => { + const result = pctChangeSeries(s(arr)); + return result.values.length === arr.length; + }), + ); + }); + + it("first element is always null for periods=1", () => { + fc.assert( + fc.property( + fc.array(fc.float({ noNaN: true }), { minLength: 1, maxLength: 50 }), + (arr) => { + const result = pctChangeSeries(s(arr)); + return result.values[0] === null; + }, + ), + ); + }); + + it("pct_change(x, -1) equals the reversed pct_change of the reversed input", () => { + // With periods=-1 the result is x[i] / x[i+1] - 1. + // Reversing the input and using periods=1 gives rev[j] = x[n-1-j] / x[n-j] - 1, + // so reading that result backwards must reproduce the periods=-1 result index by index. + fc.assert( + fc.property( + fc.array(fc.float({ noNaN: true, min: 1, 
max: 1000 }), { minLength: 3, maxLength: 20 }), + (arr) => { + const fwd = pctChangeSeries(s(arr), { periods: -1, fillMethod: null }); + const rev = pctChangeSeries(s([...arr].reverse()), { periods: 1, fillMethod: null }); + // fwd[i] and rev[n-1-i] are both arr[i] / arr[i+1] - 1 (null at the open end) + return arrClose(fwd.values, [...rev.values].reverse(), 1e-6); + }, + ), + ); + }); +}); From 9967569361bc6563719dc8b8f895578657d7c28c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 11 Apr 2026 07:01:52 +0000 Subject: [PATCH 3/3] =?UTF-8?q?Iteration=20184:=20Add=20where/mask=20?= =?UTF-8?q?=E2=80=94=20conditional=20replacement=20for=20Series=20and=20Da?= =?UTF-8?q?taFrame?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run: https://github.com/githubnext/tsessebe/actions/runs/24276582839 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- playground/where_mask.html | 358 +++++++++++++++++++++++++++++++++ src/stats/where_mask.ts | 222 ++++++++++++++++++++ tests/stats/where_mask.test.ts | 274 +++++++++++++++++++++++++ 3 files changed, 854 insertions(+) create mode 100644 playground/where_mask.html create mode 100644 src/stats/where_mask.ts create mode 100644 tests/stats/where_mask.test.ts diff --git a/playground/where_mask.html b/playground/where_mask.html new file mode 100644 index 00000000..a38d0913 --- /dev/null +++ b/playground/where_mask.html @@ -0,0 +1,358 @@ + + + + + + tsb — where / mask + + + +
+
+
Initializing playground…
+
+ ← Back to roadmap +

🎭 where / mask — Interactive Playground

+

Conditionally keep or replace values in a Series or DataFrame.
+ where(cond) keeps values where the condition is true.
+ mask(cond) keeps values where the condition is false (inverse).
+ Mirrors pandas.Series.where(), pandas.Series.mask(), + pandas.DataFrame.where(), and pandas.DataFrame.mask().
+ Edit any code block below and press ▶ Run + (or Ctrl+Enter) to execute it live in your browser. +

+ + +
+

1 · whereSeries — keep values where condition is true

+

Values at positions where the condition is false are replaced with + other (defaults to NaN).

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · maskSeries — replace values where condition is true

+

maskSeries is the inverse of whereSeries: values at + positions where the condition is true are replaced.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · Callable conditions

+

Both where and mask accept a function as the condition. + The function receives the Series and returns a boolean array — useful for + self-referential conditions.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · whereDataFrame — conditional keep on a DataFrame

+

Supply a boolean DataFrame with the same columns. Values at false + positions are replaced with other.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · maskDataFrame — conditional replace on a DataFrame

+

The inverse: values at true positions are replaced with + other.

+
+
+ TypeScript +
+ + +
+
+ +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+

All functions return a new Series/DataFrame — inputs are never mutated.

+
+// Series — whereSeries keeps values where cond is true; maskSeries keeps values where cond is false
+whereSeries(series, cond, options?): Series
+maskSeries(series, cond, options?):  Series
+
+// cond can be:
+//   readonly boolean[]                   — parallel boolean array
+//   Series<boolean>                      — boolean Series (positional)
+//   (s: Series<Scalar>) => boolean[]   — callable
+
+// options:
+//   other?: Scalar   — replacement value (default NaN)
+
+// DataFrame — element-wise conditional keep/replace
+whereDataFrame(df, cond, options?): DataFrame
+maskDataFrame(df, cond, options?):  DataFrame
+
+// cond can be:
+//   DataFrame                            — boolean DataFrame (same columns)
+//   (df: DataFrame) => DataFrame         — callable returning boolean DataFrame
+
+// options:
+//   other?: Scalar   — replacement scalar (default NaN)
+
+ + + + +
diff --git a/src/stats/where_mask.ts b/src/stats/where_mask.ts
new file mode 100644
index 00000000..d3ccb516
--- /dev/null
+++ b/src/stats/where_mask.ts
@@ -0,0 +1,222 @@
+/**
+ * where_mask — conditional replacement for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.where(cond, other=NaN)` — keep where `cond` is `true`, replace with `other` where `false`
+ * - `Series.mask(cond, other=NaN)` — keep where `cond` is `false`, replace with `other` where `true`
+ * - `DataFrame.where(cond, other=NaN)` — element-wise conditional keep/replace
+ * - `DataFrame.mask(cond, other=NaN)` — element-wise conditional keep/replace (inverted)
+ *
+ * All functions are **pure** — inputs are never mutated.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * A boolean condition for a Series where/mask operation.
+ *
+ * - `readonly boolean[]` — parallel boolean array (same length as the Series)
+ * - `Series<boolean>` — a boolean Series (positional alignment)
+ * - `(s: Series<Scalar>) => readonly boolean[]` — callable returning a boolean array
+ */
+export type SeriesCond =
+  | readonly boolean[]
+  | Series<boolean>
+  | ((s: Series<Scalar>) => readonly boolean[]);
+
+/** Options for {@link whereSeries} and {@link maskSeries}. */
+export interface SeriesWhereOptions {
+  /**
+   * Replacement value used wherever the condition is not satisfied.
+   * Defaults to `NaN` only when omitted; an explicit `null` is used as-is.
+   */
+  readonly other?: Scalar;
+}
+
+/**
+ * A boolean condition for a DataFrame where/mask operation.
+ *
+ * - `DataFrame` — a boolean DataFrame with the same columns (positional per column)
+ * - `(df: DataFrame) => DataFrame` — callable returning a boolean DataFrame
+ */
+export type DataFrameCond = DataFrame | ((df: DataFrame) => DataFrame);
+
+/** Options for {@link whereDataFrame} and {@link maskDataFrame}. */
+export interface DataFrameWhereOptions {
+  /**
+   * Replacement scalar used wherever the condition is not satisfied.
+   * Defaults to `NaN` only when omitted; an explicit `null` is used as-is.
+   */
+  readonly other?: Scalar;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Resolve a {@link SeriesCond} to a flat boolean array. */
+function resolveSeriesCond(cond: SeriesCond, s: Series<Scalar>): readonly boolean[] {
+  if (typeof cond === "function") {
+    return cond(s);
+  }
+  if (cond instanceof Series) {
+    return cond.values as readonly boolean[];
+  }
+  return cond;
+}
+
+/**
+ * Map over `vals`: keep `vals[i]` when `keep[i]` is `true`, otherwise use `other`.
+ */
+function applyKeep(vals: readonly Scalar[], keep: readonly boolean[], other: Scalar): Scalar[] {
+  return vals.map((v, i) => (keep[i] === true ? v : other));
+}
+
+/** Invert a boolean array. */
+function invertBools(arr: readonly boolean[]): boolean[] {
+  return arr.map((b) => !b);
+}
+
+/** Resolve a {@link DataFrameCond} to a boolean DataFrame. */
+function resolveDataFrameCond(cond: DataFrameCond, df: DataFrame): DataFrame {
+  if (typeof cond === "function") {
+    return cond(df);
+  }
+  return cond;
+}
+
+// ─── Series functions ─────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series keeping values where `cond` is `true` and replacing
+ * them with `other` where `cond` is `false`.
+ *
+ * Mirrors `pandas.Series.where(cond, other=NaN)`.
+ *
+ * @param s - Source Series.
+ * @param cond - Boolean condition (array, Series, or callable).
+ * @param options - Optional `other` replacement value (default `NaN`; explicit `null` is kept).
+ * @returns A new Series with conditional replacements applied.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3, 4] });
+ * whereSeries(s, [true, false, true, false], { other: 0 });
+ * // Series [1, 0, 3, 0]
+ * ```
+ */
+export function whereSeries(
+  s: Series<Scalar>,
+  cond: SeriesCond,
+  options?: SeriesWhereOptions,
+): Series<Scalar> {
+  const other = options?.other !== undefined ? options.other : Number.NaN;
+  const keep = resolveSeriesCond(cond, s);
+  return s.withValues(applyKeep(s.values, keep, other));
+}
+
+/**
+ * Return a new Series keeping values where `cond` is `false` and replacing
+ * them with `other` where `cond` is `true`.
+ *
+ * Mirrors `pandas.Series.mask(cond, other=NaN)`.
+ *
+ * @param s - Source Series.
+ * @param cond - Boolean condition (array, Series, or callable).
+ * @param options - Optional `other` replacement value (default `NaN`; explicit `null` is kept).
+ * @returns A new Series with conditional replacements applied.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3, 4] });
+ * maskSeries(s, [false, true, false, true], { other: 0 });
+ * // Series [1, 0, 3, 0]
+ * ```
+ */
+export function maskSeries(
+  s: Series<Scalar>,
+  cond: SeriesCond,
+  options?: SeriesWhereOptions,
+): Series<Scalar> {
+  const other = options?.other !== undefined ? options.other : Number.NaN;
+  const keep = invertBools(resolveSeriesCond(cond, s));
+  return s.withValues(applyKeep(s.values, keep, other));
+}
+
+// ─── DataFrame functions ──────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame keeping each value where the corresponding entry in
+ * `cond` is `true` and replacing it with `other` where `cond` is `false`.
+ *
+ * Mirrors `pandas.DataFrame.where(cond, other=NaN)`.
+ *
+ * @param df - Source DataFrame.
+ * @param cond - A boolean DataFrame (same columns) or a callable returning one.
+ * @param options - Optional `other` replacement scalar (default `NaN`; explicit `null` is kept).
+ * @returns A new DataFrame with conditional replacements applied.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * const cond = DataFrame.fromColumns({ a: [true, false, true], b: [false, true, true] });
+ * whereDataFrame(df, cond, { other: 0 });
+ * // DataFrame { a: [1, 0, 3], b: [0, 5, 6] }
+ * ```
+ */
+export function whereDataFrame(
+  df: DataFrame,
+  cond: DataFrameCond,
+  options?: DataFrameWhereOptions,
+): DataFrame {
+  const other = options?.other !== undefined ? options.other : Number.NaN;
+  const condDf = resolveDataFrameCond(cond, df);
+  const colArrays: Record<string, Scalar[]> = {};
+  for (const colName of df.columns.values) {
+    const srcCol = df.col(colName);
+    const condCol = condDf.col(colName);
+    const keep = condCol.values as readonly boolean[];
+    colArrays[colName] = applyKeep(srcCol.values, keep, other);
+  }
+  return DataFrame.fromColumns(colArrays, { index: df.index });
+}
+
+/**
+ * Return a new DataFrame keeping each value where the corresponding entry in
+ * `cond` is `false` and replacing it with `other` where `cond` is `true`.
+ *
+ * Mirrors `pandas.DataFrame.mask(cond, other=NaN)`.
+ *
+ * @param df - Source DataFrame.
+ * @param cond - A boolean DataFrame (same columns) or a callable returning one.
+ * @param options - Optional `other` replacement scalar (default `NaN`; explicit `null` is kept).
+ * @returns A new DataFrame with conditional replacements applied.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * const cond = DataFrame.fromColumns({ a: [false, true, false], b: [true, false, false] });
+ * maskDataFrame(df, cond, { other: 0 });
+ * // DataFrame { a: [1, 0, 3], b: [0, 5, 6] }
+ * ```
+ */
+export function maskDataFrame(
+  df: DataFrame,
+  cond: DataFrameCond,
+  options?: DataFrameWhereOptions,
+): DataFrame {
+  const other = options?.other !== undefined ? options.other : Number.NaN;
+  const condDf = resolveDataFrameCond(cond, df);
+  const colArrays: Record<string, Scalar[]> = {};
+  for (const colName of df.columns.values) {
+    const srcCol = df.col(colName);
+    const condCol = condDf.col(colName);
+    const keep = invertBools(condCol.values as readonly boolean[]);
+    colArrays[colName] = applyKeep(srcCol.values, keep, other);
+  }
+  return DataFrame.fromColumns(colArrays, { index: df.index });
+}
diff --git a/tests/stats/where_mask.test.ts b/tests/stats/where_mask.test.ts new file mode 100644 index 00000000..8069871c --- /dev/null +++ b/tests/stats/where_mask.test.ts @@ -0,0 +1,274 @@ +/** + * Tests for where_mask — conditional replacement for Series and DataFrame. + */ + +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Series, + maskDataFrame, + maskSeries, + whereDataFrame, + whereSeries, +} from "../../src/index.ts"; + +// ─── whereSeries ────────────────────────────────────────────────────────────── + +describe("whereSeries — array condition", () => { + it("keeps values where cond is true", () => { + const s = new Series({ data: [1, 2, 3, 4] }); + const result = whereSeries(s, [true, true, true, true]); + expect(result.values).toEqual([1, 2, 3, 4]); + }); + + it("replaces all values when cond is all-false (default NaN)", () => { + const s = new Series({ data: [1, 2, 3] }); + const result = whereSeries(s, [false, false, false]); + expect(result.values.every((v) => typeof v === "number" && Number.isNaN(v as number))).toBe( + true, + ); + }); + + it("replaces false positions with provided other", () => { + const s = new Series({ data: [10, 20, 30, 40] }); + const result = whereSeries(s, [true, false, true, false], { other: 0 }); + expect(result.values).toEqual([10, 0, 30, 0]); + }); + + it("preserves index and name", () => { + const s = new Series({ data: [1, 2, 3], name: "x" }); + const result = whereSeries(s, [true, false, true]); + expect(result.name).toBe("x"); +
expect(result.index.size).toBe(3); + }); + + it("works with string other", () => { + const s = new Series({ data: ["a", "b", "c"] }); + const result = whereSeries(s, [true, false, true], { other: "X" }); + expect(result.values).toEqual(["a", "X", "c"]); + }); + + it("works with null other", () => { + const s = new Series({ data: [1, 2, 3] }); + const result = whereSeries(s, [false, true, false], { other: null }); + expect(result.values).toEqual([null, 2, null]); + }); +}); + +describe("whereSeries — Series condition", () => { + it("accepts a boolean Series as condition", () => { + const s = new Series({ data: [5, 6, 7] }); + const cond = new Series({ data: [true, false, true] }); + const result = whereSeries(s, cond, { other: -1 }); + expect(result.values).toEqual([5, -1, 7]); + }); +}); + +describe("whereSeries — callable condition", () => { + it("accepts a function as condition", () => { + const s = new Series({ data: [1, 2, 3, 4] }); + const result = whereSeries(s, (x) => (x.values as number[]).map((v) => v > 2), { other: 0 }); + expect(result.values).toEqual([0, 0, 3, 4]); + }); +}); + +// ─── maskSeries ─────────────────────────────────────────────────────────────── + +describe("maskSeries — array condition", () => { + it("replaces values where cond is true (inverse of where)", () => { + const s = new Series({ data: [10, 20, 30, 40] }); + const result = maskSeries(s, [false, true, false, true], { other: 0 }); + expect(result.values).toEqual([10, 0, 30, 0]); + }); + + it("keeps all values when cond is all-false", () => { + const s = new Series({ data: [1, 2, 3] }); + const result = maskSeries(s, [false, false, false], { other: 99 }); + expect(result.values).toEqual([1, 2, 3]); + }); + + it("replaces all values when cond is all-true", () => { + const s = new Series({ data: [1, 2, 3] }); + const result = maskSeries(s, [true, true, true], { other: 0 }); + expect(result.values).toEqual([0, 0, 0]); + }); +}); + +describe("maskSeries — callable condition", () => { + 
it("masks values above a threshold", () => { + const s = new Series({ data: [1, 5, 2, 8, 3] }); + const result = maskSeries(s, (x) => (x.values as number[]).map((v) => v > 4), { other: -1 }); + expect(result.values).toEqual([1, -1, 2, -1, 3]); + }); +}); + +// ─── where / mask duality ───────────────────────────────────────────────────── + +describe("where / mask duality", () => { + it("where(cond) === mask(!cond)", () => { + const s = new Series({ data: [1, 2, 3, 4, 5] }); + const cond = [true, false, true, false, true]; + const invCond = cond.map((b) => !b); + const w = whereSeries(s, cond, { other: 0 }); + const m = maskSeries(s, invCond, { other: 0 }); + expect(w.values).toEqual(m.values); + }); +}); + +// ─── whereDataFrame ─────────────────────────────────────────────────────────── + +describe("whereDataFrame — DataFrame condition", () => { + it("keeps values where cond is true", () => { + const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] }); + const cond = DataFrame.fromColumns({ + a: [true, true, true], + b: [true, true, true], + }); + const result = whereDataFrame(df, cond); + expect(result.col("a").values).toEqual([1, 2, 3]); + expect(result.col("b").values).toEqual([4, 5, 6]); + }); + + it("replaces false positions with other scalar", () => { + const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] }); + const cond = DataFrame.fromColumns({ + a: [true, false, true], + b: [false, true, true], + }); + const result = whereDataFrame(df, cond, { other: 0 }); + expect(result.col("a").values).toEqual([1, 0, 3]); + expect(result.col("b").values).toEqual([0, 5, 6]); + }); + + it("default other is NaN", () => { + const df = DataFrame.fromColumns({ a: [1, 2] }); + const cond = DataFrame.fromColumns({ a: [false, true] }); + const result = whereDataFrame(df, cond); + const vals = result.col("a").values as number[]; + expect(Number.isNaN(vals[0])).toBe(true); + expect(vals[1]).toBe(2); + }); + + it("preserves row index", () => { + const df = 
DataFrame.fromColumns({ a: [10, 20] }); + const cond = DataFrame.fromColumns({ a: [true, false] }); + const result = whereDataFrame(df, cond, { other: 0 }); + expect(result.index.size).toBe(2); + }); +}); + +describe("whereDataFrame — callable condition", () => { + it("accepts a function returning a boolean DataFrame", () => { + const df = DataFrame.fromColumns({ a: [1, 2, 3, 4] }); + const result = whereDataFrame( + df, + (x) => DataFrame.fromColumns({ a: (x.col("a").values as number[]).map((v) => v > 2) }), + { other: 0 }, + ); + expect(result.col("a").values).toEqual([0, 0, 3, 4]); + }); +}); + +// ─── maskDataFrame ──────────────────────────────────────────────────────────── + +describe("maskDataFrame — DataFrame condition", () => { + it("replaces true positions with other scalar", () => { + const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] }); + const cond = DataFrame.fromColumns({ + a: [false, true, false], + b: [true, false, false], + }); + const result = maskDataFrame(df, cond, { other: 0 }); + expect(result.col("a").values).toEqual([1, 0, 3]); + expect(result.col("b").values).toEqual([0, 5, 6]); + }); + + it("keeps all values when cond is all-false", () => { + const df = DataFrame.fromColumns({ a: [7, 8, 9] }); + const cond = DataFrame.fromColumns({ a: [false, false, false] }); + const result = maskDataFrame(df, cond, { other: -1 }); + expect(result.col("a").values).toEqual([7, 8, 9]); + }); +}); + +describe("maskDataFrame — callable condition", () => { + it("masks values below threshold", () => { + const df = DataFrame.fromColumns({ a: [1, 5, 2, 8, 3] }); + const result = maskDataFrame( + df, + (x) => DataFrame.fromColumns({ a: (x.col("a").values as number[]).map((v) => v < 4) }), + { other: -1 }, + ); + expect(result.col("a").values).toEqual([-1, 5, -1, 8, -1]); + }); +}); + +// ─── whereDataFrame / maskDataFrame duality ─────────────────────────────────── + +describe("whereDataFrame / maskDataFrame duality", () => { + 
it("whereDataFrame(cond) === maskDataFrame(!cond)", () => { + const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] }); + const cond = DataFrame.fromColumns({ + a: [true, false, true], + b: [false, true, false], + }); + const invCond = DataFrame.fromColumns({ + a: [false, true, false], + b: [true, false, true], + }); + const w = whereDataFrame(df, cond, { other: 0 }); + const m = maskDataFrame(df, invCond, { other: 0 }); + expect(w.col("a").values).toEqual(m.col("a").values); + expect(w.col("b").values).toEqual(m.col("b").values); + }); +}); + +// ─── property-based tests ───────────────────────────────────────────────────── + +describe("whereSeries — property tests", () => { + it("where(all-true) returns original values", () => { + fc.assert( + fc.property( + fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 1, maxLength: 20 }), + (nums) => { + const s = new Series({ data: nums }); + const cond = nums.map(() => true); + const result = whereSeries(s, cond); + return (result.values as number[]).every((v, i) => v === nums[i]); + }, + ), + ); + }); + + it("where(all-false, other=0) returns all zeros", () => { + fc.assert( + fc.property( + fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 1, maxLength: 20 }), + (nums) => { + const s = new Series({ data: nums }); + const cond = nums.map(() => false); + const result = whereSeries(s, cond, { other: 0 }); + return (result.values as number[]).every((v) => v === 0); + }, + ), + ); + }); + + it("mask(cond) === where(!cond) for numeric data", () => { + fc.assert( + fc.property( + fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 1, maxLength: 20 }), + fc.boolean(), + (nums, fillBool) => { + const cond: boolean[] = nums.map(() => fillBool); + const invCond: boolean[] = cond.map((b) => !b); + const s = new Series({ data: nums }); + const w = whereSeries(s, cond, { other: -999 }); + const m = maskSeries(s, invCond, { other: -999 }); + return (w.values as number[]).every((v, i) => v === (m.values 
as number[])[i]); + }, + ), + ); + }); +});