From 18c019b504914129aced83c92dc9470638ff1b50 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 11 Apr 2026 12:21:59 +0000 Subject: [PATCH 1/4] =?UTF-8?q?Iteration=20172:=20Add=20na=5Fops=20?= =?UTF-8?q?=E2=80=94=20isna/notna/ffill/bfill?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements pandas missing-value utilities as standalone exported functions: - `isna` / `notna` / `isnull` / `notnull` — detect missing values in scalars, Series, and DataFrames (mirrors pd.isna / pd.notna) - `ffillSeries` / `bfillSeries` — forward/backward fill for Series with optional `limit` parameter - `dataFrameFfill` / `dataFrameBfill` — column-wise or row-wise fill for DataFrames with optional `limit` and `axis` parameters Metric: 28 → 29 pandas_features_ported Run: https://github.com/githubnext/tsessebe/actions/runs/24263385922 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- playground/index.html | 20 ++ playground/na_ops.html | 480 +++++++++++++++++++++++++++++++++++++ src/index.ts | 21 ++ src/stats/index.ts | 19 ++ src/stats/na_ops.ts | 336 ++++++++++++++++++++++++++ tests/stats/na_ops.test.ts | 280 ++++++++++++++++++++++ 6 files changed, 1156 insertions(+) create mode 100644 playground/na_ops.html create mode 100644 src/stats/na_ops.ts create mode 100644 tests/stats/na_ops.test.ts diff --git a/playground/index.html b/playground/index.html index 48bfbcb9..83948f14 100644 --- a/playground/index.html +++ b/playground/index.html @@ -254,6 +254,11 @@

Element-wise transformations. clip(), seriesAbs(), seriesRound() for Series and DataFrame with min/max bounds, decimal precision, and axis support.

✅ Complete
+
+

🔍 missing-value ops

+

Detect and fill missing values. isna(), notna(), isnull(), notnull() for scalars/Series/DataFrame. ffillSeries(), bfillSeries(), dataFrameFfill(), dataFrameBfill() with optional limit and axis support.

+
✅ Complete
+

🔢 value_counts

Count unique values. valueCounts() for Series and dataFrameValueCounts() for DataFrame with normalize, sort, ascending, and dropna options.

@@ -264,6 +269,21 @@

✅ Complete

+
+

📊 pct_change

+

Fractional change between elements. pctChangeSeries() and pctChangeDataFrame() with periods, fillMethod (pad/bfill), limit, and axis options.

+
✅ Complete
+
+
+

🔎 idxmin / idxmax

+

Return the index label of the minimum or maximum value. idxminSeries(), idxmaxSeries(), idxminDataFrame(), idxmaxDataFrame() with skipna support.

+
✅ Complete
+
+
+

🔄 astype

+

Cast a Series or DataFrame to a different dtype. Supports all numeric, bool, string, datetime dtypes. Per-column casting for DataFrames. errors="raise"/"ignore" modes.

+
✅ Complete
+
diff --git a/playground/na_ops.html b/playground/na_ops.html new file mode 100644 index 00000000..c321438f --- /dev/null +++ b/playground/na_ops.html @@ -0,0 +1,480 @@ + + + + + + tsb — missing-value operations (isna, ffill, bfill) + + + +
+
+
Loading tsb runtime…
+
+ + ← Back to playground index + +

Missing-value operations

+

+ isna / notna — detect missing values in scalars, + Series, and DataFrames.
+ ffill / bfill — propagate the last (or next) valid + value to fill gaps.
+ Mirrors pd.isna(), Series.ffill(), and + DataFrame.bfill() from pandas. +

+ + +
+

1 · isna / notna on scalars

+

+ Returns true / false for individual values. + null, undefined, and NaN are all + considered "missing". +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · isna on a Series

+

+ When passed a Series, isna returns a boolean Series of the + same length — true where values are missing. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · isna on a DataFrame

+

+ Returns a DataFrame of booleans with the same shape — one column per + original column, true where missing. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · Forward-fill (ffillSeries)

+

+ Propagates the last valid value forward to fill gaps. Leading + nulls that have no preceding value remain null. + Use the optional limit to cap consecutive fills. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · Backward-fill (bfillSeries)

+

+ Propagates the next valid value backward to fill gaps. Trailing + nulls that have no following value remain null. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · DataFrame forward-fill & backward-fill

+

+ dataFrameFfill and dataFrameBfill apply fill + column-wise by default (axis=0). Pass axis: 1 to fill + row-wise across columns. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+
// Module-level missing-value detection
+isna(value: Scalar): boolean
+isna(value: Series): Series<boolean>
+isna(value: DataFrame): DataFrame
+
+notna(value: Scalar): boolean
+notna(value: Series): Series<boolean>
+notna(value: DataFrame): DataFrame
+
+// Aliases
+isnull(...)  // same as isna
+notnull(...) // same as notna
+
+// Series forward / backward fill
+ffillSeries(series, options?: { limit?: number | null }): Series
+bfillSeries(series, options?: { limit?: number | null }): Series
+
+// DataFrame forward / backward fill
+dataFrameFfill(df, options?: {
+  limit?: number | null,   // max consecutive fills (default: no limit)
+  axis?: 0 | 1 | "index" | "columns",  // default 0 (column-wise)
+}): DataFrame
+
+dataFrameBfill(df, options?: {
+  limit?: number | null,
+  axis?: 0 | 1 | "index" | "columns",
+}): DataFrame
+
+ + + + + diff --git a/src/index.ts b/src/index.ts index 1dd0aa57..a0cf65da 100644 --- a/src/index.ts +++ b/src/index.ts @@ -107,3 +107,24 @@ export { export type { ClipOptions, RoundOptions, DataFrameElemOptions } from "./stats/index.ts"; export { valueCounts, dataFrameValueCounts } from "./stats/index.ts"; export type { ValueCountsOptions, DataFrameValueCountsOptions } from "./stats/index.ts"; +export { + isna, + notna, + isnull, + notnull, + ffillSeries, + bfillSeries, + dataFrameFfill, + dataFrameBfill, +} from "./stats/index.ts"; +export type { FillDirectionOptions, DataFrameFillOptions } from "./stats/index.ts"; +export { pctChangeSeries, pctChangeDataFrame } from "./stats/index.ts"; +export type { + PctChangeFillMethod, + PctChangeOptions, + DataFramePctChangeOptions, +} from "./stats/index.ts"; +export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./stats/index.ts"; +export type { IdxOptions, IdxDataFrameOptions } from "./stats/index.ts"; +export { castScalar, astypeSeries, astypeDataFrame } from "./core/index.ts"; +export type { CastErrors, AstypeOptions, AstypeDataFrameOptions } from "./core/index.ts"; diff --git a/src/stats/index.ts b/src/stats/index.ts index b1de48eb..6d880f89 100644 --- a/src/stats/index.ts +++ b/src/stats/index.ts @@ -39,3 +39,22 @@ export { nsmallestDataFrame, } from "./nlargest.ts"; export type { NKeep, NTopOptions, NTopDataFrameOptions } from "./nlargest.ts"; +export { + isna, + notna, + isnull, + notnull, + ffillSeries, + bfillSeries, + dataFrameFfill, + dataFrameBfill, +} from "./na_ops.ts"; +export type { FillDirectionOptions, DataFrameFillOptions } from "./na_ops.ts"; +export { pctChangeSeries, pctChangeDataFrame } from "./pct_change.ts"; +export type { + PctChangeFillMethod, + PctChangeOptions, + DataFramePctChangeOptions, +} from "./pct_change.ts"; +export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./idxmin_idxmax.ts"; +export type { IdxOptions, IdxDataFrameOptions } from 
"./idxmin_idxmax.ts"; diff --git a/src/stats/na_ops.ts b/src/stats/na_ops.ts new file mode 100644 index 00000000..c776bb1f --- /dev/null +++ b/src/stats/na_ops.ts @@ -0,0 +1,336 @@ +/** + * na_ops — missing-value utilities for Series and DataFrame. + * + * Mirrors the following pandas module-level functions and methods: + * - `pd.isna(obj)` / `pd.isnull(obj)` — detect missing values + * - `pd.notna(obj)` / `pd.notnull(obj)` — detect non-missing values + * - `Series.ffill()` / `DataFrame.ffill()` — forward-fill missing values + * - `Series.bfill()` / `DataFrame.bfill()` — backward-fill missing values + * + * All functions are **pure** (return new objects; inputs are unchanged). + * + * @module + */ + +import { DataFrame } from "../core/index.ts"; +import { Series } from "../core/index.ts"; +import type { Scalar } from "../types.ts"; + +// ─── public types ───────────────────────────────────────────────────────────── + +/** Options for {@link ffillSeries} and {@link bfillSeries}. */ +export interface FillDirectionOptions { + /** + * Maximum number of consecutive NaN/null values to fill. + * `null` means no limit (default). + */ + readonly limit?: number | null; +} + +/** Options for {@link dataFrameFfill} and {@link dataFrameBfill}. */ +export interface DataFrameFillOptions extends FillDirectionOptions { + /** + * - `0` or `"index"` (default): fill missing values down each **column**. + * - `1` or `"columns"`: fill missing values across each **row**. + */ + readonly axis?: 0 | 1 | "index" | "columns"; +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** True when `v` should be treated as missing. */ +function isMissing(v: Scalar): boolean { + return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v)); +} + +/** Forward-fill an array of scalars in-place (returns a new array). 
*/ +function ffillArray(vals: readonly Scalar[], limit: number | null): Scalar[] { + const out: Scalar[] = Array.from(vals); + let lastValid: Scalar = null; + let streak = 0; + for (let i = 0; i < out.length; i++) { + if (isMissing(out[i])) { + if (!isMissing(lastValid) && (limit === null || streak < limit)) { + out[i] = lastValid; + streak++; + } + } else { + lastValid = out[i] as Scalar; + streak = 0; + } + } + return out; +} + +/** Backward-fill an array of scalars (returns a new array). */ +function bfillArray(vals: readonly Scalar[], limit: number | null): Scalar[] { + const out: Scalar[] = Array.from(vals); + let nextValid: Scalar = null; + let streak = 0; + for (let i = out.length - 1; i >= 0; i--) { + if (isMissing(out[i])) { + if (!isMissing(nextValid) && (limit === null || streak < limit)) { + out[i] = nextValid; + streak++; + } + } else { + nextValid = out[i] as Scalar; + streak = 0; + } + } + return out; +} + +// ─── isna / notna ───────────────────────────────────────────────────────────── + +/** + * Detect missing values in a scalar, Series, or DataFrame. + * + * - For a **scalar**: returns `true` if the value is `null`, `undefined`, or `NaN`. + * - For a **Series**: returns a `Series` of the same index. + * - For a **DataFrame**: returns a `DataFrame` of boolean columns. + * + * Mirrors `pandas.isna()` / `pandas.isnull()`. 
+ * + * @example + * ```ts + * import { isna } from "tsb"; + * isna(null); // true + * isna(42); // false + * isna(NaN); // true + * + * const s = new Series({ data: [1, null, NaN, 4] }); + * isna(s); // Series([false, true, true, false]) + * ``` + */ +export function isna(value: Scalar): boolean; +export function isna(value: Series): Series; +export function isna(value: DataFrame): DataFrame; +export function isna( + value: Scalar | Series | DataFrame, +): boolean | Series | DataFrame { + if (value instanceof DataFrame) { + return value.isna(); + } + if (value instanceof Series) { + return value.isna(); + } + return isMissing(value as Scalar); +} + +/** + * Detect non-missing values in a scalar, Series, or DataFrame. + * + * Mirrors `pandas.notna()` / `pandas.notnull()`. + * + * @example + * ```ts + * import { notna } from "tsb"; + * notna(null); // false + * notna(42); // true + * ``` + */ +export function notna(value: Scalar): boolean; +export function notna(value: Series): Series; +export function notna(value: DataFrame): DataFrame; +export function notna( + value: Scalar | Series | DataFrame, +): boolean | Series | DataFrame { + if (value instanceof DataFrame) { + return value.notna(); + } + if (value instanceof Series) { + return value.notna(); + } + return !isMissing(value as Scalar); +} + +/** Alias for {@link isna}. Mirrors `pandas.isnull()`. */ +export const isnull = isna; + +/** Alias for {@link notna}. Mirrors `pandas.notnull()`. */ +export const notnull = notna; + +// ─── ffill ──────────────────────────────────────────────────────────────────── + +/** + * Forward-fill missing values in a Series. + * + * Each `null`/`NaN` value is replaced with the last non-missing value + * that precedes it (if any). Values before the first non-missing value + * remain missing. + * + * Mirrors `pandas.Series.ffill()`. + * + * @param series - Input Series (unchanged). + * @param options - Optional `{ limit }` — max consecutive fills. 
+ * @returns New Series with forward-filled values. + * + * @example + * ```ts + * import { ffillSeries } from "tsb"; + * const s = new Series({ data: [1, null, null, 4] }); + * ffillSeries(s); // Series([1, 1, 1, 4]) + * ``` + */ +export function ffillSeries( + series: Series, + options?: FillDirectionOptions, +): Series { + const limit = options?.limit ?? null; + const filled = ffillArray(series.values as readonly Scalar[], limit) as T[]; + return new Series({ + data: filled, + index: series.index, + dtype: series.dtype, + name: series.name ?? undefined, + }); +} + +/** + * Backward-fill missing values in a Series. + * + * Each `null`/`NaN` value is replaced with the next non-missing value + * that follows it (if any). Values after the last non-missing value + * remain missing. + * + * Mirrors `pandas.Series.bfill()`. + * + * @example + * ```ts + * import { bfillSeries } from "tsb"; + * const s = new Series({ data: [1, null, null, 4] }); + * bfillSeries(s); // Series([1, 4, 4, 4]) + * ``` + */ +export function bfillSeries( + series: Series, + options?: FillDirectionOptions, +): Series { + const limit = options?.limit ?? null; + const filled = bfillArray(series.values as readonly Scalar[], limit) as T[]; + return new Series({ + data: filled, + index: series.index, + dtype: series.dtype, + name: series.name ?? undefined, + }); +} + +// ─── DataFrame ffill / bfill ────────────────────────────────────────────────── + +/** + * Forward-fill missing values in a DataFrame. + * + * By default operates **column-wise** (axis=0): each column is independently + * forward-filled. With `axis=1` each row is forward-filled across columns. + * + * Mirrors `pandas.DataFrame.ffill()`. 
+ * + * @example + * ```ts + * import { dataFrameFfill } from "tsb"; + * const df = new DataFrame({ data: { a: [1, null, 3], b: [null, 2, null] } }); + * dataFrameFfill(df); + * // a: [1, 1, 3] + * // b: [null, 2, 2] + * ``` + */ +export function dataFrameFfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame { + const limit = options?.limit ?? null; + const axis = options?.axis ?? 0; + const byRow = axis === 1 || axis === "columns"; + + if (!byRow) { + // column-wise: fill each column independently + const colMap = new Map>(); + for (const name of df.columns.values) { + const col = df.col(name); + const filled = ffillArray(col.values, limit) as Scalar[]; + colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype })); + } + return new DataFrame(colMap, df.index); + } + + // row-wise: fill across columns for each row + const nRows = df.shape[0]; + const cols = df.columns.values; + const columns = cols.map((name) => df.col(name)); + const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values)); + for (let r = 0; r < nRows; r++) { + const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null); + const filled = ffillArray(rowVals, limit); + for (let ci = 0; ci < cols.length; ci++) { + const rowsFilledCI = rowsFilled[ci]; + if (rowsFilledCI !== undefined) { + rowsFilledCI[r] = filled[ci] ?? null; + } + } + } + const colMap = new Map>(); + for (let ci = 0; ci < cols.length; ci++) { + const name = cols[ci] as string; + const col = columns[ci] as Series; + colMap.set( + name, + new Series({ + data: rowsFilled[ci] ?? [], + index: col.index, + dtype: col.dtype, + }), + ); + } + return new DataFrame(colMap, df.index); +} + +/** + * Backward-fill missing values in a DataFrame. + * + * By default operates **column-wise** (axis=0). With `axis=1` fills across rows. + * + * Mirrors `pandas.DataFrame.bfill()`. 
+ */ +export function dataFrameBfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame { + const limit = options?.limit ?? null; + const axis = options?.axis ?? 0; + const byRow = axis === 1 || axis === "columns"; + + if (!byRow) { + const colMap = new Map>(); + for (const name of df.columns.values) { + const col = df.col(name); + const filled = bfillArray(col.values, limit) as Scalar[]; + colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype })); + } + return new DataFrame(colMap, df.index); + } + + const nRows = df.shape[0]; + const cols = df.columns.values; + const columns = cols.map((name) => df.col(name)); + const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values)); + for (let r = 0; r < nRows; r++) { + const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null); + const filled = bfillArray(rowVals, limit); + for (let ci = 0; ci < cols.length; ci++) { + const rowsFilledCI = rowsFilled[ci]; + if (rowsFilledCI !== undefined) { + rowsFilledCI[r] = filled[ci] ?? null; + } + } + } + const colMap = new Map>(); + for (let ci = 0; ci < cols.length; ci++) { + const name = cols[ci] as string; + const col = columns[ci] as Series; + colMap.set( + name, + new Series({ + data: rowsFilled[ci] ?? [], + index: col.index, + dtype: col.dtype, + }), + ); + } + return new DataFrame(colMap, df.index); +} diff --git a/tests/stats/na_ops.test.ts b/tests/stats/na_ops.test.ts new file mode 100644 index 00000000..340406ac --- /dev/null +++ b/tests/stats/na_ops.test.ts @@ -0,0 +1,280 @@ +/** + * Tests for na_ops — missing-value utilities (isna, notna, ffill, bfill). 
+ */ + +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Series, + bfillSeries, + dataFrameBfill, + dataFrameFfill, + ffillSeries, + isna, + isnull, + notna, + notnull, +} from "../../src/index.ts"; + +// ─── isna / notna ───────────────────────────────────────────────────────────── + +describe("isna (scalar)", () => { + it("returns true for null", () => expect(isna(null)).toBe(true)); + it("returns true for undefined", () => expect(isna(undefined)).toBe(true)); + it("returns true for NaN", () => expect(isna(Number.NaN)).toBe(true)); + it("returns false for 0", () => expect(isna(0)).toBe(false)); + it("returns false for empty string", () => expect(isna("")).toBe(false)); + it("returns false for false", () => expect(isna(false)).toBe(false)); + it("returns false for a number", () => expect(isna(42)).toBe(false)); +}); + +describe("notna (scalar)", () => { + it("returns false for null", () => expect(notna(null)).toBe(false)); + it("returns false for NaN", () => expect(notna(Number.NaN)).toBe(false)); + it("returns true for 42", () => expect(notna(42)).toBe(true)); + it("returns true for a string", () => expect(notna("hello")).toBe(true)); +}); + +describe("isnull / notnull aliases", () => { + it("isnull equals isna for scalar", () => { + expect(isnull(null)).toBe(isna(null)); + expect(isnull(42)).toBe(isna(42)); + }); + it("notnull equals notna for scalar", () => { + expect(notnull(null)).toBe(notna(null)); + expect(notnull(42)).toBe(notna(42)); + }); +}); + +describe("isna (Series)", () => { + it("returns boolean Series of correct length", () => { + const s = new Series({ data: [1, null, Number.NaN, 4] }); + const result = isna(s); + expect(result).toBeInstanceOf(Series); + expect([...result.values]).toEqual([false, true, true, false]); + }); + + it("all present", () => { + const s = new Series({ data: [1, 2, 3] }); + expect([...isna(s).values]).toEqual([false, false, false]); + }); + + it("all missing", () => { 
+ const s = new Series({ data: [null, null, Number.NaN] }); + expect([...isna(s).values]).toEqual([true, true, true]); + }); +}); + +describe("notna (Series)", () => { + it("is the inverse of isna", () => { + const s = new Series({ data: [1, null, Number.NaN, 4] }); + const na = isna(s).values; + const nna = notna(s).values; + for (let i = 0; i < na.length; i++) { + expect(nna[i]).toBe(!na[i]); + } + }); +}); + +describe("isna (DataFrame)", () => { + it("returns DataFrame of booleans", () => { + const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] }); + const result = isna(df); + expect(result).toBeInstanceOf(DataFrame); + expect([...result.col("a").values]).toEqual([false, true]); + expect([...result.col("b").values]).toEqual([true, false]); + }); +}); + +describe("notna (DataFrame)", () => { + it("returns inverse of isna DataFrame", () => { + const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] }); + expect([...notna(df).col("a").values]).toEqual([true, false]); + expect([...notna(df).col("b").values]).toEqual([false, true]); + }); +}); + +// ─── ffillSeries ────────────────────────────────────────────────────────────── + +describe("ffillSeries", () => { + it("fills nulls with preceding value", () => { + const s = new Series({ data: [1, null, null, 4] }); + expect([...ffillSeries(s).values]).toEqual([1, 1, 1, 4]); + }); + + it("leaves leading nulls untouched", () => { + const s = new Series({ data: [null, null, 3, null] }); + expect([...ffillSeries(s).values]).toEqual([null, null, 3, 3]); + }); + + it("NaN is treated as missing", () => { + const s = new Series({ data: [2, Number.NaN, 5] }); + const result = ffillSeries(s).values; + expect(result[0]).toBe(2); + expect(result[1]).toBe(2); + expect(result[2]).toBe(5); + }); + + it("respects limit option", () => { + const s = new Series({ data: [1, null, null, null, 5] }); + expect([...ffillSeries(s, { limit: 1 }).values]).toEqual([1, 1, null, null, 5]); + }); + + it("preserves original 
Series", () => { + const s = new Series({ data: [1, null, 3] }); + ffillSeries(s); + expect([...s.values]).toEqual([1, null, 3]); + }); + + it("empty Series returns empty", () => { + const s = new Series({ data: [] }); + expect([...ffillSeries(s).values]).toEqual([]); + }); + + it("preserves name and index", () => { + const s = new Series({ data: [1, null], name: "x" }); + const filled = ffillSeries(s); + expect(filled.name).toBe("x"); + expect(filled.index.size).toBe(2); + }); +}); + +// ─── bfillSeries ────────────────────────────────────────────────────────────── + +describe("bfillSeries", () => { + it("fills nulls with following value", () => { + const s = new Series({ data: [1, null, null, 4] }); + expect([...bfillSeries(s).values]).toEqual([1, 4, 4, 4]); + }); + + it("leaves trailing nulls untouched", () => { + const s = new Series({ data: [null, 3, null, null] }); + expect([...bfillSeries(s).values]).toEqual([3, 3, null, null]); + }); + + it("respects limit option", () => { + const s = new Series({ data: [1, null, null, null, 5] }); + expect([...bfillSeries(s, { limit: 2 }).values]).toEqual([1, null, 5, 5, 5]); + }); + + it("empty Series returns empty", () => { + const s = new Series({ data: [] }); + expect([...bfillSeries(s).values]).toEqual([]); + }); +}); + +// ─── dataFrameFfill ─────────────────────────────────────────────────────────── + +describe("dataFrameFfill (column-wise)", () => { + it("fills each column independently", () => { + const df = DataFrame.fromColumns({ a: [1, null, 3], b: [null, 2, null] }); + const result = dataFrameFfill(df); + expect([...result.col("a").values]).toEqual([1, 1, 3]); + expect([...result.col("b").values]).toEqual([null, 2, 2]); + }); + + it("preserves index", () => { + const df = DataFrame.fromColumns({ x: [1, null] }); + expect(dataFrameFfill(df).index.size).toBe(2); + }); +}); + +describe("dataFrameFfill (row-wise)", () => { + it("fills across columns per row", () => { + const df = DataFrame.fromColumns({ a: [1, 
null], b: [null, null], c: [3, 4] }); + const result = dataFrameFfill(df, { axis: 1 }); + expect([...result.col("a").values]).toEqual([1, null]); + expect([...result.col("b").values]).toEqual([1, null]); + expect([...result.col("c").values]).toEqual([3, 4]); + }); +}); + +// ─── dataFrameBfill ─────────────────────────────────────────────────────────── + +describe("dataFrameBfill (column-wise)", () => { + it("fills each column backward", () => { + const df = DataFrame.fromColumns({ a: [null, null, 3], b: [1, null, null] }); + const result = dataFrameBfill(df); + expect([...result.col("a").values]).toEqual([3, 3, 3]); + expect([...result.col("b").values]).toEqual([1, null, null]); + }); +}); + +describe("dataFrameBfill (row-wise)", () => { + it("fills backward across columns per row", () => { + const df = DataFrame.fromColumns({ a: [null, 1], b: [null, null], c: [3, null] }); + const result = dataFrameBfill(df, { axis: 1 }); + expect([...result.col("a").values]).toEqual([3, 1]); + expect([...result.col("b").values]).toEqual([3, null]); + expect([...result.col("c").values]).toEqual([3, null]); + }); +}); + +// ─── property-based tests ───────────────────────────────────────────────────── + +describe("property: ffill followed by bfill fills all if any non-null", () => { + it("all values filled when at least one is present", () => { + fc.assert( + fc.property( + fc.array(fc.option(fc.integer({ min: 0, max: 100 }), { nil: null }), { + minLength: 1, + maxLength: 20, + }), + (raw) => { + const hasNonNull = raw.some((v) => v !== null); + if (!hasNonNull) { + return true; + } + const s = new Series({ data: raw }); + const result = bfillSeries(ffillSeries(s)); + return result.values.every((v) => v !== null); + }, + ), + ); + }); +}); + +describe("property: ffill never introduces new non-null values beyond last valid", () => { + it("ffilled series has no nulls after first valid value", () => { + fc.assert( + fc.property( + fc.array(fc.option(fc.integer({ min: -50, max: 50 }), 
{ nil: null }), { + minLength: 0, + maxLength: 30, + }), + (raw) => { + const s = new Series({ data: raw }); + const filled = ffillSeries(s).values; + let sawValid = false; + for (const v of filled) { + if (v !== null) { + sawValid = true; + } + if (sawValid && v === null) { + return false; + } + } + return true; + }, + ), + ); + }); +}); + +describe("property: isna is inverse of notna for scalars", () => { + it("isna(v) === !notna(v)", () => { + fc.assert( + fc.property( + fc.oneof( + fc.integer(), + fc.float({ noNaN: false }), + fc.constant(null), + fc.string(), + fc.boolean(), + ), + (v) => isna(v as Parameters[0]) === !notna(v as Parameters[0]), + ), + ); + }); +}); From f65c41073e7886fbb94423363eadb32fc7325256 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 11 Apr 2026 12:21:59 +0000 Subject: [PATCH 2/4] Iteration 174: Add pct_change for Series and DataFrame Implements pctChangeSeries() and pctChangeDataFrame() mirroring pandas.Series.pct_change() / pandas.DataFrame.pct_change(). - periods: configurable lag (positive = backward, negative = forward) - fillMethod: "pad" (default), "bfill", or null (no fill) - limit: cap consecutive fills - axis: column-wise (default) or row-wise for DataFrame Full test coverage: unit tests, edge cases, and fast-check property tests. Interactive playground page at playground/pct_change.html. 
Run: https://github.com/githubnext/tsessebe/actions/runs/24266545401 --- playground/pct_change.html | 448 +++++++++++++++++++++++++++++++++ src/stats/pct_change.ts | 231 +++++++++++++++++ tests/stats/pct_change.test.ts | 252 +++++++++++++++++++ 3 files changed, 931 insertions(+) create mode 100644 playground/pct_change.html create mode 100644 src/stats/pct_change.ts create mode 100644 tests/stats/pct_change.test.ts diff --git a/playground/pct_change.html b/playground/pct_change.html new file mode 100644 index 00000000..3576797a --- /dev/null +++ b/playground/pct_change.html @@ -0,0 +1,448 @@ + + + + + + tsb — pct_change + + + +
+
+
Initializing playground…
+
+ ← Back to roadmap +

📊 pct_change — Interactive Playground

+

Compute the fractional change between each element and a prior element. + Mirrors pandas.Series.pct_change() / + pandas.DataFrame.pct_change().
+ Edit any code block below and press ▶ Run + (or Ctrl+Enter) to execute it live in your browser. +

+ + +
+

1 · Basic pct_change on a Series

+

pctChangeSeries(series) returns the fractional (not percentage) change + from each previous element. The first element is always null.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Multi-period change

+

The periods option controls the lag. Use periods: 2 to + compare each value to the one two steps earlier — useful for month-over-month + comparisons in quarterly data.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · Handling missing values

+

By default, pctChangeSeries forward-fills (fillMethod: "pad") + NaN/null values before computing the ratio — so gaps don't break the chain. + Set fillMethod: null to propagate NaN instead.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · Limit consecutive fills

+

The limit option caps how many consecutive NaN values get forward-filled. + Useful when you want to tolerate short gaps but not bridge large ones.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · DataFrame column-wise pct_change

+

pctChangeDataFrame(df) applies pctChangeSeries to every + column independently. Ideal for comparing multiple assets or metrics simultaneously.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · Negative periods (look-forward change)

+

A negative periods value computes the forward change: how much will + this element change by the time we reach |periods| steps ahead. + Useful for computing returns on a "hold for N periods" strategy.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+

All functions return a new Series/DataFrame of the same shape — inputs are never mutated.

+
// Series
+pctChangeSeries(series, {
+  periods?: number,           // default 1 (positive = look back, negative = look forward)
+  fillMethod?: "pad" | "bfill" | null,  // default "pad"
+  limit?: number | null,      // max consecutive fills; default unlimited
+}): Series
+
+// DataFrame
+pctChangeDataFrame(df, {
+  periods?: number,
+  fillMethod?: "pad" | "bfill" | null,
+  limit?: number | null,
+  axis?: 0 | 1 | "index" | "columns",  // default 0 (column-wise)
+}): DataFrame
+
+ + + + + diff --git a/src/stats/pct_change.ts b/src/stats/pct_change.ts new file mode 100644 index 00000000..c46c9e84 --- /dev/null +++ b/src/stats/pct_change.ts @@ -0,0 +1,231 @@ +/** + * pct_change — percentage change between current and prior element. + * + * Mirrors `pandas.Series.pct_change()` / `pandas.DataFrame.pct_change()`: + * - `pctChangeSeries(series, options)` — per-element % change + * - `pctChangeDataFrame(df, options)` — column-wise % change + * + * Formula (per element i, with shift=periods): + * `result[i] = (x[i] - x[i-periods]) / x[i-periods]` + * + * When `fillMethod` is set, NaN/null values in the source are filled *before* + * computing the ratio (matching pandas' default behaviour of `fill_method="pad"`). + * + * @module + */ + +import { DataFrame } from "../core/index.ts"; +import { Series } from "../core/index.ts"; +import type { Scalar } from "../types.ts"; + +// ─── public types ───────────────────────────────────────────────────────────── + +/** Fill method applied to NaN/null before computing pct_change. */ +export type PctChangeFillMethod = "pad" | "bfill"; + +/** Options for {@link pctChangeSeries} and {@link pctChangeDataFrame}. */ +export interface PctChangeOptions { + /** + * Number of periods (lags) to shift when computing the ratio. + * Positive values look backward; negative values look forward. + * Default `1`. + */ + readonly periods?: number; + /** + * How to fill NaN/null values *before* computing the ratio. + * - `"pad"` (default): forward-fill (last valid observation carries forward). + * - `"bfill"`: backward-fill (next valid observation fills backward). + * - `null`: no filling — NaN/null stays as-is. + */ + readonly fillMethod?: PctChangeFillMethod | null; + /** + * Maximum number of consecutive NaN/null values to fill when `fillMethod` + * is set. `undefined` / `null` means no limit. + */ + readonly limit?: number | null; +} + +/** Options for {@link pctChangeDataFrame} — adds an axis selector. 
*/ +export interface DataFramePctChangeOptions extends PctChangeOptions { + /** + * - `0` or `"index"` (default): apply operation **column-wise** (down rows). + * - `1` or `"columns"`: apply operation **row-wise** (across columns). + */ + readonly axis?: 0 | 1 | "index" | "columns"; +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** True when `v` is a valid number (not null, undefined, or NaN). */ +function isNum(v: Scalar): v is number { + return typeof v === "number" && !Number.isNaN(v) && v !== null; +} + +/** + * Forward-fill an array of scalars in place, respecting an optional limit. + * Returns a NEW array. + */ +function padFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] { + const out: Scalar[] = [...vals]; + let run = 0; + let lastValid: Scalar = null; + for (let i = 0; i < out.length; i++) { + const v = out[i] as Scalar; + if (v !== null && v !== undefined && !(typeof v === "number" && Number.isNaN(v))) { + lastValid = v; + run = 0; + } else if (lastValid !== null && (limit == null || run < limit)) { + out[i] = lastValid; + run++; + } + } + return out; +} + +/** + * Backward-fill an array of scalars, respecting an optional limit. + * Returns a NEW array. + */ +function bfillFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] { + const tmp = padFill([...vals].reverse(), limit); + return tmp.reverse(); +} + +/** Fill NaN/null in `vals` using the requested method. */ +function applyFill( + vals: readonly Scalar[], + method: PctChangeFillMethod | null | undefined, + limit: number | null | undefined, +): Scalar[] { + if (!method) return [...vals]; + return method === "pad" ? padFill(vals, limit) : bfillFill(vals, limit); +} + +/** Compute pct_change on a flat array of scalars. 
*/ +function computePct(vals: readonly Scalar[], periods: number): Scalar[] { + const n = vals.length; + const out: Scalar[] = new Array(n).fill(null); + const shift = periods; + if (shift >= 0) { + for (let i = shift; i < n; i++) { + const curr = vals[i] as Scalar; + const prev = vals[i - shift] as Scalar; + if (isNum(curr) && isNum(prev) && prev !== 0) { + out[i] = curr / prev - 1; + } else if (isNum(curr) && isNum(prev) && prev === 0) { + // 0 denominator → Infinity (same as pandas) + out[i] = curr === 0 ? Number.NaN : curr > 0 ? Infinity : -Infinity; + } else { + out[i] = null; + } + } + } else { + // Negative periods: look forward + const absShift = -shift; + for (let i = 0; i < n - absShift; i++) { + const curr = vals[i] as Scalar; + const fwd = vals[i + absShift] as Scalar; + if (isNum(curr) && isNum(fwd) && curr !== 0) { + out[i] = fwd / curr - 1; + } else if (isNum(curr) && isNum(fwd) && curr === 0) { + out[i] = fwd === 0 ? Number.NaN : fwd > 0 ? Infinity : -Infinity; + } else { + out[i] = null; + } + } + } + return out; +} + +// ─── public API ─────────────────────────────────────────────────────────────── + +/** + * Compute the fractional change between a Series element and the element + * `periods` positions earlier (or later, for negative `periods`). + * + * Matches `pandas.Series.pct_change()`. + * + * @example + * ```ts + * const s = new Series({ data: [100, 110, 99, 121] }); + * pctChangeSeries(s); // [null, 0.1, -0.1, 0.2222…] + * ``` + */ +export function pctChangeSeries(series: Series, options: PctChangeOptions = {}): Series { + const periods = options.periods ?? 1; + const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad"; + const limit = options.limit ?? null; + + const filled = applyFill(series.values, fillMethod, limit); + const result = computePct(filled, periods); + + return new Series({ + data: result, + index: series.index, + name: series.name ?? 
undefined, + }); +} + +/** + * Compute percentage change for every column (or row) of a DataFrame. + * + * Matches `pandas.DataFrame.pct_change()`. + * + * @example + * ```ts + * const df = new DataFrame(new Map([ + * ["a", new Series({ data: [100, 110, 121] })], + * ["b", new Series({ data: [200, 180, 198] })], + * ])); + * pctChangeDataFrame(df); // fractional change per column + * ``` + */ +export function pctChangeDataFrame( + df: DataFrame, + options: DataFramePctChangeOptions = {}, +): DataFrame { + const axis = options.axis ?? 0; + const colWise = axis === 0 || axis === "index"; + + if (colWise) { + const colMap = new Map>(); + for (const name of df.columns.values) { + colMap.set(name, pctChangeSeries(df.col(name), options)); + } + return new DataFrame(colMap, df.index); + } + + // Row-wise: each row across columns + const periods = options.periods ?? 1; + const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad"; + const limit = options.limit ?? null; + const nRows = df.index.length; + const cols = df.columns.values; + const nCols = cols.length; + + const resultCols = new Map(); + for (const name of cols) { + resultCols.set(name, new Array(nRows).fill(null)); + } + + for (let r = 0; r < nRows; r++) { + const row: Scalar[] = []; + for (const name of cols) { + row.push(df.col(name).values[r] as Scalar); + } + const filled = applyFill(row, fillMethod, limit); + const pct = computePct(filled, periods); + for (let c = 0; c < nCols; c++) { + (resultCols.get(cols[c] as string) as Scalar[])[r] = pct[c] as Scalar; + } + } + + const colMap = new Map>(); + for (const name of cols) { + colMap.set( + name, + new Series({ data: resultCols.get(name) as Scalar[], index: df.index, name }), + ); + } + return new DataFrame(colMap, df.index); +} diff --git a/tests/stats/pct_change.test.ts b/tests/stats/pct_change.test.ts new file mode 100644 index 00000000..98966e8c --- /dev/null +++ b/tests/stats/pct_change.test.ts @@ -0,0 +1,252 @@ +/** + * Tests for 
src/stats/pct_change.ts — pctChangeSeries, pctChangeDataFrame + */ +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Series, + pctChangeDataFrame, + pctChangeSeries, +} from "../../src/index.ts"; +import type { Scalar } from "../../src/index.ts"; + +// ─── helpers ───────────────────────────────────────────────────────────────── + +function s(data: readonly Scalar[]): Series { + return new Series({ data: [...data] }); +} + +function nanEq(a: Scalar, b: Scalar): boolean { + if (typeof a === "number" && Number.isNaN(a) && typeof b === "number" && Number.isNaN(b)) { + return true; + } + return a === b; +} + +function arrEq(a: readonly Scalar[], b: readonly Scalar[]): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (!nanEq(a[i] as Scalar, b[i] as Scalar)) return false; + } + return true; +} + +function close(a: Scalar, b: Scalar, eps = 1e-9): boolean { + if (a === null && b === null) return true; + if (typeof a !== "number" || typeof b !== "number") return false; + if (Number.isNaN(a) && Number.isNaN(b)) return true; + return Math.abs(a - b) < eps; +} + +function arrClose(a: readonly Scalar[], b: readonly Scalar[], eps = 1e-9): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (!close(a[i] as Scalar, b[i] as Scalar, eps)) return false; + } + return true; +} + +// ─── pctChangeSeries ───────────────────────────────────────────────────────── + +describe("pctChangeSeries", () => { + it("basic increasing sequence", () => { + const result = pctChangeSeries(s([100, 110, 121, 133.1])); + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0.1)).toBe(true); + expect(close(result.values[2] as Scalar, 0.1)).toBe(true); + expect(close(result.values[3] as Scalar, 0.1)).toBe(true); + }); + + it("decreasing sequence", () => { + const result = pctChangeSeries(s([200, 180, 162])); + 
expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, -0.1)).toBe(true); + expect(close(result.values[2] as Scalar, -0.1)).toBe(true); + }); + + it("periods=2", () => { + const result = pctChangeSeries(s([100, 105, 110, 121]), { periods: 2 }); + expect(result.values[0]).toBeNull(); + expect(result.values[1]).toBeNull(); + expect(close(result.values[2] as Scalar, 0.1)).toBe(true); + expect(close(result.values[3] as Scalar, (121 - 105) / 105)).toBe(true); + }); + + it("negative periods (look forward)", () => { + const result = pctChangeSeries(s([100, 110, 121]), { periods: -1 }); + expect(close(result.values[0] as Scalar, 0.1)).toBe(true); + expect(close(result.values[1] as Scalar, 0.1)).toBe(true); + expect(result.values[2]).toBeNull(); + }); + + it("NaN/null propagates when fillMethod=null", () => { + const result = pctChangeSeries(s([100, null, 110]), { fillMethod: null }); + expect(result.values[0]).toBeNull(); + expect(result.values[1]).toBeNull(); + expect(result.values[2]).toBeNull(); + }); + + it("fillMethod=pad fills NaN before computing", () => { + const result = pctChangeSeries(s([100, null, 110]), { fillMethod: "pad" }); + // after pad-fill: [100, 100, 110] + // pct: [null, 0, 0.1] + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0)).toBe(true); + expect(close(result.values[2] as Scalar, 0.1)).toBe(true); + }); + + it("fillMethod=bfill fills NaN backward before computing", () => { + const result = pctChangeSeries(s([100, null, 110, 121]), { fillMethod: "bfill" }); + // after bfill: [100, 110, 110, 121] + // pct: [null, 0.1, 0, 0.1] + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0.1)).toBe(true); + expect(close(result.values[2] as Scalar, 0)).toBe(true); + expect(close(result.values[3] as Scalar, 0.1)).toBe(true); + }); + + it("limit=1 caps forward-fill", () => { + const result = pctChangeSeries(s([100, null, null, 130]), { + fillMethod: "pad", + limit: 1, + }); + 
// after pad with limit=1: [100, 100, null, 130] + // pct: [null, 0, null, null] (null/100 → null) + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0)).toBe(true); + expect(result.values[2]).toBeNull(); + expect(result.values[3]).toBeNull(); + }); + + it("zero denominator returns Infinity", () => { + const result = pctChangeSeries(s([0, 10]), { fillMethod: null }); + expect(result.values[1]).toBe(Infinity); + }); + + it("zero/zero denominator returns NaN", () => { + const result = pctChangeSeries(s([0, 0]), { fillMethod: null }); + expect(Number.isNaN(result.values[1] as number)).toBe(true); + }); + + it("preserves Series name and index", () => { + const src = new Series({ data: [10, 20, 30], name: "price" }); + const result = pctChangeSeries(src); + expect(result.name).toBe("price"); + expect(result.index.length).toBe(3); + }); + + it("empty series returns empty", () => { + const result = pctChangeSeries(s([])); + expect(result.values.length).toBe(0); + }); + + it("single-element series returns [null]", () => { + const result = pctChangeSeries(s([42])); + expect(result.values[0]).toBeNull(); + }); +}); + +// ─── pctChangeDataFrame ─────────────────────────────────────────────────────── + +describe("pctChangeDataFrame", () => { + it("column-wise (default)", () => { + const df = new DataFrame( + new Map([ + ["a", new Series({ data: [100, 110, 121] })], + ["b", new Series({ data: [200, 180, 198] })], + ]), + ); + const result = pctChangeDataFrame(df); + const colA = result.col("a").values; + const colB = result.col("b").values; + expect(colA[0]).toBeNull(); + expect(close(colA[1] as Scalar, 0.1)).toBe(true); + expect(close(colA[2] as Scalar, 0.1)).toBe(true); + expect(colB[0]).toBeNull(); + expect(close(colB[1] as Scalar, -0.1)).toBe(true); + expect(close(colB[2] as Scalar, 0.1)).toBe(true); + }); + + it("row-wise (axis=1)", () => { + const df = new DataFrame( + new Map([ + ["a", new Series({ data: [100, 200] })], + ["b", new Series({ 
data: [110, 220] })], + ["c", new Series({ data: [121, 242] })], + ]), + ); + const result = pctChangeDataFrame(df, { axis: 1 }); + // row 0: [100, 110, 121] → [null, 0.1, 0.1] + // row 1: [200, 220, 242] → [null, 0.1, 0.1] + const row0a = result.col("a").values[0]; + const row0b = result.col("b").values[0]; + const row0c = result.col("c").values[0]; + expect(row0a).toBeNull(); + expect(close(row0b as Scalar, 0.1)).toBe(true); + expect(close(row0c as Scalar, 0.1)).toBe(true); + const row1a = result.col("a").values[1]; + const row1b = result.col("b").values[1]; + expect(row1a).toBeNull(); + expect(close(row1b as Scalar, 0.1)).toBe(true); + }); + + it("preserves column order", () => { + const df = new DataFrame( + new Map([ + ["x", new Series({ data: [1, 2] })], + ["y", new Series({ data: [3, 6] })], + ]), + ); + const result = pctChangeDataFrame(df); + expect(result.columns.values).toEqual(["x", "y"]); + }); +}); + +// ─── property-based tests ───────────────────────────────────────────────────── + +describe("pctChangeSeries — property tests", () => { + it("result length equals input length", () => { + fc.assert( + fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 0, maxLength: 50 }), (arr) => { + const result = pctChangeSeries(s(arr)); + return result.values.length === arr.length; + }), + ); + }); + + it("first element is always null for periods=1", () => { + fc.assert( + fc.property( + fc.array(fc.float({ noNaN: true }), { minLength: 1, maxLength: 50 }), + (arr) => { + const result = pctChangeSeries(s(arr)); + return result.values[0] === null; + }, + ), + ); + }); + + it("pct_change(x, -p) equals pct_change_reversed pattern", () => { + // For a sequence of positive numbers with periods=1 and periods=-1: + // result[-1][i] represents the change looking forward, so result[-1][i] = (x[i+1]-x[i])/x[i] + // and result[+1][i+1] = (x[i+1]-x[i])/x[i], so they should agree on matching indices + fc.assert( + fc.property( + fc.array(fc.float({ noNaN: true, min: 1, 
max: 1000 }), { minLength: 3, maxLength: 20 }), + (arr) => { + const fwd = pctChangeSeries(s(arr), { periods: -1, fillMethod: null }); + const bwd = pctChangeSeries(s(arr), { periods: 1, fillMethod: null }); + // fwd[i] = (arr[i+1] - arr[i]) / arr[i] + // bwd[i+1] = (arr[i+1] - arr[i]) / arr[i] ← same ratio + for (let i = 0; i < arr.length - 1; i++) { + if (!close(fwd.values[i] as Scalar, bwd.values[i + 1] as Scalar, 1e-6)) { + return false; + } + } + return true; + }, + ), + ); + }); +}); From 85580a67e7ce77b780b6e7e93cfecf4d3f8ac607 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 11 Apr 2026 12:22:00 +0000 Subject: [PATCH 3/4] Iteration 193: Add idxmin/idxmax for Series and DataFrame Run: https://github.com/githubnext/tsessebe/actions/runs/24281202174 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- playground/idxmin_idxmax.html | 439 ++++++++++++++++++++++++++++++ src/stats/idxmin_idxmax.ts | 234 ++++++++++++++++ tests/stats/idxmin_idxmax.test.ts | 270 ++++++++++++++++++ 3 files changed, 943 insertions(+) create mode 100644 playground/idxmin_idxmax.html create mode 100644 src/stats/idxmin_idxmax.ts create mode 100644 tests/stats/idxmin_idxmax.test.ts diff --git a/playground/idxmin_idxmax.html b/playground/idxmin_idxmax.html new file mode 100644 index 00000000..b771dd36 --- /dev/null +++ b/playground/idxmin_idxmax.html @@ -0,0 +1,439 @@ + + + + + + tsb — idxmin / idxmax + + + +
+
+
Loading TypeScript compiler…
+
+ + ← tsb playground +

idxmin / idxmax

+

+ Return the index label of the minimum or maximum value in a + Series or each column of a DataFrame. + Mirrors pandas.Series.idxmin(), idxmax(), + pandas.DataFrame.idxmin(), and DataFrame.idxmax(). +

+ + +
+

1 · Series.idxmin — label of the minimum value

+

Returns the index label at the position of the minimum value; ties resolve to the first occurrence. + NaN / null values are skipped by default.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Series.idxmax — label of the maximum value

+

Returns the index label at the position of the maximum value; ties resolve to the first occurrence.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · NaN handling — skipna option

+

By default NaN / null values are skipped. Set skipna: false + to propagate NaN (returns null if any value is NaN).

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · DataFrame.idxmin — row label of column minima

+

Returns a Series indexed by column names. Each value is the row label + where that column achieves its minimum; columns that are entirely NaN / null yield null.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · DataFrame.idxmax — row label of column maxima

+

Returns a Series indexed by column names, where each entry is the row + label of that column's maximum value; columns that are entirely NaN / null yield null.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · Edge cases — empty, all-NaN, all-equal

+

Behavior for empty series, series where every value is NaN, and series + where all values are equal: the first two return null, the last returns the first label.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+
// Series
+idxminSeries(series, { skipna?: boolean }): Label   // default skipna=true
+idxmaxSeries(series, { skipna?: boolean }): Label
+
+// DataFrame (axis=0 — min/max per column)
+idxminDataFrame(df, { skipna?: boolean }): Series   // indexed by column names
+idxmaxDataFrame(df, { skipna?: boolean }): Series
+
+ + + + + diff --git a/src/stats/idxmin_idxmax.ts b/src/stats/idxmin_idxmax.ts new file mode 100644 index 00000000..6ee745f9 --- /dev/null +++ b/src/stats/idxmin_idxmax.ts @@ -0,0 +1,234 @@ +/** + * idxmin / idxmax — return the index label of the minimum or maximum value. + * + * Mirrors `pandas.Series.idxmin()` / `pandas.Series.idxmax()` and + * `pandas.DataFrame.idxmin()` / `pandas.DataFrame.idxmax()`: + * + * - `idxminSeries(series)` — label of the minimum value (NaN/null excluded) + * - `idxmaxSeries(series)` — label of the maximum value (NaN/null excluded) + * - `idxminDataFrame(df)` — Series of row labels where each column achieves its min + * - `idxmaxDataFrame(df)` — Series of row labels where each column achieves its max + * + * When `skipna` is true (the default), NaN / null values are ignored. + * When `skipna` is false, any NaN / null causes the result to be `null`. + * + * @module + */ + +import type { DataFrame } from "../core/index.ts"; +import { Dtype, Series } from "../core/index.ts"; +import type { Label, Scalar } from "../types.ts"; + +// ─── public types ───────────────────────────────────────────────────────────── + +/** Options for {@link idxminSeries}, {@link idxmaxSeries}. */ +export interface IdxOptions { + /** + * Whether to skip NaN / null values. + * @defaultValue `true` + */ + readonly skipna?: boolean; +} + +/** Options for {@link idxminDataFrame}, {@link idxmaxDataFrame}. */ +export interface IdxDataFrameOptions { + /** + * Whether to skip NaN / null values. + * @defaultValue `true` + */ + readonly skipna?: boolean; +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** True when a scalar should be treated as missing. */ +function isMissing(v: Scalar): boolean { + return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v)); +} + +/** + * Find the index of the extreme value (min or max) among `values`. 
+ * Returns `null` when all values are missing (with `skipna=true`) or when + * any value is missing (with `skipna=false`). + */ +function findExtreme( + values: readonly Scalar[], + skipna: boolean, + isBetter: (a: Scalar, b: Scalar) => boolean, +): number | null { + let bestIdx: number | null = null; + let bestVal: Scalar = null; + + for (let i = 0; i < values.length; i++) { + const v = values[i] as Scalar; + if (isMissing(v)) { + if (!skipna) { + return null; + } + continue; + } + if (bestIdx === null || isBetter(v, bestVal)) { + bestIdx = i; + bestVal = v; + } + } + return bestIdx; +} + +/** Compare scalars: returns true if `a` is less than `b`. */ +function isLess(a: Scalar, b: Scalar): boolean { + if (b === null || b === undefined) { + return false; + } + return (a as number | string | boolean) < (b as number | string | boolean); +} + +/** Compare scalars: returns true if `a` is greater than `b`. */ +function isGreater(a: Scalar, b: Scalar): boolean { + if (b === null || b === undefined) { + return false; + } + return (a as number | string | boolean) > (b as number | string | boolean); +} + +// ─── public API — Series ────────────────────────────────────────────────────── + +/** + * Return the index label of the minimum value in `series`. + * + * NaN / null values are excluded when `skipna` is true (the default). + * Returns `null` when the series is empty or all values are NaN / null. + * + * Mirrors `pandas.Series.idxmin()`. + * + * @param series - Input Series. + * @param options - Options (skipna). + * @returns The index label at the minimum value, or `null` if no valid value exists. + * + * @example + * ```ts + * import { Series, idxminSeries } from "tsb"; + * + * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] }); + * idxminSeries(s); // "b" (first occurrence of 1) + * ``` + */ +export function idxminSeries(series: Series, options: IdxOptions = {}): Label { + const skipna = options.skipna ?? 
true; + const idx = findExtreme(series.values, skipna, isLess); + if (idx === null) { + return null; + } + return series.index.at(idx); +} + +/** + * Return the index label of the maximum value in `series`. + * + * NaN / null values are excluded when `skipna` is true (the default). + * Returns `null` when the series is empty or all values are NaN / null. + * + * Mirrors `pandas.Series.idxmax()`. + * + * @param series - Input Series. + * @param options - Options (skipna). + * @returns The index label at the maximum value, or `null` if no valid value exists. + * + * @example + * ```ts + * import { Series, idxmaxSeries } from "tsb"; + * + * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] }); + * idxmaxSeries(s); // "e" + * ``` + */ +export function idxmaxSeries(series: Series, options: IdxOptions = {}): Label { + const skipna = options.skipna ?? true; + const idx = findExtreme(series.values, skipna, isGreater); + if (idx === null) { + return null; + } + return series.index.at(idx); +} + +// ─── public API — DataFrame ─────────────────────────────────────────────────── + +/** + * Return a Series containing the index label of the minimum value for each column. + * + * The result Series is indexed by column names. + * NaN / null values are excluded when `skipna` is true (the default). + * Columns where all values are NaN / null yield `null` in the result. + * + * Mirrors `pandas.DataFrame.idxmin()` (axis=0). + * + * @param df - Input DataFrame. + * @param options - Options (skipna). + * @returns A Series indexed by column names, containing the row label of each column's min. + * + * @example + * ```ts + * import { DataFrame, idxminDataFrame } from "tsb"; + * + * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] }); + * idxminDataFrame(df).values; // ["y", "z"] + * ``` + */ +export function idxminDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series { + const skipna = options.skipna ?? 
true; + const colNames = df.columns.values; + const result: Label[] = colNames.map((colName) => { + const s = df.col(colName); + const idx = findExtreme(s.values, skipna, isLess); + if (idx === null) { + return null; + } + return df.index.at(idx); + }); + return new Series({ + data: result, + index: colNames as unknown as Label[], + name: null, + dtype: Dtype.from("object"), + }); +} + +/** + * Return a Series containing the index label of the maximum value for each column. + * + * The result Series is indexed by column names. + * NaN / null values are excluded when `skipna` is true (the default). + * Columns where all values are NaN / null yield `null` in the result. + * + * Mirrors `pandas.DataFrame.idxmax()` (axis=0). + * + * @param df - Input DataFrame. + * @param options - Options (skipna). + * @returns A Series indexed by column names, containing the row label of each column's max. + * + * @example + * ```ts + * import { DataFrame, idxmaxDataFrame } from "tsb"; + * + * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] }); + * idxmaxDataFrame(df).values; // ["z", "y"] + * ``` + */ +export function idxmaxDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series { + const skipna = options.skipna ?? 
true; + const colNames = df.columns.values; + const result: Label[] = colNames.map((colName) => { + const s = df.col(colName); + const idx = findExtreme(s.values, skipna, isGreater); + if (idx === null) { + return null; + } + return df.index.at(idx); + }); + return new Series({ + data: result, + index: colNames as unknown as Label[], + name: null, + dtype: Dtype.from("object"), + }); +} diff --git a/tests/stats/idxmin_idxmax.test.ts b/tests/stats/idxmin_idxmax.test.ts new file mode 100644 index 00000000..05cfd459 --- /dev/null +++ b/tests/stats/idxmin_idxmax.test.ts @@ -0,0 +1,270 @@ +/** + * Tests for src/stats/idxmin_idxmax.ts + * — idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame + */ +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Series, + idxmaxDataFrame, + idxmaxSeries, + idxminDataFrame, + idxminSeries, +} from "../../src/index.ts"; +import type { Label, Scalar } from "../../src/index.ts"; + +// ─── helpers ───────────────────────────────────────────────────────────────── + +function s(data: readonly Scalar[], index?: readonly Label[]): Series { + return new Series({ data: [...data], ...(index !== undefined ? 
{ index: [...index] } : {}) }); +} + +// ─── idxminSeries ───────────────────────────────────────────────────────────── + +describe("idxminSeries", () => { + it("returns label of the minimum value", () => { + const series = s([3, 1, 4, 1, 5], ["a", "b", "c", "d", "e"]); + expect(idxminSeries(series)).toBe("b"); // first occurrence of minimum 1 + }); + + it("returns integer index label for default index", () => { + const series = s([10, 3, 7]); + expect(idxminSeries(series)).toBe(1); + }); + + it("handles single element", () => { + const series = s([42], ["x"]); + expect(idxminSeries(series)).toBe("x"); + }); + + it("returns null for empty series", () => { + const series = s([]); + expect(idxminSeries(series)).toBeNull(); + }); + + it("skips NaN by default (skipna=true)", () => { + const series = s([Number.NaN, 2, 1, Number.NaN], ["a", "b", "c", "d"]); + expect(idxminSeries(series)).toBe("c"); + }); + + it("skips null values by default", () => { + const series = s([null, 5, 2, null], ["a", "b", "c", "d"]); + expect(idxminSeries(series)).toBe("c"); + }); + + it("returns null when all values are NaN with skipna=true", () => { + const series = s([Number.NaN, Number.NaN], ["a", "b"]); + expect(idxminSeries(series)).toBeNull(); + }); + + it("returns null when any value is NaN with skipna=false", () => { + const series = s([1, Number.NaN, 3], ["a", "b", "c"]); + expect(idxminSeries(series, { skipna: false })).toBeNull(); + }); + + it("returns correct label with skipna=false when no NaN", () => { + const series = s([5, 2, 8], ["a", "b", "c"]); + expect(idxminSeries(series, { skipna: false })).toBe("b"); + }); + + it("handles negative numbers", () => { + const series = s([-1, -5, -3], ["x", "y", "z"]); + expect(idxminSeries(series)).toBe("y"); + }); + + it("handles all equal values — returns first label", () => { + const series = s([7, 7, 7], ["p", "q", "r"]); + expect(idxminSeries(series)).toBe("p"); + }); + + it("works with string values (lexicographic min)", () => { + 
const series = s(["banana", "apple", "cherry"], ["a", "b", "c"]); + expect(idxminSeries(series)).toBe("b"); // "apple" < "banana" < "cherry" + }); + + it("handles NaN at the start with skipna=true", () => { + const series = s([Number.NaN, 3, 1], ["a", "b", "c"]); + expect(idxminSeries(series)).toBe("c"); + }); +}); + +// ─── idxmaxSeries ───────────────────────────────────────────────────────────── + +describe("idxmaxSeries", () => { + it("returns label of the maximum value", () => { + const series = s([3, 1, 4, 1, 5], ["a", "b", "c", "d", "e"]); + expect(idxmaxSeries(series)).toBe("e"); + }); + + it("returns integer index label for default index", () => { + const series = s([10, 3, 7]); + expect(idxmaxSeries(series)).toBe(0); + }); + + it("handles single element", () => { + const series = s([42], ["x"]); + expect(idxmaxSeries(series)).toBe("x"); + }); + + it("returns null for empty series", () => { + const series = s([]); + expect(idxmaxSeries(series)).toBeNull(); + }); + + it("skips NaN by default (skipna=true)", () => { + const series = s([Number.NaN, 2, 9, Number.NaN], ["a", "b", "c", "d"]); + expect(idxmaxSeries(series)).toBe("c"); + }); + + it("returns null when all values are NaN with skipna=true", () => { + const series = s([Number.NaN, Number.NaN], ["a", "b"]); + expect(idxmaxSeries(series)).toBeNull(); + }); + + it("returns null when any value is NaN with skipna=false", () => { + const series = s([1, Number.NaN, 3], ["a", "b", "c"]); + expect(idxmaxSeries(series, { skipna: false })).toBeNull(); + }); + + it("handles negative numbers", () => { + const series = s([-1, -5, -3], ["x", "y", "z"]); + expect(idxmaxSeries(series)).toBe("x"); + }); + + it("all equal — returns first label", () => { + const series = s([3, 3, 3], ["p", "q", "r"]); + expect(idxmaxSeries(series)).toBe("p"); + }); + + it("works with string values (lexicographic max)", () => { + const series = s(["banana", "apple", "cherry"], ["a", "b", "c"]); + expect(idxmaxSeries(series)).toBe("c"); // 
"cherry" > "banana" > "apple" + }); +}); + +// ─── idxminDataFrame ────────────────────────────────────────────────────────── + +describe("idxminDataFrame", () => { + it("returns row label of minimum for each column", () => { + const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] }); + const result = idxminDataFrame(df); + expect(result.at("a")).toBe("y"); // min of a is 1 at row "y" + expect(result.at("b")).toBe("z"); // min of b is 5 at row "z" + }); + + it("result is indexed by column names", () => { + const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] }); + const result = idxminDataFrame(df); + expect([...result.index.values]).toEqual(["a", "b"]); + }); + + it("skips NaN by default", () => { + const df = DataFrame.fromColumns( + { a: [Number.NaN, 2, 1], b: [5, Number.NaN, 3] }, + { index: ["x", "y", "z"] }, + ); + const result = idxminDataFrame(df); + expect(result.at("a")).toBe("z"); + expect(result.at("b")).toBe("z"); + }); + + it("returns null for column with all NaN (skipna=true)", () => { + const df = DataFrame.fromColumns( + { a: [1, 2], b: [Number.NaN, Number.NaN] }, + { index: ["x", "y"] }, + ); + const result = idxminDataFrame(df); + expect(result.at("a")).toBe("x"); + expect(result.at("b")).toBeNull(); + }); + + it("handles single row DataFrame", () => { + const df = DataFrame.fromColumns({ a: [42], b: [7] }, { index: ["row0"] }); + const result = idxminDataFrame(df); + expect(result.at("a")).toBe("row0"); + expect(result.at("b")).toBe("row0"); + }); +}); + +// ─── idxmaxDataFrame ────────────────────────────────────────────────────────── + +describe("idxmaxDataFrame", () => { + it("returns row label of maximum for each column", () => { + const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] }); + const result = idxmaxDataFrame(df); + expect(result.at("a")).toBe("z"); // max of a is 4 at row "z" + expect(result.at("b")).toBe("y"); // max of b is 20 at row "y" + }); + + 
it("result is indexed by column names", () => { + const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] }); + const result = idxmaxDataFrame(df); + expect([...result.index.values]).toEqual(["a", "b"]); + }); + + it("skips NaN by default", () => { + const df = DataFrame.fromColumns( + { a: [Number.NaN, 2, 1], b: [5, Number.NaN, 3] }, + { index: ["x", "y", "z"] }, + ); + const result = idxmaxDataFrame(df); + expect(result.at("a")).toBe("y"); + expect(result.at("b")).toBe("x"); + }); + + it("handles single row DataFrame", () => { + const df = DataFrame.fromColumns({ a: [42], b: [7] }, { index: ["row0"] }); + const result = idxmaxDataFrame(df); + expect(result.at("a")).toBe("row0"); + expect(result.at("b")).toBe("row0"); + }); +}); + +// ─── property-based tests ───────────────────────────────────────────────────── + +describe("idxminSeries property tests", () => { + it("idxmin label points to minimum value in series", () => { + fc.assert( + fc.property(fc.array(fc.double({ noNaN: true }), { minLength: 1, maxLength: 20 }), (data) => { + const series = s(data); + const label = idxminSeries(series); + if (label === null) { + return true; + } + const minVal = Math.min(...data); + return series.at(label as number) === minVal; + }), + ); + }); + + it("idxmax label points to maximum value in series", () => { + fc.assert( + fc.property(fc.array(fc.double({ noNaN: true }), { minLength: 1, maxLength: 20 }), (data) => { + const series = s(data); + const label = idxmaxSeries(series); + if (label === null) { + return true; + } + const maxVal = Math.max(...data); + return series.at(label as number) === maxVal; + }), + ); + }); + + it("idxmin and idxmax are consistent — min <= max", () => { + fc.assert( + fc.property(fc.array(fc.double({ noNaN: true }), { minLength: 2, maxLength: 20 }), (data) => { + const series = s(data); + const minLabel = idxminSeries(series); + const maxLabel = idxmaxSeries(series); + if (minLabel === null || maxLabel === null) { + return true; + } + const 
minVal = series.at(minLabel as number) as number; + const maxVal = series.at(maxLabel as number) as number; + return minVal <= maxVal; + }), + ); + }); +}); From 9ffaa5fc74e579f4d7233be41f4cd9c748a058f0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 11 Apr 2026 12:22:01 +0000 Subject: [PATCH 4/4] =?UTF-8?q?Iteration=20194:=20Add=20astype=20=E2=80=94?= =?UTF-8?q?=20dtype=20casting=20for=20Series=20and=20DataFrame?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run: https://github.com/githubnext/tsessebe/actions/runs/24281714272 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- playground/astype.html | 490 ++++++++++++++++++++++++++++++++++++++ src/core/astype.ts | 333 ++++++++++++++++++++++++++ src/core/index.ts | 2 + tests/core/astype.test.ts | 407 +++++++++++++++++++++++++++++++ 4 files changed, 1232 insertions(+) create mode 100644 playground/astype.html create mode 100644 src/core/astype.ts create mode 100644 tests/core/astype.test.ts diff --git a/playground/astype.html b/playground/astype.html new file mode 100644 index 00000000..7f418a32 --- /dev/null +++ b/playground/astype.html @@ -0,0 +1,490 @@ + + + + + + tsb — astype + + + +
+
+
Loading TypeScript compiler…
+
+ + ← tsb playground +

astype — dtype casting

+

+ Cast a Series or DataFrame to a different data type. + Mirrors pandas.Series.astype() and + pandas.DataFrame.astype(). +

+ + +
+

1 · Series.astype — basic casting

+

Cast all values in a Series to the specified dtype. + Floats are truncated when casting to integers.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Parsing strings to numbers

+

Numeric strings can be cast to int64 or float64 + automatically. Non-numeric strings raise a TypeError by default.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · Integer subtypes — overflow wraps

+

When casting to bounded integer types (int8, uint8, + etc.), values wrap around on overflow — matching NumPy/pandas semantics.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · DataFrame.astype — cast all columns

+

Pass a single dtype name to cast every column at once.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · DataFrame.astype — per-column dict

+

Pass a Record<string, DtypeName> to cast only specific columns.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · Datetime casting

+

Cast strings or Unix timestamps (milliseconds) to datetime.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+
// Series
+astypeSeries(
+  series: Series,
+  dtype: DtypeName | Dtype,
+  options?: { errors?: "raise" | "ignore" }   // default: "raise"
+): Series
+
+// DataFrame — cast all columns
+astypeDataFrame(
+  df: DataFrame,
+  dtype: DtypeName | Dtype,
+  options?: { errors?: "raise" | "ignore" }
+): DataFrame
+
+// DataFrame — cast specific columns
+astypeDataFrame(
+  df: DataFrame,
+  dtype: Record<string, DtypeName | Dtype>,
+  options?: { errors?: "raise" | "ignore" }
+): DataFrame
+
+// Low-level scalar cast
+castScalar(value: Scalar, dtype: Dtype, errors?: CastErrors): Scalar
+
+// Supported DtypeName values:
+// "int8" | "int16" | "int32" | "int64"
+// "uint8" | "uint16" | "uint32" | "uint64"
+// "float32" | "float64"
+// "bool" | "string" | "object"
+// "datetime" | "timedelta" | "category"
+
+ + + + + diff --git a/src/core/astype.ts b/src/core/astype.ts new file mode 100644 index 00000000..be0be471 --- /dev/null +++ b/src/core/astype.ts @@ -0,0 +1,333 @@ +/** + * astype — cast a Series or DataFrame to a specified dtype. + * + * Mirrors `pandas.Series.astype()` and `pandas.DataFrame.astype()`: + * + * - `astypeSeries(series, dtype)` — return a new Series with values cast to `dtype` + * - `astypeDataFrame(df, dtype)` — return a new DataFrame with columns cast to `dtype` + * + * Supported targets: all `DtypeName` values (`"int64"`, `"float64"`, `"bool"`, + * `"string"`, `"object"`, `"datetime"`, etc.). + * + * The `errors` option controls behaviour when a value cannot be cast: + * - `"raise"` (default) — throw a `TypeError`. + * - `"ignore"` — return the original value unchanged. + * + * @module + */ + +import type { DtypeName, Scalar } from "../types.ts"; +import { Dtype } from "./dtype.ts"; +import { DataFrame } from "./frame.ts"; +import { Series } from "./series.ts"; + +// ─── public types ───────────────────────────────────────────────────────────── + +/** Error handling mode for failed casts. */ +export type CastErrors = "raise" | "ignore"; + +/** Options for {@link astypeSeries}. */ +export interface AstypeOptions { + /** + * How to handle cast failures. + * @defaultValue `"raise"` + */ + readonly errors?: CastErrors; +} + +/** Options for {@link astypeDataFrame}. 
*/ +export type AstypeDataFrameOptions = AstypeOptions; + +// ─── int clamping helpers ───────────────────────────────────────────────────── + +function clampInt8(n: number): number { + return ((((n & 0xff) + 0x80) & 0xff) - 0x80) | 0; +} + +function clampInt16(n: number): number { + return ((((n & 0xffff) + 0x8000) & 0xffff) - 0x8000) | 0; +} + +function clampInt32(n: number): number { + return n | 0; +} + +function clampUint8(n: number): number { + return (n & 0xff) >>> 0; +} + +function clampUint16(n: number): number { + return (n & 0xffff) >>> 0; +} + +function clampUint32(n: number): number { + return n >>> 0; +} + +// ─── to-number helpers ──────────────────────────────────────────────────────── + +function stringToNumber(value: string, errors: CastErrors, targetName: DtypeName): number | null { + const trimmed = value.trim(); + const n = Number(trimmed); + if (trimmed === "" || Number.isNaN(n)) { + if (errors === "raise") { + throw new TypeError(`Cannot cast "${value}" to ${targetName}`); + } + return null; + } + return n; +} + +function valueToNumber(value: Scalar, errors: CastErrors, targetName: DtypeName): number | null { + if (value === null || value === undefined) { + return null; + } + if (typeof value === "boolean") { + return value ? 
1 : 0; + } + if (typeof value === "number") { + return value; + } + if (typeof value === "bigint") { + return Number(value); + } + if (value instanceof Date) { + return value.getTime(); + } + if (typeof value === "string") { + return stringToNumber(value, errors, targetName); + } + if (errors === "raise") { + throw new TypeError(`Cannot cast ${String(value)} to ${targetName}`); + } + return null; +} + +// ─── per-kind cast helpers ──────────────────────────────────────────────────── + +function castToInt(value: Scalar, dtype: Dtype, errors: CastErrors): Scalar { + const n = valueToNumber(value, errors, dtype.name); + if (n === null) { + return null; + } + if (Number.isNaN(n) || !Number.isFinite(n)) { + if (errors === "raise") { + throw new TypeError(`Cannot cast ${String(value)} to integer dtype ${dtype.name}`); + } + return value; + } + const t = Math.trunc(n); + switch (dtype.name) { + case "int8": + return clampInt8(t); + case "int16": + return clampInt16(t); + case "int32": + return clampInt32(t); + case "int64": + return t; + case "uint8": + return clampUint8(t); + case "uint16": + return clampUint16(t); + case "uint32": + return clampUint32(t); + case "uint64": + return t < 0 ? 0 : t; + default: + return t; + } +} + +function castToFloat(value: Scalar, dtype: Dtype, errors: CastErrors): Scalar { + const n = valueToNumber(value, errors, dtype.name); + if (n === null) { + return null; + } + if (Number.isNaN(n) || !Number.isFinite(n)) { + return n; + } + return dtype.name === "float32" ? 
Math.fround(n) : n; +} + +function castToBool(value: Scalar, errors: CastErrors): Scalar { + if (typeof value === "boolean") { + return value; + } + if (typeof value === "number") { + return value !== 0 && !Number.isNaN(value); + } + if (typeof value === "bigint") { + return value !== 0n; + } + if (value instanceof Date) { + return true; + } + if (typeof value === "string") { + const lower = value.trim().toLowerCase(); + if (lower === "true" || lower === "1") { + return true; + } + if (lower === "false" || lower === "0" || lower === "") { + return false; + } + return true; + } + if (errors === "raise") { + throw new TypeError(`Cannot cast ${String(value)} to bool`); + } + return value; +} + +function castToString(value: Scalar): Scalar { + if (typeof value === "string") { + return value; + } + if (value instanceof Date) { + return value.toISOString(); + } + return String(value); +} + +function castToDatetime(value: Scalar, errors: CastErrors): Scalar { + if (value instanceof Date) { + return value; + } + if (typeof value === "number") { + return new Date(value); + } + if (typeof value === "bigint") { + return new Date(Number(value)); + } + if (typeof value === "string") { + const d = new Date(value); + if (Number.isNaN(d.getTime())) { + if (errors === "raise") { + throw new TypeError(`Cannot parse "${value}" as datetime`); + } + return value; + } + return d; + } + if (errors === "raise") { + throw new TypeError(`Cannot cast ${String(value)} to datetime`); + } + return value; +} + +// ─── castScalar ─────────────────────────────────────────────────────────────── + +/** + * Cast a single scalar value to the given dtype. + * + * @throws {TypeError} when `errors === "raise"` and the cast fails. 
+ */ +export function castScalar(value: Scalar, dtype: Dtype, errors: CastErrors = "raise"): Scalar { + if (value === null || value === undefined) { + return null; + } + const { kind } = dtype; + if (kind === "int" || kind === "uint") { + return castToInt(value, dtype, errors); + } + if (kind === "float") { + return castToFloat(value, dtype, errors); + } + if (kind === "bool") { + return castToBool(value, errors); + } + if (kind === "string") { + return castToString(value); + } + if (kind === "datetime") { + return castToDatetime(value, errors); + } + // object / category / timedelta — identity + return value; +} + +// ─── astypeSeries ──────────────────────────────────────────────────────────── + +/** + * Return a new Series with all values cast to `dtype`. + * + * @param series - The source series. + * @param dtype - Target dtype: a `DtypeName` string or a `Dtype` instance. + * @param options - Cast behaviour options. + * + * @example + * ```ts + * import { Series, astypeSeries } from "tsb"; + * + * const s = new Series({ data: [1.5, 2.7, 3.9], name: "x" }); + * const asInt = astypeSeries(s, "int64"); + * // → Series [1, 2, 3] + * ``` + */ +export function astypeSeries( + series: Series, + dtype: DtypeName | Dtype, + options?: AstypeOptions, +): Series { + const targetDtype = dtype instanceof Dtype ? dtype : Dtype.from(dtype); + const errors: CastErrors = options?.errors ?? "raise"; + + const castedData = series.values.map((v) => castScalar(v, targetDtype, errors)); + + return new Series({ + data: castedData, + index: series.index, + dtype: targetDtype, + name: series.name, + }); +} + +// ─── astypeDataFrame ───────────────────────────────────────────────────────── + +/** + * Return a new DataFrame with columns cast to `dtype`. + * + * When `dtype` is a `DtypeName` or `Dtype`, **every** column is cast. + * When `dtype` is a `Record`, **only the named + * columns** are cast; all others pass through unchanged. 
+ * + * @example + * ```ts + * import { DataFrame, astypeDataFrame } from "tsb"; + * + * const df = DataFrame.fromColumns({ a: [1.5, 2.7], b: ["3", "4"] }); + * const df2 = astypeDataFrame(df, "float64"); // all columns → float64 + * const df3 = astypeDataFrame(df, { b: "int64" }); // only "b" → int64 + * ``` + */ +export function astypeDataFrame( + df: DataFrame, + dtype: DtypeName | Dtype | Readonly>, + options?: AstypeDataFrameOptions, +): DataFrame { + const errors: CastErrors = options?.errors ?? "raise"; + + const isPerColumn = + !(dtype instanceof Dtype) && typeof dtype === "object" && !(dtype instanceof Date); + + const newColMap = new Map>(); + + for (const colName of df.columns.values) { + const col = df.col(colName); + + if (isPerColumn) { + const spec = (dtype as Readonly>)[colName]; + if (spec === undefined) { + newColMap.set(colName, col); + continue; + } + const targetDtype = spec instanceof Dtype ? spec : Dtype.from(spec); + newColMap.set(colName, astypeSeries(col, targetDtype, { errors })); + } else { + const targetDtype = dtype instanceof Dtype ? 
dtype : Dtype.from(dtype as DtypeName); + newColMap.set(colName, astypeSeries(col, targetDtype, { errors })); + } + } + + return new DataFrame(newColMap, df.index); +} diff --git a/src/core/index.ts b/src/core/index.ts index ada43b65..7efb9afb 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -15,3 +15,5 @@ export { CategoricalAccessor } from "./cat_accessor.ts"; export type { CatSeriesLike } from "./cat_accessor.ts"; export { MultiIndex } from "./multi_index.ts"; export type { MultiIndexOptions } from "./multi_index.ts"; +export { castScalar, astypeSeries, astypeDataFrame } from "./astype.ts"; +export type { CastErrors, AstypeOptions, AstypeDataFrameOptions } from "./astype.ts"; diff --git a/tests/core/astype.test.ts b/tests/core/astype.test.ts new file mode 100644 index 00000000..465dce98 --- /dev/null +++ b/tests/core/astype.test.ts @@ -0,0 +1,407 @@ +/** + * Tests for src/core/astype.ts + * — castScalar, astypeSeries, astypeDataFrame + */ +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Dtype, + Series, + astypeDataFrame, + astypeSeries, + castScalar, +} from "../../src/index.ts"; +import type { Scalar } from "../../src/index.ts"; + +// ─── helpers ───────────────────────────────────────────────────────────────── + +function s(data: readonly Scalar[]): Series { + return new Series({ data: [...data] }); +} + +// ─── castScalar ─────────────────────────────────────────────────────────────── + +describe("castScalar", () => { + it("null → null for any dtype", () => { + for (const name of ["int64", "float64", "bool", "string", "object"] as const) { + expect(castScalar(null, Dtype.from(name))).toBeNull(); + } + }); + + it("number → int64 (truncates float)", () => { + expect(castScalar(3.9, Dtype.from("int64"))).toBe(3); + expect(castScalar(-2.1, Dtype.from("int64"))).toBe(-2); + expect(castScalar(0, Dtype.from("int64"))).toBe(0); + }); + + it("boolean → int (1/0)", () => { + expect(castScalar(true, 
Dtype.from("int64"))).toBe(1); + expect(castScalar(false, Dtype.from("int64"))).toBe(0); + }); + + it("numeric string → int64", () => { + expect(castScalar("42", Dtype.from("int64"))).toBe(42); + expect(castScalar(" -7 ", Dtype.from("int64"))).toBe(-7); + }); + + it("non-numeric string → TypeError with errors=raise", () => { + expect(() => castScalar("hello", Dtype.from("int64"), "raise")).toThrow(TypeError); + }); + + it("non-numeric string → original value with errors=ignore", () => { + expect(castScalar("hello", Dtype.from("int64"), "ignore")).toBe("hello"); + }); + + it("number → float64 (identity for finite)", () => { + expect(castScalar(3.14, Dtype.from("float64"))).toBe(3.14); + expect(castScalar(Number.POSITIVE_INFINITY, Dtype.from("float64"))).toBe( + Number.POSITIVE_INFINITY, + ); + }); + + it("bool → float (1.0 / 0.0)", () => { + expect(castScalar(true, Dtype.from("float64"))).toBe(1); + expect(castScalar(false, Dtype.from("float64"))).toBe(0); + }); + + it("number → float32 (applies fround)", () => { + const pi = Math.PI; + const result = castScalar(pi, Dtype.from("float32")); + expect(result).toBeCloseTo(Math.fround(pi), 5); + }); + + it("number → bool", () => { + expect(castScalar(1, Dtype.from("bool"))).toBe(true); + expect(castScalar(0, Dtype.from("bool"))).toBe(false); + expect(castScalar(-5, Dtype.from("bool"))).toBe(true); + expect(castScalar(Number.NaN, Dtype.from("bool"))).toBe(false); + }); + + it("bool → bool (identity)", () => { + expect(castScalar(true, Dtype.from("bool"))).toBe(true); + expect(castScalar(false, Dtype.from("bool"))).toBe(false); + }); + + it("string → bool (truthy rules)", () => { + expect(castScalar("true", Dtype.from("bool"))).toBe(true); + expect(castScalar("false", Dtype.from("bool"))).toBe(false); + expect(castScalar("1", Dtype.from("bool"))).toBe(true); + expect(castScalar("0", Dtype.from("bool"))).toBe(false); + expect(castScalar("", Dtype.from("bool"))).toBe(false); + expect(castScalar("hello", 
Dtype.from("bool"))).toBe(true); + }); + + it("number → string", () => { + expect(castScalar(42, Dtype.from("string"))).toBe("42"); + expect(castScalar(3.14, Dtype.from("string"))).toBe("3.14"); + }); + + it("bool → string", () => { + expect(castScalar(true, Dtype.from("string"))).toBe("true"); + expect(castScalar(false, Dtype.from("string"))).toBe("false"); + }); + + it("string → string (identity)", () => { + expect(castScalar("hello", Dtype.from("string"))).toBe("hello"); + }); + + it("Date → string gives ISO string", () => { + const d = new Date("2024-01-15T00:00:00.000Z"); + expect(castScalar(d, Dtype.from("string"))).toBe(d.toISOString()); + }); + + it("number → datetime creates a Date", () => { + const ts = 1705276800000; + const result = castScalar(ts, Dtype.from("datetime")); + expect(result).toBeInstanceOf(Date); + expect((result as Date).getTime()).toBe(ts); + }); + + it("string → datetime parses ISO string", () => { + const result = castScalar("2024-01-15", Dtype.from("datetime")); + expect(result).toBeInstanceOf(Date); + }); + + it("unparseable string → TypeError with errors=raise for datetime", () => { + expect(() => castScalar("not-a-date", Dtype.from("datetime"), "raise")).toThrow(TypeError); + }); + + it("unparseable string → original value with errors=ignore for datetime", () => { + expect(castScalar("not-a-date", Dtype.from("datetime"), "ignore")).toBe("not-a-date"); + }); + + it("Date → datetime is identity", () => { + const d = new Date(); + expect(castScalar(d, Dtype.from("datetime"))).toBe(d); + }); + + it("any → object is identity", () => { + const d = new Date(); + expect(castScalar(42, Dtype.from("object"))).toBe(42); + expect(castScalar("hi", Dtype.from("object"))).toBe("hi"); + expect(castScalar(d, Dtype.from("object"))).toBe(d); + }); + + it("int8 clamping wraps correctly", () => { + expect(castScalar(127, Dtype.from("int8"))).toBe(127); + expect(castScalar(128, Dtype.from("int8"))).toBe(-128); + expect(castScalar(-128, 
Dtype.from("int8"))).toBe(-128); + expect(castScalar(-129, Dtype.from("int8"))).toBe(127); + }); + + it("uint8 clamping wraps correctly", () => { + expect(castScalar(0, Dtype.from("uint8"))).toBe(0); + expect(castScalar(255, Dtype.from("uint8"))).toBe(255); + expect(castScalar(256, Dtype.from("uint8"))).toBe(0); + expect(castScalar(-1, Dtype.from("uint8"))).toBe(255); + }); + + it("int16 clamping wraps correctly", () => { + expect(castScalar(32767, Dtype.from("int16"))).toBe(32767); + expect(castScalar(32768, Dtype.from("int16"))).toBe(-32768); + }); + + it("uint16 clamping wraps correctly", () => { + expect(castScalar(65535, Dtype.from("uint16"))).toBe(65535); + expect(castScalar(65536, Dtype.from("uint16"))).toBe(0); + }); + + it("uint32 clamping wraps correctly", () => { + expect(castScalar(4294967295, Dtype.from("uint32"))).toBe(4294967295); + expect(castScalar(4294967296, Dtype.from("uint32"))).toBe(0); + }); + + it("uint64 negative → 0", () => { + expect(castScalar(-5, Dtype.from("uint64"))).toBe(0); + }); + + it("NaN → TypeError with errors=raise for int", () => { + expect(() => castScalar(Number.NaN, Dtype.from("int64"), "raise")).toThrow(TypeError); + }); + + it("NaN → original value with errors=ignore for int", () => { + expect(castScalar(Number.NaN, Dtype.from("int64"), "ignore")).toBe(Number.NaN); + }); +}); + +// ─── astypeSeries ───────────────────────────────────────────────────────────── + +describe("astypeSeries", () => { + it("casts float series to int64", () => { + const series = s([1.5, 2.7, 3.9]); + const result = astypeSeries(series, "int64"); + expect(result.values).toEqual([1, 2, 3]); + expect(result.dtype.name).toBe("int64"); + }); + + it("casts int series to string", () => { + const series = s([1, 2, 3]); + const result = astypeSeries(series, "string"); + expect(result.values).toEqual(["1", "2", "3"]); + expect(result.dtype.name).toBe("string"); + }); + + it("casts string series to float64", () => { + const series = s(["1.1", "2.2", 
"3.3"]); + const result = astypeSeries(series, "float64"); + expect(result.values[0]).toBeCloseTo(1.1); + expect(result.values[1]).toBeCloseTo(2.2); + expect(result.dtype.name).toBe("float64"); + }); + + it("preserves null values", () => { + const series = s([1.5, null, 3.9]); + const result = astypeSeries(series, "int64"); + expect(result.values).toEqual([1, null, 3]); + }); + + it("preserves index and name", () => { + const series = new Series({ data: [1, 2, 3], index: ["a", "b", "c"], name: "my_series" }); + const result = astypeSeries(series, "float64"); + expect(result.name).toBe("my_series"); + expect(result.index.values).toEqual(["a", "b", "c"]); + }); + + it("accepts a Dtype instance", () => { + const series = s([1.5, 2.7]); + const result = astypeSeries(series, Dtype.float64); + expect(result.dtype).toBe(Dtype.float64); + }); + + it("errors=raise throws on cast failure", () => { + const series = s(["hello", "world"]); + expect(() => astypeSeries(series, "int64", { errors: "raise" })).toThrow(TypeError); + }); + + it("errors=ignore returns original value on failure", () => { + const series = s(["1", "hello", "3"]); + const result = astypeSeries(series, "int64", { errors: "ignore" }); + expect(result.values[0]).toBe(1); + expect(result.values[1]).toBe("hello"); + expect(result.values[2]).toBe(3); + }); + + it("casts bool series to int64", () => { + const series = s([true, false, true]); + const result = astypeSeries(series, "int64"); + expect(result.values).toEqual([1, 0, 1]); + }); + + it("casts int series to bool", () => { + const series = s([0, 1, -1, 2]); + const result = astypeSeries(series, "bool"); + expect(result.values).toEqual([false, true, true, true]); + }); + + it("round-trip: int → string → int64", () => { + const original = s([10, 20, 30]); + const asStr = astypeSeries(original, "string"); + const backToInt = astypeSeries(asStr, "int64"); + expect(backToInt.values).toEqual([10, 20, 30]); + }); +}); + +// ─── astypeDataFrame 
────────────────────────────────────────────────────────── + +describe("astypeDataFrame", () => { + it("casts all columns with a single dtype", () => { + const df = DataFrame.fromColumns({ a: [1.5, 2.7], b: [3.1, 4.9] }); + const result = astypeDataFrame(df, "int64"); + expect(result.col("a").values).toEqual([1, 2]); + expect(result.col("b").values).toEqual([3, 4]); + expect(result.col("a").dtype.name).toBe("int64"); + expect(result.col("b").dtype.name).toBe("int64"); + }); + + it("casts only specified columns with Record dtype", () => { + const df = DataFrame.fromColumns({ a: [1.5, 2.7], b: ["x", "y"] }); + const result = astypeDataFrame(df, { a: "int64" }); + expect(result.col("a").values).toEqual([1, 2]); + expect(result.col("a").dtype.name).toBe("int64"); + // b is unchanged + expect(result.col("b").values).toEqual(["x", "y"]); + }); + + it("preserves columns not listed in Record dtype", () => { + const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4], c: [5.5, 6.6] }); + const result = astypeDataFrame(df, { c: "int64" }); + expect(result.col("a").values).toEqual([1, 2]); + expect(result.col("b").values).toEqual([3, 4]); + expect(result.col("c").values).toEqual([5, 6]); + }); + + it("preserves index", () => { + const df = DataFrame.fromColumns({ a: [1.5, 2.5] }, { index: ["x", "y"] }); + const result = astypeDataFrame(df, "int64"); + expect(result.index.values).toEqual(["x", "y"]); + }); + + it("accepts Dtype instances in Record", () => { + const df = DataFrame.fromColumns({ a: [1.5, 2.5], b: [3.5, 4.5] }); + const result = astypeDataFrame(df, { a: Dtype.from("int64"), b: Dtype.from("string") }); + expect(result.col("a").dtype.name).toBe("int64"); + expect(result.col("b").dtype.name).toBe("string"); + }); + + it("errors=raise throws on cast failure", () => { + const df = DataFrame.fromColumns({ a: ["hello", "world"] }); + expect(() => astypeDataFrame(df, "int64", { errors: "raise" })).toThrow(TypeError); + }); + + it("errors=ignore keeps original value on 
failure", () => { + const df = DataFrame.fromColumns({ a: ["1", "hello", "3"] }); + const result = astypeDataFrame(df, "int64", { errors: "ignore" }); + expect(result.col("a").values[0]).toBe(1); + expect(result.col("a").values[1]).toBe("hello"); + expect(result.col("a").values[2]).toBe(3); + }); + + it("handles empty DataFrame gracefully", () => { + const df = DataFrame.fromColumns({}); + const result = astypeDataFrame(df, "int64"); + expect(result.columns.size).toBe(0); + }); +}); + +// ─── property-based tests ──────────────────────────────────────────────────── + +describe("castScalar property tests", () => { + it("float → int64: result is always integer or null", () => { + fc.assert( + fc.property(fc.oneof(fc.float({ noNaN: true }), fc.constant(null as Scalar)), (v) => { + const result = castScalar(v, Dtype.from("int64"), "ignore"); + if (v === null) { + return result === null; + } + if (Number.isNaN(v as number) || !Number.isFinite(v as number)) { + return true; // errors=ignore returns original + } + return typeof result === "number" && Number.isInteger(result); + }), + ); + }); + + it("any scalar → string: result is always a string or null", () => { + fc.assert( + fc.property( + fc.oneof( + fc.integer(), + fc.float({ noNaN: true }), + fc.boolean(), + fc.string(), + fc.constant(null as Scalar), + ), + (v) => { + const result = castScalar(v, Dtype.from("string"), "ignore"); + if (v === null) { + return result === null; + } + return typeof result === "string"; + }, + ), + ); + }); + + it("any numeric → bool: result is always boolean or null", () => { + fc.assert( + fc.property( + fc.oneof(fc.integer(), fc.float({ noNaN: true }), fc.constant(null as Scalar)), + (v) => { + const result = castScalar(v, Dtype.from("bool"), "ignore"); + if (v === null) { + return result === null; + } + return typeof result === "boolean"; + }, + ), + ); + }); + + it("astypeSeries float→int: output length equals input length", () => { + fc.assert( + fc.property(fc.array(fc.float({ 
noNaN: true }), { minLength: 0, maxLength: 50 }), (arr) => { + const series = new Series({ data: arr }); + const result = astypeSeries(series, "int64"); + return result.size === series.size; + }), + ); + }); + + it("astypeDataFrame with single dtype: all column dtypes match", () => { + fc.assert( + fc.property( + fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 1, maxLength: 20 }), + fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 1, maxLength: 20 }), + (colA, colB) => { + const len = Math.min(colA.length, colB.length); + const df = DataFrame.fromColumns({ a: colA.slice(0, len), b: colB.slice(0, len) }); + const result = astypeDataFrame(df, "float64"); + return ( + result.col("a").dtype.name === "float64" && result.col("b").dtype.name === "float64" + ); + }, + ), + ); + }); +});