diff --git a/playground/idxmin_idxmax.html b/playground/idxmin_idxmax.html new file mode 100644 index 00000000..b771dd36 --- /dev/null +++ b/playground/idxmin_idxmax.html @@ -0,0 +1,439 @@ + + + + + + tsb — idxmin / idxmax + + + +
+
+
Loading TypeScript compiler…
+
+ + ← tsb playground +

idxmin / idxmax

+

+ Return the index label of the minimum or maximum value in a + Series or each column of a DataFrame. + Mirrors pandas.Series.idxmin(), idxmax(), + pandas.DataFrame.idxmin(), and DataFrame.idxmax(). +

+ + +
+

1 · Series.idxmin — label of the minimum value

+

Returns the index label at the position of the minimum value. + NaN / null values are skipped by default.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Series.idxmax — label of the maximum value

+

Returns the index label at the position of the maximum value.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · NaN handling — skipna option

+

By default NaN / null values are skipped. Set skipna: false + to propagate NaN (returns null if any value is NaN).

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · DataFrame.idxmin — row label of column minima

+

Returns a Series indexed by column names. Each value is the row label + where that column achieves its minimum.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · DataFrame.idxmax — row label of column maxima

+

Returns a Series indexed by column names, where each entry is the row + label of that column's maximum value.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · Edge cases — empty, all-NaN, all-equal

+

Behavior for empty series, series where every value is NaN, and series + where all values are equal.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+
// Series
+idxminSeries(series, { skipna?: boolean }): Label   // default skipna=true
+idxmaxSeries(series, { skipna?: boolean }): Label
+
+// DataFrame (axis=0 — min/max per column)
+idxminDataFrame(df, { skipna?: boolean }): Series   // indexed by column names
+idxmaxDataFrame(df, { skipna?: boolean }): Series
+
+ + + + + diff --git a/playground/index.html b/playground/index.html index 48bfbcb9..a20040cc 100644 --- a/playground/index.html +++ b/playground/index.html @@ -254,6 +254,11 @@

Element-wise transformations. clip(), seriesAbs(), seriesRound() for Series and DataFrame with min/max bounds, decimal precision, and axis support.

✅ Complete
+
+

🔍 missing-value ops

+

Detect and fill missing values. isna(), notna(), isnull(), notnull() for scalars/Series/DataFrame. ffillSeries(), bfillSeries(), dataFrameFfill(), dataFrameBfill() with optional limit and axis support.

+
✅ Complete
+

🔢 value_counts

Count unique values. valueCounts() for Series and dataFrameValueCounts() for DataFrame with normalize, sort, ascending, and dropna options.

@@ -264,6 +269,16 @@

✅ Complete

+
+

📊 pct_change

+

Fractional change between elements. pctChangeSeries() and pctChangeDataFrame() with periods, fillMethod (pad/bfill), limit, and axis options.

+
✅ Complete
+
+
+

🔎 idxmin / idxmax

+

Return the index label of the minimum or maximum value. idxminSeries(), idxmaxSeries(), idxminDataFrame(), idxmaxDataFrame() with skipna support.

+
✅ Complete
+
diff --git a/playground/na_ops.html b/playground/na_ops.html new file mode 100644 index 00000000..c321438f --- /dev/null +++ b/playground/na_ops.html @@ -0,0 +1,480 @@ + + + + + + tsb — missing-value operations (isna, ffill, bfill) + + + +
+
+
Loading tsb runtime…
+
+ + ← Back to playground index + +

Missing-value operations

+

+ isna / notna — detect missing values in scalars, + Series, and DataFrames.
+ ffill / bfill — propagate the last (or next) valid + value to fill gaps.
+ Mirrors pd.isna(), Series.ffill(), and + DataFrame.bfill() from pandas. +

+ + +
+

1 · isna / notna on scalars

+

+ Returns true / false for individual values. + null, undefined, and NaN are all + considered "missing". +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · isna on a Series

+

+ When passed a Series, isna returns a boolean Series of the + same length — true where values are missing. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · isna on a DataFrame

+

+ Returns a DataFrame of booleans with the same shape — one column per + original column, true where missing. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · Forward-fill (ffillSeries)

+

+ Propagates the last valid value forward to fill gaps. Leading + nulls that have no preceding value remain null. + Use the optional limit to cap consecutive fills. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · Backward-fill (bfillSeries)

+

+ Propagates the next valid value backward to fill gaps. Trailing + nulls that have no following value remain null. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · DataFrame forward-fill & backward-fill

+

+ dataFrameFfill and dataFrameBfill apply fill + column-wise by default (axis=0). Pass axis: 1 to fill + row-wise across columns. +

+
+
+
+ + +
+
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+
// Module-level missing-value detection
+isna(value: Scalar): boolean
+isna(value: Series): Series<boolean>
+isna(value: DataFrame): DataFrame
+
+notna(value: Scalar): boolean
+notna(value: Series): Series<boolean>
+notna(value: DataFrame): DataFrame
+
+// Aliases
+isnull(...)  // same as isna
+notnull(...) // same as notna
+
+// Series forward / backward fill
+ffillSeries(series, options?: { limit?: number | null }): Series
+bfillSeries(series, options?: { limit?: number | null }): Series
+
+// DataFrame forward / backward fill
+dataFrameFfill(df, options?: {
+  limit?: number | null,   // max consecutive fills (default: no limit)
+  axis?: 0 | 1 | "index" | "columns",  // default 0 (column-wise)
+}): DataFrame
+
+dataFrameBfill(df, options?: {
+  limit?: number | null,
+  axis?: 0 | 1 | "index" | "columns",
+}): DataFrame
+
+ + + + + diff --git a/playground/pct_change.html b/playground/pct_change.html new file mode 100644 index 00000000..3576797a --- /dev/null +++ b/playground/pct_change.html @@ -0,0 +1,448 @@ + + + + + + tsb — pct_change + + + +
+
+
Initializing playground…
+
+ ← Back to roadmap +

📊 pct_change — Interactive Playground

+

Compute the fractional change between each element and a prior element. + Mirrors pandas.Series.pct_change() / + pandas.DataFrame.pct_change().
+ Edit any code block below and press ▶ Run + (or Ctrl+Enter) to execute it live in your browser. +

+ + +
+

1 · Basic pct_change on a Series

+

pctChangeSeries(series) returns the fractional (not percentage) change + of each element relative to the previous one. The first element is always null.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

2 · Multi-period change

+

The periods option controls the lag. Use periods: 2 to + compare each value to the one two steps earlier — useful, for example, for measuring + two-month changes in monthly data.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

3 · Handling missing values

+

By default, pctChangeSeries forward-fills (fillMethod: "pad") + NaN/null values before computing the ratio — so gaps don't break the chain. + Set fillMethod: null to propagate NaN instead.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

4 · Limit consecutive fills

+

The limit option caps how many consecutive NaN values get forward-filled. + Useful when you want to tolerate short gaps but not bridge large ones.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

5 · DataFrame column-wise pct_change

+

pctChangeDataFrame(df) applies pctChangeSeries to every + column independently. Ideal for comparing multiple assets or metrics simultaneously.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

6 · Negative periods (look-forward change)

+

A negative periods value computes the forward change: how much will + this element change by the time we reach |periods| steps ahead. + Useful for computing returns on a "hold for N periods" strategy.

+
+
+ TypeScript +
+ + +
+
+ + +
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+ + +
+

API Reference

+

All functions return a new Series/DataFrame of the same shape — inputs are never mutated.

+
// Series
+pctChangeSeries(series, {
+  periods?: number,           // default 1 (positive = look back, negative = look forward)
+  fillMethod?: "pad" | "bfill" | null,  // default "pad"
+  limit?: number | null,      // max consecutive fills; default unlimited
+}): Series
+
+// DataFrame
+pctChangeDataFrame(df, {
+  periods?: number,
+  fillMethod?: "pad" | "bfill" | null,
+  limit?: number | null,
+  axis?: 0 | 1 | "index" | "columns",  // default 0 (column-wise)
+}): DataFrame
+
+ + + + + diff --git a/src/index.ts b/src/index.ts index 1dd0aa57..5b3bd98d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -107,3 +107,22 @@ export { export type { ClipOptions, RoundOptions, DataFrameElemOptions } from "./stats/index.ts"; export { valueCounts, dataFrameValueCounts } from "./stats/index.ts"; export type { ValueCountsOptions, DataFrameValueCountsOptions } from "./stats/index.ts"; +export { + isna, + notna, + isnull, + notnull, + ffillSeries, + bfillSeries, + dataFrameFfill, + dataFrameBfill, +} from "./stats/index.ts"; +export type { FillDirectionOptions, DataFrameFillOptions } from "./stats/index.ts"; +export { pctChangeSeries, pctChangeDataFrame } from "./stats/index.ts"; +export type { + PctChangeFillMethod, + PctChangeOptions, + DataFramePctChangeOptions, +} from "./stats/index.ts"; +export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./stats/index.ts"; +export type { IdxOptions, IdxDataFrameOptions } from "./stats/index.ts"; diff --git a/src/stats/idxmin_idxmax.ts b/src/stats/idxmin_idxmax.ts new file mode 100644 index 00000000..6ee745f9 --- /dev/null +++ b/src/stats/idxmin_idxmax.ts @@ -0,0 +1,234 @@ +/** + * idxmin / idxmax — return the index label of the minimum or maximum value. + * + * Mirrors `pandas.Series.idxmin()` / `pandas.Series.idxmax()` and + * `pandas.DataFrame.idxmin()` / `pandas.DataFrame.idxmax()`: + * + * - `idxminSeries(series)` — label of the minimum value (NaN/null excluded) + * - `idxmaxSeries(series)` — label of the maximum value (NaN/null excluded) + * - `idxminDataFrame(df)` — Series of row labels where each column achieves its min + * - `idxmaxDataFrame(df)` — Series of row labels where each column achieves its max + * + * When `skipna` is true (the default), NaN / null values are ignored. + * When `skipna` is false, any NaN / null causes the result to be `null`. 
+ * + * @module + */ + +import type { DataFrame } from "../core/index.ts"; +import { Dtype, Series } from "../core/index.ts"; +import type { Label, Scalar } from "../types.ts"; + +// ─── public types ───────────────────────────────────────────────────────────── + +/** Options for {@link idxminSeries}, {@link idxmaxSeries}. */ +export interface IdxOptions { + /** + * Whether to skip NaN / null values. + * @defaultValue `true` + */ + readonly skipna?: boolean; +} + +/** Options for {@link idxminDataFrame}, {@link idxmaxDataFrame}. */ +export interface IdxDataFrameOptions { + /** + * Whether to skip NaN / null values. + * @defaultValue `true` + */ + readonly skipna?: boolean; +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** True when a scalar should be treated as missing. */ +function isMissing(v: Scalar): boolean { + return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v)); +} + +/** + * Find the index of the extreme value (min or max) among `values`. + * Returns `null` when all values are missing (with `skipna=true`) or when + * any value is missing (with `skipna=false`). + */ +function findExtreme( + values: readonly Scalar[], + skipna: boolean, + isBetter: (a: Scalar, b: Scalar) => boolean, +): number | null { + let bestIdx: number | null = null; + let bestVal: Scalar = null; + + for (let i = 0; i < values.length; i++) { + const v = values[i] as Scalar; + if (isMissing(v)) { + if (!skipna) { + return null; + } + continue; + } + if (bestIdx === null || isBetter(v, bestVal)) { + bestIdx = i; + bestVal = v; + } + } + return bestIdx; +} + +/** Compare scalars: returns true if `a` is less than `b`. */ +function isLess(a: Scalar, b: Scalar): boolean { + if (b === null || b === undefined) { + return false; + } + return (a as number | string | boolean) < (b as number | string | boolean); +} + +/** Compare scalars: returns true if `a` is greater than `b`. 
*/ +function isGreater(a: Scalar, b: Scalar): boolean { + if (b === null || b === undefined) { + return false; + } + return (a as number | string | boolean) > (b as number | string | boolean); +} + +// ─── public API — Series ────────────────────────────────────────────────────── + +/** + * Return the index label of the minimum value in `series`. + * + * NaN / null values are excluded when `skipna` is true (the default). + * Returns `null` when the series is empty or all values are NaN / null. + * + * Mirrors `pandas.Series.idxmin()`. + * + * @param series - Input Series. + * @param options - Options (skipna). + * @returns The index label at the minimum value, or `null` if no valid value exists. + * + * @example + * ```ts + * import { Series, idxminSeries } from "tsb"; + * + * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] }); + * idxminSeries(s); // "b" (first occurrence of 1) + * ``` + */ +export function idxminSeries(series: Series, options: IdxOptions = {}): Label { + const skipna = options.skipna ?? true; + const idx = findExtreme(series.values, skipna, isLess); + if (idx === null) { + return null; + } + return series.index.at(idx); +} + +/** + * Return the index label of the maximum value in `series`. + * + * NaN / null values are excluded when `skipna` is true (the default). + * Returns `null` when the series is empty or all values are NaN / null. + * + * Mirrors `pandas.Series.idxmax()`. + * + * @param series - Input Series. + * @param options - Options (skipna). + * @returns The index label at the maximum value, or `null` if no valid value exists. + * + * @example + * ```ts + * import { Series, idxmaxSeries } from "tsb"; + * + * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] }); + * idxmaxSeries(s); // "e" + * ``` + */ +export function idxmaxSeries(series: Series, options: IdxOptions = {}): Label { + const skipna = options.skipna ?? 
true; + const idx = findExtreme(series.values, skipna, isGreater); + if (idx === null) { + return null; + } + return series.index.at(idx); +} + +// ─── public API — DataFrame ─────────────────────────────────────────────────── + +/** + * Return a Series containing the index label of the minimum value for each column. + * + * The result Series is indexed by column names. + * NaN / null values are excluded when `skipna` is true (the default). + * Columns where all values are NaN / null yield `null` in the result. + * + * Mirrors `pandas.DataFrame.idxmin()` (axis=0). + * + * @param df - Input DataFrame. + * @param options - Options (skipna). + * @returns A Series indexed by column names, containing the row label of each column's min. + * + * @example + * ```ts + * import { DataFrame, idxminDataFrame } from "tsb"; + * + * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] }); + * idxminDataFrame(df).values; // ["y", "z"] + * ``` + */ +export function idxminDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series { + const skipna = options.skipna ?? true; + const colNames = df.columns.values; + const result: Label[] = colNames.map((colName) => { + const s = df.col(colName); + const idx = findExtreme(s.values, skipna, isLess); + if (idx === null) { + return null; + } + return df.index.at(idx); + }); + return new Series({ + data: result, + index: colNames as unknown as Label[], + name: null, + dtype: Dtype.from("object"), + }); +} + +/** + * Return a Series containing the index label of the maximum value for each column. + * + * The result Series is indexed by column names. + * NaN / null values are excluded when `skipna` is true (the default). + * Columns where all values are NaN / null yield `null` in the result. + * + * Mirrors `pandas.DataFrame.idxmax()` (axis=0). + * + * @param df - Input DataFrame. + * @param options - Options (skipna). 
+ * @returns A Series indexed by column names, containing the row label of each column's max. + * + * @example + * ```ts + * import { DataFrame, idxmaxDataFrame } from "tsb"; + * + * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] }); + * idxmaxDataFrame(df).values; // ["z", "y"] + * ``` + */ +export function idxmaxDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series { + const skipna = options.skipna ?? true; + const colNames = df.columns.values; + const result: Label[] = colNames.map((colName) => { + const s = df.col(colName); + const idx = findExtreme(s.values, skipna, isGreater); + if (idx === null) { + return null; + } + return df.index.at(idx); + }); + return new Series({ + data: result, + index: colNames as unknown as Label[], + name: null, + dtype: Dtype.from("object"), + }); +} diff --git a/src/stats/index.ts b/src/stats/index.ts index b1de48eb..6d880f89 100644 --- a/src/stats/index.ts +++ b/src/stats/index.ts @@ -39,3 +39,22 @@ export { nsmallestDataFrame, } from "./nlargest.ts"; export type { NKeep, NTopOptions, NTopDataFrameOptions } from "./nlargest.ts"; +export { + isna, + notna, + isnull, + notnull, + ffillSeries, + bfillSeries, + dataFrameFfill, + dataFrameBfill, +} from "./na_ops.ts"; +export type { FillDirectionOptions, DataFrameFillOptions } from "./na_ops.ts"; +export { pctChangeSeries, pctChangeDataFrame } from "./pct_change.ts"; +export type { + PctChangeFillMethod, + PctChangeOptions, + DataFramePctChangeOptions, +} from "./pct_change.ts"; +export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./idxmin_idxmax.ts"; +export type { IdxOptions, IdxDataFrameOptions } from "./idxmin_idxmax.ts"; diff --git a/src/stats/na_ops.ts b/src/stats/na_ops.ts new file mode 100644 index 00000000..c776bb1f --- /dev/null +++ b/src/stats/na_ops.ts @@ -0,0 +1,336 @@ +/** + * na_ops — missing-value utilities for Series and DataFrame. 
+ * + * Mirrors the following pandas module-level functions and methods: + * - `pd.isna(obj)` / `pd.isnull(obj)` — detect missing values + * - `pd.notna(obj)` / `pd.notnull(obj)` — detect non-missing values + * - `Series.ffill()` / `DataFrame.ffill()` — forward-fill missing values + * - `Series.bfill()` / `DataFrame.bfill()` — backward-fill missing values + * + * All functions are **pure** (return new objects; inputs are unchanged). + * + * @module + */ + +import { DataFrame } from "../core/index.ts"; +import { Series } from "../core/index.ts"; +import type { Scalar } from "../types.ts"; + +// ─── public types ───────────────────────────────────────────────────────────── + +/** Options for {@link ffillSeries} and {@link bfillSeries}. */ +export interface FillDirectionOptions { + /** + * Maximum number of consecutive NaN/null values to fill. + * `null` means no limit (default). + */ + readonly limit?: number | null; +} + +/** Options for {@link dataFrameFfill} and {@link dataFrameBfill}. */ +export interface DataFrameFillOptions extends FillDirectionOptions { + /** + * - `0` or `"index"` (default): fill missing values down each **column**. + * - `1` or `"columns"`: fill missing values across each **row**. + */ + readonly axis?: 0 | 1 | "index" | "columns"; +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** True when `v` should be treated as missing. */ +function isMissing(v: Scalar): boolean { + return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v)); +} + +/** Forward-fill an array of scalars in-place (returns a new array). 
*/ +function ffillArray(vals: readonly Scalar[], limit: number | null): Scalar[] { + const out: Scalar[] = Array.from(vals); + let lastValid: Scalar = null; + let streak = 0; + for (let i = 0; i < out.length; i++) { + if (isMissing(out[i])) { + if (!isMissing(lastValid) && (limit === null || streak < limit)) { + out[i] = lastValid; + streak++; + } + } else { + lastValid = out[i] as Scalar; + streak = 0; + } + } + return out; +} + +/** Backward-fill an array of scalars (returns a new array). */ +function bfillArray(vals: readonly Scalar[], limit: number | null): Scalar[] { + const out: Scalar[] = Array.from(vals); + let nextValid: Scalar = null; + let streak = 0; + for (let i = out.length - 1; i >= 0; i--) { + if (isMissing(out[i])) { + if (!isMissing(nextValid) && (limit === null || streak < limit)) { + out[i] = nextValid; + streak++; + } + } else { + nextValid = out[i] as Scalar; + streak = 0; + } + } + return out; +} + +// ─── isna / notna ───────────────────────────────────────────────────────────── + +/** + * Detect missing values in a scalar, Series, or DataFrame. + * + * - For a **scalar**: returns `true` if the value is `null`, `undefined`, or `NaN`. + * - For a **Series**: returns a `Series` of the same index. + * - For a **DataFrame**: returns a `DataFrame` of boolean columns. + * + * Mirrors `pandas.isna()` / `pandas.isnull()`. 
+ * + * @example + * ```ts + * import { isna } from "tsb"; + * isna(null); // true + * isna(42); // false + * isna(NaN); // true + * + * const s = new Series({ data: [1, null, NaN, 4] }); + * isna(s); // Series([false, true, true, false]) + * ``` + */ +export function isna(value: Scalar): boolean; +export function isna(value: Series): Series; +export function isna(value: DataFrame): DataFrame; +export function isna( + value: Scalar | Series | DataFrame, +): boolean | Series | DataFrame { + if (value instanceof DataFrame) { + return value.isna(); + } + if (value instanceof Series) { + return value.isna(); + } + return isMissing(value as Scalar); +} + +/** + * Detect non-missing values in a scalar, Series, or DataFrame. + * + * Mirrors `pandas.notna()` / `pandas.notnull()`. + * + * @example + * ```ts + * import { notna } from "tsb"; + * notna(null); // false + * notna(42); // true + * ``` + */ +export function notna(value: Scalar): boolean; +export function notna(value: Series): Series; +export function notna(value: DataFrame): DataFrame; +export function notna( + value: Scalar | Series | DataFrame, +): boolean | Series | DataFrame { + if (value instanceof DataFrame) { + return value.notna(); + } + if (value instanceof Series) { + return value.notna(); + } + return !isMissing(value as Scalar); +} + +/** Alias for {@link isna}. Mirrors `pandas.isnull()`. */ +export const isnull = isna; + +/** Alias for {@link notna}. Mirrors `pandas.notnull()`. */ +export const notnull = notna; + +// ─── ffill ──────────────────────────────────────────────────────────────────── + +/** + * Forward-fill missing values in a Series. + * + * Each `null`/`NaN` value is replaced with the last non-missing value + * that precedes it (if any). Values before the first non-missing value + * remain missing. + * + * Mirrors `pandas.Series.ffill()`. + * + * @param series - Input Series (unchanged). + * @param options - Optional `{ limit }` — max consecutive fills. 
+ * @returns New Series with forward-filled values. + * + * @example + * ```ts + * import { ffillSeries } from "tsb"; + * const s = new Series({ data: [1, null, null, 4] }); + * ffillSeries(s); // Series([1, 1, 1, 4]) + * ``` + */ +export function ffillSeries( + series: Series, + options?: FillDirectionOptions, +): Series { + const limit = options?.limit ?? null; + const filled = ffillArray(series.values as readonly Scalar[], limit) as T[]; + return new Series({ + data: filled, + index: series.index, + dtype: series.dtype, + name: series.name ?? undefined, + }); +} + +/** + * Backward-fill missing values in a Series. + * + * Each `null`/`NaN` value is replaced with the next non-missing value + * that follows it (if any). Values after the last non-missing value + * remain missing. + * + * Mirrors `pandas.Series.bfill()`. + * + * @example + * ```ts + * import { bfillSeries } from "tsb"; + * const s = new Series({ data: [1, null, null, 4] }); + * bfillSeries(s); // Series([1, 4, 4, 4]) + * ``` + */ +export function bfillSeries( + series: Series, + options?: FillDirectionOptions, +): Series { + const limit = options?.limit ?? null; + const filled = bfillArray(series.values as readonly Scalar[], limit) as T[]; + return new Series({ + data: filled, + index: series.index, + dtype: series.dtype, + name: series.name ?? undefined, + }); +} + +// ─── DataFrame ffill / bfill ────────────────────────────────────────────────── + +/** + * Forward-fill missing values in a DataFrame. + * + * By default operates **column-wise** (axis=0): each column is independently + * forward-filled. With `axis=1` each row is forward-filled across columns. + * + * Mirrors `pandas.DataFrame.ffill()`. 
+ * + * @example + * ```ts + * import { dataFrameFfill } from "tsb"; + * const df = new DataFrame({ data: { a: [1, null, 3], b: [null, 2, null] } }); + * dataFrameFfill(df); + * // a: [1, 1, 3] + * // b: [null, 2, 2] + * ``` + */ +export function dataFrameFfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame { + const limit = options?.limit ?? null; + const axis = options?.axis ?? 0; + const byRow = axis === 1 || axis === "columns"; + + if (!byRow) { + // column-wise: fill each column independently + const colMap = new Map>(); + for (const name of df.columns.values) { + const col = df.col(name); + const filled = ffillArray(col.values, limit) as Scalar[]; + colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype })); + } + return new DataFrame(colMap, df.index); + } + + // row-wise: fill across columns for each row + const nRows = df.shape[0]; + const cols = df.columns.values; + const columns = cols.map((name) => df.col(name)); + const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values)); + for (let r = 0; r < nRows; r++) { + const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null); + const filled = ffillArray(rowVals, limit); + for (let ci = 0; ci < cols.length; ci++) { + const rowsFilledCI = rowsFilled[ci]; + if (rowsFilledCI !== undefined) { + rowsFilledCI[r] = filled[ci] ?? null; + } + } + } + const colMap = new Map>(); + for (let ci = 0; ci < cols.length; ci++) { + const name = cols[ci] as string; + const col = columns[ci] as Series; + colMap.set( + name, + new Series({ + data: rowsFilled[ci] ?? [], + index: col.index, + dtype: col.dtype, + }), + ); + } + return new DataFrame(colMap, df.index); +} + +/** + * Backward-fill missing values in a DataFrame. + * + * By default operates **column-wise** (axis=0). With `axis=1` fills across rows. + * + * Mirrors `pandas.DataFrame.bfill()`. 
+ */ +export function dataFrameBfill(df: DataFrame, options?: DataFrameFillOptions): DataFrame { + const limit = options?.limit ?? null; + const axis = options?.axis ?? 0; + const byRow = axis === 1 || axis === "columns"; + + if (!byRow) { + const colMap = new Map>(); + for (const name of df.columns.values) { + const col = df.col(name); + const filled = bfillArray(col.values, limit) as Scalar[]; + colMap.set(name, new Series({ data: filled, index: col.index, dtype: col.dtype })); + } + return new DataFrame(colMap, df.index); + } + + const nRows = df.shape[0]; + const cols = df.columns.values; + const columns = cols.map((name) => df.col(name)); + const rowsFilled: Scalar[][] = columns.map((c) => Array.from(c.values)); + for (let r = 0; r < nRows; r++) { + const rowVals: Scalar[] = columns.map((_, ci) => rowsFilled[ci]?.[r] ?? null); + const filled = bfillArray(rowVals, limit); + for (let ci = 0; ci < cols.length; ci++) { + const rowsFilledCI = rowsFilled[ci]; + if (rowsFilledCI !== undefined) { + rowsFilledCI[r] = filled[ci] ?? null; + } + } + } + const colMap = new Map>(); + for (let ci = 0; ci < cols.length; ci++) { + const name = cols[ci] as string; + const col = columns[ci] as Series; + colMap.set( + name, + new Series({ + data: rowsFilled[ci] ?? [], + index: col.index, + dtype: col.dtype, + }), + ); + } + return new DataFrame(colMap, df.index); +} diff --git a/src/stats/pct_change.ts b/src/stats/pct_change.ts new file mode 100644 index 00000000..c46c9e84 --- /dev/null +++ b/src/stats/pct_change.ts @@ -0,0 +1,231 @@ +/** + * pct_change — percentage change between current and prior element. 
+ * + * Mirrors `pandas.Series.pct_change()` / `pandas.DataFrame.pct_change()`: + * - `pctChangeSeries(series, options)` — per-element % change + * - `pctChangeDataFrame(df, options)` — column-wise % change + * + * Formula (per element i, with shift=periods): + * `result[i] = (x[i] - x[i-periods]) / x[i-periods]` + * + * When `fillMethod` is set, NaN/null values in the source are filled *before* + * computing the ratio (matching pandas' default behaviour of `fill_method="pad"`). + * + * @module + */ + +import { DataFrame } from "../core/index.ts"; +import { Series } from "../core/index.ts"; +import type { Scalar } from "../types.ts"; + +// ─── public types ───────────────────────────────────────────────────────────── + +/** Fill method applied to NaN/null before computing pct_change. */ +export type PctChangeFillMethod = "pad" | "bfill"; + +/** Options for {@link pctChangeSeries} and {@link pctChangeDataFrame}. */ +export interface PctChangeOptions { + /** + * Number of periods (lags) to shift when computing the ratio. + * Positive values look backward; negative values look forward. + * Default `1`. + */ + readonly periods?: number; + /** + * How to fill NaN/null values *before* computing the ratio. + * - `"pad"` (default): forward-fill (last valid observation carries forward). + * - `"bfill"`: backward-fill (next valid observation fills backward). + * - `null`: no filling — NaN/null stays as-is. + */ + readonly fillMethod?: PctChangeFillMethod | null; + /** + * Maximum number of consecutive NaN/null values to fill when `fillMethod` + * is set. `undefined` / `null` means no limit. + */ + readonly limit?: number | null; +} + +/** Options for {@link pctChangeDataFrame} — adds an axis selector. */ +export interface DataFramePctChangeOptions extends PctChangeOptions { + /** + * - `0` or `"index"` (default): apply operation **column-wise** (down rows). + * - `1` or `"columns"`: apply operation **row-wise** (across columns). 
+ */ + readonly axis?: 0 | 1 | "index" | "columns"; +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** True when `v` is a valid number (not null, undefined, or NaN). */ +function isNum(v: Scalar): v is number { + return typeof v === "number" && !Number.isNaN(v) && v !== null; +} + +/** + * Forward-fill an array of scalars in place, respecting an optional limit. + * Returns a NEW array. + */ +function padFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] { + const out: Scalar[] = [...vals]; + let run = 0; + let lastValid: Scalar = null; + for (let i = 0; i < out.length; i++) { + const v = out[i] as Scalar; + if (v !== null && v !== undefined && !(typeof v === "number" && Number.isNaN(v))) { + lastValid = v; + run = 0; + } else if (lastValid !== null && (limit == null || run < limit)) { + out[i] = lastValid; + run++; + } + } + return out; +} + +/** + * Backward-fill an array of scalars, respecting an optional limit. + * Returns a NEW array. + */ +function bfillFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] { + const tmp = padFill([...vals].reverse(), limit); + return tmp.reverse(); +} + +/** Fill NaN/null in `vals` using the requested method. */ +function applyFill( + vals: readonly Scalar[], + method: PctChangeFillMethod | null | undefined, + limit: number | null | undefined, +): Scalar[] { + if (!method) return [...vals]; + return method === "pad" ? padFill(vals, limit) : bfillFill(vals, limit); +} + +/** Compute pct_change on a flat array of scalars. 
*/ +function computePct(vals: readonly Scalar[], periods: number): Scalar[] { + const n = vals.length; + const out: Scalar[] = new Array(n).fill(null); + const shift = periods; + if (shift >= 0) { + for (let i = shift; i < n; i++) { + const curr = vals[i] as Scalar; + const prev = vals[i - shift] as Scalar; + if (isNum(curr) && isNum(prev) && prev !== 0) { + out[i] = curr / prev - 1; + } else if (isNum(curr) && isNum(prev) && prev === 0) { + // 0 denominator → Infinity (same as pandas) + out[i] = curr === 0 ? Number.NaN : curr > 0 ? Infinity : -Infinity; + } else { + out[i] = null; + } + } + } else { + // Negative periods: look forward + const absShift = -shift; + for (let i = 0; i < n - absShift; i++) { + const curr = vals[i] as Scalar; + const fwd = vals[i + absShift] as Scalar; + if (isNum(curr) && isNum(fwd) && curr !== 0) { + out[i] = fwd / curr - 1; + } else if (isNum(curr) && isNum(fwd) && curr === 0) { + out[i] = fwd === 0 ? Number.NaN : fwd > 0 ? Infinity : -Infinity; + } else { + out[i] = null; + } + } + } + return out; +} + +// ─── public API ─────────────────────────────────────────────────────────────── + +/** + * Compute the fractional change between a Series element and the element + * `periods` positions earlier (or later, for negative `periods`). + * + * Matches `pandas.Series.pct_change()`. + * + * @example + * ```ts + * const s = new Series({ data: [100, 110, 99, 121] }); + * pctChangeSeries(s); // [null, 0.1, -0.1, 0.2222…] + * ``` + */ +export function pctChangeSeries(series: Series, options: PctChangeOptions = {}): Series { + const periods = options.periods ?? 1; + const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad"; + const limit = options.limit ?? null; + + const filled = applyFill(series.values, fillMethod, limit); + const result = computePct(filled, periods); + + return new Series({ + data: result, + index: series.index, + name: series.name ?? 
undefined, + }); +} + +/** + * Compute percentage change for every column (or row) of a DataFrame. + * + * Matches `pandas.DataFrame.pct_change()`. + * + * @example + * ```ts + * const df = new DataFrame(new Map([ + * ["a", new Series({ data: [100, 110, 121] })], + * ["b", new Series({ data: [200, 180, 198] })], + * ])); + * pctChangeDataFrame(df); // fractional change per column + * ``` + */ +export function pctChangeDataFrame( + df: DataFrame, + options: DataFramePctChangeOptions = {}, +): DataFrame { + const axis = options.axis ?? 0; + const colWise = axis === 0 || axis === "index"; + + if (colWise) { + const colMap = new Map>(); + for (const name of df.columns.values) { + colMap.set(name, pctChangeSeries(df.col(name), options)); + } + return new DataFrame(colMap, df.index); + } + + // Row-wise: each row across columns + const periods = options.periods ?? 1; + const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad"; + const limit = options.limit ?? null; + const nRows = df.index.length; + const cols = df.columns.values; + const nCols = cols.length; + + const resultCols = new Map(); + for (const name of cols) { + resultCols.set(name, new Array(nRows).fill(null)); + } + + for (let r = 0; r < nRows; r++) { + const row: Scalar[] = []; + for (const name of cols) { + row.push(df.col(name).values[r] as Scalar); + } + const filled = applyFill(row, fillMethod, limit); + const pct = computePct(filled, periods); + for (let c = 0; c < nCols; c++) { + (resultCols.get(cols[c] as string) as Scalar[])[r] = pct[c] as Scalar; + } + } + + const colMap = new Map>(); + for (const name of cols) { + colMap.set( + name, + new Series({ data: resultCols.get(name) as Scalar[], index: df.index, name }), + ); + } + return new DataFrame(colMap, df.index); +} diff --git a/tests/stats/idxmin_idxmax.test.ts b/tests/stats/idxmin_idxmax.test.ts new file mode 100644 index 00000000..05cfd459 --- /dev/null +++ b/tests/stats/idxmin_idxmax.test.ts @@ -0,0 +1,270 @@ +/** + * 
Tests for src/stats/idxmin_idxmax.ts + * — idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame + */ +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Series, + idxmaxDataFrame, + idxmaxSeries, + idxminDataFrame, + idxminSeries, +} from "../../src/index.ts"; +import type { Label, Scalar } from "../../src/index.ts"; + +// ─── helpers ───────────────────────────────────────────────────────────────── + +function s(data: readonly Scalar[], index?: readonly Label[]): Series { + return new Series({ data: [...data], ...(index !== undefined ? { index: [...index] } : {}) }); +} + +// ─── idxminSeries ───────────────────────────────────────────────────────────── + +describe("idxminSeries", () => { + it("returns label of the minimum value", () => { + const series = s([3, 1, 4, 1, 5], ["a", "b", "c", "d", "e"]); + expect(idxminSeries(series)).toBe("b"); // first occurrence of minimum 1 + }); + + it("returns integer index label for default index", () => { + const series = s([10, 3, 7]); + expect(idxminSeries(series)).toBe(1); + }); + + it("handles single element", () => { + const series = s([42], ["x"]); + expect(idxminSeries(series)).toBe("x"); + }); + + it("returns null for empty series", () => { + const series = s([]); + expect(idxminSeries(series)).toBeNull(); + }); + + it("skips NaN by default (skipna=true)", () => { + const series = s([Number.NaN, 2, 1, Number.NaN], ["a", "b", "c", "d"]); + expect(idxminSeries(series)).toBe("c"); + }); + + it("skips null values by default", () => { + const series = s([null, 5, 2, null], ["a", "b", "c", "d"]); + expect(idxminSeries(series)).toBe("c"); + }); + + it("returns null when all values are NaN with skipna=true", () => { + const series = s([Number.NaN, Number.NaN], ["a", "b"]); + expect(idxminSeries(series)).toBeNull(); + }); + + it("returns null when any value is NaN with skipna=false", () => { + const series = s([1, Number.NaN, 3], ["a", "b", "c"]); + 
expect(idxminSeries(series, { skipna: false })).toBeNull(); + }); + + it("returns correct label with skipna=false when no NaN", () => { + const series = s([5, 2, 8], ["a", "b", "c"]); + expect(idxminSeries(series, { skipna: false })).toBe("b"); + }); + + it("handles negative numbers", () => { + const series = s([-1, -5, -3], ["x", "y", "z"]); + expect(idxminSeries(series)).toBe("y"); + }); + + it("handles all equal values — returns first label", () => { + const series = s([7, 7, 7], ["p", "q", "r"]); + expect(idxminSeries(series)).toBe("p"); + }); + + it("works with string values (lexicographic min)", () => { + const series = s(["banana", "apple", "cherry"], ["a", "b", "c"]); + expect(idxminSeries(series)).toBe("b"); // "apple" < "banana" < "cherry" + }); + + it("handles NaN at the start with skipna=true", () => { + const series = s([Number.NaN, 3, 1], ["a", "b", "c"]); + expect(idxminSeries(series)).toBe("c"); + }); +}); + +// ─── idxmaxSeries ───────────────────────────────────────────────────────────── + +describe("idxmaxSeries", () => { + it("returns label of the maximum value", () => { + const series = s([3, 1, 4, 1, 5], ["a", "b", "c", "d", "e"]); + expect(idxmaxSeries(series)).toBe("e"); + }); + + it("returns integer index label for default index", () => { + const series = s([10, 3, 7]); + expect(idxmaxSeries(series)).toBe(0); + }); + + it("handles single element", () => { + const series = s([42], ["x"]); + expect(idxmaxSeries(series)).toBe("x"); + }); + + it("returns null for empty series", () => { + const series = s([]); + expect(idxmaxSeries(series)).toBeNull(); + }); + + it("skips NaN by default (skipna=true)", () => { + const series = s([Number.NaN, 2, 9, Number.NaN], ["a", "b", "c", "d"]); + expect(idxmaxSeries(series)).toBe("c"); + }); + + it("returns null when all values are NaN with skipna=true", () => { + const series = s([Number.NaN, Number.NaN], ["a", "b"]); + expect(idxmaxSeries(series)).toBeNull(); + }); + + it("returns null when any value is 
NaN with skipna=false", () => { + const series = s([1, Number.NaN, 3], ["a", "b", "c"]); + expect(idxmaxSeries(series, { skipna: false })).toBeNull(); + }); + + it("handles negative numbers", () => { + const series = s([-1, -5, -3], ["x", "y", "z"]); + expect(idxmaxSeries(series)).toBe("x"); + }); + + it("all equal — returns first label", () => { + const series = s([3, 3, 3], ["p", "q", "r"]); + expect(idxmaxSeries(series)).toBe("p"); + }); + + it("works with string values (lexicographic max)", () => { + const series = s(["banana", "apple", "cherry"], ["a", "b", "c"]); + expect(idxmaxSeries(series)).toBe("c"); // "cherry" > "banana" > "apple" + }); +}); + +// ─── idxminDataFrame ────────────────────────────────────────────────────────── + +describe("idxminDataFrame", () => { + it("returns row label of minimum for each column", () => { + const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] }); + const result = idxminDataFrame(df); + expect(result.at("a")).toBe("y"); // min of a is 1 at row "y" + expect(result.at("b")).toBe("z"); // min of b is 5 at row "z" + }); + + it("result is indexed by column names", () => { + const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] }); + const result = idxminDataFrame(df); + expect([...result.index.values]).toEqual(["a", "b"]); + }); + + it("skips NaN by default", () => { + const df = DataFrame.fromColumns( + { a: [Number.NaN, 2, 1], b: [5, Number.NaN, 3] }, + { index: ["x", "y", "z"] }, + ); + const result = idxminDataFrame(df); + expect(result.at("a")).toBe("z"); + expect(result.at("b")).toBe("z"); + }); + + it("returns null for column with all NaN (skipna=true)", () => { + const df = DataFrame.fromColumns( + { a: [1, 2], b: [Number.NaN, Number.NaN] }, + { index: ["x", "y"] }, + ); + const result = idxminDataFrame(df); + expect(result.at("a")).toBe("x"); + expect(result.at("b")).toBeNull(); + }); + + it("handles single row DataFrame", () => { + const df = DataFrame.fromColumns({ a: [42], 
b: [7] }, { index: ["row0"] }); + const result = idxminDataFrame(df); + expect(result.at("a")).toBe("row0"); + expect(result.at("b")).toBe("row0"); + }); +}); + +// ─── idxmaxDataFrame ────────────────────────────────────────────────────────── + +describe("idxmaxDataFrame", () => { + it("returns row label of maximum for each column", () => { + const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] }); + const result = idxmaxDataFrame(df); + expect(result.at("a")).toBe("z"); // max of a is 4 at row "z" + expect(result.at("b")).toBe("y"); // max of b is 20 at row "y" + }); + + it("result is indexed by column names", () => { + const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] }); + const result = idxmaxDataFrame(df); + expect([...result.index.values]).toEqual(["a", "b"]); + }); + + it("skips NaN by default", () => { + const df = DataFrame.fromColumns( + { a: [Number.NaN, 2, 1], b: [5, Number.NaN, 3] }, + { index: ["x", "y", "z"] }, + ); + const result = idxmaxDataFrame(df); + expect(result.at("a")).toBe("y"); + expect(result.at("b")).toBe("x"); + }); + + it("handles single row DataFrame", () => { + const df = DataFrame.fromColumns({ a: [42], b: [7] }, { index: ["row0"] }); + const result = idxmaxDataFrame(df); + expect(result.at("a")).toBe("row0"); + expect(result.at("b")).toBe("row0"); + }); +}); + +// ─── property-based tests ───────────────────────────────────────────────────── + +describe("idxminSeries property tests", () => { + it("idxmin label points to minimum value in series", () => { + fc.assert( + fc.property(fc.array(fc.double({ noNaN: true }), { minLength: 1, maxLength: 20 }), (data) => { + const series = s(data); + const label = idxminSeries(series); + if (label === null) { + return true; + } + const minVal = Math.min(...data); + return series.at(label as number) === minVal; + }), + ); + }); + + it("idxmax label points to maximum value in series", () => { + fc.assert( + fc.property(fc.array(fc.double({ noNaN: 
true }), { minLength: 1, maxLength: 20 }), (data) => { + const series = s(data); + const label = idxmaxSeries(series); + if (label === null) { + return true; + } + const maxVal = Math.max(...data); + return series.at(label as number) === maxVal; + }), + ); + }); + + it("idxmin and idxmax are consistent — min <= max", () => { + fc.assert( + fc.property(fc.array(fc.double({ noNaN: true }), { minLength: 2, maxLength: 20 }), (data) => { + const series = s(data); + const minLabel = idxminSeries(series); + const maxLabel = idxmaxSeries(series); + if (minLabel === null || maxLabel === null) { + return true; + } + const minVal = series.at(minLabel as number) as number; + const maxVal = series.at(maxLabel as number) as number; + return minVal <= maxVal; + }), + ); + }); +}); diff --git a/tests/stats/na_ops.test.ts b/tests/stats/na_ops.test.ts new file mode 100644 index 00000000..340406ac --- /dev/null +++ b/tests/stats/na_ops.test.ts @@ -0,0 +1,280 @@ +/** + * Tests for na_ops — missing-value utilities (isna, notna, ffill, bfill). 
+ */ + +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Series, + bfillSeries, + dataFrameBfill, + dataFrameFfill, + ffillSeries, + isna, + isnull, + notna, + notnull, +} from "../../src/index.ts"; + +// ─── isna / notna ───────────────────────────────────────────────────────────── + +describe("isna (scalar)", () => { + it("returns true for null", () => expect(isna(null)).toBe(true)); + it("returns true for undefined", () => expect(isna(undefined)).toBe(true)); + it("returns true for NaN", () => expect(isna(Number.NaN)).toBe(true)); + it("returns false for 0", () => expect(isna(0)).toBe(false)); + it("returns false for empty string", () => expect(isna("")).toBe(false)); + it("returns false for false", () => expect(isna(false)).toBe(false)); + it("returns false for a number", () => expect(isna(42)).toBe(false)); +}); + +describe("notna (scalar)", () => { + it("returns false for null", () => expect(notna(null)).toBe(false)); + it("returns false for NaN", () => expect(notna(Number.NaN)).toBe(false)); + it("returns true for 42", () => expect(notna(42)).toBe(true)); + it("returns true for a string", () => expect(notna("hello")).toBe(true)); +}); + +describe("isnull / notnull aliases", () => { + it("isnull equals isna for scalar", () => { + expect(isnull(null)).toBe(isna(null)); + expect(isnull(42)).toBe(isna(42)); + }); + it("notnull equals notna for scalar", () => { + expect(notnull(null)).toBe(notna(null)); + expect(notnull(42)).toBe(notna(42)); + }); +}); + +describe("isna (Series)", () => { + it("returns boolean Series of correct length", () => { + const s = new Series({ data: [1, null, Number.NaN, 4] }); + const result = isna(s); + expect(result).toBeInstanceOf(Series); + expect([...result.values]).toEqual([false, true, true, false]); + }); + + it("all present", () => { + const s = new Series({ data: [1, 2, 3] }); + expect([...isna(s).values]).toEqual([false, false, false]); + }); + + it("all missing", () => { 
+ const s = new Series({ data: [null, null, Number.NaN] }); + expect([...isna(s).values]).toEqual([true, true, true]); + }); +}); + +describe("notna (Series)", () => { + it("is the inverse of isna", () => { + const s = new Series({ data: [1, null, Number.NaN, 4] }); + const na = isna(s).values; + const nna = notna(s).values; + for (let i = 0; i < na.length; i++) { + expect(nna[i]).toBe(!na[i]); + } + }); +}); + +describe("isna (DataFrame)", () => { + it("returns DataFrame of booleans", () => { + const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] }); + const result = isna(df); + expect(result).toBeInstanceOf(DataFrame); + expect([...result.col("a").values]).toEqual([false, true]); + expect([...result.col("b").values]).toEqual([true, false]); + }); +}); + +describe("notna (DataFrame)", () => { + it("returns inverse of isna DataFrame", () => { + const df = DataFrame.fromColumns({ a: [1, null], b: [Number.NaN, 2] }); + expect([...notna(df).col("a").values]).toEqual([true, false]); + expect([...notna(df).col("b").values]).toEqual([false, true]); + }); +}); + +// ─── ffillSeries ────────────────────────────────────────────────────────────── + +describe("ffillSeries", () => { + it("fills nulls with preceding value", () => { + const s = new Series({ data: [1, null, null, 4] }); + expect([...ffillSeries(s).values]).toEqual([1, 1, 1, 4]); + }); + + it("leaves leading nulls untouched", () => { + const s = new Series({ data: [null, null, 3, null] }); + expect([...ffillSeries(s).values]).toEqual([null, null, 3, 3]); + }); + + it("NaN is treated as missing", () => { + const s = new Series({ data: [2, Number.NaN, 5] }); + const result = ffillSeries(s).values; + expect(result[0]).toBe(2); + expect(result[1]).toBe(2); + expect(result[2]).toBe(5); + }); + + it("respects limit option", () => { + const s = new Series({ data: [1, null, null, null, 5] }); + expect([...ffillSeries(s, { limit: 1 }).values]).toEqual([1, 1, null, null, 5]); + }); + + it("preserves original 
Series", () => { + const s = new Series({ data: [1, null, 3] }); + ffillSeries(s); + expect([...s.values]).toEqual([1, null, 3]); + }); + + it("empty Series returns empty", () => { + const s = new Series({ data: [] }); + expect([...ffillSeries(s).values]).toEqual([]); + }); + + it("preserves name and index", () => { + const s = new Series({ data: [1, null], name: "x" }); + const filled = ffillSeries(s); + expect(filled.name).toBe("x"); + expect(filled.index.size).toBe(2); + }); +}); + +// ─── bfillSeries ────────────────────────────────────────────────────────────── + +describe("bfillSeries", () => { + it("fills nulls with following value", () => { + const s = new Series({ data: [1, null, null, 4] }); + expect([...bfillSeries(s).values]).toEqual([1, 4, 4, 4]); + }); + + it("leaves trailing nulls untouched", () => { + const s = new Series({ data: [null, 3, null, null] }); + expect([...bfillSeries(s).values]).toEqual([3, 3, null, null]); + }); + + it("respects limit option", () => { + const s = new Series({ data: [1, null, null, null, 5] }); + expect([...bfillSeries(s, { limit: 2 }).values]).toEqual([1, null, 5, 5, 5]); + }); + + it("empty Series returns empty", () => { + const s = new Series({ data: [] }); + expect([...bfillSeries(s).values]).toEqual([]); + }); +}); + +// ─── dataFrameFfill ─────────────────────────────────────────────────────────── + +describe("dataFrameFfill (column-wise)", () => { + it("fills each column independently", () => { + const df = DataFrame.fromColumns({ a: [1, null, 3], b: [null, 2, null] }); + const result = dataFrameFfill(df); + expect([...result.col("a").values]).toEqual([1, 1, 3]); + expect([...result.col("b").values]).toEqual([null, 2, 2]); + }); + + it("preserves index", () => { + const df = DataFrame.fromColumns({ x: [1, null] }); + expect(dataFrameFfill(df).index.size).toBe(2); + }); +}); + +describe("dataFrameFfill (row-wise)", () => { + it("fills across columns per row", () => { + const df = DataFrame.fromColumns({ a: [1, 
null], b: [null, null], c: [3, 4] }); + const result = dataFrameFfill(df, { axis: 1 }); + expect([...result.col("a").values]).toEqual([1, null]); + expect([...result.col("b").values]).toEqual([1, null]); + expect([...result.col("c").values]).toEqual([3, 4]); + }); +}); + +// ─── dataFrameBfill ─────────────────────────────────────────────────────────── + +describe("dataFrameBfill (column-wise)", () => { + it("fills each column backward", () => { + const df = DataFrame.fromColumns({ a: [null, null, 3], b: [1, null, null] }); + const result = dataFrameBfill(df); + expect([...result.col("a").values]).toEqual([3, 3, 3]); + expect([...result.col("b").values]).toEqual([1, null, null]); + }); +}); + +describe("dataFrameBfill (row-wise)", () => { + it("fills backward across columns per row", () => { + const df = DataFrame.fromColumns({ a: [null, 1], b: [null, null], c: [3, null] }); + const result = dataFrameBfill(df, { axis: 1 }); + expect([...result.col("a").values]).toEqual([3, 1]); + expect([...result.col("b").values]).toEqual([3, null]); + expect([...result.col("c").values]).toEqual([3, null]); + }); +}); + +// ─── property-based tests ───────────────────────────────────────────────────── + +describe("property: ffill followed by bfill fills all if any non-null", () => { + it("all values filled when at least one is present", () => { + fc.assert( + fc.property( + fc.array(fc.option(fc.integer({ min: 0, max: 100 }), { nil: null }), { + minLength: 1, + maxLength: 20, + }), + (raw) => { + const hasNonNull = raw.some((v) => v !== null); + if (!hasNonNull) { + return true; + } + const s = new Series({ data: raw }); + const result = bfillSeries(ffillSeries(s)); + return result.values.every((v) => v !== null); + }, + ), + ); + }); +}); + +describe("property: ffill never introduces new non-null values beyond last valid", () => { + it("ffilled series has no nulls after first valid value", () => { + fc.assert( + fc.property( + fc.array(fc.option(fc.integer({ min: -50, max: 50 }), 
{ nil: null }), { + minLength: 0, + maxLength: 30, + }), + (raw) => { + const s = new Series({ data: raw }); + const filled = ffillSeries(s).values; + let sawValid = false; + for (const v of filled) { + if (v !== null) { + sawValid = true; + } + if (sawValid && v === null) { + return false; + } + } + return true; + }, + ), + ); + }); +}); + +describe("property: isna is inverse of notna for scalars", () => { + it("isna(v) === !notna(v)", () => { + fc.assert( + fc.property( + fc.oneof( + fc.integer(), + fc.float({ noNaN: false }), + fc.constant(null), + fc.string(), + fc.boolean(), + ), + (v) => isna(v as Parameters<typeof isna>[0]) === !notna(v as Parameters<typeof notna>[0]), + ), + ); + }); +}); diff --git a/tests/stats/pct_change.test.ts b/tests/stats/pct_change.test.ts new file mode 100644 index 00000000..98966e8c --- /dev/null +++ b/tests/stats/pct_change.test.ts @@ -0,0 +1,252 @@ +/** + * Tests for src/stats/pct_change.ts — pctChangeSeries, pctChangeDataFrame + */ +import { describe, expect, it } from "bun:test"; +import fc from "fast-check"; +import { + DataFrame, + Series, + pctChangeDataFrame, + pctChangeSeries, +} from "../../src/index.ts"; +import type { Scalar } from "../../src/index.ts"; + +// ─── helpers ───────────────────────────────────────────────────────────────── + +function s(data: readonly Scalar[]): Series { + return new Series({ data: [...data] }); +} + +function nanEq(a: Scalar, b: Scalar): boolean { + if (typeof a === "number" && Number.isNaN(a) && typeof b === "number" && Number.isNaN(b)) { + return true; + } + return a === b; +} + +function arrEq(a: readonly Scalar[], b: readonly Scalar[]): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (!nanEq(a[i] as Scalar, b[i] as Scalar)) return false; + } + return true; +} + +function close(a: Scalar, b: Scalar, eps = 1e-9): boolean { + if (a === null && b === null) return true; + if (typeof a !== "number" || typeof b !== "number") return false; + if (Number.isNaN(a) && 
Number.isNaN(b)) return true; + return Math.abs(a - b) < eps; +} + +function arrClose(a: readonly Scalar[], b: readonly Scalar[], eps = 1e-9): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (!close(a[i] as Scalar, b[i] as Scalar, eps)) return false; + } + return true; +} + +// ─── pctChangeSeries ───────────────────────────────────────────────────────── + +describe("pctChangeSeries", () => { + it("basic increasing sequence", () => { + const result = pctChangeSeries(s([100, 110, 121, 133.1])); + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0.1)).toBe(true); + expect(close(result.values[2] as Scalar, 0.1)).toBe(true); + expect(close(result.values[3] as Scalar, 0.1)).toBe(true); + }); + + it("decreasing sequence", () => { + const result = pctChangeSeries(s([200, 180, 162])); + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, -0.1)).toBe(true); + expect(close(result.values[2] as Scalar, -0.1)).toBe(true); + }); + + it("periods=2", () => { + const result = pctChangeSeries(s([100, 105, 110, 121]), { periods: 2 }); + expect(result.values[0]).toBeNull(); + expect(result.values[1]).toBeNull(); + expect(close(result.values[2] as Scalar, 0.1)).toBe(true); + expect(close(result.values[3] as Scalar, (121 - 105) / 105)).toBe(true); + }); + + it("negative periods (look forward)", () => { + const result = pctChangeSeries(s([100, 110, 121]), { periods: -1 }); + expect(close(result.values[0] as Scalar, 0.1)).toBe(true); + expect(close(result.values[1] as Scalar, 0.1)).toBe(true); + expect(result.values[2]).toBeNull(); + }); + + it("NaN/null propagates when fillMethod=null", () => { + const result = pctChangeSeries(s([100, null, 110]), { fillMethod: null }); + expect(result.values[0]).toBeNull(); + expect(result.values[1]).toBeNull(); + expect(result.values[2]).toBeNull(); + }); + + it("fillMethod=pad fills NaN before computing", () => { + const result = 
pctChangeSeries(s([100, null, 110]), { fillMethod: "pad" }); + // after pad-fill: [100, 100, 110] + // pct: [null, 0, 0.1] + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0)).toBe(true); + expect(close(result.values[2] as Scalar, 0.1)).toBe(true); + }); + + it("fillMethod=bfill fills NaN backward before computing", () => { + const result = pctChangeSeries(s([100, null, 110, 121]), { fillMethod: "bfill" }); + // after bfill: [100, 110, 110, 121] + // pct: [null, 0.1, 0, 0.1] + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0.1)).toBe(true); + expect(close(result.values[2] as Scalar, 0)).toBe(true); + expect(close(result.values[3] as Scalar, 0.1)).toBe(true); + }); + + it("limit=1 caps forward-fill", () => { + const result = pctChangeSeries(s([100, null, null, 130]), { + fillMethod: "pad", + limit: 1, + }); + // after pad with limit=1: [100, 100, null, 130] + // pct: [null, 0, null, null] (null/100 → null) + expect(result.values[0]).toBeNull(); + expect(close(result.values[1] as Scalar, 0)).toBe(true); + expect(result.values[2]).toBeNull(); + expect(result.values[3]).toBeNull(); + }); + + it("zero denominator returns Infinity", () => { + const result = pctChangeSeries(s([0, 10]), { fillMethod: null }); + expect(result.values[1]).toBe(Infinity); + }); + + it("zero/zero denominator returns NaN", () => { + const result = pctChangeSeries(s([0, 0]), { fillMethod: null }); + expect(Number.isNaN(result.values[1] as number)).toBe(true); + }); + + it("preserves Series name and index", () => { + const src = new Series({ data: [10, 20, 30], name: "price" }); + const result = pctChangeSeries(src); + expect(result.name).toBe("price"); + expect(result.index.length).toBe(3); + }); + + it("empty series returns empty", () => { + const result = pctChangeSeries(s([])); + expect(result.values.length).toBe(0); + }); + + it("single-element series returns [null]", () => { + const result = pctChangeSeries(s([42])); + 
expect(result.values[0]).toBeNull(); + }); +}); + +// ─── pctChangeDataFrame ─────────────────────────────────────────────────────── + +describe("pctChangeDataFrame", () => { + it("column-wise (default)", () => { + const df = new DataFrame( + new Map([ + ["a", new Series({ data: [100, 110, 121] })], + ["b", new Series({ data: [200, 180, 198] })], + ]), + ); + const result = pctChangeDataFrame(df); + const colA = result.col("a").values; + const colB = result.col("b").values; + expect(colA[0]).toBeNull(); + expect(close(colA[1] as Scalar, 0.1)).toBe(true); + expect(close(colA[2] as Scalar, 0.1)).toBe(true); + expect(colB[0]).toBeNull(); + expect(close(colB[1] as Scalar, -0.1)).toBe(true); + expect(close(colB[2] as Scalar, 0.1)).toBe(true); + }); + + it("row-wise (axis=1)", () => { + const df = new DataFrame( + new Map([ + ["a", new Series({ data: [100, 200] })], + ["b", new Series({ data: [110, 220] })], + ["c", new Series({ data: [121, 242] })], + ]), + ); + const result = pctChangeDataFrame(df, { axis: 1 }); + // row 0: [100, 110, 121] → [null, 0.1, 0.1] + // row 1: [200, 220, 242] → [null, 0.1, 0.1] + const row0a = result.col("a").values[0]; + const row0b = result.col("b").values[0]; + const row0c = result.col("c").values[0]; + expect(row0a).toBeNull(); + expect(close(row0b as Scalar, 0.1)).toBe(true); + expect(close(row0c as Scalar, 0.1)).toBe(true); + const row1a = result.col("a").values[1]; + const row1b = result.col("b").values[1]; + expect(row1a).toBeNull(); + expect(close(row1b as Scalar, 0.1)).toBe(true); + }); + + it("preserves column order", () => { + const df = new DataFrame( + new Map([ + ["x", new Series({ data: [1, 2] })], + ["y", new Series({ data: [3, 6] })], + ]), + ); + const result = pctChangeDataFrame(df); + expect(result.columns.values).toEqual(["x", "y"]); + }); +}); + +// ─── property-based tests ───────────────────────────────────────────────────── + +describe("pctChangeSeries — property tests", () => { + it("result length equals input 
length", () => { + fc.assert( + fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 0, maxLength: 50 }), (arr) => { + const result = pctChangeSeries(s(arr)); + return result.values.length === arr.length; + }), + ); + }); + + it("first element is always null for periods=1", () => { + fc.assert( + fc.property( + fc.array(fc.float({ noNaN: true }), { minLength: 1, maxLength: 50 }), + (arr) => { + const result = pctChangeSeries(s(arr)); + return result.values[0] === null; + }, + ), + ); + }); + + it("pct_change(x, -p) equals pct_change_reversed pattern", () => { + // For a sequence of positive numbers with periods=1 and periods=-1: + // result[-1][i] represents the change looking forward, so result[-1][i] = (x[i+1]-x[i])/x[i] + // and result[+1][i+1] = (x[i+1]-x[i])/x[i], so they should agree on matching indices + fc.assert( + fc.property( + fc.array(fc.float({ noNaN: true, min: 1, max: 1000 }), { minLength: 3, maxLength: 20 }), + (arr) => { + const fwd = pctChangeSeries(s(arr), { periods: -1, fillMethod: null }); + const bwd = pctChangeSeries(s(arr), { periods: 1, fillMethod: null }); + // fwd[i] = (arr[i+1] - arr[i]) / arr[i] + // bwd[i+1] = (arr[i+1] - arr[i]) / arr[i] ← same ratio + for (let i = 0; i < arr.length - 1; i++) { + if (!close(fwd.values[i] as Scalar, bwd.values[i + 1] as Scalar, 1e-6)) { + return false; + } + } + return true; + }, + ), + ); + }); +});