diff --git a/biome.json b/biome.json
index 81748897..7c43b424 100644
--- a/biome.json
+++ b/biome.json
@@ -27,17 +27,24 @@
"rules": {
"recommended": true,
"complexity": {
- "all": true
+ "all": true,
+ "noExcessiveCognitiveComplexity": "warn",
+ "noForEach": "warn",
+ "useLiteralKeys": "warn",
+ "noUselessSwitchCase": "warn"
},
"correctness": {
- "all": true
+ "all": true,
+ "noNodejsModules": "warn",
+ "noUnusedVariables": "warn"
},
"nursery": {
"all": true
},
"performance": {
"all": true,
- "noBarrelFile": "off"
+ "noBarrelFile": "off",
+ "useTopLevelRegex": "warn"
},
"security": {
"all": true
@@ -45,10 +52,18 @@
"style": {
"all": true,
"noDefaultExport": "off",
- "useNamingConvention": "off"
+ "useNamingConvention": "off",
+ "noNonNullAssertion": "warn",
+ "noNamespaceImport": "warn",
+ "noParameterProperties": "warn",
+ "useDefaultSwitchClause": "warn",
+ "useCollapsedElseIf": "warn"
},
"suspicious": {
- "all": true
+ "all": true,
+ "noAssignInExpressions": "warn",
+ "noMisplacedAssertion": "warn",
+ "noApproximativeNumericConstant": "warn"
}
}
},
diff --git a/playground/astype.html b/playground/astype.html
new file mode 100644
index 00000000..efd9e5ed
--- /dev/null
+++ b/playground/astype.html
@@ -0,0 +1,438 @@
+
+
+
+
+
+ tsb — astype
+
+
+
+
+
+
Loading tsb runtime…
+
+
+ ← tsb playground
+ astype — dtype coercion
+
+ Cast Series and DataFrame values to a different dtype.
+ Mirrors pandas.Series.astype and pandas.DataFrame.astype.
+
+
+
+
+
1 · Series — float to int64
+
+ Cast floating-point values to integers via truncation (same as
+ pandas.Series.astype("int64")).
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Series — numbers to string
+
Convert every value to its string representation. Null/undefined values
+ become null (not the string "null").
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · Overflow clamping for bounded integer dtypes
+
+ Values that overflow the target integer dtype's range are clamped to
+ [min, max] — e.g. uint8 is clamped to
+ [0, 255].
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · DataFrame — cast all columns
+
Pass a single dtype name to cast every column to the same type.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · DataFrame — per-column dtype mapping
+
Pass a Record<string, DtypeName> to cast individual
+ columns. Columns not listed are carried over unchanged.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · Casting to bool
+
Zero, empty string, and NaN become false;
+ everything else (including non-zero numbers and non-empty strings)
+ becomes true.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Series cast
+astypeSeries(
+ series: Series,
+ dtype: DtypeName | Dtype,
+ options?: AstypeOptions,
+): Series
+
+// DataFrame cast (all columns or per-column mapping)
+astype(
+ df: DataFrame,
+ dtype: DtypeName | Dtype | Record<string, DtypeName | Dtype>,
+ options?: DataFrameAstypeOptions,
+): DataFrame
+
+// Low-level scalar cast
+castScalar(value: Scalar, dtype: Dtype): Scalar
+
+// Options
+interface AstypeOptions {
+ errors?: "raise" | "ignore"; // default "raise"
+}
+
+// Supported dtype names
+type DtypeName =
+ | "int8" | "int16" | "int32" | "int64"
+ | "uint8" | "uint16" | "uint32" | "uint64"
+ | "float32" | "float64"
+ | "bool" | "string" | "object"
+ | "datetime" | "timedelta" | "category"
+
+
+
+
+
+
diff --git a/playground/clip_advanced.html b/playground/clip_advanced.html
new file mode 100644
index 00000000..eb200294
--- /dev/null
+++ b/playground/clip_advanced.html
@@ -0,0 +1,163 @@
+
+
+
+
+
+ tsb — clip_advanced (per-element clipping)
+
+
+
+ tsb — clip_advanced (per-element clipping)
+
+ Clip Series and DataFrame values to per-element bounds.
+ Unlike the simple scalar clip, clipAdvancedSeries and
+ clipAdvancedDataFrame support array, Series, and DataFrame bounds —
+ enabling per-position or element-wise bound specification.
+
+
+ Core concept
+ // Scalar bounds (like pandas s.clip(lower=0, upper=5))
+clipAdvancedSeries(s, { lower: 0, upper: 5 })
+
+// Per-element array bounds
+clipAdvancedSeries(s, { lower: [1, 2, 3], upper: [4, 5, 6] })
+
+// Series bounds (positional alignment)
+clipAdvancedSeries(s, { lower: loSeries, upper: hiSeries })
+
+// DataFrame element-wise bounds
+clipAdvancedDataFrame(df, { lower: loDf, upper: hiDf })
+
+// Series broadcast on DataFrame (axis=0: one bound per column; axis=1: one per row)
+clipAdvancedDataFrame(df, { lower: loSeries, axis: 1 })
+
+
+ pandas equivalent:
+ s.clip(lower=lo_array, upper=hi_array)
+ df.clip(lower=lo_df, upper=hi_df)
+
+
+
+ Demo 1 — clipAdvancedSeries with scalar bounds
+
+
Code
+
const s = new Series({ data: [-3, 1, 5, 10] });
+clipAdvancedSeries(s, { lower: 0, upper: 6 }).values;
+// → [0, 1, 5, 6]
+
Run
+
+
+
+
+ Demo 2 — clipAdvancedSeries with per-element array bounds
+
+
Code
+
const s = new Series({ data: [-1, 0, 5, 12] });
+const lo = [2, -1, 4, 10];
+const hi = [5, 3, 8, 11];
+clipAdvancedSeries(s, { lower: lo, upper: hi }).values;
+// → [2, 0, 5, 11]
+
Run
+
+
+
+
+ Demo 3 — clipAdvancedSeries with Series bounds
+
+
Code
+
const s = new Series({ data: [0, 5, 10, 15] });
+const loBound = new Series({ data: [1, 3, 8, 12] });
+const hiBound = new Series({ data: [2, 7, 9, 20] });
+clipAdvancedSeries(s, { lower: loBound, upper: hiBound }).values;
+// → [1, 5, 9, 15]
+
Run
+
+
+
+
+ Demo 4 — clipAdvancedDataFrame with DataFrame bounds
+
+
Code
+
const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+const lo = DataFrame.fromColumns({ a: [2, 3, 4], b: [1, 4, 8] });
+const hi = DataFrame.fromColumns({ a: [3, 7, 8], b: [5, 9, 12] });
+const result = clipAdvancedDataFrame(df, { lower: lo, upper: hi });
+result.col("a").values; // → [2, 5, 8]
+result.col("b").values; // → [2, 6, 10]
+
Run
+
+
+
+
+ Demo 5 — clipAdvancedDataFrame with Series broadcast (axis=1)
+
+
Code
+
// axis=1: one lower bound per row
+const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+const loPerRow = new Series({ data: [0, 4, 10] });
+const result = clipAdvancedDataFrame(df, { lower: loPerRow, axis: 1 });
+result.col("a").values; // → [1, 5, 10]
+result.col("b").values; // → [2, 6, 10]
+
Run
+
+
+
+
+
+
diff --git a/playground/excel.html b/playground/excel.html
new file mode 100644
index 00000000..563736cf
--- /dev/null
+++ b/playground/excel.html
@@ -0,0 +1,561 @@
+
+
+
+
+
+ tsb — readExcel playground
+
+
+
+
+ 📊 readExcel — XLSX file reading
+
+ tsb can read Excel XLSX files natively — no dependencies. The
+ readExcel() function accepts a Uint8Array or
+ ArrayBuffer and returns a DataFrame.
+
+
+
+ Python equivalent:
+ pd.read_excel("data.xlsx")
+
+
+ Basic usage
+ import { readExcel, xlsxSheetNames } from "tsb";
+
+// Read first sheet (default)
+const df = readExcel(buffer);
+console.log(df.shape); // [rows, cols]
+console.log(df.columns.toArray()); // column names
+
+// List all sheet names
+const sheets = xlsxSheetNames(buffer);
+// → ["Sheet1", "Summary", "Data"]
+
+// Read a specific sheet by name
+const df2 = readExcel(buffer, { sheetName: "Summary" });
+
+// Read a specific sheet by index
+const df3 = readExcel(buffer, { sheetName: 1 });
+
+
+ Options
+
+
+
+ Option
+ Type
+ Default
+ Description
+
+
+
+
+ sheetName
+ string | number
+ 0
+ Sheet to read (name or 0-based index)
+
+
+ header
+ number | null
+ 0
+ Row index of the header, or null for no header
+
+
+ indexCol
+ string | number | null
+ null
+ Column to use as the row index
+
+
+ skipRows
+ number
+ 0
+ Data rows to skip after the header
+
+
+ nrows
+ number
+ unlimited
+ Maximum number of data rows to read
+
+
+ naValues
+ string[]
+ []
+ Additional strings to treat as NA
+
+
+
+
+ Interactive demo
+ Upload an .xlsx file to inspect it, or use the demo data below.
+
+
+
+
or
+
Load demo data
+
+
+
+ Sheet index:
+
+
+
+ No header (header: null)
+
+
+ Skip rows:
+
+ Max rows:
+
+ Parse
+
+
+ Upload a file or click "Load demo data" to start.
+
+ Advanced example
+ // Use a named column as the row index
+const df = readExcel(buffer, { indexCol: "ID" });
+
+// Skip 2 rows and read at most 100 rows
+const df2 = readExcel(buffer, { skipRows: 2, nrows: 100 });
+
+// Treat custom strings as missing
+const df3 = readExcel(buffer, { naValues: ["N/A", "MISSING", "-"] });
+
+// DataFrame operations work immediately
+df.describe();
+df.col("revenue").sum();
+df.groupby("region").mean();
+
+
+ Python equivalent
+ # pandas
+import pandas as pd
+
+df = pd.read_excel("data.xlsx", sheet_name=0)
+df = pd.read_excel("data.xlsx", sheet_name="Summary")
+df = pd.read_excel("data.xlsx", header=None)
+df = pd.read_excel("data.xlsx", index_col="ID")
+df = pd.read_excel("data.xlsx", skiprows=2, nrows=100)
+
+
+
+
+
diff --git a/playground/idxmin_idxmax.html b/playground/idxmin_idxmax.html
new file mode 100644
index 00000000..4ae4e7d3
--- /dev/null
+++ b/playground/idxmin_idxmax.html
@@ -0,0 +1,446 @@
+
+
+
+
+
+ tsb — idxmin / idxmax
+
+
+
+
+
+
Loading TypeScript compiler…
+
+
+ ← tsb playground
+ idxmin / idxmax
+
+ Return the index label of the minimum or maximum value in a
+ Series or each column of a DataFrame.
+ Mirrors pandas.Series.idxmin(), idxmax(),
+ pandas.DataFrame.idxmin(), and DataFrame.idxmax().
+
+
+
+
+
1 · Series.idxmin — label of the minimum value
+
Returns the index label at the position of the minimum value.
+ NaN / null values are skipped by default.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Series.idxmax — label of the maximum value
+
Returns the index label at the position of the maximum value.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · NaN handling — skipna option
+
By default NaN / null values are skipped. Set skipna: false
+ to propagate NaN (returns null if any value is NaN).
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · DataFrame.idxmin — row label of column minima
+
Returns a Series indexed by column names. Each value is the row label
+ where that column achieves its minimum.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · DataFrame.idxmax — row label of column maxima
+
Returns a Series indexed by column names, where each entry is the row
+ label of that column's maximum value.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · Edge cases — empty, all-NaN, all-equal
+
Behavior for empty series, series where every value is NaN, and series
+ where all values are equal.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Series
+idxminSeries(series, { skipna?: boolean }): Label // default skipna=true
+idxmaxSeries(series, { skipna?: boolean }): Label
+
+// DataFrame (axis=0 — min/max per column)
+idxminDataFrame(df, { skipna?: boolean }): Series // indexed by column names
+idxmaxDataFrame(df, { skipna?: boolean }): Series
+
+
+
+
+
+
diff --git a/playground/mode.html b/playground/mode.html
new file mode 100644
index 00000000..0a149227
--- /dev/null
+++ b/playground/mode.html
@@ -0,0 +1,125 @@
+
+
+
+
+
+ tsb — mode
+
+
+
+ ← tsb playground
+ 📊 mode
+
+ modeSeries / modeDataFrame —
+ return the most-frequent value(s), mirroring
+ Series.mode() and
+ DataFrame.mode() .
+
+ Equivalent Python: series.mode()
+
+ 1 · Single mode
+
+
const s = new Series({ data: [1, 2, 2, 3] });
+modeSeries(s).values;
+// → [2]
+
+
+
+ 2 · Tied modes — all returned sorted
+
+
const s = new Series({ data: [1, 1, 2, 2, 3] });
+modeSeries(s).values;
+// → [1, 2]
+
+
+
+ 3 · String values
+
+
const s = new Series({ data: ["cat", "dog", "dog", "bird"] });
+modeSeries(s).values;
+// → ["dog"]
+
+
+
+ 4 · Null values excluded (dropna=true default)
+
+
const s = new Series({ data: [null, 1, 1, null, null] });
+modeSeries(s).values;
+// → [1]
+
+
+
+ 5 · DataFrame column-wise (axis=0)
+
+
const df = DataFrame.fromColumns({ a: [1, 1, 2, 2], b: [5, 5, 5, 6] });
+modeDataFrame(df);
+// a: [1, 2], b: [5, null] (null-padded)
+
+
+
+ 6 · DataFrame row-wise (axis=1)
+
+
const df = DataFrame.fromColumns({ a: [1, 2], b: [1, 3], c: [2, 3] });
+modeDataFrame(df, { axis: 1 });
+// row 0: mode=1, row 1: mode=3
+
+
+
+
+
+
diff --git a/playground/nancumops.html b/playground/nancumops.html
new file mode 100644
index 00000000..d7014593
--- /dev/null
+++ b/playground/nancumops.html
@@ -0,0 +1,295 @@
+
+
+
+
+
+ tsb — NaN-Ignoring Aggregates (nancumops)
+
+
+
+
+🔢 NaN-Ignoring Aggregates
+
+ nansum, nanmean, nanmedian, nanstd, nanvar,
+ nanmin, nanmax, nanprod, nancount
+ — mirrors numpy.nan* functions in pandas workflows.
+
+
+
+
+
🧮 Live Calculator
+
Enter a comma-separated list of numbers (use NaN, null for missing).
+
Input values
+
+
ddof (for std/var)
+
+ 1 (sample — default)
+ 0 (population)
+ 2
+
+
Compute All
+
+
+
+
+
+
📖 Function Reference
+
+
+
+ Function
+ Description
+ Empty/all-NaN returns
+ pandas / numpy equivalent
+
+
+
+ nancount(input)Count of valid (non-NaN) numeric values 0np.count_nonzero(~np.isnan(a))
+ nansum(input)Sum, ignoring NaN/null 0np.nansum(a)
+ nanmean(input)Mean, ignoring NaN/null NaNnp.nanmean(a)
+ nanmedian(input)Median, ignoring NaN/null NaNnp.nanmedian(a)
+ nanvar(input, {ddof})Variance (ddof=1 default) NaNnp.nanvar(a, ddof=1)
+ nanstd(input, {ddof})Std deviation (ddof=1 default) NaNnp.nanstd(a, ddof=1)
+ nanmin(input)Minimum, ignoring NaN/null NaNnp.nanmin(a)
+ nanmax(input)Maximum, ignoring NaN/null NaNnp.nanmax(a)
+ nanprod(input)Product, ignoring NaN/null 1np.nanprod(a)
+
+
+
+
+
+
+
💡 Usage Examples
+
+
+ Basic array usage
+
+import { nansum, nanmean, nanmedian, nanstd } from "tsb";
+
+const data = [1, 2, NaN, null, 3, 5];
+
+nansum(data); // 11
+nanmean(data); // 2.75
+nanmedian(data); // 2.5
+nanstd(data); // 1.708...
+
+
+# Python / pandas equivalent
+import numpy as np
+
+data = [1, 2, np.nan, np.nan, 3, 5]
+
+np.nansum(data) # 11.0
+np.nanmean(data) # 2.75
+np.nanmedian(data) # 2.5
+np.nanstd(data, ddof=1) # 1.708...
+
+
+
+
+ Using with Series
+
+import { Series, nansum, nanmean, nancount } from "tsb";
+
+const s = new Series({ data: [10, null, 30, NaN, 50] });
+
+nancount(s); // 3
+nansum(s); // 90
+nanmean(s); // 30
+
+
+# Python / pandas equivalent
+import pandas as pd, numpy as np
+
+s = pd.Series([10, np.nan, 30, np.nan, 50])
+
+s.count() # 3
+s.sum() # 90.0
+s.mean() # 30.0
+
+
+
+
+ Variance and std with ddof
+
+import { nanvar, nanstd } from "tsb";
+
+const xs = [2, 4, 4, 4, 5, 5, 7, 9];
+
+// Sample (ddof=1, default)
+nanvar(xs); // ≈ 4.571
+nanstd(xs); // ≈ 2.138
+
+// Population (ddof=0)
+nanvar(xs, { ddof: 0 }); // 4.0
+nanstd(xs, { ddof: 0 }); // 2.0
+
+
+# Python / pandas equivalent
+import numpy as np
+
+xs = [2, 4, 4, 4, 5, 5, 7, 9]
+
+np.nanvar(xs, ddof=1) # 4.571...
+np.nanstd(xs, ddof=1) # 2.138...
+
+np.nanvar(xs, ddof=0) # 4.0
+np.nanstd(xs, ddof=0) # 2.0
+
+
+
+
+
+
+
⚡ NaN Impact Demo
+
See how NaN values affect results with and without nan-ignoring functions.
+
Run Comparison
+
+
+
+
+
+
+
diff --git a/playground/nunique.html b/playground/nunique.html
new file mode 100644
index 00000000..add4399d
--- /dev/null
+++ b/playground/nunique.html
@@ -0,0 +1,112 @@
+
+
+
+
+
+ tsb — nunique / any / all
+
+
+
+ ← tsb playground
+ 🔢 nunique / any / all
+
+ Count unique values and perform boolean reductions, mirroring
+ Series.nunique() ,
+ Series.any() , and
+ Series.all() .
+
+
+ 1 · nunique — count distinct values
+
+
import { Series, nuniqueSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 2, 3, 3, 3, null] });
+
+nuniqueSeries(s); // 3 (null excluded by default)
+nuniqueSeries(s, { dropna: false }); // 4 (null counted as a distinct value)
+
nuniqueSeries(s) → 3
+nuniqueSeries(s, {dropna:false}) → 4
+
+
+ 2 · any — is any element truthy?
+
+
import { anySeries } from "tsb";
+
+const allZero = new Series({ data: [0, 0, 0] });
+const hasOne = new Series({ data: [0, 0, 1] });
+
+anySeries(allZero); // false
+anySeries(hasOne); // true
+
+// With nulls (skipna=true by default)
+const withNull = new Series({ data: [null, 0, null] });
+anySeries(withNull); // false — null skipped, 0 is falsy
+
anySeries(allZero) → false
+anySeries(hasOne) → true
+anySeries(withNull) → false
+
+
+ 3 · all — are all elements truthy?
+
+
import { allSeries } from "tsb";
+
+const allTrue = new Series({ data: [1, 2, 3] });
+const hasFalsy = new Series({ data: [1, 0, 3] });
+
+allSeries(allTrue); // true
+allSeries(hasFalsy); // false
+
+// Empty or all-null series vacuously returns true
+allSeries(new Series({ data: [] })); // true
+allSeries(new Series({ data: [null, null] })); // true
+
allSeries(allTrue) → true
+allSeries(hasFalsy) → false
+allSeries([]) → true (vacuous)
+allSeries([null]) → true (vacuous)
+
+
+ 4 · DataFrame nunique
+
+
import { DataFrame, nuniqueDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ category: ["A", "B", "A", "C"],
+ value: [1, 2, 1, 3 ],
+});
+
+nuniqueDataFrame(df); // per-column: category→3, value→3
+nuniqueDataFrame(df, { axis: 1 }); // per-row: how many distinct values in each row
+
nuniqueDataFrame(df) → category: 3, value: 3
+nuniqueDataFrame(df, {axis:1}) → row0: 2, row1: 2, row2: 2, row3: 2
+
+
+ 5 · DataFrame any / all
+
+
import { anyDataFrame, allDataFrame } from "tsb";
+
+const df2 = DataFrame.fromColumns({
+ a: [0, 0, 1],
+ b: [1, 1, 1],
+});
+
+anyDataFrame(df2); // a: true, b: true (each col has at least one truthy)
+allDataFrame(df2); // a: false, b: true (col a has a 0)
+
+// axis=1: reduce across columns per row
+anyDataFrame(df2, { axis: 1 }); // row0: true, row1: true, row2: true
+allDataFrame(df2, { axis: 1 }); // row0: false, row1: false, row2: true
+
anyDataFrame(df2) → a: true, b: true
+allDataFrame(df2) → a: false, b: true
+anyDataFrame(df2,{axis:1}) → [true, true, true]
+allDataFrame(df2,{axis:1}) → [false, false, true]
+
+
+
diff --git a/playground/pct_change.html b/playground/pct_change.html
new file mode 100644
index 00000000..ec1b4e3b
--- /dev/null
+++ b/playground/pct_change.html
@@ -0,0 +1,452 @@
+
+
+
+
+
+ tsb — pct_change
+
+
+
+
+
+
Initializing playground…
+
+ ← Back to roadmap
+ 📊 pct_change — Interactive Playground
+ Compute the fractional change between each element and a prior element.
+ Mirrors pandas.Series.pct_change() /
+ pandas.DataFrame.pct_change().
+ Edit any code block below and press ▶ Run
+ (or Ctrl+Enter) to execute it live in your browser.
+
+
+
+
+
1 · Basic pct_change on a Series
+
pctChangeSeries(series) returns the fractional (not percentage) change
+ from each previous element. The first element is always null.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Multi-period change
+
The periods option controls the lag. Use periods: 2 to
+ compare each value to the one two steps earlier — useful for month-over-month
+ comparisons in quarterly data.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · Handling missing values
+
By default, pctChangeSeries forward-fills (fillMethod: "pad")
+ NaN/null values before computing the ratio — so gaps don't break the chain.
+ Set fillMethod: null to propagate NaN instead.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · Limit consecutive fills
+
The limit option caps how many consecutive NaN values get forward-filled.
+ Useful when you want to tolerate short gaps but not bridge large ones.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
5 · DataFrame column-wise pct_change
+
pctChangeDataFrame(df) applies pctChangeSeries to every
+ column independently. Ideal for comparing multiple assets or metrics simultaneously.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
6 · Negative periods (look-forward change)
+
A negative periods value computes the forward change: how much will
+ this element change by the time we reach |periods| steps ahead.
+ Useful for computing returns on a "hold for N periods" strategy.
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
All functions return a new Series/DataFrame of the same shape — inputs are never mutated.
+
// Series
+pctChangeSeries(series, {
+ periods?: number, // default 1 (positive = look back, negative = look forward)
+ fillMethod?: "pad" | "bfill" | null, // default "pad"
+ limit?: number | null, // max consecutive fills; default unlimited
+}): Series
+
+// DataFrame
+pctChangeDataFrame(df, {
+ periods?: number,
+ fillMethod?: "pad" | "bfill" | null,
+ limit?: number | null,
+ axis?: 0 | 1 | "index" | "columns", // default 0 (column-wise)
+}): DataFrame
+
+
+
+
+
+
diff --git a/playground/quantile.html b/playground/quantile.html
new file mode 100644
index 00000000..fb019d88
--- /dev/null
+++ b/playground/quantile.html
@@ -0,0 +1,182 @@
+
+
+
+
+
+ tsb — quantile
+
+
+
+ ← tsb playground
+ 📐 quantile
+
+ quantileSeries / quantileDataFrame —
+ compute quantile(s) / percentile(s), mirroring
+ Series.quantile() and
+ DataFrame.quantile() .
+
+ Equivalent Python: series.quantile(q=0.5) / df.quantile(q=0.5)
+
+ 1 · Scalar quantile (median)
+
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+quantileSeries(s); // default q=0.5 → 3
+quantileSeries(s, { q: 0.25 }); // → 2
+quantileSeries(s, { q: 0.75 }); // → 4
+
+
+
+ 2 · Multiple quantile levels
+
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+const q = quantileSeries(s, { q: [0.25, 0.5, 0.75] });
+// Series indexed by q-values: { 0.25: 2, 0.5: 3, 0.75: 4 }
+
+
+
+ 3 · Interpolation methods
+
+
const s = new Series({ data: [0, 10] });
+// q=0.5 → position 0.5 between indices 0 and 1
+quantileSeries(s, { q: 0.5, interpolation: "linear" }); // 5
+quantileSeries(s, { q: 0.5, interpolation: "lower" }); // 0
+quantileSeries(s, { q: 0.5, interpolation: "higher" }); // 10
+quantileSeries(s, { q: 0.5, interpolation: "midpoint" }); // 5
+quantileSeries(s, { q: 0.5, interpolation: "nearest" }); // 0
+
+
+
+ 4 · NaN handling (skipna=true by default)
+
+
const s = new Series({ data: [1, null, 3, NaN, 5] });
+quantileSeries(s, { q: 0.5 }); // ignores null/NaN → 3
+quantileSeries(s, { q: 0.5, skipna: false }); // NaN propagates → NaN
+
+
+
+ 5 · DataFrame — axis=0 (per-column quantiles)
+
+
const df = DataFrame.fromColumns({ a: [1, 2, 3, 4], b: [10, 20, 30, 40] });
+quantileDataFrame(df, { q: 0.5 });
+// Series { a: 2.5, b: 25 }
+
+quantileDataFrame(df, { q: [0.25, 0.5, 0.75] });
+// DataFrame 3×2: rows=[0.25, 0.5, 0.75], cols=[a, b]
+
+
+
+ 6 · DataFrame — axis=1 (per-row quantiles)
+
+
const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [3, 4, 5], c: [5, 6, 7] });
+quantileDataFrame(df, { axis: 1, q: 0.5 });
+// Series — median of each row: [3, 4, 5]
+
+
+
+ 7 · Q=[0, 0.25, 0.5, 0.75, 1] summary table
+
+
const df = DataFrame.fromColumns({ score: [55, 70, 80, 88, 92, 95, 99] });
+quantileDataFrame(df, { q: [0, 0.25, 0.5, 0.75, 1] });
+// → summary statistics table
+
+
+
+
+
+
diff --git a/playground/replace.html b/playground/replace.html
new file mode 100644
index 00000000..19da518a
--- /dev/null
+++ b/playground/replace.html
@@ -0,0 +1,408 @@
+
+
+
+
+
+ tsb — replace (value substitution)
+
+
+
+
+
+
Loading tsb runtime…
+
+
+ ← Back to playground index
+
+ replace — value substitution
+
+ replaceSeries / replaceDataFrame substitute values
+ matching a pattern with a new value.
+ Supports scalar, array, and mapping (Record / Map) replacement specs.
+ Mirrors Series.replace() and DataFrame.replace() from pandas.
+
+
+
+
+
1 · Scalar → scalar replacement
+
+ Replace every occurrence of a single value with another value.
+ Works on numbers, strings, booleans, and null.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
2 · Array replacement
+
+ Replace a list of values with a single target, or perform pair-wise
+ replacement using two equal-length arrays.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
3 · Mapping (Record / Map) replacement
+
+ Pass a lookup table as either a plain object (Record<string, Scalar>)
+ or a JavaScript Map for full type flexibility.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
4 · DataFrame replacement
+
+ replaceDataFrame applies the same spec to all columns by
+ default. Use the columns option to restrict which columns
+ are affected.
+
+
+
+
+
+
Click ▶ Run to execute
+
Ctrl+Enter to run · Tab to indent
+
+
+
+
+
+
API Reference
+
// Replace values in a Series
+replaceSeries(
+ series: Series,
+ spec: ReplaceSpec,
+ options?: ReplaceOptions,
+): Series
+
+// Replace values in a DataFrame
+replaceDataFrame(
+ df: DataFrame,
+ spec: ReplaceSpec,
+ options?: DataFrameReplaceOptions,
+): DataFrame
+
+// Replacement spec variants
+type ReplaceSpec =
+ | { toReplace: Scalar; value: Scalar } // scalar → scalar
+ | { toReplace: Scalar[]; value: Scalar } // array → scalar
+ | { toReplace: Scalar[]; value: Scalar[] } // array → array (pair-wise)
+ | { toReplace: Record<string, Scalar> } // Record mapping
+ | { toReplace: Map<Scalar, Scalar> } // Map mapping
+
+// Options
+interface ReplaceOptions {
+ matchNaN?: boolean; // treat NaN===NaN for matching (default: true)
+}
+
+interface DataFrameReplaceOptions extends ReplaceOptions {
+ columns?: string[]; // only replace in these columns (default: all)
+}
+
+
+
+
+
+
diff --git a/playground/sem_var.html b/playground/sem_var.html
new file mode 100644
index 00000000..a3114054
--- /dev/null
+++ b/playground/sem_var.html
@@ -0,0 +1,90 @@
+
+
+
+
+
+ tsb — sem_var
+
+
+
+ ← tsb playground
+ 📊 Variance & Standard Error (sem_var)
+
+ varSeries / semSeries /
+ varDataFrame / semDataFrame —
+ compute sample/population variance and standard error of the mean, mirroring
+ Series.var() and
+ Series.sem() .
+
+ Equivalent Python: series.var(ddof=1) / series.sem()
+
+ 1 · Sample variance (ddof=1)
+
+
import { Series, varSeries } from "tsb";
+
+const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+varSeries(s); // 4.0 (sample variance, ddof=1)
+varSeries(s, { ddof: 0 }); // 3.5 (population variance, ddof=0)
+
varSeries(s) → 4.0
+varSeries(s, {ddof:0}) → 3.5
+
+
+ 2 · Standard error of the mean
+
+
import { semSeries } from "tsb";
+
+// SEM = sqrt(var / n)
+semSeries(s); // sqrt(4 / 8) ≈ 0.7071
+
semSeries(s) ≈ 0.7071
+
+
+ 3 · Handling missing values
+
+
const s2 = new Series({ data: [1, 2, 3, null, 5] });
+
+varSeries(s2); // skipna=true (default): ignores null
+varSeries(s2, { skipna: false }); // propagates NaN when null present
+varSeries(s2, { minCount: 5 }); // NaN: need 5 valid values but only 4
+
varSeries(s2) → 2.9167 (approx)
+varSeries(s2, {skipna:false}) → NaN
+varSeries(s2, {minCount:5}) → NaN
+
+
+ 4 · DataFrame column-wise variance
+
+
import { DataFrame, varDataFrame, semDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, 2, 3],
+ b: [10, 20, 30],
+});
+
+varDataFrame(df); // Series { a: 1, b: 100 }
+semDataFrame(df); // Series { a: sqrt(1/3), b: sqrt(100/3) }
+varDataFrame(df, { axis: 1 }); // row-wise variance
+
varDataFrame(df) → a: 1.0, b: 100.0
+semDataFrame(df) → a: ≈0.577, b: ≈5.774
+varDataFrame(df, {axis:1}) → row0: 20.25, row1: 81.0, row2: 182.25
+
+
+ 5 · numericOnly — skip non-numeric columns
+
+
const df2 = DataFrame.fromColumns({
+ score: [10, 20, 30],
+ label: ["A", "B", "C"],
+});
+
+varDataFrame(df2, { numericOnly: true });
+// Only includes "score", excludes "label"
+
varDataFrame(df2, {numericOnly:true}) → score: 100.0
+
+
+
diff --git a/playground/skew_kurt.html b/playground/skew_kurt.html
new file mode 100644
index 00000000..bec28a8b
--- /dev/null
+++ b/playground/skew_kurt.html
@@ -0,0 +1,137 @@
+
+
+
+
+
+ tsb — skew & kurtosis
+
+
+
+ ← tsb playground
+ 📐 skewSeries / kurtSeries
+
+ skewSeries / kurtSeries —
+ compute the adjusted Fisher–Pearson skewness and excess kurtosis (bias-corrected), mirroring
+ Series.skew() and
+ Series.kurt() .
+
+ Equivalent Python: series.skew() / series.kurt()
+
+ 1 · Symmetric distribution — skew ≈ 0
+
+
const s = new Series({ data: [1, 2, 3, 4, 5] });
+skewSeries(s);
+// → 0
+
+
+
+ 2 · Right-skewed distribution — positive skew
+
+
const s = new Series({ data: [1, 2, 3, 4, 100] });
+skewSeries(s);
+// → large positive value
+
+
+
+ 3 · Kurtosis — uniform-like (platykurtic, negative excess)
+
+
const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] });
+kurtSeries(s);
+// → negative (flatter than normal)
+
+
+
+ 4 · NaN propagation — too few values
+
+
skewSeries(new Series({ data: [1, 2] })); // NaN — need ≥ 3
+kurtSeries(new Series({ data: [1, 2, 3] })); // NaN — need ≥ 4
+
+
+
+ 5 · DataFrame column-wise skewness
+
+
const df = DataFrame.fromColumns({
+ symmetric: [1, 2, 3, 4, 5],
+ right_skew: [1, 2, 3, 4, 100],
+});
+skewDataFrame(df).values;
+
+
+
+ 6 · DataFrame row-wise kurtosis
+
+
const df = DataFrame.fromColumns({
+ a: [1, 10], b: [2, 10], c: [3, 10], d: [4, 10], e: [100, 10], f: [5, 10],
+});
+kurtDataFrame(df, { axis: 1 }).values;
+
+
+
+
+
+
diff --git a/playground/to_datetime.html b/playground/to_datetime.html
new file mode 100644
index 00000000..9ed06810
--- /dev/null
+++ b/playground/to_datetime.html
@@ -0,0 +1,118 @@
+
+
+
+
+
+ tsb — toDatetime
+
+
+
+ ← tsb playground
+ toDatetime stats
+
+ Convert scalars, arrays, or Series values to JavaScript
+ Date objects — mirroring
+ pandas.to_datetime() .
+
+
+ Supported input formats
+
+ Format Example Result
+ ISO 8601 date "2024-03-15"Mar 15 2024
+ ISO 8601 datetime "2024-03-15T12:00:00Z"Mar 15 2024 12:00 UTC
+ US format (MM/DD/YYYY) "01/15/2024"Jan 15 2024
+ European (DD-MM-YYYY) "15-03-2024"Mar 15 2024
+ Compact (YYYYMMDD) "20240315"Mar 15 2024
+ Unix ms (number) 1710460800000Mar 15 2024 00:00 UTC
+ Unix s (unit="s") 1710460800Mar 15 2024 00:00 UTC
+ Date object new Date(2024,2,15)unchanged
+ null / undefined / NaN nullnull
+
+
+ Error handling
+
+ errors= Behaviour
+ "raise" (default)Throws TypeError on unparseable input
+ "coerce"Returns null on unparseable input
+ "ignore"Returns the original value unchanged
+
+
+ Quick examples
+ import { toDatetime, Series } from "tsb";
+
+// Scalar
+toDatetime("2024-03-15"); // Date: Mar 15 2024
+toDatetime(1710460800000); // Date from Unix ms
+toDatetime(1710460800, { unit: "s" }); // Date from Unix seconds
+toDatetime(null); // null
+toDatetime("nope", { errors: "coerce" }); // null
+toDatetime("nope", { errors: "ignore" }); // "nope"
+
+// Array
+toDatetime(["2024-01-01", null, "2024-06-15"]);
+// => [Date, null, Date]
+
+// Series
+const s = new Series({ data: ["2024-01-01", "2024-06-15", null] });
+toDatetime(s);
+// => Series<Date | null> with dtype=datetime
+
+ Python / pandas equivalent
+
+
+ Live demo
+ Enter a date string or number and click Convert :
+
+ Convert
+ —
+
+
+
+
diff --git a/src/core/astype.ts b/src/core/astype.ts
new file mode 100644
index 00000000..572352c4
--- /dev/null
+++ b/src/core/astype.ts
@@ -0,0 +1,239 @@
+/**
+ * astype — dtype coercion for Series and DataFrame.
+ *
+ * Mirrors `pandas.Series.astype` and `pandas.DataFrame.astype`:
+ * cast values to a target dtype, with null/NaN passthrough semantics
+ * matching pandas' default `errors="raise"` behaviour.
+ *
+ * @module
+ */
+
+import type { DtypeName, Scalar } from "../types.ts";
+import { Dtype } from "./dtype.ts";
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function isNull(v: Scalar): v is null | undefined {
+ return v === null || v === undefined;
+}
+
+/** Integer clamp ranges for each integer dtype name. */
+const INT_RANGES: Readonly<Partial<Record<DtypeName, { lo: number; hi: number; unsigned: boolean }>>> = {
+ int8: { lo: -128, hi: 127, unsigned: false },
+ int16: { lo: -32768, hi: 32767, unsigned: false },
+ int32: { lo: -2147483648, hi: 2147483647, unsigned: false },
+ int64: { lo: Number.MIN_SAFE_INTEGER, hi: Number.MAX_SAFE_INTEGER, unsigned: false },
+ uint8: { lo: 0, hi: 255, unsigned: true },
+ uint16: { lo: 0, hi: 65535, unsigned: true },
+ uint32: { lo: 0, hi: 4294967295, unsigned: true },
+ uint64: { lo: 0, hi: Number.MAX_SAFE_INTEGER, unsigned: true },
+};
+
+/**
+ * Cast a single scalar value to the target dtype.
+ *
+ * Rules per dtype kind:
+ * - **int/uint**: `Math.trunc(Number(v))`, clamped to the dtype range. `null/undefined → null`.
+ * - **float32/float64**: `Number(v)`. `null/undefined → null`. Strings that
+ * are not parsable become `NaN` (same as pandas `errors="coerce"`-like
+ * number coercion).
+ * - **bool**: falsy values → `false`; truthy → `true`. `null/undefined → null`.
+ * - **string**: `String(v)`. `null/undefined → null`.
+ * - **datetime**: `new Date(Number(v))` for numbers; `new Date(String(v))` for
+ * strings; `null/undefined → null`.
+ * - **object/category/timedelta**: value is returned as-is (no transformation).
+ */
+export function castScalar(v: Scalar, dtype: Dtype): Scalar {
+ if (isNull(v)) {
+ return null;
+ }
+
+ const k = dtype.kind;
+
+ if (k === "int" || k === "uint") {
+ if (typeof v === "boolean") {
+ return v ? 1 : 0;
+ }
+ if (v instanceof Date) {
+ return Math.trunc(v.getTime());
+ }
+ const n = Number(v);
+ if (Number.isNaN(n)) {
+ return null;
+ }
+ const range = INT_RANGES[dtype.name];
+ if (range === undefined) {
+ return Math.trunc(n);
+ }
+ const t = Math.trunc(n);
+ return Math.max(range.lo, Math.min(range.hi, t));
+ }
+
+ if (k === "float") {
+ if (typeof v === "boolean") {
+ return v ? 1.0 : 0.0;
+ }
+ if (v instanceof Date) {
+ return v.getTime();
+ }
+ return Number(v);
+ }
+
+ if (k === "bool") {
+ if (typeof v === "number") {
+ return !Number.isNaN(v) && v !== 0;
+ }
+ if (v instanceof Date) {
+ return true;
+ }
+ return Boolean(v);
+ }
+
+ if (k === "string") {
+ if (v instanceof Date) {
+ return v.toISOString();
+ }
+ return String(v);
+ }
+
+ if (k === "datetime") {
+ if (v instanceof Date) {
+ return v;
+ }
+ if (typeof v === "number") {
+ return new Date(v);
+ }
+ const d = new Date(String(v));
+ return Number.isNaN(d.getTime()) ? null : d;
+ }
+
+ // object / category / timedelta — return unchanged
+ return v;
+}
+
+// ─── AstypeOptions ────────────────────────────────────────────────────────────
+
+/** Options accepted by {@link astypeSeries} and {@link astype}. */
+export interface AstypeOptions {
+ /**
+ * When `"ignore"`, values that fail to cast are kept unchanged
+ * instead of throwing.
+ *
+ * @default "raise"
+ */
+ readonly errors?: "raise" | "ignore";
+}
+
+// ─── astypeSeries ─────────────────────────────────────────────────────────────
+
+/**
+ * Cast a Series to a different dtype.
+ *
+ * Returns a new Series whose values have been coerced to `dtype`. The index
+ * and name are preserved unchanged.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1.9, 2.1, 3.7], name: "x" });
+ * const si = astypeSeries(s, "int64");
+ * si.values; // [1, 2, 3]
+ * si.dtype.name; // "int64"
+ * ```
+ */
+export function astypeSeries(
+ s: Series,
+ dtype: DtypeName | Dtype,
+ options: AstypeOptions = {},
+): Series {
+ const targetDtype = dtype instanceof Dtype ? dtype : Dtype.from(dtype as DtypeName);
+ const { errors = "raise" } = options;
+
+ const casted: Scalar[] = [];
+ for (const v of s.values) {
+ let out: Scalar;
+ try {
+ out = castScalar(v, targetDtype);
+ } catch (e) {
+ if (errors === "ignore") {
+ out = v;
+ } else {
+ throw e;
+ }
+ }
+ casted.push(out);
+ }
+
+ return new Series({
+ data: casted,
+ index: s.index,
+ dtype: targetDtype,
+ name: s.name,
+ });
+}
+
+// ─── DataFrame astype ─────────────────────────────────────────────────────────
+
+/**
+ * Options for {@link astype} (DataFrame variant).
+ */
+export interface DataFrameAstypeOptions extends AstypeOptions {
+ /**
+ * Accepted for pandas API compatibility only: this implementation
+ * always returns a new DataFrame and never mutates the input,
+ * so the value of `copy` has no observable effect.
+ *
+ * Regardless of `copy`, when `dtype` is a `Record`, columns not
+ * listed in the map are carried over unchanged — same as
+ * pandas.DataFrame.astype with a partial dtype mapping.
+ */
+ readonly copy?: boolean;
+}
+
+/**
+ * Cast one or more columns in a DataFrame to the specified dtype(s).
+ *
+ * - Pass a single `DtypeName` or `Dtype` to cast **all** columns.
+ * - Pass a `Record` to cast individual columns.
+ * Columns not listed are returned unchanged.
+ *
+ * Returns a new DataFrame; the original is not modified.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1.5, 2.7], b: ["3", "4"] });
+ *
+ * // Cast all columns to float64
+ * astype(df, "float64");
+ *
+ * // Cast only column "b" to int64
+ * astype(df, { b: "int64" });
+ * ```
+ */
+export function astype(
+ df: DataFrame,
+ dtype: DtypeName | Dtype | Readonly<Record<string, DtypeName | Dtype>>,
+ options: DataFrameAstypeOptions = {},
+): DataFrame {
+ const colMap = new Map<string, Series>();
+
+ const isSingleDtype = typeof dtype === "string" || dtype instanceof Dtype;
+
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ if (isSingleDtype) {
+ colMap.set(name, astypeSeries(col, dtype as DtypeName | Dtype, options));
+ } else {
+ const mapping = dtype as Readonly<Record<string, DtypeName | Dtype>>;
+ const target = mapping[name];
+ if (target !== undefined) {
+ colMap.set(name, astypeSeries(col, target, options));
+ } else {
+ colMap.set(name, col);
+ }
+ }
+ }
+
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/core/index.ts b/src/core/index.ts
index e737ec8f..3fd31e7c 100644
--- a/src/core/index.ts
+++ b/src/core/index.ts
@@ -126,3 +126,5 @@ export {
isPeriodDtype,
isIntervalDtype,
} from "./api_types.ts";
+export { astypeSeries, astype, castScalar } from "./astype.ts";
+export type { AstypeOptions, DataFrameAstypeOptions } from "./astype.ts";
diff --git a/src/index.ts b/src/index.ts
index b95ad4be..37c6e62e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -465,3 +465,73 @@ export type {
SeriesToStringOptions,
DataFrameToStringOptions,
} from "./stats/index.ts";
+
+// PR #120 unique modules — re-exported from sub-barrels
+export { astypeSeries, astype, castScalar } from "./core/index.ts";
+export type { AstypeOptions, DataFrameAstypeOptions } from "./core/index.ts";
+// readExcel / xlsxSheetNames use node:zlib — import from "tsb/io/read_excel" directly
+export { clipAdvancedSeries, clipAdvancedDataFrame } from "./stats/index.ts";
+export type {
+ SeriesBound,
+ DataFrameBound,
+ ClipAdvancedSeriesOptions,
+ ClipAdvancedDataFrameOptions,
+} from "./stats/index.ts";
+export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./stats/index.ts";
+export type { IdxOptions, IdxDataFrameOptions } from "./stats/index.ts";
+export { modeSeries, modeDataFrame } from "./stats/index.ts";
+export type { ModeSeriesOptions, ModeDataFrameOptions } from "./stats/index.ts";
+export {
+ nancount,
+ nansum,
+ nanmean,
+ nanmedian,
+ nanvar,
+ nanstd,
+ nanmin,
+ nanmax,
+ nanprod,
+} from "./stats/index.ts";
+export type { NanInput, NanAggOptions } from "./stats/index.ts";
+export {
+ nuniqueSeries,
+ nuniqueDataFrame,
+ anySeries,
+ allSeries,
+ anyDataFrame,
+ allDataFrame,
+} from "./stats/index.ts";
+export type {
+ NuniqueSeriesOptions,
+ NuniqueDataFrameOptions,
+ AnyAllSeriesOptions,
+ AnyAllDataFrameOptions,
+} from "./stats/index.ts";
+export { pctChangeSeries, pctChangeDataFrame } from "./stats/index.ts";
+export type {
+ PctChangeFillMethod,
+ PctChangeOptions,
+ DataFramePctChangeOptions,
+} from "./stats/index.ts";
+export { quantileSeries, quantileDataFrame } from "./stats/index.ts";
+export type {
+ QuantileInterpolation,
+ QuantileSeriesOptions,
+ QuantileDataFrameOptions,
+} from "./stats/index.ts";
+export { replaceSeries, replaceDataFrame } from "./stats/index.ts";
+export type {
+ ReplaceMapping,
+ ReplaceSpec,
+ ReplaceOptions,
+ DataFrameReplaceOptions,
+} from "./stats/index.ts";
+export { varSeries, semSeries, varDataFrame, semDataFrame } from "./stats/index.ts";
+export type { VarSemSeriesOptions, VarSemDataFrameOptions } from "./stats/index.ts";
+export { skewSeries, kurtSeries, skewDataFrame, kurtDataFrame } from "./stats/index.ts";
+export type {
+ SkewKurtSeriesOptions,
+ SkewKurtDataFrameOptions,
+} from "./stats/index.ts";
+export { toDatetime } from "./stats/index.ts";
+export type { DatetimeUnit, DatetimeErrors, ToDatetimeOptions } from "./stats/index.ts";
diff --git a/src/io/index.ts b/src/io/index.ts
index d4f27f3b..e868c4c8 100644
--- a/src/io/index.ts
+++ b/src/io/index.ts
@@ -10,3 +10,6 @@ export { readJson, toJson } from "./json.ts";
export type { ReadJsonOptions, ToJsonOptions, JsonOrient } from "./json.ts";
export { jsonNormalize } from "./json_normalize.ts";
export type { JsonPath, JsonNormalizeOptions } from "./json_normalize.ts";
+// readExcel / xlsxSheetNames use node:zlib and cannot be bundled for the
+// browser. Import them directly from "tsb/io/read_excel" when running in
+// Node / Bun.
diff --git a/src/io/read_excel.ts b/src/io/read_excel.ts
new file mode 100644
index 00000000..97d06065
--- /dev/null
+++ b/src/io/read_excel.ts
@@ -0,0 +1,645 @@
+/**
+ * readExcel — XLSX file reading for DataFrame.
+ *
+ * Mirrors `pandas.read_excel()`:
+ * - `readExcel(data, options?)` — parse an XLSX binary buffer into a DataFrame.
+ * - `xlsxSheetNames(data)` — list sheet names without parsing cell data.
+ *
+ * Supports:
+ * - Shared string table (type `"s"`)
+ * - Inline strings (type `"inlineStr"`)
+ * - Numbers (type absent or `"n"`)
+ * - Booleans (type `"b"`)
+ * - Formula cached values (type `"str"`)
+ * - Error cells (type `"e"`) — returned as null
+ * - ZIP STORED (method 0) and DEFLATED (method 8) entries
+ *
+ * Limitations (deferred):
+ * - XLSX only — not XLS (legacy binary format)
+ * - No ZIP64 support (up to ~4 GB)
+ * - Date serial numbers are not converted (returned as numeric)
+ *
+ * @module
+ */
+
+// biome-ignore lint/correctness/noNodejsModules: raw DEFLATE decompression for ZIP/XLSX requires node:zlib
+import { inflateRawSync } from "node:zlib";
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { RangeIndex } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import { Dtype } from "../core/index.ts";
+import type { DtypeName, Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link readExcel}. */
+export interface ReadExcelOptions {
+ /**
+ * Which sheet to read.
+ * - `string`: exact sheet name
+ * - `number`: 0-based sheet index
+ * - Default: `0` (first sheet)
+ */
+ readonly sheetName?: string | number;
+ /**
+ * Row index of the header row, or `null` for no header (columns become
+ * `"0"`, `"1"`, `"2"`, …).
+ * Default: `0`.
+ */
+ readonly header?: number | null;
+ /**
+ * Column name or 0-based index of the column to use as the row index.
+ * Default: `null` (use a default `RangeIndex`).
+ */
+ readonly indexCol?: string | number | null;
+ /**
+ * Number of data rows to skip after the header row.
+ * Default: `0`.
+ */
+ readonly skipRows?: number;
+ /**
+ * Maximum number of data rows to read.
+ */
+ readonly nrows?: number;
+ /**
+ * Additional strings to treat as NA (beyond the built-in set:
+ * `""`, `"NA"`, `"N/A"`, `"null"`, `"NaN"`, `"nan"`, `"#N/A"`).
+ */
+ readonly naValues?: readonly string[];
+ /**
+ * Explicit dtype overrides per column name.
+ */
+ readonly dtype?: Readonly<Record<string, DtypeName>>;
+}
+
+// ─── ZIP low-level helpers ────────────────────────────────────────────────────
+
+/** Read a little-endian uint16 from a buffer. */
+function readU16(buf: Uint8Array, off: number): number {
+ return ((buf[off] ?? 0) | ((buf[off + 1] ?? 0) << 8)) >>> 0;
+}
+
+/** Read a little-endian uint32 from a buffer. */
+function readU32(buf: Uint8Array, off: number): number {
+ return (
+ ((buf[off] ?? 0) |
+ ((buf[off + 1] ?? 0) << 8) |
+ ((buf[off + 2] ?? 0) << 16) |
+ ((buf[off + 3] ?? 0) << 24)) >>>
+ 0
+ );
+}
+
+const ZIP_EOCD_SIG = 0x06054b50;
+const ZIP_CD_SIG = 0x02014b50;
+const ZIP_COMP_STORED = 0;
+const ZIP_COMP_DEFLATE = 8;
+
+interface ZipEntry {
+ readonly name: string;
+ readonly compressedSize: number;
+ readonly uncompressedSize: number;
+ readonly method: number;
+ readonly dataOffset: number;
+}
+
+/** Search for the End-of-Central-Directory record. */
+function findEocd(buf: Uint8Array): number {
+ const minOff = Math.max(0, buf.length - 65558);
+ for (let i = buf.length - 22; i >= minOff; i--) {
+ if (readU32(buf, i) === ZIP_EOCD_SIG) {
+ return i;
+ }
+ }
+ throw new Error("Not a valid XLSX file: no ZIP end-of-central-directory found");
+}
+
+/** Compute the actual data offset from the local file header. */
+function localDataOffset(buf: Uint8Array, localOff: number): number {
+ const nameLen = readU16(buf, localOff + 26);
+ const extraLen = readU16(buf, localOff + 28);
+ return localOff + 30 + nameLen + extraLen;
+}
+
+/** Parse the ZIP central directory and return a name→entry map. */
+function parseZipEntries(buf: Uint8Array): Map<string, ZipEntry> {
+ const eocd = findEocd(buf);
+ const cdOffset = readU32(buf, eocd + 16);
+ const cdSize = readU32(buf, eocd + 12);
+ const dec = new TextDecoder("utf-8");
+ const entries = new Map<string, ZipEntry>();
+ let pos = cdOffset;
+ while (pos < cdOffset + cdSize && pos + 46 <= buf.length) {
+ if (readU32(buf, pos) !== ZIP_CD_SIG) {
+ break;
+ }
+ const method = readU16(buf, pos + 10);
+ const compressedSize = readU32(buf, pos + 20);
+ const uncompressedSize = readU32(buf, pos + 24);
+ const nameLen = readU16(buf, pos + 28);
+ const extraLen = readU16(buf, pos + 30);
+ const commentLen = readU16(buf, pos + 32);
+ const localOff = readU32(buf, pos + 42);
+ const name = dec.decode(buf.subarray(pos + 46, pos + 46 + nameLen));
+ const dataOffset = localDataOffset(buf, localOff);
+ entries.set(name, { name, compressedSize, uncompressedSize, method, dataOffset });
+ pos += 46 + nameLen + extraLen + commentLen;
+ }
+ return entries;
+}
+
+/** Decompress a ZIP entry and decode it as a UTF-8 string. */
+function extractEntry(buf: Uint8Array, entry: ZipEntry): string {
+ const raw = buf.subarray(entry.dataOffset, entry.dataOffset + entry.compressedSize);
+ let bytes: Uint8Array;
+ if (entry.method === ZIP_COMP_STORED) {
+ bytes = raw;
+ } else if (entry.method === ZIP_COMP_DEFLATE) {
+ bytes = inflateRawSync(raw);
+ } else {
+ throw new Error(`Unsupported ZIP compression method: ${entry.method}`);
+ }
+ return new TextDecoder("utf-8").decode(bytes);
+}
+
+/** Extract a named entry or return null if absent. */
+function getZipEntry(buf: Uint8Array, entries: Map<string, ZipEntry>, name: string): string | null {
+ const entry = entries.get(name);
+ if (entry === undefined) {
+ return null;
+ }
+ return extractEntry(buf, entry);
+}
+
+// ─── XML helpers ──────────────────────────────────────────────────────────────
+
+// Top-level regex constants (Biome useTopLevelRegex)
+const RE_XML_ENTITY = /&(?:amp|lt|gt|quot|apos);/g;
+const RE_SST_SI = /([\s\S]*?)<\/si>/g;
+const RE_SST_T = /]*)>([\s\S]*?)<\/t>/g;
+const RE_WB_SHEET = /]*)>/g;
+const RE_REL = /]*)>/g;
+const RE_ROW = /]*)>([\s\S]*?)<\/row>/g;
+const RE_CELL = /]*)>([\s\S]*?)<\/c>/g;
+const RE_CELL_V = /([\s\S]*?)<\/v>/;
+const RE_CELL_IS = /[\s\S]*?]*)>([\s\S]*?)<\/t>/;
+const RE_COL_LETTERS = /^([A-Z]+)(\d+)$/;
+
+/** Replace XML character references with their literal characters. */
+function xmlUnescape(s: string): string {
+ return s.replace(RE_XML_ENTITY, (m) => {
+ if (m === "&") {
+ return "&";
+ }
+ if (m === "<") {
+ return "<";
+ }
+ if (m === ">") {
+ return ">";
+ }
+ if (m === """) {
+ return '"';
+ }
+ return "'";
+ });
+}
+
+/**
+ * Extract the value of a single named XML attribute from an attribute string.
+ * Uses `new RegExp` (not a literal) to support dynamic attribute names.
+ */
+function attrVal(attrStr: string, key: string): string {
+ const re = new RegExp(`\\b${key}="([^"]*)"`);
+ return re.exec(attrStr)?.[1] ?? "";
+}
+
+// ─── XLSX-specific XML parsing ────────────────────────────────────────────────
+
+/** Iterate all non-overlapping matches of a global regex against a string. */
+function* regexAll(re: RegExp, str: string): Generator<RegExpExecArray> {
+ re.lastIndex = 0;
+ let m = re.exec(str);
+ while (m !== null) {
+ yield m;
+ m = re.exec(str);
+ }
+}
+
+/** Parse the shared string table XML into an array of strings. */
+function parseSiText(siContent: string): string {
+ let text = "";
+ for (const t of regexAll(RE_SST_T, siContent)) {
+ text += xmlUnescape(t[1] ?? "");
+ }
+ return text;
+}
+
+/** Parse the shared string table XML into an array of strings. */
+function parseSharedStrings(xml: string): string[] {
+ const strings: string[] = [];
+ for (const si of regexAll(RE_SST_SI, xml)) {
+ strings.push(parseSiText(si[1] ?? ""));
+ }
+ return strings;
+}
+
+interface SheetInfo {
+ readonly name: string;
+ readonly rid: string;
+}
+
+/** Parse the workbook XML and return a list of sheet descriptors. */
+function parseWorkbookSheets(xml: string): SheetInfo[] {
+ const sheets: SheetInfo[] = [];
+ for (const m of regexAll(RE_WB_SHEET, xml)) {
+ const attrs = m[1] ?? "";
+ const name = xmlUnescape(attrVal(attrs, "name"));
+ const rid = attrVal(attrs, "r:id");
+ if (name !== "") {
+ sheets.push({ name, rid });
+ }
+ }
+ return sheets;
+}
+
+/** Parse the workbook relationships XML and return a rid→target map. */
+function parseRelationships(xml: string): Map<string, string> {
+ const map = new Map<string, string>();
+ for (const m of regexAll(RE_REL, xml)) {
+ const attrs = m[1] ?? "";
+ const id = attrVal(attrs, "Id");
+ const target = attrVal(attrs, "Target");
+ if (id !== "") {
+ map.set(id, target);
+ }
+ }
+ return map;
+}
+
+// ─── Cell parsing ─────────────────────────────────────────────────────────────
+
+/** Convert a column letter string (e.g. "A", "AB") to a 0-based index. */
+function colLetterToIndex(col: string): number {
+ let idx = 0;
+ for (const ch of col) {
+ idx = idx * 26 + (ch.charCodeAt(0) - 64);
+ }
+ return idx - 1;
+}
+
+/**
+ * Parse a cell reference (e.g. "A1") into [rowIndex, colIndex] (both 0-based).
+ */
+function parseCellRef(ref: string): readonly [number, number] {
+ const m = RE_COL_LETTERS.exec(ref);
+ if (m === null) {
+ throw new Error(`Invalid cell reference: ${ref}`);
+ }
+ const colLetters = m[1] ?? "";
+ const rowNum = Number.parseInt(m[2] ?? "1", 10);
+ return [rowNum - 1, colLetterToIndex(colLetters)];
+}
+
+/** Resolve a cell value given its type tag and raw text. */
+function resolveCellValue(
+ cellType: string,
+ vText: string,
+ isText: string,
+ sharedStrings: readonly string[],
+): Scalar {
+ if (cellType === "s") {
+ const idx = Number.parseInt(vText, 10);
+ return sharedStrings[idx] ?? null;
+ }
+ if (cellType === "b") {
+ return vText === "1";
+ }
+ if (cellType === "inlineStr") {
+ return xmlUnescape(isText);
+ }
+ if (cellType === "e") {
+ return null;
+ }
+ // "str" (formula string), "n" (number), or absent (number)
+ if (vText === "") {
+ return null;
+ }
+ const n = Number(vText);
+ return Number.isNaN(n) ? xmlUnescape(vText) : n;
+}
+
+interface RawRow {
+ readonly rowIndex: number;
+ readonly cells: ReadonlyMap;
+}
+
+/** Parse a single `` element into a RawRow. */
+function parseOneRow(
+ rowAttrs: string,
+ rowContent: string,
+ sharedStrings: readonly string[],
+): RawRow {
+ const rowIdxStr = attrVal(rowAttrs, "r");
+ const rowIndex = rowIdxStr === "" ? 0 : Number.parseInt(rowIdxStr, 10) - 1;
+ const cells = new Map();
+ for (const cellMatch of regexAll(RE_CELL, rowContent)) {
+ const cellAttrs = cellMatch[1] ?? "";
+ const cellContent = cellMatch[2] ?? "";
+ const ref = attrVal(cellAttrs, "r");
+ if (ref === "") {
+ continue;
+ }
+ const cellType = attrVal(cellAttrs, "t");
+ const vMatch = RE_CELL_V.exec(cellContent);
+ const vText = vMatch !== null ? xmlUnescape(vMatch[1] ?? "") : "";
+ const isMatch = RE_CELL_IS.exec(cellContent);
+ const isText = isMatch?.[1] ?? "";
+ const [, colIdx] = parseCellRef(ref);
+ cells.set(colIdx, resolveCellValue(cellType, vText, isText, sharedStrings));
+ }
+ return { rowIndex, cells };
+}
+
+/** Parse all `` elements from a worksheet XML string. */
+function parseWorksheetRows(xml: string, sharedStrings: readonly string[]): RawRow[] {
+ const rows: RawRow[] = [];
+ for (const rowMatch of regexAll(RE_ROW, xml)) {
+ rows.push(parseOneRow(rowMatch[1] ?? "", rowMatch[2] ?? "", sharedStrings));
+ }
+ return rows;
+}
+
+// ─── DataFrame construction ───────────────────────────────────────────────────
+
+const BUILTIN_NA = new Set(["", "NA", "N/A", "null", "NaN", "nan", "#N/A"]);
+
+/** True when a string value should be coerced to null. */
+function isNaStr(s: string, extraNa: ReadonlySet<string>): boolean {
+ return BUILTIN_NA.has(s) || extraNa.has(s);
+}
+
+/** Coerce a raw cell value to null when it matches an NA sentinel. */
+function coerceNa(val: Scalar, extraNa: ReadonlySet<string>): Scalar {
+ if (typeof val === "string" && isNaStr(val, extraNa)) {
+ return null;
+ }
+ return val;
+}
+
+/** Compute the maximum column index across all rows. */
+function maxColIndex(rows: readonly RawRow[]): number {
+ let max = 0;
+ for (const row of rows) {
+ for (const col of row.cells.keys()) {
+ if (col > max) {
+ max = col;
+ }
+ }
+ }
+ return max;
+}
+
+interface ColumnarData {
+ readonly columns: string[];
+ readonly data: Scalar[][];
+}
+
+/** Pad header labels array to `numCols` with numeric fallback names. */
+function padHeaderLabels(labels: string[], numCols: number): void {
+ while (labels.length < numCols) {
+ labels.push(String(labels.length));
+ }
+}
+
+/** Extract header labels from the header row. */
+function extractHeaderLabels(
+ rows: readonly RawRow[],
+ headerRow: number,
+ numCols: number,
+): string[] {
+ const labels: string[] = [];
+ const hRow = rows.find((r) => r.rowIndex === headerRow);
+ if (hRow !== undefined) {
+ for (let c = 0; c < numCols; c++) {
+ const v = hRow.cells.get(c) ?? null;
+ labels.push(v !== null ? String(v) : String(c));
+ }
+ }
+ return labels;
+}
+
+/** Pivot sliced data rows into per-column arrays. */
+function pivotToColumns(
+ sliced: readonly RawRow[],
+ numCols: number,
+ extraNa: ReadonlySet<string>,
+): Scalar[][] {
+ const data: Scalar[][] = Array.from({ length: numCols }, (): Scalar[] => []);
+ for (const row of sliced) {
+ for (let c = 0; c < numCols; c++) {
+ const val = coerceNa(row.cells.get(c) ?? null, extraNa);
+ (data[c] as Scalar[]).push(val);
+ }
+ }
+ return data;
+}
+
+/** Separate header and data rows, then pivot to column-oriented arrays. */
+function buildColumnarData(
+ rows: readonly RawRow[],
+ headerRow: number | null,
+ skipRows: number,
+ nrows: number | undefined,
+ extraNa: ReadonlySet<string>,
+): ColumnarData {
+ const numCols = rows.length === 0 ? 0 : maxColIndex(rows) + 1;
+ const dataRows = rows.filter((r) => headerRow === null || r.rowIndex !== headerRow);
+ const headerLabels = headerRow !== null ? extractHeaderLabels(rows, headerRow, numCols) : [];
+ padHeaderLabels(headerLabels, numCols);
+ const sliced = dataRows.slice(skipRows, nrows !== undefined ? skipRows + nrows : undefined);
+ const data = pivotToColumns(sliced, numCols, extraNa);
+ return { columns: headerLabels, data };
+}
+
+/** Infer a dtype from a column's scalar values. */
+function inferColDtype(values: readonly Scalar[], override: DtypeName | undefined): DtypeName {
+ if (override !== undefined) {
+ return override;
+ }
+ let allNum = true;
+ let allBool = true;
+ let allStr = true;
+ for (const v of values) {
+ if (v === null || v === undefined) {
+ continue;
+ }
+ if (typeof v !== "number") {
+ allNum = false;
+ }
+ if (typeof v !== "boolean") {
+ allBool = false;
+ }
+ if (typeof v !== "string") {
+ allStr = false;
+ }
+ }
+ if (allBool) {
+ return "bool";
+ }
+ if (allNum) {
+ return "float64";
+ }
+ if (allStr) {
+ return "string";
+ }
+ return "object";
+}
+
+/** Build a DataFrame from parsed rows and options. */
+function buildDataFrame(rows: readonly RawRow[], options: ReadExcelOptions): DataFrame {
+ const headerRow = options.header !== undefined ? (options.header ?? null) : 0;
+ const skipRows = options.skipRows ?? 0;
+ const extraNa = new Set(options.naValues ?? []);
+ const dtypeOvr: Readonly<Record<string, DtypeName>> = options.dtype ?? {};
+ const { columns, data } = buildColumnarData(rows, headerRow, skipRows, options.nrows, extraNa);
+ const indexColOpt = options.indexCol ?? null;
+ const indexColIdx = resolveIndexColIdx(columns, indexColOpt);
+ const rowCount = (data[0] ?? []).length;
+ const colMap = new Map<string, Series>();
+ for (let c = 0; c < columns.length; c++) {
+ if (c === indexColIdx) {
+ continue;
+ }
+ const colName = columns[c] ?? String(c);
+ const colData = data[c] ?? [];
+ const dtypeName = inferColDtype(colData, dtypeOvr[colName]);
+ colMap.set(colName, new Series({ data: colData, dtype: Dtype.from(dtypeName), name: colName }));
+ }
+ const toLabel = (v: Scalar): Label =>
+ v === undefined || typeof v === "bigint" || v instanceof Date ? null : v;
+ const rowIndex =
+ indexColIdx >= 0
+ ? new Index((data[indexColIdx] ?? []).map(toLabel))
+ : new RangeIndex(rowCount);
+ return new DataFrame(colMap, rowIndex);
+}
+
+/** Resolve the numeric column index for the index column option. */
+function resolveIndexColIdx(columns: readonly string[], opt: string | number | null): number {
+ if (opt === null) {
+ return -1;
+ }
+ if (typeof opt === "number") {
+ return opt;
+ }
+ const idx = columns.indexOf(opt);
+ return idx;
+}
+
+// ─── sheet path resolution ────────────────────────────────────────────────────
+
+/** Resolve the XML path inside the ZIP for a given sheet. */
+function resolveSheetPath(
+ rels: ReadonlyMap<string, string>,
+ sheetInfo: SheetInfo,
+ sheetIndex: number,
+): string {
+ const target = rels.get(sheetInfo.rid) ?? `worksheets/sheet${sheetIndex + 1}.xml`;
+ return target.startsWith("/") ? target.slice(1) : `xl/${target}`;
+}
+
+/** Select the SheetInfo for the requested sheetName option. */
+function selectSheet(sheets: readonly SheetInfo[], sheetName: string | number): SheetInfo {
+ if (typeof sheetName === "number") {
+ const s = sheets[sheetName];
+ if (s === undefined) {
+ throw new Error(`Sheet index out of range: ${sheetName}`);
+ }
+ return s;
+ }
+ const s = sheets.find((sh) => sh.name === sheetName);
+ if (s === undefined) {
+ throw new Error(`Sheet not found: "${sheetName}"`);
+ }
+ return s;
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Parse an XLSX binary buffer into a `DataFrame`.
+ *
+ * Mirrors `pandas.read_excel()`.
+ *
+ * @param data - XLSX file contents as a `Uint8Array` or `ArrayBuffer`.
+ * @param options - Parsing options (sheet selection, header, index column, etc.).
+ * @returns A `DataFrame` containing the sheet data.
+ *
+ * @example
+ * ```ts
+ * import { readFileSync } from "node:fs";
+ * const buf = readFileSync("data.xlsx");
+ * const df = readExcel(new Uint8Array(buf));
+ * // df.shape → [100, 5]
+ * ```
+ */
+export function readExcel(
+ data: Uint8Array | ArrayBufferLike,
+ options?: ReadExcelOptions,
+): DataFrame {
+ const buf = data instanceof Uint8Array ? data : new Uint8Array(data);
+ const opts = options ?? {};
+ const entries = parseZipEntries(buf);
+
+ // Load shared strings (optional — may be absent for numeric-only sheets)
+ const sstXml = getZipEntry(buf, entries, "xl/sharedStrings.xml") ?? "";
+ const sharedStrings = sstXml === "" ? [] : parseSharedStrings(sstXml);
+
+ // Load workbook to find sheet names
+ const wbXml = getZipEntry(buf, entries, "xl/workbook.xml");
+ if (wbXml === null) {
+ throw new Error("Invalid XLSX: xl/workbook.xml not found");
+ }
+ const sheets = parseWorkbookSheets(wbXml);
+ if (sheets.length === 0) {
+ throw new Error("Invalid XLSX: no sheets found in workbook");
+ }
+
+ const sheetName = opts.sheetName ?? 0;
+ const sheetInfo = selectSheet(sheets, sheetName);
+ const sheetIndex = typeof sheetName === "number" ? sheetName : sheets.indexOf(sheetInfo);
+
+ // Resolve sheet XML path via workbook relationships
+ const relsXml = getZipEntry(buf, entries, "xl/_rels/workbook.xml.rels") ?? "";
+ const rels = relsXml === "" ? new Map<string, string>() : parseRelationships(relsXml);
+ const sheetPath = resolveSheetPath(rels, sheetInfo, sheetIndex);
+
+ const wsXml = getZipEntry(buf, entries, sheetPath);
+ if (wsXml === null) {
+ throw new Error(`Sheet XML not found at path: ${sheetPath}`);
+ }
+ const rows = parseWorksheetRows(wsXml, sharedStrings);
+ return buildDataFrame(rows, opts);
+}
+
+/**
+ * Return the sheet names in an XLSX file without parsing cell data.
+ *
+ * @param data - XLSX file contents as a `Uint8Array` or `ArrayBuffer`.
+ * @returns Array of sheet name strings in workbook order.
+ *
+ * @example
+ * ```ts
+ * xlsxSheetNames(buf); // ["Sheet1", "Sheet2"]
+ * ```
+ */
+export function xlsxSheetNames(data: Uint8Array | ArrayBufferLike): string[] {
+ const buf = data instanceof Uint8Array ? data : new Uint8Array(data);
+ const entries = parseZipEntries(buf);
+ const wbXml = getZipEntry(buf, entries, "xl/workbook.xml");
+ if (wbXml === null) {
+ return [];
+ }
+ return parseWorkbookSheets(wbXml).map((s) => s.name);
+}
diff --git a/src/stats/clip_advanced.ts b/src/stats/clip_advanced.ts
new file mode 100644
index 00000000..032bce5b
--- /dev/null
+++ b/src/stats/clip_advanced.ts
@@ -0,0 +1,290 @@
+/**
+ * clip_advanced — per-element clipping for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods with array/Series/DataFrame bounds:
+ * - `Series.clip(lower, upper)` — per-element bounds from scalar, array, or Series
+ * - `DataFrame.clip(lower, upper, axis?)` — per-element bounds with broadcast support
+ *
+ * Unlike the simple scalar `clip` in `elem_ops`, this module supports:
+ * - Per-position bounds (array or positionally-aligned Series)
+ * - DataFrame-shaped bounds for element-wise clipping
+ * - Axis-based broadcasting when bounds is a Series
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ * Missing values (null / NaN) are propagated through every operation.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Axis, Scalar } from "../types.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/** Scalar or per-element bound accepted by {@link clipAdvancedSeries}. */
+export type SeriesBound = number | null | undefined | readonly number[] | Series;
+
+/** Scalar or per-element bound accepted by {@link clipAdvancedDataFrame}. */
+export type DataFrameBound =
+ | number
+ | null
+ | undefined
+ | readonly number[]
+ | Series
+ | DataFrame;
+
+/** Options for {@link clipAdvancedSeries}. */
+export interface ClipAdvancedSeriesOptions {
+ /**
+ * Lower bound — scalar, array, or positionally-aligned Series.
+ * `null` / `undefined` means no lower bound.
+ */
+ readonly lower?: SeriesBound;
+ /**
+ * Upper bound — scalar, array, or positionally-aligned Series.
+ * `null` / `undefined` means no upper bound.
+ */
+ readonly upper?: SeriesBound;
+}
+
+/** Options for {@link clipAdvancedDataFrame}. */
+export interface ClipAdvancedDataFrameOptions {
+ /**
+ * Lower bound — scalar, array, Series, or element-wise DataFrame.
+ * `null` / `undefined` means no lower bound.
+ */
+ readonly lower?: DataFrameBound;
+ /**
+ * Upper bound — scalar, array, Series, or element-wise DataFrame.
+ * `null` / `undefined` means no upper bound.
+ */
+ readonly upper?: DataFrameBound;
+ /**
+ * When `lower` or `upper` is a Series, this axis controls broadcasting.
+ * - `0` or `"index"` (default): broadcast Series along rows (one bound per column).
+ * - `1` or `"columns"`: broadcast Series along columns (one bound per row).
+ */
+ readonly axis?: Axis;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` is a finite number (not null / undefined / NaN). */
+function isFiniteNum(v: Scalar): v is number {
+ return typeof v === "number" && !Number.isNaN(v);
+}
+
+/** Clip a numeric value to [lo, hi], preserving missing values. */
+function clipValue(v: Scalar, lo: number, hi: number): Scalar {
+ if (!isFiniteNum(v)) {
+ return v;
+ }
+ if (v < lo) {
+ return lo;
+ }
+ if (v > hi) {
+ return hi;
+ }
+ return v;
+}
+
+/**
+ * Resolve a Series bound to a positional number for index `i`.
+ * Arrays are accessed by position; Series are accessed by position.
+ */
+function resolveSeriesBound(bound: SeriesBound, i: number): number {
+ if (bound === null || bound === undefined) {
+ return Number.NaN; // sentinel: no bound
+ }
+ if (typeof bound === "number") {
+ return bound;
+ }
+ if (Array.isArray(bound)) {
+ const v = (bound as readonly number[])[i];
+ return v !== undefined ? v : Number.NaN;
+ }
+ // Series — positional access
+ const s = bound as Series;
+ if (i >= s.size) {
+ return Number.NaN;
+ }
+ const sv = s.iat(i);
+ return isFiniteNum(sv) ? sv : Number.NaN;
+}
+
+// ─── clipAdvancedSeries ────────────────────────────────────────────────────────
+
+/**
+ * Clip each element of a Series to per-element [lower, upper] bounds.
+ *
+ * Bounds may be:
+ * - A scalar `number` — applies the same bound to every element
+ * - A `number[]` array — per-position bounds aligned by position
+ * - A `Series` — per-position bounds taken positionally (label order ignored)
+ * - `null` / `undefined` — no bound in that direction
+ *
+ * Non-numeric values (null, NaN, strings, …) pass through unchanged.
+ * Mirrors `pandas.Series.clip(lower, upper)` with array bounds.
+ *
+ * @example
+ * ```ts
+ * import { Series, clipAdvancedSeries } from "tsb";
+ * const s = new Series({ data: [-3, 1, 5, 10] });
+ * const lo = new Series({ data: [-1, 0, 2, 8] });
+ * clipAdvancedSeries(s, { lower: lo }).values; // [-1, 1, 5, 10]
+ * ```
+ */
+export function clipAdvancedSeries(
+ series: Series,
+ options: ClipAdvancedSeriesOptions = {},
+): Series {
+ const { lower, upper } = options;
+ const n = series.size;
+ const out: Scalar[] = new Array(n);
+
+ for (let i = 0; i < n; i++) {
+ const v = series.iat(i);
+ if (!isFiniteNum(v)) {
+ out[i] = v;
+ continue;
+ }
+
+ const lo = resolveSeriesBound(lower, i);
+ const hi = resolveSeriesBound(upper, i);
+
+ const effectiveLo = Number.isNaN(lo) ? Number.NEGATIVE_INFINITY : lo;
+ const effectiveHi = Number.isNaN(hi) ? Number.POSITIVE_INFINITY : hi;
+
+ out[i] = clipValue(v, effectiveLo, effectiveHi);
+ }
+
+ return new Series({ data: out, index: series.index, name: series.name });
+}
+
+// ─── DataFrame bound helpers ───────────────────────────────────────────────────
+
+/** Resolve bound for a DataFrame cell where the bound is a Series (axis-based). */
+function resolveSeriesBoundForDf(s: Series, r: number, c: number, axis: Axis): number {
+ const isRowAxis = axis === 0 || axis === "index";
+ if (isRowAxis) {
+ // broadcast along rows → one bound per column → use col index `c`
+ if (c >= s.size) {
+ return Number.NaN;
+ }
+ const sv = s.iat(c);
+ return isFiniteNum(sv) ? sv : Number.NaN;
+ }
+ // broadcast along columns → one bound per row → use row index `r`
+ if (r >= s.size) {
+ return Number.NaN;
+ }
+ const sv = s.iat(r);
+ return isFiniteNum(sv) ? sv : Number.NaN;
+}
+
+/** Resolve bound for a DataFrame cell where the bound is a DataFrame (element-wise). */
+function resolveDataFrameBoundFromDf(bound: DataFrame, r: number, colName: string): number {
+ let val: Scalar = null;
+ try {
+ val = bound.col(colName).iat(r);
+ } catch {
+ return Number.NaN;
+ }
+ return isFiniteNum(val) ? val : Number.NaN;
+}
+
+/**
+ * Resolve a DataFrame bound value for cell (row r, col c).
+ * Supports: scalar, row-array, Series (broadcast by axis), DataFrame (element-wise).
+ */
+function resolveDataFrameBound(
+ bound: DataFrameBound,
+ r: number,
+ c: number,
+ colName: string,
+ axis: Axis,
+): number {
+ if (bound === null || bound === undefined) {
+ return Number.NaN;
+ }
+ if (typeof bound === "number") {
+ return bound;
+ }
+ if (bound instanceof DataFrame) {
+ return resolveDataFrameBoundFromDf(bound, r, colName);
+ }
+ if (bound instanceof Series) {
+ return resolveSeriesBoundForDf(bound as Series, r, c, axis);
+ }
+ // plain array: treat as row-indexed (one bound per row)
+ if (Array.isArray(bound)) {
+ const v = (bound as readonly number[])[r];
+ return v !== undefined ? v : Number.NaN;
+ }
+ return Number.NaN;
+}
+
+// ─── clipAdvancedDataFrame ─────────────────────────────────────────────────────
+
+/**
+ * Clip each element of a DataFrame to per-element [lower, upper] bounds.
+ *
+ * Bounds may be:
+ * - A scalar `number` — same bound applied to every cell
+ * - A `number[]` array — per-row bounds (one per row, broadcast across columns)
+ * - A `Series` — broadcast by `axis`:
+ * - `axis=0` (default): one bound per **column** (series index = column position)
+ * - `axis=1`: one bound per **row** (series index = row position)
+ * - A `DataFrame` — element-wise bounds (same shape, same column names)
+ * - `null` / `undefined` — no bound in that direction
+ *
+ * Non-numeric values (null, NaN, strings, …) pass through unchanged.
+ * Mirrors `pandas.DataFrame.clip(lower, upper, axis=0)` with array/Series/DF bounds.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, clipAdvancedDataFrame } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+ * const loBound = DataFrame.fromColumns({ a: [2, 3, 4], b: [1, 4, 8] });
+ * clipAdvancedDataFrame(df, { lower: loBound }).col("a").values; // [2, 5, 9]
+ * ```
+ */
+export function clipAdvancedDataFrame(
+ df: DataFrame,
+ options: ClipAdvancedDataFrameOptions = {},
+): DataFrame {
+ const { lower, upper } = options;
+ const axis: Axis = options.axis ?? 0;
+ const colNames = df.columns.values;
+ const colMap = new Map<string, Series>();
+
+ for (let c = 0; c < colNames.length; c++) {
+ const colName = colNames[c];
+ if (colName === undefined) {
+ continue;
+ }
+ const col = df.col(colName);
+ const out: Scalar[] = new Array(df.index.size);
+
+ for (let r = 0; r < df.index.size; r++) {
+ const v = col.iat(r);
+ if (!isFiniteNum(v)) {
+ out[r] = v;
+ continue;
+ }
+
+ const lo = resolveDataFrameBound(lower, r, c, colName, axis);
+ const hi = resolveDataFrameBound(upper, r, c, colName, axis);
+
+ const effectiveLo = Number.isNaN(lo) ? Number.NEGATIVE_INFINITY : lo;
+ const effectiveHi = Number.isNaN(hi) ? Number.POSITIVE_INFINITY : hi;
+
+ out[r] = clipValue(v, effectiveLo, effectiveHi);
+ }
+
+ colMap.set(colName, new Series({ data: out, index: df.index, name: colName }));
+ }
+
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/clip_with_bounds.ts b/src/stats/clip_with_bounds.ts
index 15d8efa9..3c02099e 100644
--- a/src/stats/clip_with_bounds.ts
+++ b/src/stats/clip_with_bounds.ts
@@ -45,8 +45,15 @@ export interface SeriesClipBoundsOptions {
readonly upper?: BoundArg;
}
+/** A bound that also accepts a DataFrame (for element-wise clipping). */
+export type DataFrameBoundArg = BoundArg | DataFrame;
+
/** Options for {@link clipDataFrameWithBounds}. */
-export interface DataFrameClipBoundsOptions extends SeriesClipBoundsOptions {
+export interface DataFrameClipBoundsOptions {
+ /** Lower bound. May be any `BoundArg` or a `DataFrame` for element-wise clipping. */
+ readonly lower?: DataFrameBoundArg;
+ /** Upper bound. May be any `BoundArg` or a `DataFrame` for element-wise clipping. */
+ readonly upper?: DataFrameBoundArg;
/**
* Axis along which a Series bound is broadcast:
* - `0` / `"index"` (default): Series is indexed on **row labels** — each row
diff --git a/src/stats/idxmin_idxmax.ts b/src/stats/idxmin_idxmax.ts
new file mode 100644
index 00000000..6ee745f9
--- /dev/null
+++ b/src/stats/idxmin_idxmax.ts
@@ -0,0 +1,234 @@
+/**
+ * idxmin / idxmax — return the index label of the minimum or maximum value.
+ *
+ * Mirrors `pandas.Series.idxmin()` / `pandas.Series.idxmax()` and
+ * `pandas.DataFrame.idxmin()` / `pandas.DataFrame.idxmax()`:
+ *
+ * - `idxminSeries(series)` — label of the minimum value (NaN/null excluded)
+ * - `idxmaxSeries(series)` — label of the maximum value (NaN/null excluded)
+ * - `idxminDataFrame(df)` — Series of row labels where each column achieves its min
+ * - `idxmaxDataFrame(df)` — Series of row labels where each column achieves its max
+ *
+ * When `skipna` is true (the default), NaN / null values are ignored.
+ * When `skipna` is false, any NaN / null causes the result to be `null`.
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Dtype, Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link idxminSeries}, {@link idxmaxSeries}. */
+export interface IdxOptions {
+ /**
+ * Whether to skip NaN / null values.
+ * @defaultValue `true`
+ */
+ readonly skipna?: boolean;
+}
+
+/** Options for {@link idxminDataFrame}, {@link idxmaxDataFrame}. */
+export interface IdxDataFrameOptions {
+ /**
+ * Whether to skip NaN / null values.
+ * @defaultValue `true`
+ */
+ readonly skipna?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar should be treated as missing. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/**
+ * Find the index of the extreme value (min or max) among `values`.
+ * Returns `null` when all values are missing (with `skipna=true`) or when
+ * any value is missing (with `skipna=false`).
+ */
+function findExtreme(
+ values: readonly Scalar[],
+ skipna: boolean,
+ isBetter: (a: Scalar, b: Scalar) => boolean,
+): number | null {
+ let bestIdx: number | null = null;
+ let bestVal: Scalar = null;
+
+ for (let i = 0; i < values.length; i++) {
+ const v = values[i] as Scalar;
+ if (isMissing(v)) {
+ if (!skipna) {
+ return null;
+ }
+ continue;
+ }
+ if (bestIdx === null || isBetter(v, bestVal)) {
+ bestIdx = i;
+ bestVal = v;
+ }
+ }
+ return bestIdx;
+}
+
+/** Compare scalars: returns true if `a` is less than `b`. */
+function isLess(a: Scalar, b: Scalar): boolean {
+ if (b === null || b === undefined) {
+ return false;
+ }
+ return (a as number | string | boolean) < (b as number | string | boolean);
+}
+
+/** Compare scalars: returns true if `a` is greater than `b`. */
+function isGreater(a: Scalar, b: Scalar): boolean {
+ if (b === null || b === undefined) {
+ return false;
+ }
+ return (a as number | string | boolean) > (b as number | string | boolean);
+}
+
+// ─── public API — Series ──────────────────────────────────────────────────────
+
+/**
+ * Return the index label of the minimum value in `series`.
+ *
+ * NaN / null values are excluded when `skipna` is true (the default).
+ * Returns `null` when the series is empty or all values are NaN / null.
+ *
+ * Mirrors `pandas.Series.idxmin()`.
+ *
+ * @param series - Input Series.
+ * @param options - Options (skipna).
+ * @returns The index label at the minimum value, or `null` if no valid value exists.
+ *
+ * @example
+ * ```ts
+ * import { Series, idxminSeries } from "tsb";
+ *
+ * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] });
+ * idxminSeries(s); // "b" (first occurrence of 1)
+ * ```
+ */
+export function idxminSeries(series: Series, options: IdxOptions = {}): Label {
+ const skipna = options.skipna ?? true;
+ const idx = findExtreme(series.values, skipna, isLess);
+ if (idx === null) {
+ return null;
+ }
+ return series.index.at(idx);
+}
+
+/**
+ * Return the index label of the maximum value in `series`.
+ *
+ * NaN / null values are excluded when `skipna` is true (the default).
+ * Returns `null` when the series is empty or all values are NaN / null.
+ *
+ * Mirrors `pandas.Series.idxmax()`.
+ *
+ * @param series - Input Series.
+ * @param options - Options (skipna).
+ * @returns The index label at the maximum value, or `null` if no valid value exists.
+ *
+ * @example
+ * ```ts
+ * import { Series, idxmaxSeries } from "tsb";
+ *
+ * const s = new Series({ data: [3, 1, 4, 1, 5], index: ["a", "b", "c", "d", "e"] });
+ * idxmaxSeries(s); // "e"
+ * ```
+ */
+export function idxmaxSeries(series: Series, options: IdxOptions = {}): Label {
+ const skipna = options.skipna ?? true;
+ const idx = findExtreme(series.values, skipna, isGreater);
+ if (idx === null) {
+ return null;
+ }
+ return series.index.at(idx);
+}
+
+// ─── public API — DataFrame ───────────────────────────────────────────────────
+
+/**
+ * Return a Series containing the index label of the minimum value for each column.
+ *
+ * The result Series is indexed by column names.
+ * NaN / null values are excluded when `skipna` is true (the default).
+ * Columns where all values are NaN / null yield `null` in the result.
+ *
+ * Mirrors `pandas.DataFrame.idxmin()` (axis=0).
+ *
+ * @param df - Input DataFrame.
+ * @param options - Options (skipna).
+ * @returns A Series indexed by column names, containing the row label of each column's min.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, idxminDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] });
+ * idxminDataFrame(df).values; // ["y", "z"]
+ * ```
+ */
+export function idxminDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series {
+ const skipna = options.skipna ?? true;
+ const colNames = df.columns.values;
+ const result: Label[] = colNames.map((colName) => {
+ const s = df.col(colName);
+ const idx = findExtreme(s.values, skipna, isLess);
+ if (idx === null) {
+ return null;
+ }
+ return df.index.at(idx);
+ });
+ return new Series({
+ data: result,
+ index: colNames as unknown as Label[],
+ name: null,
+ dtype: Dtype.from("object"),
+ });
+}
+
+/**
+ * Return a Series containing the index label of the maximum value for each column.
+ *
+ * The result Series is indexed by column names.
+ * NaN / null values are excluded when `skipna` is true (the default).
+ * Columns where all values are NaN / null yield `null` in the result.
+ *
+ * Mirrors `pandas.DataFrame.idxmax()` (axis=0).
+ *
+ * @param df - Input DataFrame.
+ * @param options - Options (skipna).
+ * @returns A Series indexed by column names, containing the row label of each column's max.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, idxmaxDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] });
+ * idxmaxDataFrame(df).values; // ["z", "y"]
+ * ```
+ */
+export function idxmaxDataFrame(df: DataFrame, options: IdxDataFrameOptions = {}): Series {
+ const skipna = options.skipna ?? true;
+ const colNames = df.columns.values;
+ const result: Label[] = colNames.map((colName) => {
+ const s = df.col(colName);
+ const idx = findExtreme(s.values, skipna, isGreater);
+ if (idx === null) {
+ return null;
+ }
+ return df.index.at(idx);
+ });
+ return new Series({
+ data: result,
+ index: colNames as unknown as Label[],
+ name: null,
+ dtype: Dtype.from("object"),
+ });
+}
diff --git a/src/stats/index.ts b/src/stats/index.ts
index 27e6f330..63864005 100644
--- a/src/stats/index.ts
+++ b/src/stats/index.ts
@@ -284,3 +284,79 @@ export type {
SeriesToStringOptions,
DataFrameToStringOptions,
} from "./format_ops.ts";
+
+export { clipAdvancedSeries, clipAdvancedDataFrame } from "./clip_advanced.ts";
+export type {
+ SeriesBound,
+ DataFrameBound,
+ ClipAdvancedSeriesOptions,
+ ClipAdvancedDataFrameOptions,
+} from "./clip_advanced.ts";
+
+export { idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame } from "./idxmin_idxmax.ts";
+export type { IdxOptions, IdxDataFrameOptions } from "./idxmin_idxmax.ts";
+
+export { modeSeries, modeDataFrame } from "./mode.ts";
+export type { ModeSeriesOptions, ModeDataFrameOptions } from "./mode.ts";
+
+export {
+ nancount,
+ nansum,
+ nanmean,
+ nanmedian,
+ nanvar,
+ nanstd,
+ nanmin,
+ nanmax,
+ nanprod,
+} from "./nancumops.ts";
+export type { NanInput, NanAggOptions } from "./nancumops.ts";
+
+export {
+ nuniqueSeries,
+ nuniqueDataFrame,
+ anySeries,
+ allSeries,
+ anyDataFrame,
+ allDataFrame,
+} from "./nunique.ts";
+export type {
+ NuniqueSeriesOptions,
+ NuniqueDataFrameOptions,
+ AnyAllSeriesOptions,
+ AnyAllDataFrameOptions,
+} from "./nunique.ts";
+
+export { pctChangeSeries, pctChangeDataFrame } from "./pct_change.ts";
+export type {
+ PctChangeFillMethod,
+ PctChangeOptions,
+ DataFramePctChangeOptions,
+} from "./pct_change.ts";
+
+export { quantileSeries, quantileDataFrame } from "./quantile.ts";
+export type {
+ QuantileInterpolation,
+ QuantileSeriesOptions,
+ QuantileDataFrameOptions,
+} from "./quantile.ts";
+
+export { replaceSeries, replaceDataFrame } from "./replace.ts";
+export type {
+ ReplaceMapping,
+ ReplaceSpec,
+ ReplaceOptions,
+ DataFrameReplaceOptions,
+} from "./replace.ts";
+
+export { varSeries, semSeries, varDataFrame, semDataFrame } from "./sem_var.ts";
+export type { VarSemSeriesOptions, VarSemDataFrameOptions } from "./sem_var.ts";
+
+export { skewSeries, kurtSeries, skewDataFrame, kurtDataFrame } from "./skew_kurt.ts";
+export type {
+ SkewKurtSeriesOptions,
+ SkewKurtDataFrameOptions,
+} from "./skew_kurt.ts";
+
+export { toDatetime } from "./to_datetime.ts";
+export type { DatetimeUnit, DatetimeErrors, ToDatetimeOptions } from "./to_datetime.ts";
diff --git a/src/stats/mode.ts b/src/stats/mode.ts
new file mode 100644
index 00000000..53c1c370
--- /dev/null
+++ b/src/stats/mode.ts
@@ -0,0 +1,305 @@
+/**
+ * mode — most-frequent value(s) in a Series or DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.mode(dropna?)`
+ * - `pandas.DataFrame.mode(axis?, numeric_only?, dropna?)`
+ *
+ * Returns a new Series/DataFrame containing all tied modal values sorted
+ * ascending. The result index is always a 0-based integer index.
+ * For DataFrames with `axis=0` (default, column-wise), each column's modes
+ * are returned, null-padded to the length of the longest mode list.
+ *
+ * @module
+ */
+
+import { DataFrame, type Dtype, Index, Series } from "../core/index.ts";
+import type { DtypeKind } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link modeSeries}. */
+export interface ModeSeriesOptions {
+ /**
+ * If `true` (default), exclude null/NaN values from frequency counts.
+ */
+ readonly dropna?: boolean;
+}
+
+/** Options for {@link modeDataFrame}. */
+export interface ModeDataFrameOptions {
+ /**
+ * Axis along which to compute the mode.
+ * - `0` (default): compute per column.
+ * - `1`: compute per row.
+ */
+ readonly axis?: 0 | 1;
+ /**
+ * If `true`, only include numeric columns when `axis=0`.
+ * Has no effect for `axis=1`.
+ * @defaultValue `false`
+ */
+ readonly numericOnly?: boolean;
+ /**
+ * If `true` (default), exclude null/NaN values.
+ */
+ readonly dropna?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar value is missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** True when a dtype kind is numeric (int, uint, or float). */
+function isNumericKind(kind: DtypeKind): boolean {
+ if (kind === "int") {
+ return true;
+ }
+ if (kind === "uint") {
+ return true;
+ }
+ if (kind === "float") {
+ return true;
+ }
+ return false;
+}
+
+/** Stable string key for a Scalar value used in frequency maps. */
+function scalarKey(v: Scalar): string {
+ if (v === null) {
+ return "\0null";
+ }
+ if (v === undefined) {
+ return "\0undefined";
+ }
+ if (v instanceof Date) {
+ return `\0date:${v.getTime().toString()}`;
+ }
+ return String(v);
+}
+
+/**
+ * Compare two Scalar values for ascending sort.
+ * Numbers < strings < booleans < dates < null/undefined.
+ */
+function compareScalars(a: Scalar, b: Scalar): number {
+ // Both missing — equal
+ if (isMissing(a) && isMissing(b)) {
+ return 0;
+ }
+ // Missing values sort last
+ if (isMissing(a)) {
+ return 1;
+ }
+ if (isMissing(b)) {
+ return -1;
+ }
+ // Both numbers
+ if (typeof a === "number" && typeof b === "number") {
+ return a - b;
+ }
+ // Both bigints
+ if (typeof a === "bigint" && typeof b === "bigint") {
+ if (a < b) {
+ return -1;
+ }
+ if (a > b) {
+ return 1;
+ }
+ return 0;
+ }
+ // Both dates
+ if (a instanceof Date && b instanceof Date) {
+ return a.getTime() - b.getTime();
+ }
+ // Both booleans
+ if (typeof a === "boolean" && typeof b === "boolean") {
+ if (a === b) {
+ return 0;
+ }
+ return a ? 1 : -1;
+ }
+ // Both strings
+ if (typeof a === "string" && typeof b === "string") {
+ if (a < b) {
+ return -1;
+ }
+ if (a > b) {
+ return 1;
+ }
+ return 0;
+ }
+ // Mixed types — compare by canonical key
+ return scalarKey(a) < scalarKey(b) ? -1 : 1;
+}
+
+/**
+ * Compute the modal value(s) from an array of scalars.
+ * Returns all values tied for the highest frequency, sorted ascending.
+ */
+function computeModes(values: readonly Scalar[], dropna: boolean): Scalar[] {
+ const freq = new Map<string, { value: Scalar; count: number }>();
+ let maxCount = 0;
+
+ for (const v of values) {
+ if (dropna && isMissing(v)) {
+ continue;
+ }
+ const key = scalarKey(v);
+ const entry = freq.get(key);
+ if (entry === undefined) {
+ freq.set(key, { value: v, count: 1 });
+ if (maxCount === 0) {
+ maxCount = 1;
+ }
+ } else {
+ entry.count += 1;
+ if (entry.count > maxCount) {
+ maxCount = entry.count;
+ }
+ }
+ }
+
+ if (maxCount === 0) {
+ return [];
+ }
+
+ const result: Scalar[] = [];
+ for (const { value, count } of freq.values()) {
+ if (count === maxCount) {
+ result.push(value);
+ }
+ }
+ result.sort(compareScalars);
+ return result;
+}
+
+/** Build an integer-index Series from a Scalar array, preserving the source dtype. */
+function buildModeSeries(modes: readonly Scalar[], name: Label, dtype: Dtype): Series {
+ const idx = new Index(modes.map((_, i) => i));
+ const seriesName = typeof name === "string" ? name : name === null ? null : String(name);
+ return new Series({ data: modes.slice(), index: idx, dtype, name: seriesName });
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Return the most-frequent value(s) in a Series.
+ *
+ * When multiple values share the highest frequency they are all returned,
+ * sorted in ascending order. The result has a 0-based integer index.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 2, 3, 3] });
+ * modeSeries(s); // Series([2, 3])
+ * ```
+ */
+export function modeSeries(
+ series: Series,
+ options: ModeSeriesOptions = {},
+): Series {
+ const dropna = options.dropna ?? true;
+ const modes = computeModes(series.values as readonly Scalar[], dropna);
+ return buildModeSeries(modes, series.name, series.dtype);
+}
+
+/**
+ * Return the most-frequent value(s) per column (axis=0) or per row (axis=1).
+ *
+ * For `axis=0` (default): each column gets its own mode list. Columns with
+ * shorter mode lists are null-padded to match the column with the most modes.
+ * The result index is 0-based integers; the columns are the same as the input
+ * (or only numeric columns when `numericOnly=true`).
+ *
+ * For `axis=1`: each row is reduced to its modal values. The result has the
+ * same row index as the input; the columns are 0-based integers.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 1, 2], b: [3, 3, 3] });
+ * modeDataFrame(df); // {a: [1], b: [3]}
+ * ```
+ */
+export function modeDataFrame(df: DataFrame, options: ModeDataFrameOptions = {}): DataFrame {
+ const axis = options.axis ?? 0;
+ const numericOnly = options.numericOnly ?? false;
+ const dropna = options.dropna ?? true;
+
+ if (axis === 0) {
+ return modeByColumn(df, numericOnly, dropna);
+ }
+ return modeByRow(df, dropna);
+}
+
+/** Compute column-wise modes (axis=0). */
+function modeByColumn(df: DataFrame, numericOnly: boolean, dropna: boolean): DataFrame {
+ const colNames = df.columns.values as readonly string[];
+ const selectedCols = numericOnly
+ ? colNames.filter((c) => {
+ const series = df.col(c);
+ return isNumericKind(series.dtype.kind);
+ })
+ : colNames;
+
+ // Compute modes per column
+ const columnModes: Map<string, Scalar[]> = new Map<string, Scalar[]>();
+ let maxLen = 0;
+
+ for (const col of selectedCols) {
+ const series = df.col(col);
+ const vals = series.values as readonly Scalar[];
+ const modes = computeModes(vals, dropna);
+ columnModes.set(col, modes);
+ if (modes.length > maxLen) {
+ maxLen = modes.length;
+ }
+ }
+
+ // Null-pad shorter mode lists
+ const record: Record<string, Scalar[]> = {};
+ for (const col of selectedCols) {
+ const modes = columnModes.get(col) ?? [];
+ const padded: Scalar[] = modes.slice();
+ while (padded.length < maxLen) {
+ padded.push(null);
+ }
+ record[col] = padded;
+ }
+
+ return DataFrame.fromColumns(record);
+}
+
+/** Compute row-wise modes (axis=1). */
+function modeByRow(df: DataFrame, dropna: boolean): DataFrame {
+ const colNames = df.columns.values as readonly string[];
+ const rowCount = df.index.size;
+ const rowModes: Scalar[][] = [];
+ let maxLen = 0;
+
+ for (let r = 0; r < rowCount; r++) {
+ const rowVals: Scalar[] = [];
+ for (const col of colNames) {
+ const v = df.col(col).values[r] as Scalar;
+ rowVals.push(v);
+ }
+ const modes = computeModes(rowVals, dropna);
+ rowModes.push(modes);
+ if (modes.length > maxLen) {
+ maxLen = modes.length;
+ }
+ }
+
+ // Build result columns (0, 1, 2, … maxLen-1), rows = original row index
+ const resultCols: Record<string, Scalar[]> = {};
+ for (let c = 0; c < maxLen; c++) {
+ const colKey = String(c);
+ resultCols[colKey] = rowModes.map((modes) => (c < modes.length ? (modes[c] as Scalar) : null));
+ }
+
+ return DataFrame.fromColumns(resultCols, { index: df.index });
+}
diff --git a/src/stats/nancumops.ts b/src/stats/nancumops.ts
new file mode 100644
index 00000000..6bdbe769
--- /dev/null
+++ b/src/stats/nancumops.ts
@@ -0,0 +1,272 @@
+/**
+ * nancumops — nan-ignoring aggregate functions for arrays and Series.
+ *
+ * Mirrors the following numpy / pandas utilities:
+ * - `nansum(data, options?)` — sum, ignoring NaN / null
+ * - `nanmean(data, options?)` — mean, ignoring NaN / null
+ * - `nanmedian(data, options?)` — median, ignoring NaN / null
+ * - `nanstd(data, options?)` — standard deviation, ignoring NaN / null
+ * - `nanvar(data, options?)` — variance, ignoring NaN / null
+ * - `nanmin(data, options?)` — minimum, ignoring NaN / null
+ * - `nanmax(data, options?)` — maximum, ignoring NaN / null
+ * - `nanprod(data, options?)` — product, ignoring NaN / null
+ * - `nancount(data)` — count of non-NaN numeric values
+ *
+ * All functions accept `readonly Scalar[]` **or** a `Series` and
+ * return a `number`. Non-numeric scalars (strings, booleans, Dates) are
+ * treated as if they were NaN and excluded.
+ *
+ * @module
+ */
+
+import type { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Input accepted by every nan-aggregate function.
 * A plain array is used as-is; a Series contributes its `.values`. */
export type NanInput = readonly Scalar[] | Series;

/** Options shared by most nan-aggregate functions. */
export interface NanAggOptions {
  /**
   * Degrees of freedom for std / var (default `1` — matches numpy and
   * pandas default for `ddof`). The divisor used is `n - ddof`.
   *
   * Only meaningful for {@link nanstd} and {@link nanvar}; all other
   * aggregates ignore this option.
   */
  readonly ddof?: number;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Returns the underlying array from a Series or passes the array through. */
+function toValues(input: NanInput): readonly Scalar[] {
+ if (Array.isArray(input)) {
+ return input as readonly Scalar[];
+ }
+ // Series — read via .values
+ return (input as Series).values;
+}
+
+/** Returns only the finite numeric values from the input (NaN, null, undefined,
+ * non-numeric scalars are all dropped). */
+function numericValues(input: NanInput): number[] {
+ const raw = toValues(input);
+ const out: number[] = [];
+ for (const v of raw) {
+ if (typeof v === "number" && !Number.isNaN(v)) {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+/** Sorts an array of numbers in ascending order (returns a new array). */
+function sortedAsc(xs: number[]): number[] {
+ return xs.slice().sort((a, b) => a - b);
+}
+
+// ─── public functions ─────────────────────────────────────────────────────────
+
+/**
+ * Count of non-NaN numeric values in `input`.
+ *
+ * @example
+ * ```ts
+ * nancount([1, 2, NaN, null, 3]); // 3
+ * ```
+ */
+export function nancount(input: NanInput): number {
+ return numericValues(input).length;
+}
+
+/**
+ * Sum of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `0` when there are no valid values (matches numpy behaviour).
+ *
+ * @example
+ * ```ts
+ * nansum([1, 2, NaN, null, 3]); // 6
+ * ```
+ */
+export function nansum(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return 0;
+ }
+ let s = 0;
+ for (const x of xs) {
+ s += x;
+ }
+ return s;
+}
+
+/**
+ * Arithmetic mean of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `Number.NaN` when there are no valid values.
+ *
+ * @example
+ * ```ts
+ * nanmean([1, 2, NaN, 3]); // 2
+ * ```
+ */
+export function nanmean(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return Number.NaN;
+ }
+ let s = 0;
+ for (const x of xs) {
+ s += x;
+ }
+ return s / xs.length;
+}
+
+/**
+ * Median of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `Number.NaN` when there are no valid values.
+ *
+ * @example
+ * ```ts
+ * nanmedian([1, 3, 2, NaN]); // 2
+ * ```
+ */
+export function nanmedian(input: NanInput): number {
+ const xs = sortedAsc(numericValues(input));
+ const n = xs.length;
+ if (n === 0) {
+ return Number.NaN;
+ }
+ const mid = Math.floor(n / 2);
+ if (n % 2 === 1) {
+ return xs[mid] as number;
+ }
+ return ((xs[mid - 1] as number) + (xs[mid] as number)) / 2;
+}
+
+/**
+ * Variance of numeric values in `input`, ignoring NaN / null.
+ *
+ * @param input - Array or Series of scalars.
+ * @param options - `ddof` (degrees of freedom, default `1`).
+ *
+ * Returns `Number.NaN` when there are fewer valid values than `ddof + 1`.
+ *
+ * @example
+ * ```ts
+ * nanvar([2, 4, 4, 4, 5, 5, 7, 9], { ddof: 1 }); // 4.571...
+ * ```
+ */
+export function nanvar(input: NanInput, options: NanAggOptions = {}): number {
+ const ddof = options.ddof ?? 1;
+ const xs = numericValues(input);
+ const n = xs.length;
+ if (n <= ddof) {
+ return Number.NaN;
+ }
+ let s = 0;
+ for (const x of xs) {
+ s += x;
+ }
+ const mean = s / n;
+ let ss = 0;
+ for (const x of xs) {
+ const diff = x - mean;
+ ss += diff * diff;
+ }
+ return ss / (n - ddof);
+}
+
+/**
+ * Standard deviation of numeric values in `input`, ignoring NaN / null.
+ *
+ * @param input - Array or Series of scalars.
+ * @param options - `ddof` (degrees of freedom, default `1`).
+ *
+ * Returns `Number.NaN` when there are fewer valid values than `ddof + 1`.
+ *
+ * @example
+ * ```ts
+ * nanstd([2, 4, 4, 4, 5, 5, 7, 9], { ddof: 1 }); // 2.138...
+ * ```
+ */
+export function nanstd(input: NanInput, options: NanAggOptions = {}): number {
+ return Math.sqrt(nanvar(input, options));
+}
+
+/**
+ * Minimum of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `Number.NaN` when there are no valid values.
+ *
+ * @example
+ * ```ts
+ * nanmin([3, 1, NaN, 2]); // 1
+ * ```
+ */
+export function nanmin(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return Number.NaN;
+ }
+ let m = xs[0] as number;
+ for (let i = 1; i < xs.length; i++) {
+ const v = xs[i] as number;
+ if (v < m) {
+ m = v;
+ }
+ }
+ return m;
+}
+
+/**
+ * Maximum of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `Number.NaN` when there are no valid values.
+ *
+ * @example
+ * ```ts
+ * nanmax([3, 1, NaN, 2]); // 3
+ * ```
+ */
+export function nanmax(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return Number.NaN;
+ }
+ let m = xs[0] as number;
+ for (let i = 1; i < xs.length; i++) {
+ const v = xs[i] as number;
+ if (v > m) {
+ m = v;
+ }
+ }
+ return m;
+}
+
+/**
+ * Product of numeric values in `input`, ignoring NaN / null.
+ *
+ * Returns `1` when there are no valid values (matches numpy behaviour for
+ * an empty product — identity element).
+ *
+ * @example
+ * ```ts
+ * nanprod([1, 2, NaN, 3]); // 6
+ * ```
+ */
+export function nanprod(input: NanInput): number {
+ const xs = numericValues(input);
+ if (xs.length === 0) {
+ return 1;
+ }
+ let p = 1;
+ for (const x of xs) {
+ p *= x;
+ }
+ return p;
+}
diff --git a/src/stats/nunique.ts b/src/stats/nunique.ts
new file mode 100644
index 00000000..af6d1e11
--- /dev/null
+++ b/src/stats/nunique.ts
@@ -0,0 +1,291 @@
+/**
+ * nunique_any_all — count unique values and boolean reductions for Series and
+ * DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.nunique(dropna?)` — count distinct non-null values
+ * - `pandas.Series.any(skipna?)` — true if any element is truthy
+ * - `pandas.Series.all(skipna?)` — true if all elements are truthy
+ * - `pandas.DataFrame.nunique(axis?, dropna?)`
+ * - `pandas.DataFrame.any(axis?, skipna?, bool_only?)`
+ * - `pandas.DataFrame.all(axis?, skipna?, bool_only?)`
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Options for {@link nuniqueSeries}. */
export interface NuniqueSeriesOptions {
  /**
   * If `true` (default), exclude null/undefined/NaN values from the
   * unique count.
   */
  readonly dropna?: boolean;
}

/** Options for {@link nuniqueDataFrame}. */
export interface NuniqueDataFrameOptions extends NuniqueSeriesOptions {
  /**
   * Axis along which to count unique values.
   * - `0` (default): count per column (result labelled by column names).
   * - `1`: count per row (result labelled by the original index).
   */
  readonly axis?: 0 | 1;
}

/** Options for {@link anySeries} and {@link allSeries}. */
export interface AnyAllSeriesOptions {
  /**
   * If `true` (default), skip null/NaN values (they do not contribute).
   * If `false`, null/NaN is treated as falsy (making `all` fail on them).
   */
  readonly skipna?: boolean;
}

/** Options for {@link anyDataFrame} and {@link allDataFrame}. */
export interface AnyAllDataFrameOptions extends AnyAllSeriesOptions {
  /**
   * Axis along which to reduce.
   * - `0` (default): reduce along rows, one result per column.
   * - `1`: reduce along columns, one result per row.
   */
  readonly axis?: 0 | 1;
  /**
   * If `true`, only include boolean-typed columns when `axis=0`.
   * Ignored for `axis=1`.
   * @defaultValue `false`
   */
  readonly boolOnly?: boolean;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a value is missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** True when a value is truthy (treating missing as falsy). */
+function isTruthy(v: Scalar): boolean {
+ if (isMissing(v)) {
+ return false;
+ }
+ return Boolean(v);
+}
+
+/** Return true if any value in `vals` is truthy, skipping missing when skipna=true. */
+function anyInSlice(vals: readonly Scalar[], skipna: boolean): boolean {
+ for (const v of vals) {
+ if (skipna && isMissing(v)) {
+ continue;
+ }
+ if (isTruthy(v)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/** Return true if all values in `vals` are truthy, skipping missing when skipna=true. */
+function allInSlice(vals: readonly Scalar[], skipna: boolean): boolean {
+ for (const v of vals) {
+ if (skipna && isMissing(v)) {
+ continue;
+ }
+ if (!isTruthy(v)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// ─── nunique ──────────────────────────────────────────────────────────────────
+
+/**
+ * Count the number of unique values in a Series.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 2, 3, null] as Scalar[] });
+ * nuniqueSeries(s); // 3 (null excluded)
+ * nuniqueSeries(s, { dropna: false }); // 4
+ * ```
+ */
+export function nuniqueSeries(series: Series, options: NuniqueSeriesOptions = {}): number {
+ const dropna = options.dropna ?? true;
+ const seen = new Set();
+ for (const v of series.values as readonly Scalar[]) {
+ if (dropna && isMissing(v)) {
+ continue;
+ }
+ seen.add(v);
+ }
+ return seen.size;
+}
+
+/**
+ * Count unique values per column (`axis=0`) or per row (`axis=1`) of a
+ * DataFrame.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 2], b: ["x", "x", "y"] });
+ * nuniqueDataFrame(df); // Series { a: 2, b: 2 }
+ * ```
+ */
+export function nuniqueDataFrame(
+ df: DataFrame,
+ options: NuniqueDataFrameOptions = {},
+): Series {
+ const axis = options.axis ?? 0;
+ const dropna = options.dropna ?? true;
+ const colNames = df.columns.values as readonly string[];
+
+ if (axis === 0) {
+ const labels: string[] = [];
+ const values: number[] = [];
+ for (const col of colNames) {
+ labels.push(col);
+ values.push(nuniqueSeries(df.col(col), { dropna }));
+ }
+ return new Series({ data: values, index: labels });
+ }
+
+ // axis === 1: count unique values across each row
+ const rowCount = df.index.size;
+ const values: number[] = [];
+ for (let r = 0; r < rowCount; r++) {
+ const seen = new Set();
+ for (const col of colNames) {
+ const v = df.col(col).values[r] as Scalar;
+ if (dropna && isMissing(v)) {
+ continue;
+ }
+ seen.add(v);
+ }
+ values.push(seen.size);
+ }
+ return new Series({ data: values, index: df.index });
+}
+
+// ─── any ──────────────────────────────────────────────────────────────────────
+
+/**
+ * Return `true` if any element in the Series is truthy.
+ *
+ * With `skipna=true` (default), null/NaN values are skipped.
+ * An empty (or all-null with skipna) series returns `false`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [0, 0, 1] });
+ * anySeries(s); // true
+ * ```
+ */
+export function anySeries(series: Series, options: AnyAllSeriesOptions = {}): boolean {
+ return anyInSlice(series.values as readonly Scalar[], options.skipna ?? true);
+}
+
+/**
+ * Return `true` if all elements in the Series are truthy.
+ *
+ * With `skipna=true` (default), null/NaN values are skipped.
+ * An empty (or all-null with skipna) series returns `true` (vacuous truth).
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3] });
+ * allSeries(s); // true
+ * ```
+ */
+export function allSeries(series: Series, options: AnyAllSeriesOptions = {}): boolean {
+ return allInSlice(series.values as readonly Scalar[], options.skipna ?? true);
+}
+
+// ─── DataFrame any/all ────────────────────────────────────────────────────────
+
+/** Get the column values for a single row `r` from df. */
+function rowValues(df: DataFrame, colNames: readonly string[], r: number): Scalar[] {
+ const row: Scalar[] = [];
+ for (const col of colNames) {
+ row.push(df.col(col).values[r] as Scalar);
+ }
+ return row;
+}
+
+/**
+ * Return whether any element is truthy along an axis of a DataFrame.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [0, 0], b: [0, 1] });
+ * anyDataFrame(df); // Series { a: false, b: true }
+ * ```
+ */
+export function anyDataFrame(df: DataFrame, options: AnyAllDataFrameOptions = {}): Series {
+ const axis = options.axis ?? 0;
+ const skipna = options.skipna ?? true;
+ const boolOnly = options.boolOnly ?? false;
+ const colNames = df.columns.values as readonly string[];
+
+ if (axis === 0) {
+ const labels: string[] = [];
+ const values: boolean[] = [];
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (boolOnly && s.dtype.kind !== "bool") {
+ continue;
+ }
+ labels.push(col);
+ values.push(anySeries(s, { skipna }));
+ }
+ return new Series({ data: values, index: labels });
+ }
+
+ // axis === 1: any across columns for each row
+ const values: boolean[] = [];
+ for (let r = 0; r < df.index.size; r++) {
+ values.push(anyInSlice(rowValues(df, colNames, r), skipna));
+ }
+ return new Series({ data: values, index: df.index });
+}
+
+/**
+ * Return whether all elements are truthy along an axis of a DataFrame.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 1], b: [1, 0] });
+ * allDataFrame(df); // Series { a: true, b: false }
+ * ```
+ */
+export function allDataFrame(df: DataFrame, options: AnyAllDataFrameOptions = {}): Series {
+ const axis = options.axis ?? 0;
+ const skipna = options.skipna ?? true;
+ const boolOnly = options.boolOnly ?? false;
+ const colNames = df.columns.values as readonly string[];
+
+ if (axis === 0) {
+ const labels: string[] = [];
+ const values: boolean[] = [];
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (boolOnly && s.dtype.kind !== "bool") {
+ continue;
+ }
+ labels.push(col);
+ values.push(allSeries(s, { skipna }));
+ }
+ return new Series({ data: values, index: labels });
+ }
+
+ // axis === 1: all across columns for each row
+ const values: boolean[] = [];
+ for (let r = 0; r < df.index.size; r++) {
+ values.push(allInSlice(rowValues(df, colNames, r), skipna));
+ }
+ return new Series({ data: values, index: df.index });
+}
diff --git a/src/stats/pct_change.ts b/src/stats/pct_change.ts
new file mode 100644
index 00000000..10e527ec
--- /dev/null
+++ b/src/stats/pct_change.ts
@@ -0,0 +1,238 @@
+/**
+ * pct_change — percentage change between current and prior element.
+ *
+ * Mirrors `pandas.Series.pct_change()` / `pandas.DataFrame.pct_change()`:
+ * - `pctChangeSeries(series, options)` — per-element % change
+ * - `pctChangeDataFrame(df, options)` — column-wise % change
+ *
+ * Formula (per element i, with shift=periods):
+ * `result[i] = (x[i] - x[i-periods]) / x[i-periods]`
+ *
+ * When `fillMethod` is set, NaN/null values in the source are filled *before*
+ * computing the ratio (matching pandas' default behaviour of `fill_method="pad"`).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Fill method applied to NaN/null before computing pct_change. */
export type PctChangeFillMethod = "pad" | "bfill";

/** Options for {@link pctChangeSeries} and {@link pctChangeDataFrame}. */
export interface PctChangeOptions {
  /**
   * Number of periods (lags) to shift when computing the ratio.
   * Positive values look backward; negative values look forward.
   * Default `1`.
   */
  readonly periods?: number;
  /**
   * How to fill NaN/null values *before* computing the ratio.
   * - `"pad"` (default): forward-fill (last valid observation carries forward).
   * - `"bfill"`: backward-fill (next valid observation fills backward).
   * - `null`: no filling — NaN/null stays as-is and yields null results.
   */
  readonly fillMethod?: PctChangeFillMethod | null;
  /**
   * Maximum number of consecutive NaN/null values to fill when `fillMethod`
   * is set. `undefined` / `null` means no limit.
   */
  readonly limit?: number | null;
}

/** Options for {@link pctChangeDataFrame} — adds an axis selector. */
export interface DataFramePctChangeOptions extends PctChangeOptions {
  /**
   * - `0` or `"index"` (default): apply operation **column-wise** (down rows).
   * - `1` or `"columns"`: apply operation **row-wise** (across columns).
   */
  readonly axis?: 0 | 1 | "index" | "columns";
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` is a valid number (not null, undefined, or NaN). */
+function isNum(v: Scalar): v is number {
+ return typeof v === "number" && !Number.isNaN(v) && v !== null;
+}
+
+/**
+ * Forward-fill an array of scalars in place, respecting an optional limit.
+ * Returns a NEW array.
+ */
+function padFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] {
+ const out: Scalar[] = [...vals];
+ let run = 0;
+ let lastValid: Scalar = null;
+ for (let i = 0; i < out.length; i++) {
+ const v = out[i] as Scalar;
+ if (v !== null && v !== undefined && !(typeof v === "number" && Number.isNaN(v))) {
+ lastValid = v;
+ run = 0;
+ } else if (lastValid !== null && (limit == null || run < limit)) {
+ out[i] = lastValid;
+ run++;
+ }
+ }
+ return out;
+}
+
+/**
+ * Backward-fill an array of scalars, respecting an optional limit.
+ * Returns a NEW array.
+ */
+function bfillFill(vals: readonly Scalar[], limit: number | null | undefined): Scalar[] {
+ const tmp = padFill([...vals].reverse(), limit);
+ return tmp.reverse();
+}
+
+/** Fill NaN/null in `vals` using the requested method. */
+function applyFill(
+ vals: readonly Scalar[],
+ method: PctChangeFillMethod | null | undefined,
+ limit: number | null | undefined,
+): Scalar[] {
+ if (!method) {
+ return [...vals];
+ }
+ return method === "pad" ? padFill(vals, limit) : bfillFill(vals, limit);
+}
+
+/** Compute pct_change on a flat array of scalars. */
+function computePct(vals: readonly Scalar[], periods: number): Scalar[] {
+ const n = vals.length;
+ const out: Scalar[] = new Array(n).fill(null);
+ const shift = periods;
+ if (shift >= 0) {
+ for (let i = shift; i < n; i++) {
+ const curr = vals[i] as Scalar;
+ const prev = vals[i - shift] as Scalar;
+ if (isNum(curr) && isNum(prev) && prev !== 0) {
+ out[i] = curr / prev - 1;
+ } else if (isNum(curr) && isNum(prev) && prev === 0) {
+ // 0 denominator → Infinity (same as pandas)
+ out[i] =
+ curr === 0 ? Number.NaN : curr > 0 ? Number.POSITIVE_INFINITY : Number.NEGATIVE_INFINITY;
+ } else {
+ out[i] = null;
+ }
+ }
+ } else {
+ // Negative periods: look forward
+ const absShift = -shift;
+ for (let i = 0; i < n - absShift; i++) {
+ const curr = vals[i] as Scalar;
+ const fwd = vals[i + absShift] as Scalar;
+ if (isNum(curr) && isNum(fwd) && curr !== 0) {
+ out[i] = fwd / curr - 1;
+ } else if (isNum(curr) && isNum(fwd) && curr === 0) {
+ out[i] =
+ fwd === 0 ? Number.NaN : fwd > 0 ? Number.POSITIVE_INFINITY : Number.NEGATIVE_INFINITY;
+ } else {
+ out[i] = null;
+ }
+ }
+ }
+ return out;
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Compute the fractional change between a Series element and the element
+ * `periods` positions earlier (or later, for negative `periods`).
+ *
+ * Matches `pandas.Series.pct_change()`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [100, 110, 99, 121] });
+ * pctChangeSeries(s); // [null, 0.1, -0.1, 0.2222…]
+ * ```
+ */
+export function pctChangeSeries(
+ series: Series,
+ options: PctChangeOptions = {},
+): Series {
+ const periods = options.periods ?? 1;
+ const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad";
+ const limit = options.limit ?? null;
+
+ const filled = applyFill(series.values, fillMethod, limit);
+ const result = computePct(filled, periods);
+
+ return new Series({
+ data: result,
+ index: series.index,
+ name: series.name,
+ });
+}
+
+/**
+ * Compute percentage change for every column (or row) of a DataFrame.
+ *
+ * Matches `pandas.DataFrame.pct_change()`.
+ *
+ * @example
+ * ```ts
+ * const df = new DataFrame(new Map([
+ * ["a", new Series({ data: [100, 110, 121] })],
+ * ["b", new Series({ data: [200, 180, 198] })],
+ * ]));
+ * pctChangeDataFrame(df); // fractional change per column
+ * ```
+ */
+export function pctChangeDataFrame(
+ df: DataFrame,
+ options: DataFramePctChangeOptions = {},
+): DataFrame {
+ const axis = options.axis ?? 0;
+ const colWise = axis === 0 || axis === "index";
+
+ if (colWise) {
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ colMap.set(name, pctChangeSeries(df.col(name), options));
+ }
+ return new DataFrame(colMap, df.index);
+ }
+
+ // Row-wise: each row across columns
+ const periods = options.periods ?? 1;
+ const fillMethod = options.fillMethod !== undefined ? options.fillMethod : "pad";
+ const limit = options.limit ?? null;
+ const nRows = df.index.size;
+ const cols = df.columns.values;
+ const nCols = cols.length;
+
+ const resultCols = new Map();
+ for (const name of cols) {
+ resultCols.set(name, new Array(nRows).fill(null));
+ }
+
+ for (let r = 0; r < nRows; r++) {
+ const row: Scalar[] = [];
+ for (const name of cols) {
+ row.push(df.col(name).values[r] as Scalar);
+ }
+ const filled = applyFill(row, fillMethod, limit);
+ const pct = computePct(filled, periods);
+ for (let c = 0; c < nCols; c++) {
+ (resultCols.get(cols[c] as string) as Scalar[])[r] = pct[c] as Scalar;
+ }
+ }
+
+ const colMap = new Map>();
+ for (const name of cols) {
+ colMap.set(
+ name,
+ new Series({ data: resultCols.get(name) as Scalar[], index: df.index, name }),
+ );
+ }
+ return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/quantile.ts b/src/stats/quantile.ts
new file mode 100644
index 00000000..60f6e0dc
--- /dev/null
+++ b/src/stats/quantile.ts
@@ -0,0 +1,361 @@
+/**
+ * quantile — quantile/percentile for Series and DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.quantile(q=0.5, interpolation='linear')`
+ * - `pandas.DataFrame.quantile(q=0.5, axis=0, numeric_only=True, interpolation='linear')`
+ *
+ * ### Interpolation methods (pandas-compatible)
+ * - `"linear"`: linear interpolation between adjacent values
+ * - `"lower"`: take the lower of the two surrounding values
+ * - `"higher"`: take the higher of the two surrounding values
+ * - `"midpoint"`: arithmetic mean of the two surrounding values
+ * - `"nearest"`: whichever of the two surrounding indices is closest
+ *
+ * @module
+ */
+
+import { DataFrame, Index, Series } from "../core/index.ts";
+import type { DtypeKind } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Interpolation method for quantile estimation (pandas-compatible names). */
export type QuantileInterpolation = "linear" | "lower" | "higher" | "midpoint" | "nearest";

/** Options for {@link quantileSeries}. */
export interface QuantileSeriesOptions {
  /**
   * Quantile level(s) in [0, 1].
   * - A single number returns a `number`.
   * - An array returns a `Series` indexed by the q-values.
   * @defaultValue `0.5`
   */
  readonly q?: number | readonly number[];
  /**
   * Interpolation method when the desired quantile lies between two values.
   * @defaultValue `"linear"`
   */
  readonly interpolation?: QuantileInterpolation;
  /**
   * If `true` (default), ignore null/NaN values before computing.
   * If `false`, any missing value makes the result `NaN`.
   */
  readonly skipna?: boolean;
}

/** Options for {@link quantileDataFrame}. */
export interface QuantileDataFrameOptions {
  /**
   * Quantile level(s) in [0, 1].
   * - A single number returns a `Series`.
   * - An array returns a `DataFrame`.
   * @defaultValue `0.5`
   */
  readonly q?: number | readonly number[];
  /**
   * Axis along which to compute.
   * - `0` (default): across rows — one value per column.
   * - `1`: across columns — one value per row.
   */
  readonly axis?: 0 | 1;
  /**
   * If `true` (default), only include numeric columns.
   * If `false`, non-numeric columns produce `NaN`.
   */
  readonly numericOnly?: boolean;
  /**
   * Interpolation method when the desired quantile lies between two values.
   * @defaultValue `"linear"`
   */
  readonly interpolation?: QuantileInterpolation;
  /**
   * If `true` (default), ignore null/NaN values before computing.
   */
  readonly skipna?: boolean;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar value is missing. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** True when a dtype kind is numeric. */
+function isNumericKind(kind: DtypeKind): boolean {
+ return kind === "int" || kind === "uint" || kind === "float";
+}
+
+/** Extract numeric (non-missing) values, respecting skipna. */
+function extractNumbers(values: readonly Scalar[], skipna: boolean): number[] {
+ const out: number[] = [];
+ for (const v of values) {
+ if (isMissing(v)) {
+ if (!skipna) {
+ return [];
+ }
+ continue;
+ }
+ if (typeof v === "number") {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+/** Sort numbers ascending (non-mutating). */
+function sortAsc(xs: number[]): number[] {
+ return xs.slice().sort((a, b) => a - b);
+}
+
+/**
+ * Compute a single quantile from a **sorted** array using the given method.
+ *
+ * Returns `NaN` when the array is empty.
+ */
+function computeOne(sorted: readonly number[], q: number, method: QuantileInterpolation): number {
+ const n = sorted.length;
+ if (n === 0) {
+ return Number.NaN;
+ }
+ if (n === 1) {
+ return sorted[0] as number;
+ }
+ const pos = q * (n - 1);
+ const lo = Math.floor(pos);
+ const hi = Math.ceil(pos);
+ const vlo = sorted[lo] as number;
+ const vhi = sorted[hi] as number;
+ if (lo === hi) {
+ return vlo;
+ }
+ switch (method) {
+ case "lower":
+ return vlo;
+ case "higher":
+ return vhi;
+ case "midpoint":
+ return (vlo + vhi) / 2;
+ case "nearest": {
+ const frac = pos - lo;
+ return frac <= 0.5 ? vlo : vhi;
+ }
+ default: {
+ const frac = pos - lo;
+ return vlo * (1 - frac) + vhi * frac;
+ }
+ }
+}
+
+/** Compute multiple quantile levels from a sorted array. */
+function computeMany(
+ sorted: readonly number[],
+ qLevels: readonly number[],
+ method: QuantileInterpolation,
+): number[] {
+ return qLevels.map((q) => computeOne(sorted, q, method));
+}
+
+// ─── Series API ───────────────────────────────────────────────────────────────
+
+/**
+ * Compute quantile(s) for a Series.
+ *
+ * When `q` is a single number, returns a scalar `number`.
+ * When `q` is an array, returns a `Series` indexed by the q-values.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * quantileSeries(s); // 3
+ * quantileSeries(s, { q: 0.25 }); // 1.75
+ * quantileSeries(s, { q: [0.25, 0.5, 0.75] }); // Series { 0.25:1.75, 0.5:3, 0.75:4.25 }
+ * quantileSeries(s, { q: 0.5, interpolation: "lower" }); // 2
+ * ```
+ */
+export function quantileSeries(
+ series: Series,
+ options: QuantileSeriesOptions = {},
+): number | Series {
+ const method: QuantileInterpolation = options.interpolation ?? "linear";
+ const skipna = options.skipna ?? true;
+ const qInput = options.q ?? 0.5;
+
+ const sorted = sortAsc(extractNumbers(series.values as readonly Scalar[], skipna));
+
+ if (typeof qInput === "number") {
+ return computeOne(sorted, qInput, method);
+ }
+
+ const qArr = qInput as readonly number[];
+ const results = computeMany(sorted, qArr, method);
+ return new Series({ data: results, index: qArr as unknown as readonly Label[] });
+}
+
+// ─── DataFrame helpers ────────────────────────────────────────────────────────
+
+/** Build a column record (name → Scalar[]) for DataFrame.fromColumns(). */
+function buildRecord(
+ colMap: ReadonlyMap,
+): Record {
+ const obj: Record = {};
+ for (const [name, vals] of colMap) {
+ obj[name] = vals;
+ }
+ return obj;
+}
+
+/** Collect sorted numeric arrays for each selected column. */
+function collectCols(
+ df: DataFrame,
+ numericOnly: boolean,
+ skipna: boolean,
+): { names: string[]; sorted: number[][] } {
+ const colNames = df.columns.values as readonly string[];
+ const names: string[] = [];
+ const sorted: number[][] = [];
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (numericOnly && !isNumericKind(s.dtype.kind)) {
+ continue;
+ }
+ names.push(col);
+ if (isNumericKind(s.dtype.kind)) {
+ sorted.push(sortAsc(extractNumbers(s.values as readonly Scalar[], skipna)));
+ } else {
+ sorted.push([]);
+ }
+ }
+ return { names, sorted };
+}
+
+// ─── axis=0 (reduce rows, one result per column) ─────────────────────────────
+
+function axis0SingleQ(
+ df: DataFrame,
+ q: number,
+ method: QuantileInterpolation,
+ skipna: boolean,
+ numericOnly: boolean,
+): Series {
+ const { names, sorted } = collectCols(df, numericOnly, skipna);
+ const vals = sorted.map((xs) => computeOne(xs, q, method));
+ return new Series({ data: vals, index: names });
+}
+
+function axis0MultiQ(
+ df: DataFrame,
+ qLevels: readonly number[],
+ method: QuantileInterpolation,
+ skipna: boolean,
+ numericOnly: boolean,
+): DataFrame {
+ const { names, sorted } = collectCols(df, numericOnly, skipna);
+ const rowIndex = new Index(qLevels as unknown as Label[]);
+ const colData = new Map();
+ for (let ci = 0; ci < names.length; ci++) {
+ const col = names[ci] as string;
+ const xs = sorted[ci] as number[];
+ colData.set(col, computeMany(xs, qLevels, method));
+ }
+ return DataFrame.fromColumns(buildRecord(colData), { index: rowIndex });
+}
+
+// ─── axis=1 (reduce columns, one result per row) ─────────────────────────────
+
+/** Extract numeric values for a given row across all columns. */
+function rowValues(df: DataFrame, colNames: readonly string[], rowIdx: number): Scalar[] {
+ const out: Scalar[] = [];
+ for (const col of colNames) {
+ out.push(df.col(col).values[rowIdx] as Scalar);
+ }
+ return out;
+}
+
+function axis1SingleQ(
+ df: DataFrame,
+ q: number,
+ method: QuantileInterpolation,
+ skipna: boolean,
+): Series {
+ const colNames = df.columns.values as readonly string[];
+ const rowCount = df.index.size;
+ const vals: number[] = [];
+ for (let r = 0; r < rowCount; r++) {
+ const xs = sortAsc(extractNumbers(rowValues(df, colNames, r), skipna));
+ vals.push(computeOne(xs, q, method));
+ }
+ return new Series({ data: vals, index: df.index });
+}
+
+function axis1MultiQ(
+ df: DataFrame,
+ qLevels: readonly number[],
+ method: QuantileInterpolation,
+ skipna: boolean,
+): DataFrame {
+ const colNames = df.columns.values as readonly string[];
+ const rowCount = df.index.size;
+ const qColData: number[][] = qLevels.map(() => []);
+ for (let r = 0; r < rowCount; r++) {
+ const xs = sortAsc(extractNumbers(rowValues(df, colNames, r), skipna));
+ for (let qi = 0; qi < qLevels.length; qi++) {
+ const arr = qColData[qi];
+ if (arr !== undefined) {
+ arr.push(computeOne(xs, qLevels[qi] as number, method));
+ }
+ }
+ }
+ const resultCols: Record = {};
+ for (let qi = 0; qi < qLevels.length; qi++) {
+ resultCols[String(qLevels[qi])] = (qColData[qi] ?? []) as Scalar[];
+ }
+ return DataFrame.fromColumns(resultCols, { index: df.index });
+}
+
+// ─── public DataFrame API ─────────────────────────────────────────────────────
+
+/**
+ * Compute quantile(s) for a DataFrame.
+ *
+ * When `q` is a single number:
+ * - `axis=0`: returns a `Series` (one value per column)
+ * - `axis=1`: returns a `Series` (one value per row)
+ *
+ * When `q` is an array:
+ * - `axis=0`: returns a `DataFrame` (q-values as rows, columns as columns)
+ * - `axis=1`: returns a `DataFrame` (rows as rows, q-values as columns)
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * quantileDataFrame(df); // Series { a: 2, b: 5 }
+ * quantileDataFrame(df, { q: [0.25, 0.75] }); // DataFrame 2×2
+ * quantileDataFrame(df, { axis: 1, q: 0.5 }); // Series (one value per row)
+ * ```
+ */
+export function quantileDataFrame(
+ df: DataFrame,
+ options: QuantileDataFrameOptions = {},
+): Series | DataFrame {
+ const method: QuantileInterpolation = options.interpolation ?? "linear";
+ const skipna = options.skipna ?? true;
+ const numericOnly = options.numericOnly ?? true;
+ const axis = options.axis ?? 0;
+ const qInput = options.q ?? 0.5;
+
+ const multiQ = Array.isArray(qInput);
+
+ if (axis === 0) {
+ if (multiQ) {
+ return axis0MultiQ(df, qInput as readonly number[], method, skipna, numericOnly);
+ }
+ return axis0SingleQ(df, qInput as number, method, skipna, numericOnly);
+ }
+
+ if (multiQ) {
+ return axis1MultiQ(df, qInput as readonly number[], method, skipna);
+ }
+ return axis1SingleQ(df, qInput as number, method, skipna);
+}
diff --git a/src/stats/replace.ts b/src/stats/replace.ts
new file mode 100644
index 00000000..54c2662e
--- /dev/null
+++ b/src/stats/replace.ts
@@ -0,0 +1,237 @@
+/**
+ * replace — value substitution for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.replace(to_replace, value)` / `Series.replace(mapping)`
+ * - `DataFrame.replace(to_replace, value)` / `DataFrame.replace(mapping)`
+ *
+ * Supported replacement specs:
+ * - **Scalar → Scalar**: replace every occurrence of one value with another.
+ * - **Array → Scalar**: replace every value in the array with a single value.
+ * - **Array → Array**: pair-wise replacement (must be same length).
+ * - **Record / Map**: lookup-table replacement (`{ old: new, ... }`).
+ *
+ * All functions are **pure** (return new objects; inputs are unchanged).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/** A lookup table mapping old values to new values. */
+export type ReplaceMapping = Readonly> | ReadonlyMap;
+
+/**
+ * Replacement specification accepted by {@link replaceSeries} /
+ * {@link replaceDataFrame}.
+ *
+ * Mirrors the first two positional args of `pandas.Series.replace`.
+ */
+export type ReplaceSpec =
+ | { readonly toReplace: Scalar; readonly value: Scalar }
+ | { readonly toReplace: readonly Scalar[]; readonly value: Scalar }
+ | { readonly toReplace: readonly Scalar[]; readonly value: readonly Scalar[] }
+ | { readonly toReplace: ReplaceMapping };
+
+/** Options shared by {@link replaceSeries} and {@link replaceDataFrame}. */
+export interface ReplaceOptions {
+ /**
+ * When `true`, treat `NaN` values as equal for matching purposes.
+ * Default `true`.
+ */
+ readonly matchNaN?: boolean;
+}
+
+/** Options for {@link replaceDataFrame}. */
+export interface DataFrameReplaceOptions extends ReplaceOptions {
+ /**
+ * If provided, only replace values in these column names.
+ * By default all columns are processed.
+ */
+ readonly columns?: readonly string[];
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Equality test for scalars. Dates compare by timestamp; when `matchNaN`
+ * is true, `NaN` is considered equal to `NaN` (unlike `===`).
+ */
+function scalarEq(a: Scalar, b: Scalar, matchNaN: boolean): boolean {
+  const bothNaN =
+    typeof a === "number" && typeof b === "number" && Number.isNaN(a) && Number.isNaN(b);
+  if (bothNaN) {
+    return matchNaN;
+  }
+  if (a instanceof Date && b instanceof Date) {
+    return a.getTime() === b.getTime();
+  }
+  return a === b;
+}
+
+/**
+ * Build a replacement function from a {@link ReplaceSpec}.
+ *
+ * The returned function maps each value to its replacement, or returns the
+ * value unchanged when no rule matches.
+ *
+ * Note: the original implementation had three mapping branches; the first
+ * two (plain-record and Map) duplicated the third, whose Map path was
+ * unreachable. They are collapsed into a single mapping branch here.
+ *
+ * @throws RangeError when the Array → Array variant has mismatched lengths.
+ */
+function buildReplacer(spec: ReplaceSpec, matchNaN: boolean): (v: Scalar) => Scalar {
+  // Mapping variant: { toReplace: Record | Map } with no `value` key.
+  if ("toReplace" in spec && !("value" in spec) && !Array.isArray(spec.toReplace)) {
+    const mapping = spec.toReplace as ReplaceMapping;
+    if (mapping instanceof Map) {
+      const map = mapping as ReadonlyMap<Scalar, Scalar>;
+      return (v: Scalar): Scalar => {
+        for (const [k, val] of map) {
+          if (scalarEq(v, k, matchNaN)) {
+            return val;
+          }
+        }
+        return v;
+      };
+    }
+    // Plain record: keys are stringified values.
+    const rec = mapping as Readonly<Record<string, Scalar>>;
+    return (v: Scalar): Scalar => {
+      const key = String(v);
+      return Object.prototype.hasOwnProperty.call(rec, key) ? (rec[key] as Scalar) : v;
+    };
+  }
+
+  const s = spec as { toReplace: Scalar | readonly Scalar[]; value: Scalar | readonly Scalar[] };
+
+  if (!Array.isArray(s.toReplace)) {
+    // Scalar → Scalar
+    const old = s.toReplace as Scalar;
+    const newVal = s.value as Scalar;
+    return (v: Scalar): Scalar => (scalarEq(v, old, matchNaN) ? newVal : v);
+  }
+
+  const oldArr = s.toReplace as readonly Scalar[];
+
+  if (!Array.isArray(s.value)) {
+    // Array → Scalar
+    const newVal = s.value as Scalar;
+    return (v: Scalar): Scalar => {
+      for (const old of oldArr) {
+        if (scalarEq(v, old, matchNaN)) {
+          return newVal;
+        }
+      }
+      return v;
+    };
+  }
+
+  // Array → Array (pair-wise)
+  const newArr = s.value as readonly Scalar[];
+  if (oldArr.length !== newArr.length) {
+    throw new RangeError(
+      `replace: toReplace and value arrays must have the same length (got ${oldArr.length} and ${newArr.length})`,
+    );
+  }
+  return (v: Scalar): Scalar => {
+    for (let i = 0; i < oldArr.length; i++) {
+      if (scalarEq(v, oldArr[i] as Scalar, matchNaN)) {
+        return newArr[i] as Scalar;
+      }
+    }
+    return v;
+  };
+}
+
+// ─── Series ───────────────────────────────────────────────────────────────────
+
+/**
+ * Return a new Series with values substituted according to `spec`.
+ * The input Series is left untouched (pure function).
+ *
+ * @example
+ * ```ts
+ * import { Series } from "tsb";
+ * import { replaceSeries } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 2, 1] });
+ * const r = replaceSeries(s, { toReplace: 2, value: 99 });
+ * // r.values → [1, 99, 3, 99, 1]
+ * ```
+ */
+export function replaceSeries(
+  series: Series,
+  spec: ReplaceSpec,
+  options: ReplaceOptions = {},
+): Series {
+  const replacer = buildReplacer(spec, options.matchNaN ?? true);
+  const newData: Scalar[] = [];
+  for (let i = 0; i < series.size; i++) {
+    newData.push(replacer(series.values[i] as Scalar));
+  }
+  return new Series({ data: newData, index: series.index, name: series.name });
+}
+
+// ─── DataFrame ────────────────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame with values substituted according to `spec`.
+ *
+ * When `options.columns` is given, only those columns are rewritten; the
+ * remaining columns are carried over unchanged. The input DataFrame is
+ * left untouched (pure function).
+ *
+ * @example
+ * ```ts
+ * import { DataFrame } from "tsb";
+ * import { replaceDataFrame } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [2, 2, 4] });
+ * const r = replaceDataFrame(df, { toReplace: 2, value: 0 });
+ * // r.col("a").values → [1, 0, 3]
+ * // r.col("b").values → [0, 0, 4]
+ * ```
+ */
+export function replaceDataFrame(
+  df: DataFrame,
+  spec: ReplaceSpec,
+  options: DataFrameReplaceOptions = {},
+): DataFrame {
+  const replacer = buildReplacer(spec, options.matchNaN ?? true);
+  const wanted = new Set(options.columns ?? df.columns.values);
+
+  const colMap = new Map<string, Series>();
+  for (const name of df.columns.values) {
+    const original = df.col(name) as Series;
+    if (!wanted.has(name)) {
+      colMap.set(name, original); // untouched column, reuse as-is
+      continue;
+    }
+    const data: Scalar[] = [];
+    for (let i = 0; i < original.size; i++) {
+      data.push(replacer(original.values[i] as Scalar));
+    }
+    colMap.set(name, new Series({ data, index: original.index, name: original.name }));
+  }
+  return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/sem_var.ts b/src/stats/sem_var.ts
new file mode 100644
index 00000000..9e1604dd
--- /dev/null
+++ b/src/stats/sem_var.ts
@@ -0,0 +1,283 @@
+/**
+ * sem_var — sample/population variance and standard error of the mean for
+ * Series and DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.var(ddof?, skipna?, min_count?)` — variance
+ * - `pandas.Series.sem(ddof?, skipna?, min_count?)` — standard error of mean
+ * - `pandas.DataFrame.var(axis?, ddof?, skipna?, numeric_only?)`
+ * - `pandas.DataFrame.sem(axis?, ddof?, skipna?, numeric_only?)`
+ *
+ * `ddof` (degrees of freedom delta):
+ * - `1` (default): sample variance — divides by `n - 1`
+ * - `0`: population variance — divides by `n`
+ *
+ * `skipna` (default `true`): ignore NaN/null values.
+ * `minCount` (default `1`): minimum number of valid observations required;
+ * returns `NaN` if fewer are present.
+ *
+ * SEM = sqrt(var / n) where var uses the given ddof.
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { DtypeKind } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link varSeries} and {@link semSeries}. */
+export interface VarSemSeriesOptions {
+ /**
+ * Delta degrees of freedom. Divisor is `n - ddof`.
+ * @defaultValue `1`
+ */
+ readonly ddof?: number;
+ /**
+ * If `true` (default), exclude null/NaN values.
+ */
+ readonly skipna?: boolean;
+ /**
+ * Minimum number of non-null observations required. Returns `NaN` when
+ * fewer valid values are present.
+ * @defaultValue `1`
+ */
+ readonly minCount?: number;
+}
+
+/** Options for {@link varDataFrame} and {@link semDataFrame}. */
+export interface VarSemDataFrameOptions extends VarSemSeriesOptions {
+ /**
+ * Axis along which to compute.
+ * - `0` (default): reduce along rows, one result per column.
+ * - `1`: reduce along columns, one result per row.
+ */
+ readonly axis?: 0 | 1;
+ /**
+ * If `true`, only include numeric columns when `axis=0`.
+ * @defaultValue `false`
+ */
+ readonly numericOnly?: boolean;
+}
+
+/** Internal callback type for variance/SEM reduction. */
+type StatFn = (xs: readonly number[], ddof: number, minCount: number) => number;
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar value is missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+  if (v === null || v === undefined) {
+    return true;
+  }
+  return typeof v === "number" && Number.isNaN(v);
+}
+
+/** True when a dtype kind is numeric. */
+function isNumericKind(kind: DtypeKind): boolean {
+ return kind === "int" || kind === "uint" || kind === "float";
+}
+
+/**
+ * Extract numeric values, respecting skipna and minCount.
+ * Returns an empty array when skipna=false and any missing value is present.
+ */
+function extractNumbers(values: readonly Scalar[], skipna: boolean): number[] {
+ const out: number[] = [];
+ for (const v of values) {
+ if (isMissing(v)) {
+ if (!skipna) {
+ return []; // NaN propagation when skipna=false
+ }
+ continue;
+ }
+ if (typeof v === "number") {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+/**
+ * Sample/population variance with divisor `n - ddof`.
+ *
+ * Returns `NaN` when fewer than `minCount` observations are available, or
+ * when the divisor `n - ddof` is not positive.
+ */
+function computeVar(xs: readonly number[], ddof: number, minCount: number): number {
+  const n = xs.length;
+  const denom = n - ddof;
+  if (n < minCount || denom <= 0) {
+    return Number.NaN;
+  }
+  const mean = xs.reduce((acc, x) => acc + x, 0) / n;
+  // Sum of squared deviations from the mean.
+  const ss = xs.reduce((acc, x) => acc + (x - mean) * (x - mean), 0);
+  return ss / denom;
+}
+
+/**
+ * Standard error of the mean: sqrt(var(ddof) / n).
+ *
+ * Returns `NaN` when n = 0, fewer than `minCount` observations exist, or
+ * the variance itself is `NaN`.
+ */
+function computeSem(xs: readonly number[], ddof: number, minCount: number): number {
+  const n = xs.length;
+  if (n === 0 || n < minCount) {
+    return Number.NaN;
+  }
+  const v = computeVar(xs, ddof, minCount);
+  return Number.isNaN(v) ? Number.NaN : Math.sqrt(v / n);
+}
+
+// ─── Series reduction ─────────────────────────────────────────────────────────
+
+function reduceSeriesImpl(
+ series: Series,
+ options: VarSemSeriesOptions,
+ statFn: StatFn,
+): number {
+ const ddof = options.ddof ?? 1;
+ const skipna = options.skipna ?? true;
+ const minCount = options.minCount ?? 1;
+ const xs = extractNumbers(series.values as readonly Scalar[], skipna);
+ return statFn(xs, ddof, minCount);
+}
+
+// ─── DataFrame reduction ──────────────────────────────────────────────────────
+
+/** Reduce each column of df to a scalar using statFn. */
+function reduceColumns(
+ df: DataFrame,
+ options: VarSemDataFrameOptions,
+ statFn: StatFn,
+): Series {
+ const ddof = options.ddof ?? 1;
+ const skipna = options.skipna ?? true;
+ const minCount = options.minCount ?? 1;
+ const numericOnly = options.numericOnly ?? false;
+
+ const colNames = df.columns.values as readonly string[];
+ const labels: string[] = [];
+ const values: number[] = [];
+
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (numericOnly && !isNumericKind(s.dtype.kind)) {
+ continue;
+ }
+ labels.push(col);
+ if (!isNumericKind(s.dtype.kind)) {
+ values.push(Number.NaN);
+ continue;
+ }
+ const xs = extractNumbers(s.values as readonly Scalar[], skipna);
+ values.push(statFn(xs, ddof, minCount));
+ }
+
+ return new Series({ data: values, index: labels });
+}
+
+/** Reduce each row of df to a scalar using statFn. */
+function reduceRows(
+ df: DataFrame,
+ options: VarSemDataFrameOptions,
+ statFn: StatFn,
+): Series {
+ const ddof = options.ddof ?? 1;
+ const skipna = options.skipna ?? true;
+ const minCount = options.minCount ?? 1;
+
+ const colNames = df.columns.values as readonly string[];
+ const rowCount = df.index.size;
+ const values: number[] = [];
+
+ for (let r = 0; r < rowCount; r++) {
+ const rowVals: Scalar[] = [];
+ for (const col of colNames) {
+ const s = df.col(col);
+ if (isNumericKind(s.dtype.kind)) {
+ rowVals.push(s.values[r] as Scalar);
+ }
+ }
+ const xs = extractNumbers(rowVals, skipna);
+ values.push(statFn(xs, ddof, minCount));
+ }
+
+ return new Series({ data: values, index: df.index });
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Return the variance of a numeric Series.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+ * varSeries(s); // 4 (sample variance, ddof=1)
+ * varSeries(s, { ddof: 0 }); // 3.5 (population variance)
+ * ```
+ */
+export function varSeries(series: Series, options: VarSemSeriesOptions = {}): number {
+ return reduceSeriesImpl(series, options, computeVar);
+}
+
+/**
+ * Return the standard error of the mean (SEM) of a numeric Series.
+ *
+ * SEM = sqrt(var(ddof) / n) where n is the number of valid observations.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+ * semSeries(s); // sqrt(4 / 8) = 0.707...
+ * ```
+ */
+export function semSeries(series: Series, options: VarSemSeriesOptions = {}): number {
+ return reduceSeriesImpl(series, options, computeSem);
+}
+
+/**
+ * Return the variance of each column (`axis=0`, default) or each row
+ * (`axis=1`) of a DataFrame as a numeric Series.
+ *
+ * Non-numeric columns without `numericOnly` contribute `NaN` to the result.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * varDataFrame(df); // Series { a: 1, b: 1 }
+ * ```
+ */
+export function varDataFrame(df: DataFrame, options: VarSemDataFrameOptions = {}): Series {
+ const axis = options.axis ?? 0;
+ return axis === 0 ? reduceColumns(df, options, computeVar) : reduceRows(df, options, computeVar);
+}
+
+/**
+ * Return the standard error of the mean for each column (`axis=0`, default)
+ * or each row (`axis=1`) of a DataFrame as a numeric Series.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * semDataFrame(df); // Series { a: sqrt(1/3), b: sqrt(1/3) }
+ * ```
+ */
+export function semDataFrame(df: DataFrame, options: VarSemDataFrameOptions = {}): Series {
+ const axis = options.axis ?? 0;
+ return axis === 0 ? reduceColumns(df, options, computeSem) : reduceRows(df, options, computeSem);
+}
diff --git a/src/stats/skew_kurt.ts b/src/stats/skew_kurt.ts
new file mode 100644
index 00000000..96b3e32f
--- /dev/null
+++ b/src/stats/skew_kurt.ts
@@ -0,0 +1,318 @@
+/**
+ * skew_kurt — skewness and excess kurtosis for Series and DataFrame.
+ *
+ * Mirrors:
+ * - `pandas.Series.skew(skipna?, min_count?)` — Fisher–Pearson coefficient
+ * - `pandas.Series.kurt(skipna?)` — Fisher's definition of excess kurtosis
+ * - `pandas.DataFrame.skew(axis?, skipna?, numeric_only?)`
+ * - `pandas.DataFrame.kurt(axis?, skipna?, numeric_only?)`
+ *
+ * Formulas follow pandas defaults:
+ * - Skewness: adjusted Fisher–Pearson (unbiased, n/(n-1)/(n-2) correction)
+ * - Kurtosis: excess kurtosis (subtract 3) with pandas' bias-correction factor
+ *
+ * @module
+ */
+
+import type { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { DtypeKind } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Options for {@link skewSeries} and {@link kurtSeries}. */
+export interface SkewKurtSeriesOptions {
+ /**
+ * If `true` (default), exclude null/NaN values before computing.
+ */
+ readonly skipna?: boolean;
+}
+
+/** Options for {@link skewDataFrame} and {@link kurtDataFrame}. */
+export interface SkewKurtDataFrameOptions {
+ /**
+ * Axis along which to compute.
+ * - `0` (default): reduce along rows, one result per column.
+ * - `1`: reduce along columns, one result per row.
+ */
+ readonly axis?: 0 | 1;
+ /**
+ * If `true` (default), exclude null/NaN values.
+ */
+ readonly skipna?: boolean;
+ /**
+ * If `true`, only include numeric columns (when `axis=0`).
+ * @defaultValue `false`
+ */
+ readonly numericOnly?: boolean;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when a scalar value is missing (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** True when a dtype kind is numeric (int, uint, or float). */
+function isNumericKind(kind: DtypeKind): boolean {
+  return kind === "int" || kind === "uint" || kind === "float";
+}
+
+/**
+ * Extract finite numeric values from a Scalar array, optionally skipping
+ * missing values.
+ */
+function extractNumbers(values: readonly Scalar[], skipna: boolean): number[] {
+ const out: number[] = [];
+ for (const v of values) {
+ if (isMissing(v)) {
+ if (!skipna) {
+ return []; // presence of NaN propagates as NaN
+ }
+ continue;
+ }
+ if (typeof v === "number") {
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+/**
+ * Adjusted Fisher–Pearson skewness coefficient (unbiased), matching pandas:
+ *
+ *   G1 = n / ((n-1)(n-2)) * sum((x - mean)^3) / s^3
+ *
+ * where `s` is the sample standard deviation (ddof=1).
+ *
+ * Returns `NaN` when `n < 3` or the standard deviation is zero.
+ */
+function computeSkewness(xs: readonly number[]): number {
+  const n = xs.length;
+  if (n < 3) {
+    return Number.NaN;
+  }
+
+  let total = 0;
+  for (const x of xs) {
+    total += x;
+  }
+  const mean = total / n;
+
+  let sumSq = 0;
+  let sumCube = 0;
+  for (const x of xs) {
+    const dev = x - mean;
+    sumSq += dev * dev;
+    sumCube += dev * dev * dev;
+  }
+
+  const std = Math.sqrt(sumSq / (n - 1)); // sample std (ddof=1)
+  if (std === 0) {
+    return Number.NaN;
+  }
+
+  return (n / ((n - 1) * (n - 2))) * (sumCube / (std * std * std));
+}
+
+/**
+ * Excess kurtosis (Fisher's definition) with pandas' bias correction:
+ *
+ *   G2 = n(n+1) / ((n-1)(n-2)(n-3)) * m4 / s^4 - 3(n-1)^2 / ((n-2)(n-3))
+ *
+ * where `s^2 = m2 / (n-1)` is the sample variance.
+ *
+ * Returns `NaN` when `n < 4` or the sample variance is zero.
+ */
+function computeKurtosis(xs: readonly number[]): number {
+  const n = xs.length;
+  if (n < 4) {
+    return Number.NaN;
+  }
+
+  let total = 0;
+  for (const x of xs) {
+    total += x;
+  }
+  const mean = total / n;
+
+  let sumSq = 0;
+  let sumQuart = 0;
+  for (const x of xs) {
+    const dev = x - mean;
+    const devSq = dev * dev;
+    sumSq += devSq;
+    sumQuart += devSq * devSq;
+  }
+
+  const sampleVar = sumSq / (n - 1); // denominator n-1
+  if (sampleVar === 0) {
+    return Number.NaN;
+  }
+
+  const lead = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3));
+  const tail = (3 * (n - 1) * (n - 1)) / ((n - 2) * (n - 3));
+  return lead * (sumQuart / (sampleVar * sampleVar)) - tail;
+}
+
+// ─── public API — Series ──────────────────────────────────────────────────────
+
+/**
+ * Return the adjusted Fisher–Pearson skewness of a numeric Series.
+ *
+ * Returns `NaN` when fewer than 3 non-null values are present or the
+ * standard deviation is zero.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3, 4, 100] });
+ * skewSeries(s); // approx 2.02
+ * ```
+ */
+export function skewSeries(series: Series, options: SkewKurtSeriesOptions = {}): number {
+ const skipna = options.skipna ?? true;
+ const xs = extractNumbers(series.values as readonly Scalar[], skipna);
+ return computeSkewness(xs);
+}
+
+/**
+ * Return the excess kurtosis (Fisher's definition, bias-corrected) of a
+ * numeric Series.
+ *
+ * Returns `NaN` when fewer than 4 non-null values are present or the
+ * standard deviation is zero.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 2, 3, 3, 3, 4, 4, 5] });
+ * kurtSeries(s); // approx -0.44
+ * ```
+ */
+export function kurtSeries(series: Series, options: SkewKurtSeriesOptions = {}): number {
+ const skipna = options.skipna ?? true;
+ const xs = extractNumbers(series.values as readonly Scalar[], skipna);
+ return computeKurtosis(xs);
+}
+
+// ─── public API — DataFrame ───────────────────────────────────────────────────
+
+/**
+ * Return the skewness of each column (axis=0) or each row (axis=1) of a
+ * DataFrame as a numeric Series.
+ *
+ * Non-numeric columns are omitted when `axis=0`. When `axis=1`, only
+ * numeric values in each row contribute.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 8, 16] });
+ * skewDataFrame(df); // Series with index ["a","b"]
+ * ```
+ */
+export function skewDataFrame(
+ df: DataFrame,
+ options: SkewKurtDataFrameOptions = {},
+): Series {
+ const axis = options.axis ?? 0;
+ const skipna = options.skipna ?? true;
+ const numericOnly = options.numericOnly ?? false;
+
+ if (axis === 0) {
+ return reduceColumns(df, numericOnly, skipna, computeSkewness);
+ }
+ return reduceRows(df, skipna, computeSkewness);
+}
+
+/**
+ * Return the excess kurtosis of each column (axis=0) or each row (axis=1)
+ * of a DataFrame as a numeric Series.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3, 4], b: [1, 1, 8, 8] });
+ * kurtDataFrame(df); // Series with index ["a","b"]
+ * ```
+ */
+export function kurtDataFrame(
+ df: DataFrame,
+ options: SkewKurtDataFrameOptions = {},
+): Series {
+ const axis = options.axis ?? 0;
+ const skipna = options.skipna ?? true;
+ const numericOnly = options.numericOnly ?? false;
+
+ if (axis === 0) {
+ return reduceColumns(df, numericOnly, skipna, computeKurtosis);
+ }
+ return reduceRows(df, skipna, computeKurtosis);
+}
+
+/**
+ * Reduce each column of `df` to a single number using `statFn`.
+ *
+ * When `numericOnly` is true, non-numeric columns are dropped from the
+ * result entirely; otherwise they are kept and contribute `NaN`. This
+ * matches sem_var's column reduction. (Previously both branches skipped
+ * non-numeric columns, making `numericOnly` a no-op.)
+ */
+function reduceColumns(
+  df: DataFrame,
+  numericOnly: boolean,
+  skipna: boolean,
+  statFn: (xs: readonly number[]) => number,
+): Series {
+  const colNames = df.columns.values as readonly string[];
+  const labels: string[] = [];
+  const values: number[] = [];
+
+  for (const col of colNames) {
+    const series = df.col(col);
+    if (!isNumericKind(series.dtype.kind)) {
+      if (numericOnly) {
+        continue; // drop the column from the result
+      }
+      labels.push(col);
+      values.push(Number.NaN); // keep the column, mark as NaN
+      continue;
+    }
+    labels.push(col);
+    const xs = extractNumbers(series.values as readonly Scalar[], skipna);
+    values.push(statFn(xs));
+  }
+
+  return new Series({ data: values, index: labels });
+}
+
+/** Reduce each row to a single number using the given statistic. */
+function reduceRows(
+ df: DataFrame,
+ skipna: boolean,
+ statFn: (xs: readonly number[]) => number,
+): Series {
+ const colNames = df.columns.values as readonly string[];
+ const rowCount = df.index.size;
+ const values: number[] = [];
+
+ for (let r = 0; r < rowCount; r++) {
+ const rowVals: Scalar[] = [];
+ for (const col of colNames) {
+ const series = df.col(col);
+ if (isNumericKind(series.dtype.kind)) {
+ rowVals.push(series.values[r] as Scalar);
+ }
+ }
+ const xs = extractNumbers(rowVals, skipna);
+ values.push(statFn(xs));
+ }
+
+ return new Series({ data: values, index: df.index });
+}
diff --git a/src/stats/string_ops.ts b/src/stats/string_ops.ts
index 4d283a12..b5b486fb 100644
--- a/src/stats/string_ops.ts
+++ b/src/stats/string_ops.ts
@@ -33,7 +33,7 @@ export type NormalizeForm = "NFC" | "NFD" | "NFKC" | "NFKD";
export type StrInput = Series | readonly Scalar[] | readonly string[] | string;
/** Options for {@link strGetDummies}. */
-export interface GetDummiesOptions {
+export interface StrGetDummiesOptions {
/**
* The delimiter used to split each element into tokens.
* @default "|"
@@ -163,7 +163,7 @@ export function strNormalize(
*/
export function strGetDummies(
input: readonly string[] | Series,
- options: GetDummiesOptions = {},
+ options: StrGetDummiesOptions = {},
): DataFrame {
const sep = options.sep ?? "|";
const prefix = options.prefix ?? "";
diff --git a/src/stats/to_datetime.ts b/src/stats/to_datetime.ts
new file mode 100644
index 00000000..c152ed7e
--- /dev/null
+++ b/src/stats/to_datetime.ts
@@ -0,0 +1,312 @@
+/**
+ * to_datetime — convert scalars, arrays, or Series to Date objects.
+ *
+ * Mirrors `pandas.to_datetime()`:
+ * - `toDatetime(value, options?)` — parse a single scalar to a `Date | null`
+ * - `toDatetime(values, options?)` — parse an array to `(Date | null)[]`
+ * - `toDatetime(series, options?)` — parse a Series to `Series`
+ *
+ * Supported input types:
+ * - `Date` — returned as-is (or converted to UTC if `utc: true`)
+ * - `number` — treated as a Unix timestamp; unit controls scale
+ * - `string` — parsed using ISO 8601, US-style (M/D/Y), European (D-M-Y),
+ * and compact (YYYYMMDD) formats
+ * - `null` / `undefined` / `NaN` — treated as missing (returns `null`)
+ *
+ * @module
+ */
+
+import { Dtype, Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── top-level regex constants (biome: useTopLevelRegex) ──────────────────────
+
+/** ISO 8601 / RFC 3339: 2024-01-15T12:00:00Z or 2024-01-15 */
+const RE_ISO =
+ /^\d{4}-\d{2}-\d{2}(?:[T ]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?(?:Z|[+-]\d{2}:?\d{2})?)?$/;
+
+/** US style: MM/DD/YYYY or MM/DD/YY */
+const RE_MDY = /^(\d{1,2})\/(\d{1,2})\/(\d{2,4})(?:\s(\d{1,2}):(\d{2})(?::(\d{2}))?)?$/;
+
+/** European dash: DD-MM-YYYY */
+const RE_DMY_DASH = /^(\d{1,2})-(\d{1,2})-(\d{4})(?:\s(\d{1,2}):(\d{2})(?::(\d{2}))?)?$/;
+
+/** Compact: YYYYMMDD */
+const RE_COMPACT = /^(\d{4})(\d{2})(\d{2})$/;
+
+/** Pure integer string (e.g. Unix timestamp) */
+const RE_INT = /^-?\d+$/;
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** Time unit for numeric inputs. */
+export type DatetimeUnit = "s" | "ms" | "us" | "ns";
+
+/** Error handling behaviour — mirrors pandas `errors` parameter. */
+export type DatetimeErrors = "raise" | "coerce" | "ignore";
+
+/** Options for `toDatetime`. */
+export interface ToDatetimeOptions {
+ /**
+ * Unit for numeric inputs (default `"ms"`).
+ * - `"s"` — seconds since Unix epoch
+ * - `"ms"` — milliseconds since Unix epoch
+ * - `"us"` — microseconds since Unix epoch
+ * - `"ns"` — nanoseconds since Unix epoch
+ */
+ readonly unit?: DatetimeUnit;
+ /**
+ * Error handling (default `"raise"`).
+ * - `"raise"` — throw a `TypeError` on unparseable input
+ * - `"coerce"` — return `null` on unparseable input
+ * - `"ignore"` — return the original value unchanged
+ */
+ readonly errors?: DatetimeErrors;
+ /**
+ * If `true`, return dates anchored to UTC timezone (default `false`).
+ * For numeric inputs this is a no-op (numbers are always UTC epoch).
+ */
+ readonly utc?: boolean;
+ /**
+ * If `true`, interpret ambiguous numeric-string dates as day-first
+ * (e.g. `"01/02/2024"` → Feb 1, not Jan 2). Default `false`.
+ */
+ readonly dayfirst?: boolean;
+}
+
+// ─── overloads ────────────────────────────────────────────────────────────────
+
+/**
+ * Convert a single scalar value to a `Date`.
+ *
+ * @param value - Scalar to convert.
+ * @param options - Conversion options.
+ * @returns A `Date` object, or `null` if the value is missing or unparseable
+ * (when `errors` is `"coerce"`). When `errors` is `"ignore"` the original
+ * value is returned.
+ */
+export function toDatetime(value: Scalar, options?: ToDatetimeOptions): Date | null;
+
+/**
+ * Convert an array of scalars to an array of `Date` objects.
+ *
+ * @param values - Array of scalars to convert.
+ * @param options - Conversion options.
+ */
+export function toDatetime(values: readonly Scalar[], options?: ToDatetimeOptions): (Date | null)[];
+
+/**
+ * Convert a `Series` of scalars to a `Series`.
+ *
+ * @param series - Series whose values are converted element-wise.
+ * @param options - Conversion options.
+ */
+export function toDatetime(
+ series: Series,
+ options?: ToDatetimeOptions,
+): Series;
+
+// ─── implementation ───────────────────────────────────────────────────────────
+
+export function toDatetime(
+ input: Scalar | readonly Scalar[] | Series,
+ options: ToDatetimeOptions = {},
+): Date | null | (Date | null)[] | Series {
+ if (input instanceof Series) {
+ return convertSeries(input, options);
+ }
+ if (Array.isArray(input)) {
+ return (input as readonly Scalar[]).map((v) => convertOne(v, options));
+ }
+ return convertOne(input as Scalar, options);
+}
+
+// ─── series conversion ────────────────────────────────────────────────────────
+
+function convertSeries(s: Series, options: ToDatetimeOptions): Series {
+ const converted = s.values.map((v) => convertOne(v, options));
+ return new Series({
+ data: converted as (Date | null)[],
+ index: s.index,
+ dtype: Dtype.datetime,
+ name: s.name,
+ });
+}
+
+// ─── scalar conversion ────────────────────────────────────────────────────────
+
+function convertOne(value: Scalar, options: ToDatetimeOptions): Date | null {
+ const errors = options.errors ?? "raise";
+
+ if (isMissing(value)) {
+ return null;
+ }
+
+ if (value instanceof Date) {
+ return normalizeDate(value, options);
+ }
+
+ if (typeof value === "number") {
+ return convertNumber(value, options);
+ }
+
+ if (typeof value === "string") {
+ return convertString(value, options);
+ }
+
+ return handleFailure(value, errors, `Cannot convert ${typeof value} to datetime`);
+}
+
+/** True for null / undefined / NaN. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Apply UTC anchoring to an existing Date. */
+function normalizeDate(d: Date, options: ToDatetimeOptions): Date {
+ if (options.utc === true) {
+ return new Date(d.getTime());
+ }
+ return d;
+}
+
+/** Convert a numeric value using the configured unit. */
+function convertNumber(value: number, options: ToDatetimeOptions): Date | null {
+ const unit = options.unit ?? "ms";
+ const ms = numericToMs(value, unit);
+ if (!Number.isFinite(ms)) {
+ return handleFailure(
+ value as unknown as Scalar,
+ options.errors ?? "raise",
+ `Invalid numeric datetime: ${value}`,
+ );
+ }
+ return new Date(ms);
+}
+
+/** Scale a numeric value to milliseconds based on unit. */
+function numericToMs(value: number, unit: DatetimeUnit): number {
+ if (unit === "s") {
+ return value * 1000;
+ }
+ if (unit === "us") {
+ return value / 1000;
+ }
+ if (unit === "ns") {
+ return value / 1_000_000;
+ }
+ return value; // ms
+}
+
+/**
+ * Parse a string to a Date according to the supported formats.
+ *
+ * Resolution order: compact YYYYMMDD, plain integer (epoch, scaled by
+ * `options.unit`), then the remaining string formats in
+ * {@link tryParseString}.
+ */
+function convertString(value: string, options: ToDatetimeOptions): Date | null {
+  const errors = options.errors ?? "raise";
+  const dayfirst = options.dayfirst ?? false;
+
+  // Compact YYYYMMDD must win over the plain-integer path (both patterns
+  // match all-digit strings). A single exec suffices — the previous
+  // test() + exec() scanned the string twice.
+  const compact = RE_COMPACT.exec(value);
+  if (compact !== null) {
+    const result = parseCompact(compact);
+    if (result !== null) {
+      return result;
+    }
+  }
+
+  if (RE_INT.test(value)) {
+    return convertNumber(Number(value), options);
+  }
+
+  const d = tryParseString(value, dayfirst);
+  if (d !== null) {
+    return d;
+  }
+
+  return handleFailure(value as unknown as Scalar, errors, `Cannot parse "${value}" as datetime`);
+}
+
+/** Try all known string formats; return a Date or null on no match. */
+function tryParseString(value: string, dayfirst: boolean): Date | null {
+ if (RE_ISO.test(value)) {
+ const d = new Date(value);
+ return Number.isNaN(d.getTime()) ? null : d;
+ }
+
+ const compact = RE_COMPACT.exec(value);
+ if (compact !== null) {
+ return parseCompact(compact);
+ }
+
+ const mdy = RE_MDY.exec(value);
+ if (mdy !== null) {
+ return parseMDY(mdy, dayfirst);
+ }
+
+ const dmy = RE_DMY_DASH.exec(value);
+ if (dmy !== null) {
+ return parseDMY(dmy);
+ }
+
+ return null;
+}
+
+/** Parse YYYYMMDD compact format. */
+function parseCompact(m: RegExpExecArray): Date | null {
+ const y = Number(m[1]);
+ const mo = Number(m[2]) - 1;
+ const d = Number(m[3]);
+ const dt = new Date(y, mo, d);
+ return Number.isNaN(dt.getTime()) ? null : dt;
+}
+
+/** Parse MM/DD/YYYY (or DD/MM/YYYY when dayfirst=true). */
+function parseMDY(m: RegExpExecArray, dayfirst: boolean): Date | null {
+ const a = Number(m[1]);
+ const b = Number(m[2]);
+ const y = expandYear(Number(m[3]));
+ const h = m[4] !== undefined ? Number(m[4]) : 0;
+ const min = m[5] !== undefined ? Number(m[5]) : 0;
+ const sec = m[6] !== undefined ? Number(m[6]) : 0;
+ const mo = dayfirst ? b - 1 : a - 1;
+ const day = dayfirst ? a : b;
+ const dt = new Date(y, mo, day, h, min, sec);
+ return Number.isNaN(dt.getTime()) ? null : dt;
+}
+
+/** Parse DD-MM-YYYY European format. */
+function parseDMY(m: RegExpExecArray): Date | null {
+ const day = Number(m[1]);
+ const mo = Number(m[2]) - 1;
+ const y = Number(m[3]);
+ const h = m[4] !== undefined ? Number(m[4]) : 0;
+ const min = m[5] !== undefined ? Number(m[5]) : 0;
+ const sec = m[6] !== undefined ? Number(m[6]) : 0;
+ const dt = new Date(y, mo, day, h, min, sec);
+ return Number.isNaN(dt.getTime()) ? null : dt;
+}
+
+/** Expand 2-digit years: 00–68 → 2000–2068, 69–99 → 1969–1999. */
+function expandYear(y: number): number {
+  if (y < 100) {
+    return y <= 68 ? 2000 + y : 1900 + y;
+  }
+  return y; // already a full 4-digit (or larger) year
+}
+
+/**
+ * Handle a parse failure according to the `errors` option.
+ * - `"raise"` → throws TypeError
+ * - `"coerce"` → returns null
+ * - `"ignore"` → returns original value (cast-escaped by callers via `as unknown`)
+ */
+function handleFailure(original: Scalar, errors: DatetimeErrors, message: string): Date | null {
+ if (errors === "raise") {
+ throw new TypeError(message);
+ }
+ if (errors === "coerce") {
+ return null;
+ }
+ // errors === "ignore": return original value unchanged
+ return original as unknown as Date;
+}
diff --git a/tests/core/astype.test.ts b/tests/core/astype.test.ts
new file mode 100644
index 00000000..f6336137
--- /dev/null
+++ b/tests/core/astype.test.ts
@@ -0,0 +1,292 @@
+/**
+ * Tests for astype — dtype coercion for Series and DataFrame.
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { DataFrame, Dtype, Series, astype, astypeSeries, castScalar } from "../../src/index.ts";
+
+describe("castScalar", () => {
+ describe("int64", () => {
+ it("casts float to int via truncation", () => {
+ expect(castScalar(3.9, Dtype.int64)).toBe(3);
+ expect(castScalar(-2.1, Dtype.int64)).toBe(-2);
+ });
+
+ it("casts boolean true/false", () => {
+ expect(castScalar(true, Dtype.int64)).toBe(1);
+ expect(castScalar(false, Dtype.int64)).toBe(0);
+ });
+
+ it("casts numeric string", () => {
+ expect(castScalar("42", Dtype.int64)).toBe(42);
+ });
+
+ it("returns null for null/undefined", () => {
+ expect(castScalar(null, Dtype.int64)).toBeNull();
+ expect(castScalar(undefined, Dtype.int64)).toBeNull();
+ });
+
+ it("returns null for non-numeric string", () => {
+ expect(castScalar("abc", Dtype.int64)).toBeNull();
+ });
+ });
+
+ describe("int8 clamping", () => {
+ it("clamps to [-128, 127]", () => {
+ expect(castScalar(200, Dtype.from("int8"))).toBe(127);
+ expect(castScalar(-200, Dtype.from("int8"))).toBe(-128);
+ expect(castScalar(100, Dtype.from("int8"))).toBe(100);
+ });
+ });
+
+ describe("uint8 clamping", () => {
+ it("clamps to [0, 255]", () => {
+ expect(castScalar(-5, Dtype.from("uint8"))).toBe(0);
+ expect(castScalar(300, Dtype.from("uint8"))).toBe(255);
+ expect(castScalar(128, Dtype.from("uint8"))).toBe(128);
+ });
+ });
+
+ describe("float64", () => {
+ it("casts integer to float", () => {
+ expect(castScalar(3, Dtype.float64)).toBe(3.0);
+ });
+
+ it("casts boolean to 0.0/1.0", () => {
+ expect(castScalar(true, Dtype.float64)).toBe(1.0);
+ expect(castScalar(false, Dtype.float64)).toBe(0.0);
+ });
+
+ it("returns null for null", () => {
+ expect(castScalar(null, Dtype.float64)).toBeNull();
+ });
+
+ it("returns NaN for non-numeric string", () => {
+ expect(castScalar("hello", Dtype.float64)).toBeNaN();
+ });
+
+ it("parses numeric string", () => {
+ expect(castScalar("3.14", Dtype.float64)).toBeCloseTo(3.14);
+ });
+ });
+
+ describe("bool", () => {
+ it("truthy number → true", () => {
+ expect(castScalar(1, Dtype.bool)).toBe(true);
+ expect(castScalar(0, Dtype.bool)).toBe(false);
+ });
+
+ it("string 'hello' → true", () => {
+ expect(castScalar("hello", Dtype.bool)).toBe(true);
+ expect(castScalar("", Dtype.bool)).toBe(false);
+ });
+
+ it("null → null", () => {
+ expect(castScalar(null, Dtype.bool)).toBeNull();
+ });
+
+ it("NaN → false", () => {
+ expect(castScalar(Number.NaN, Dtype.bool)).toBe(false);
+ });
+ });
+
+ describe("string", () => {
+ it("converts number to string", () => {
+ expect(castScalar(42, Dtype.string)).toBe("42");
+ });
+
+ it("converts boolean to string", () => {
+ expect(castScalar(true, Dtype.string)).toBe("true");
+ });
+
+ it("null → null", () => {
+ expect(castScalar(null, Dtype.string)).toBeNull();
+ });
+
+ it("converts Date to ISO string", () => {
+ const d = new Date("2024-01-15T00:00:00.000Z");
+ expect(castScalar(d, Dtype.string)).toBe("2024-01-15T00:00:00.000Z");
+ });
+ });
+
+ describe("datetime", () => {
+ it("converts timestamp number to Date", () => {
+ const ts = 1705276800000;
+ const result = castScalar(ts, Dtype.datetime);
+ expect(result instanceof Date).toBe(true);
+ expect((result as Date).getTime()).toBe(ts);
+ });
+
+ it("converts ISO string to Date", () => {
+ const result = castScalar("2024-01-15T00:00:00.000Z", Dtype.datetime);
+ expect(result instanceof Date).toBe(true);
+ expect((result as Date).getFullYear()).toBe(2024);
+ });
+
+ it("returns null for invalid date string", () => {
+ expect(castScalar("not-a-date", Dtype.datetime)).toBeNull();
+ });
+
+ it("passes Date through unchanged", () => {
+ const d = new Date(0);
+ expect(castScalar(d, Dtype.datetime)).toBe(d);
+ });
+
+ it("null → null", () => {
+ expect(castScalar(null, Dtype.datetime)).toBeNull();
+ });
+ });
+
+ describe("object passthrough", () => {
+ it("returns value unchanged for object dtype", () => {
+ const v = { x: 1 } as unknown as import("../../src/types.ts").Scalar;
+ expect(castScalar(v, Dtype.object)).toBe(v);
+ });
+ });
+});
+
+describe("astypeSeries", () => {
+ it("casts float series to int64", () => {
+ const s = new Series({ data: [1.9, 2.1, 3.7], name: "x" });
+ const si = astypeSeries(s, "int64");
+ expect(si.dtype.name).toBe("int64");
+ expect([...si.values]).toEqual([1, 2, 3]);
+ expect(si.name).toBe("x");
+ });
+
+ it("casts int series to float64", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ const sf = astypeSeries(s, "float64");
+ expect(sf.dtype.name).toBe("float64");
+ expect([...sf.values]).toEqual([1.0, 2.0, 3.0]);
+ });
+
+ it("casts int series to bool", () => {
+ const s = new Series({ data: [0, 1, 2] });
+ const sb = astypeSeries(s, "bool");
+ expect([...sb.values]).toEqual([false, true, true]);
+ expect(sb.dtype.name).toBe("bool");
+ });
+
+ it("casts number series to string", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ const ss = astypeSeries(s, "string");
+ expect([...ss.values]).toEqual(["1", "2", "3"]);
+ expect(ss.dtype.name).toBe("string");
+ });
+
+ it("preserves index labels", () => {
+ const s = new Series({ data: [1.5, 2.5], index: ["a", "b"] });
+ const si = astypeSeries(s, "int64");
+ expect(si.index.at(0)).toBe("a");
+ expect(si.index.at(1)).toBe("b");
+ });
+
+ it("null values become null in int cast", () => {
+ const s = new Series({ data: [1, null, 3] });
+ const si = astypeSeries(s, "int64");
+ expect(si.values[1]).toBeNull();
+ });
+
+ it("accepts a Dtype instance", () => {
+ const s = new Series({ data: [1.9, 2.1] });
+ const si = astypeSeries(s, Dtype.int64);
+ expect(si.dtype).toBe(Dtype.int64);
+ expect([...si.values]).toEqual([1, 2]);
+ });
+
+ it("property: float→int→float recovers integer part", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ min: -1000, max: 1000, noNaN: true }), { minLength: 0, maxLength: 20 }),
+ (arr) => {
+ const s = new Series({ data: arr });
+ const si = astypeSeries(s, "int64");
+ const sf = astypeSeries(si, "float64");
+ for (let i = 0; i < arr.length; i++) {
+ const expected = Math.trunc(arr[i] as number);
+ expect(sf.values[i]).toBe(expected);
+ }
+ },
+ ),
+ );
+ });
+
+ it("property: string→int64 for integers recovers value", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: -1000, max: 1000 }), { minLength: 0, maxLength: 20 }),
+ (arr) => {
+ const s = new Series({ data: arr.map(String) });
+ const si = astypeSeries(s, "int64");
+ for (let i = 0; i < arr.length; i++) {
+ expect(si.values[i]).toBe(arr[i]);
+ }
+ },
+ ),
+ );
+ });
+});
+
+describe("astype (DataFrame)", () => {
+ it("casts all columns with a single dtype name", () => {
+ const df = DataFrame.fromColumns({ a: [1.5, 2.5], b: [3.9, 4.1] });
+ const di = astype(df, "int64");
+ expect([...di.col("a").values]).toEqual([1, 2]);
+ expect([...di.col("b").values]).toEqual([3, 4]);
+ expect(di.col("a").dtype.name).toBe("int64");
+ expect(di.col("b").dtype.name).toBe("int64");
+ });
+
+ it("casts all columns with a Dtype instance", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ const ds = astype(df, Dtype.string);
+ expect([...ds.col("a").values]).toEqual(["1", "2"]);
+ });
+
+ it("casts individual columns using a Record mapping", () => {
+ const df = DataFrame.fromColumns({ a: [1.5, 2.5], b: ["10", "20"] });
+ const di = astype(df, { a: "int64", b: "float64" });
+ expect([...di.col("a").values]).toEqual([1, 2]);
+ expect([...di.col("b").values]).toEqual([10, 20]);
+ });
+
+ it("leaves unmapped columns unchanged", () => {
+ const df = DataFrame.fromColumns({ a: [1.5, 2.5], b: [true, false] });
+ const di = astype(df, { a: "int64" });
+ expect([...di.col("a").values]).toEqual([1, 2]);
+ // column b is bool and unchanged
+ expect([...di.col("b").values]).toEqual([true, false]);
+ });
+
+ it("preserves row index", () => {
+ const df = DataFrame.fromColumns({ x: [10, 20, 30] });
+ const di = astype(df, "float64");
+ expect(di.index.size).toBe(3);
+ });
+
+ it("preserves column order", () => {
+ const df = DataFrame.fromColumns({ z: [1], a: [2], m: [3] });
+ const di = astype(df, "float64");
+ expect([...di.columns.values]).toEqual(["z", "a", "m"]);
+ });
+
+ it("does not mutate the original DataFrame", () => {
+ const df = DataFrame.fromColumns({ a: [1.5, 2.5] });
+ astype(df, "int64");
+ expect(df.col("a").dtype.name).toBe("float64");
+ });
+
+ it("property: roundtrip int↔float preserves integer values", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 1, maxLength: 10 }),
+ (arr) => {
+ const df = DataFrame.fromColumns({ v: arr });
+ const df2 = astype(astype(df, "float64"), "int64");
+ expect([...df2.col("v").values]).toEqual(arr);
+ },
+ ),
+ );
+ });
+});
diff --git a/tests/io/read_excel.test.ts b/tests/io/read_excel.test.ts
new file mode 100644
index 00000000..e7802aff
--- /dev/null
+++ b/tests/io/read_excel.test.ts
@@ -0,0 +1,495 @@
+/**
+ * Tests for src/io/read_excel.ts — readExcel() and xlsxSheetNames().
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { DataFrame } from "../../src/index.ts";
+import { readExcel, xlsxSheetNames } from "../../src/io/read_excel.ts";
+
+// ─── minimal XLSX fixture builder ─────────────────────────────────────────────
+
+const ENC = new TextEncoder();
+
+function le16(n: number): Uint8Array {
+ const v = n & 0xffff;
+ return new Uint8Array([v & 0xff, (v >> 8) & 0xff]);
+}
+
+function le32(n: number): Uint8Array {
+ const v = n >>> 0;
+ return new Uint8Array([v & 0xff, (v >> 8) & 0xff, (v >> 16) & 0xff, (v >> 24) & 0xff]);
+}
+
+function concat(...parts: Uint8Array[]): Uint8Array {
+ let total = 0;
+ for (const p of parts) {
+ total += p.length;
+ }
+ const out = new Uint8Array(total);
+ let pos = 0;
+ for (const p of parts) {
+ out.set(p, pos);
+ pos += p.length;
+ }
+ return out;
+}
+
+interface ZipFile {
+ name: string;
+ data: Uint8Array;
+}
+
+/** Build a minimal STORED (uncompressed) ZIP archive. CRC is set to 0. */
+function buildStoredZip(files: ZipFile[]): Uint8Array {
+ const localParts: Uint8Array[] = [];
+ const localOffsets: number[] = [];
+ let curOffset = 0;
+
+ for (const f of files) {
+ const nameBytes = ENC.encode(f.name);
+ const lh = concat(
+ new Uint8Array([0x50, 0x4b, 0x03, 0x04]), // local file sig
+ le16(20), // version needed
+ le16(0), // flags
+ le16(0), // method = STORED
+ le16(0),
+ le16(0), // mod time, mod date
+ le32(0), // CRC-32 (zeroed)
+ le32(f.data.length), // compressed size
+ le32(f.data.length), // uncompressed size
+ le16(nameBytes.length), // filename length
+ le16(0), // extra field length
+ nameBytes,
+ f.data,
+ );
+ localOffsets.push(curOffset);
+ localParts.push(lh);
+ curOffset += lh.length;
+ }
+
+ const cdParts: Uint8Array[] = [];
+ for (const [i, f] of files.entries()) {
+ const nameBytes = ENC.encode(f.name);
+ const off = localOffsets[i] ?? 0;
+ const cd = concat(
+ new Uint8Array([0x50, 0x4b, 0x01, 0x02]), // central dir sig
+ le16(20),
+ le16(20), // version made by, needed
+ le16(0),
+ le16(0), // flags, method = STORED
+ le16(0),
+ le16(0), // mod time, mod date
+ le32(0), // CRC-32
+ le32(f.data.length), // compressed size
+ le32(f.data.length), // uncompressed size
+ le16(nameBytes.length), // filename length
+ le16(0),
+ le16(0), // extra, comment length
+ le16(0),
+ le16(0), // disk start, internal attrs
+ le32(0), // external attrs
+ le32(off), // local header offset
+ nameBytes,
+ );
+ cdParts.push(cd);
+ }
+
+ const cdSize = cdParts.reduce((s, p) => s + p.length, 0);
+ const cdOffset = curOffset;
+
+ const eocd = concat(
+ new Uint8Array([0x50, 0x4b, 0x05, 0x06]), // end of central dir sig
+ le16(0),
+ le16(0), // disk numbers
+ le16(files.length),
+ le16(files.length), // entry counts
+ le32(cdSize), // central dir size
+ le32(cdOffset), // central dir offset
+ le16(0), // comment length
+ );
+
+ return concat(...localParts, ...cdParts, eocd);
+}
+
+function escXml(s: string): string {
+  return s
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;");
+}
+
+function numToColLetter(c: number): string {
+ let col = c + 1;
+ let result = "";
+ while (col > 0) {
+ const rem = (col - 1) % 26;
+ result = String.fromCharCode(65 + rem) + result;
+ col = Math.floor((col - 1) / 26);
+ }
+ return result;
+}
+
+type CellValue = string | number | boolean | null;
+
+interface SstContext {
+  stringList: string[];
+  stringIdx: Map<string, number>;
+}
+
+/** Register a string in the SST and return its index. */
+function regStr(ctx: SstContext, s: string): number {
+ const existing = ctx.stringIdx.get(s);
+ if (existing !== undefined) {
+ return existing;
+ }
+ const idx = ctx.stringList.length;
+ ctx.stringList.push(s);
+ ctx.stringIdx.set(s, idx);
+ return idx;
+}
+
+/** Build SST XML from a context. */
+function buildSstXml(ctx: SstContext): string {
+  return [
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
+    `<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="${ctx.stringList.length}" uniqueCount="${ctx.stringList.length}">`,
+    ...ctx.stringList.map((s) => `<si><t>${escXml(s)}</t></si>`),
+    "</sst>",
+  ].join("\n");
+}
+
+/** Render one data cell to XML. */
+function renderCell(ref: string, cell: CellValue, ctx: SstContext): string {
+  if (typeof cell === "string") {
+    const idx = regStr(ctx, cell);
+    return `<c r="${ref}" t="s"><v>${idx}</v></c>`;
+  }
+  if (typeof cell === "boolean") {
+    return `<c r="${ref}" t="b"><v>${cell ? 1 : 0}</v></c>`;
+  }
+  return `<c r="${ref}"><v>${cell}</v></c>`;
+}
+
+/** Build worksheet XML from headers, rows, and SST context. */
+function buildWsXml(headers: string[], rows: CellValue[][], ctx: SstContext): string {
+  const wsRowParts: string[] = [];
+  const hCells = headers.map((h, c) => {
+    const idx = regStr(ctx, h);
+    return `<c r="${numToColLetter(c)}1" t="s"><v>${idx}</v></c>`;
+  });
+  wsRowParts.push(`<row r="1">${hCells.join("")}</row>`);
+  for (const [ri, row] of rows.entries()) {
+    const rowNum = ri + 2;
+    const cells: string[] = [];
+    for (const [ci, cell] of row.entries()) {
+      if (cell === null || cell === undefined) {
+        continue;
+      }
+      cells.push(renderCell(`${numToColLetter(ci)}${rowNum}`, cell, ctx));
+    }
+    wsRowParts.push(`<row r="${rowNum}">${cells.join("")}</row>`);
+  }
+  return [
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
+    '<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">',
+    "<sheetData>",
+    ...wsRowParts,
+    "</sheetData>",
+    "</worksheet>",
+  ].join("\n");
+}
+
+/** Build a minimal single-sheet XLSX buffer. */
+function makeXlsx(headers: string[], rows: CellValue[][], sheetName = "Sheet1"): Uint8Array {
+ const ctx: SstContext = { stringList: [], stringIdx: new Map() };
+ // Pre-register header strings
+ for (const h of headers) {
+ regStr(ctx, h);
+ }
+ // Pre-register data strings
+ for (const row of rows) {
+ for (const cell of row) {
+ if (typeof cell === "string") {
+ regStr(ctx, cell);
+ }
+ }
+ }
+ const sstXml = buildSstXml(ctx);
+ const wsXml = buildWsXml(headers, rows, ctx);
+  const wbXml = [
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
+    '<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">',
+    `  <sheets><sheet name="${escXml(sheetName)}" sheetId="1" r:id="rId1"/></sheets>`,
+    "</workbook>",
+  ].join("\n");
+  const wbRelsXml =
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/><Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings" Target="sharedStrings.xml"/></Relationships>';
+  const relsXml =
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/></Relationships>';
+  const ctXml =
+    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/><Default Extension="xml" ContentType="application/xml"/><Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/><Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/><Override PartName="/xl/sharedStrings.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"/></Types>';
+ return buildStoredZip([
+ { name: "[Content_Types].xml", data: ENC.encode(ctXml) },
+ { name: "_rels/.rels", data: ENC.encode(relsXml) },
+ { name: "xl/workbook.xml", data: ENC.encode(wbXml) },
+ { name: "xl/_rels/workbook.xml.rels", data: ENC.encode(wbRelsXml) },
+ { name: "xl/sharedStrings.xml", data: ENC.encode(sstXml) },
+ { name: "xl/worksheets/sheet1.xml", data: ENC.encode(wsXml) },
+ ]);
+}
+
+// ─── tests ────────────────────────────────────────────────────────────────────
+
+describe("readExcel — basic reading", () => {
+ it("reads a simple 3-column sheet with numbers", () => {
+ const buf = makeXlsx(
+ ["a", "b", "c"],
+ [
+ [1, 2, 3],
+ [4, 5, 6],
+ ],
+ );
+ const df = readExcel(buf);
+ expect(df.shape).toEqual([2, 3]);
+ expect([...df.columns.values]).toEqual(["a", "b", "c"]);
+ expect([...df.col("a").values]).toEqual([1, 4]);
+ expect([...df.col("b").values]).toEqual([2, 5]);
+ expect([...df.col("c").values]).toEqual([3, 6]);
+ });
+
+ it("reads string columns", () => {
+ const buf = makeXlsx(
+ ["name", "city"],
+ [
+ ["Alice", "New York"],
+ ["Bob", "London"],
+ ],
+ );
+ const df = readExcel(buf);
+ expect(df.shape).toEqual([2, 2]);
+ expect([...df.col("name").values]).toEqual(["Alice", "Bob"]);
+ expect([...df.col("city").values]).toEqual(["New York", "London"]);
+ });
+
+ it("reads boolean columns", () => {
+ const buf = makeXlsx(
+ ["flag", "val"],
+ [
+ [true, 1],
+ [false, 2],
+ [true, 3],
+ ],
+ );
+ const df = readExcel(buf);
+ expect([...df.col("flag").values]).toEqual([true, false, true]);
+ });
+
+ it("reads mixed-type columns as object dtype", () => {
+ const buf = makeXlsx(["mixed"], [["hello"], [42], ["world"]]);
+ const df = readExcel(buf);
+ expect([...df.col("mixed").values]).toEqual(["hello", 42, "world"]);
+ });
+
+ it("handles null/empty cells", () => {
+ const buf = makeXlsx(
+ ["a", "b"],
+ [
+ [1, null],
+ [null, 2],
+ ],
+ );
+ const df = readExcel(buf);
+ expect([...df.col("a").values]).toEqual([1, null]);
+ expect([...df.col("b").values]).toEqual([null, 2]);
+ });
+
+ it("returns a DataFrame instance", () => {
+ const buf = makeXlsx(["x"], [[1], [2]]);
+ const df = readExcel(buf);
+ expect(df).toBeInstanceOf(DataFrame);
+ });
+});
+
+describe("readExcel — sheetName option", () => {
+ it("sheetName: 0 returns the first sheet (default)", () => {
+ const buf = makeXlsx(["x"], [[10], [20]]);
+ const df0 = readExcel(buf, { sheetName: 0 });
+ const dfDefault = readExcel(buf);
+ expect([...df0.col("x").values]).toEqual([...dfDefault.col("x").values]);
+ });
+
+ it("sheetName: string finds sheet by name", () => {
+ const buf = makeXlsx(["v"], [[99]], "MySheet");
+ const df = readExcel(buf, { sheetName: "MySheet" });
+ expect([...df.col("v").values]).toEqual([99]);
+ });
+
+ it("throws on invalid sheet name", () => {
+ const buf = makeXlsx(["x"], [[1]]);
+ expect(() => readExcel(buf, { sheetName: "NoSuch" })).toThrow();
+ });
+
+ it("throws on out-of-range sheet index", () => {
+ const buf = makeXlsx(["x"], [[1]]);
+ expect(() => readExcel(buf, { sheetName: 5 })).toThrow();
+ });
+});
+
+describe("readExcel — header option", () => {
+ it("header: null uses numeric column names", () => {
+ const buf = makeXlsx(["a", "b"], [[1, 2]]);
+ const df = readExcel(buf, { header: null });
+ // With header: null, all rows are data — columns are "0", "1"
+ expect([...df.columns.values]).toEqual(["0", "1"]);
+ // Both rows become data rows
+ expect(df.shape[0]).toBe(2);
+ });
+
+ it("header: 0 is the default", () => {
+ const buf = makeXlsx(["name", "score"], [["Alice", 95]]);
+ const df = readExcel(buf, { header: 0 });
+ expect([...df.columns.values]).toEqual(["name", "score"]);
+ });
+});
+
+describe("readExcel — indexCol option", () => {
+ it("indexCol: string sets named column as row index", () => {
+ const buf = makeXlsx(
+ ["id", "val"],
+ [
+ ["a", 1],
+ ["b", 2],
+ ],
+ );
+ const df = readExcel(buf, { indexCol: "id" });
+ expect(df.shape).toEqual([2, 1]);
+ expect([...df.index.values]).toEqual(["a", "b"]);
+ expect([...df.columns.values]).toEqual(["val"]);
+ });
+
+ it("indexCol: number sets column by position", () => {
+ const buf = makeXlsx(
+ ["k", "v"],
+ [
+ ["x", 10],
+ ["y", 20],
+ ],
+ );
+ const df = readExcel(buf, { indexCol: 0 });
+ expect([...df.index.values]).toEqual(["x", "y"]);
+ expect([...df.columns.values]).toEqual(["v"]);
+ });
+});
+
+describe("readExcel — skipRows / nrows options", () => {
+ it("skipRows skips leading data rows", () => {
+ const buf = makeXlsx(["n"], [[1], [2], [3], [4]]);
+ const df = readExcel(buf, { skipRows: 2 });
+ expect([...df.col("n").values]).toEqual([3, 4]);
+ });
+
+ it("nrows limits the number of rows returned", () => {
+ const buf = makeXlsx(["n"], [[1], [2], [3], [4]]);
+ const df = readExcel(buf, { nrows: 2 });
+ expect([...df.col("n").values]).toEqual([1, 2]);
+ });
+
+ it("skipRows + nrows combined", () => {
+ const buf = makeXlsx(["n"], [[1], [2], [3], [4]]);
+ const df = readExcel(buf, { skipRows: 1, nrows: 2 });
+ expect([...df.col("n").values]).toEqual([2, 3]);
+ });
+});
+
+describe("readExcel — naValues option", () => {
+ it("treats empty string cells as null by default", () => {
+ const buf = makeXlsx(["v"], [[""], [1]]);
+ const df = readExcel(buf);
+ expect(df.col("v").values[0]).toBeNull();
+ });
+
+ it("treats custom naValues strings as null", () => {
+ const buf = makeXlsx(["v"], [["MISSING"], ["ok"]]);
+ const df = readExcel(buf, { naValues: ["MISSING"] });
+ expect(df.col("v").values[0]).toBeNull();
+ expect(df.col("v").values[1]).toBe("ok");
+ });
+});
+
+describe("readExcel — error cases", () => {
+ it("throws on non-ZIP input", () => {
+ const notZip = new Uint8Array([1, 2, 3, 4, 5]);
+ expect(() => readExcel(notZip)).toThrow();
+ });
+
+ it("accepts ArrayBuffer input", () => {
+ const buf = makeXlsx(["x"], [[42]]);
+ const ab = buf.buffer;
+ const df = readExcel(ab);
+ expect([...df.col("x").values]).toEqual([42]);
+ });
+});
+
+describe("xlsxSheetNames", () => {
+ it("returns sheet names from workbook", () => {
+ const buf = makeXlsx(["x"], [[1]], "DataSheet");
+ const names = xlsxSheetNames(buf);
+ expect(names).toEqual(["DataSheet"]);
+ });
+
+ it("returns empty array for invalid ZIP (no workbook)", () => {
+ // Build a ZIP with no xl/workbook.xml
+ const buf = buildStoredZip([{ name: "dummy.txt", data: ENC.encode("hello") }]);
+ const names = xlsxSheetNames(buf);
+ expect(names).toEqual([]);
+ });
+
+ it("accepts ArrayBuffer", () => {
+ const buf = makeXlsx(["a"], [[1]]);
+ const names = xlsxSheetNames(buf.buffer);
+ expect(names).toEqual(["Sheet1"]);
+ });
+});
+
+describe("readExcel — property-based tests", () => {
+ it("round-trips numeric data: shape is preserved", () => {
+ fc.assert(
+ fc.property(
+ fc.uniqueArray(fc.string({ minLength: 1, maxLength: 6 }), {
+ minLength: 1,
+ maxLength: 5,
+ }),
+ fc.array(
+ fc.array(fc.float({ noNaN: true, noDefaultInfinity: true }), {
+ minLength: 1,
+ maxLength: 5,
+ }),
+ { minLength: 1, maxLength: 10 },
+ ),
+ (headers, rowsRaw) => {
+ // Ensure all rows have same width as headers
+ const width = headers.length;
+ const rows = rowsRaw.map((r) =>
+ Array.from({ length: width }, (_, i): CellValue => r[i] ?? 0),
+ );
+ const buf = makeXlsx(headers, rows);
+ const df = readExcel(buf);
+ expect(df.shape[0]).toBe(rows.length);
+ expect(df.shape[1]).toBe(width);
+ },
+ ),
+ );
+ });
+
+ it("xlsxSheetNames: returns non-empty array for valid XLSX", () => {
+ fc.assert(
+ fc.property(fc.string({ minLength: 1, maxLength: 20 }), (sheetName) => {
+ const buf = makeXlsx(["x"], [[1]], sheetName);
+ const names = xlsxSheetNames(buf);
+ expect(names.length).toBe(1);
+ expect(names[0]).toBe(sheetName);
+ }),
+ );
+ });
+});
diff --git a/tests/stats/clip_advanced.test.ts b/tests/stats/clip_advanced.test.ts
new file mode 100644
index 00000000..ffcb74b1
--- /dev/null
+++ b/tests/stats/clip_advanced.test.ts
@@ -0,0 +1,215 @@
+/**
+ * Tests for stats/clip_advanced.ts
+ */
+
+import { describe, expect, test } from "bun:test";
+import fc from "fast-check";
+import { DataFrame, Series } from "../../src/index.ts";
+import { clipAdvancedDataFrame, clipAdvancedSeries } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── clipAdvancedSeries ────────────────────────────────────────────────────────
+
+describe("clipAdvancedSeries", () => {
+ test("scalar lower bound", () => {
+ const s = new Series({ data: [-3, 0, 5] });
+ expect(clipAdvancedSeries(s, { lower: 0 }).values).toEqual([0, 0, 5]);
+ });
+
+ test("scalar upper bound", () => {
+ const s = new Series({ data: [1, 5, 10] });
+ expect(clipAdvancedSeries(s, { upper: 6 }).values).toEqual([1, 5, 6]);
+ });
+
+ test("scalar lower and upper bounds", () => {
+ const s = new Series({ data: [-3, 1, 5, 10] });
+ expect(clipAdvancedSeries(s, { lower: 0, upper: 6 }).values).toEqual([0, 1, 5, 6]);
+ });
+
+ test("array lower bounds", () => {
+ const s = new Series({ data: [-1, 0, 5] });
+ expect(clipAdvancedSeries(s, { lower: [2, -1, 6] }).values).toEqual([2, 0, 6]);
+ });
+
+ test("array upper bounds", () => {
+ const s = new Series({ data: [10, 5, 1] });
+ expect(clipAdvancedSeries(s, { upper: [8, 4, 3] }).values).toEqual([8, 4, 1]);
+ });
+
+ test("Series lower bounds — positional", () => {
+ const s = new Series({ data: [-1, 0, 5, 10] });
+ const lo = new Series({ data: [0, 1, 2, 3] });
+ expect(clipAdvancedSeries(s, { lower: lo }).values).toEqual([0, 1, 5, 10]);
+ });
+
+ test("Series upper bounds — positional", () => {
+ const s = new Series({ data: [0, 5, 10, 15] });
+ const hi = new Series({ data: [2, 4, 12, 14] });
+ expect(clipAdvancedSeries(s, { upper: hi }).values).toEqual([0, 4, 10, 14]);
+ });
+
+ test("no bounds returns original values", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect(clipAdvancedSeries(s).values).toEqual([1, 2, 3]);
+ });
+
+ test("null values pass through", () => {
+ const s = new Series({ data: [null, 5, null] });
+ const result = clipAdvancedSeries(s, { lower: 0, upper: 4 });
+ expect(result.values[0]).toBeNull();
+ expect(result.values[1]).toBe(4);
+ expect(result.values[2]).toBeNull();
+ });
+
+ test("preserves Series name", () => {
+ const s = new Series({ data: [1, 2, 3], name: "vals" });
+ expect(clipAdvancedSeries(s, { lower: 0 }).name).toBe("vals");
+ });
+
+ test("preserves index", () => {
+ const s = new Series({ data: [1, 2, 3], index: ["a", "b", "c"] });
+ const result = clipAdvancedSeries(s, { lower: 0 });
+ expect(result.index.at(0)).toBe("a");
+ expect(result.index.at(2)).toBe("c");
+ });
+
+ // property: clipped value always >= lower and <= upper (for numeric values)
+ test("property: clipped value is within bounds (scalar)", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, noDefaultInfinity: true }), {
+ minLength: 1,
+ maxLength: 20,
+ }),
+ fc.float({ noNaN: true, noDefaultInfinity: true }),
+ fc.float({ noNaN: true, noDefaultInfinity: true }),
+ (data, a, b) => {
+ const lo = Math.min(a, b);
+ const hi = Math.max(a, b);
+ const s = new Series({ data: data as Scalar[] });
+ const result = clipAdvancedSeries(s, { lower: lo, upper: hi });
+ return (result.values as number[]).every((v) => v >= lo && v <= hi);
+ },
+ ),
+ );
+ });
+
+ // property: clipped values are >= per-element lower bound
+ test("property: array lower bound respected element-wise", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 1, maxLength: 10 }),
+ fc.array(fc.integer({ min: -50, max: 50 }), { minLength: 1, maxLength: 10 }),
+ (data, lower) => {
+ const len = Math.min(data.length, lower.length);
+ const s = new Series({ data: data.slice(0, len) as Scalar[] });
+ const loBound = lower.slice(0, len);
+ const result = clipAdvancedSeries(s, { lower: loBound });
+ return (result.values as number[]).every((v, i) => {
+ const lo = loBound[i];
+ return lo === undefined || v >= lo;
+ });
+ },
+ ),
+ );
+ });
+});
+
+// ─── clipAdvancedDataFrame ─────────────────────────────────────────────────────
+
+describe("clipAdvancedDataFrame", () => {
+ test("scalar lower bound clips all cells", () => {
+ const df = DataFrame.fromColumns({ a: [-1, 2, 5], b: [0, 3, 8] });
+ const result = clipAdvancedDataFrame(df, { lower: 1 });
+ expect(result.col("a").values).toEqual([1, 2, 5]);
+ expect(result.col("b").values).toEqual([1, 3, 8]);
+ });
+
+ test("scalar upper bound clips all cells", () => {
+ const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+ const result = clipAdvancedDataFrame(df, { upper: 6 });
+ expect(result.col("a").values).toEqual([1, 5, 6]);
+ expect(result.col("b").values).toEqual([2, 6, 6]);
+ });
+
+ test("DataFrame lower bound — element-wise", () => {
+ const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+ const loBound = DataFrame.fromColumns({ a: [2, 3, 4], b: [1, 4, 8] });
+ const result = clipAdvancedDataFrame(df, { lower: loBound });
+ expect(result.col("a").values).toEqual([2, 5, 9]);
+ expect(result.col("b").values).toEqual([2, 6, 10]);
+ });
+
+ test("DataFrame upper bound — element-wise", () => {
+ const df = DataFrame.fromColumns({ a: [10, 5, 9], b: [2, 6, 10] });
+ const hiBound = DataFrame.fromColumns({ a: [8, 6, 7], b: [3, 5, 9] });
+ const result = clipAdvancedDataFrame(df, { upper: hiBound });
+ expect(result.col("a").values).toEqual([8, 5, 7]);
+ expect(result.col("b").values).toEqual([2, 5, 9]);
+ });
+
+ test("Series lower bound axis=0 (broadcast over columns)", () => {
+ // axis=0: Series index maps to column positions
+ // Series has 2 elements for 2 columns [a, b]
+ const df = DataFrame.fromColumns({ a: [1, 5], b: [2, 6] });
+ const lo = new Series({ data: [3, 4] }); // col a: lo=3, col b: lo=4
+ const result = clipAdvancedDataFrame(df, { lower: lo, axis: 0 });
+ expect(result.col("a").values).toEqual([3, 5]);
+ expect(result.col("b").values).toEqual([4, 6]);
+ });
+
+ test("Series lower bound axis=1 (broadcast over rows)", () => {
+ // axis=1: Series has one element per row
+ const df = DataFrame.fromColumns({ a: [1, 5, 9], b: [2, 6, 10] });
+ const lo = new Series({ data: [0, 4, 10] }); // row 0: lo=0, row 1: lo=4, row 2: lo=10
+ const result = clipAdvancedDataFrame(df, { lower: lo, axis: 1 });
+ expect(result.col("a").values).toEqual([1, 5, 10]);
+ expect(result.col("b").values).toEqual([2, 6, 10]);
+ });
+
+ test("null values pass through unchanged", () => {
+ const df = DataFrame.fromColumns({ a: [null, 5], b: [3, null] });
+ const result = clipAdvancedDataFrame(df, { lower: 0, upper: 4 });
+ expect(result.col("a").values[0]).toBeNull();
+ expect(result.col("a").values[1]).toBe(4);
+ expect(result.col("b").values[0]).toBe(3);
+ expect(result.col("b").values[1]).toBeNull();
+ });
+
+ test("no bounds returns same values", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ const result = clipAdvancedDataFrame(df);
+ expect(result.col("a").values).toEqual([1, 2, 3]);
+ expect(result.col("b").values).toEqual([4, 5, 6]);
+ });
+
+ test("preserves index", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2] });
+ expect(clipAdvancedDataFrame(df, { lower: 0 }).index.size).toBe(2);
+ });
+
+ // property: scalar bounds — all cells within [lo, hi]
+ test("property: scalar bounds respected for all cells", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 2, maxLength: 6 }),
+ fc.array(fc.integer({ min: -100, max: 100 }), { minLength: 2, maxLength: 6 }),
+ fc.integer({ min: -50, max: 0 }),
+ fc.integer({ min: 1, max: 50 }),
+ (col1, col2, lo, hi) => {
+ const len = Math.min(col1.length, col2.length);
+ const df = DataFrame.fromColumns({
+ a: col1.slice(0, len) as Scalar[],
+ b: col2.slice(0, len) as Scalar[],
+ });
+ const result = clipAdvancedDataFrame(df, { lower: lo, upper: hi });
+ const vals = [
+ ...(result.col("a").values as number[]),
+ ...(result.col("b").values as number[]),
+ ];
+ return vals.every((v) => v >= lo && v <= hi);
+ },
+ ),
+ );
+ });
+});
diff --git a/tests/stats/idxmin_idxmax.test.ts b/tests/stats/idxmin_idxmax.test.ts
new file mode 100644
index 00000000..05cfd459
--- /dev/null
+++ b/tests/stats/idxmin_idxmax.test.ts
@@ -0,0 +1,270 @@
+/**
+ * Tests for src/stats/idxmin_idxmax.ts
+ * — idxminSeries, idxmaxSeries, idxminDataFrame, idxmaxDataFrame
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ idxmaxDataFrame,
+ idxmaxSeries,
+ idxminDataFrame,
+ idxminSeries,
+} from "../../src/index.ts";
+import type { Label, Scalar } from "../../src/index.ts";
+
+// ─── helpers ─────────────────────────────────────────────────────────────────
+
+function s(data: readonly Scalar[], index?: readonly Label[]): Series {
+ return new Series({ data: [...data], ...(index !== undefined ? { index: [...index] } : {}) });
+}
+
+// ─── idxminSeries ─────────────────────────────────────────────────────────────
+
+describe("idxminSeries", () => {
+ it("returns label of the minimum value", () => {
+ const series = s([3, 1, 4, 1, 5], ["a", "b", "c", "d", "e"]);
+ expect(idxminSeries(series)).toBe("b"); // first occurrence of minimum 1
+ });
+
+ it("returns integer index label for default index", () => {
+ const series = s([10, 3, 7]);
+ expect(idxminSeries(series)).toBe(1);
+ });
+
+ it("handles single element", () => {
+ const series = s([42], ["x"]);
+ expect(idxminSeries(series)).toBe("x");
+ });
+
+ it("returns null for empty series", () => {
+ const series = s([]);
+ expect(idxminSeries(series)).toBeNull();
+ });
+
+ it("skips NaN by default (skipna=true)", () => {
+ const series = s([Number.NaN, 2, 1, Number.NaN], ["a", "b", "c", "d"]);
+ expect(idxminSeries(series)).toBe("c");
+ });
+
+ it("skips null values by default", () => {
+ const series = s([null, 5, 2, null], ["a", "b", "c", "d"]);
+ expect(idxminSeries(series)).toBe("c");
+ });
+
+ it("returns null when all values are NaN with skipna=true", () => {
+ const series = s([Number.NaN, Number.NaN], ["a", "b"]);
+ expect(idxminSeries(series)).toBeNull();
+ });
+
+ it("returns null when any value is NaN with skipna=false", () => {
+ const series = s([1, Number.NaN, 3], ["a", "b", "c"]);
+ expect(idxminSeries(series, { skipna: false })).toBeNull();
+ });
+
+ it("returns correct label with skipna=false when no NaN", () => {
+ const series = s([5, 2, 8], ["a", "b", "c"]);
+ expect(idxminSeries(series, { skipna: false })).toBe("b");
+ });
+
+ it("handles negative numbers", () => {
+ const series = s([-1, -5, -3], ["x", "y", "z"]);
+ expect(idxminSeries(series)).toBe("y");
+ });
+
+ it("handles all equal values — returns first label", () => {
+ const series = s([7, 7, 7], ["p", "q", "r"]);
+ expect(idxminSeries(series)).toBe("p");
+ });
+
+ it("works with string values (lexicographic min)", () => {
+ const series = s(["banana", "apple", "cherry"], ["a", "b", "c"]);
+ expect(idxminSeries(series)).toBe("b"); // "apple" < "banana" < "cherry"
+ });
+
+ it("handles NaN at the start with skipna=true", () => {
+ const series = s([Number.NaN, 3, 1], ["a", "b", "c"]);
+ expect(idxminSeries(series)).toBe("c");
+ });
+});
+
+// ─── idxmaxSeries ─────────────────────────────────────────────────────────────
+
+describe("idxmaxSeries", () => {
+ it("returns label of the maximum value", () => {
+ const series = s([3, 1, 4, 1, 5], ["a", "b", "c", "d", "e"]);
+ expect(idxmaxSeries(series)).toBe("e");
+ });
+
+ it("returns integer index label for default index", () => {
+ const series = s([10, 3, 7]);
+ expect(idxmaxSeries(series)).toBe(0);
+ });
+
+ it("handles single element", () => {
+ const series = s([42], ["x"]);
+ expect(idxmaxSeries(series)).toBe("x");
+ });
+
+ it("returns null for empty series", () => {
+ const series = s([]);
+ expect(idxmaxSeries(series)).toBeNull();
+ });
+
+ it("skips NaN by default (skipna=true)", () => {
+ const series = s([Number.NaN, 2, 9, Number.NaN], ["a", "b", "c", "d"]);
+ expect(idxmaxSeries(series)).toBe("c");
+ });
+
+ it("returns null when all values are NaN with skipna=true", () => {
+ const series = s([Number.NaN, Number.NaN], ["a", "b"]);
+ expect(idxmaxSeries(series)).toBeNull();
+ });
+
+ it("returns null when any value is NaN with skipna=false", () => {
+ const series = s([1, Number.NaN, 3], ["a", "b", "c"]);
+ expect(idxmaxSeries(series, { skipna: false })).toBeNull();
+ });
+
+ it("handles negative numbers", () => {
+ const series = s([-1, -5, -3], ["x", "y", "z"]);
+ expect(idxmaxSeries(series)).toBe("x");
+ });
+
+ it("all equal — returns first label", () => {
+ const series = s([3, 3, 3], ["p", "q", "r"]);
+ expect(idxmaxSeries(series)).toBe("p");
+ });
+
+ it("works with string values (lexicographic max)", () => {
+ const series = s(["banana", "apple", "cherry"], ["a", "b", "c"]);
+ expect(idxmaxSeries(series)).toBe("c"); // "cherry" > "banana" > "apple"
+ });
+});
+
+// ─── idxminDataFrame ──────────────────────────────────────────────────────────
+
+describe("idxminDataFrame", () => {
+ it("returns row label of minimum for each column", () => {
+ const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] });
+ const result = idxminDataFrame(df);
+ expect(result.at("a")).toBe("y"); // min of a is 1 at row "y"
+ expect(result.at("b")).toBe("z"); // min of b is 5 at row "z"
+ });
+
+ it("result is indexed by column names", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ const result = idxminDataFrame(df);
+ expect([...result.index.values]).toEqual(["a", "b"]);
+ });
+
+ it("skips NaN by default", () => {
+ const df = DataFrame.fromColumns(
+ { a: [Number.NaN, 2, 1], b: [5, Number.NaN, 3] },
+ { index: ["x", "y", "z"] },
+ );
+ const result = idxminDataFrame(df);
+ expect(result.at("a")).toBe("z");
+ expect(result.at("b")).toBe("z");
+ });
+
+ it("returns null for column with all NaN (skipna=true)", () => {
+ const df = DataFrame.fromColumns(
+ { a: [1, 2], b: [Number.NaN, Number.NaN] },
+ { index: ["x", "y"] },
+ );
+ const result = idxminDataFrame(df);
+ expect(result.at("a")).toBe("x");
+ expect(result.at("b")).toBeNull();
+ });
+
+ it("handles single row DataFrame", () => {
+ const df = DataFrame.fromColumns({ a: [42], b: [7] }, { index: ["row0"] });
+ const result = idxminDataFrame(df);
+ expect(result.at("a")).toBe("row0");
+ expect(result.at("b")).toBe("row0");
+ });
+});
+
+// ─── idxmaxDataFrame ──────────────────────────────────────────────────────────
+
+describe("idxmaxDataFrame", () => {
+ it("returns row label of maximum for each column", () => {
+ const df = DataFrame.fromColumns({ a: [3, 1, 4], b: [10, 20, 5] }, { index: ["x", "y", "z"] });
+ const result = idxmaxDataFrame(df);
+ expect(result.at("a")).toBe("z"); // max of a is 4 at row "z"
+ expect(result.at("b")).toBe("y"); // max of b is 20 at row "y"
+ });
+
+ it("result is indexed by column names", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ const result = idxmaxDataFrame(df);
+ expect([...result.index.values]).toEqual(["a", "b"]);
+ });
+
+ it("skips NaN by default", () => {
+ const df = DataFrame.fromColumns(
+ { a: [Number.NaN, 2, 1], b: [5, Number.NaN, 3] },
+ { index: ["x", "y", "z"] },
+ );
+ const result = idxmaxDataFrame(df);
+ expect(result.at("a")).toBe("y");
+ expect(result.at("b")).toBe("x");
+ });
+
+ it("handles single row DataFrame", () => {
+ const df = DataFrame.fromColumns({ a: [42], b: [7] }, { index: ["row0"] });
+ const result = idxmaxDataFrame(df);
+ expect(result.at("a")).toBe("row0");
+ expect(result.at("b")).toBe("row0");
+ });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("idxminSeries property tests", () => {
+ it("idxmin label points to minimum value in series", () => {
+ fc.assert(
+ fc.property(fc.array(fc.double({ noNaN: true }), { minLength: 1, maxLength: 20 }), (data) => {
+ const series = s(data);
+ const label = idxminSeries(series);
+ if (label === null) {
+ return true;
+ }
+ const minVal = Math.min(...data);
+ return series.at(label as number) === minVal;
+ }),
+ );
+ });
+
+ it("idxmax label points to maximum value in series", () => {
+ fc.assert(
+ fc.property(fc.array(fc.double({ noNaN: true }), { minLength: 1, maxLength: 20 }), (data) => {
+ const series = s(data);
+ const label = idxmaxSeries(series);
+ if (label === null) {
+ return true;
+ }
+ const maxVal = Math.max(...data);
+ return series.at(label as number) === maxVal;
+ }),
+ );
+ });
+
+ it("idxmin and idxmax are consistent — min <= max", () => {
+ fc.assert(
+ fc.property(fc.array(fc.double({ noNaN: true }), { minLength: 2, maxLength: 20 }), (data) => {
+ const series = s(data);
+ const minLabel = idxminSeries(series);
+ const maxLabel = idxmaxSeries(series);
+ if (minLabel === null || maxLabel === null) {
+ return true;
+ }
+ const minVal = series.at(minLabel as number) as number;
+ const maxVal = series.at(maxLabel as number) as number;
+ return minVal <= maxVal;
+ }),
+ );
+ });
+});
diff --git a/tests/stats/mode.test.ts b/tests/stats/mode.test.ts
new file mode 100644
index 00000000..98e81afa
--- /dev/null
+++ b/tests/stats/mode.test.ts
@@ -0,0 +1,202 @@
+/**
+ * Tests for src/stats/mode.ts — modeSeries() and modeDataFrame().
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { DataFrame, Series, modeDataFrame, modeSeries } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── modeSeries ───────────────────────────────────────────────────────────────
+
+describe("modeSeries", () => {
+ it("single mode", () => {
+ const s = new Series({ data: [1, 2, 2, 3] });
+ const result = modeSeries(s);
+ expect(result.values).toEqual([2]);
+ expect(result.index.values).toEqual([0]);
+ });
+
+ it("multiple modes (tie)", () => {
+ const s = new Series({ data: [1, 1, 2, 2, 3] });
+ const result = modeSeries(s);
+ expect(result.values).toEqual([1, 2]);
+ });
+
+ it("all unique values — all are modes", () => {
+ const s = new Series({ data: [3, 1, 2] });
+ const result = modeSeries(s);
+ expect(result.values).toEqual([1, 2, 3]);
+ });
+
+ it("empty series returns empty", () => {
+ const s = new Series({ data: [] as Scalar[] });
+ const result = modeSeries(s);
+ expect(result.values).toEqual([]);
+ });
+
+ it("series with string values", () => {
+ const s = new Series({ data: ["a", "b", "b", "c"] });
+ const result = modeSeries(s);
+ expect(result.values).toEqual(["b"]);
+ });
+
+ it("dropna=true (default) excludes nulls", () => {
+ const s = new Series({ data: [null, 1, 1, null, null] as Scalar[] });
+ const result = modeSeries(s);
+ expect(result.values).toEqual([1]);
+ });
+
+ it("dropna=false counts nulls", () => {
+ const s = new Series({ data: [null, null, 1] as Scalar[] });
+ const result = modeSeries(s, { dropna: false });
+ expect(result.values[0]).toBeNull();
+ });
+
+ it("all null values with dropna=true returns empty", () => {
+ const s = new Series({ data: [null, null] as Scalar[] });
+ const result = modeSeries(s);
+ expect(result.values).toEqual([]);
+ });
+
+ it("single element", () => {
+ const s = new Series({ data: [42] });
+ expect(modeSeries(s).values).toEqual([42]);
+ });
+
+ it("modes are sorted ascending for numbers", () => {
+ const s = new Series({ data: [5, 5, 3, 3, 1, 1] });
+ const result = modeSeries(s);
+ expect(result.values).toEqual([1, 3, 5]);
+ });
+
+ it("modes are sorted ascending for strings", () => {
+ const s = new Series({ data: ["c", "c", "a", "a", "b", "b"] });
+ const result = modeSeries(s);
+ expect(result.values).toEqual(["a", "b", "c"]);
+ });
+
+ it("preserves series name", () => {
+ const s = new Series({ data: [1, 1, 2], name: "x" });
+ expect(modeSeries(s).name).toBe("x");
+ });
+
+ it("result index is 0-based integers", () => {
+ const s = new Series({ data: [1, 1, 2, 2] });
+ const result = modeSeries(s);
+ expect(result.index.values).toEqual([0, 1]);
+ });
+});
+
+// ─── modeDataFrame — axis=0 ───────────────────────────────────────────────────
+
+describe("modeDataFrame axis=0", () => {
+ it("single mode per column", () => {
+ const df = DataFrame.fromColumns({ a: [1, 1, 2], b: [3, 3, 3] });
+ const result = modeDataFrame(df);
+ expect(result.col("a")?.values).toEqual([1]);
+ expect(result.col("b")?.values).toEqual([3]);
+ });
+
+ it("null-pads shorter mode lists", () => {
+ const df = DataFrame.fromColumns({ a: [1, 1, 2, 2], b: [5, 5, 5, 6] });
+ const result = modeDataFrame(df);
+ // a has 2 modes [1,2], b has 1 mode [5]
+ expect(result.col("a")?.values).toEqual([1, 2]);
+ expect(result.col("b")?.values).toEqual([5, null]);
+ });
+
+ it("numericOnly skips string columns", () => {
+ const df = DataFrame.fromColumns({ n: [1, 1, 2], s: ["x", "x", "y"] });
+ const result = modeDataFrame(df, { numericOnly: true });
+ expect(result.columns.values).toContain("n");
+ expect(result.columns.values).not.toContain("s");
+ });
+
+ it("dropna=false counts nulls", () => {
+ const df = DataFrame.fromColumns({ a: [null, null, 1] as Scalar[] });
+ const result = modeDataFrame(df, { dropna: false });
+ expect(result.col("a")?.values[0]).toBeNull();
+ });
+
+ it("result index is 0-based", () => {
+ const df = DataFrame.fromColumns({ a: [1, 1, 2] });
+ const result = modeDataFrame(df);
+ expect(result.index.values).toEqual([0]);
+ });
+});
+
+// ─── modeDataFrame — axis=1 ───────────────────────────────────────────────────
+
+describe("modeDataFrame axis=1", () => {
+ it("row-wise mode", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [1, 3], c: [2, 3] });
+ const result = modeDataFrame(df, { axis: 1 });
+ // row 0: [1,1,2] → mode=1
+ expect(result.col("0")?.values[0]).toBe(1);
+ // row 1: [2,3,3] → mode=3
+ expect(result.col("0")?.values[1]).toBe(3);
+ });
+
+ it("preserves original row index", () => {
+ const df = DataFrame.fromColumns(
+ { a: [10, 20, 30], b: [10, 10, 30] },
+ { index: ["x", "y", "z"] },
+ );
+ const result = modeDataFrame(df, { axis: 1 });
+ expect(result.index.values).toEqual(["x", "y", "z"]);
+ });
+});
+
+// ─── property tests ───────────────────────────────────────────────────────────
+
+describe("modeSeries property tests", () => {
+ it("mode value always appears in original series", () => {
+ fc.assert(
+ fc.property(fc.array(fc.integer({ min: 1, max: 5 }), { minLength: 1 }), (arr) => {
+ const s = new Series({ data: arr });
+ const result = modeSeries(s);
+ for (const v of result.values as number[]) {
+ expect(arr).toContain(v);
+ }
+ }),
+ );
+ });
+
+ it("all mode values have equal and maximal frequency", () => {
+ fc.assert(
+ fc.property(fc.array(fc.integer({ min: 1, max: 4 }), { minLength: 2 }), (arr) => {
+ const s = new Series({ data: arr });
+ const result = modeSeries(s);
+ if ((result.values as number[]).length === 0) {
+ return;
+ }
+ const freq = new Map();
+ for (const v of arr) {
+ freq.set(v, (freq.get(v) ?? 0) + 1);
+ }
+ const modeFreq = freq.get(result.values[0] as number) ?? 0;
+ for (const v of result.values as number[]) {
+ expect(freq.get(v)).toBe(modeFreq);
+ }
+ // No non-mode value has higher frequency
+ for (const [val, cnt] of freq) {
+ if (!(result.values as number[]).includes(val)) {
+ expect(cnt).toBeLessThan(modeFreq);
+ }
+ }
+ }),
+ );
+ });
+
+ it("result is sorted ascending", () => {
+ fc.assert(
+ fc.property(fc.array(fc.integer({ min: 1, max: 5 }), { minLength: 2 }), (arr) => {
+ const s = new Series({ data: arr });
+ const result = modeSeries(s).values as number[];
+ for (let i = 1; i < result.length; i++) {
+ expect((result[i] as number) >= (result[i - 1] as number)).toBe(true);
+ }
+ }),
+ );
+ });
+});
diff --git a/tests/stats/nancumops.test.ts b/tests/stats/nancumops.test.ts
new file mode 100644
index 00000000..3542ed7e
--- /dev/null
+++ b/tests/stats/nancumops.test.ts
@@ -0,0 +1,261 @@
+/**
+ * Tests for stats/nancumops — nan-ignoring aggregate functions.
+ */
+
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { Series } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+import {
+ nancount,
+ nanmax,
+ nanmean,
+ nanmedian,
+ nanmin,
+ nanprod,
+ nanstd,
+ nansum,
+ nanvar,
+} from "../../src/index.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function series(data: Scalar[]): Series {
+ return new Series({ data });
+}
+
+// ─── nancount ─────────────────────────────────────────────────────────────────
+
+describe("nancount", () => {
+ it("counts only numeric non-NaN values", () => {
+ expect(nancount([1, 2, 3])).toBe(3);
+ expect(nancount([1, Number.NaN, null, undefined, 2])).toBe(2);
+ expect(nancount([])).toBe(0);
+ expect(nancount([null, undefined, Number.NaN])).toBe(0);
+ });
+
+ it("ignores string and boolean scalars", () => {
+ expect(nancount([1, "a", true, 2])).toBe(2);
+ });
+
+ it("works with a Series", () => {
+ expect(nancount(series([1, 2, null, Number.NaN]))).toBe(2);
+ });
+});
+
+// ─── nansum ───────────────────────────────────────────────────────────────────
+
+describe("nansum", () => {
+ it("sums numeric values, ignoring NaN/null", () => {
+ expect(nansum([1, 2, 3])).toBe(6);
+ expect(nansum([1, Number.NaN, null, undefined, 2])).toBe(3);
+ expect(nansum([Number.NaN, null])).toBe(0);
+ expect(nansum([])).toBe(0);
+ });
+
+ it("returns 0 for all-missing input", () => {
+ expect(nansum([null, undefined, Number.NaN])).toBe(0);
+ });
+
+ it("works with a Series", () => {
+ expect(nansum(series([1, 2, null, 3]))).toBe(6);
+ });
+
+ it("works with negative values", () => {
+ expect(nansum([-1, -2, Number.NaN, 3])).toBe(0);
+ });
+});
+
+// ─── nanmean ──────────────────────────────────────────────────────────────────
+
+describe("nanmean", () => {
+ it("computes mean over non-NaN values", () => {
+ expect(nanmean([1, 2, 3])).toBe(2);
+ expect(nanmean([1, Number.NaN, null, 3])).toBe(2);
+ expect(nanmean([4, 4, Number.NaN, 8])).toBe(16 / 3);
+ });
+
+ it("returns NaN for empty input", () => {
+ expect(Number.isNaN(nanmean([]))).toBe(true);
+ expect(Number.isNaN(nanmean([null, Number.NaN]))).toBe(true);
+ });
+
+ it("works with a Series", () => {
+ expect(nanmean(series([2, null, 4]))).toBe(3);
+ });
+});
+
+// ─── nanmedian ────────────────────────────────────────────────────────────────
+
+describe("nanmedian", () => {
+ it("returns median of odd-count values", () => {
+ expect(nanmedian([3, 1, 2])).toBe(2);
+ expect(nanmedian([1, Number.NaN, 3, null, 2])).toBe(2);
+ });
+
+ it("returns average of two middle values for even count", () => {
+ expect(nanmedian([1, 2, 3, 4])).toBe(2.5);
+    expect(nanmedian([1, Number.NaN, 3, 4])).toBe(3); // NaN dropped → odd count [1,3,4] → 3
+ });
+
+ it("returns NaN for empty / all-missing", () => {
+ expect(Number.isNaN(nanmedian([]))).toBe(true);
+ expect(Number.isNaN(nanmedian([null, Number.NaN]))).toBe(true);
+ });
+
+ it("handles single value", () => {
+ expect(nanmedian([42, null])).toBe(42);
+ });
+
+ it("works with a Series", () => {
+ expect(nanmedian(series([1, null, 3]))).toBe(2);
+ });
+});
+
+// ─── nanvar ───────────────────────────────────────────────────────────────────
+
+describe("nanvar", () => {
+ it("computes sample variance (ddof=1 default)", () => {
+ // [2,4,4,4,5,5,7,9] — numpy var ddof=1 ≈ 4.5714...
+ const v = nanvar([2, 4, 4, 4, 5, 5, 7, 9]);
+ expect(v).toBeCloseTo(4.5714, 3);
+ });
+
+ it("computes population variance (ddof=0)", () => {
+ // [2,4,6] mean=4 → (4+0+4)/3 = 8/3
+ expect(nanvar([2, 4, 6], { ddof: 0 })).toBeCloseTo(8 / 3, 10);
+ });
+
+ it("ignores NaN/null values", () => {
+ expect(nanvar([2, Number.NaN, 4, null, 6], { ddof: 0 })).toBeCloseTo(8 / 3, 10);
+ });
+
+ it("returns NaN when n <= ddof", () => {
+ expect(Number.isNaN(nanvar([]))).toBe(true);
+ expect(Number.isNaN(nanvar([5]))).toBe(true); // n=1, ddof=1
+ expect(Number.isNaN(nanvar([5], { ddof: 0 }))).toBe(false); // n=1, ddof=0 → 0
+ });
+
+ it("works with a Series", () => {
+ const v = nanvar(series([2, 4, null, 6]), { ddof: 0 });
+ expect(v).toBeCloseTo(8 / 3, 10);
+ });
+});
+
+// ─── nanstd ───────────────────────────────────────────────────────────────────
+
+describe("nanstd", () => {
+ it("is the square root of nanvar", () => {
+ const xs: Scalar[] = [2, 4, 4, 4, 5, 5, 7, 9];
+ expect(nanstd(xs)).toBeCloseTo(Math.sqrt(nanvar(xs)), 10);
+ });
+
+ it("ignores NaN/null", () => {
+ expect(nanstd([2, Number.NaN, 4, null, 6], { ddof: 0 })).toBeCloseTo(Math.sqrt(8 / 3), 10);
+ });
+
+ it("returns NaN for insufficient data", () => {
+ expect(Number.isNaN(nanstd([]))).toBe(true);
+ expect(Number.isNaN(nanstd([5]))).toBe(true);
+ });
+});
+
+// ─── nanmin ───────────────────────────────────────────────────────────────────
+
+describe("nanmin", () => {
+ it("returns minimum, ignoring NaN/null", () => {
+ expect(nanmin([3, 1, 2])).toBe(1);
+ expect(nanmin([3, Number.NaN, null, 1])).toBe(1);
+ expect(nanmin([-5, -3, -10])).toBe(-10);
+ });
+
+ it("returns NaN for empty / all-missing", () => {
+ expect(Number.isNaN(nanmin([]))).toBe(true);
+ expect(Number.isNaN(nanmin([null, Number.NaN]))).toBe(true);
+ });
+
+ it("works with a Series", () => {
+ expect(nanmin(series([3, null, 1, 2]))).toBe(1);
+ });
+});
+
+// ─── nanmax ───────────────────────────────────────────────────────────────────
+
+describe("nanmax", () => {
+ it("returns maximum, ignoring NaN/null", () => {
+ expect(nanmax([3, 1, 2])).toBe(3);
+ expect(nanmax([3, Number.NaN, null, 1])).toBe(3);
+ expect(nanmax([-5, -3, -10])).toBe(-3);
+ });
+
+ it("returns NaN for empty / all-missing", () => {
+ expect(Number.isNaN(nanmax([]))).toBe(true);
+ expect(Number.isNaN(nanmax([null, Number.NaN]))).toBe(true);
+ });
+
+ it("works with a Series", () => {
+ expect(nanmax(series([3, null, 1, 2]))).toBe(3);
+ });
+});
+
+// ─── nanprod ──────────────────────────────────────────────────────────────────
+
+describe("nanprod", () => {
+ it("returns product, ignoring NaN/null", () => {
+ expect(nanprod([1, 2, 3])).toBe(6);
+ expect(nanprod([1, Number.NaN, null, 2, 3])).toBe(6);
+ expect(nanprod([])).toBe(1);
+ expect(nanprod([null, Number.NaN])).toBe(1);
+ });
+
+ it("works with a Series", () => {
+ expect(nanprod(series([2, null, 3]))).toBe(6);
+ });
+
+ it("handles zero", () => {
+ expect(nanprod([2, 0, 3])).toBe(0);
+ expect(nanprod([2, Number.NaN, 0])).toBe(0);
+ });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("property tests", () => {
+ const finiteNum = fc.float({ noDefaultInfinity: true, noNaN: true, min: -1e6, max: 1e6 });
+ const posNum = fc.float({ min: 0, max: 100, noNaN: true, noDefaultInfinity: true });
+
+ it("nansum is >= 0 for all-positive inputs", () => {
+ fc.assert(
+ fc.property(fc.array(posNum), (xs) => {
+ return nansum(xs) >= 0;
+ }),
+ );
+ });
+
+ it("nanmean is between nanmin and nanmax for non-empty finite arrays", () => {
+ fc.assert(
+ fc.property(fc.array(finiteNum, { minLength: 1 }), (xs) => {
+ const mean = nanmean(xs);
+ const mn = nanmin(xs);
+ const mx = nanmax(xs);
+ return mean >= mn - 1e-9 && mean <= mx + 1e-9;
+ }),
+ );
+ });
+
+ it("nanvar >= 0 for all finite inputs", () => {
+ fc.assert(
+ fc.property(fc.array(finiteNum, { minLength: 2 }), (xs) => {
+ return nanvar(xs) >= 0;
+ }),
+ );
+ });
+
+ it("nancount === xs.length for all-finite arrays", () => {
+ fc.assert(
+ fc.property(fc.array(finiteNum, { minLength: 0, maxLength: 50 }), (xs) => {
+ return nancount(xs) === xs.length;
+ }),
+ );
+ });
+});
diff --git a/tests/stats/nunique.test.ts b/tests/stats/nunique.test.ts
new file mode 100644
index 00000000..78cb9d4a
--- /dev/null
+++ b/tests/stats/nunique.test.ts
@@ -0,0 +1,238 @@
+/**
+ * Tests for src/stats/nunique.ts — nuniqueSeries(), nuniqueDataFrame(),
+ * anySeries(), allSeries(), anyDataFrame(), allDataFrame().
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ allDataFrame,
+ allSeries,
+ anyDataFrame,
+ anySeries,
+ nuniqueDataFrame,
+ nuniqueSeries,
+} from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── nuniqueSeries ────────────────────────────────────────────────────────────
+
+describe("nuniqueSeries", () => {
+ it("counts distinct values (no nulls)", () => {
+ const s = new Series({ data: [1, 2, 2, 3, 3, 3] });
+ expect(nuniqueSeries(s)).toBe(3);
+ });
+
+ it("dropna=true (default) excludes null/NaN from count", () => {
+ const s = new Series({ data: [1, 2, 2, null, null] as Scalar[] });
+ expect(nuniqueSeries(s)).toBe(2);
+ });
+
+ it("dropna=false includes null in unique count", () => {
+ const s = new Series({ data: [1, 2, null] as Scalar[] });
+ expect(nuniqueSeries(s, { dropna: false })).toBe(3);
+ });
+
+ it("empty series returns 0", () => {
+ const s = new Series({ data: [] as Scalar[] });
+ expect(nuniqueSeries(s)).toBe(0);
+ });
+
+ it("all-null series with dropna returns 0", () => {
+ const s = new Series({ data: [null, null] as Scalar[] });
+ expect(nuniqueSeries(s)).toBe(0);
+ });
+
+ it("string values", () => {
+ const s = new Series({ data: ["a", "b", "a", "c"] as Scalar[] });
+ expect(nuniqueSeries(s)).toBe(3);
+ });
+});
+
+// ─── nuniqueDataFrame ─────────────────────────────────────────────────────────
+
+describe("nuniqueDataFrame", () => {
+ it("axis=0 (default) counts per column", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 2], b: ["x", "x", "y"] });
+ const result = nuniqueDataFrame(df);
+ expect(result.index.values).toEqual(["a", "b"]);
+ expect(result.values[0]).toBe(2);
+ expect(result.values[1]).toBe(2);
+ });
+
+ it("axis=1 counts per row", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [1, 3] });
+ const result = nuniqueDataFrame(df, { axis: 1 });
+ expect(result.values[0]).toBe(1); // row [1,1] → 1 unique
+ expect(result.values[1]).toBe(2); // row [2,3] → 2 unique
+ });
+
+ it("dropna=true excludes nulls in count", () => {
+ const df = DataFrame.fromColumns({ a: [1, null, 1] as Scalar[] });
+ const result = nuniqueDataFrame(df);
+ expect(result.values[0]).toBe(1);
+ });
+});
+
+// ─── anySeries ────────────────────────────────────────────────────────────────
+
+describe("anySeries", () => {
+ it("returns true when any element is truthy", () => {
+ const s = new Series({ data: [0, 0, 1] });
+ expect(anySeries(s)).toBe(true);
+ });
+
+ it("returns false when all elements are falsy", () => {
+ const s = new Series({ data: [0, 0, 0] });
+ expect(anySeries(s)).toBe(false);
+ });
+
+ it("empty series returns false", () => {
+ const s = new Series({ data: [] as Scalar[] });
+ expect(anySeries(s)).toBe(false);
+ });
+
+ it("skipna=true skips null values", () => {
+ const s = new Series({ data: [null, 0, null] as Scalar[] });
+ expect(anySeries(s)).toBe(false);
+ });
+
+ it("skipna=true: non-null truthy makes it true", () => {
+ const s = new Series({ data: [null, 1] as Scalar[] });
+ expect(anySeries(s)).toBe(true);
+ });
+
+ it("all-null series with skipna=true returns false", () => {
+ const s = new Series({ data: [null, null] as Scalar[] });
+ expect(anySeries(s)).toBe(false);
+ });
+
+ it("boolean series", () => {
+ const s = new Series({ data: [false, false, true] as Scalar[] });
+ expect(anySeries(s)).toBe(true);
+ });
+
+ it("string series: non-empty string is truthy", () => {
+ const s = new Series({ data: ["", "hello"] as Scalar[] });
+ expect(anySeries(s)).toBe(true);
+ });
+});
+
+// ─── allSeries ────────────────────────────────────────────────────────────────
+
+describe("allSeries", () => {
+ it("returns true when all elements are truthy", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect(allSeries(s)).toBe(true);
+ });
+
+ it("returns false when any element is falsy", () => {
+ const s = new Series({ data: [1, 0, 3] });
+ expect(allSeries(s)).toBe(false);
+ });
+
+ it("empty series returns true (vacuous truth)", () => {
+ const s = new Series({ data: [] as Scalar[] });
+ expect(allSeries(s)).toBe(true);
+ });
+
+ it("skipna=true: all-null series returns true (vacuous)", () => {
+ const s = new Series({ data: [null, null] as Scalar[] });
+ expect(allSeries(s)).toBe(true);
+ });
+
+ it("skipna=true skips null but checks others", () => {
+ const s = new Series({ data: [1, null, 0] as Scalar[] });
+ expect(allSeries(s)).toBe(false);
+ });
+
+ it("boolean series all true", () => {
+ const s = new Series({ data: [true, true, true] as Scalar[] });
+ expect(allSeries(s)).toBe(true);
+ });
+});
+
+// ─── anyDataFrame ─────────────────────────────────────────────────────────────
+
+describe("anyDataFrame", () => {
+ it("axis=0: reduces each column to bool", () => {
+ const df = DataFrame.fromColumns({ a: [0, 0], b: [0, 1] });
+ const result = anyDataFrame(df);
+ expect(result.index.values).toEqual(["a", "b"]);
+ expect(result.values[0]).toBe(false);
+ expect(result.values[1]).toBe(true);
+ });
+
+ it("axis=1: reduces each row to bool", () => {
+ const df = DataFrame.fromColumns({ a: [0, 1], b: [0, 0] });
+ const result = anyDataFrame(df, { axis: 1 });
+ expect(result.values[0]).toBe(false);
+ expect(result.values[1]).toBe(true);
+ });
+
+ it("boolOnly=true skips non-bool columns", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [false, false, false] as Scalar[] });
+ const result = anyDataFrame(df, { boolOnly: true });
+ expect(result.index.values).toEqual(["b"]);
+ expect(result.values[0]).toBe(false);
+ });
+});
+
+// ─── allDataFrame ─────────────────────────────────────────────────────────────
+
+describe("allDataFrame", () => {
+ it("axis=0: reduces each column to bool", () => {
+ const df = DataFrame.fromColumns({ a: [1, 1], b: [1, 0] });
+ const result = allDataFrame(df);
+ expect(result.values[0]).toBe(true);
+ expect(result.values[1]).toBe(false);
+ });
+
+ it("axis=1: reduces each row to bool", () => {
+ const df = DataFrame.fromColumns({ a: [1, 1], b: [1, 0] });
+ const result = allDataFrame(df, { axis: 1 });
+ expect(result.values[0]).toBe(true);
+ expect(result.values[1]).toBe(false);
+ });
+
+ it("empty DataFrame columns axis=1 returns all true (vacuous)", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2] });
+ const result = allDataFrame(df, { axis: 1, boolOnly: true });
+ // boolOnly excludes numeric column → empty col set → vacuous true for each row
+ expect(result.values[0]).toBe(true);
+ expect(result.values[1]).toBe(true);
+ });
+});
+
+// ─── property tests ───────────────────────────────────────────────────────────
+
+describe("nuniqueSeries — property tests", () => {
+ it("nunique is always between 0 and n", () => {
+ fc.assert(
+ fc.property(fc.array(fc.integer({ min: 0, max: 10 }), { maxLength: 20 }), (arr) => {
+ const s = new Series({ data: arr });
+ const n = nuniqueSeries(s);
+ return n >= 0 && n <= arr.length;
+ }),
+ );
+ });
+});
+
+describe("anySeries / allSeries — property tests", () => {
+ it("any >= all (if all is true then any must also be true)", () => {
+ fc.assert(
+ fc.property(fc.array(fc.integer({ min: 0, max: 1 }), { minLength: 1 }), (arr) => {
+ const s = new Series({ data: arr });
+ const a = allSeries(s);
+ const b = anySeries(s);
+      // all => any: if every element of a non-empty array is truthy, then at
+      // least one is truthy (an all-zero array returns false for both).
+ if (a) {
+ return b;
+ }
+ return true;
+ }),
+ );
+ });
+});
diff --git a/tests/stats/pct_change.test.ts b/tests/stats/pct_change.test.ts
new file mode 100644
index 00000000..f7015e4a
--- /dev/null
+++ b/tests/stats/pct_change.test.ts
@@ -0,0 +1,252 @@
+/**
+ * Tests for src/stats/pct_change.ts — pctChangeSeries, pctChangeDataFrame
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { DataFrame, Series, pctChangeDataFrame, pctChangeSeries } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── helpers ─────────────────────────────────────────────────────────────────
+
+function s(data: readonly Scalar[]): Series {
+ return new Series({ data: [...data] });
+}
+
+function nanEq(a: Scalar, b: Scalar): boolean {
+ if (typeof a === "number" && Number.isNaN(a) && typeof b === "number" && Number.isNaN(b)) {
+ return true;
+ }
+ return a === b;
+}
+
+function arrEq(a: readonly Scalar[], b: readonly Scalar[]): boolean {
+ if (a.length !== b.length) {
+ return false;
+ }
+ for (let i = 0; i < a.length; i++) {
+ if (!nanEq(a[i] as Scalar, b[i] as Scalar)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+function close(a: Scalar, b: Scalar, eps = 1e-9): boolean {
+ if (a === null && b === null) {
+ return true;
+ }
+ if (typeof a !== "number" || typeof b !== "number") {
+ return false;
+ }
+ if (Number.isNaN(a) && Number.isNaN(b)) {
+ return true;
+ }
+ return Math.abs(a - b) < eps;
+}
+
+function arrClose(a: readonly Scalar[], b: readonly Scalar[], eps = 1e-9): boolean {
+ if (a.length !== b.length) {
+ return false;
+ }
+ for (let i = 0; i < a.length; i++) {
+ if (!close(a[i] as Scalar, b[i] as Scalar, eps)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// ─── pctChangeSeries ─────────────────────────────────────────────────────────
+
+describe("pctChangeSeries", () => {
+ it("basic increasing sequence", () => {
+ const result = pctChangeSeries(s([100, 110, 121, 133.1]));
+ expect(result.values[0]).toBeNull();
+ expect(close(result.values[1] as Scalar, 0.1)).toBe(true);
+ expect(close(result.values[2] as Scalar, 0.1)).toBe(true);
+ expect(close(result.values[3] as Scalar, 0.1)).toBe(true);
+ });
+
+ it("decreasing sequence", () => {
+ const result = pctChangeSeries(s([200, 180, 162]));
+ expect(result.values[0]).toBeNull();
+ expect(close(result.values[1] as Scalar, -0.1)).toBe(true);
+ expect(close(result.values[2] as Scalar, -0.1)).toBe(true);
+ });
+
+ it("periods=2", () => {
+ const result = pctChangeSeries(s([100, 105, 110, 121]), { periods: 2 });
+ expect(result.values[0]).toBeNull();
+ expect(result.values[1]).toBeNull();
+ expect(close(result.values[2] as Scalar, 0.1)).toBe(true);
+ expect(close(result.values[3] as Scalar, (121 - 105) / 105)).toBe(true);
+ });
+
+ it("negative periods (look forward)", () => {
+ const result = pctChangeSeries(s([100, 110, 121]), { periods: -1 });
+ expect(close(result.values[0] as Scalar, 0.1)).toBe(true);
+ expect(close(result.values[1] as Scalar, 0.1)).toBe(true);
+ expect(result.values[2]).toBeNull();
+ });
+
+ it("NaN/null propagates when fillMethod=null", () => {
+ const result = pctChangeSeries(s([100, null, 110]), { fillMethod: null });
+ expect(result.values[0]).toBeNull();
+ expect(result.values[1]).toBeNull();
+ expect(result.values[2]).toBeNull();
+ });
+
+ it("fillMethod=pad fills NaN before computing", () => {
+ const result = pctChangeSeries(s([100, null, 110]), { fillMethod: "pad" });
+ // after pad-fill: [100, 100, 110]
+ // pct: [null, 0, 0.1]
+ expect(result.values[0]).toBeNull();
+ expect(close(result.values[1] as Scalar, 0)).toBe(true);
+ expect(close(result.values[2] as Scalar, 0.1)).toBe(true);
+ });
+
+ it("fillMethod=bfill fills NaN backward before computing", () => {
+ const result = pctChangeSeries(s([100, null, 110, 121]), { fillMethod: "bfill" });
+ // after bfill: [100, 110, 110, 121]
+ // pct: [null, 0.1, 0, 0.1]
+ expect(result.values[0]).toBeNull();
+ expect(close(result.values[1] as Scalar, 0.1)).toBe(true);
+ expect(close(result.values[2] as Scalar, 0)).toBe(true);
+ expect(close(result.values[3] as Scalar, 0.1)).toBe(true);
+ });
+
+ it("limit=1 caps forward-fill", () => {
+ const result = pctChangeSeries(s([100, null, null, 130]), {
+ fillMethod: "pad",
+ limit: 1,
+ });
+ // after pad with limit=1: [100, 100, null, 130]
+ // pct: [null, 0, null, null] (null/100 → null)
+ expect(result.values[0]).toBeNull();
+ expect(close(result.values[1] as Scalar, 0)).toBe(true);
+ expect(result.values[2]).toBeNull();
+ expect(result.values[3]).toBeNull();
+ });
+
+ it("zero denominator returns Infinity", () => {
+ const result = pctChangeSeries(s([0, 10]), { fillMethod: null });
+ expect(result.values[1]).toBe(Number.POSITIVE_INFINITY);
+ });
+
+ it("zero/zero denominator returns NaN", () => {
+ const result = pctChangeSeries(s([0, 0]), { fillMethod: null });
+ expect(Number.isNaN(result.values[1] as number)).toBe(true);
+ });
+
+ it("preserves Series name and index", () => {
+ const src = new Series({ data: [10, 20, 30], name: "price" });
+ const result = pctChangeSeries(src);
+ expect(result.name).toBe("price");
+ expect(result.index.size).toBe(3);
+ });
+
+ it("empty series returns empty", () => {
+ const result = pctChangeSeries(s([]));
+ expect(result.values.length).toBe(0);
+ });
+
+ it("single-element series returns [null]", () => {
+ const result = pctChangeSeries(s([42]));
+ expect(result.values[0]).toBeNull();
+ });
+});
+
+// ─── pctChangeDataFrame ───────────────────────────────────────────────────────
+
+describe("pctChangeDataFrame", () => {
+ it("column-wise (default)", () => {
+ const df = DataFrame.fromColumns({
+ a: [100, 110, 121],
+ b: [200, 180, 198],
+ });
+ const result = pctChangeDataFrame(df);
+ const colA = result.col("a").values;
+ const colB = result.col("b").values;
+ expect(colA[0]).toBeNull();
+ expect(close(colA[1] as Scalar, 0.1)).toBe(true);
+ expect(close(colA[2] as Scalar, 0.1)).toBe(true);
+ expect(colB[0]).toBeNull();
+ expect(close(colB[1] as Scalar, -0.1)).toBe(true);
+ expect(close(colB[2] as Scalar, 0.1)).toBe(true);
+ });
+
+ it("row-wise (axis=1)", () => {
+ const df = DataFrame.fromColumns({
+ a: [100, 200],
+ b: [110, 220],
+ c: [121, 242],
+ });
+ const result = pctChangeDataFrame(df, { axis: 1 });
+ // row 0: [100, 110, 121] → [null, 0.1, 0.1]
+ // row 1: [200, 220, 242] → [null, 0.1, 0.1]
+ const row0a = result.col("a").values[0];
+ const row0b = result.col("b").values[0];
+ const row0c = result.col("c").values[0];
+ expect(row0a).toBeNull();
+ expect(close(row0b as Scalar, 0.1)).toBe(true);
+ expect(close(row0c as Scalar, 0.1)).toBe(true);
+ const row1a = result.col("a").values[1];
+ const row1b = result.col("b").values[1];
+ expect(row1a).toBeNull();
+ expect(close(row1b as Scalar, 0.1)).toBe(true);
+ });
+
+ it("preserves column order", () => {
+ const df = DataFrame.fromColumns({
+ x: [1, 2],
+ y: [3, 6],
+ });
+ const result = pctChangeDataFrame(df);
+ expect(result.columns.values).toEqual(["x", "y"]);
+ });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("pctChangeSeries — property tests", () => {
+ it("result length equals input length", () => {
+ fc.assert(
+ fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 0, maxLength: 50 }), (arr) => {
+ const result = pctChangeSeries(s(arr));
+ return result.values.length === arr.length;
+ }),
+ );
+ });
+
+ it("first element is always null for periods=1", () => {
+ fc.assert(
+ fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 1, maxLength: 50 }), (arr) => {
+ const result = pctChangeSeries(s(arr));
+ return result.values[0] === null;
+ }),
+ );
+ });
+
+ it("pct_change(x, -p) equals pct_change_reversed pattern", () => {
+ // For a sequence of positive numbers with periods=1 and periods=-1:
+ // result[-1][i] represents the change looking forward, so result[-1][i] = (x[i+1]-x[i])/x[i]
+ // and result[+1][i+1] = (x[i+1]-x[i])/x[i], so they should agree on matching indices
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, min: 1, max: 1000 }), { minLength: 3, maxLength: 20 }),
+ (arr) => {
+ const fwd = pctChangeSeries(s(arr), { periods: -1, fillMethod: null });
+ const bwd = pctChangeSeries(s(arr), { periods: 1, fillMethod: null });
+ // fwd[i] = (arr[i+1] - arr[i]) / arr[i]
+ // bwd[i+1] = (arr[i+1] - arr[i]) / arr[i] ← same ratio
+ for (let i = 0; i < arr.length - 1; i++) {
+ if (!close(fwd.values[i] as Scalar, bwd.values[i + 1] as Scalar, 1e-6)) {
+ return false;
+ }
+ }
+ return true;
+ },
+ ),
+ );
+ });
+});
diff --git a/tests/stats/quantile.test.ts b/tests/stats/quantile.test.ts
new file mode 100644
index 00000000..4abf1300
--- /dev/null
+++ b/tests/stats/quantile.test.ts
@@ -0,0 +1,364 @@
+/**
+ * Tests for stats/quantile — quantileSeries and quantileDataFrame.
+ */
+
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import type { Scalar } from "../../src/index.ts";
+import { DataFrame, Series } from "../../src/index.ts";
+import { quantileDataFrame, quantileSeries } from "../../src/index.ts";
+
+// ─── quantileSeries ───────────────────────────────────────────────────────────
+
+describe("quantileSeries — basic", () => {
+ it("median of odd-length series", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ expect(quantileSeries(s)).toBe(3);
+ });
+
+ it("median of even-length series — linear interpolation", () => {
+ const s = new Series({ data: [1, 2, 3, 4] });
+ expect(quantileSeries(s, { q: 0.5 })).toBe(2.5);
+ });
+
+ it("q=0 returns minimum", () => {
+ const s = new Series({ data: [3, 1, 4, 1, 5, 9] });
+ expect(quantileSeries(s, { q: 0 })).toBe(1);
+ });
+
+ it("q=1 returns maximum", () => {
+ const s = new Series({ data: [3, 1, 4, 1, 5, 9] });
+ expect(quantileSeries(s, { q: 1 })).toBe(9);
+ });
+
+ it("q=0.25 first quartile", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ // pos = 0.25 * 4 = 1.0 → exact index 1 → value 2
+ expect(quantileSeries(s, { q: 0.25 })).toBe(2);
+ });
+
+ it("q=0.75 third quartile", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ // pos = 0.75 * 4 = 3.0 → exact index 3 → value 4
+ expect(quantileSeries(s, { q: 0.75 })).toBe(4);
+ });
+
+ it("linear interpolation between two values", () => {
+ const s = new Series({ data: [0, 10] });
+ // pos = 0.5 * 1 = 0.5; lo=0, hi=1; 0*(0.5) + 10*(0.5) = 5
+ expect(quantileSeries(s, { q: 0.5 })).toBe(5);
+ });
+
+ it("unsorted input — sorts internally", () => {
+ const s = new Series({ data: [5, 1, 3, 2, 4] });
+ expect(quantileSeries(s, { q: 0.5 })).toBe(3);
+ });
+
+ it("single element series", () => {
+ const s = new Series({ data: [42] });
+ expect(quantileSeries(s, { q: 0.25 })).toBe(42);
+ expect(quantileSeries(s, { q: 0.5 })).toBe(42);
+ expect(quantileSeries(s, { q: 0.75 })).toBe(42);
+ });
+
+ it("empty series returns NaN", () => {
+ const s = new Series({ data: [] });
+ expect(quantileSeries(s, { q: 0.5 })).toBeNaN();
+ });
+
+ it("series with all NaN/null — skipna=true returns NaN", () => {
+ const s = new Series({ data: [null, null, Number.NaN] });
+ expect(quantileSeries(s, { q: 0.5 })).toBeNaN();
+ });
+
+ it("series with NaN — skipna=true (default) ignores NaN", () => {
+ const s = new Series({ data: [1, Number.NaN, 3, null, 5] });
+ // valid: [1, 3, 5]; sorted: [1, 3, 5]; median = 3
+ expect(quantileSeries(s, { q: 0.5 })).toBe(3);
+ });
+
+ it("skipna=false propagates NaN", () => {
+ const s = new Series({ data: [1, Number.NaN, 3] });
+ expect(quantileSeries(s, { q: 0.5, skipna: false })).toBeNaN();
+ });
+});
+
+describe("quantileSeries — multi-q", () => {
+ it("returns Series when q is array", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const result = quantileSeries(s, { q: [0.25, 0.5, 0.75] });
+ expect(result).toBeInstanceOf(Series);
+ });
+
+ it("multi-q values correct", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const result = quantileSeries(s, { q: [0, 0.5, 1] }) as Series;
+ const vals = result.values as number[];
+ expect(vals[0]).toBe(1);
+ expect(vals[1]).toBe(3);
+ expect(vals[2]).toBe(5);
+ });
+
+ it("multi-q index matches q values", () => {
+ const s = new Series({ data: [10, 20, 30] });
+ const result = quantileSeries(s, { q: [0.25, 0.75] }) as Series;
+ const idx = result.index.values as number[];
+ expect(idx[0]).toBe(0.25);
+ expect(idx[1]).toBe(0.75);
+ });
+});
+
+describe("quantileSeries — interpolation methods", () => {
+ // data: [0, 10]; q=0.5 → pos=0.5, lo=0, hi=1
+ const s = new Series({ data: [0, 10] });
+
+ it("linear", () => {
+ expect(quantileSeries(s, { q: 0.5, interpolation: "linear" })).toBe(5);
+ });
+
+ it("lower", () => {
+ expect(quantileSeries(s, { q: 0.5, interpolation: "lower" })).toBe(0);
+ });
+
+ it("higher", () => {
+ expect(quantileSeries(s, { q: 0.5, interpolation: "higher" })).toBe(10);
+ });
+
+ it("midpoint", () => {
+ expect(quantileSeries(s, { q: 0.5, interpolation: "midpoint" })).toBe(5);
+ });
+
+  it("nearest — frac > 0.5 returns higher", () => {
+ // data: [0, 10, 20]; q=0.4 → pos=0.8, lo=0, hi=1, frac=0.8 > 0.5 → hi=10
+ const s3 = new Series({ data: [0, 10, 20] });
+ const r = quantileSeries(s3, { q: 0.4, interpolation: "nearest" });
+ expect(r).toBe(10); // frac=0.8, use hi
+ });
+
+ it("nearest — frac=0.5 returns lower", () => {
+ // data: [0, 10]; q=0.5 → frac=0.5, returns lo=0
+ expect(quantileSeries(s, { q: 0.5, interpolation: "nearest" })).toBe(0);
+ });
+
+ it("exact index hit — all methods agree", () => {
+ const sx = new Series({ data: [1, 2, 3] });
+ // q=0.5 → pos=1.0, exact → value 2
+ for (const interp of ["linear", "lower", "higher", "midpoint", "nearest"] as const) {
+ expect(quantileSeries(sx, { q: 0.5, interpolation: interp })).toBe(2);
+ }
+ });
+});
+
+// ─── quantileDataFrame ────────────────────────────────────────────────────────
+
+describe("quantileDataFrame — axis=0 single q", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [10, 20, 30] });
+
+ it("returns Series", () => {
+ const r = quantileDataFrame(df);
+ expect(r).toBeInstanceOf(Series);
+ });
+
+ it("median of each column", () => {
+ const r = quantileDataFrame(df, { q: 0.5 }) as Series;
+ const vals = r.values as number[];
+ expect(vals[0]).toBe(2);
+ expect(vals[1]).toBe(20);
+ });
+
+ it("q=0 → min of each column", () => {
+ const r = quantileDataFrame(df, { q: 0 }) as Series;
+ const vals = r.values as number[];
+ expect(vals[0]).toBe(1);
+ expect(vals[1]).toBe(10);
+ });
+
+ it("q=1 → max of each column", () => {
+ const r = quantileDataFrame(df, { q: 1 }) as Series;
+ const vals = r.values as number[];
+ expect(vals[0]).toBe(3);
+ expect(vals[1]).toBe(30);
+ });
+
+ it("index of result matches column names", () => {
+ const r = quantileDataFrame(df, { q: 0.5 }) as Series;
+ const idx = r.index.values as string[];
+ expect(idx).toEqual(["a", "b"]);
+ });
+
+ it("numericOnly=true (default) skips string columns", () => {
+ const df2 = DataFrame.fromColumns({ a: [1, 2, 3], label: ["x", "y", "z"] });
+ const r = quantileDataFrame(df2, { q: 0.5 }) as Series;
+ expect((r.index.values as string[]).includes("label")).toBe(false);
+ expect((r.index.values as string[]).includes("a")).toBe(true);
+ });
+
+ it("numericOnly=false includes non-numeric as NaN", () => {
+ const df2 = DataFrame.fromColumns({ a: [1, 2, 3], label: ["x", "y", "z"] });
+ const r = quantileDataFrame(df2, { q: 0.5, numericOnly: false }) as Series;
+ expect((r.index.values as string[]).includes("label")).toBe(true);
+ const vals = r.values as number[];
+ const labelIdx = (r.index.values as string[]).indexOf("label");
+ expect(Number.isNaN(vals[labelIdx])).toBe(true);
+ });
+});
+
+describe("quantileDataFrame — axis=0 multi q", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3, 4], b: [10, 20, 30, 40] });
+
+ it("returns DataFrame", () => {
+ const r = quantileDataFrame(df, { q: [0.25, 0.5, 0.75] });
+ expect(r).toBeInstanceOf(DataFrame);
+ });
+
+ it("result shape: rows=q.length, cols=df.columns", () => {
+ const r = quantileDataFrame(df, { q: [0.25, 0.5, 0.75] }) as DataFrame;
+ expect(r.shape).toEqual([3, 2]);
+ });
+
+ it("column names preserved", () => {
+ const r = quantileDataFrame(df, { q: [0, 1] }) as DataFrame;
+ const cols = r.columns.values as string[];
+ expect(cols).toEqual(["a", "b"]);
+ });
+
+ it("q=0 row is min, q=1 row is max", () => {
+ const r = quantileDataFrame(df, { q: [0, 1] }) as DataFrame;
+ const aVals = r.col("a").values as number[];
+ expect(aVals[0]).toBe(1);
+ expect(aVals[1]).toBe(4);
+ });
+
+ it("row index matches q values", () => {
+ const r = quantileDataFrame(df, { q: [0.25, 0.75] }) as DataFrame;
+ const rowIdx = r.index.values as number[];
+ expect(rowIdx[0]).toBe(0.25);
+ expect(rowIdx[1]).toBe(0.75);
+ });
+});
+
+describe("quantileDataFrame — axis=1", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [3, 4, 5], c: [5, 6, 7] });
+
+ it("axis=1 single q returns Series", () => {
+ const r = quantileDataFrame(df, { axis: 1, q: 0.5 });
+ expect(r).toBeInstanceOf(Series);
+ });
+
+ it("axis=1 single q computes median across columns per row", () => {
+ // row 0: [1,3,5] → sorted → median=3
+ // row 1: [2,4,6] → median=4
+ // row 2: [3,5,7] → median=5
+ const r = quantileDataFrame(df, { axis: 1, q: 0.5 }) as Series;
+ const vals = r.values as number[];
+ expect(vals[0]).toBe(3);
+ expect(vals[1]).toBe(4);
+ expect(vals[2]).toBe(5);
+ });
+
+ it("axis=1 multi-q returns DataFrame", () => {
+ const r = quantileDataFrame(df, { axis: 1, q: [0, 1] });
+ expect(r).toBeInstanceOf(DataFrame);
+ });
+
+ it("axis=1 multi-q shape: rows=df.rows, cols=q.length", () => {
+ const r = quantileDataFrame(df, { axis: 1, q: [0, 0.5, 1] }) as DataFrame;
+ expect(r.shape).toEqual([3, 3]);
+ });
+
+ it("axis=1 multi-q column names are q string values", () => {
+ const r = quantileDataFrame(df, { axis: 1, q: [0, 1] }) as DataFrame;
+ const cols = r.columns.values as string[];
+ expect(cols).toEqual(["0", "1"]);
+ });
+});
+
+describe("quantileDataFrame — NaN handling", () => {
+ it("skipna=true (default) ignores null/NaN", () => {
+ const df = DataFrame.fromColumns({ a: [1, null, 3], b: [Number.NaN, 2, 4] });
+ const r = quantileDataFrame(df, { q: 0.5 }) as Series;
+ const vals = r.values as number[];
+ // a: [1, 3] sorted, median=2; b: [2, 4] sorted, median=3
+ expect(vals[0]).toBe(2);
+ expect(vals[1]).toBe(3);
+ });
+
+ it("skipna=false propagates NaN in columns with missing values", () => {
+ const df = DataFrame.fromColumns({ a: [1, Number.NaN, 3], b: [2, 4, 6] });
+ const r = quantileDataFrame(df, { q: 0.5, skipna: false }) as Series;
+ const vals = r.values as number[];
+ expect(Number.isNaN(vals[0])).toBe(true);
+ expect(vals[1]).toBe(4);
+ });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("quantileSeries — property tests", () => {
+ it("q=0 always returns minimum", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, min: -1e6, max: 1e6 }), { minLength: 1, maxLength: 50 }),
+ (data) => {
+ const s = new Series({ data });
+ const q0 = quantileSeries(s, { q: 0 });
+ const expected = Math.min(...data);
+ return typeof q0 === "number" && Math.abs(q0 - expected) < 1e-9;
+ },
+ ),
+ );
+ });
+
+ it("q=1 always returns maximum", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, min: -1e6, max: 1e6 }), { minLength: 1, maxLength: 50 }),
+ (data) => {
+ const s = new Series({ data });
+ const q1 = quantileSeries(s, { q: 1 });
+ const expected = Math.max(...data);
+ return typeof q1 === "number" && Math.abs(q1 - expected) < 1e-9;
+ },
+ ),
+ );
+ });
+
+ it("q monotonicity: q1 <= q2 implies result(q1) <= result(q2)", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, min: -1e6, max: 1e6 }), { minLength: 2, maxLength: 50 }),
+ fc.tuple(
+ fc.float({ noNaN: true, min: 0, max: 1 }),
+ fc.float({ noNaN: true, min: 0, max: 1 }),
+ ),
+ (data, [qa, qb]) => {
+ const q1 = Math.min(qa, qb);
+ const q2 = Math.max(qa, qb);
+ const s = new Series({ data });
+ const r1 = quantileSeries(s, { q: q1 });
+ const r2 = quantileSeries(s, { q: q2 });
+ return typeof r1 === "number" && typeof r2 === "number" && r1 <= r2 + 1e-9;
+ },
+ ),
+ );
+ });
+
+ it("lower <= linear <= higher for any q", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ noNaN: true, min: -1e3, max: 1e3 }), { minLength: 2, maxLength: 30 }),
+ fc.float({ noNaN: true, min: 0, max: 1 }),
+ (data, q) => {
+ const s = new Series({ data });
+ const lo = quantileSeries(s, { q, interpolation: "lower" });
+ const lin = quantileSeries(s, { q, interpolation: "linear" });
+ const hi = quantileSeries(s, { q, interpolation: "higher" });
+ if (typeof lo !== "number" || typeof lin !== "number" || typeof hi !== "number") {
+ return false;
+ }
+ return lo <= lin + 1e-9 && lin <= hi + 1e-9;
+ },
+ ),
+ );
+ });
+});
diff --git a/tests/stats/replace.test.ts b/tests/stats/replace.test.ts
new file mode 100644
index 00000000..c4f94b2d
--- /dev/null
+++ b/tests/stats/replace.test.ts
@@ -0,0 +1,254 @@
+/**
+ * Tests for stats/replace — value substitution for Series and DataFrame.
+ */
+
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { DataFrame, Series } from "../../src/index.ts";
+import { replaceDataFrame, replaceSeries } from "../../src/stats/replace.ts";
+
+// ─── replaceSeries — scalar → scalar ─────────────────────────────────────────
+
+describe("replaceSeries: scalar → scalar", () => {
+ it("replaces a matching value", () => {
+ const s = new Series({ data: [1, 2, 3, 2, 1] });
+ const r = replaceSeries(s, { toReplace: 2, value: 99 });
+ expect([...r.values]).toEqual([1, 99, 3, 99, 1]);
+ });
+
+ it("leaves non-matching values unchanged", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ const r = replaceSeries(s, { toReplace: 9, value: 0 });
+ expect([...r.values]).toEqual([1, 2, 3]);
+ });
+
+ it("replaces string values", () => {
+ const s = new Series({ data: ["a", "b", "a", "c"] });
+ const r = replaceSeries(s, { toReplace: "a", value: "z" });
+ expect([...r.values]).toEqual(["z", "b", "z", "c"]);
+ });
+
+ it("replaces null values", () => {
+ const s = new Series({ data: [1, null, 3, null] });
+ const r = replaceSeries(s, { toReplace: null, value: 0 });
+ expect([...r.values]).toEqual([1, 0, 3, 0]);
+ });
+
+ it("replaces NaN values when matchNaN=true (default)", () => {
+ const s = new Series({ data: [1, Number.NaN, 3] });
+ const r = replaceSeries(s, { toReplace: Number.NaN, value: 0 });
+ expect([...r.values]).toEqual([1, 0, 3]);
+ });
+
+ it("does NOT replace NaN when matchNaN=false", () => {
+ const s = new Series({ data: [1, Number.NaN, 3] });
+ const r = replaceSeries(s, { toReplace: Number.NaN, value: 0 }, { matchNaN: false });
+ expect(Number.isNaN(r.values[1] as number)).toBe(true);
+ });
+
+ it("preserves index", () => {
+ const s = new Series({ data: [1, 2, 3], index: ["x", "y", "z"] });
+ const r = replaceSeries(s, { toReplace: 2, value: 20 });
+ expect([...r.index.values]).toEqual(["x", "y", "z"]);
+ });
+
+ it("preserves name", () => {
+ const s = new Series({ data: [1, 2], name: "myCol" });
+ const r = replaceSeries(s, { toReplace: 1, value: 0 });
+ expect(r.name).toBe("myCol");
+ });
+
+ it("returns empty series when input is empty", () => {
+ const s = new Series({ data: [] });
+ const r = replaceSeries(s, { toReplace: 1, value: 0 });
+ expect(r.size).toBe(0);
+ });
+});
+
+// ─── replaceSeries — array → scalar ───────────────────────────────────────────
+
+describe("replaceSeries: array → scalar", () => {
+ it("replaces all listed values with single value", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ const r = replaceSeries(s, { toReplace: [1, 3, 5], value: 0 });
+ expect([...r.values]).toEqual([0, 2, 0, 4, 0]);
+ });
+
+ it("handles empty toReplace array", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ const r = replaceSeries(s, { toReplace: [], value: 0 });
+ expect([...r.values]).toEqual([1, 2, 3]);
+ });
+});
+
+// ─── replaceSeries — array → array ────────────────────────────────────────────
+
+describe("replaceSeries: array → array", () => {
+ it("performs pair-wise replacement", () => {
+ const s = new Series({ data: [1, 2, 3, 1, 2] });
+ const r = replaceSeries(s, { toReplace: [1, 2], value: [10, 20] });
+ expect([...r.values]).toEqual([10, 20, 3, 10, 20]);
+ });
+
+ it("throws when array lengths differ", () => {
+ const s = new Series({ data: [1, 2, 3] });
+ expect(() => replaceSeries(s, { toReplace: [1, 2], value: [10] })).toThrow(RangeError);
+ });
+});
+
+// ─── replaceSeries — mapping (Record) ─────────────────────────────────────────
+
+describe("replaceSeries: Record mapping", () => {
+ it("replaces using a Record map", () => {
+ const s = new Series({ data: [1, 2, 3, 4] });
+ const r = replaceSeries(s, { toReplace: { "1": 10, "3": 30 } });
+ expect([...r.values]).toEqual([10, 2, 30, 4]);
+ });
+
+ it("leaves values with no mapping entry unchanged", () => {
+ const s = new Series({ data: ["a", "b", "c"] });
+ const r = replaceSeries(s, { toReplace: { a: "A" } });
+ expect([...r.values]).toEqual(["A", "b", "c"]);
+ });
+});
+
+// ─── replaceSeries — mapping (Map) ────────────────────────────────────────────
+
+describe("replaceSeries: Map mapping", () => {
+ it("replaces using a Map", () => {
+ const s = new Series({ data: [1, 2, 3, 2, 1] });
+ const map = new Map<
+ number | string | boolean | bigint | null | undefined | Date,
+ number | string | boolean | bigint | null | undefined | Date
+ >([
+ [1, 100],
+ [2, 200],
+ ]);
+ const r = replaceSeries(s, { toReplace: map });
+ expect([...r.values]).toEqual([100, 200, 3, 200, 100]);
+ });
+
+ it("handles NaN keys in Map with matchNaN=true", () => {
+ const s = new Series({ data: [1, Number.NaN, 3] });
+ const map = new Map([[Number.NaN, 99]]);
+ const r = replaceSeries(s, { toReplace: map });
+ expect([...r.values]).toEqual([1, 99, 3]);
+ });
+});
+
+// ─── replaceDataFrame ─────────────────────────────────────────────────────────
+
+describe("replaceDataFrame: basic", () => {
+ it("replaces value in all columns", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [2, 2, 4] });
+ const r = replaceDataFrame(df, { toReplace: 2, value: 0 });
+ expect([...r.col("a").values]).toEqual([1, 0, 3]);
+ expect([...r.col("b").values]).toEqual([0, 0, 4]);
+ });
+
+ it("restricts replacement to specified columns", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [2, 2, 4] });
+ const r = replaceDataFrame(df, { toReplace: 2, value: 0 }, { columns: ["a"] });
+ expect([...r.col("a").values]).toEqual([1, 0, 3]);
+ expect([...r.col("b").values]).toEqual([2, 2, 4]);
+ });
+
+ it("preserves index", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3] });
+ const r = replaceDataFrame(df, { toReplace: 1, value: 10 });
+ expect([...r.index.values]).toEqual([...df.index.values]);
+ });
+
+ it("preserves columns order", () => {
+ const df = DataFrame.fromColumns({ a: [1], b: [2], c: [3] });
+ const r = replaceDataFrame(df, { toReplace: 1, value: 99 });
+ expect([...r.columns.values]).toEqual(["a", "b", "c"]);
+ });
+
+ it("uses array → scalar replacement across columns", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [3, 4, 5] });
+ const r = replaceDataFrame(df, { toReplace: [1, 3], value: 0 });
+ expect([...r.col("a").values]).toEqual([0, 2, 0]);
+ expect([...r.col("b").values]).toEqual([0, 4, 5]);
+ });
+
+ it("uses Record mapping across columns", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [2, 3] });
+ const r = replaceDataFrame(df, { toReplace: { "2": 20 } });
+ expect([...r.col("a").values]).toEqual([1, 20]);
+ expect([...r.col("b").values]).toEqual([20, 3]);
+ });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("replaceSeries: properties", () => {
+ it("scalar→scalar: replaced value never appears where original matched", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: 0, max: 9 }), { minLength: 0, maxLength: 20 }),
+ fc.integer({ min: 0, max: 9 }),
+ fc.integer({ min: 10, max: 99 }),
+ (data, old, newVal) => {
+ const s = new Series({ data });
+ const r = replaceSeries(s, { toReplace: old, value: newVal });
+ for (let i = 0; i < s.size; i++) {
+ if (s.values[i] === old) {
+ if (r.values[i] !== newVal) {
+ return false;
+ }
+ } else if (r.values[i] !== s.values[i]) {
+ return false;
+ }
+ }
+ return true;
+ },
+ ),
+ );
+ });
+
+ it("size is preserved", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: 0, max: 9 }), { minLength: 0, maxLength: 30 }),
+ (data) => {
+ const s = new Series({ data });
+ const r = replaceSeries(s, { toReplace: 5, value: 0 });
+ return r.size === s.size;
+ },
+ ),
+ );
+ });
+
+ it("no-op when toReplace not present", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: 0, max: 5 }), { minLength: 1, maxLength: 20 }),
+ (data) => {
+ const s = new Series({ data });
+ // 99 is never in the array since data is 0-5
+ const r = replaceSeries(s, { toReplace: 99, value: -1 });
+ return [...r.values].every((v, i) => v === data[i]);
+ },
+ ),
+ );
+ });
+
+ it("array→array: pair-wise replacement is consistent", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.integer({ min: 0, max: 5 }), { minLength: 0, maxLength: 20 }),
+ (data) => {
+ const s = new Series({ data });
+ const r = replaceSeries(s, { toReplace: [1, 2, 3], value: [10, 20, 30] });
+          const mapping: Record<number, number> = { 1: 10, 2: 20, 3: 30 };
+ return [...r.values].every((v, i) => {
+ const orig = data[i] as number;
+ const expected = mapping[orig] ?? orig;
+ return v === expected;
+ });
+ },
+ ),
+ );
+ });
+});
diff --git a/tests/stats/sem_var.test.ts b/tests/stats/sem_var.test.ts
new file mode 100644
index 00000000..dddd4783
--- /dev/null
+++ b/tests/stats/sem_var.test.ts
@@ -0,0 +1,212 @@
+/**
+ * Tests for src/stats/sem_var.ts — varSeries(), semSeries(),
+ * varDataFrame(), semDataFrame().
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ semDataFrame,
+ semSeries,
+ varDataFrame,
+ varSeries,
+} from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── helpers ─────────────────────────────────────────────────────────────────
+
+function round(v: number, d = 10): number {
+ const f = 10 ** d;
+ return Math.round(v * f) / f;
+}
+
+// ─── varSeries ────────────────────────────────────────────────────────────────
+
+describe("varSeries", () => {
+ it("sample variance (ddof=1) of [2,4,4,4,5,5,7,9] ≈ 32/7", () => {
+ const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+ // mean=5, SS=32, ddof=1 → 32/7
+ expect(round(varSeries(s), 8)).toBe(round(32 / 7, 8));
+ });
+
+ it("population variance (ddof=0) of [2,4,4,4,5,5,7,9] = 4", () => {
+ const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+ // mean=5, SS=32, ddof=0 → 32/8 = 4
+ expect(round(varSeries(s, { ddof: 0 }), 10)).toBe(4);
+ });
+
+ it("constant series has variance 0", () => {
+ const s = new Series({ data: [5, 5, 5, 5] });
+ expect(varSeries(s)).toBe(0);
+ });
+
+ it("single element returns NaN (ddof=1, n-ddof=0)", () => {
+ const s = new Series({ data: [7] });
+ expect(Number.isNaN(varSeries(s))).toBe(true);
+ });
+
+ it("single element with ddof=0 returns 0", () => {
+ const s = new Series({ data: [7] });
+ expect(varSeries(s, { ddof: 0 })).toBe(0);
+ });
+
+ it("empty series returns NaN", () => {
+ const s = new Series({ data: [] as Scalar[] });
+ expect(Number.isNaN(varSeries(s))).toBe(true);
+ });
+
+ it("skipna=true (default) ignores nulls", () => {
+ const withNull = new Series({ data: [1, 2, 3, null] as Scalar[] });
+ const withoutNull = new Series({ data: [1, 2, 3] });
+ expect(round(varSeries(withNull))).toBe(round(varSeries(withoutNull)));
+ });
+
+ it("skipna=false returns NaN when null present", () => {
+ const s = new Series({ data: [1, 2, null, 4] as Scalar[] });
+ expect(Number.isNaN(varSeries(s, { skipna: false }))).toBe(true);
+ });
+
+ it("minCount threshold: returns NaN when not enough valid values", () => {
+ const s = new Series({ data: [1, null, null] as Scalar[] });
+ expect(Number.isNaN(varSeries(s, { minCount: 2 }))).toBe(true);
+ });
+
+ it("minCount met: returns normal value", () => {
+ const s = new Series({ data: [1, 2, null] as Scalar[] });
+ expect(Number.isFinite(varSeries(s, { minCount: 2 }))).toBe(true);
+ });
+});
+
+// ─── semSeries ────────────────────────────────────────────────────────────────
+
+describe("semSeries", () => {
+ it("SEM of [2,4,4,4,5,5,7,9]: sqrt((32/7)/8)", () => {
+ const s = new Series({ data: [2, 4, 4, 4, 5, 5, 7, 9] });
+ // var(ddof=1)=32/7, n=8 → sem = sqrt((32/7)/8)
+ expect(round(semSeries(s), 8)).toBe(round(Math.sqrt(32 / 7 / 8), 8));
+ });
+
+ it("constant series has SEM = 0", () => {
+ const s = new Series({ data: [3, 3, 3] });
+ expect(semSeries(s)).toBe(0);
+ });
+
+ it("empty series returns NaN", () => {
+ const s = new Series({ data: [] as Scalar[] });
+ expect(Number.isNaN(semSeries(s))).toBe(true);
+ });
+
+ it("single element with ddof=1 returns NaN", () => {
+ expect(Number.isNaN(semSeries(new Series({ data: [5] })))).toBe(true);
+ });
+
+ it("skipna=true ignores nulls", () => {
+ const withNull = new Series({ data: [1, 2, 3, null] as Scalar[] });
+ const withoutNull = new Series({ data: [1, 2, 3] });
+ expect(round(semSeries(withNull))).toBe(round(semSeries(withoutNull)));
+ });
+});
+
+// ─── varDataFrame ─────────────────────────────────────────────────────────────
+
+describe("varDataFrame", () => {
+ it("column-wise (axis=0) variance", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ const result = varDataFrame(df);
+ expect(round(result.values[0] as number)).toBe(1);
+ expect(round(result.values[1] as number)).toBe(1);
+ });
+
+ it("result Series has correct column index", () => {
+ const df = DataFrame.fromColumns({ x: [1, 2, 3], y: [4, 4, 4] });
+ const result = varDataFrame(df);
+ expect(result.index.values).toEqual(["x", "y"]);
+ expect(result.values[1]).toBe(0);
+ });
+
+ it("non-numeric column returns NaN without numericOnly", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: ["x", "y", "z"] });
+ const result = varDataFrame(df);
+ expect(Number.isFinite(result.values[0] as number)).toBe(true);
+ expect(Number.isNaN(result.values[1] as number)).toBe(true);
+ });
+
+ it("numericOnly=true excludes non-numeric columns", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3], b: ["x", "y", "z"] });
+ const result = varDataFrame(df, { numericOnly: true });
+ expect(result.index.values).toEqual(["a"]);
+ });
+
+ it("axis=1 row-wise variance", () => {
+ const df = DataFrame.fromColumns({ a: [1, 10], b: [3, 10] });
+ const result = varDataFrame(df, { axis: 1 });
+ expect(round(result.values[0] as number)).toBe(2);
+ expect(result.values[1]).toBe(0);
+ });
+
+ it("ddof=0 population variance", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3] });
+ const sample = varDataFrame(df);
+ const population = varDataFrame(df, { ddof: 0 });
+ expect(population.values[0] as number).toBeLessThan(sample.values[0] as number);
+ });
+});
+
+// ─── semDataFrame ─────────────────────────────────────────────────────────────
+
+describe("semDataFrame", () => {
+ it("column-wise SEM", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3] });
+ const result = semDataFrame(df);
+ // var=1, n=3 => sem=sqrt(1/3)
+ expect(round(result.values[0] as number, 8)).toBe(round(Math.sqrt(1 / 3), 8));
+ });
+
+ it("constant column has SEM = 0", () => {
+ const df = DataFrame.fromColumns({ a: [5, 5, 5] });
+ expect(semDataFrame(df).values[0]).toBe(0);
+ });
+
+ it("axis=1 row-wise SEM", () => {
+ const df = DataFrame.fromColumns({ a: [2, 4], b: [4, 4] });
+ const result = semDataFrame(df, { axis: 1 });
+ // row 0: [2,4] var=2, n=2 => sem=sqrt(2/2)=1
+ expect(round(result.values[0] as number)).toBe(1);
+ // row 1: [4,4] sem=0
+ expect(result.values[1]).toBe(0);
+ });
+});
+
+// ─── property tests ───────────────────────────────────────────────────────────
+
+describe("varSeries — property tests", () => {
+ it("sample variance is always >= 0 for non-null arrays", () => {
+ fc.assert(
+ fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 2 }), (arr) => {
+ const s = new Series({ data: arr });
+ const v = varSeries(s);
+ return Number.isNaN(v) || v >= 0;
+ }),
+ );
+ });
+
+ it("variance of identical values is 0", () => {
+ fc.assert(
+ fc.property(fc.float({ noNaN: true }), fc.integer({ min: 2, max: 20 }), (val, n) => {
+ const s = new Series({ data: new Array(n).fill(val) });
+ return Math.abs(varSeries(s)) < 1e-9 || Number.isNaN(varSeries(s));
+ }),
+ );
+ });
+
+ it("SEM is always >= 0 when defined", () => {
+ fc.assert(
+ fc.property(fc.array(fc.float({ noNaN: true }), { minLength: 2 }), (arr) => {
+ const s = new Series({ data: arr });
+ const v = semSeries(s);
+ return Number.isNaN(v) || v >= 0;
+ }),
+ );
+ });
+});
diff --git a/tests/stats/skew_kurt.test.ts b/tests/stats/skew_kurt.test.ts
new file mode 100644
index 00000000..952c8821
--- /dev/null
+++ b/tests/stats/skew_kurt.test.ts
@@ -0,0 +1,232 @@
+/**
+ * Tests for src/stats/skew_kurt.ts — skewSeries(), kurtSeries(),
+ * skewDataFrame(), kurtDataFrame().
+ */
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import {
+ DataFrame,
+ Series,
+ kurtDataFrame,
+ kurtSeries,
+ skewDataFrame,
+ skewSeries,
+} from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── helpers ─────────────────────────────────────────────────────────────────
+
+/** Round to n decimal places for approx equality. */
+function round(v: number, decimals = 6): number {
+ const f = 10 ** decimals;
+ return Math.round(v * f) / f;
+}
+
+// ─── skewSeries ───────────────────────────────────────────────────────────────
+
+describe("skewSeries", () => {
+ it("symmetric distribution has skew ≈ 0", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 5] });
+ expect(Math.abs(skewSeries(s))).toBeLessThan(1e-10);
+ });
+
+ it("right-skewed distribution has positive skew", () => {
+ const s = new Series({ data: [1, 2, 3, 4, 100] });
+ expect(skewSeries(s)).toBeGreaterThan(0);
+ });
+
+ it("left-skewed distribution has negative skew", () => {
+ const s = new Series({ data: [1, 50, 51, 52, 53] });
+ expect(skewSeries(s)).toBeLessThan(0);
+ });
+
+ it("returns NaN for n < 3", () => {
+ expect(Number.isNaN(skewSeries(new Series({ data: [1, 2] })))).toBe(true);
+ expect(Number.isNaN(skewSeries(new Series({ data: [1] })))).toBe(true);
+ expect(Number.isNaN(skewSeries(new Series({ data: [] as Scalar[] })))).toBe(true);
+ });
+
+ it("returns NaN for constant series (zero std)", () => {
+ const s = new Series({ data: [5, 5, 5, 5] });
+ expect(Number.isNaN(skewSeries(s))).toBe(true);
+ });
+
+ it("skipna=true (default) ignores nulls", () => {
+ const withNull = new Series({ data: [1, 2, 3, 4, 5, null] as Scalar[] });
+ const withoutNull = new Series({ data: [1, 2, 3, 4, 5] });
+ expect(skewSeries(withNull)).toBeCloseTo(skewSeries(withoutNull), 10);
+ });
+
+ it("skipna=false propagates NaN when null present", () => {
+ const s = new Series({ data: [1, 2, null, 4, 5] as Scalar[] });
+ expect(Number.isNaN(skewSeries(s, { skipna: false }))).toBe(true);
+ });
+
+ it("known skewness value", () => {
+ // Adjusted Fisher-Pearson: pd.Series([2,8,0,4,1,9,9,0]).skew() ≈ 0.3306
+ const s = new Series({ data: [2, 8, 0, 4, 1, 9, 9, 0] });
+ expect(round(skewSeries(s), 4)).toBeCloseTo(0.3306, 3);
+ });
+});
+
+// ─── kurtSeries ───────────────────────────────────────────────────────────────
+
+describe("kurtSeries", () => {
+ it("normal-like distribution has near-zero excess kurtosis", () => {
+    // Evenly spaced (uniform-like) sample — we only assert the statistic is defined,
+    const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] });
+ // Not zero but finite
+ expect(Number.isFinite(kurtSeries(s))).toBe(true);
+ });
+
+ it("returns NaN for n < 4", () => {
+ expect(Number.isNaN(kurtSeries(new Series({ data: [1, 2, 3] })))).toBe(true);
+ expect(Number.isNaN(kurtSeries(new Series({ data: [1, 2] })))).toBe(true);
+ expect(Number.isNaN(kurtSeries(new Series({ data: [] as Scalar[] })))).toBe(true);
+ });
+
+ it("returns NaN for constant series (zero std)", () => {
+ const s = new Series({ data: [3, 3, 3, 3, 3] });
+ expect(Number.isNaN(kurtSeries(s))).toBe(true);
+ });
+
+ it("skipna=true (default) ignores nulls", () => {
+ const withNull = new Series({ data: [1, 2, 3, 4, 5, 6, null] as Scalar[] });
+ const withoutNull = new Series({ data: [1, 2, 3, 4, 5, 6] });
+ expect(kurtSeries(withNull)).toBeCloseTo(kurtSeries(withoutNull), 10);
+ });
+
+ it("skipna=false propagates NaN when null present", () => {
+ const s = new Series({ data: [1, 2, null, 4, 5, 6] as Scalar[] });
+ expect(Number.isNaN(kurtSeries(s, { skipna: false }))).toBe(true);
+ });
+
+ it("uniform distribution has negative excess kurtosis", () => {
+ // Uniform: excess kurtosis = -1.2
+ const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] });
+ expect(kurtSeries(s)).toBeLessThan(0);
+ });
+
+ it("known kurtosis value", () => {
+ // Adjusted Fisher: pd.Series([2,8,0,4,1,9,9,0]).kurt() ≈ -2.0986
+ const s = new Series({ data: [2, 8, 0, 4, 1, 9, 9, 0] });
+ expect(round(kurtSeries(s), 3)).toBeCloseTo(-2.099, 2);
+ });
+});
+
+// ─── skewDataFrame ────────────────────────────────────────────────────────────
+
+describe("skewDataFrame", () => {
+ it("axis=0 computes per-column skew", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3, 4, 5], b: [1, 2, 3, 4, 100] });
+ const result = skewDataFrame(df);
+ expect(result.index.values).toEqual(["a", "b"]);
+ expect(Math.abs(result.values[0] as number)).toBeLessThan(1e-9); // symmetric
+ expect(result.values[1] as number).toBeGreaterThan(0); // right-skewed
+ });
+
+ it("axis=1 computes per-row skew", () => {
+ const df = DataFrame.fromColumns({
+ a: [1, 10],
+ b: [2, 10],
+ c: [3, 100],
+ });
+ const result = skewDataFrame(df, { axis: 1 });
+ expect(result.values.length).toBe(2);
+ });
+
+ it("numericOnly skips string columns", () => {
+ const df = DataFrame.fromColumns({ n: [1, 2, 3, 4, 5], s: ["a", "b", "c", "d", "e"] });
+ const result = skewDataFrame(df, { numericOnly: true });
+ expect(result.index.values).toEqual(["n"]);
+ });
+
+ it("columns with n < 3 return NaN", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ expect(Number.isNaN(skewDataFrame(df).values[0] as number)).toBe(true);
+ });
+
+ it("preserves row index for axis=1", () => {
+ const df = DataFrame.fromColumns(
+ { a: [1, 2, 3, 4, 5], b: [5, 4, 3, 2, 1], c: [1, 1, 1, 1, 100] },
+ { index: ["r0", "r1", "r2", "r3", "r4"] },
+ );
+ const result = skewDataFrame(df, { axis: 1 });
+ expect(result.index.values).toEqual(["r0", "r1", "r2", "r3", "r4"]);
+ });
+});
+
+// ─── kurtDataFrame ────────────────────────────────────────────────────────────
+
+describe("kurtDataFrame", () => {
+ it("axis=0 computes per-column kurtosis", () => {
+ const df = DataFrame.fromColumns({
+ a: [1, 2, 3, 4, 5, 6],
+ b: [1, 1, 1, 9, 9, 9],
+ });
+ const result = kurtDataFrame(df);
+ expect(result.index.values).toEqual(["a", "b"]);
+ expect(Number.isFinite(result.values[0] as number)).toBe(true);
+ });
+
+ it("axis=1 computes per-row kurtosis", () => {
+ const df = DataFrame.fromColumns({
+ a: [1, 2],
+ b: [2, 3],
+ c: [3, 4],
+ d: [10, 5],
+ e: [4, 6],
+ f: [3, 7],
+ });
+ const result = kurtDataFrame(df, { axis: 1 });
+ expect(result.values.length).toBe(2);
+ });
+
+ it("columns with n < 4 return NaN", () => {
+ const df = DataFrame.fromColumns({ a: [1, 2, 3] });
+ expect(Number.isNaN(kurtDataFrame(df).values[0] as number)).toBe(true);
+ });
+});
+
+// ─── property tests ───────────────────────────────────────────────────────────
+
+describe("skewSeries property tests", () => {
+ it("skew of constant array is NaN", () => {
+ fc.assert(
+ fc.property(fc.integer({ min: -100, max: 100 }), (c) => {
+ const s = new Series({ data: [c, c, c, c, c] });
+ expect(Number.isNaN(skewSeries(s))).toBe(true);
+ }),
+ );
+ });
+
+ it("skew is finite for non-constant arrays with n >= 3", () => {
+ fc.assert(
+ fc.property(
+ fc.array(fc.float({ min: -100, max: 100, noNaN: true }), { minLength: 3, maxLength: 20 }),
+ (arr) => {
+ // Check if constant
+ const allSame = arr.every((v) => v === arr[0]);
+ const s = new Series({ data: arr });
+ const result = skewSeries(s);
+ if (allSame) {
+ expect(Number.isNaN(result)).toBe(true);
+ } else {
+ expect(Number.isFinite(result)).toBe(true);
+ }
+ },
+ ),
+ );
+ });
+});
+
+describe("kurtSeries property tests", () => {
+ it("kurt of constant array is NaN", () => {
+ fc.assert(
+ fc.property(fc.integer({ min: -100, max: 100 }), (c) => {
+ const s = new Series({ data: [c, c, c, c, c] });
+ expect(Number.isNaN(kurtSeries(s))).toBe(true);
+ }),
+ );
+ });
+});
diff --git a/tests/stats/to_datetime.test.ts b/tests/stats/to_datetime.test.ts
new file mode 100644
index 00000000..38339390
--- /dev/null
+++ b/tests/stats/to_datetime.test.ts
@@ -0,0 +1,304 @@
+/**
+ * Tests for stats/to_datetime — convert scalars, arrays, and Series to Date.
+ */
+
+import { describe, expect, it } from "bun:test";
+import fc from "fast-check";
+import { Series, toDatetime } from "../../src/index.ts";
+import type { Scalar } from "../../src/index.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function series(data: Scalar[]): Series {
+ return new Series({ data });
+}
+
+// ─── null / missing ───────────────────────────────────────────────────────────
+
+describe("toDatetime — missing values", () => {
+ it("returns null for null", () => {
+ expect(toDatetime(null)).toBeNull();
+ });
+
+ it("returns null for undefined", () => {
+ expect(toDatetime(undefined)).toBeNull();
+ });
+
+ it("returns null for NaN", () => {
+ expect(toDatetime(Number.NaN)).toBeNull();
+ });
+});
+
+// ─── Date passthrough ─────────────────────────────────────────────────────────
+
+describe("toDatetime — Date passthrough", () => {
+ it("returns the same Date object unchanged", () => {
+ const d = new Date(2024, 0, 15);
+ const result = toDatetime(d);
+ expect(result).toBeInstanceOf(Date);
+ expect((result as Date).getTime()).toBe(d.getTime());
+ });
+
+ it("utc:true still returns a Date with the same timestamp", () => {
+ const d = new Date(2024, 5, 1);
+ const result = toDatetime(d, { utc: true });
+ expect((result as Date).getTime()).toBe(d.getTime());
+ });
+});
+
+// ─── numeric inputs ───────────────────────────────────────────────────────────
+
+describe("toDatetime — numeric inputs", () => {
+ it("treats number as milliseconds by default", () => {
+ const ms = Date.UTC(2024, 0, 1);
+ const result = toDatetime(ms);
+ expect((result as Date).getTime()).toBe(ms);
+ });
+
+ it("unit=s — seconds", () => {
+ const s = Date.UTC(2024, 0, 1) / 1000;
+ const result = toDatetime(s, { unit: "s" });
+ expect((result as Date).getTime()).toBe(s * 1000);
+ });
+
+ it("unit=us — microseconds", () => {
+ const ms = Date.UTC(2024, 3, 15);
+ const us = ms * 1000;
+ const result = toDatetime(us, { unit: "us" });
+ expect((result as Date).getTime()).toBe(ms);
+ });
+
+ it("unit=ns — nanoseconds", () => {
+ const ms = Date.UTC(2020, 6, 4);
+ const ns = ms * 1_000_000;
+ const result = toDatetime(ns, { unit: "ns" });
+ expect((result as Date).getTime()).toBe(ms);
+ });
+});
+
+// ─── string — ISO 8601 ────────────────────────────────────────────────────────
+
+describe("toDatetime — ISO strings", () => {
+ it("parses YYYY-MM-DD", () => {
+ const result = toDatetime("2024-03-15") as Date;
+ expect(result).toBeInstanceOf(Date);
+ expect(result.getFullYear()).toBe(2024);
+ expect(result.getMonth()).toBe(2); // 0-indexed
+ expect(result.getDate()).toBe(15);
+ });
+
+ it("parses YYYY-MM-DDTHH:MM:SSZ", () => {
+ const result = toDatetime("2024-01-01T00:00:00Z") as Date;
+ expect(result.getTime()).toBe(Date.UTC(2024, 0, 1));
+ });
+
+ it("parses datetime with offset", () => {
+ const result = toDatetime("2024-06-15T12:30:00+05:30") as Date;
+ expect(result).toBeInstanceOf(Date);
+ });
+});
+
+// ─── string — US format (MDY) ─────────────────────────────────────────────────
+
+describe("toDatetime — US date strings (MM/DD/YYYY)", () => {
+ it("parses MM/DD/YYYY", () => {
+ const result = toDatetime("01/15/2024") as Date;
+ expect(result.getFullYear()).toBe(2024);
+ expect(result.getMonth()).toBe(0);
+ expect(result.getDate()).toBe(15);
+ });
+
+ it("parses M/D/YYYY (no zero-padding)", () => {
+ const result = toDatetime("3/5/2024") as Date;
+ expect(result.getFullYear()).toBe(2024);
+ expect(result.getMonth()).toBe(2);
+ expect(result.getDate()).toBe(5);
+ });
+
+ it("parses MM/DD/YY with year expansion", () => {
+ const result = toDatetime("01/15/24") as Date;
+ expect(result.getFullYear()).toBe(2024);
+ });
+
+ it("dayfirst=true treats first field as day", () => {
+ const result = toDatetime("02/01/2024", { dayfirst: true }) as Date;
+ // DD/MM/YYYY → 2 Jan 2024
+ expect(result.getMonth()).toBe(0);
+ expect(result.getDate()).toBe(2);
+ });
+
+ it("parses with time component", () => {
+ const result = toDatetime("06/15/2024 14:30:00") as Date;
+ expect(result.getHours()).toBe(14);
+ expect(result.getMinutes()).toBe(30);
+ });
+});
+
+// ─── string — European format (DMY dash) ─────────────────────────────────────
+
+describe("toDatetime — European date strings (DD-MM-YYYY)", () => {
+ it("parses DD-MM-YYYY", () => {
+ const result = toDatetime("15-03-2024") as Date;
+ expect(result.getFullYear()).toBe(2024);
+ expect(result.getMonth()).toBe(2);
+ expect(result.getDate()).toBe(15);
+ });
+
+ it("parses with time", () => {
+ const result = toDatetime("15-03-2024 10:00:30") as Date;
+ expect(result.getHours()).toBe(10);
+ expect(result.getSeconds()).toBe(30);
+ });
+});
+
+// ─── string — compact YYYYMMDD ────────────────────────────────────────────────
+
+describe("toDatetime — compact strings (YYYYMMDD)", () => {
+ it("parses YYYYMMDD", () => {
+ const result = toDatetime("20240115") as Date;
+ expect(result.getFullYear()).toBe(2024);
+ expect(result.getMonth()).toBe(0);
+ expect(result.getDate()).toBe(15);
+ });
+});
+
+// ─── string — integer string ──────────────────────────────────────────────────
+
+describe("toDatetime — integer strings", () => {
+ it("parses integer string as milliseconds", () => {
+ const ms = Date.UTC(2024, 0, 1);
+ const result = toDatetime(String(ms)) as Date;
+ expect(result.getTime()).toBe(ms);
+ });
+});
+
+// ─── errors handling ──────────────────────────────────────────────────────────
+
+describe("toDatetime — errors option", () => {
+ it("errors=raise throws on unparseable string (default)", () => {
+ expect(() => toDatetime("not-a-date")).toThrow(TypeError);
+ });
+
+ it("errors=raise throws on unparseable boolean", () => {
+ expect(() => toDatetime(true as unknown as Scalar)).toThrow(TypeError);
+ });
+
+ it("errors=coerce returns null on unparseable string", () => {
+ expect(toDatetime("not-a-date", { errors: "coerce" })).toBeNull();
+ });
+
+ it("errors=coerce returns null on boolean", () => {
+ expect(toDatetime(true as unknown as Scalar, { errors: "coerce" })).toBeNull();
+ });
+
+ it("errors=ignore returns original string unchanged", () => {
+ const result = toDatetime("nope", { errors: "ignore" }) as unknown as string;
+ expect(result).toBe("nope");
+ });
+
+ it("errors=ignore returns original boolean unchanged", () => {
+ const v = true as unknown as Scalar;
+ const result = toDatetime(v, { errors: "ignore" }) as unknown as boolean;
+ expect(result).toBe(true);
+ });
+});
+
+// ─── array overload ───────────────────────────────────────────────────────────
+
+describe("toDatetime — array overload", () => {
+ it("converts an array of mixed inputs", () => {
+ const result = toDatetime(["2024-01-01", null, "2024-06-15"]);
+ expect(result).toHaveLength(3);
+ expect(result[0]).toBeInstanceOf(Date);
+ expect(result[1]).toBeNull();
+ expect(result[2]).toBeInstanceOf(Date);
+ });
+
+ it("errors=coerce turns bad entries to null", () => {
+ const result = toDatetime(["2024-01-01", "bad", "2024-03-01"], { errors: "coerce" });
+ expect(result[1]).toBeNull();
+ });
+});
+
+// ─── Series overload ──────────────────────────────────────────────────────────
+
+describe("toDatetime — Series overload", () => {
+ it("converts a Series to Series", () => {
+ const s = series(["2024-01-01", null, "2024-06-15"]);
+ const result = toDatetime(s);
+ expect(result).toBeInstanceOf(Series);
+ expect(result.size).toBe(3);
+ expect(result.values[0]).toBeInstanceOf(Date);
+ expect(result.values[1]).toBeNull();
+ });
+
+ it("preserves index and name", () => {
+ const s = new Series({ data: ["2024-01-01"], name: "dates" });
+ const result = toDatetime(s);
+ expect(result.name).toBe("dates");
+ });
+
+ it("dtype is datetime", () => {
+ const s = series(["2024-01-01"]);
+ const result = toDatetime(s);
+ expect(result.dtype.kind).toBe("datetime");
+ });
+
+ it("errors=coerce in Series turns bad values to null", () => {
+ const s = series(["2024-01-01", "bad-date", "2024-03-01"]);
+ const result = toDatetime(s, { errors: "coerce" });
+ expect(result.values[1]).toBeNull();
+ });
+});
+
+// ─── property-based tests ─────────────────────────────────────────────────────
+
+describe("toDatetime — property tests", () => {
+ it("Date roundtrip: toDatetime(d) preserves milliseconds", () => {
+ fc.assert(
+ fc.property(fc.date(), (d) => {
+ const result = toDatetime(d) as Date;
+ return result instanceof Date && result.getTime() === d.getTime();
+ }),
+ );
+ });
+
+ it("numeric ms roundtrip: toDatetime(n) gives Date with same getTime()", () => {
+ fc.assert(
+ fc.property(fc.integer({ min: 0, max: 2_000_000_000_000 }), (ms) => {
+ const result = toDatetime(ms) as Date;
+ return result instanceof Date && result.getTime() === ms;
+ }),
+ );
+ });
+
+ it("errors=coerce never throws on arbitrary scalars", () => {
+ fc.assert(
+ fc.property(fc.oneof(fc.string(), fc.double(), fc.boolean(), fc.constant(null)), (v) => {
+ const s = v as unknown as Scalar;
+ try {
+ toDatetime(s, { errors: "coerce" });
+ return true;
+ } catch {
+ return false;
+ }
+ }),
+ );
+ });
+
+ it("errors=ignore never throws and always returns something", () => {
+ fc.assert(
+ fc.property(fc.oneof(fc.string(), fc.double(), fc.boolean(), fc.constant(null)), (v) => {
+ const s = v as unknown as Scalar;
+ let ok = true;
+ try {
+ const r = toDatetime(s, { errors: "ignore" });
+ ok = r !== undefined;
+ } catch {
+ ok = false;
+ }
+ return ok;
+ }),
+ );
+ });
+});