diff --git a/benchmarks/results.json b/benchmarks/results.json
index 13295d81..7d1fa6ec 100644
--- a/benchmarks/results.json
+++ b/benchmarks/results.json
@@ -1,357 +1 @@
-{
- "benchmarks": [
- {
- "function": "concat",
- "tsb": {
- "function": "concat",
- "mean_ms": 128.9745293,
- "iterations": 20,
- "total_ms": 2579.490586
- },
- "pandas": {
- "function": "concat",
- "mean_ms": 0.11375509999993483,
- "iterations": 20,
- "total_ms": 2.2751019999986966
- },
- "ratio": 1133.791
- },
- {
- "function": "dataframe_apply",
- "tsb": {
- "function": "dataframe_apply",
- "mean_ms": 16.7897294,
- "iterations": 10,
- "total_ms": 167.897294
- },
- "pandas": {
- "function": "dataframe_apply",
- "mean_ms": 47.161531699998704,
- "iterations": 10,
- "total_ms": 471.61531699998704
- },
- "ratio": 0.356
- },
- {
- "function": "dataframe_creation",
- "tsb": {
- "function": "dataframe_creation",
- "mean_ms": 223.22429929999998,
- "iterations": 10,
- "total_ms": 2232.242993
- },
- "pandas": {
- "function": "dataframe_creation",
- "mean_ms": 5.148059900000135,
- "iterations": 10,
- "total_ms": 51.48059900000135
- },
- "ratio": 43.361
- },
- {
- "function": "dataframe_dropna",
- "tsb": {
- "function": "dataframe_dropna",
- "mean_ms": 172.72901985000004,
- "iterations": 20,
- "total_ms": 3454.5803970000006
- },
- "pandas": {
- "function": "dataframe_dropna",
- "mean_ms": 2.42739894999886,
- "iterations": 20,
- "total_ms": 48.547978999977204
- },
- "ratio": 71.158
- },
- {
- "function": "dataframe_filter",
- "tsb": {
- "function": "dataframe_filter",
- "mean_ms": 126.19991375,
- "iterations": 20,
- "total_ms": 2523.998275
- },
- "pandas": {
- "function": "dataframe_filter",
- "mean_ms": 0.4964389500003108,
- "iterations": 20,
- "total_ms": 9.928779000006216
- },
- "ratio": 254.21
- },
- {
- "function": "dataframe_rename",
- "tsb": {
- "function": "dataframe_rename",
- "mean_ms": 0.008352200000000209,
- "iterations": 20,
- "total_ms": 0.1670440000000042
- },
- "pandas": {
- "function": "dataframe_rename",
- "mean_ms": 0.17103454999869427,
- "iterations": 20,
- "total_ms": 3.4206909999738855
- },
- "ratio": 0.049
- },
- {
- "function": "dataframe_sort",
- "tsb": {
- "function": "dataframe_sort",
- "mean_ms": 434.5389244,
- "iterations": 10,
- "total_ms": 4345.389244
- },
- "pandas": {
- "function": "dataframe_sort",
- "mean_ms": 33.301584399998774,
- "iterations": 10,
- "total_ms": 333.01584399998774
- },
- "ratio": 13.049
- },
- {
- "function": "describe",
- "tsb": {
- "function": "describe",
- "mean_ms": 19.719739000000004,
- "iterations": 10,
- "total_ms": 197.19739000000004
- },
- "pandas": {
- "function": "describe",
- "mean_ms": 5.521558600003118,
- "iterations": 10,
- "total_ms": 55.21558600003118
- },
- "ratio": 3.571
- },
- {
- "function": "ewm_mean",
- "tsb": {
- "function": "ewm_mean",
- "mean_ms": 118.5438748,
- "iterations": 10,
- "total_ms": 1185.438748
- },
- "pandas": {
- "function": "ewm_mean",
- "mean_ms": 1.7652839999982461,
- "iterations": 10,
- "total_ms": 17.65283999998246
- },
- "ratio": 67.153
- },
- {
- "function": "groupby_mean",
- "tsb": {
- "function": "groupby_mean",
- "mean_ms": 21.510315099999996,
- "iterations": 10,
- "total_ms": 215.10315099999997
- },
- "pandas": {
- "function": "groupby_mean",
- "mean_ms": 8.079756900002621,
- "iterations": 10,
- "total_ms": 80.79756900002621
- },
- "ratio": 2.662
- },
- {
- "function": "merge",
- "tsb": {
- "function": "merge",
- "mean_ms": 10348.345783,
- "iterations": 3,
- "total_ms": 31045.037349000002
- },
- "pandas": {
- "function": "merge",
- "mean_ms": 60.42320619999941,
- "iterations": 10,
- "total_ms": 604.2320619999941
- },
- "ratio": 171.264
- },
- {
- "function": "pivot_table",
- "tsb": {
- "function": "pivot_table",
- "mean_ms": 117.3417057,
- "iterations": 10,
- "total_ms": 1173.417057
- },
- "pandas": {
- "function": "pivot_table",
- "mean_ms": 22.500251999997545,
- "iterations": 10,
- "total_ms": 225.00251999997545
- },
- "ratio": 5.215
- },
- {
- "function": "read_csv",
- "tsb": {
- "function": "read_csv",
- "mean_ms": 589.2802257999999,
- "iterations": 5,
- "total_ms": 2946.401129
- },
- "pandas": {
- "function": "read_csv",
- "mean_ms": 29.951929399999244,
- "iterations": 5,
- "total_ms": 149.75964699999622
- },
- "ratio": 19.674
- },
- {
- "function": "rolling_mean",
- "tsb": {
- "function": "rolling_mean",
- "mean_ms": 419.62945440000004,
- "iterations": 10,
- "total_ms": 4196.294544
- },
- "pandas": {
- "function": "rolling_mean",
- "mean_ms": 1.71982609999759,
- "iterations": 10,
- "total_ms": 17.1982609999759
- },
- "ratio": 243.995
- },
- {
- "function": "series_arithmetic",
- "tsb": {
- "function": "series_arithmetic",
- "mean_ms": 122.68170964999999,
- "iterations": 20,
- "total_ms": 2453.634193
- },
- "pandas": {
- "function": "series_arithmetic",
- "mean_ms": 0.764571400000591,
- "iterations": 20,
- "total_ms": 15.29142800001182
- },
- "ratio": 160.458
- },
- {
- "function": "series_creation",
- "tsb": {
- "function": "series_creation",
- "mean_ms": 103.015,
- "iterations": 50,
- "total_ms": 5150.754
- },
- "pandas": {
- "function": "series_creation",
- "mean_ms": 7.607,
- "iterations": 50,
- "total_ms": 380.349
- },
- "ratio": 13.542
- },
- {
- "function": "series_cumsum",
- "tsb": {
- "function": "series_cumsum",
- "mean_ms": 58.26283665,
- "iterations": 20,
- "total_ms": 1165.256733
- },
- "pandas": {
- "function": "series_cumsum",
- "mean_ms": 1.1250383499998406,
- "iterations": 20,
- "total_ms": 22.500766999996813
- },
- "ratio": 51.787
- },
- {
- "function": "series_fillna",
- "tsb": {
- "function": "series_fillna",
- "mean_ms": 61.56140175,
- "iterations": 20,
- "total_ms": 1231.228035
- },
- "pandas": {
- "function": "series_fillna",
- "mean_ms": 0.18527670000025864,
- "iterations": 20,
- "total_ms": 3.705534000005173
- },
- "ratio": 332.267
- },
- {
- "function": "series_shift",
- "tsb": {
- "function": "series_shift",
- "mean_ms": 110.16682740000002,
- "iterations": 20,
- "total_ms": 2203.336548
- },
- "pandas": {
- "function": "series_shift",
- "mean_ms": 0.07249699999931636,
- "iterations": 20,
- "total_ms": 1.4499399999863272
- },
- "ratio": 1519.605
- },
- {
- "function": "series_sort",
- "tsb": {
- "function": "series_sort",
- "mean_ms": 161.28472190000002,
- "iterations": 10,
- "total_ms": 1612.8472190000002
- },
- "pandas": {
- "function": "series_sort",
- "mean_ms": 5.127767300001551,
- "iterations": 10,
- "total_ms": 51.27767300001551
- },
- "ratio": 31.453
- },
- {
- "function": "series_string_ops",
- "tsb": {
- "function": "series_string_ops",
- "mean_ms": 243.85622659999999,
- "iterations": 10,
- "total_ms": 2438.562266
- },
- "pandas": {
- "function": "series_string_ops",
- "mean_ms": 34.08206670000027,
- "iterations": 10,
- "total_ms": 340.8206670000027
- },
- "ratio": 7.155
- },
- {
- "function": "series_value_counts",
- "tsb": {
- "function": "series_value_counts",
- "mean_ms": 38.8205242,
- "iterations": 10,
- "total_ms": 388.205242
- },
- "pandas": {
- "function": "series_value_counts",
- "mean_ms": 9.212644899997713,
- "iterations": 10,
- "total_ms": 92.12644899997713
- },
- "ratio": 4.214
- }
- ],
- "timestamp": "2026-04-13T00:11:36Z"
-}
+{ "benchmarks": [], "timestamp": null }
diff --git a/benchmarks/run_benchmarks.sh b/benchmarks/run_benchmarks.sh
old mode 100755
new mode 100644
diff --git a/bun.lock b/bun.lock
new file mode 100644
index 00000000..163b75ec
--- /dev/null
+++ b/bun.lock
@@ -0,0 +1,50 @@
+{
+ "lockfileVersion": 1,
+ "configVersion": 0,
+ "workspaces": {
+ "": {
+ "name": "tsb",
+ "devDependencies": {
+ "@biomejs/biome": "^1.9.4",
+ "@types/bun": "^1.1.14",
+ "fast-check": "^3.22.0",
+ },
+ "peerDependencies": {
+ "typescript": "^5.7.0",
+ },
+ },
+ },
+ "packages": {
+ "@biomejs/biome": ["@biomejs/biome@1.9.4", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "1.9.4", "@biomejs/cli-darwin-x64": "1.9.4", "@biomejs/cli-linux-arm64": "1.9.4", "@biomejs/cli-linux-arm64-musl": "1.9.4", "@biomejs/cli-linux-x64": "1.9.4", "@biomejs/cli-linux-x64-musl": "1.9.4", "@biomejs/cli-win32-arm64": "1.9.4", "@biomejs/cli-win32-x64": "1.9.4" }, "bin": { "biome": "bin/biome" } }, "sha512-1rkd7G70+o9KkTn5KLmDYXihGoTaIGO9PIIN2ZB7UJxFrWw04CZHPYiMRjYsaDvVV7hP1dYNRLxSANLaBFGpog=="],
+
+ "@biomejs/cli-darwin-arm64": ["@biomejs/cli-darwin-arm64@1.9.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-bFBsPWrNvkdKrNCYeAp+xo2HecOGPAy9WyNyB/jKnnedgzl4W4Hb9ZMzYNbf8dMCGmUdSavlYHiR01QaYR58cw=="],
+
+ "@biomejs/cli-darwin-x64": ["@biomejs/cli-darwin-x64@1.9.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-ngYBh/+bEedqkSevPVhLP4QfVPCpb+4BBe2p7Xs32dBgs7rh9nY2AIYUL6BgLw1JVXV8GlpKmb/hNiuIxfPfZg=="],
+
+ "@biomejs/cli-linux-arm64": ["@biomejs/cli-linux-arm64@1.9.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-fJIW0+LYujdjUgJJuwesP4EjIBl/N/TcOX3IvIHJQNsAqvV2CHIogsmA94BPG6jZATS4Hi+xv4SkBBQSt1N4/g=="],
+
+ "@biomejs/cli-linux-arm64-musl": ["@biomejs/cli-linux-arm64-musl@1.9.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-v665Ct9WCRjGa8+kTr0CzApU0+XXtRgwmzIf1SeKSGAv+2scAlW6JR5PMFo6FzqqZ64Po79cKODKf3/AAmECqA=="],
+
+ "@biomejs/cli-linux-x64": ["@biomejs/cli-linux-x64@1.9.4", "", { "os": "linux", "cpu": "x64" }, "sha512-lRCJv/Vi3Vlwmbd6K+oQ0KhLHMAysN8lXoCI7XeHlxaajk06u7G+UsFSO01NAs5iYuWKmVZjmiOzJ0OJmGsMwg=="],
+
+ "@biomejs/cli-linux-x64-musl": ["@biomejs/cli-linux-x64-musl@1.9.4", "", { "os": "linux", "cpu": "x64" }, "sha512-gEhi/jSBhZ2m6wjV530Yy8+fNqG8PAinM3oV7CyO+6c3CEh16Eizm21uHVsyVBEB6RIM8JHIl6AGYCv6Q6Q9Tg=="],
+
+ "@biomejs/cli-win32-arm64": ["@biomejs/cli-win32-arm64@1.9.4", "", { "os": "win32", "cpu": "arm64" }, "sha512-tlbhLk+WXZmgwoIKwHIHEBZUwxml7bRJgk0X2sPyNR3S93cdRq6XulAZRQJ17FYGGzWne0fgrXBKpl7l4M87Hg=="],
+
+ "@biomejs/cli-win32-x64": ["@biomejs/cli-win32-x64@1.9.4", "", { "os": "win32", "cpu": "x64" }, "sha512-8Y5wMhVIPaWe6jw2H+KlEm4wP/f7EW3810ZLmDlrEEy5KvBsb9ECEfu/kMWD484ijfQ8+nIi0giMgu9g1UAuuA=="],
+
+ "@types/bun": ["@types/bun@1.3.11", "", { "dependencies": { "bun-types": "1.3.11" } }, "sha512-5vPne5QvtpjGpsGYXiFyycfpDF2ECyPcTSsFBMa0fraoxiQyMJ3SmuQIGhzPg2WJuWxVBoxWJ2kClYTcw/4fAg=="],
+
+ "@types/node": ["@types/node@25.5.2", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg=="],
+
+ "bun-types": ["bun-types@1.3.11", "", { "dependencies": { "@types/node": "*" } }, "sha512-1KGPpoxQWl9f6wcZh57LvrPIInQMn2TQ7jsgxqpRzg+l0QPOFvJVH7HmvHo/AiPgwXy+/Thf6Ov3EdVn1vOabg=="],
+
+ "fast-check": ["fast-check@3.23.2", "", { "dependencies": { "pure-rand": "^6.1.0" } }, "sha512-h5+1OzzfCC3Ef7VbtKdcv7zsstUQwUDlYpUTvjeUsJAssPgLn7QzbboPtL5ro04Mq0rPOsMzl7q5hIbRs2wD1A=="],
+
+ "pure-rand": ["pure-rand@6.1.0", "", {}, "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA=="],
+
+ "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
+
+ "undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],
+ }
+}
diff --git a/playground/add_sub_mul_div.html b/playground/add_sub_mul_div.html
new file mode 100644
index 00000000..956ae26f
--- /dev/null
+++ b/playground/add_sub_mul_div.html
@@ -0,0 +1,214 @@
+
+
+
+
+
+ tsb — add / sub / mul / div
+
+
+
+
+← tsb playground
+add / sub / mul / div
+
+ Element-wise arithmetic between a Series (or DataFrame) and a scalar or another
+ Series — mirrors pandas.Series.add(), .sub(),
+ .mul(), and .div().
+
+
+
+ 1 — add: Series + scalar
+
+ seriesAdd(series, scalar) adds a constant to every element.
+ Missing values (null / NaN) are propagated unchanged.
+ Mirrors pandas.Series.add(other).
+
+ import { Series, seriesAdd } from "tsb";
+
+const s = new Series({ data: [1, 2, null, 4] });
+const result = seriesAdd(s, 10);
+console.log([...result.values]); // [11, 12, null, 14]
+
+ Loading…
+
+
+
+ 2 — add: Series + Series (positional)
+
+ When other is another Series, elements are paired positionally
+ (same as pandas default when shapes match).
+
+ import { Series, seriesAdd } from "tsb";
+
+const a = new Series({ data: [1, 2, 3] });
+const b = new Series({ data: [4, 5, 6] });
+seriesAdd(a, b).values; // [5, 7, 9]
+
+ Loading…
+
+
+
+ 3 — sub / rsub
+
+ seriesSub(s, other) computes s − other.
+ seriesRsub(s, other) computes the reverse: other − s.
+
+ import { Series, seriesSub, seriesRsub } from "tsb";
+
+const s = new Series({ data: [10, 20, 30] });
+seriesSub(s, 5).values; // [5, 15, 25]
+seriesRsub(s, 100).values; // [90, 80, 70]
+
+ Loading…
+
+
+
+ 4 — mul: multiply
+
+ seriesMul(s, other) multiplies every element.
+ seriesRmul is the reversed form (commutative, provided for API symmetry).
+
+ import { Series, seriesMul } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, null] });
+seriesMul(s, 3).values; // [3, 6, 9, null]
+
+const weights = new Series({ data: [0.5, 1, 2, 1] });
+seriesMul(s, weights).values; // [0.5, 2, 6, null]
+
+ Loading…
+
+
+
+ 5 — div / rdiv (true division)
+
+ seriesDiv(s, other) performs IEEE-754 true division.
+ Division by zero yields ±Infinity or NaN (0÷0),
+ matching pandas.Series.div.
+ seriesRdiv(s, other) computes other / s.
+
+ import { Series, seriesDiv, seriesRdiv } from "tsb";
+
+const s = new Series({ data: [4, 9, 0, null] });
+seriesDiv(s, 2).values; // [2, 4.5, 0, null]
+seriesRdiv(s, 36).values; // [9, 4, Infinity, null]
+
+ Loading…
+
+
+
+ 6 — DataFrame arithmetic
+
+ All four operations work on DataFrames too. A scalar is broadcast across
+ every cell; a DataFrame operand is paired column-by-column, row-by-row.
+
+ import { DataFrame, dataFrameAdd, dataFrameMul, dataFrameDiv } from "tsb";
+
+const df = DataFrame.fromColumns({ price: [10, 20, 30], qty: [3, 5, 2] });
+
+// Add a discount
+dataFrameAdd(df, 5).col("price").values; // [15, 25, 35]
+
+// Scale everything by 2
+dataFrameMul(df, 2).col("qty").values; // [6, 10, 4]
+
+// Revenue per item / some constant
+dataFrameDiv(df, 10).col("price").values; // [1, 2, 3]
+
+ Loading…
+
+
+
+ 7 — Missing value propagation
+
+ Following pandas convention, any operation involving a missing value
+ (null or NaN) returns the missing value unchanged.
+
+ import { Series, seriesAdd, seriesMul, seriesDiv } from "tsb";
+
+const s = new Series({ data: [1, null, NaN, 4] });
+seriesAdd(s, 10).values; // [11, null, NaN, 14]
+seriesMul(s, 2).values; // [2, null, NaN, 8]
+seriesDiv(s, 2).values; // [0.5, null, NaN, 2]
+
+ Loading…
+
+
+
+
+
diff --git a/playground/align.html b/playground/align.html
new file mode 100644
index 00000000..bcf7a21d
--- /dev/null
+++ b/playground/align.html
@@ -0,0 +1,254 @@
+
+
+
+
+
+ tsb — align
+
+
+
+
+
+
+
+
+ align takes two objects and aligns them to the same axis, returning
+ a pair [alignedLeft, alignedRight] that share the same index.
+ Labels present in one but not the other are filled with a fillValue (default null).
+
+
+ alignSeries(a, b, { join? }) — align two Series on their row index.
+ alignDataFrame(a, b, { join?, axis? }) — align two DataFrames on rows, columns, or both.
+
+
+ Join policies
+
+ join Result index
+
+ "outer" (default)Union of both indices
+ "inner"Intersection of both indices
+ "left"Left object's index
+ "right"Right object's index
+
+
+
+
+ See also:
+ pandas.Series.align
+ ·
+ pandas.DataFrame.align
+
+
+
+
+ 1 · alignSeries — outer (default)
+ import { Series, Index, alignSeries } from "tsb";
+
+const a = new Series({ data: [1, 2, 3], index: new Index(["a", "b", "c"]) });
+const b = new Series({ data: [10, 20], index: new Index(["b", "c"]) });
+
+// Default join="outer" → union of indices
+const [la, ra] = alignSeries(a, b);
+la.toArray(); // → [1, 2, 3] (index: a, b, c)
+ra.toArray(); // → [null, 10, 20] (index: a, b, c)
+
+
+
+
+
+ 2 · alignSeries — inner join
+ const [li, ri] = alignSeries(a, b, { join: "inner" });
+li.toArray(); // → [2, 3] (only shared labels: b, c)
+ri.toArray(); // → [10, 20]
+
+
+
+
+
+ 3 · alignSeries — left / right join + fillValue
+ const x = new Series({ data: [1, 2, 3], index: new Index(["a", "b", "c"]) });
+const y = new Series({ data: [10, 30], index: new Index(["b", "d"]) });
+
+// join="left": result index = x's index
+const [ll, rl] = alignSeries(x, y, { join: "left", fillValue: 0 });
+ll.toArray(); // → [1, 2, 3]
+rl.toArray(); // → [0, 10, 0] ("d" is outside x's index → dropped)
+
+// join="right": result index = y's index
+const [lr, rr] = alignSeries(x, y, { join: "right", fillValue: 0 });
+lr.toArray(); // → [2, 0] ("b" matches, "d" is new)
+rr.toArray(); // → [10, 30]
+
+
+
+
+
+ 4 · alignDataFrame — outer, both axes
+ import { DataFrame, Index, alignDataFrame } from "tsb";
+
+const a = DataFrame.fromColumns(
+ { x: [1, 2], y: [3, 4] },
+ { index: new Index(["r0", "r1"]) },
+);
+const b = DataFrame.fromColumns(
+ { y: [10], z: [20] },
+ { index: new Index(["r1"]) },
+);
+
+// Default: align both rows and columns (outer union)
+const [la, ra] = alignDataFrame(a, b);
+
+// la → shape [2, 3] columns: x, y, z
+// row r0: x=1, y=3, z=null
+// row r1: x=2, y=4, z=null
+la.col("z").toArray(); // → [null, null]
+
+// ra → shape [2, 3] columns: x, y, z
+// row r0: x=null, y=null, z=null
+// row r1: x=null, y=10, z=20
+ra.col("x").toArray(); // → [null, null]
+ra.col("y").toArray(); // → [null, 10]
+
+
+
+
+
+ 5 · alignDataFrame — axis=0 (rows only)
+ // axis=0 aligns rows but leaves columns untouched
+const [la5, ra5] = alignDataFrame(a, b, { axis: 0 });
+la5.columns.toArray(); // → ["x", "y"] (unchanged)
+ra5.columns.toArray(); // → ["y", "z"] (unchanged)
+la5.index.toArray(); // → ["r0", "r1"] (outer union)
+ra5.index.toArray(); // → ["r0", "r1"] (outer union)
+
+
+
+
+
+ 6 · alignDataFrame — axis=1 (columns only)
+ // axis=1 aligns columns but leaves rows untouched
+const [la6, ra6] = alignDataFrame(a, b, { axis: 1 });
+la6.index.toArray(); // → ["r0", "r1"] (unchanged)
+ra6.index.toArray(); // → ["r1"] (unchanged)
+la6.columns.toArray().sort(); // → ["x", "y", "z"]
+ra6.columns.toArray().sort(); // → ["x", "y", "z"]
+
+
+
+
+
+ 7 · Arithmetic after alignment
+ // A common use-case: element-wise arithmetic on misaligned Series
+const p = new Series({ data: [100, 200, 300], index: new Index(["a", "b", "c"]) });
+const q = new Series({ data: [1, 2], index: new Index(["b", "c"]) });
+
+const [ap, aq] = alignSeries(p, q, { fillValue: 0 });
+// Now same shape — do element-wise addition
+const sum = ap.add(aq);
+sum.toArray(); // → [100, 201, 302]
+sum.index.toArray(); // → ["a", "b", "c"]
+
+
+
+
+
+
+
+
diff --git a/playground/apply.html b/playground/apply.html
new file mode 100644
index 00000000..050aef2a
--- /dev/null
+++ b/playground/apply.html
@@ -0,0 +1,128 @@
+
+
+
+
+
+ tsb — apply
+
+
+
+
+← tsb playground
+apply
+Element-wise and axis-wise function application — mirrors pandas.Series.apply(), pandas.DataFrame.applymap(), and pandas.DataFrame.apply().
+
+
+ 1 — Series.apply: transform each element
+ applySeries(series, fn) calls fn(value, label) for every element and returns a new Series with the results.
+ import { Series, applySeries } from "tsb";
+
+const s = new Series({ data: [1, 4, 9, 16], name: "squares" });
+
+// Square root of each element
+const r = applySeries(s, (v) => Math.sqrt(v));
+console.log([...r.values]); // [1, 2, 3, 4]
+
+// Use the label in the transform
+const s2 = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+const labeled = applySeries(s2, (v, lbl) => `${lbl}=${v}`);
+console.log([...labeled.values]); // ["a=10", "b=20", "c=30"]
+
+ Loading…
+
+
+
+ 2 — DataFrame.applymap: element-wise over entire DataFrame
+ applymap(df, fn) calls fn(value, colName) for every cell and returns a new DataFrame with the same shape.
+ import { DataFrame, applymap } from "tsb";
+
+const df = DataFrame.fromColumns({
+ price: [10.5, 22.0, 8.75],
+ qty: [3, 1, 5 ],
+});
+
+// Round every number to 1 decimal place
+const rounded = applymap(df, (v) => Math.round(v * 10) / 10);
+console.log([...rounded.col("price").values]); // [10.5, 22, 8.8]
+
+// Use the column name in the transform
+const tagged = applymap(df, (v, col) => `${col}:${v}`);
+console.log(tagged.col("price").values[0]); // "price:10.5"
+
+ Loading…
+
+
+
+ 3 — DataFrame.apply (axis=0): aggregate each column
+ dataFrameApply(df, fn) with default axis=0 passes each column as a Series to fn and returns a Series indexed by column names.
+ import { DataFrame, dataFrameApply } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, 2, 3],
+ b: [10, 20, 30],
+ c: [100, 200, 300],
+});
+
+// Sum of each column
+const colSums = dataFrameApply(df, (col) => col.sum());
+console.log([...colSums.index.values]); // ["a", "b", "c"]
+console.log([...colSums.values]); // [6, 60, 600]
+
+// Mean of each column
+const colMeans = dataFrameApply(df, (col) => col.mean());
+console.log([...colMeans.values]); // [2, 20, 200]
+
+ Loading…
+
+
+
+ 4 — DataFrame.apply (axis=1): aggregate each row
+ dataFrameApply(df, fn, { axis: 1 }) passes each row as a Series to fn and returns a Series indexed by row labels.
+ import { DataFrame, dataFrameApply } from "tsb";
+
+const df = DataFrame.fromColumns(
+ { a: [1, 2, 3], b: [4, 5, 6] },
+ { index: ["r0", "r1", "r2"] },
+);
+
+// Sum across columns for each row
+const rowSums = dataFrameApply(df, (row) => row.sum(), { axis: 1 });
+console.log([...rowSums.index.values]); // ["r0", "r1", "r2"]
+console.log([...rowSums.values]); // [5, 7, 9]
+
+// Max value in each row
+const rowMax = dataFrameApply(df, (row) => row.max(), { axis: 1 });
+console.log([...rowMax.values]); // [4, 5, 6]
+
+ Loading…
+
+
+
+ 5 — Handling missing values
+ The callback receives null / NaN as-is — you decide how to handle them.
+ import { Series, applySeries } from "tsb";
+
+const s = new Series({ data: [1, null, 3, null, 5] });
+
+// Replace nulls with 0, double numbers
+const r = applySeries(s, (v) => (v === null ? 0 : v * 2));
+console.log([...r.values]); // [2, 0, 6, 0, 10]
+
+ Loading…
+
+
+
+
diff --git a/playground/assign.html b/playground/assign.html
new file mode 100644
index 00000000..f915431c
--- /dev/null
+++ b/playground/assign.html
@@ -0,0 +1,107 @@
+
+
+
+
+
+ tsb — DataFrame.assign()
+
+
+
+ ← tsb playground
+
+ DataFrame.assign()
+
+ Mirrors
+ pandas.DataFrame.assign() . Returns a new DataFrame with the given
+ columns added or replaced. The source DataFrame is never mutated.
+
+
+ Specifier kinds
+
+ Specifier Type Description
+
+ Array readonly Scalar[]Values aligned by position with the row index
+ Series Series<Scalar>A Series aligned by position
+ Callable (df: DataFrame) => Scalar[] | SeriesReceives the in-progress DataFrame (earlier columns in this call are already visible)
+
+
+
+ Example 1 — Array and Series
+ import { DataFrame, Series, dataFrameAssign } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [10, 20, 30] });
+
+const df2 = dataFrameAssign(df, {
+ c: [7, 8, 9], // array
+ d: new Series({ data: [4, 5, 6] }), // Series
+});
+
+// df2.columns.values → ["a", "b", "c", "d"]
+// df2.col("c").values → [7, 8, 9]
+// df2.col("d").values → [4, 5, 6]
+
+
+ Example 2 — Callable (chained derivations)
+ const df3 = dataFrameAssign(df, {
+ // 1st: add "total" — callable receives df (no "total" column yet)
+ total: (d) => d.col("a").values.map((v, i) =>
+ (v as number) + (d.col("b").values[i] as number)
+ ),
+ // 2nd: add "tax" — callable now sees "total" because it was added above
+ tax: (d) => d.col("total").values.map((v) => (v as number) * 0.1),
+});
+
+// df3.col("total").values → [11, 22, 33]
+// df3.col("tax").values → [1.1, 2.2, 3.3]
+
+
+ Example 3 — Instance method
+ // DataFrame.assign() is also available as an instance method.
+const df4 = df.assign({
+ squared_a: (d: DataFrame) => d.col("a").values.map((v) => (v as number) ** 2),
+});
+// df4.col("squared_a").values → [1, 4, 9]
+
+
+ Example 4 — Replace an existing column
+ // If a key already exists as a column it is replaced in-place (order preserved).
+const df5 = dataFrameAssign(df, { b: [100, 200, 300] });
+
+// df5.columns.values → ["a", "b"] (order unchanged)
+// df5.col("b").values → [100, 200, 300]
+
+
+
+ Pandas parity note: callables are applied in insertion-order and each one
+ receives the DataFrame produced by all earlier assignments in the same call — matching
+ pandas' behaviour since Python 3.7+ where dict preserves insertion order.
+
+
+ API
+ // Standalone function
+function dataFrameAssign(df: DataFrame, spec: AssignSpec): DataFrame;
+
+// Instance method (same behaviour)
+df.assign(spec: AssignSpec): DataFrame;
+
+// Types
+type AssignColSpec =
+ | readonly Scalar[]
+ | Series<Scalar>
+ | ((df: DataFrame) => readonly Scalar[] | Series<Scalar>);
+
+type AssignSpec = Readonly<Record<string, AssignColSpec>>;
+
+
+
diff --git a/playground/benchmarks.html b/playground/benchmarks.html
index c4a74f9f..6b5dde65 100644
--- a/playground/benchmarks.html
+++ b/playground/benchmarks.html
@@ -300,58 +300,43 @@ 🤖 About
// Find max time for scaling bars
let maxTime = 0;
for (const b of benchmarks) {
- if (b.tsb != null) maxTime = Math.max(maxTime, b.tsb.mean_ms);
- if (b.pandas != null) maxTime = Math.max(maxTime, b.pandas.mean_ms);
+ maxTime = Math.max(maxTime, b.tsb.mean_ms, b.pandas.mean_ms);
}
// Render bar chart
for (const b of benchmarks) {
const label = b.function.replace(/_/g, " ");
- const pyPct = b.pandas != null ? (b.pandas.mean_ms / maxTime) * 100 : 0;
- const tsPct = b.tsb != null ? (b.tsb.mean_ms / maxTime) * 100 : 0;
-
- const tsBar = b.tsb != null
- ? '' + b.tsb.mean_ms.toFixed(3) + ' ms
'
- : 'pending
';
- const pyBar = b.pandas != null
- ? '' + b.pandas.mean_ms.toFixed(3) + ' ms
'
- : 'pending
';
+ const tsPct = (b.tsb.mean_ms / maxTime) * 100;
+ const pyPct = (b.pandas.mean_ms / maxTime) * 100;
const row = document.createElement("div");
row.className = "bar-row";
row.innerHTML =
'' + label + '
' +
- '' + tsBar + pyBar + '
';
+ '' +
+ '
' + b.tsb.mean_ms.toFixed(3) + ' ms
' +
+ '
' + b.pandas.mean_ms.toFixed(3) + ' ms
' +
+ '
';
barChart.appendChild(row);
}
// Render table
for (const b of benchmarks) {
- const ratio = (b.tsb != null && b.pandas != null && b.pandas.mean_ms > 0)
- ? b.tsb.mean_ms / b.pandas.mean_ms
- : null;
- const faster = ratio != null ? (ratio < 1 ? "tsb" : "pandas") : "—";
- const badgeClass = ratio != null ? (ratio < 1 ? "fast" : "slow") : "";
- const fasterClass = ratio != null ? (ratio < 1 ? "faster-tsb" : "faster-pandas") : "";
- const ratioDisplay = ratio != null
- ? '' + ratio.toFixed(3) + "x "
- : "—";
- const displayRatio = ratio != null
- ? (ratio < 1
- ? (1 / ratio).toFixed(2) + "x faster"
- : ratio.toFixed(2) + "x slower")
- : "";
- const fasterDisplay = ratio != null ? faster + " (" + displayRatio + ")" : "—";
- const tsMsDisplay = b.tsb != null ? b.tsb.mean_ms.toFixed(3) : "—";
- const pyMsDisplay = b.pandas != null ? b.pandas.mean_ms.toFixed(3) : "—";
+ const ratio = b.ratio;
+ const faster = ratio < 1 ? "tsb" : "pandas";
+ const badgeClass = ratio < 1 ? "fast" : "slow";
+ const fasterClass = ratio < 1 ? "faster-tsb" : "faster-pandas";
+ const displayRatio = ratio < 1
+ ? (1 / ratio).toFixed(2) + "x faster"
+ : ratio.toFixed(2) + "x slower";
const tr = document.createElement("tr");
tr.innerHTML =
"" + b.function.replace(/_/g, " ") + " " +
- "" + tsMsDisplay + " " +
- "" + pyMsDisplay + " " +
- "" + ratioDisplay + " " +
- '' + fasterDisplay + " ";
+ "" + b.tsb.mean_ms.toFixed(3) + " " +
+ "" + b.pandas.mean_ms.toFixed(3) + " " +
+ '' + ratio + "x " +
+ '' + faster + " (" + displayRatio + ") ";
benchTbody.appendChild(tr);
}
})();
diff --git a/playground/categorical_index.html b/playground/categorical_index.html
new file mode 100644
index 00000000..41ff5fab
--- /dev/null
+++ b/playground/categorical_index.html
@@ -0,0 +1,180 @@
+
+
+
+
+
+ tsb — CategoricalIndex
+
+
+
+
+← tsb playground
+CategoricalIndex
+
+ An index whose values are constrained to a fixed set of categories — mirrors
+ pandas.CategoricalIndex.
+
+
+
+ 1 — Basic construction
+
+ Create a CategoricalIndex from an array of labels. Categories are
+ inferred automatically (sorted, deduplicated). Internally values are stored as
+ integer codes.
+
+ import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromArray(["b", "a", "c", "a", "b"]);
+
+console.log("size:", ci.size); // 5
+console.log("categories:", ci.categories.toArray()); // ["a","b","c"]
+console.log("codes:", [...ci.codes]); // [1,0,2,0,1]
+console.log("ordered:", ci.ordered); // false
+console.log("at(0):", ci.at(0)); // "b"
+console.log("getLoc('a'):", ci.getLoc("a")); // 1
+
+ ▶ run
+
+
+
+ 2 — Explicit categories and ordered flag
+
+ Supply explicit categories to control their order. Set ordered: true
+ to unlock comparison operations between category labels.
+
+ import { CategoricalIndex } from "tsb";
+
+const sizes = CategoricalIndex.fromArray(
+ ["M", "S", "L", "XL", "S"],
+ {
+ categories: ["S", "M", "L", "XL"],
+ ordered: true,
+ name: "size",
+ },
+);
+
+console.log("categories:", sizes.categories.toArray()); // ["S","M","L","XL"]
+console.log("codes:", [...sizes.codes]); // [1,0,2,3,0]
+console.log("ordered:", sizes.ordered); // true
+console.log("name:", sizes.name); // "size"
+
+// Order-aware comparison: "S" < "L"?
+console.log("compareLabels('S','L'):", sizes.compareLabels("S", "L")); // negative
+
+ ▶ run
+
+
+
+ 3 — fromCodes constructor
+ Build a CategoricalIndex directly from a category list and pre-computed codes. Code -1 represents a missing (NA) value.
+ import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromCodes(
+ ["low", "mid", "high"],
+ [0, 2, -1, 1, 0],
+);
+
+console.log("toArray():", ci.toArray());
+// → ["low", "high", null, "mid", "low"]
+
+ ▶ run
+
+
+
+ 4 — Category mutations
+
+ All mutation methods return a new CategoricalIndex;
+ the original is unchanged.
+
+ import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromArray(["a", "b", "c", "b"]);
+
+// Rename: same codes, new labels
+const renamed = ci.renameCategories(["x", "y", "z"]);
+console.log("renamed:", renamed.toArray()); // ["x","y","z","y"]
+
+// Add a category that doesn't appear in the data yet
+const added = ci.addCategories(["d"]);
+console.log("added cats:", added.categories.toArray()); // ["a","b","c","d"]
+
+// Remove "b" → entries become null
+const removed = ci.removeCategories(["b"]);
+console.log("after remove:", removed.toArray()); // ["a",null,"c",null]
+
+// Remove unused categories
+const ci2 = CategoricalIndex.fromArray(["a", "b"], { categories: ["a", "b", "c", "d"] });
+console.log("nCats before:", ci2.nCategories); // 4
+console.log("nCats after:", ci2.removeUnusedCategories().nCategories); // 2
+
+ ▶ run
+
+
+
+ 5 — Reorder and setCategories
+ import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromArray(["a", "b", "c"]);
+
+// Reorder — must be a permutation of existing categories
+const reordered = ci.reorderCategories(["c", "a", "b"]);
+console.log("categories:", reordered.categories.toArray()); // ["c","a","b"]
+console.log("data:", reordered.toArray()); // ["a","b","c"] (unchanged)
+
+// Set completely new categories — entries outside new set → null
+const set = ci.setCategories(["a", "c"]);
+console.log("after setCategories:", set.toArray()); // ["a", null, "c"]
+
+ ▶ run
+
+
+
+ 6 — Set-like operations on categories
+ import { CategoricalIndex } from "tsb";
+
+const a = CategoricalIndex.fromArray(["a", "b", "b"]);
+const b = CategoricalIndex.fromArray(["b", "c", "c"]);
+
+// Union of category sets (left data retained)
+const u = a.unionCategories(b);
+console.log("union categories:", u.categories.toArray()); // ["a","b","c"]
+console.log("union data:", u.toArray()); // ["a","b","b"]
+
+// Intersection of category sets
+const ci = CategoricalIndex.fromArray(["a", "b", "c"]);
+const other = CategoricalIndex.fromArray(["b", "c", "d"]);
+const inter = ci.intersectCategories(other);
+console.log("intersect categories:", inter.categories.toArray()); // ["b","c"]
+console.log("intersect data:", inter.toArray()); // [null,"b","c"]
+
+ ▶ run
+
+
+
+ 7 — getLocsAll and membership
+ import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromArray(["a", "b", "a", "c", "a"]);
+
+console.log("all locs of 'a':", ci.getLocsAll("a")); // [0, 2, 4]
+console.log("contains 'b':", ci.contains("b")); // true
+console.log("contains 'z':", ci.contains("z")); // false
+console.log("hasCategory 'c':", ci.hasCategory("c")); // true (even if not in data with explicit cats)
+
+ ▶ run
+
+
+
diff --git a/playground/clip_with_bounds.html b/playground/clip_with_bounds.html
new file mode 100644
index 00000000..a787b66c
--- /dev/null
+++ b/playground/clip_with_bounds.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+ tsb — clip with bounds
+
+
+
+ ← tsb playground
+
+ ✂️ Clip with bounds
+
+ Extends scalar clip to support per-element bounds. Mirrors
+
+ pandas.Series.clip(lower, upper) and
+
+ pandas.DataFrame.clip(lower, upper, axis) with Series or DataFrame bounds.
+
+
+ Bound types
+
+ Bound argument Behaviour
+
+ numberSame scalar bound for every element
+ null / omittedNo bound on that side
+ (number | null)[]Positional per-element bounds
+ Series<Scalar>Aligned by index label — each element looks up its label in the bound Series
+ DataFrame (DataFrame variant only)Element-wise — each cell is clipped to the matching cell in the bound DataFrame
+
+
+
+ Example 1 — Series with scalar bounds
+ import { Series, clipSeriesWithBounds } from "tsb";
+
+const s = new Series({ data: [-5, 1, 7, 12] });
+
+clipSeriesWithBounds(s, { lower: 0, upper: 8 }).values;
+// [0, 1, 7, 8]
+
+
+ Example 2 — Series bounds (label-aligned)
+ import { Index, Series, clipSeriesWithBounds } from "tsb";
+
+const prices = new Series({
+ data: [90, 110, 85, 120],
+ index: new Index(["AAPL", "GOOG", "MSFT", "AMZN"]),
+ name: "price",
+});
+
+// Per-stock price floors
+const floors = new Series({
+ data: [95, 80, 100],
+ index: new Index(["AAPL", "MSFT", "GOOG"]),
+});
+
+clipSeriesWithBounds(prices, { lower: floors }).values;
+// AAPL: max(90, 95)=95 GOOG: max(110, 100)=110 MSFT: max(85, 80)=85 AMZN: 120 (no bound)
+// [95, 110, 85, 120]
+
+
+ Example 3 — DataFrame clip with per-column bounds (axis=1)
+ import { DataFrame, Index, Series, clipDataFrameWithBounds } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [0, 5, 12],
+ b: [3, 8, 15],
+});
+
+// Each column has its own lower/upper bound
+const lo = new Series({ data: [1, 4], index: new Index(["a", "b"]) });
+const hi = new Series({ data: [10, 9], index: new Index(["a", "b"]) });
+
+const result = clipDataFrameWithBounds(df, { lower: lo, upper: hi, axis: 1 });
+// col "a": [1, 5, 10] (lower=1, upper=10)
+// col "b": [4, 8, 9] (lower=4, upper=9)
+
+
+ Example 4 — DataFrame clip with per-row bounds (axis=0, default)
+ import { DataFrame, Series, clipDataFrameWithBounds } from "tsb";
+
+const df = DataFrame.fromColumns({
+ min_temp: [-5, -2, 1, 4],
+ max_temp: [10, 15, 18, 22],
+});
+
+// Daily operational thresholds (per row)
+const lowerBound = new Series({ data: [0, 0, 0, 0] }); // never below 0
+const upperBound = new Series({ data: [12, 12, 20, 20] }); // row-specific caps
+
+const result = clipDataFrameWithBounds(df, { lower: lowerBound, upper: upperBound, axis: 0 });
+// min_temp: [0, 0, 1, 4] max_temp: [10, 12, 18, 20]
+
+
+ Example 5 — Element-wise DataFrame bounds
+ import { DataFrame, clipDataFrameWithBounds } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 5, 10], b: [2, 8, 3] });
+const lo = DataFrame.fromColumns({ a: [3, 3, 3], b: [0, 9, 0] });
+const hi = DataFrame.fromColumns({ a: [8, 8, 8], b: [5, 5, 5] });
+
+const result = clipDataFrameWithBounds(df, { lower: lo, upper: hi });
+// col "a": [3, 5, 8] col "b": [2, 5, 3]
+
+
+ Null / NaN propagation
+
+ Missing values pass through unchanged. If the input contains null or
+ NaN, the output retains it regardless of the bounds.
+
+ import { Series, clipSeriesWithBounds } from "tsb";
+
+const s = new Series({ data: [null, -5, NaN, 10] });
+clipSeriesWithBounds(s, { lower: 0, upper: 8 }).values;
+// [null, 0, NaN, 8]
+
+
+ API reference
+
+ Function Signature
+
+
+ clipSeriesWithBounds
+ (series, { lower?, upper? }) → Series
+
+
+ clipDataFrameWithBounds
+ (df, { lower?, upper?, axis? }) → DataFrame
+
+
+
+
+
diff --git a/playground/combine_first.html b/playground/combine_first.html
new file mode 100644
index 00000000..a80bd540
--- /dev/null
+++ b/playground/combine_first.html
@@ -0,0 +1,135 @@
+
+
+
+
+
+ tsb — combine_first
+
+
+
+
+
+
+
+
+ combineFirstSeries(self, other) and combineFirstDataFrame(self, other)
+ fill the missing entries of self with non-null values taken from
+ other, returning the combined result.
+ The result index is the union of both index sets.
+ For each label, self's value takes priority; other's value is only
+ used when self's value is missing (null, undefined, or NaN).
+
+
+ Mirrors
+ pandas.Series.combine_first
+ and
+ pandas.DataFrame.combine_first .
+
+
+ Semantics
+
+ Self value Other value Result
+
+ non-null anything self value
+ null / NaN non-null other value
+ null / NaN null / missing null
+ — any (new label) other value
+
+
+
+
+
+ Example 1 — Series: fill gaps with values from another Series
+ import { Series, combineFirstSeries } from "tsb";
+
+const a = new Series({ data: [1, null, 3], index: ["x", "y", "z"] });
+const b = new Series({ data: [10, 20, 30, 40], index: ["x", "y", "z", "w"] });
+
+const result = combineFirstSeries(a, b);
+// index: ["x", "y", "z", "w"]
+// values: [1, 20, 3, 40]
+//
+// - "x": a has 1 (non-null) → keeps 1
+// - "y": a has null → filled from b → 20
+// - "z": a has 3 (non-null) → keeps 3
+// - "w": a has no entry → comes from b → 40
+
+
+
+
+ Example 2 — DataFrame: patch missing cells across row/column union
+ import { DataFrame, combineFirstDataFrame } from "tsb";
+
+const a = DataFrame.fromColumns(
+ { x: [1, null], y: [3, 4] },
+ { index: ["r0", "r1"] },
+);
+const b = DataFrame.fromColumns(
+ { x: [10, 20], z: [30, 40] },
+ { index: ["r0", "r2"] },
+);
+
+const result = combineFirstDataFrame(a, b);
+// rows: r0, r1, r2
+// columns: x, y, z
+//
+// result["r0"]["x"] = 1 (a wins)
+// result["r1"]["x"] = null (a had null, b has no r1 → null)
+// result["r2"]["x"] = 20 (a has no r2 → from b)
+// result["r0"]["y"] = 3 (a only)
+// result["r1"]["y"] = 4 (a only)
+// result["r2"]["y"] = null (no r2 in a, no y in b)
+// result["r0"]["z"] = 30 (b only)
+// result["r1"]["z"] = null (b has no r1)
+// result["r2"]["z"] = 40 (b only)
+
+
+
+
+ Example 3 — NaN is treated as missing
+ import { Series, combineFirstSeries } from "tsb";
+
+const sensor1 = new Series({
+ data: [NaN, 22.5, 23.1, NaN],
+ index: [0, 1, 2, 3],
+ name: "temperature",
+});
+const sensor2 = new Series({
+ data: [21.0, 22.0, NaN, 24.0],
+ index: [0, 1, 2, 3],
+ name: "temperature",
+});
+
+const merged = combineFirstSeries(sensor1, sensor2);
+// values: [21.0, 22.5, 23.1, 24.0]
+// Gaps in sensor1 filled from sensor2
+
+
+
+
+ Example 4 — Temporal data backfill
+ import { Series, combineFirstSeries } from "tsb";
+
+// Primary data source with some gaps
+const primary = new Series({
+ data: [100, null, 102, null, 104],
+ index: ["2024-01", "2024-02", "2024-03", "2024-04", "2024-05"],
+});
+
+// Secondary source to fill gaps + extends to June
+const backup = new Series({
+ data: [99, 101, 103, 103, 105, 106],
+ index: ["2024-01", "2024-02", "2024-03", "2024-04", "2024-05", "2024-06"],
+});
+
+const complete = combineFirstSeries(primary, backup);
+// index: 2024-01, 2024-02, 2024-03, 2024-04, 2024-05, 2024-06
+// values: 100, 101, 102, 103, 104, 106
+
+
+
+
+
diff --git a/playground/compare.html b/playground/compare.html
new file mode 100644
index 00000000..c4127f6d
--- /dev/null
+++ b/playground/compare.html
@@ -0,0 +1,273 @@
+
+
+
+
+
+ tsb — Comparison Ops | Interactive Playground
+
+
+
+
+
+
+
+
+
🔍 Comparison Operations
+
+ tsb implements all six pandas comparison methods:
+ eq, ne, lt, gt, le, ge.
+ They work on both Series and DataFrame , and accept either a scalar
+ or another Series/DataFrame as the other argument.
+
+
All functions return a boolean Series/DataFrame. Missing values (null / NaN) always yield false.
+
+
+
+ Function pandas equivalent Operator Description
+
+
+ seriesEq(s, other) s.eq(other) == Element-wise equality
+ seriesNe(s, other) s.ne(other) != Element-wise inequality
+ seriesLt(s, other) s.lt(other) < Less than
+ seriesGt(s, other) s.gt(other) > Greater than
+ seriesLe(s, other) s.le(other) <= Less than or equal
+ seriesGe(s, other) s.ge(other) >= Greater than or equal
+
+
+
DataFrame variants follow the same pattern: dataFrameEq, dataFrameNe, etc.
+
+
+
+
+
1 — seriesEq with a scalar
+
Compare every element of a Series against a single scalar value:
+
import { Series, seriesEq } from "tsb" ;
+
+const s = new Series ({ data: [1 , 2 , 3 , 2 , 1 ] });
+
+const result = seriesEq (s, 2 );
+
+
+
+
+
+
+
+
+
2 — seriesNe: inequality
+
seriesNe is the complement of seriesEq for non-null values:
+
import { Series, seriesNe } from "tsb" ;
+
+const s = new Series ({ data: ["apple" , "banana" , "apple" , "cherry" ] });
+
+seriesNe (s, "apple" ).values;
+
+
+
+
+
+
3 — Ordering comparisons: lt, gt, le, ge
+
Order comparisons work for numbers, strings, or any comparable type:
+
import { Series, seriesLt, seriesGt, seriesLe, seriesGe } from "tsb" ;
+
+const scores = new Series ({ data: [45 , 72 , 88 , 60 , 95 ] });
+
+seriesLt (scores, 60 ).values;
+seriesGe (scores, 60 ).values;
+
+
+
+
+
+
+
+
4 — Comparing two Series element-by-element
+
Pass a Series as other to compare position-by-position:
+
import { Series, seriesEq, seriesLt } from "tsb" ;
+
+const actual = new Series ({ data: [1 , 2 , 3 , 4 ] });
+const expected = new Series ({ data: [1 , 3 , 3 , 2 ] });
+
+seriesEq (actual, expected).values;
+seriesLt (actual, expected).values;
+
+
+
+
+
+
+
5 — Missing value behaviour
+
+ Comparing a missing value against anything (including another missing value)
+ always returns false. This matches pandas for eq, lt, gt, le, and ge;
+ note that pandas' ne returns true for missing values, whereas tsb
+ uniformly returns false for all six comparison methods.
+
+
import { Series, seriesEq, seriesNe, seriesLt } from "tsb" ;
+
+const s = new Series ({ data: [1 , null , NaN , 3 ] });
+
+seriesEq (s, 1 ).values;
+seriesNe (s, 1 ).values;
+seriesLt (s, 2 ).values;
+
+
+seriesEq (s, null ).values;
+
+
+
+
+
6 — DataFrame comparison with a scalar
+
Broadcast a scalar to every cell in a DataFrame:
+
import { DataFrame, dataFrameGt, dataFrameLe } from "tsb" ;
+
+const df = DataFrame.fromColumns ({
+ math: [55 , 72 , 88 ],
+ science: [60 , 45 , 91 ],
+});
+
+dataFrameGt (df, 60 ).col("math" ).values;
+
+
+dataFrameLe (df, 60 ).col("science" ).values;
+
+
+
+
+
+
7 — DataFrame compared against another DataFrame
+
Column names are used to align the two DataFrames. Missing columns in other yield false:
+
import { DataFrame, dataFrameEq } from "tsb" ;
+
+const df1 = DataFrame.fromColumns ({ a: [1 , 2 ], b: [3 , 4 ] });
+const df2 = DataFrame.fromColumns ({ a: [1 , 0 ], b: [3 , 5 ] });
+
+dataFrameEq (df1, df2).col("a" ).values;
+dataFrameEq (df1, df2).col("b" ).values;
+
+
+
+
+
8 — Combining with whereSeries for conditional selection
+
Comparison ops pair naturally with whereSeries / maskSeries:
+
import { Series, seriesGe, whereSeries } from "tsb" ;
+
+const temps = new Series ({ data: [18 , 22 , 30 , 15 , 27 ] });
+
+
+const isWarm = seriesGe (temps, 20 );
+const warmOnly = whereSeries (temps, isWarm);
+
+warmOnly.values;
+
+
+
+
+
+
+
diff --git a/playground/crosstab.html b/playground/crosstab.html
new file mode 100644
index 00000000..484154f4
--- /dev/null
+++ b/playground/crosstab.html
@@ -0,0 +1,217 @@
+
+
+
+
+
+ tsb — crosstab: cross-tabulation
+
+
+
+ crosstab tsb
+
+ Cross-tabulation — the TypeScript port of
+ pandas.crosstab().
+ Count (or aggregate) the co-occurrence of two categorical variables,
+ producing a two-dimensional frequency table.
+
+
+ Supports margins (row/column totals), normalize
+ (proportions), custom aggfunc, and missing-value control
+ via dropna.
+
+
+ 1. Basic frequency table
+
+
Count how often each combination of row/column categories appears.
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 2. With margins (row/column totals)
+
+
+ Set margins: true to add an "All" row and
+ column showing totals. Use marginsName to change the
+ label.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 3. Normalize to proportions
+
+
+ Use normalize: true (or "all",
+ "index", "columns") to convert raw counts
+ into proportions.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 4. Custom aggregation (values + aggfunc)
+
+
+ Provide numeric values and an aggfunc to
+ aggregate values within each cell instead of just counting.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 5. seriesCrosstab — Series input
+
+
+ Use seriesCrosstab to cross-tabulate two
+ Series objects directly. The Series .name
+ is used as the default axis name.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 6. Missing values (dropna)
+
+
+ By default (dropna: true), any row where either factor is
+ missing is dropped. Set dropna: false to include missing
+ values as their own "NaN" category.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+
+
+
diff --git a/playground/cut.html b/playground/cut.html
new file mode 100644
index 00000000..524363c7
--- /dev/null
+++ b/playground/cut.html
@@ -0,0 +1,125 @@
+
+
+
+
+
+ tsb — cut / qcut
+
+
+
+
+← tsb playground
+cut / qcut
+Bin continuous values into discrete intervals — mirrors pandas.cut() and pandas.qcut().
+
+
+ 1 — cut: equal-width bins
+ cut(x, bins) divides the range of x into bins equal-width intervals. Each value is labelled with the interval it falls into.
+import { Series, cut } from "tsb";
+
+const scores = new Series({ data: [15, 32, 47, 63, 78, 91], name: "score" });
+const binned = cut(scores, 3);
+console.log(binned.toArray());
+// ["(14.924, 40.667]", "(14.924, 40.667]", "(40.667, 66.333]",
+// "(40.667, 66.333]", "(66.333, 92.091]", "(66.333, 92.091]"]
+ ["(14.924, 40.667]", "(14.924, 40.667]", "(40.667, 66.333]", "(40.667, 66.333]", "(66.333, 92.091]", "(66.333, 92.091]"]
+
+
+
+ 2 — cut: explicit bin edges
+ Pass an array of bin edges for full control over boundaries. Values outside the edges become null.
+const ages = new Series({ data: [5, 15, 25, 45, 65, 80] });
+const groups = cut(ages, [0, 18, 60, 100], {
+ labels: ["youth", "adult", "senior"],
+});
+console.log(groups.toArray());
+// ["youth", "youth", "adult", "adult", "senior", "senior"]
+ ["youth", "youth", "adult", "adult", "senior", "senior"]
+
+
+
+ 3 — cut: integer codes
+ Pass labels: false to get zero-indexed integer bin codes instead of interval strings.
+const data = [10, 20, 30, 40, 50];
+const codes = cut(data, 3, { labels: false });
+console.log(codes.toArray());
+// [0, 0, 1, 2, 2]
+ [0, 0, 1, 2, 2]
+
+
+
+ 4 — cut: right=false (left-closed intervals)
+ By default intervals are right-closed (a, b]. Set right: false for left-closed [a, b).
+const vals = new Series({ data: [0, 1, 2, 3] });
+const leftClosed = cut(vals, [0, 1, 2, 3], { right: false });
+console.log(leftClosed.toArray());
+// ["[0, 1)", "[1, 2)", "[2, 3)", "[2, 3)"]
+// Note: 3 falls in last bin because right edge of last bin is included
+ ["[0, 1)", "[1, 2)", "[2, 3)", "[2, 3)"]
+
+
+
+ 5 — qcut: quantile-based binning
+ qcut(x, q) creates bins so that each bin holds approximately the same number of observations (equal-frequency binning).
+import { qcut } from "tsb";
+
+const income = new Series({ data: [20000, 35000, 42000, 58000, 75000, 120000] });
+const halves = qcut(income, 2);
+console.log(halves.toArray());
+// Lower and upper halves by median
+ ["(19999.98, 50000.0]", "(19999.98, 50000.0]", "(19999.98, 50000.0]", "(50000.0, 120000.0]", "(50000.0, 120000.0]", "(50000.0, 120000.0]"]
+
+
+
+ 6 — qcut: custom quantile fractions
+ Pass an array of quantile fractions [0, ..., 1] for precise control over bin boundaries.
+const scores2 = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];
+const quartileBins = qcut(scores2, [0, 0.25, 0.5, 0.75, 1.0], {
+  labels: ["Q1", "Q2", "Q3", "Q4"],
+});
+console.log(quartileBins.toArray());
+// ["Q1", "Q1", "Q2", "Q2", "Q3", "Q3", "Q4", "Q4", "Q4", "Q4"]
+ ["Q1", "Q1", "Q2", "Q2", "Q3", "Q3", "Q4", "Q4", "Q4", "Q4"]
+
+
+
+ 7 — cutIntervalIndex: inspect the bins
+ Use cutIntervalIndex() to retrieve the IntervalIndex that describes the bins, useful for further analysis or re-use.
+import { cutIntervalIndex } from "tsb";
+
+const idx = cutIntervalIndex([1, 2, 3, 4, 5], 3);
+console.log(idx.size); // 3
+console.log(idx.at(0).toString()); // "(0.996, 2.333]"
+console.log(idx.at(1).toString()); // "(2.333, 3.667]"
+console.log(idx.at(2).toString()); // "(3.667, 5.005]"
+ 3
+(0.996, 2.333]
+(2.333, 3.667]
+(3.667, 5.005]
+
+
+
+ 8 — Handling duplicates
+ When bin edges contain duplicates (common with repeated values in qcut), control behavior with duplicates: "drop".
+// Repeated values create duplicate quantile edges → use "drop" to handle gracefully
+const skewed = [1, 1, 1, 1, 1, 2, 3, 4, 5];
+const result = qcut(skewed, 4, { duplicates: "drop" });
+console.log(result.toArray());
+ null values for duplicates, non-null where distinct bins exist
+ 💡 Use duplicates: "drop" whenever your data has many repeated values. The default "raise" behaviour alerts you to potential binning issues.
+
+
+
diff --git a/playground/date-offset.html b/playground/date-offset.html
new file mode 100644
index 00000000..16a6e3bd
--- /dev/null
+++ b/playground/date-offset.html
@@ -0,0 +1,261 @@
+
+
+
+
+
+ tsb — DateOffset
+
+
+
+
+← tsb playground
+DateOffset
+
+ Calendar-aware date arithmetic — mirrors
+ pandas.tseries.offsets.
+
+
+
+ 1 — Available offsets
+
+ tsb provides eleven offset types for shifting dates by calendar-aware units.
+ All operations work in UTC to avoid DST surprises.
+
+
+ Class pandas equivalent Description
+ Day(n)Day(n)n calendar days
+ Hour(n)Hour(n)n hours
+ Minute(n)Minute(n)n minutes
+ Second(n)Second(n)n seconds
+ Milli(n)Milli(n)n milliseconds
+ Week(n, {weekday?})Week(n, weekday)n weeks, optional weekday alignment
+ MonthEnd(n)MonthEnd(n)n month-ends (last day of month)
+ MonthBegin(n)MonthBegin(n)n month-starts (first day of month)
+ YearEnd(n)YearEnd(n)n year-ends (Dec 31)
+ YearBegin(n)YearBegin(n)n year-starts (Jan 1)
+ BusinessDay(n)BDay(n)n business days (Mon–Fri)
+
+
+
+
+ 2 — Fixed-time offsets (Day, Hour, Minute, Second, Milli)
+
+ These offsets add a fixed number of milliseconds. Every date is "on offset"
+ so rollforward and rollback are no-ops.
+
+ import { Day, Hour, Minute, Second, Milli } from "tsb";
+
+const d = new Date(Date.UTC(2024, 0, 1)); // 2024-01-01T00:00:00Z
+
+new Day(3).apply(d).toISOString(); // "2024-01-04T00:00:00.000Z"
+new Day(-1).apply(d).toISOString(); // "2023-12-31T00:00:00.000Z"
+new Hour(2).apply(d).toISOString(); // "2024-01-01T02:00:00.000Z"
+new Minute(90).apply(d).toISOString(); // "2024-01-01T01:30:00.000Z"
+new Second(30).apply(d).toISOString(); // "2024-01-01T00:00:30.000Z"
+new Milli(500).apply(d).getTime() - d.getTime(); // 500
+ 2024-01-04T00:00:00.000Z
+2023-12-31T00:00:00.000Z
+2024-01-01T02:00:00.000Z
+2024-01-01T01:30:00.000Z
+2024-01-01T00:00:30.000Z
+500
+
+
+
+ 3 — Week offset
+
+ Week(n) adds n × 7 days. With an optional
+ weekday (pandas convention: 0 = Monday … 6 = Sunday),
+ the offset snaps to the nearest occurrence of that weekday.
+
+ import { Week } from "tsb";
+
+const wed = new Date(Date.UTC(2024, 0, 17)); // Wednesday 2024-01-17
+const mon = new Date(Date.UTC(2024, 0, 15)); // Monday 2024-01-15
+
+// Plain week — no alignment
+new Week(2).apply(wed).toISOString().slice(0, 10); // "2024-01-31"
+
+// Weekday-aligned (weekday=0 → Monday)
+const wk = new Week(1, { weekday: 0 });
+wk.apply(wed).toISOString().slice(0, 10); // "2024-01-22" (next Mon)
+wk.apply(mon).toISOString().slice(0, 10); // "2024-01-22" (Mon → next Mon)
+
+// Rollforward / rollback
+wk.rollforward(wed).toISOString().slice(0, 10); // "2024-01-22"
+wk.rollback(wed).toISOString().slice(0, 10); // "2024-01-15"
+
+// onOffset
+wk.onOffset(mon); // true
+wk.onOffset(wed); // false
+ "2024-01-31"
+"2024-01-22"
+"2024-01-22"
+"2024-01-22"
+"2024-01-15"
+true
+false
+
+
+
+ 4 — MonthEnd & MonthBegin
+
+ Anchored to the last and first day of each calendar month respectively.
+ Non-anchor dates are snapped before counting remaining steps.
+
+ import { MonthEnd, MonthBegin } from "tsb";
+
+const mid = new Date(Date.UTC(2024, 0, 15)); // 2024-01-15
+const end = new Date(Date.UTC(2024, 0, 31)); // 2024-01-31
+
+// MonthEnd
+new MonthEnd(1).apply(mid).toISOString().slice(0, 10); // "2024-01-31"
+new MonthEnd(2).apply(mid).toISOString().slice(0, 10); // "2024-02-29" (leap)
+new MonthEnd(1).apply(end).toISOString().slice(0, 10); // "2024-02-29"
+new MonthEnd(-1).apply(mid).toISOString().slice(0, 10); // "2023-12-31"
+
+new MonthEnd(0).rollforward(mid).toISOString().slice(0, 10); // "2024-01-31"
+new MonthEnd(0).rollback(mid).toISOString().slice(0, 10); // "2023-12-31"
+
+// MonthBegin
+new MonthBegin(1).apply(mid).toISOString().slice(0, 10); // "2024-02-01"
+new MonthBegin(-1).apply(mid).toISOString().slice(0, 10); // "2024-01-01"
+
+new MonthBegin(0).rollforward(mid).toISOString().slice(0, 10); // "2024-02-01"
+new MonthBegin(0).rollback(mid).toISOString().slice(0, 10); // "2024-01-01"
+ "2024-01-31"
+"2024-02-29"
+"2024-02-29"
+"2023-12-31"
+"2024-01-31"
+"2023-12-31"
+"2024-02-01"
+"2024-01-01"
+"2024-02-01"
+"2024-01-01"
+
+
+
+ 5 — YearEnd & YearBegin
+
+ YearEnd anchors to December 31; YearBegin
+ anchors to January 1.
+
+ import { YearEnd, YearBegin } from "tsb";
+
+const d = new Date(Date.UTC(2024, 6, 4)); // 2024-07-04
+
+new YearEnd(1).apply(d).toISOString().slice(0, 10); // "2024-12-31"
+new YearEnd(2).apply(d).toISOString().slice(0, 10); // "2025-12-31"
+new YearEnd(-1).apply(d).toISOString().slice(0, 10); // "2023-12-31"
+
+new YearBegin(1).apply(d).toISOString().slice(0, 10); // "2025-01-01"
+new YearBegin(-1).apply(d).toISOString().slice(0, 10); // "2024-01-01"
+
+const yr2024 = new Date(Date.UTC(2024, 11, 31));
+new YearEnd(0).rollforward(yr2024).toISOString().slice(0, 10); // "2024-12-31"
+new YearEnd(0).rollback(d).toISOString().slice(0, 10); // "2023-12-31"
+ "2024-12-31"
+"2025-12-31"
+"2023-12-31"
+"2025-01-01"
+"2024-01-01"
+"2024-12-31"
+"2023-12-31"
+
+
+
+ 6 — BusinessDay
+
+ Advances by weekdays only (Monday–Friday), skipping Saturday and Sunday.
+ Starting from a non-business-day, each step moves to the next
+ (or previous) business day.
+
+ import { BusinessDay } from "tsb";
+
+const fri = new Date(Date.UTC(2024, 0, 12)); // Friday 2024-01-12
+const sat = new Date(Date.UTC(2024, 0, 13)); // Saturday
+
+new BusinessDay(1).apply(fri).toISOString().slice(0, 10); // "2024-01-15" (Mon)
+new BusinessDay(3).apply(fri).toISOString().slice(0, 10); // "2024-01-17" (Wed)
+new BusinessDay(-1).apply(fri).toISOString().slice(0, 10); // "2024-01-11" (Thu)
+
+// From Saturday — first step lands on next Monday
+new BusinessDay(1).apply(sat).toISOString().slice(0, 10); // "2024-01-15"
+new BusinessDay(-1).apply(sat).toISOString().slice(0, 10); // "2024-01-12" (Fri)
+
+// Rolling
+new BusinessDay(0).rollforward(sat).toISOString().slice(0, 10); // "2024-01-15"
+new BusinessDay(0).rollback(sat).toISOString().slice(0, 10); // "2024-01-12"
+
+new BusinessDay(0).onOffset(fri); // true
+new BusinessDay(0).onOffset(sat); // false
+ "2024-01-15"
+"2024-01-17"
+"2024-01-11"
+"2024-01-15"
+"2024-01-12"
+"2024-01-15"
+"2024-01-12"
+true
+false
+
+
+
+ 7 — multiply & negate
+
+ Every offset class supports multiply(factor) and
+ negate() to produce a scaled or reversed copy.
+
+ import { Day, MonthEnd, BusinessDay } from "tsb";
+
+new Day(3).multiply(4).n; // 12
+new MonthEnd(2).negate().n; // -2
+new BusinessDay(5).multiply(2).n; // 10
+
+// negate is equivalent to multiply(-1)
+const bday = new BusinessDay(3);
+const fri = new Date(Date.UTC(2024, 0, 12));
+
+bday.negate().apply(bday.apply(fri)).toISOString().slice(0, 10); // "2024-01-12"
+ 12
+-2
+10
+"2024-01-12"
+
+
+
+ 8 — Static factory methods
+ Every class also provides a static of(n) factory:
+ import { Day, MonthEnd, Week, BusinessDay } from "tsb";
+
+const d = new Date(Date.UTC(2024, 0, 15));
+
+Day.of(5).apply(d).toISOString().slice(0, 10); // "2024-01-20"
+MonthEnd.of(1).apply(d).toISOString().slice(0, 10); // "2024-01-31"
+Week.of(1, { weekday: 0 }).apply(d).toISOString().slice(0, 10); // "2024-01-22"
+BusinessDay.of(2).apply(d).toISOString().slice(0, 10); // "2024-01-17"
+ "2024-01-20"
+"2024-01-31"
+"2024-01-22"
+"2024-01-17"
+
+
+
+
diff --git a/playground/date_range.html b/playground/date_range.html
new file mode 100644
index 00000000..143022eb
--- /dev/null
+++ b/playground/date_range.html
@@ -0,0 +1,517 @@
+
+
+
+
+
+ tsb — date_range / bdate_range
+
+
+
+
+
+ ← back to index
+
+ Frequency Reference
+
+
+ String Offset Example
+
+ D Calendar day 2024-01-01 → 2024-01-02
+ B Business day (Mon–Fri) 2024-01-05 → 2024-01-08
+ H Hour 2024-01-01T00 → 2024-01-01T01
+ T / min Minute
+ S Second
+ MS Month-start (1st) 2024-01-01 → 2024-02-01
+ ME Month-end (last day) 2024-01-31 → 2024-02-29
+ QS Quarter-start 2024-01-01 → 2024-04-01
+ QE Quarter-end 2024-03-31 → 2024-06-30
+ AS / YS Year-start (Jan 1) 2024-01-01 → 2025-01-01
+ AE / YE Year-end (Dec 31) 2024-12-31 → 2025-12-31
+
+
+
+
+ Interactive Builder
+
+
+
+
+ Function
+ date_range bdate_range
+
+
+ start
+
+
+
+ end
+
+
+
+ periods
+
+
+
+ freq
+
+ D — Day
+ B — BusinessDay
+ H — Hour
+ T — Minute
+ S — Second
+ MS — MonthStart
+ ME — MonthEnd
+ QS — QuarterStart
+ QE — QuarterEnd
+ AS — YearStart
+ AE — YearEnd
+ W — Week
+
+
+
+ normalize
+ false true
+
+
+ Generate
+
+
+
+
Result
+
Press Generate →
+
+
+
+
+ DatetimeIndex Operations
+
+
+ Generate an index, then apply .sort(), .unique(), .normalize(), .shift(n, freq), .filter(), .min() / .max().
+
+
+
+
+ Base range (start + periods, freq D)
+
+
+
+ periods
+
+
+
+ Operation
+
+ sort(ascending=true)
+ sort(ascending=false)
+ unique()
+ normalize()
+ shift(+3, "D")
+ shift(-3, "D")
+ snap("MS")
+ min() + max()
+ filter(weekday ≠ Mon)
+ slice(1, 4)
+ concat(7-day range 2024-02-01)
+
+
+
Apply
+
+
+
Result
+
Press Apply →
+
+
+
+
+ Code Snippets
+
+
Select a scenario to see the TypeScript code:
+
+ Basic daily range
+ Business days
+ Monthly
+ end + periods
+ Index operations
+
+
+
+
+
+
+
diff --git a/playground/datetime_tz.html b/playground/datetime_tz.html
new file mode 100644
index 00000000..93a309f4
--- /dev/null
+++ b/playground/datetime_tz.html
@@ -0,0 +1,187 @@
+
+
+
+
+
+ tsb — TZDatetimeIndex: tz_localize & tz_convert
+
+
+
+ TZDatetimeIndex tsb
+
+ Timezone-aware date sequences — the TypeScript port of
+ pandas.DatetimeIndex.tz_localize and
+ pandas.DatetimeIndex.tz_convert.
+
+
+ All timestamps are stored as UTC milliseconds internally.
+ tz_localize interprets wall-clock times in the given IANA timezone,
+ while tz_convert preserves UTC and only changes the display zone.
+
+
+ 1. tz_localize — naive → tz-aware
+
+
Treat each timestamp's UTC components as wall-clock times in the given timezone.
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 2. tz_convert — change display timezone
+
+
Keep the same UTC instants; re-display them in a different timezone.
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 3. Round-trip & tz_localize_none
+
+
Strip the timezone with tz_localize_none() to get a naive index back.
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 4. Transformations (sort / filter / unique)
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 5. DST Spring-forward & Fall-back (America/New_York 2024)
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+
+
+
diff --git a/playground/dropna.html b/playground/dropna.html
new file mode 100644
index 00000000..fa5758cb
--- /dev/null
+++ b/playground/dropna.html
@@ -0,0 +1,171 @@
+
+
+
+
+
+ tsb — dropna
+
+
+
+
+
+
+
+
+ dropna(input, options?) removes rows or columns that contain missing values
+ (null, undefined, or NaN) from a Series or DataFrame.
+ It mirrors
+ pandas.DataFrame.dropna
+ and
+ pandas.Series.dropna .
+
+
+ Options (DataFrame only)
+
+ Option Type Default Description
+
+ axis0 | 1 | "index" | "columns"0Drop rows (0) or columns (1).
+ how"any" | "all""any"Drop if any value is missing, or only if all are missing.
+ threshnumber— Minimum non-null count to keep (overrides how).
+ subsetstring[]— Only check these columns when scanning rows (axis=0 only).
+
+
+
+
+
+ Example 1 — Series: drop missing elements
+ import { Series, dropna } from "tsb";
+
+const s = new Series({ data: [1, null, NaN, 4, undefined, 6] });
+const clean = dropna(s);
+
+clean.values; // [1, 4, 6]
+clean.size; // 3
+
+
+
+
+ Example 2 — DataFrame: drop rows with any missing value (default)
+ import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", "Bob", "Carol", "Dave"],
+ score: [95, null, 88, null ],
+ grade: ["A", "B", null, "C" ],
+});
+
+// Drop any row that has at least one null
+const clean = dropna(df);
+clean.shape; // [1, 3] — only "Alice" row survives (score=95, grade="A")
+
+
+
+
+ Example 3 — how = "all": only drop fully-null rows
+ import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, null, null],
+ b: [null, null, 3 ],
+});
+
+// Row 1: both null → dropped
+// Row 0 and Row 2: at least one non-null → kept
+const clean = dropna(df, { how: "all" });
+clean.shape; // [2, 2]
+
+
+
+
+ Example 4 — thresh: require at least N non-null values
+ import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, null, 3 ],
+ b: [4, null, null],
+ c: [7, null, 9 ],
+});
+
+// thresh=2: keep rows where at least 2 of 3 values are present
+// Row 0: 3 present → keep
+// Row 1: 0 present → drop
+// Row 2: 2 present → keep
+const clean = dropna(df, { thresh: 2 });
+clean.shape; // [2, 3]
+
+
+
+
+ Example 5 — subset: only check specific columns
+ import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+ id: [1, 2, 3 ],
+ score: [95, null, 88 ],
+ notes: ["ok", "ok", null],
+});
+
+// Only check the "score" column for nulls — ignore "notes"
+const clean = dropna(df, { subset: ["score"] });
+// Row 1 (score=null) is dropped; Row 2 (notes=null but score=88) is kept.
+clean.shape; // [2, 3]
+
+
+
+
+ Example 6 — axis = 1: drop columns with missing values
+ import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, null, 3], // has a null → dropped
+ b: [4, 5, 6], // no nulls → kept
+ c: [7, 8, null],// has a null → dropped
+});
+
+const clean = dropna(df, { axis: 1 });
+clean.columns.toArray(); // ["b"]
+clean.shape; // [3, 1]
+
+
+
+
+ Example 7 — axis = 1, how = "all": only drop all-null columns
+ import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [null, null, null], // all null → dropped
+ b: [1, null, 3 ], // some null → kept
+ c: [4, 5, 6 ], // no null → kept
+});
+
+const clean = dropna(df, { axis: 1, how: "all" });
+clean.columns.toArray(); // ["b", "c"]
+
+
+
+
+ Pandas equivalence table
+
+ tsb pandas
+
+ dropna(series)series.dropna()
+ dropna(df)df.dropna()
+ dropna(df, { how: "all" })df.dropna(how="all")
+ dropna(df, { thresh: 2 })df.dropna(thresh=2)
+ dropna(df, { subset: ["a", "b"] })df.dropna(subset=["a", "b"])
+ dropna(df, { axis: 1 })df.dropna(axis=1)
+ dropna(df, { axis: 1, how: "all" })df.dropna(axis=1, how="all")
+
+
+
+
+
+
+
+
diff --git a/playground/duplicated.html b/playground/duplicated.html
new file mode 100644
index 00000000..09a1aff7
--- /dev/null
+++ b/playground/duplicated.html
@@ -0,0 +1,133 @@
+
+
+
+
+
+ tsb — duplicated / drop_duplicates
+
+
+
+
+
+
+
+
+ duplicatedDataFrame(df, options?) returns a boolean Series indicating
+ which rows are duplicates of a previous (or later, depending on keep) row.
+ dropDuplicatesDataFrame(df, options?) returns a new DataFrame with duplicate rows
+ removed.
+ Both mirror
+ pandas.DataFrame.duplicated
+ and
+ pandas.DataFrame.drop_duplicates .
+
+
+
+ Series variants duplicatedSeries and dropDuplicatesSeries operate
+ on a single column.
+
+
+ Options
+
+ Option Type Default Description
+
+ subsetstring[]all columns Only consider these columns when checking for duplicates (DataFrame only).
+
+ keep
+ "first" | "last" | false
+ "first"
+
+ "first" — keep the first occurrence, mark later ones.
+ "last" — keep the last occurrence, mark earlier ones.
+ false — mark all occurrences of any duplicate.
+
+
+
+
+
+
+
+ Example 1 — Basic: find duplicate rows
+ import { DataFrame, duplicatedDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", "Bob", "Alice", "Carol"],
+ score: [90, 85, 90, 88 ],
+});
+
+// Row 2 ("Alice", 90) is a duplicate of Row 0
+const mask = duplicatedDataFrame(df);
+mask.values; // [false, false, true, false]
+
+
+
+
+ Example 2 — Drop duplicate rows
+ import { DataFrame, dropDuplicatesDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", "Bob", "Alice", "Carol"],
+ score: [90, 85, 90, 88 ],
+});
+
+const deduped = dropDuplicatesDataFrame(df);
+deduped.shape; // [3, 2] — "Alice" row 2 removed
+
+
+
+
+ Example 3 — subset: only check specific columns
+ import { DataFrame, dropDuplicatesDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ id: [1, 2, 1, 3],
+ value: ["a", "b", "c", "d"], // different values, but same id
+});
+
+// Drop based on "id" only — row 2 (id=1) is dup even though value differs
+const deduped = dropDuplicatesDataFrame(df, { subset: ["id"] });
+deduped.shape; // [3, 2]
+
+
+
+
+ Example 4 — keep="last": keep the last occurrence
+ import { DataFrame, duplicatedDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ city: ["NYC", "LA", "NYC", "Chicago"],
+});
+
+// keep="last" → mark the FIRST occurrence of each dup, keep the last
+duplicatedDataFrame(df, { keep: "last" }).values;
+// [true, false, false, false]
+
+// keep=false → mark ALL occurrences of any duplicate
+duplicatedDataFrame(df, { keep: false }).values;
+// [true, false, true, false]
+
+
+
+
+ Example 5 — Series: deduplicate values
+ import { Series, duplicatedSeries, dropDuplicatesSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 1, 3, 2, 4] });
+
+duplicatedSeries(s).values; // [false, false, true, false, true, false]
+dropDuplicatesSeries(s).values; // [1, 2, 3, 4]
+
+// keep=false → mark all duplicate values
+duplicatedSeries(s, { keep: false }).values;
+// [true, true, true, false, true, false]
+
+
+
+
+
+
+
diff --git a/playground/explode.html b/playground/explode.html
new file mode 100644
index 00000000..4e994cf6
--- /dev/null
+++ b/playground/explode.html
@@ -0,0 +1,139 @@
+
+
+
+
+
+ tsb — explode
+
+
+
+
+← tsb playground
+explode
+Transform list-like elements into individual rows — mirrors pandas.Series.explode() and pandas.DataFrame.explode().
+
+
+ 1 — Series.explode: lists to rows
+ explodeSeries(s) expands each array element into its own row. The original index label is repeated for each item. Null / empty arrays each produce a single null row.
+ import { Series, explodeSeries } from "tsb";
+
+const s = new Series({
+ data: [[1, 2, 3], "foo", [], [3, 4]],
+ name: "x",
+});
+
+const out = explodeSeries(s);
+console.log([...out.values]); // [1, 2, 3, "foo", null, 3, 4]
+console.log([...out.index.values]); // [0, 0, 0, 1, 2, 3, 3]
+
+ Loading…
+
+
+
+ 2 — Series.explode with ignoreIndex
+ Pass ignoreIndex: true to replace the resulting index with a fresh RangeIndex instead of repeating original labels.
+ import { Series, explodeSeries } from "tsb";
+
+const s = new Series({
+ data: [[10, 20], [30]],
+ index: ["row-A", "row-B"],
+});
+
+// Default: repeats original labels
+const repeated = explodeSeries(s);
+console.log([...repeated.index.values]); // ["row-A", "row-A", "row-B"]
+
+// With ignoreIndex: fresh RangeIndex
+const fresh = explodeSeries(s, { ignoreIndex: true });
+console.log([...fresh.index.values]); // [0, 1, 2]
+
+ Loading…
+
+
+
+ 3 — DataFrame.explode: expand a list column
+ explodeDataFrame(df, "col") explodes a single column; all other columns repeat their value for every generated row.
+ import { DataFrame, explodeDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", "Bob" ],
+ scores: [[95, 87], [72, 65, 88]],
+});
+
+const out = explodeDataFrame(df, "scores");
+console.log([...out.col("name").values]); // ["Alice","Alice","Bob","Bob","Bob"]
+console.log([...out.col("scores").values]); // [95, 87, 72, 65, 88]
+console.log([...out.index.values]); // [0, 0, 1, 1, 1]
+
+ Loading…
+
+
+
+ 4 — Handling null and empty lists
+ Null values remain as a single null row. An empty array also becomes a single null row (matching pandas' NaN behaviour).
+ import { DataFrame, explodeDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ id: [1, 2, 3 ],
+ tags: [["a", "b"], null, []],
+});
+
+const out = explodeDataFrame(df, "tags");
+console.log([...out.col("id").values]); // [1, 1, 2, 3]
+console.log([...out.col("tags").values]); // ["a", "b", null, null]
+
+ Loading…
+
+
+
+ 5 — Multi-column simultaneous explode
+ Pass an array of column names to explode multiple columns at the same time. Each row's lists must have the same length across the exploded columns.
+ import { DataFrame, explodeDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ key: ["x", "y" ],
+ left: [[1, 2], [3, 4] ],
+ right: [["a", "b"], ["c", "d"] ],
+});
+
+const out = explodeDataFrame(df, ["left", "right"]);
+console.log([...out.col("key").values]); // ["x","x","y","y"]
+console.log([...out.col("left").values]); // [1, 2, 3, 4]
+console.log([...out.col("right").values]); // ["a","b","c","d"]
+
+ Loading…
+
+
+
+ 6 — ignoreIndex on DataFrame.explode
+ import { DataFrame, explodeDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ v: [[10, 20], [30, 40]],
+});
+
+const out = explodeDataFrame(df, "v", { ignoreIndex: true });
+console.log([...out.index.values]); // [0, 1, 2, 3]
+
+ Loading…
+
+
+
+ Pandas parity: explodeSeries mirrors pandas.Series.explode(); explodeDataFrame mirrors pandas.DataFrame.explode(). Scalar values are passed through unchanged; empty arrays produce a single null (NaN in pandas). The ignoreIndex option corresponds to pandas' ignore_index parameter.
+
+
+
+
diff --git a/playground/factorize.html b/playground/factorize.html
new file mode 100644
index 00000000..dee95704
--- /dev/null
+++ b/playground/factorize.html
@@ -0,0 +1,154 @@
+
+
+
+
+
+ tsb — factorize: integer encoding
+
+
+
+ factorize tsb
+
+ Integer encoding of categorical values — the TypeScript port of
+ pandas.factorize() and Series.factorize().
+ Maps each unique value to a monotonically increasing integer code,
+ returning both the codes array and the
+ uniques array.
+
+
+ Missing values (null / undefined / NaN) receive code -1
+ by default. Useful as a lightweight alternative to full dummy encoding
+ when you need ordinal indices for categorical data.
+
+
+ 1. Basic factorize — first-seen order
+
+
+ By default, unique values appear in first-seen order ,
+ matching pandas' behaviour for object arrays.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 2. Sorted uniques
+
+
+ Pass sort: true to sort unique values before assigning
+ codes. Numbers are sorted numerically; strings lexicographically.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 3. Missing values → sentinel code -1
+
+
+ Null, undefined, and NaN receive code -1 by default and
+ are not included in uniques. Set
+ useNaSentinel: false to treat them as regular values.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 4. seriesFactorize — works on a Series
+
+
+ seriesFactorize accepts a Series and returns
+ { codes: Series<number>, uniques: Series<T> }.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+
+
+
diff --git a/playground/fillna.html b/playground/fillna.html
new file mode 100644
index 00000000..90efd67c
--- /dev/null
+++ b/playground/fillna.html
@@ -0,0 +1,277 @@
+
+
+
+
+
+ tsb — fillna
+
+
+
+
+← tsb playground
+fillna
+
+ Fill missing values with a constant, forward fill, or backward fill —
+ mirrors pandas.Series.fillna() and pandas.DataFrame.fillna().
+
+
+
+
+ 1 · Scalar fill
+
+ Pass { value: scalar } to replace every missing element
+ (null, undefined, NaN) with a constant.
+
+ import { Series, fillnaSeries } from "tsb";
+
+const s = new Series({ data: [1, null, null, 4] });
+fillnaSeries(s, { value: 0 }).values;
+// → [1, 0, 0, 4]
+
+const t = new Series({ data: [NaN, 2, NaN] });
+fillnaSeries(t, { value: 99 }).values;
+// → [99, 2, 99]
+ [1, 0, 0, 4]
+[99, 2, 99]
+
+
+
+
+ 2 · Forward fill (ffill / pad)
+
+ method: "ffill" (alias "pad") carries the last known value
+ forward into subsequent missing positions. Leading nulls (before the
+ first known value) are left unchanged.
+
+ import { Series, fillnaSeries } from "tsb";
+
+const s = new Series({ data: [null, 1, null, null, 4] });
+fillnaSeries(s, { method: "ffill" }).values;
+// → [null, 1, 1, 1, 4]
+
+// Trailing nulls are filled too
+const t = new Series({ data: [1, null, null] });
+fillnaSeries(t, { method: "ffill" }).values;
+// → [1, 1, 1]
+ [null, 1, 1, 1, 4]
+[1, 1, 1]
+
+
+
+
+ 3 · Backward fill (bfill / backfill)
+
+ method: "bfill" (alias "backfill") carries the next known
+ value backward into preceding missing positions. Trailing nulls (after
+ the last known value) are left unchanged.
+
+ import { Series, fillnaSeries } from "tsb";
+
+const s = new Series({ data: [null, null, 3, null, 5] });
+fillnaSeries(s, { method: "bfill" }).values;
+// → [3, 3, 3, 5, 5]
+
+// Leading nulls are filled from the first known value
+const t = new Series({ data: [null, null, 10] });
+fillnaSeries(t, { method: "bfill" }).values;
+// → [10, 10, 10]
+ [3, 3, 3, 5, 5]
+[10, 10, 10]
+
+
+
+
+ 4 · Limiting the fill — limit
+
+ limit caps the number of consecutive missing values filled per
+ run. Positions beyond the limit remain missing.
+
+ import { Series, fillnaSeries } from "tsb";
+
+// Only fill up to 1 consecutive missing value
+const s = new Series({ data: [1, null, null, null, 5] });
+fillnaSeries(s, { method: "ffill", limit: 1 }).values;
+// → [1, 1, null, null, 5]
+
+// bfill with limit=2
+fillnaSeries(s, { method: "bfill", limit: 2 }).values;
+// → [null, null, 5, 5, 5]
+ [1, 1, null, null, 5]
+[null, null, 5, 5, 5]
+
+
+
+
+ 5 · DataFrame — scalar fill
+
+ fillnaDataFrame(df, { value: 0 }) fills every missing cell in
+ every column.
+
+ import { DataFrame, fillnaDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, null, 3],
+ b: [null, 2, null],
+});
+
+const result = fillnaDataFrame(df, { value: 0 });
+result.col("a").values; // [1, 0, 3]
+result.col("b").values; // [0, 2, 0]
+ a: [1, 0, 3]
+b: [0, 2, 0]
+
+
+
+
+ 6 · DataFrame — per-column fill map
+
+ Pass a plain object { colName: fillValue } to use a different
+ fill value for each column. Columns absent from the map are left unchanged.
+
+ import { DataFrame, fillnaDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [null, 2, null],
+ b: [1, null, 3],
+ c: [null, null, null],
+});
+
+const result = fillnaDataFrame(df, { value: { a: -1, b: 99 } });
+result.col("a").values; // [-1, 2, -1]
+result.col("b").values; // [1, 99, 3]
+result.col("c").values; // [null, null, null] ← untouched
+ a: [-1, 2, -1]
+b: [1, 99, 3]
+c: [null, null, null]
+
+
+
+
+ 7 · DataFrame — method fill (axis=0 / axis=1)
+
+ method fills propagate along an axis. The default
+ axis=0 fills down each column ; axis=1
+ fills across each row .
+
+ import { DataFrame, fillnaDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, null, null],
+ b: [null, 2, null],
+ c: [null, null, 3],
+});
+
+// axis=0 (default): ffill down each column
+const byCol = fillnaDataFrame(df, { method: "ffill" });
+byCol.col("a").values; // [1, 1, 1]
+byCol.col("b").values; // [null, 2, 2]
+byCol.col("c").values; // [null, null, 3]
+
+// axis=1: bfill across each row
+const byRow = fillnaDataFrame(df, { method: "bfill", axis: 1 });
+// row 0: [1, null, null] → bfill → [1, null, null]
+// row 1: [null, 2, null] → bfill → [2, 2, null]
+// row 2: [null, null, 3] → bfill → [3, 3, 3]
+ axis=0 ffill:
+ a: [1, 1, 1]
+ b: [null, 2, 2]
+ c: [null, null, 3]
+
+axis=1 bfill:
+ row 0: [1, null, null]
+ row 1: [2, 2, null]
+ row 2: [3, 3, 3]
+
+
+
+
+ 8 · DataFrame — fill values from a Series
+
+ When value is a Series<Scalar>, its index labels
+ are matched to DataFrame column names. This is the TypeScript equivalent of
+ df.fillna(series) in pandas.
+
+ import { DataFrame, Series, fillnaDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ price: [10, null, 30],
+ volume: [null, 200, null],
+});
+
+// Use Series index labels as column selectors
+const fills = new Series({
+ data: [0, 0],
+ index: ["price", "volume"],
+});
+
+const result = fillnaDataFrame(df, { value: fills });
+result.col("price").values; // [10, 0, 30]
+result.col("volume").values; // [0, 200, 0]
+ price: [10, 0, 30]
+volume: [0, 200, 0]
+
+
+
+
+ API summary
+
+ Function Signature (simplified) Description
+
+ fillnaSeries
+ (series, { value?, method?, limit? })
+ Fill missing values in a Series
+
+
+ fillnaDataFrame
+ (df, { value?, method?, limit?, axis? })
+ Fill missing values in a DataFrame
+
+
+
+
+ Option Type Default Description
+
+ value
+ Scalar | ColumnFillMap | Series
+ —
+ Constant or per-column fill value
+
+
+ method
+ "ffill" | "pad" | "bfill" | "backfill"
+ —
+ Propagation direction
+
+
+ limit
+ number
+ Infinity
+ Max consecutive fills per run
+
+
+ axis
+ 0 | 1 | "index" | "columns"
+ 0
+ Direction for method-based fill on DataFrames
+
+
+
+
+
+
diff --git a/playground/get_dummies.html b/playground/get_dummies.html
new file mode 100644
index 00000000..2d878020
--- /dev/null
+++ b/playground/get_dummies.html
@@ -0,0 +1,153 @@
+
+
+
+
+
+ tsb — get_dummies: one-hot encoding
+
+
+
+ get_dummies tsb
+
+ One-hot / dummy encoding — the TypeScript port of
+ pandas.get_dummies().
+ Convert categorical variables into binary indicator columns,
+ one column per unique value.
+
+
+ Common in machine learning pipelines before fitting linear models.
+ Supports prefix, prefixSep,
+ dummyNa, and dropFirst options.
+
+
+ 1. Series → indicator DataFrame
+
+
Each unique value becomes a binary column (1 = present, 0 = absent).
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 2. DataFrame — encode categorical columns
+
+
+ dataFrameGetDummies auto-detects string columns and
+ replaces them with indicator columns. Numeric columns are kept as-is.
+
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 3. Options: prefix, dummyNa, dropFirst
+
+
Fine-tune the encoding with optional parameters.
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+ 4. Encode specific columns only
+
+
Pass columns to control which DataFrame columns are encoded.
+
+
▶ Run
+
Click ▶ Run to execute
+
+
+
+
+
diff --git a/playground/infer_dtype.html b/playground/infer_dtype.html
new file mode 100644
index 00000000..2dbf1ea4
--- /dev/null
+++ b/playground/infer_dtype.html
@@ -0,0 +1,128 @@
+
+
+
+
+
+ tsb — inferDtype
+
+
+
+
+
+
+
+
+ inferDtype(values, options?) inspects an array (or Series) of values and
+ returns a string label identifying the dominant data type. It mirrors the behaviour of
+ pandas.api.types.infer_dtype .
+
+ Return values
+
+ Label Meaning
+
+ "empty"Zero elements, or all null/undefined (when skipna=true)
+ "boolean"All boolean
+ "integer"All integers (whole number or bigint)
+ "floating"All floating-point numbers (including ±Infinity, NaN)
+ "mixed-integer-float"Mix of integers and floats
+ "decimal"Mix of plain integers and bigint
+ "string"All strings
+ "date"All Date objects
+ "datetime"All Timestamp objects
+ "timedelta"All Timedelta objects
+ "period"All Period objects
+ "interval"All Interval objects
+ "mixed-integer"Mix of integers and non-numeric types
+ "mixed"Multiple heterogeneous non-numeric types
+
+
+
+
+
+ Example 1 — basic scalar types
+ import { inferDtype } from "tsb";
+
+inferDtype([1, 2, 3]); // "integer"
+inferDtype([1.1, 2.2, 3.3]); // "floating"
+inferDtype([1, 2.5, 3]); // "mixed-integer-float"
+inferDtype([true, false, true]); // "boolean"
+inferDtype(["a", "b", "c"]); // "string"
+inferDtype([]); // "empty"
+inferDtype([null, null]); // "empty" (skipna=true by default)
+inferDtype([null, null], { skipna: false }); // "mixed"
+
+
+
+
+ Example 2 — working with Series
+ import { Series, inferDtype } from "tsb";
+
+const s1 = new Series({ data: [10, 20, 30] });
+inferDtype(s1); // "integer"
+
+const s2 = new Series({ data: ["hello", "world"] });
+inferDtype(s2); // "string"
+
+const s3 = new Series({ data: [1, null, 2, null, 3] });
+inferDtype(s3); // "integer" (nulls skipped by default)
+
+
+
+
+ Example 3 — specialised tsb types
+ import { inferDtype, Timestamp, Timedelta, Period, Interval } from "tsb";
+
+inferDtype([Timestamp.fromtimestamp(0), Timestamp.fromtimestamp(1)]);
+// "datetime"
+
+inferDtype([Timedelta.fromComponents({ days: 1 }), Timedelta.fromComponents({ hours: 2 })]);
+// "timedelta"
+
+inferDtype([Period.fromDate(new Date("2024-01-01T00:00:00Z"), "M")]);
+// "period"
+
+inferDtype([new Interval(0, 1), new Interval(1, 2)]);
+// "interval"
+
+
+
+
+ Example 4 — mixed types
+ import { inferDtype } from "tsb";
+
+inferDtype([1, "a", 2]); // "mixed-integer" (int + non-numeric non-float)
+inferDtype(["a", true, null]); // "mixed" (string + bool)
+inferDtype([1n, 2n, 3n]); // "integer" (bigint only)
+inferDtype([1n, 2]); // "decimal" (bigint + integer)
+inferDtype([1n, 2.5]); // "mixed-integer-float"
+
+
+
+
+ API reference
+ function inferDtype(
+ values: readonly unknown[] | Series,
+ options?: InferDtypeOptions,
+): InferredDtype;
+
+interface InferDtypeOptions {
+ /**
+ * When true (default), null and undefined are ignored when
+ * determining the dtype. When false, they contribute to "mixed".
+ */
+ skipna?: boolean;
+}
+
+
+
+
+
+
+
diff --git a/playground/interpolate.html b/playground/interpolate.html
new file mode 100644
index 00000000..248dfd9d
--- /dev/null
+++ b/playground/interpolate.html
@@ -0,0 +1,280 @@
+
+
+
+
+
+ tsb — interpolate
+
+
+
+
+← tsb playground
+interpolate
+
+ Fill missing values by interpolation —
+ mirrors pandas.Series.interpolate() and pandas.DataFrame.interpolate().
+
+
+
+
+ 1 · Linear interpolation (default)
+
+ interpolateSeries(series) fills each run of missing values
+ (null, undefined, NaN) that lies
+ between two known values using straight-line interpolation.
+
+ import { Series, interpolateSeries } from "tsb";
+
+const s = new Series({ data: [1, null, null, 4] });
+// ^ ^ ^ ^
+// 0 1 2 3
+
+const filled = interpolateSeries(s);
+filled.values;
+// → [1, 2, 3, 4]
+
+
+filled.values → [1, 2, 3, 4]
+
+Position 1: 1 + (4 − 1) × (1/3) = 2
+Position 2: 1 + (4 − 1) × (2/3) = 3
+
+
+
+ ⚠ Leading & trailing gaps are not filled by
+ the linear method — there is no anchor on one side to interpolate from.
+ Use method: "ffill" or method: "bfill" to fill
+ those (see sections 2 and 3 below).
+
+
+ const t = new Series({ data: [null, 1, null, 3, null] });
+
+interpolateSeries(t).values;
+// → [null, 1, 2, 3, null]
+// ^^^^ ^^^^
+// leading trailing
+// (unchanged) (unchanged)
+
+ [null, 1, 2, 3, null]
+
+
+
+
+ 2 · Forward fill (ffill / pad / zero)
+
+ method: "ffill" carries the last known value forward into each
+ following gap. "pad" and "zero" are aliases.
+
+ import { Series, interpolateSeries } from "tsb";
+
+const s = new Series({ data: [1, null, null, 4, null] });
+
+interpolateSeries(s, { method: "ffill" }).values;
+// → [1, 1, 1, 4, 4]
+
+ [1, 1, 1, 4, 4]
+
+
+ 🔔 Leading NaN (no value to carry forward from) remain missing.
+
+
+
+
+
+ 3 · Backward fill (bfill / backfill)
+
+ method: "bfill" fills each gap from the next known
+ value looking backwards.
+
+ import { Series, interpolateSeries } from "tsb";
+
+const s = new Series({ data: [null, 2, null, null, 5] });
+
+interpolateSeries(s, { method: "bfill" }).values;
+// → [2, 2, 5, 5, 5]
+
+ [2, 2, 5, 5, 5]
+
+
+
+
+ 4 · Nearest-neighbor
+
+ method: "nearest" fills each missing position with the value
+ of its closest non-missing neighbor. When equidistant, the
+ right neighbor wins.
+
+ import { Series, interpolateSeries } from "tsb";
+
+// [1, ?, ?, 4]
+// pos 1: dist-left=1, dist-right=2 → left wins → 1
+// pos 2: dist-left=2, dist-right=1 → right wins → 4
+const s = new Series({ data: [1, null, null, 4] });
+
+interpolateSeries(s, { method: "nearest" }).values;
+// → [1, 1, 4, 4]
+
+ [1, 1, 4, 4]
+
+ // Tie at equidistance: right wins
+const t = new Series({ data: [10, null, 30] });
+interpolateSeries(t, { method: "nearest" }).values;
+// → [10, 30, 30] (pos 1 equidistant; right value 30 chosen)
+
+ [10, 30, 30]
+
+
+
+
+ 5 · Limiting how many values are filled
+
+ The limit option caps the number of consecutive missing values
+ that can be filled within a single gap. Pair it with
+ limitDirection to control which end of the gap is filled first.
+
+
+
+ limitDirection Fills from
+ "forward" (default)Left boundary of each gap
+ "backward"Right boundary of each gap
+ "both"Left and right boundaries
+
+
+ import { Series, interpolateSeries } from "tsb";
+
+// Gap of size 3 between 0 and 4
+const s = new Series({ data: [0, null, null, null, 4] });
+
+// limit=1, forward: fill only the first NaN from the left
+interpolateSeries(s, { limit: 1 }).values;
+// → [0, 1, null, null, 4]
+
+// limit=1, backward: fill only the last NaN from the right
+interpolateSeries(s, { limit: 1, limitDirection: "backward" }).values;
+// → [0, null, null, 3, 4]
+
+// limit=1, both: fill one from each end
+interpolateSeries(s, { limit: 1, limitDirection: "both" }).values;
+// → [0, 1, null, 3, 4]
+
+
+forward: [0, 1, null, null, 4]
+backward: [0, null, null, 3, 4]
+both: [0, 1, null, 3, 4]
+
+
+
+
+
+ 6 · DataFrame — column-wise (axis=0, default)
+
+ dataFrameInterpolate(df) applies the chosen method
+ independently down each column.
+
+ import { DataFrame, dataFrameInterpolate } from "tsb";
+
+const df = DataFrame.fromColumns({
+ temperature: [20, null, null, 23],
+ humidity: [60, null, 70, null],
+});
+
+const filled = dataFrameInterpolate(df);
+filled.col("temperature").values; // [20, 21, 22, 23]
+filled.col("humidity").values; // [60, 65, 70, null] ← trailing not filled
+
+
+temperature: [20, 21, 22, 23]
+humidity: [60, 65, 70, null]
+
+
+
+
+
+ 7 · DataFrame — row-wise (axis=1)
+
+ Set axis: 1 (or axis: "columns") to interpolate
+ across columns for each row.
+
+ import { DataFrame, dataFrameInterpolate } from "tsb";
+
+const df = DataFrame.fromColumns({
+ t0: [0, 10],
+ t1: [null, null], // missing
+ t2: [null, null], // missing
+ t3: [6, 22],
+});
+
+// Row 0 interpolates 0 → 6 (linear, 4 steps)
+// Row 1 interpolates 10 → 22
+const filled = dataFrameInterpolate(df, { axis: 1 });
+filled.col("t1").values; // [2, 14]
+filled.col("t2").values; // [4, 18]
+
+
+Row 0: [0, 2, 4, 6]
+Row 1: [10, 14, 18, 22]
+
+
+
+
+
+ 8 · API summary
+
+ Function Description
+
+ interpolateSeries(series, options?)
+ Fill missing values in a Series
+
+
+ dataFrameInterpolate(df, options?)
+ Fill missing values in a DataFrame (column-wise or row-wise)
+
+
+
+
+ Option Type Default Description
+
+ method
+ InterpolateMethod
+ "linear"
+ Interpolation strategy
+
+
+ limit
+ number
+ Infinity
+ Max consecutive NaN values to fill
+
+
+ limitDirection
+ LimitDirection
+ "forward"
+ Which end of each gap the limit counts from
+
+
+ axis (DataFrame only)
+ 0 | 1 | "index" | "columns"
+ 0
+ Column-wise (0) or row-wise (1)
+
+
+
+
+
+
diff --git a/playground/interval.html b/playground/interval.html
new file mode 100644
index 00000000..afbe9fa1
--- /dev/null
+++ b/playground/interval.html
@@ -0,0 +1,218 @@
+
+
+
+
+
+ tsb — Interval & IntervalIndex
+
+
+
+
+← tsb playground
+Interval & IntervalIndex
+
+ Numeric intervals with configurable endpoint closure —
+ mirrors pandas.Interval and pandas.IntervalIndex.
+
+
+
+ 1 — Interval basics
+
+ An Interval represents a range between two numbers. The closed
+ parameter controls which endpoints are included:
+
+
+ closed notation left included? right included?
+ "right" (default)(left, right] no yes
+ "left"[left, right) yes no
+ "both"[left, right] yes yes
+ "neither"(left, right) no no
+
+ import { Interval } from "tsb";
+
+const iv = new Interval(0, 1); // default: right-closed (0, 1]
+console.log(iv.toString()); // "(0, 1]"
+console.log(iv.length); // 1
+console.log(iv.mid); // 0.5
+console.log(iv.closedLeft); // false
+console.log(iv.closedRight); // true
+console.log(iv.contains(1)); // true — right endpoint included
+console.log(iv.contains(0)); // false — left endpoint excluded
+console.log(iv.contains(0.5)); // true
+
+ Loading…
+
+
+
+ 2 — All four closure modes
+ import { Interval } from "tsb";
+
+const modes = ["right", "left", "both", "neither"];
+for (const m of modes) {
+ const iv = new Interval(0, 1, m);
+ const at0 = iv.contains(0);
+ const at1 = iv.contains(1);
+ const mid = iv.contains(0.5);
+ console.log(`${iv.toString().padEnd(12)} contains(0)=${at0} contains(0.5)=${mid} contains(1)=${at1}`);
+}
+
+ Loading…
+
+
+
+ 3 — Interval.overlaps()
+
+ Two intervals overlap when they share at least one point.
+
+ import { Interval } from "tsb";
+
+const a = new Interval(0, 2);
+const b = new Interval(1, 3);
+const c = new Interval(5, 6);
+
+console.log(a.overlaps(b)); // true — share (1, 2]
+console.log(a.overlaps(c)); // false — gap between 2 and 5
+console.log(b.overlaps(a)); // true — symmetric
+
+// Adjacent intervals sharing exactly one endpoint
+const left = new Interval(0, 1, "right"); // (0, 1]
+const right = new Interval(1, 2, "left"); // [1, 2)
+console.log(left.overlaps(right)); // true — both include point 1
+
+ Loading…
+
+
+
+ 4 — IntervalIndex.fromBreaks()
+
+ The most common way to create an IntervalIndex is from a list of
+ break-points (like the output of pandas.cut()).
+ Given n+1 breaks, you get n intervals.
+
+ import { IntervalIndex } from "tsb";
+
+const idx = IntervalIndex.fromBreaks([0, 10, 20, 30, 40, 50]);
+console.log(idx.size); // 5
+console.log(idx.at(0).toString()); // "(0, 10]"
+console.log(idx.at(-1).toString()); // "(40, 50]"
+console.log([...idx.left]); // [0, 10, 20, 30, 40]
+console.log([...idx.right]); // [10, 20, 30, 40, 50]
+console.log([...idx.mid]); // [5, 15, 25, 35, 45]
+console.log(idx.isMonotonicIncreasing); // true
+
+ Loading…
+
+
+
+ 5 — IntervalIndex.get_loc() — bin lookup
+
+ get_loc(value) finds which bin a value falls into.
+ Returns -1 when the value isn't in any interval.
+
+ import { IntervalIndex } from "tsb";
+
+// Grade bands (right-closed): F (0,50], D (50,60], C (60,70], B (70,80], A (80,100]
+const bands = IntervalIndex.fromArrays(
+ [0, 50, 60, 70, 80],
+ [50, 60, 70, 80, 100],
+ "right" // (left, right]
+);
+
+const scores = [45, 55, 65, 75, 95, 100, -1];
+const labels = ["F", "D", "C", "B", "A"];
+
+for (const score of scores) {
+ const bin = bands.get_loc(score);
+ const grade = bin >= 0 ? labels[bin] : "out of range";
+ console.log(`score ${score} → ${grade}`);
+}
+
+ Loading…
+
+
+
+ 6 — IntervalIndex.contains() and overlaps()
+ import { Interval, IntervalIndex } from "tsb";
+
+const idx = IntervalIndex.fromBreaks([0, 5, 10, 15]);
+// → (0,5], (5,10], (10,15]
+
+// contains: one boolean per interval
+console.log(idx.contains(7)); // [false, true, false]
+console.log(idx.contains(5)); // [true, false, false] (right-closed: 5 ∈ (0,5])
+
+// overlaps: does each interval share any point with the query?
+const query = new Interval(4, 6); // (4, 6]
+console.log(idx.overlaps(query)); // [true, true, false] — (0,5] and (5,10] both overlap
+
+ Loading…
+
+
+
+ 7 — IntervalIndex.filter() and rename()
+ import { Interval, IntervalIndex } from "tsb";
+
+const idx = IntervalIndex.fromBreaks([0, 1, 2, 3, 4, 5], "right", { name: "raw" });
+
+// Keep only intervals that overlap with (1.5, 3.5]
+const query = new Interval(1.5, 3.5);
+const mask = idx.overlaps(query);
+const filtered = idx.filter(mask);
+
+console.log(filtered.size); // 3
+console.log(filtered.toString()); // shows (1, 2], (2, 3], (3, 4]
+
+// Rename the index axis
+const named = idx.rename("score_bins");
+console.log(named.name); // "score_bins"
+
+ Loading…
+
+
+
+ 8 — Building from Interval objects
+ import { Interval, IntervalIndex } from "tsb";
+
+// Custom irregular intervals
+const intervals = [
+ new Interval(0, 5, "both"), // [0, 5]
+ new Interval(5, 10, "neither"), // (5, 10)
+ new Interval(10, 20, "left"), // [10, 20)
+];
+const idx = IntervalIndex.fromIntervals(intervals);
+console.log(idx.size); // 3
+// Closure mode comes from first interval
+console.log(idx.closed); // "both"
+console.log(idx.toString());
+
+ Loading…
+
+
+
+
+
diff --git a/playground/isin.html b/playground/isin.html
new file mode 100644
index 00000000..5a2ae910
--- /dev/null
+++ b/playground/isin.html
@@ -0,0 +1,139 @@
+
+
+
+
+
+ tsb — isin
+
+
+
+
+← tsb playground
+isin
+Element-wise membership testing — mirrors pandas.Series.isin() and pandas.DataFrame.isin().
+
+
+ 1 — Series.isin: check membership in an array
+ isin(series, values) returns a boolean Series with true where each element appears in values. Accepts any iterable: arrays, Sets, generators.
+ import { Series, isin } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5], name: "scores" });
+
+const result = isin(s, [1, 3, 5]);
+console.log([...result.values]); // [true, false, true, false, true]
+console.log(result.name); // "scores"
+
+ Loading…
+
+
+
+ 2 — Using a Set for O(1) lookups
+ Passing a Set avoids any extra construction overhead when you already have one:
+ import { Series, isin } from "tsb";
+
+const allowed = new Set(["apple", "cherry", "date"]);
+const fruits = new Series({ data: ["apple", "banana", "cherry", "elderberry"] });
+
+console.log([...isin(fruits, allowed).values]);
+// [true, false, true, false]
+
+ Loading…
+
+
+
+ 3 — NaN and null behaviour
+ NaN is never a member of any collection (matches pandas behaviour). null uses strict equality and will match if present.
+ import { Series, isin } from "tsb";
+
+const s = new Series({ data: [1, NaN, null, 3] });
+
+console.log([...isin(s, [1, NaN, null]).values]);
+// NaN → false even though NaN is in the list
+// [true, false, true, false]
+
+ Loading…
+
+
+
+ 4 — DataFrame.isin: shared collection
+ dataFrameIsin(df, values) checks every cell against the same collection and returns a boolean DataFrame of the same shape.
+ import { DataFrame, dataFrameIsin } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, 2, 3],
+ b: [3, 4, 5],
+ c: ["x", "y", "z"],
+});
+
+const result = dataFrameIsin(df, [1, 3, "z"]);
+// a: [true, false, true]
+// b: [true, false, false]
+// c: [false, false, true]
+console.log([...result.col("a").values]);
+console.log([...result.col("b").values]);
+console.log([...result.col("c").values]);
+
+ Loading…
+
+
+
+ 5 — DataFrame.isin: per-column lookup (IsinDict)
+ Pass a plain object { colName: values, … } to give each column its own set of allowed values. Columns absent from the dict produce all false.
+ import { DataFrame, dataFrameIsin } from "tsb";
+
+const df = DataFrame.fromColumns({
+ region: ["north", "south", "east", "west"],
+ revenue: [100, 200, 150, 300],
+ active: [true, false, true, false],
+});
+
+// Check region against a whitelist, revenue against specific values
+const result = dataFrameIsin(df, {
+ region: ["north", "east"],
+ revenue: [100, 300],
+});
+
+console.log([...result.col("region").values]); // [true, false, true, false]
+console.log([...result.col("revenue").values]); // [true, false, false, true]
+console.log([...result.col("active").values]); // [false, false, false, false] — absent from dict
+
+ Loading…
+
+
+
+ 6 — Filtering rows where any column matches
+ A common pattern: use dataFrameIsin as a boolean mask to filter rows.
+ import { DataFrame, dataFrameIsin } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", "Bob", "Carol", "Dave"],
+ score: [85, 92, 78, 95],
+});
+
+// Keep only rows where name is in the target set
+const mask = dataFrameIsin(df, { name: ["Alice", "Carol"] });
+const filtered = df.filter((_row, i) => mask.col("name").values[i] === true);
+console.log([...filtered.col("name").values]); // ["Alice", "Carol"]
+console.log([...filtered.col("score").values]); // [85, 78]
+
+ Loading…
+
+
+
+ Note: isin always returns a new boolean Series/DataFrame — it never mutates the input. The index and column labels are preserved exactly.
+
+
+
diff --git a/playground/json_normalize.html b/playground/json_normalize.html
new file mode 100644
index 00000000..5469a255
--- /dev/null
+++ b/playground/json_normalize.html
@@ -0,0 +1,199 @@
+
+
+
+
+
+ tsb · json_normalize
+
+
+
+
+ ← tsb
+ json_normalize
+
+
+
+
+ Overview
+
+ jsonNormalize(data, options?) flattens semi-structured (nested) JSON into
+ a flat DataFrame — mirroring pandas.json_normalize().
+
+ Key options:
+
+ sep — separator for nested key paths (default ".")
+ recordPath — path to a nested array of child records
+ meta — parent fields to attach to every child row
+ metaPrefix / recordPrefix — column prefixes
+ maxLevel — limit nesting depth
+ errors — "raise" (default) or "ignore" for missing meta keys
+
+
+
+
+ Example 1 — flatten nested dicts
+ import { jsonNormalize } from "tsb";
+
+const data = [
+ { id: 1, info: { name: "Alice", city: "NY" } },
+ { id: 2, info: { name: "Bob", city: "LA" } },
+];
+
+const df = jsonNormalize(data);
+// id info.name info.city
+// 1 Alice NY
+// 2 Bob LA
+
+
+
+ Example 2 — recordPath + meta
+ const orders = [
+ { orderId: "A1", customer: "Alice", items: [{ sku: "X", qty: 2 }, { sku: "Y", qty: 1 }] },
+ { orderId: "B2", customer: "Bob", items: [{ sku: "Z", qty: 5 }] },
+];
+
+const df = jsonNormalize(orders, {
+ recordPath: "items",
+ meta: ["orderId", "customer"],
+});
+// sku qty orderId customer
+// X 2 A1 Alice
+// Y 1 A1 Alice
+// Z 5 B2 Bob
+
+
+
+ Example 3 — maxLevel
+ const data = [{ a: { b: { c: { d: 99 } } } }];
+
+jsonNormalize(data, { maxLevel: 1 });
+// a.b → {"c":{"d":99}} (depth 2 is not expanded)
+
+
+
+ Interactive playground
+
+
+ JSON input (array of objects or single object)
+
+
+
+ recordPath (optional, e.g. items or a.b for nested)
+
+
+
+ meta fields (comma-separated, e.g. id,name)
+
+
+
+ sep (separator for nested keys, default .)
+
+
+
+ maxLevel (blank = unlimited)
+
+
+
Normalize →
+
+
+
+
+
+
+
+
+
diff --git a/playground/memory_usage.html b/playground/memory_usage.html
new file mode 100644
index 00000000..78d0824b
--- /dev/null
+++ b/playground/memory_usage.html
@@ -0,0 +1,301 @@
+
+
+
+
+
+ tsb — memory_usage
+
+
+
+ 🧮 memory_usage
+
+ Estimate the memory consumed by a Series or DataFrame —
+ mirroring
+ pandas.Series.memory_usage() and
+ pandas.DataFrame.memory_usage() .
+
+
+ Two options control the calculation:
+
+ index (default true) — include index bytes.
+ deep (default false) — traverse values to measure string / object sizes exactly; otherwise uses itemsize × length for fixed-width dtypes or 8 bytes/pointer for variable-width.
+
+
+
+ 1 · Series memory_usage — fixed-width dtype
+ const s = new Series({ data: [1, 2, 3, 4], dtype: Dtype.int32 });
+// int32 → 4 bytes/element × 4 = 16 bytes of data
+// RangeIndex → 3 × 8 = 24 bytes (only start/stop/step stored)
+seriesMemoryUsage(s); // 40 (data + index)
+seriesMemoryUsage(s, { index: false }); // 16 (data only)
+ click Run to evaluate
+ Run
+
+ 2 · Series memory_usage — string dtype (shallow vs deep)
+ const s = new Series({ data: ["hello", "world", "tsb"], dtype: Dtype.string });
+// shallow: 3 × 8 bytes (one pointer per element)
+seriesMemoryUsage(s, { index: false }); // 24
+// deep: actual char data — each string = length*2 + 56 bytes overhead
+seriesMemoryUsage(s, { index: false, deep: true }); // "hello"=66, "world"=66, "tsb"=62 → 194
+ click Run to evaluate
+ Run
+
+ 3 · DataFrame memory_usage — per-column breakdown
+ const df = new DataFrame({
+ id: new Series({ data: [1, 2, 3], dtype: Dtype.int32 }),
+ score: new Series({ data: [9.5, 8.1, 7.2], dtype: Dtype.float64 }),
+ name: new Series({ data: ["Alice", "Bob", "Carol"], dtype: Dtype.string }),
+});
+const mu = dataFrameMemoryUsage(df);
+// Returns Series indexed by ["Index", "id", "score", "name"]
+// Index (RangeIndex) → 24 bytes
+// id (int32 × 3) → 12 bytes
+// score (float64 × 3)→ 24 bytes
+// name (string × 3, shallow) → 24 bytes
+ click Run to evaluate
+ Run
+
+ 4 · DataFrame memory_usage — deep=true for string columns
+ const df = new DataFrame({
+ label: new Series({ data: ["short", "a slightly longer string"], dtype: Dtype.string }),
+});
+dataFrameMemoryUsage(df, { deep: true, index: false })
+// "short" → 5*2+56 = 66
+// "a slightly longer string" → 24*2+56 = 104
+ click Run to evaluate
+ Run
+
+ 5 · Total memory across all columns
+ const df = new DataFrame({
+ a: new Series({ data: Array.from({length: 1000}, (_,i) => i), dtype: Dtype.int64 }),
+ b: new Series({ data: Array.from({length: 1000}, (_,i) => i * 0.1), dtype: Dtype.float64 }),
+});
+const mu = dataFrameMemoryUsage(df, { index: false });
+mu.sum(); // 1000*8 + 1000*8 = 16000 bytes
+ click Run to evaluate
+ Run
+
+
+
+
+
+
+ Part of tsb — a TypeScript port of pandas.
+ Built by Autoloop .
+
+
+
diff --git a/playground/named_agg.html b/playground/named_agg.html
new file mode 100644
index 00000000..debdf0fa
--- /dev/null
+++ b/playground/named_agg.html
@@ -0,0 +1,217 @@
+
+
+
+
+
+ tsb — NamedAgg Tutorial
+
+
+
+
+ ← tsb
+ NamedAgg groupby
+
+
+
+
+ What is NamedAgg?
+
+ NamedAgg lets you rename output columns from a groupby aggregation while
+ simultaneously choosing which source column to aggregate and how .
+ It mirrors pandas.NamedAgg.
+
+
+ Without NamedAgg, agg() keeps the original column names.
+ With aggNamed() you control the output name independently.
+
+
+
+
+ Basic Usage
+ import { DataFrame, namedAgg } from "tsb";
+
+const df = DataFrame.fromColumns({
+ dept: ["eng", "eng", "hr", "hr", "eng"],
+ salary: [100, 120, 80, 90, 110 ],
+ headcount: [1, 1, 1, 1, 1 ],
+ score: [4.0, 5.0, 3.0, 4.0, 3.5 ],
+});
+
+const result = df.groupby("dept").aggNamed({
+ total_salary: namedAgg("salary", "sum"),
+ avg_salary: namedAgg("salary", "mean"),
+ employees: namedAgg("headcount", "sum"),
+ avg_score: namedAgg("score", "mean"),
+});
+
+// result:
+// | total_salary | avg_salary | employees | avg_score
+// eng | 330 | 110 | 3 | 4.167
+// hr | 170 | 85 | 2 | 3.5
+
+
+
+
Input DataFrame
+
+ dept salary headcount score
+ eng 100 1 4.0
+ eng 120 1 5.0
+ hr 80 1 3.0
+ hr 90 1 4.0
+ eng 110 1 3.5
+
+
+
+
aggNamed result
+
+ (index) total_salary avg_salary employees avg_score
+ eng 330 110 3 4.167
+ hr 170 85 2 3.5
+
+
+
+
+
+
+ Aggregate Same Column Multiple Ways
+ A key advantage of NamedAgg is applying multiple functions to the same source column simultaneously:
+ df.groupby("dept").aggNamed({
+ min_salary: namedAgg("salary", "min"),
+ max_salary: namedAgg("salary", "max"),
+ salary_count: namedAgg("salary", "count"),
+});
+
+
+
+ Custom Aggregation Functions
+ Pass any function (vals: readonly Scalar[]) => Scalar as the aggfunc:
+ const salaryRange = (vals: readonly Scalar[]) => {
+ const nums = vals.filter((v): v is number => typeof v === "number");
+ return Math.max(...nums) - Math.min(...nums);
+};
+
+df.groupby("dept").aggNamed({
+ salary_range: namedAgg("salary", salaryRange),
+});
+
+
+
+ Using the NamedAgg Class Directly
+ namedAgg(col, fn) is shorthand for new NamedAgg(col, fn):
+ import { NamedAgg } from "tsb";
+
+const spec = new NamedAgg("salary", "sum");
+console.log(spec.column); // "salary"
+console.log(spec.aggfunc); // "sum"
+
+
+
+ asIndex=false
+ Pass false as the second argument to include the group key as a regular column:
+ df.groupby("dept").aggNamed(
+ { total_salary: namedAgg("salary", "sum") },
+ false, // asIndex
+);
+// result has columns: ["dept", "total_salary"]
+
+
+
+ API Reference
+ // Factory function (recommended)
+namedAgg(column: string, aggfunc: AggName | AggFn): NamedAgg
+
+// Class constructor
+new NamedAgg(column: string, aggfunc: AggName | AggFn)
+
+// GroupBy method
+DataFrameGroupBy.aggNamed(spec: NamedAggSpec, asIndex?: boolean): DataFrame
+
+// Type guard
+isNamedAggSpec(spec: unknown): spec is NamedAggSpec
+
+// Types
+type NamedAggSpec = Readonly<Record<string, NamedAgg>>
+type AggName = "sum" | "mean" | "min" | "max" | "count" | "std" | "first" | "last" | "size"
+type AggFn = (values: readonly Scalar[]) => Scalar
+
+
+
+ Pandas Equivalent
+ # Python / pandas
+import pandas as pd
+
+df.groupby("dept").agg(
+ total_salary=pd.NamedAgg(column="salary", aggfunc="sum"),
+ avg_salary=pd.NamedAgg(column="salary", aggfunc="mean"),
+)
+
+
+
+
+
diff --git a/playground/natsort.html b/playground/natsort.html
new file mode 100644
index 00000000..77c1fd7f
--- /dev/null
+++ b/playground/natsort.html
@@ -0,0 +1,133 @@
+
+
+
+
+
+ tsb — natsort
+
+
+
+
+
+
+
+
+ Standard lexicographic sort places "file10" before "file2" because
+ "1" < "2". Natural sort compares embedded numbers numerically , so
+ "file2" correctly sorts before "file10".
+
+
+ tsb exports four helpers:
+ natCompare(a, b) — comparator for Array.sort;
+ natSorted(arr) — returns a new naturally-sorted array;
+ natSortKey(s) — returns the token array used internally;
+ natArgSort(arr) — returns the permutation indices (like pandas.Index.argsort).
+
+
+ Mirrors
+ pandas.Index.sort_values(key=natsort_keygen())
+ and
+ natsort.natsorted() .
+
+
+
+
+ 1 · Basic usage
+ import { natSorted, natCompare, natSortKey, natArgSort } from "tsb";
+
+// File names sort by embedded number
+const files = ["file10.txt", "file2.txt", "file1.txt"];
+console.log(natSorted(files));
+// → ["file1.txt", "file2.txt", "file10.txt"]
+
+// Version strings
+const versions = ["1.10.0", "1.9.0", "1.2.0", "2.0.0"];
+console.log(natSorted(versions));
+// → ["1.2.0", "1.9.0", "1.10.0", "2.0.0"]
+
+// Use as Array.sort comparator
+const copy = [...files];
+copy.sort(natCompare);
+// → ["file1.txt", "file2.txt", "file10.txt"]
+
+
+
+ 2 · Options
+ // ignoreCase — text tokens are folded to lower-case
+const words = ["Banana", "apple", "Cherry"];
+natSorted(words, { ignoreCase: true });
+// → ["apple", "Banana", "Cherry"]
+
+// reverse — descending natural order
+natSorted(["file1", "file10", "file2"], { reverse: true });
+// → ["file10", "file2", "file1"]
+
+
+
+ 3 · Sorting objects with a key function
+ const rows = [
+ { path: "img/photo10.jpg" },
+ { path: "img/photo2.jpg" },
+ { path: "img/photo1.jpg" },
+];
+
+// key extracts the string to sort by
+import { natSorted } from "tsb";
+const sorted = natSorted(rows, { key: r => r.path });
+sorted.map(r => r.path);
+// → ["img/photo1.jpg", "img/photo2.jpg", "img/photo10.jpg"]
+
+
+
+ 4 · natSortKey — inspect the token representation
+ import { natSortKey } from "tsb";
+
+natSortKey("file10.txt"); // → ["file", 10, ".txt"]
+natSortKey("007bonds"); // → [7, "bonds"] (leading zeros stripped)
+natSortKey("abc"); // → ["abc"]
+natSortKey("42"); // → [42]
+
+// ignoreCase folds text tokens
+natSortKey("File10.TXT", { ignoreCase: true });
+// → ["file", 10, ".txt"]
+
+
+
+ 5 · natArgSort — permutation indices
+ import { natArgSort } from "tsb";
+
+const arr = ["file10", "file2", "file1"];
+const idx = natArgSort(arr);
+// → [2, 1, 0] (indices of "file1", "file2", "file10")
+
+idx.map(i => arr[i]);
+// → ["file1", "file2", "file10"]
+
+// Use with a tsb Index to sort labels naturally:
+// index.argsort() uses default lexicographic order;
+// natArgSort(index.values) gives natural order.
+
+
+
+ 6 · Comparison with lexicographic sort
+ const data = ["item1", "item12", "item2", "item20", "item3"];
+
+// Lexicographic (default Array.sort)
+[...data].sort();
+// → ["item1", "item12", "item2", "item20", "item3"] ← wrong
+
+// Natural sort
+natSorted(data);
+// → ["item1", "item2", "item3", "item12", "item20"] ← correct
+
+
+
+
+
+
diff --git a/playground/notna.html b/playground/notna.html
new file mode 100644
index 00000000..bffa398a
--- /dev/null
+++ b/playground/notna.html
@@ -0,0 +1,137 @@
+
+
+
+
+
+ tsb — notna / isna
+
+
+
+
+
+
+
+
+ isna(value) and notna(value) inspect scalars, arrays, Series, and DataFrames
+ and return a boolean (or boolean-valued object) indicating whether each element is missing.
+ They mirror
+ pandas.isna
+ and
+ pandas.notna .
+ The aliases isnull and notnull are also provided.
+
+ What counts as missing?
+
+ Value Missing?
+
+ null✅ yes
+ undefined✅ yes
+ NaN✅ yes
+ 0, "", false❌ no — falsy but present
+ Infinity, -Infinity❌ no — defined numeric values
+ any string ❌ no
+ any Date ❌ no
+
+
+
+
+
+ Example 1 — scalars
+ import { isna, notna } from "tsb";
+
+isna(null); // true
+isna(undefined); // true
+isna(NaN); // true
+
+isna(0); // false — zero is not missing
+isna(""); // false — empty string is not missing
+isna(false); // false — false is not missing
+isna(42); // false
+isna("hello"); // false
+
+notna(null); // false
+notna(42); // true
+
+
+
+
+ Example 2 — arrays
+ import { isna, notna } from "tsb";
+
+const arr = [1, null, NaN, "x", undefined, false];
+isna(arr);
+// [false, true, true, false, true, false]
+
+notna(arr);
+// [true, false, false, true, false, true]
+
+// Count missing values
+const missing = isna(arr).filter(Boolean).length; // 3
+
+
+
+
+ Example 3 — Series
+ import { Series, isna, notna } from "tsb";
+
+const s = new Series({ data: [10, null, NaN, 40], name: "sales" });
+
+isna(s).values;
+// [false, true, true, false]
+
+notna(s).values;
+// [true, false, false, true]
+
+// Filter to non-missing values
+const present = s.filter((_, i) => notna(s).values[i] === true);
+
+
+
+
+ Example 4 — DataFrame
+ import { DataFrame, isna, notna } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", null, "Carol"],
+ score: [95, NaN, 88 ],
+ active: [true, false, null ],
+});
+
+isna(df).toRecords();
+// [
+// { name: false, score: false, active: false },
+// { name: true, score: true, active: false },
+// { name: false, score: false, active: true },
+// ]
+
+// Count nulls per column
+const nullCounts: Record<string, number> = {};
+for (const col of df.columns) {
+ nullCounts[String(col)] = isna(df.col(String(col))).values
+ .filter(Boolean).length;
+}
+// { name: 1, score: 1, active: 1 }
+
+
+
+
+ Example 5 — aliases
+ import { isnull, notnull } from "tsb";
+
+// isnull is an alias for isna
+// notnull is an alias for notna
+isnull(null); // true
+notnull("hello"); // true
+
+
+
+
+
+
+
diff --git a/playground/numeric_ops.html b/playground/numeric_ops.html
new file mode 100644
index 00000000..32fa1096
--- /dev/null
+++ b/playground/numeric_ops.html
@@ -0,0 +1,208 @@
+
+
+
+
+
+ tsb — numeric math operations
+
+
+
+
+← tsb playground
+numeric math operations
+
+ Element-wise mathematical functions for Series and DataFrame —
+ mirrors NumPy ufuncs applied to a pandas Series/DataFrame:
+ floor, ceil, trunc,
+ sqrt, exp, log,
+ log2, log10, sign.
+
+
+
+ All functions are pure — they return a new Series/DataFrame
+ without mutating the input. Missing values (null / NaN)
+ propagate through every operation unchanged.
+
+
+
+ 1 — floor, ceil, trunc: rounding toward integers
+
+ seriesFloor(s) replaces each element with the largest integer ≤ the value.
+ seriesCeil(s) replaces each element with the smallest integer ≥ the value.
+ seriesTrunc(s) removes the fractional part, rounding toward zero.
+ For negative numbers: floor(-1.7) = -2,
+ ceil(-1.7) = -1, trunc(-1.7) = -1.
+
+ import { Series, seriesFloor, seriesCeil, seriesTrunc } from "tsb";
+
+const s = new Series({ data: [-1.7, -0.2, 0, 1.2, 1.9] });
+
+console.log([...seriesFloor(s).values]); // [-2, -1, 0, 1, 1]
+console.log([...seriesCeil(s).values]); // [-1, 0, 0, 2, 2]
+console.log([...seriesTrunc(s).values]); // [-1, 0, 0, 1, 1]
+
+ Loading…
+
+
+
+ 2 — floor / ceil / trunc on a DataFrame
+
+ DataFrame variants (dataFrameFloor, dataFrameCeil,
+ dataFrameTrunc) apply the operation to every numeric column.
+
+ import { DataFrame, dataFrameFloor, dataFrameCeil } from "tsb";
+
+const df = DataFrame.fromColumns({
+ price: [10.49, 20.01, 30.99],
+ change: [-0.55, 1.23, 2.78],
+});
+
+const floored = dataFrameFloor(df);
+console.log([...floored.col("price").values]); // [10, 20, 30]
+console.log([...floored.col("change").values]); // [-1, 1, 2]
+
+const ceiled = dataFrameCeil(df);
+console.log([...ceiled.col("price").values]); // [11, 21, 31]
+
+ Loading…
+
+
+
+ 3 — sqrt: square root
+
+ seriesSqrt(s) returns √x for each element.
+ Negative values produce NaN (real-valued, same as NumPy by default).
+ Mirrors np.sqrt(series).
+
+ import { Series, seriesSqrt } from "tsb";
+
+const s = new Series({ data: [0, 1, 4, 9, 16, 25] });
+console.log([...seriesSqrt(s).values]); // [0, 1, 2, 3, 4, 5]
+
+// Negative values → NaN
+const mixed = new Series({ data: [-4, 0, 9] });
+console.log([...seriesSqrt(mixed).values]); // [NaN, 0, 3]
+
+ Loading…
+
+
+
+ 4 — exp: exponential function
+
+ seriesExp(s) computes e x for each element.
+ Mirrors np.exp(series).
+
+ import { Series, seriesExp } from "tsb";
+
+const s = new Series({ data: [0, 1, 2, -1] });
+const result = seriesExp(s);
+console.log(result.values[0].toFixed(4)); // 1.0000 (e^0 = 1)
+console.log(result.values[1].toFixed(4)); // 2.7183 (e^1 = e)
+console.log(result.values[2].toFixed(4)); // 7.3891 (e^2)
+console.log(result.values[3].toFixed(4)); // 0.3679 (e^-1 = 1/e)
+
+ Loading…
+
+
+
+ 5 — log, log2, log10: logarithms
+
+ Three logarithm functions, matching the NumPy counterparts:
+
+
+ Function Base Mirrors
+ seriesLoge (natural) np.log
+ seriesLog22 np.log2
+ seriesLog1010 np.log10
+
+ Values ≤ 0 produce -Infinity (for 0) or NaN (for negative).
+ import { Series, seriesLog, seriesLog2, seriesLog10 } from "tsb";
+
+const naturals = new Series({ data: [1, Math.E, Math.E ** 2] });
+console.log(seriesLog(naturals).values.map(v => +v.toFixed(4)));
+// [0, 1, 2]
+
+const powersOf2 = new Series({ data: [1, 2, 4, 8, 1024] });
+console.log([...seriesLog2(powersOf2).values]);
+// [0, 1, 2, 3, 10]
+
+const powersOf10 = new Series({ data: [1, 10, 100, 1000] });
+console.log([...seriesLog10(powersOf10).values]);
+// [0, 1, 2, 3]
+
+ Loading…
+
+
+
+ 6 — sign: element sign
+
+ seriesSign(s) returns -1 for negative values,
+ 0 for zero, and 1 for positive values.
+ Mirrors np.sign(series).
+
+ import { Series, seriesSign } from "tsb";
+
+const s = new Series({ data: [-100, -0.001, 0, 0.001, 100] });
+console.log([...seriesSign(s).values]); // [-1, -1, 0, 1, 1]
+
+ Loading…
+
+
+
+ 7 — missing value propagation
+
+ All numeric ops propagate null and non-numeric values unchanged.
+ This matches the behaviour of pandas, where missing values are not coerced.
+
+ import { Series, seriesFloor, seriesSqrt, seriesLog } from "tsb";
+
+const s = new Series({ data: [null, 4, null, 9] });
+
+console.log([...seriesFloor(s).values]); // [null, 4, null, 9]
+console.log([...seriesSqrt(s).values]); // [null, 2, null, 3]
+console.log([...seriesLog(s).values]); // [null, ~1.386, null, ~2.197]
+
+ Loading…
+
+
+
+ 8 — composing operations
+
+ Combine multiple operations. For example, compute the log-transformed
+ square root of a price column — a common technique in data normalisation.
+
+ import { Series, seriesSqrt, seriesLog } from "tsb";
+
+// Log-sqrt transformation: log(√x) = log(x)/2
+const prices = new Series({ data: [1, 4, 9, 100, 10000], name: "price" });
+
+const logSqrt = seriesLog(seriesSqrt(prices));
+console.log(logSqrt.values.map(v => +v.toFixed(4)));
+// [0, 0.6931, 1.0986, 2.3026, 4.6052]
+
+// Verify: equals log(x) / 2
+const logHalf = seriesLog(prices).values.map(v => +(v / 2).toFixed(4));
+console.log(logHalf);
+// [0, 0.6931, 1.0986, 2.3026, 4.6052]
+
+ Loading…
+
+
+
+
diff --git a/playground/period.html b/playground/period.html
new file mode 100644
index 00000000..1f269b72
--- /dev/null
+++ b/playground/period.html
@@ -0,0 +1,253 @@
+
+
+
+
+
+ tsb — Period & PeriodIndex
+
+
+
+
+← tsb playground
+Period & PeriodIndex
+
+ Fixed-frequency time spans — mirrors
+ pandas.Period and pandas.PeriodIndex.
+
+
+
+ 1 — Supported frequencies
+
+ A Period represents a single time span at a fixed frequency.
+ tsb supports eight frequencies:
+
+
+ Code Alias Description Example string
+ "A""Y"Calendar year 2024
+ "Q"— Calendar quarter 2024Q2
+ "M"— Calendar month 2024-03
+ "W"— ISO week (Mon–Sun) 2024-01-01/2024-01-07
+ "D"— Day 2024-01-15
+ "H"— Hour 2024-01-15 14:00
+ "T""min"Minute 2024-01-15 14:35
+ "S"— Second 2024-01-15 14:35:42
+
+
+
+
+ 2 — Creating periods
+ Create periods from dates, strings, or directly from ordinals:
+ import { Period } from "tsb";
+
+// From a Date object
+const m = Period.fromDate(new Date("2024-03-15T00:00:00Z"), "M");
+console.log(m.toString()); // "2024-03"
+console.log(m.startTime.toISOString()); // "2024-03-01T00:00:00.000Z"
+console.log(m.endTime.toISOString()); // "2024-03-31T23:59:59.999Z"
+
+// From a string
+const q = Period.fromString("2024Q3", "Q");
+console.log(q.toString()); // "2024Q3"
+console.log(q.startTime.toISOString()); // "2024-07-01T00:00:00.000Z"
+
+// Direct construction from ordinal
+// Direct construction from ordinal
+const p = new Period(654, "M"); // 654 months after Jan 1970 = July 2024
+console.log(p.toString()); // "2024-07"
+
+ Run to see output
+
+
+
+ 3 — Period arithmetic
+
+ Periods support shift (add), difference (diff),
+ and comparison. Arithmetic is always within the same frequency.
+
+ import { Period } from "tsb";
+
+const jan = Period.fromDate(new Date("2024-01-01T00:00:00Z"), "M");
+const apr = jan.add(3);
+console.log(apr.toString()); // "2024-04"
+
+const dec = Period.fromDate(new Date("2024-12-01T00:00:00Z"), "M");
+console.log(dec.diff(jan)); // 11 (months between)
+
+// Comparison
+console.log(jan.compareTo(dec) < 0); // true (jan is earlier)
+console.log(jan.equals(jan.add(0))); // true
+
+// contains: does a Date fall within a Period?
+const march15 = new Date("2024-03-15T00:00:00Z");
+const march = Period.fromDate(march15, "M");
+console.log(march.contains(march15)); // true
+console.log(march.contains(new Date("2024-04-01T00:00:00Z"))); // false
+
+ Run to see output
+
+
+
+ 4 — Frequency conversion with asfreq
+
+ asfreq() converts a period to a different frequency.
+ The how parameter picks the start or end of the current period
+ as the anchor point.
+
+ import { Period } from "tsb";
+
+const q2 = Period.fromString("2024Q2", "Q");
+
+// "start": April (first month of Q2)
+console.log(q2.asfreq("M", "start").toString()); // "2024-04"
+
+// "end": June (last month of Q2)
+console.log(q2.asfreq("M", "end").toString()); // "2024-06"
+
+// Going coarser: month → year
+const aug = Period.fromString("2024-08", "M");
+console.log(aug.asfreq("A").toString()); // "2024"
+console.log(aug.asfreq("Q").toString()); // "2024Q3"
+
+ Run to see output
+
+
+
+ 5 — PeriodIndex: building ranges
+
+ A PeriodIndex is an ordered sequence of periods at a uniform
+ frequency, suitable for use as a row index.
+
+ import { Period, PeriodIndex } from "tsb";
+
+// All four quarters of 2024
+const start = Period.fromDate(new Date("2024-01-01T00:00:00Z"), "Q");
+const end = Period.fromDate(new Date("2024-12-31T00:00:00Z"), "Q");
+const quarters = PeriodIndex.fromRange(start, end);
+console.log(quarters.size); // 4
+console.log(quarters.at(0).toString()); // "2024Q1"
+console.log(quarters.at(-1).toString()); // "2024Q4"
+
+// periodRange: start + count
+const months = PeriodIndex.periodRange(
+ Period.fromDate(new Date("2024-01-01T00:00:00Z"), "M"),
+ 12,
+);
+console.log(months.size); // 12
+console.log(months.at(11).toString()); // "2024-12"
+
+// Iteration
+for (const q of quarters) {
+ console.log(q.toString());
+}
+
+ Run to see output
+
+
+
+ 6 — PeriodIndex: lookup and transformation
+ import { Period, PeriodIndex } from "tsb";
+
+const idx = PeriodIndex.periodRange(
+ Period.fromDate(new Date("2024-01-01T00:00:00Z"), "M"),
+ 6, // Jan–Jun 2024
+);
+
+// Position lookup
+const mar = Period.fromString("2024-03", "M");
+console.log(idx.getLoc(mar)); // 2
+console.log(idx.contains(mar)); // true
+
+// Shift the whole index
+const shifted = idx.shift(6);
+console.log(shifted.at(0).toString()); // "2024-07"
+console.log(shifted.at(5).toString()); // "2024-12"
+
+// Convert to different frequency
+const asQtr = idx.asfreq("Q", "start");
+console.log(asQtr.freq); // "Q"
+// Note: some months map to the same quarter → duplicates
+console.log(asQtr.unique().size); // 2 (Q1 and Q2)
+
+// Get Date arrays
+const starts = idx.toDatetimeStart();
+console.log(starts[0]?.toISOString()); // "2024-01-01T00:00:00.000Z"
+
+ Run to see output
+
+
+
+ 7 — Weekly periods
+
+ Weekly periods span Monday–Sunday. The string representation shows the
+ full range: YYYY-MM-DD/YYYY-MM-DD.
+
+ import { Period, PeriodIndex } from "tsb";
+
+// 1970-01-01 is a Thursday — it belongs to week 0 (Mon 1969-12-29 → Sun 1970-01-04)
+const w0 = Period.fromDate(new Date("1970-01-01T00:00:00Z"), "W");
+console.log(w0.ordinal); // 0
+console.log(w0.startTime.toISOString()); // "1969-12-29T00:00:00.000Z"
+console.log(w0.endTime.toISOString()); // "1970-01-04T23:59:59.999Z"
+console.log(w0.toString()); // "1969-12-29/1970-01-04"
+
+// Build a 4-week index
+const fourWeeks = PeriodIndex.periodRange(
+ Period.fromDate(new Date("2024-01-01T00:00:00Z"), "W"),
+ 4,
+);
+for (const w of fourWeeks) {
+ console.log(w.toString());
+}
+
+ Run to see output
+
+
+
+ 8 — Sub-daily periods
+ import { Period } from "tsb";
+
+// Hourly
+const h = Period.fromDate(new Date("2024-03-15T14:35:00Z"), "H");
+console.log(h.toString()); // "2024-03-15 14:00"
+console.log(h.durationMs); // 3_600_000
+
+// Minutely (alias "min")
+const t = Period.fromDate(new Date("2024-03-15T14:35:42Z"), "min");
+console.log(t.toString()); // "2024-03-15 14:35"
+console.log(t.freq); // "T"
+
+// Secondly
+const s = Period.fromDate(new Date("2024-03-15T14:35:42.500Z"), "S");
+console.log(s.toString()); // "2024-03-15 14:35:42"
+console.log(s.durationMs); // 1_000
+
+ Run to see output
+
+
+
+
+
diff --git a/playground/pipe.html b/playground/pipe.html
new file mode 100644
index 00000000..64ddfb1b
--- /dev/null
+++ b/playground/pipe.html
@@ -0,0 +1,191 @@
+
+
+
+
+
+ tsb — pipe
+
+
+
+
+← tsb playground
+pipe
+
+ Function-application helpers for left-to-right method chaining — mirrors
+ pandas.Series.pipe() and pandas.DataFrame.pipe().
+
+
+
+ 1 — pipeSeries: apply a function to a Series
+
+ pipeSeries(series, fn, ...args) calls fn(series, ...args)
+ and returns the result. Use it to build readable transformation chains without
+ deep nesting.
+
+ import { Series, pipeSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5], name: "nums" });
+
+// Simple transform — return the size
+const size = pipeSeries(s, (x) => x.size);
+console.log("size:", size); // 5
+
+// Pass extra arguments to the function
+const offset = pipeSeries(s, (x, n) => x.size + n, 10);
+console.log("size+10:", offset); // 15
+
+ ▶ run
+
+
+
+ 2 — dataFramePipe: apply a function to a DataFrame
+
+ dataFramePipe(df, fn, ...args) works the same way for DataFrames.
+ This mirrors pandas.DataFrame.pipe(fn, *args).
+
+ import { DataFrame, dataFramePipe } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, 2, 3, 4, 5],
+ b: [10, 20, 30, 40, 50],
+});
+
+// Get the shape without nesting
+const shape = dataFramePipe(df, (d) => d.shape);
+console.log("shape:", shape); // [5, 2]
+
+// Chain: take head, then get row count
+const rowCount = dataFramePipe(dataFramePipe(df, (d) => d.head(3)), (d) => d.shape[0]);
+console.log("rows after head(3):", rowCount); // 3
+
+ ▶ run
+
+
+
+ 3 — pipeChain: chain multiple Series transforms
+
+ pipeChain(series, f1, f2, f3, ...) applies a sequence of
+ Series → Series transforms in left-to-right order.
+ This is the cleanest API when building multi-step pipelines.
+
+ import { Series, pipeChain } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4] });
+
+// Helper transforms
+const addOne = (x) => new Series({ data: [...x.values].map((v) => v + 1) });
+const double = (x) => new Series({ data: [...x.values].map((v) => v * 2) });
+const square = (x) => new Series({ data: [...x.values].map((v) => v * v) });
+
+// Without pipe: deeply nested and hard to read
+const nested = square(double(addOne(s)));
+console.log("nested:", [...nested.values]); // [16, 36, 64, 100]
+
+// With pipeChain: clean left-to-right
+const piped = pipeChain(s, addOne, double, square);
+console.log("piped:", [...piped.values]); // [16, 36, 64, 100]
+
+ ▶ run
+
+
+
+ 4 — dataFramePipeChain: chain multiple DataFrame transforms
+
+ dataFramePipeChain(df, f1, f2, ...) applies a sequence of
+ DataFrame → DataFrame transforms, ideal for data-prep pipelines.
+
+ import { DataFrame, dataFramePipeChain } from "tsb";
+
+const df = DataFrame.fromColumns({
+ score: [10, 5, 80, 95, 42, 3, 77],
+ label: ["a", "b", "c", "d", "e", "f", "g"],
+});
+
+// Build a pipeline of DataFrame → DataFrame steps
+const head4 = (d) => d.head(4);
+const tail3 = (d) => d.tail(3);
+
+const result = dataFramePipeChain(df, head4, tail3);
+
+console.log("shape:", result.shape); // [3, 2]
+console.log("records:", result.toRecords());
+// [{ score: 5, label: "b" }, { score: 80, label: "c" }, { score: 95, label: "d" }]
+
+ ▶ run
+
+
+
+ 5 — pipeTo / dataFramePipeTo: control the insertion point
+
+ pandas supports a tuple form df.pipe((fn, "kwarg_name")) where the
+ DataFrame goes to a specific keyword argument. In tsb we provide
+ pipeTo(series, pos, fn, ...otherArgs) and
+ dataFramePipeTo(df, pos, fn, ...otherArgs) which splice the value
+ at a chosen zero-based argument position.
+
+ import { Series, pipeTo } from "tsb";
+
+const seriesB = new Series({ data: [10, 20, 30] });
+
+// A function that expects (left, right) arguments
+const concatValues = (left, right) => {
+ return [...left.values, ...right.values];
+};
+
+// Insert seriesB as the SECOND argument (pos=1) alongside seriesA
+const seriesA = new Series({ data: [1, 2, 3] });
+const result = pipeTo(seriesB, 1, concatValues, seriesA);
+
+console.log("result:", result); // [1, 2, 3, 10, 20, 30]
+
+ ▶ run
+
+
+
+ 6 — Practical data pipeline example
+
+ Combining multiple pipe utilities to build a complete data transformation
+ pipeline, similar to the .pipe() method chains common in pandas.
+
+ import { DataFrame, dataFramePipeChain } from "tsb";
+
+// Raw sales data with some issues
+const df = DataFrame.fromColumns({
+ revenue: [100, null, 250, 75, null, 400],
+ region: ["north", "south", "east", "west", "north", "east"],
+});
+
+// Step functions: each DataFrame → DataFrame
+const dropNulls = (d) => {
+ // Keep only rows where revenue is not null
+ const mask = [...d.col("revenue").values].map((v) => v !== null);
+ const revVals = [...d.col("revenue").values].filter((_, i) => mask[i]);
+ const regVals = [...d.col("region").values].filter((_, i) => mask[i]);
+ return DataFrame.fromColumns({ revenue: revVals, region: regVals });
+};
+
+const top3 = (d) => d.head(3);
+
+// Build the pipeline
+const result = dataFramePipeChain(df, dropNulls, top3);
+console.log("shape:", result.shape); // [3, 2]
+console.log("records:", result.toRecords());
+
+ ▶ run
+
+
+
+
diff --git a/playground/pivot_table.html b/playground/pivot_table.html
new file mode 100644
index 00000000..c2ffadd1
--- /dev/null
+++ b/playground/pivot_table.html
@@ -0,0 +1,169 @@
+
+
+
+
+
+ tsb — pivotTableFull
+
+
+
+
+
+
+
+
+ pivotTableFull(df, options) reshapes a DataFrame by aggregating values
+ across row and column groups, and optionally appends a grand-total "All" row and column.
+
+
+ index — column(s) to use as row groups
+ columns — column(s) to use as column groups
+ values — column(s) to aggregate
+ aggfunc — aggregation: mean | sum | min | max | count | first | last
+ margins — add grand-total row & column (default false)
+ margins_name — label for totals (default "All")
+ sort — sort row/column labels (default true)
+
+
+
+
+ Example 1 — sales by region and product (sum + margins)
+ import { DataFrame, pivotTableFull } from "tsb";
+
+const df = DataFrame.fromColumns({
+ region: ["North","North","South","South","North","South"],
+ product: ["A", "B", "A", "B", "A", "B" ],
+ sales: [100, 200, 150, 250, 120, 180 ],
+});
+
+const result = pivotTableFull(df, {
+ index: "region",
+ columns: "product",
+ values: "sales",
+ aggfunc: "sum",
+ margins: true,
+});
+
+// Output:
+// A B All
+// North 220 200 420
+// South 150 430 580
+// All 370 630 1000
+
+
+
+ Example 2 — mean aggregation with custom margins_name
+ const df = DataFrame.fromColumns({
+ team: ["Eng","Eng","Mkt","Mkt"],
+ level: ["Sr","Jr","Sr","Jr"],
+ score: [90, 70, 80, 60],
+});
+
+const result = pivotTableFull(df, {
+ index: "team",
+ columns: "level",
+ values: "score",
+ aggfunc: "mean",
+ margins: true,
+ margins_name: "Total",
+});
+
+// Output:
+// Jr Sr Total
+// Eng 70 90 80
+// Mkt 60 80 70
+// Total 65 85 75
+
+
+
+ Example 3 — sort=false preserves insertion order
+ const df = DataFrame.fromColumns({
+ r: ["Z", "A", "M", "Z"],
+ c: ["b", "a", "c", "a"],
+ v: [1, 2, 3, 4 ],
+});
+
+pivotTableFull(df, {
+ index: "r",
+ columns: "c",
+ values: "v",
+ aggfunc: "sum",
+ sort: false,
+});
+
+// Rows in order: Z, A, M (insertion order)
+// Cols in order: b, a, c (insertion order)
+
+
+
+ Example 4 — count with margins
+ const df = DataFrame.fromColumns({
+ dept: ["Eng","Eng","Mkt","Mkt","Eng"],
+ level: ["Sr","Jr","Sr","Jr","Sr"],
+ salary: [120, 80, 110, 75, 130],
+});
+
+pivotTableFull(df, {
+ index: "dept",
+ columns: "level",
+ values: "salary",
+ aggfunc: "count",
+ margins: true,
+});
+
+// Jr Sr All
+// Eng 1 2 3
+// Mkt 1 1 2
+// All 2 3 5
+
+
+
+ Key differences from pivotTable
+
+
+
+ Feature
+ pivotTable
+ pivotTableFull
+
+
+
+
+ Grand-total margins
+ ❌
+ ✅ margins
+
+
+ Custom margin label
+ ❌
+ ✅ margins_name
+
+
+ Sort labels
+ insertion order
+ ✅ sort (default true)
+
+
+ All aggfuncs
+ ✅
+ ✅
+
+
+ fill_value, dropna
+ ✅
+ ✅
+
+
+
+
+
+
+
+
+
diff --git a/playground/pow_mod.html b/playground/pow_mod.html
new file mode 100644
index 00000000..6113bc72
--- /dev/null
+++ b/playground/pow_mod.html
@@ -0,0 +1,194 @@
+
+
+
+
+
+ tsb — pow / mod / floordiv
+
+
+
+
+← tsb playground
+pow / mod / floordiv
+
+ Element-wise exponentiation, modulo, and floor-division for Series and DataFrame —
+ mirroring pandas.Series.pow, .mod, and .floordiv.
+
+
+
+ Overview
+ All three operations work element-wise on Series and DataFrame:
+
+ Function pandas equivalent Operator Description
+
+ seriesPow(s, other)s.pow(other)**Raise to power
+ seriesMod(s, other)s.mod(other)%Modulo (Python/pandas sign rule)
+ seriesFloorDiv(s, other)s.floordiv(other)//Floor division (rounds toward −∞)
+ dataFramePow(df, other)df.pow(other)**Column-wise exponentiation
+ dataFrameMod(df, other)df.mod(other)%Column-wise modulo
+ dataFrameFloorDiv(df, other)df.floordiv(other)//Column-wise floor division
+
+
+ The other operand may be a scalar number or another Series / DataFrame
+ of the same shape (positional alignment). Missing values (null / NaN) propagate unchanged.
+
+
+
+ seriesPow — exponentiation
+ import { Series, seriesPow } from "tsb";
+
+// Scalar exponent: each element raised to the power 2
+const s = new Series({ data: [1, 2, 3, 4, 5], name: "x" });
+console.log(seriesPow(s, 2).values);
+// [1, 4, 9, 16, 25]
+
+// Series exponent: element-wise pairing
+const exponents = new Series({ data: [1, 2, 3, 4, 5] });
+console.log(seriesPow(s, exponents).values);
+// [1, 4, 27, 256, 3125]
+
+// Square root via pow(0.5)
+const sq = new Series({ data: [4, 9, 16, 25] });
+console.log(seriesPow(sq, 0.5).values);
+// [2, 3, 4, 5]
+ [1, 4, 9, 16, 25]
+[1, 4, 27, 256, 3125]
+[2, 3, 4, 5]
+
+
+
+ seriesMod — Python-style modulo
+
+ Sign rule: Unlike JavaScript's % operator (which follows C semantics),
+ seriesMod uses Python / pandas semantics : the result always has the
+ same sign as the divisor . For example, -7 mod 3 = 2 (not -1).
+
+ import { Series, seriesMod } from "tsb";
+
+// Positive divisor: result is always in [0, divisor)
+const s = new Series({ data: [-7, -4, 0, 3, 10] });
+console.log(seriesMod(s, 3).values);
+// [2, 2, 0, 0, 1]
+
+// JavaScript % would give different results for negatives:
+// [-7 % 3, -4 % 3, 0 % 3, 3 % 3, 10 % 3] = [-1, -1, 0, 0, 1]
+
+// Series divisor: element-wise
+const divisors = new Series({ data: [3, 4, 5, 6, 7] });
+console.log(seriesMod(s, divisors).values);
+// [2, 0, 0, 3, 3]
+
+// Missing values propagate unchanged
+const withNull = new Series({ data: [10, null, 15] });
+console.log(seriesMod(withNull, 4).values);
+// [2, null, 3]
+ [2, 2, 0, 0, 1]
+[2, 0, 0, 3, 3]
+[2, null, 3]
+
+
+
+ seriesFloorDiv — floor division
+
+ Rounding rule: seriesFloorDiv rounds toward −∞
+ (Python / pandas // semantics). This differs from JavaScript's
+ Math.trunc for negative values: -7 // 2 = -4 (not -3).
+
+ import { Series, seriesFloorDiv } from "tsb";
+
+const s = new Series({ data: [7, -7, 10, -10, 0] });
+
+// Floor division by scalar
+console.log(seriesFloorDiv(s, 2).values);
+// [3, -4, 5, -5, 0]
+// Note: -7 // 2 = -4 (floor toward -∞, not -3 from trunc)
+
+// Compare with Math.trunc:
+// Math.trunc(-7/2) = Math.trunc(-3.5) = -3 ← different!
+
+// Series divisor
+const divisors = new Series({ data: [2, 3, 4, 5, 1] });
+console.log(seriesFloorDiv(s, divisors).values);
+// [3, -3, 2, -2, 0]
+
+// The div-mod identity: floordiv(a,b)*b + mod(a,b) === a (for integers)
+// -7 = (-4)*2 + 1 ✓ (not (-3)*2 + (-1) which is JS % behavior)
+ [3, -4, 5, -5, 0]
+[3, -3, 2, -2, 0]
+
+
+
+ DataFrame operations
+ import { DataFrame, dataFramePow, dataFrameMod, dataFrameFloorDiv } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [2, 3, 4],
+ b: [7, -7, 10],
+});
+
+// dataFramePow: raise every cell to the power 2
+console.log("pow(2):");
+console.log(dataFramePow(df, 2).col("a").values); // [4, 9, 16]
+console.log(dataFramePow(df, 2).col("b").values); // [49, 49, 100]
+
+// dataFrameMod: element-wise modulo by scalar
+console.log("mod(3):");
+console.log(dataFrameMod(df, 3).col("a").values); // [2, 0, 1]
+console.log(dataFrameMod(df, 3).col("b").values); // [1, 2, 1]
+
+// dataFrameFloorDiv: floor division by scalar
+console.log("floordiv(3):");
+console.log(dataFrameFloorDiv(df, 3).col("a").values); // [0, 1, 1]
+console.log(dataFrameFloorDiv(df, 3).col("b").values); // [2, -3, 3]
+
+// DataFrame × DataFrame: column-aligned
+const df2 = DataFrame.fromColumns({ a: [1, 2, 3], b: [3, 4, 5] });
+console.log("pow(df2):");
+console.log(dataFramePow(df, df2).col("a").values); // [2, 9, 64]
+ pow(2):
+[4, 9, 16]
+[49, 49, 100]
+mod(3):
+[2, 0, 1]
+[1, 2, 1]
+floordiv(3):
+[0, 1, 1]
+[2, -3, 3]
+pow(df2):
+[2, 9, 64]
+
+
+
+ Pandas comparison
+ # pandas equivalent
+import pandas as pd
+
+s = pd.Series([-7, 0, 7, 10])
+print(s.pow(2)) # [49, 0, 49, 100]
+print(s.mod(3)) # [2, 0, 1, 1] (Python sign rule)
+print(s.floordiv(2)) # [-4, 0, 3, 5] (floor toward -∞)
+
+df = pd.DataFrame({"a": [2, 3, 4], "b": [7, -7, 10]})
+print(df.pow(2)) # a: [4,9,16] b: [49,49,100]
+print(df.mod(3)) # a: [2,0,1] b: [1,2,1]
+print(df.floordiv(3)) # a: [0,1,1] b: [2,-3,3]
+
+
+
+
diff --git a/playground/reindex.html b/playground/reindex.html
new file mode 100644
index 00000000..55ddb8f5
--- /dev/null
+++ b/playground/reindex.html
@@ -0,0 +1,166 @@
+
+
+
+
+
+ tsb — reindex
+
+
+
+
+
+
+
+
+ reindex lets you align a Series or DataFrame to a new index,
+ filling gaps with a fill value or propagating adjacent values.
+
+
+ reindexSeries(s, newLabels) — realign a Series to new labels.
+ reindexDataFrame(df, { index?, columns? }) — realign rows and/or columns.
+
+
+ Missing labels get null by default, or any fillValue you choose.
+ You can also propagate values using method: "ffill" (forward fill),
+ "bfill" (backward fill), or "nearest".
+
+
+ See also:
+ pandas.Series.reindex
+ ·
+ pandas.DataFrame.reindex
+
+
+
+
+ 1 · reindexSeries — basics
+ import { Series, Index, reindexSeries } from "tsb";
+
+const s = new Series({ data: [10, 20, 30], index: new Index(["a", "b", "c"]) });
+
+// Reorder labels
+reindexSeries(s, ["c", "a", "b"]).toArray();
+// → [30, 10, 20]
+
+// Extend with new labels → null by default
+reindexSeries(s, ["a", "b", "c", "d"]).toArray();
+// → [10, 20, 30, null]
+
+// Extend with custom fill value
+reindexSeries(s, ["a", "b", "c", "d"], { fillValue: 0 }).toArray();
+// → [10, 20, 30, 0]
+
+// Drop labels
+reindexSeries(s, ["a", "c"]).toArray();
+// → [10, 30]
+
+
+
+ 2 · Fill methods
+ import { Series, Index, reindexSeries } from "tsb";
+
+const temps = new Series({
+ data: [15, 18, 22],
+ index: new Index([0, 2, 5]), // sparse integer index
+});
+
+// Forward fill — carry last known value forward
+reindexSeries(temps, [0, 1, 2, 3, 4, 5], { method: "ffill" }).toArray();
+// → [15, 15, 18, 18, 18, 22]
+// ^^ ^^ ^^ ← filled from left
+
+// Backward fill — carry next known value backward
+reindexSeries(temps, [0, 1, 2, 3, 4, 5], { method: "bfill" }).toArray();
+// → [15, 18, 18, 22, 22, 22]
+// ^^ ^^ ^^ ← filled from right
+
+// Nearest — use closest value (prefer right on tie)
+reindexSeries(temps, [0, 1, 2, 3, 4, 5], { method: "nearest" }).toArray();
+// → [15, 18, 18, 18, 22, 22]
+
+// Limit — cap consecutive fills
+reindexSeries(temps, [0, 1, 2, 3, 4, 5], { method: "ffill", limit: 1 }).toArray();
+// → [15, 15, 18, 18, null, 22]
+//                      ^^ second consecutive fill blocked by limit: 1
+
+
+
+ 3 · reindexDataFrame — rows
+ import { DataFrame, reindexDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ open: [100, 102, 105],
+ close: [ 98, 104, 107],
+});
+// shape [3, 2], RangeIndex [0, 1, 2]
+
+// Extend to 5 rows — new rows filled with null
+reindexDataFrame(df, { index: [0, 1, 2, 3, 4] }).col("open").toArray();
+// → [100, 102, 105, null, null]
+
+// Forward-fill new rows
+reindexDataFrame(df, { index: [0, 1, 2, 3, 4], method: "ffill" }).col("open").toArray();
+// → [100, 102, 105, 105, 105]
+
+
+
+ 4 · reindexDataFrame — columns
+ import { DataFrame, reindexDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+
+// Reorder columns
+reindexDataFrame(df, { columns: ["b", "a"] }).columns.toArray();
+// → ["b", "a"]
+
+// Add a new column filled with 0
+const r = reindexDataFrame(df, { columns: ["a", "b", "c"], fillValue: 0 });
+r.col("c").toArray();
+// → [0, 0, 0]
+
+
+
+ 5 · Reindex rows and columns simultaneously
+ import { DataFrame, reindexDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ x: [1, 2, 3],
+ y: [4, 5, 6],
+});
+
+const r = reindexDataFrame(df, {
+ index: [0, 1, 2, 3], // extend to 4 rows
+ columns: ["x", "y", "z"], // add column "z"
+ fillValue: -1,
+});
+// shape [4, 3]
+r.col("z").toArray(); // → [-1, -1, -1, -1]
+r.col("x").toArray(); // → [1, 2, 3, -1]
+
+
+
+ 6 · Pandas equivalents
+ # Python / pandas equivalent
+import pandas as pd
+
+s = pd.Series([10, 20, 30], index=["a", "b", "c"])
+
+# reindexSeries(s, newLabels) → s.reindex(newLabels)
+s.reindex(["a", "b", "c", "d"]) # NaN for "d"
+s.reindex(["a", "b", "c", "d"], fill_value=0)
+s.reindex(range(5), method="ffill") # forward fill gaps
+
+df = pd.DataFrame({"a": [1,2,3], "b": [4,5,6]})
+
+# reindexDataFrame(df, { index, columns })
+df.reindex([0, 1, 2, 3])
+df.reindex(columns=["a", "b", "c"])
+df.reindex(index=[0,1,2,3], columns=["a","b","c"], fill_value=0)
+
+
+
+
diff --git a/playground/sample.html b/playground/sample.html
new file mode 100644
index 00000000..2ba4e93e
--- /dev/null
+++ b/playground/sample.html
@@ -0,0 +1,187 @@
+
+
+
+
+
+ tsb — sample
+
+
+
+
+← tsb playground
+sample
+Random sampling from Series and DataFrame — mirrors pandas.Series.sample() and pandas.DataFrame.sample().
+
+
+ 1 — Basic Series sampling
+ sampleSeries(s, { n }) returns a new Series with n randomly chosen elements. Pass randomState for reproducible results.
+ import { Series, sampleSeries } from "tsb";
+
+const s = new Series({
+ data: [10, 20, 30, 40, 50],
+ index: ["a", "b", "c", "d", "e"],
+});
+
+// Sample 3 elements — same result every time with randomState
+const r = sampleSeries(s, { n: 3, randomState: 42 });
+console.log([...r.values]); // 3 values from [10,20,30,40,50]
+console.log([...r.index.values]); // corresponding labels
+
+ Loading…
+
+
+
+ 2 — Sampling a fraction
+ Instead of a fixed count, use frac to specify a proportion of the data (0–1).
+ import { Series, sampleSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] });
+
+// Sample 40% of the data
+const r = sampleSeries(s, { frac: 0.4, randomState: 0 });
+console.log(r.size); // 4 (= round(0.4 × 10))
+console.log([...r.values]); // 4 random values
+
+ Loading…
+
+
+
+ 3 — Sampling with replacement
+ Set replace: true to allow the same element to be selected more than once. This also lets you request more items than the Series contains.
+ import { Series, sampleSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 3] });
+
+// 6 samples from a 3-element Series — duplicates allowed
+const r = sampleSeries(s, { n: 6, replace: true, randomState: 7 });
+console.log(r.size); // 6
+console.log([...r.values]); // may contain repeated values, e.g. [3, 1, 3, 2, 1, 1]
+
+ Loading…
+
+
+
+ 4 — Weighted sampling
+ Provide a weights array to bias the random draw. Higher weight → higher probability of selection. Weights are normalised automatically.
+ import { Series, sampleSeries } from "tsb";
+
+const s = new Series({ data: ["apple", "banana", "cherry"] });
+// cherry has 8× the weight of apple and 4× the weight of banana
+const weights = [1, 2, 8];
+
+const counts = { apple: 0, banana: 0, cherry: 0 };
+for (let seed = 0; seed < 200; seed++) {
+ const v = sampleSeries(s, { n: 1, weights, randomState: seed }).values[0];
+ counts[v]++;
+}
+console.log(counts); // cherry ~145/200, banana ~36/200, apple ~18/200
+
+ Loading…
+
+
+
+ 5 — ignoreIndex
+ Set ignoreIndex: true to reset the result index to 0, 1, 2, … instead of preserving the original labels.
+ import { Series, sampleSeries } from "tsb";
+
+const s = new Series({
+ data: [100, 200, 300],
+ index: ["x", "y", "z"],
+});
+
+const r = sampleSeries(s, { n: 2, randomState: 1 });
+console.log([...r.index.values]); // e.g. ["z", "x"] — original labels
+
+const r2 = sampleSeries(s, { n: 2, randomState: 1, ignoreIndex: true });
+console.log([...r2.index.values]); // [0, 1] — reset
+
+ Loading…
+
+
+
+ 6 — DataFrame row sampling
+ sampleDataFrame(df, { n }) returns a DataFrame with n randomly selected rows. Row integrity is preserved — all columns stay aligned.
+ import { DataFrame, sampleDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ name: ["Alice", "Bob", "Carol", "Dave", "Eve"],
+ score: [88, 72, 95, 61, 84],
+ grade: ["B", "C", "A", "D", "B"],
+});
+
+const sample = sampleDataFrame(df, { n: 3, randomState: 5 });
+console.log([...sample.col("name").values]); // 3 names
+console.log([...sample.col("score").values]); // corresponding scores
+
+ Loading…
+
+
+
+ 7 — DataFrame column sampling (axis=1)
+ Set axis: 1 to sample columns instead of rows. Useful for random feature selection.
+ import { DataFrame, sampleDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, 2, 3],
+ b: [4, 5, 6],
+ c: [7, 8, 9],
+ d: [10, 11, 12],
+});
+
+// Pick 2 random columns
+const r = sampleDataFrame(df, { n: 2, axis: 1, randomState: 3 });
+console.log([...r.columns.values]); // e.g. ["b", "d"]
+
+ Loading…
+
+
+
+ 8 — Bootstrapping example
+ Sampling with replacement is the foundation of bootstrapping — re-sampling your data to estimate statistics.
+ import { Series, sampleSeries } from "tsb";
+
+const data = new Series({ data: [2.1, 3.4, 5.5, 2.9, 4.2, 3.8, 5.1, 4.6, 3.3, 2.7] });
+const n = data.size;
+
+// 50 bootstrap means
+const means = [];
+for (let seed = 0; seed < 50; seed++) {
+ const boot = sampleSeries(data, { n, replace: true, randomState: seed });
+ const sum = (boot.values as number[]).reduce((a, b) => a + b, 0);
+ means.push(sum / n);
+}
+means.sort((a, b) => a - b);
+const lo = means[Math.floor(means.length * 0.025)].toFixed(2);
+const hi = means[Math.floor(means.length * 0.975)].toFixed(2);
+console.log(`Bootstrap 95% CI for mean: [${lo}, ${hi}]`);
+
+ Loading…
+
+
+
+
+
diff --git a/playground/searchsorted.html b/playground/searchsorted.html
new file mode 100644
index 00000000..685f84e9
--- /dev/null
+++ b/playground/searchsorted.html
@@ -0,0 +1,136 @@
+
+
+
+
+
+ tsb — searchsorted
+
+
+
+
+
+
+
+
+ searchsorted(a, v) returns the index at which value v should be inserted
+ into the sorted array a to keep it sorted. This is the standard
+ binary-search operation used throughout pandas for alignment, binning, and lookup.
+
+
+ Two side modes:
+ side = "left" (default) places insertion before any equal elements;
+ side = "right" places it after .
+
+
+ Three exports:
+ searchsorted(a, v) — single value search;
+ searchsortedMany(a, vs) — vectorised search over multiple values;
+ argsortScalars(a) — compute a sort permutation (for the sorter option).
+
+
+
+
+ 1 · Basic usage
+ import { searchsorted, searchsortedMany, argsortScalars } from "tsb";
+
+const a = [1, 3, 5, 7, 9];
+
+// Where would 4 go?
+console.log(searchsorted(a, 4)); // → 2 (between 3 and 5)
+
+// Where would 5 go — before existing 5?
+console.log(searchsorted(a, 5)); // → 2 (side="left" default)
+
+// Where would 5 go — after existing 5?
+console.log(searchsorted(a, 5, { side: "right" })); // → 3
+
+// Out-of-range values
+console.log(searchsorted(a, 0)); // → 0 (before everything)
+console.log(searchsorted(a, 99)); // → 5 (after everything)
+
+
+
+ 2 · Vectorised search with searchsortedMany
+ import { searchsortedMany } from "tsb";
+
+const prices = [10, 20, 30, 40, 50];
+
+// Find where several bid prices would fall
+const bids = [15, 25, 50, 55];
+console.log(searchsortedMany(prices, bids));
+// → [1, 2, 4, 5]
+
+// side="right" for after-equal semantics
+console.log(searchsortedMany(prices, [20, 40], { side: "right" }));
+// → [2, 4]
+
+
+
+ 3 · Searching unsorted data with sorter
+ import { searchsorted, argsortScalars } from "tsb";
+
+// argsortScalars returns the permutation that would sort the array
+const data = [50, 10, 30, 20, 40];
+const sorter = argsortScalars(data);
+// sorter → [1, 3, 2, 4, 0] (indices of 10, 20, 30, 40, 50)
+
+// Now search without sorting the original array
+console.log(searchsorted(data, 25, { sorter })); // → 2 (between 20 and 30)
+console.log(searchsorted(data, 30, { sorter })); // → 2 (left of 30)
+console.log(searchsorted(data, 30, { side: "right", sorter })); // → 3
+
+
+
+ 4 · String arrays
+ import { searchsorted } from "tsb";
+
+const words = ["apple", "banana", "cherry", "date", "elderberry"];
+
+console.log(searchsorted(words, "blueberry")); // → 2 (between banana and cherry)
+console.log(searchsorted(words, "cherry")); // → 2 (left of cherry)
+console.log(searchsorted(words, "cherry", { side: "right" })); // → 3
+
+
+
+ 5 · Custom comparator
+ import { searchsorted } from "tsb";
+
+// Case-insensitive string search
+const arr = ["apple", "Banana", "cherry"]; // sorted case-insensitively
+const cmp = (a: unknown, b: unknown) => {
+ const sa = String(a).toLowerCase();
+ const sb = String(b).toLowerCase();
+ return sa < sb ? -1 : sa > sb ? 1 : 0;
+};
+
+console.log(searchsorted(arr, "banana", { compareFn: cmp })); // → 1
+console.log(searchsorted(arr, "CHERRY", { compareFn: cmp })); // → 2
+
+
+
+ 6 · pandas equivalents
+ # Python / pandas
+import pandas as pd
+import numpy as np
+
+idx = pd.Index([1, 3, 5, 7, 9])
+idx.searchsorted(4) # → 2
+idx.searchsorted(5) # → 2 (side='left')
+idx.searchsorted(5, side='right') # → 3
+
+np.searchsorted([1, 3, 5, 7, 9], [2, 5, 8]) # → [1, 2, 4]
+
+# TypeScript / tsb equivalent
+import { searchsorted, searchsortedMany } from "tsb";
+searchsorted([1, 3, 5, 7, 9], 4) // → 2
+searchsorted([1, 3, 5, 7, 9], 5) // → 2
+searchsorted([1, 3, 5, 7, 9], 5, { side: "right" }) // → 3
+searchsortedMany([1, 3, 5, 7, 9], [2, 5, 8]) // → [1, 2, 4]
+
+
+
+
diff --git a/playground/select_dtypes.html b/playground/select_dtypes.html
new file mode 100644
index 00000000..19498050
--- /dev/null
+++ b/playground/select_dtypes.html
@@ -0,0 +1,236 @@
+
+
+
+
+
+ tsb — select_dtypes
+
+
+
+ 🔍 select_dtypes
+
+ Return a subset of DataFrame columns matching given dtype selectors —
+ mirroring
+ pandas.DataFrame.select_dtypes() .
+
+
+ Overview
+
+ Selector Matches
+ "number"int, uint, float dtypes
+ "integer"int and uint dtypes
+ "signed integer"int dtypes only (int8–int64)
+ "unsigned integer"uint dtypes only (uint8–uint64)
+ "floating"float dtypes (float32, float64)
+ "bool"boolean dtype
+ "string"string dtype
+ "object"object dtype
+ "datetime"datetime dtype
+ "timedelta"timedelta dtype
+ "category"category dtype
+ "int64" etc.exact concrete dtype name
+
+
+ 1 · include: keep only numeric columns
+ import { DataFrame } from "tsb";
+import { selectDtypes } from "tsb";
+
+const df = DataFrame.fromColumns({
+ age: [25, 30, 22],
+ score: [88.5, 92.0, 77.3],
+ name: ["Alice", "Bob", "Carol"],
+ active: [true, false, true],
+});
+
+const nums = selectDtypes(df, { include: "number" });
+// Keeps: age (int64), score (float64)
+// Drops: name (string), active (bool)
+console.log(nums.columns.toArray()); // ["age", "score"]
+ Click Run to evaluate
+ ▶ Run
+
+ 2 · exclude: drop boolean and string columns
+ const withoutBoolStr = selectDtypes(df, { exclude: ["bool", "string"] });
+// Keeps: age (int64), score (float64)
+console.log(withoutBoolStr.columns.toArray()); // ["age", "score"]
+ Click Run to evaluate
+ ▶ Run
+
+ 3 · include + exclude combined
+ // Include all numeric, but exclude float64
+const intOnly = selectDtypes(df, { include: "number", exclude: "floating" });
+// Keeps: age (int64)
+console.log(intOnly.columns.toArray()); // ["age"]
+ Click Run to evaluate
+ ▶ Run
+
+ 4 · Concrete dtype name selector
+ const floatOnly = selectDtypes(df, { include: "float64" });
+console.log(floatOnly.columns.toArray()); // ["score"]
+ Click Run to evaluate
+ ▶ Run
+
+ 5 · Inspect column dtypes
+ Click Run to evaluate
+ ▶ Run dtype inspection
+
+ 6 · Interactive: try your own
+
+ ▶ Run
+ Output will appear here
+
+
+
+
diff --git a/playground/shift_diff.html b/playground/shift_diff.html
new file mode 100644
index 00000000..1c15e7de
--- /dev/null
+++ b/playground/shift_diff.html
@@ -0,0 +1,214 @@
+
+
+
+
+
+ tsb — shift & diff
+
+
+
+
+← tsb playground
+shift & diff
+
+ Lag values and compute discrete differences —
+ mirrors pandas.Series.shift() and pandas.Series.diff().
+
+
+
+ 1 — shiftSeries: lag values by N positions
+
+ shiftSeries(series, periods) shifts each value by periods positions.
+ Vacated positions are filled with null. The index is unchanged.
+ Mirrors pandas.Series.shift().
+
+ import { Series, shiftSeries } from "tsb";
+
+const s = new Series({ data: [10, 20, 30, 40, 50] });
+
+// shift down by 1 (default)
+console.log([...shiftSeries(s).values]);
+// [null, 10, 20, 30, 40]
+
+// shift up by 1 (negative periods)
+console.log([...shiftSeries(s, -1).values]);
+// [20, 30, 40, 50, null]
+
+// periods = 0 → no change
+console.log([...shiftSeries(s, 0).values]);
+// [10, 20, 30, 40, 50]
+
+ Loading…
+
+
+
+ 2 — diffSeries: first discrete difference
+
+ diffSeries(series, periods) computes values[i] - values[i - periods]
+ for each element. Returns NaN where there is no prior value or when either
+ operand is non-numeric. Mirrors pandas.Series.diff().
+
+ import { Series, diffSeries } from "tsb";
+
+// cumulative price data
+const prices = new Series({ data: [100, 105, 103, 110, 108] });
+
+// day-over-day change (lag 1)
+const d1 = diffSeries(prices);
+console.log([...d1.values]);
+// [NaN, 5, -2, 7, -2]
+
+// 2-day change (lag 2)
+const d2 = diffSeries(prices, 2);
+console.log([...d2.values]);
+// [NaN, NaN, 3, 5, 5]
+
+ Loading…
+
+
+
+ 3 — missing values in shift
+
+ Null and NaN values in the source are preserved when shifted — they behave just like
+ any other value, not like holes.
+
+ import { Series, shiftSeries } from "tsb";
+
+const s = new Series({ data: [1, null, 3, NaN, 5] });
+const shifted = shiftSeries(s, 1);
+console.log([...shifted.values]);
+// [null, 1, null, 3, NaN]
+// Note: the leading null is from the shift; the rest are the original values shifted down.
+
+ Loading…
+
+
+
+ 4 — missing values in diff
+
+ When either operand in a diff is null/NaN or non-numeric, the result at that position
+ is NaN. This mirrors pandas' behaviour.
+
+ import { Series, diffSeries } from "tsb";
+
+const s = new Series({ data: [1, null, 5, 8] });
+const d = diffSeries(s);
+console.log([...d.values]);
+// [NaN, NaN, NaN, 3]
+// positions 0, 1, 2 are NaN:
+// 0 → no previous value
+// 1 → current is null (non-numeric)
+// 2 → previous (position 1) is null (non-numeric)
+
+ Loading…
+
+
+
+ 5 — dataFrameShift: shift by column (axis=0)
+
+ dataFrameShift(df, periods) applies shiftSeries to each column
+ independently. Use axis: 1 to shift values across columns within each row.
+
+ import { DataFrame, dataFrameShift } from "tsb";
+
+const df = DataFrame.fromColumns({
+ open: [100, 105, 103, 110],
+ close: [104, 102, 108, 112],
+});
+
+const shifted = dataFrameShift(df, 1);
+console.log([...shifted.col("open").values]); // [null, 100, 105, 103]
+console.log([...shifted.col("close").values]); // [null, 104, 102, 108]
+
+ Loading…
+
+
+
+ 6 — dataFrameDiff: column-wise differences
+
+ dataFrameDiff(df, periods) computes element-wise discrete differences for
+ each column. Useful for converting time-series levels into changes.
+
+ import { DataFrame, dataFrameDiff } from "tsb";
+
+const df = DataFrame.fromColumns({
+ a: [1, 3, 6, 10],
+ b: [10, 30, 60, 100],
+});
+
+const changes = dataFrameDiff(df);
+console.log([...changes.col("a").values]); // [NaN, 2, 3, 4]
+console.log([...changes.col("b").values]); // [NaN, 20, 30, 40]
+
+ Loading…
+
+
+
+ 7 — combining shift and diff
+
+ A common pattern: use shiftSeries to compute percentage change by dividing
+ the current value by the lagged value.
+
+ import { Series, shiftSeries, diffSeries } from "tsb";
+
+const prices = new Series({ data: [100, 110, 105, 115, 120] });
+
+// percentage change manually using shift
+const prev = shiftSeries(prices, 1);
+const pctChange = prices.values.map((v, i) => {
+ const p = prev.values[i];
+ if (p === null || p === 0 || typeof p !== "number") return NaN;
+ return ((v as number) - p) / p * 100;
+});
+console.log(pctChange.map(x => isNaN(x) ? "NaN" : x.toFixed(2) + "%"));
+// ["NaN", "10.00%", "-4.55%", "9.52%", "4.35%"]
+
+// or just use diffSeries and divide
+const diff = diffSeries(prices);
+const prevVals = shiftSeries(prices, 1).values;
+const pct2 = diff.values.map((d, i) => {
+ const p = prevVals[i];
+ if (typeof p !== "number" || p === 0) return NaN;
+ return (d as number) / p * 100;
+});
+console.log(pct2.map(x => isNaN(x) ? "NaN" : x.toFixed(2) + "%"));
+
+ Loading…
+
+
+
+ 8 — negative periods: lead instead of lag
+
+ Negative periods "leads" the series — each position gets the value from
+ ahead of it, not behind. Useful for computing forward-looking changes.
+
+ import { Series, shiftSeries, diffSeries } from "tsb";
+
+const s = new Series({ data: [1, 4, 9, 16, 25] });
+
+// lead by 1: each position gets the next value
+const lead1 = shiftSeries(s, -1);
+console.log([...lead1.values]); // [4, 9, 16, 25, null]
+
+// forward diff: how much will the value increase?
+const fwdDiff = diffSeries(s, -1);
+console.log([...fwdDiff.values]); // [-3, -5, -7, -9, NaN]
+
+ Loading…
+
+
+
+
diff --git a/playground/timedelta.html b/playground/timedelta.html
new file mode 100644
index 00000000..4085524d
--- /dev/null
+++ b/playground/timedelta.html
@@ -0,0 +1,240 @@
+
+
+
+
+
+ tsb — Timedelta & TimedeltaIndex
+
+
+
+
+← tsb playground
+Timedelta & TimedeltaIndex
+
+ Fixed-duration time spans and ordered index of durations —
+ mirrors pandas.Timedelta and pandas.TimedeltaIndex.
+
+
+
+ 1 — Creating a Timedelta
+
+ A Timedelta stores a duration as a whole number of milliseconds.
+ Construct from component fields, a raw millisecond count, or a string.
+
+ import { Timedelta } from "tsb";
+
+// From components
+const td1 = Timedelta.fromComponents({ days: 1, hours: 2, minutes: 30 });
+console.log(td1.toString()); // "1 days 02:30:00"
+console.log(td1.totalHours); // 26.5
+
+// From milliseconds
+const td2 = Timedelta.fromMilliseconds(3_600_000);
+console.log(td2.totalHours); // 1
+
+// Parse pandas-style string
+const td3 = Timedelta.parse("2 days 06:00:00");
+console.log(td3.totalDays); // 2.25
+
+// Parse ISO 8601
+const td4 = Timedelta.parse("P1DT12H");
+console.log(td4.totalHours); // 36
+
+ Loading…
+
+
+
+ 2 — Component accessors
+
+ Access the individual components of a duration. For negative durations the
+ days component carries the sign; hours,
+ minutes, seconds, and milliseconds
+ are always non-negative remainders.
+
+ import { Timedelta } from "tsb";
+
+const td = Timedelta.fromComponents({
+ days: 1, hours: 2, minutes: 3, seconds: 4, milliseconds: 567
+});
+
+console.log("days :", td.days); // 1
+console.log("hours :", td.hours); // 2
+console.log("minutes :", td.minutes); // 3
+console.log("seconds :", td.seconds); // 4
+console.log("milliseconds:", td.milliseconds); // 567
+
+// Negative duration
+const neg = Timedelta.fromComponents({ hours: -25 });
+console.log("days (neg) :", neg.days); // -1
+console.log("hours (neg) :", neg.hours); // 1
+
+ Loading…
+
+
+
+ 3 — Arithmetic
+
+ Timedeltas support addition, subtraction, scalar multiplication, negation,
+ absolute value, and ratio (dividing one duration by another).
+
+ import { Timedelta } from "tsb";
+
+const h1 = Timedelta.fromComponents({ hours: 1 });
+const h2 = Timedelta.fromComponents({ hours: 2 });
+
+console.log(h1.add(h2).totalHours); // 3
+console.log(h2.sub(h1).totalHours); // 1
+console.log(h1.mul(3).totalHours); // 3
+console.log(h1.negate().totalHours); // -1
+
+const neg = Timedelta.fromComponents({ hours: -3 });
+console.log(neg.abs().totalHours); // 3
+
+// Ratio between two durations
+const day = Timedelta.fromComponents({ days: 1 });
+console.log(day.divBy(h1)); // 24
+
+ Loading…
+
+
+
+ 4 — String formats
+
+ toString() produces a pandas-compatible representation.
+ toISOString() produces an ISO 8601 duration.
+ Timedelta.parse() accepts both formats plus plain
+ HH:MM:SS.
+
+
+ Format Example
+ pandas-style 1 days 02:30:00
+ pandas-style (ms) 0 days 00:00:01.500
+ ISO 8601 P1DT2H30M
+ HH:MM:SS 02:30:00
+ Negative -1 days 01:00:00
+
+ import { Timedelta } from "tsb";
+
+const td = Timedelta.fromComponents({ days: 1, hours: 2, minutes: 30 });
+console.log(td.toString()); // "1 days 02:30:00"
+console.log(td.toISOString()); // "P1DT2H30M"
+
+// Round-trip parse
+const parsed = Timedelta.parse(td.toString());
+console.log(parsed.equals(td)); // true
+
+// Negative
+const neg = Timedelta.fromComponents({ hours: -25 });
+console.log(neg.toString()); // "-1 days 01:00:00"
+console.log(neg.toISOString()); // "-P1DT1H"
+
+ Loading…
+
+
+
+ 5 — TimedeltaIndex
+
+ TimedeltaIndex is an ordered array of Timedelta
+ values — useful as a row index for time-series data with irregular or
+ regular durations.
+
+ import { Timedelta, TimedeltaIndex } from "tsb";
+
+// Build from a range (like pandas.timedelta_range)
+const idx = TimedeltaIndex.fromRange(
+ Timedelta.fromComponents({ hours: 0 }),
+ Timedelta.fromComponents({ hours: 4 }),
+ Timedelta.fromComponents({ hours: 1 }),
+ { name: "duration" },
+);
+
+console.log("size :", idx.size); // 5
+console.log("name :", idx.name); // "duration"
+console.log("at(0) :", idx.at(0).toString()); // "0 days 00:00:00"
+console.log("at(4) :", idx.at(4).toString()); // "0 days 04:00:00"
+console.log("min :", idx.min().totalHours); // 0
+console.log("max :", idx.max().totalHours); // 4
+
+ Loading…
+
+
+
+ 6 — Index operations
+
+ TimedeltaIndex supports sorting, deduplication, shifting,
+ filtering, and renaming.
+
+ import { Timedelta, TimedeltaIndex } from "tsb";
+
+const vals = [3, 1, 2, 1].map(h => Timedelta.fromComponents({ hours: h }));
+const idx = TimedeltaIndex.fromTimedeltas(vals);
+
+// Sort
+const sorted = idx.sort();
+console.log("sorted:", sorted.toStrings());
+// ["0 days 01:00:00", "0 days 01:00:00", "0 days 02:00:00", "0 days 03:00:00"]
+
+// Remove duplicates
+const uniq = idx.unique();
+console.log("unique size:", uniq.size); // 3
+
+// Shift by 10 hours
+const shifted = idx.shift(Timedelta.fromComponents({ hours: 10 }));
+console.log("shifted[0]:", shifted.at(0).totalHours); // 13
+
+// Filter
+const large = idx.filter(td => td.totalHours >= 2);
+console.log("large size:", large.size); // 2
+
+// Parse from strings
+const fromStr = TimedeltaIndex.fromStrings(["01:00:00", "02:00:00", "03:00:00"]);
+console.log("fromStr[1]:", fromStr.at(1).totalHours); // 2
+
+ Loading…
+
+
+
+ 7 — Comparison
+ import { Timedelta } from "tsb";
+
+const h1 = Timedelta.fromComponents({ hours: 1 });
+const h2 = Timedelta.fromComponents({ hours: 2 });
+
+console.log(h1.equals(h2)); // false
+console.log(h1.equals(Timedelta.fromComponents({ hours: 1 }))); // true
+console.log(h1.compareTo(h2)); // negative → h1 < h2
+console.log(h2.compareTo(h1)); // positive → h2 > h1
+console.log(h1.compareTo(Timedelta.fromComponents({ hours: 1 }))); // 0
+
+ Loading…
+
+
+
+
+
diff --git a/playground/timestamp.html b/playground/timestamp.html
new file mode 100644
index 00000000..001709c6
--- /dev/null
+++ b/playground/timestamp.html
@@ -0,0 +1,647 @@
+
+
+
+
+
+ tsb — Timestamp
+
+
+
+
+
+ ← back to index
+
+ API Reference
+
+
+ Method / Property Description pandas equivalent
+
+ new Timestamp(str) Parse ISO string (with optional tz option) pd.Timestamp(str)
+ Timestamp.now(tz?) Current time pd.Timestamp.now()
+ Timestamp.today() Today at midnight pd.Timestamp.today()
+ .year .month .day Date components same
+ .hour .minute .second Time components same
+ .dayofweek 0=Mon … 6=Sun .dayofweek
+ .dayofyear .quarter .week Calendar properties same
+ .is_month_start .is_month_end Calendar boundary checks same
+ .isoformat(sep, timespec) ISO string output .isoformat()
+ .strftime(fmt) Format string (%Y-%m-%d etc.) .strftime()
+ .floor(freq) .ceil(freq) .round(freq) Round to frequency same
+ .normalize() Truncate to midnight .normalize()
+ .tz_localize(tz) Attach timezone to naive .tz_localize()
+ .tz_convert(tz) Convert to another timezone .tz_convert()
+ .add(Timedelta) Shift forward by a duration ts + td
+ .sub(ts|td) Subtract timestamp or timedelta ts - ts2
+ .day_name() .month_name() English name strings same
+ .timestamp() Unix seconds (float) .timestamp()
+
+
+
+
+ Interactive Inspector
+
+
+
Inspect
+
Click "Inspect" to explore a Timestamp.
+
+
+ strftime Formatter
+
+
+ Rounding
+
+
+
+ Datetime
+
+
+
+ Frequency
+
+ H — Hour
+ T — Minute
+ S — Second
+ D — Day
+
+
+
+ Run floor / ceil / round
+
+
+
+
+
+ Arithmetic
+
+
+ Timezone Conversion
+
+
+
+ UTC datetime
+
+
+
+ Target timezone
+
+ America/New_York
+ America/Los_Angeles
+ Europe/London
+ Europe/Paris
+ Asia/Tokyo
+ Asia/Kolkata (IST)
+ Australia/Sydney
+ UTC
+
+
+
+ Convert
+
+
+
+
+
+ Now / Today
+
+
+ Timestamp.now("UTC")
+ Timestamp.now() — naive
+ Timestamp.today()
+
+
Click a button above.
+
+
+
+
+
+
diff --git a/playground/to_numeric.html b/playground/to_numeric.html
new file mode 100644
index 00000000..d8bb491d
--- /dev/null
+++ b/playground/to_numeric.html
@@ -0,0 +1,138 @@
+
+
+
+
+
+ tsb — to_numeric
+
+
+
+ 📐 to_numeric
+
+ Convert scalars, arrays, or Series to numeric types — mirroring
+ pandas.to_numeric() .
+
+
+ 1 · Scalar conversion
+ toNumericScalar("42") // 42
+toNumericScalar("3.14") // 3.14
+toNumericScalar(true) // 1
+toNumericScalar(null) // NaN
+toNumericScalar("bad", { errors: "coerce" }) // NaN
+toNumericScalar("bad", { errors: "ignore" }) // "bad"
+ click Run to evaluate
+ Run
+
+ 2 · Array conversion with error handling
+ toNumericArray(["1", "2.5", "abc", null], { errors: "coerce" })
+// [1, 2.5, NaN, NaN]
+ click Run to evaluate
+ Run
+
+ 3 · Series conversion
+ const prices = new Series(["10.5", "bad", "22"], {
+ name: "price",
+ index: ["a", "b", "c"]
+});
+toNumericSeries(prices, { errors: "coerce" })
+// Series [10.5, NaN, 22] name="price"
+ click Run to evaluate
+ Run
+
+ 4 · Downcast
+ // downcast to float32 precision
+toNumericScalar(3.14159265358979, { downcast: "float" })
+// ~3.1415927
+
+// downcast to integer (snap to smallest int type)
+toNumericScalar(42, { downcast: "integer" }) // 42
+ click Run to evaluate
+ Run
+
+ 5 · Live sandbox
+ Edit and run arbitrary code using the tsb API.
+
+ click Run to evaluate
+ Run
+
+ ← back to index
+
+
+
+
diff --git a/playground/value_counts_full.html b/playground/value_counts_full.html
new file mode 100644
index 00000000..2c2e91ad
--- /dev/null
+++ b/playground/value_counts_full.html
@@ -0,0 +1,238 @@
+
+
+
+
+
+ valueCountsBinned — tsb playground
+
+
+
+ valueCountsBinned
+
+ pandas.Series.value_counts(bins=N) — bin numeric values into equal-width
+ intervals, then count frequencies.
+
+
+
+
Interactive Demo
+
+
+
+ sort
+
+ true (by count)
+ false (by interval)
+
+
+
+ ascending
+
+ false
+ true
+
+
+
+ normalize
+
+ false (counts)
+ true (proportions)
+
+
+
+
Run
+
+
Result
+
Click Run to see results.
+
+
+
+
How it works
+
+ valueCountsBinned(series, N) internally:
+
+
+ Calls cut(series, N) to assign each value to one of N equal-width bins.
+ Counts occurrences per bin label (NaN/null values are excluded).
+ Optionally sorts by count (sort=true, default) or by interval position (sort=false).
+ Optionally returns proportions instead of counts (normalize=true).
+
+
+
API
+
+valueCountsBinned(
+ series: Series<Scalar>,
+ bins: number,
+ options?: {
+ sort?: boolean; // default: true
+ ascending?: boolean; // default: false
+ normalize?: boolean; // default: false
+ }
+): Series<number>
+
+
+
+
Examples
+
+
Basic binning
+
+import { Series, valueCountsBinned } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+const vc = valueCountsBinned(s, 2);
+// Index: ["(0.995, 3.0]", "(3.0, 5.005]"]
+// Values: [3, 2] ← sorted by count (default)
+
+
Interval order (sort=false)
+
+const vc2 = valueCountsBinned(s, 2, { sort: false });
+// Index: ["(0.995, 3.0]", "(3.0, 5.005]"]
+// Values: [3, 2] ← in interval order
+
+
Proportions (normalize=true)
+
+const vc3 = valueCountsBinned(s, 2, { normalize: true });
+// Index: ["(0.995, 3.0]", "(3.0, 5.005]"]
+// Values: [0.6, 0.4]
+
+
Handling NaN / null
+
+const s2 = new Series({ data: [1, null, 2, NaN, 3, 4, 5] });
+const vc4 = valueCountsBinned(s2, 2);
+// NaN and null values are excluded. Total = 5.
+
+
+
+ ← Back to playground index
+ |
+ stats
+ binning
+ value_counts
+
+
+
+
+
diff --git a/playground/where_mask.html b/playground/where_mask.html
index 89a50a05..cf942330 100644
--- a/playground/where_mask.html
+++ b/playground/where_mask.html
@@ -1,220 +1,194 @@
-
+
-
-
-
- tsb — where / mask: Conditional Selection
-
-
-
- tsb — where / mask: Conditional Selection
-
- seriesWhere / seriesMask and their DataFrame equivalents
- allow element-wise conditional replacement — the TypeScript equivalents of
- pandas.Series.where
- and
- pandas.Series.mask .
-
-
-
- Quick rule:
- where(cond) — keep where cond is true , replace elsewhere.
- mask(cond) — keep where cond is false , replace elsewhere.
- They are exact inverses of each other.
-
-
- 1. seriesWhere — Boolean Array Condition
-
- Pass a boolean[] to keep values at true positions, replace
- the rest with null (or a custom other value).
-
- import { Series, seriesWhere } from "tsb";
-
-const scores = new Series({ data: [42, 91, 67, 55, 88] });
-const highScores = seriesWhere(scores, [false, true, false, false, true]);
-// Series [null, 91, null, null, 88]
-
-// Custom replacement value
-const clamped = seriesWhere(scores, [false, true, false, false, true], { other: 0 });
-// Series [0, 91, 0, 0, 88]
-
- 2. seriesWhere — Callable Condition
-
- Pass a function that receives the Series and returns a boolean[] or
- Series<boolean>. This avoids computing the condition array manually.
-
- import { Series, seriesWhere } from "tsb";
-
-const temps = new Series({ data: [-5, 12, 23, -3, 8] });
-
-// Keep only values above freezing
-const aboveFreezing = seriesWhere(
- temps,
- (s) => s.values.map((v) => (v as number) > 0),
-);
-// Series [null, 12, 23, null, 8]
-
-// Replace with 0 instead of null
-const noFreeze = seriesWhere(
- temps,
- (s) => s.values.map((v) => (v as number) > 0),
- { other: 0 },
-);
-// Series [0, 12, 23, 0, 8]
-
- 3. seriesMask — The Inverse
-
- mask replaces positions where the condition is true
- (the opposite of where). Use it to "blank out" outliers or invalid values.
-
- import { Series, seriesMask } from "tsb";
-
-const data = new Series({ data: [1, 2, 3, 4, 5] });
-
-// Mask out values greater than 3
-const masked = seriesMask(
- data,
- (s) => s.values.map((v) => (v as number) > 3),
- { other: null },
-);
-// Series [1, 2, 3, null, null]
-
- 4. dataFrameWhere — Element-Wise on DataFrames
-
- Pass a boolean DataFrame or a callable that returns one.
- Columns and row labels are aligned by name.
-
- import { DataFrame, dataFrameWhere } from "tsb";
+
+
+
+ tsb — where / mask
+
+
+
+
+← tsb playground
+where / mask
+Conditional value selection and replacement — mirrors pandas.Series.where and pandas.DataFrame.mask.
+
+
+ 1 — whereSeries: keep values where condition is true
+ whereSeries(series, cond) keeps each element where cond is true and replaces it with null (or a custom other) where cond is false.
+ import { Series, whereSeries } from "tsb";
+
+const scores = new Series({ data: [45, 72, 58, 88, 91, 30], name: "score" });
+
+// Keep only passing scores (>= 60); replace failing scores with null
+const passing = whereSeries(scores, (v) => v >= 60);
+console.log("passing:", [...passing.values]);
+// → [null, 72, null, 88, 91, null]
+
+// Replace failing scores with 0 instead of null
+const zeroFail = whereSeries(scores, (v) => v >= 60, { other: 0 });
+console.log("zero-fail:", [...zeroFail.values]);
+// → [0, 72, 0, 88, 91, 0]
+
+ ▶ run
+
+
+
+ 2 — maskSeries: replace values where condition is true
+ maskSeries is the inverse of whereSeries: it replaces where cond is true and keeps where cond is false.
+ import { Series, maskSeries } from "tsb";
+
+const temps = new Series({ data: [-5, 12, -3, 20, 7], name: "temp_C" });
+
+// Mask (hide) sub-zero temperatures
+const noFrost = maskSeries(temps, (v) => v < 0);
+console.log("no frost:", [...noFrost.values]);
+// → [null, 12, null, 20, 7]
+
+// Replace sub-zero with a sentinel value
+const clamped = maskSeries(temps, (v) => v < 0, { other: 0 });
+console.log("clamped: ", [...clamped.values]);
+// → [0, 12, 0, 20, 7]
+
+ ▶ run
+
+
+
+ 3 — Boolean Series as condition
+ Pass a Series<boolean> (or a plain boolean array) as the condition for position-aligned filtering.
+ import { Series, whereSeries, maskSeries } from "tsb";
+
+const prices = new Series({ data: [100, 200, 150, 80, 300], name: "price" });
+const inStock = new Series({ data: [true, false, true, false, true] });
+
+// Keep prices only for in-stock items
+const available = whereSeries(prices, inStock);
+console.log("in-stock prices:", [...available.values]);
+// → [100, null, 150, null, 300]
+
+// Mask out-of-stock prices (same result — cond is inverted)
+const masked = maskSeries(prices, inStock.values.map((v) => !v));
+console.log("masked: ", [...masked.values]);
+// → [100, null, 150, null, 300]
+
+ ▶ run
+
+
+
+ 4 — whereDataFrame: cell-wise filtering on a DataFrame
+ whereDataFrame(df, cond) applies the condition independently to each cell across all columns.
+ import { DataFrame, whereDataFrame } from "tsb";
const df = DataFrame.fromColumns({
- temp_c: [22, -3, 18, -7, 30],
- humidity: [55, 80, 62, 75, 45],
+ a: [1, -2, 3],
+ b: [-4, 5, -6],
+ c: [ 7, 8, 9],
});
-// Keep only valid summer readings (temp > 0)
-const condDf = DataFrame.fromColumns({
- temp_c: [true, false, true, false, true],
- humidity: [true, false, true, false, true],
-});
-
-const summer = dataFrameWhere(df, condDf);
-// DataFrame:
-// temp_c [22, null, 18, null, 30 ]
-// humidity [55, null, 62, null, 45 ]
+// Keep non-negative values; replace negatives with null
+const positive = whereDataFrame(df, (v) => v >= 0);
+console.log("a:", [...positive.col("a").values]); // [1, null, 3]
+console.log("b:", [...positive.col("b").values]); // [null, 5, null]
+console.log("c:", [...positive.col("c").values]); // [7, 8, 9]
+
+ ▶ run
+
- 5. dataFrameWhere — Callable Condition
- import { DataFrame, dataFrameWhere } from "tsb";
+
+ 5 — maskDataFrame: replace cells matching condition
+ import { DataFrame, maskDataFrame } from "tsb";
const df = DataFrame.fromColumns({
- a: [1, 2, 3, 4, 5],
- b: [10, 20, 30, 40, 50],
+ revenue: [100, 0, 250, -50, 0],
+ cost: [ 80, 0, 200, 30, 0],
});
-// Keep only values > 2 (column-wise threshold)
-const result = dataFrameWhere(df, (d) => {
- const condCols: Record<string, boolean[]> = {};
- for (const col of d.columns) {
- condCols[col as string] = d.col(col as string).values.map(
- (v) => (v as number) > 2
- );
- }
- return DataFrame.fromColumns(condCols);
+// Mask zeros (replace with null to mark as missing)
+const noZeros = maskDataFrame(df, (v) => v === 0);
+console.log("revenue:", [...noZeros.col("revenue").values]);
+// → [100, null, 250, -50, null]
+console.log("cost: ", [...noZeros.col("cost").values]);
+// → [80, null, 200, 30, null]
+
+ ▶ run
+
+
+
+ 6 — DataFrame condition (boolean DataFrame)
+ Pass a boolean DataFrame as the condition for per-cell control.
+ import { DataFrame, whereDataFrame } from "tsb";
+
+const data = DataFrame.fromColumns({
+ x: [10, 20, 30],
+ y: [40, 50, 60],
});
-// DataFrame:
-// a: [null, null, 3, 4, 5]
-// b: [10, 20, 30, 40, 50]
- 6. dataFrameMask — DataFrame Mask
- import { DataFrame, dataFrameMask } from "tsb";
-
-const df = DataFrame.fromColumns({
- sales: [100, 200, 50, 300, 80],
- profit: [10, 40, -5, 60, -2],
+// Custom boolean mask per cell
+const cond = DataFrame.fromColumns({
+ x: [true, false, true],
+ y: [false, true, true],
});
-// Mask out (replace) rows with negative profit
-const cleaned = dataFrameMask(
- df,
- (d) => {
- const condCols: Record<string, boolean[]> = {};
- for (const col of d.columns) {
- condCols[col as string] = d.col(col as string).values.map(
- (v) => (v as number) < 0
- );
- }
- return DataFrame.fromColumns(condCols);
- },
- { other: 0 },
-);
-// DataFrame:
-// sales: [100, 200, 50, 300, 80]
-// profit: [10, 40, 0, 60, 0 ]
-
- Label-Aligned Series Condition
-
- When you pass a Series<boolean> as the condition, values are aligned
- by label , not position. Labels absent from the condition series are treated
- as false.
-
- import { Series, seriesWhere } from "tsb";
-
-const prices = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
-const valid = new Series<boolean>({ data: [false, true], index: ["a", "b"] });
-
-// Only "b" is in the condition with value=true; "a"=false, "c" missing→false
-const result = seriesWhere(prices, valid, { other: -1 });
-// Series { a: -1, b: 20, c: -1 }
-
- API Reference
-
- Function Keeps when cond is… Replaces with
- seriesWhere(s, cond, {other})trueother (default null)
- seriesMask(s, cond, {other})falseother (default null)
- dataFrameWhere(df, cond, {other})trueother (default null)
- dataFrameMask(df, cond, {other})falseother (default null)
-
-
- Condition types
-
- Type Series ops DataFrame ops
- Boolean array ✅ positional —
- Series<boolean>✅ label-aligned —
- DataFrame (boolean)— ✅ label-aligned
- Callable ✅ receives Series ✅ receives DataFrame
-
-
- ← Back to tsb playground index
-
+const result = whereDataFrame(data, cond);
+console.log("x:", [...result.col("x").values]); // [10, null, 30]
+console.log("y:", [...result.col("y").values]); // [null, 50, 60]
+
+ ▶ run
+
+
+
+ 7 — Combining where and mask for range clamping
+ Chaining two whereSeries calls (or, equivalently, maskSeries with inverted conditions) is a clean way to apply lower and upper bounds.
+ import { Series, whereSeries, maskSeries } from "tsb";
+
+const raw = new Series({ data: [-10, 0, 5, 15, 100, 3], name: "value" });
+const LO = 0, HI = 10;
+
+// 1) Replace values below lower bound with LO
+const step1 = whereSeries(raw, (v) => (v as number) >= LO, { other: LO });
+// 2) Replace values above upper bound with HI
+const clamped = whereSeries(step1, (v) => (v as number) <= HI, { other: HI });
+
+console.log("clamped:", [...clamped.values]);
+// → [0, 0, 5, 10, 10, 3]
+
+ ▶ run
+
+
+
+ 8 — where / mask vs. clip
+
+ When to use which?
+ Use clip() for simple numeric lower/upper bounds.
+ Use where() / mask() for arbitrary conditions — including non-numeric types,
+ string patterns, or per-cell boolean DataFrames.
+
+ import { Series, whereSeries, clip } from "tsb";
+
+const s = new Series({ data: [-3, 1, 5, 10], name: "val" });
+
+// clip is concise for numeric bounds
+const clipped = clip(s, { lower: 0, upper: 6 });
+console.log("clipped: ", [...clipped.values]); // [0, 1, 5, 6]
+
+// where gives full control — replace out-of-range with null instead of clamping
+const filtered = whereSeries(s, (v) => (v as number) >= 0 && (v as number) <= 6);
+console.log("filtered:", [...filtered.values]); // [null, 1, 5, null]
+
+ ▶ run
+
+
+
diff --git a/playground/wide_to_long.html b/playground/wide_to_long.html
index b30980cd..887b4bbb 100644
--- a/playground/wide_to_long.html
+++ b/playground/wide_to_long.html
@@ -1,113 +1,263 @@
-
+
-
-
-
- tsb — wideToLong
-
-
-
- ← tsb playground
-
- wideToLong
-
- Reshape a wide-format DataFrame to long format by collapsing stub-prefixed column
- groups into rows — mirrors
-
- pandas.wide_to_long() .
-
-
- Concept
-
- Given a wide DataFrame where repeated measurements are spread across columns with a
- common stub prefix and a numeric (or other) suffix — e.g. score_2021,
- score_2022 — wideToLong pivots those column groups into rows.
- One row per original row per unique suffix is produced.
-
-
- Example — numeric suffixes
- import { DataFrame } from "tsb";
-import { wideToLong } from "tsb";
-
-const df = DataFrame.fromColumns({
- id: ["x", "y"],
- A1: [1, 2],
- A2: [3, 4],
- B1: [5, 6],
- B2: [7, 8],
+
+
+
+ tsb · wide_to_long
+
+
+
+
+ ← tsb
+ 🔀 wide_to_long
+
+
+
+
+ Overview
+
+ wideToLong(df, stubnames, i, j, options?) mirrors
+ pandas.wide_to_long(). It reshapes a wide
+ DataFrame — where multiple columns share a common prefix (stub) and a
+ varying suffix — into a long DataFrame with one row per
+ (original row, suffix) pair.
+
+
+ Unlike the simpler melt (which treats every column value
+ independently), wideToLong keeps related stub columns
+ side by side and extracts the suffix as a new identifier column.
+
+ wideToLong(df, stubnames, i, j, { sep?, suffix? })
+
+ stubnames — prefix(es) of the grouped columns (e.g.
+ ["A","B"]).
+ i — identifier column(s) that are carried along
+ unchanged.
+ j — name for the new column that holds the extracted
+ suffixes.
+ sep — separator between stub and suffix
+ ("" by default).
+ suffix — RegExp or string pattern that matches the suffix
+ (/\d+/ by default).
+
+
+
+
+ Interactive playground
+ Edit the CSV data, stubs, and options then click Run .
+
+
+
+
+
+ Wide CSV data
+
+
+
+
Long output
+
Click Run…
+
+
+
+ Run
+
+
+
+ Examples
+
+ 1 · Numeric suffix (default)
+ Column names like A1, A2 share stub A; the suffix 1/2 becomes the year column.
+ const df = DataFrame.fromColumns({
+ id: ["x", "y"],
+ A1: [1, 2], A2: [3, 4],
+ B1: [5, 6], B2: [7, 8],
+});
+wideToLong(df, ["A", "B"], "id", "year");
+// id year A B
+// x 1 1 5
+// y 1 2 6
+// x 2 3 7
+// y 2 4 8
+
+
+ 2 · Underscore separator
+ Use sep: "_" for column names like score_pre / score_post.
+ const df = DataFrame.fromColumns({
+ subject: [1, 2],
+ score_pre: [80, 90],
+ score_post: [85, 95],
+});
+wideToLong(df, "score", "subject", "phase", {
+ sep: "_",
+ suffix: /[a-z]+/,
});
+// subject phase score
+// 1 pre 80
+// 2 pre 90
+// 1 post 85
+// 2 post 95
-const long = wideToLong(df, ["A", "B"], "id", "num");
-
-// long.columns.values → ["id", "num", "A", "B"]
-// long.shape → [4, 4]
-//
-// id num A B
-// x 1 1 5
-// y 1 2 6
-// x 2 3 7
-// y 2 4 8
-
-
- Example — separator and custom suffix
- const df = DataFrame.fromColumns({
- country: ["US", "UK"],
- gdp_2020: [21e12, 2.7e12],
- gdp_2021: [23e12, 3.1e12],
- pop_2020: [331e6, 67e6],
- pop_2021: [332e6, 68e6],
+
+ 3 · Multiple id columns
+ Pass an array to i to preserve several identifier columns.
+ const df = DataFrame.fromColumns({
+ country: ["US","UK","DE"],
+ region: ["East","South","West"],
+ gdp2020: [21, 2.7, 3.8],
+ gdp2021: [23, 3.1, 4.2],
});
+wideToLong(df, "gdp", ["country","region"], "year", { sep: "" });
+// country region year gdp
+// US East 2020 21
+// UK South 2020 2.7
+// DE West 2020 3.8
+// US East 2021 23
+// UK South 2021 3.1
+// DE West 2021 4.2
+
+
+
+ vs melt
+
+ Both melt and wideToLong convert wide data to
+ long. The key difference:
+
+
+ melt wideToLong
+ Column grouping None — each column → one variable/value row Groups by stub; related columns land in the same output row
+ New columns variable + value One column per stub + j
+ Suffix extraction No Yes — suffix becomes j value
+ Use when… Each wide column is independent Columns share a common prefix and varying suffix
+
+
+
+
+
+
+
diff --git a/src/core/align.ts b/src/core/align.ts
new file mode 100644
index 00000000..144f53b5
--- /dev/null
+++ b/src/core/align.ts
@@ -0,0 +1,197 @@
+/**
+ * align — realign two Series or DataFrames to a common axis.
+ *
+ * Mirrors `pandas.Series.align()` / `pandas.DataFrame.align()`:
+ *
+ * - {@link alignSeries} — align two `Series` on their row indices.
+ * - {@link alignDataFrame} — align two `DataFrame` objects on rows, columns,
+ * or both axes simultaneously.
+ *
+ * ### Join policies
+ *
+ * | `join` | Result index |
+ * |-----------|---------------------------------------------------|
+ * | `"outer"` | Union of the two index sets (default) |
+ * | `"inner"` | Intersection of the two index sets |
+ * | `"left"` | Left object's index |
+ * | `"right"` | Right object's index |
+ *
+ * ### Axis (DataFrame only)
+ *
+ * | `axis` | Aligned axes |
+ * |---------------|-------------------------------------------------|
+ * | `0` / `"index"` | Row index only |
+ * | `1` / `"columns"` | Columns only |
+ * | `null` / `undefined` | Both rows **and** columns (default) |
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: [1, 2, 3], index: new Index(["a", "b", "c"]) });
+ * const b = new Series({ data: [10, 20], index: new Index(["b", "c"]) });
+ *
+ * const [left, right] = alignSeries(a, b, { join: "inner" });
+ * // left → Series [2, 3] with index ["b", "c"]
+ * // right → Series [10, 20] with index ["b", "c"]
+ *
+ * const [lo, ro] = alignSeries(a, b, { join: "outer", fillValue: 0 });
+ * // left → Series [1, 2, 3] with index ["a", "b", "c"]
+ * // right → Series [0, 10, 20] with index ["a", "b", "c"]
+ * ```
+ *
+ * @module
+ */
+
+import type { Axis, JoinHow, Label, Scalar } from "../types.ts";
+import type { Index } from "./base-index.ts";
+import type { DataFrame } from "./frame.ts";
+import { reindexDataFrame, reindexSeries } from "./reindex.ts";
+import type { Series } from "./series.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Options for {@link alignSeries}: how the shared result index is chosen
+ * (`join`) and what value fills labels missing from one input (`fillValue`).
+ */
+export interface AlignSeriesOptions {
+  /**
+   * How to determine the result index.
+   * - `"outer"` (default) — union of both indices.
+   * - `"inner"` — intersection of both indices.
+   * - `"left"` — left Series' index.
+   * - `"right"` — right Series' index.
+   */
+  join?: JoinHow;
+  /**
+   * Scalar to use for labels that exist in the result index but are absent
+   * from one of the inputs (default: `null`).
+   */
+  fillValue?: Scalar;
+}
+
+/**
+ * Options for {@link alignDataFrame}.
+ * Inherits `join` and `fillValue` from {@link AlignSeriesOptions}; adds `axis`
+ * to restrict alignment to rows or columns.
+ */
+export interface AlignDataFrameOptions extends AlignSeriesOptions {
+  /**
+   * Which axes to align.
+   * - `null` / `undefined` (default) — align both rows and columns.
+   * - `0` / `"index"` — rows only.
+   * - `1` / `"columns"` — columns only.
+   */
+  axis?: Axis | null;
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Derive the result index for an alignment from the two input indices.
+ *
+ * The `join` policy selects the union, the intersection, or one side's index
+ * verbatim — matching the pandas `join=` semantics.
+ */
+function resolveIndex(left: Index, right: Index, join: JoinHow): Index {
+  if (join === "outer") return left.union(right);
+  if (join === "inner") return left.intersection(right);
+  return join === "left" ? left : right;
+}
+
+// ─── Series ───────────────────────────────────────────────────────────────────
+
+/**
+ * Align two Series on their row indices.
+ *
+ * Produces a `[left, right]` pair reindexed onto one shared index chosen by
+ * the `join` policy (`"outer"` by default). Any label that is missing from an
+ * input is filled with `fillValue` (default `null`).
+ *
+ * @param left - First Series.
+ * @param right - Second Series.
+ * @param options - Join policy and fill value.
+ * @returns Tuple `[alignedLeft, alignedRight]` sharing one index.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: [1, 2, 3], index: new Index(["x", "y", "z"]) });
+ * const b = new Series({ data: [10, 30], index: new Index(["x", "z"]) });
+ * const [la, ra] = alignSeries(a, b);
+ * // la: [1, 2, 3]   ra: [10, null, 30]   index: ["x", "y", "z"]
+ * ```
+ */
+export function alignSeries(
+  left: Series,
+  right: Series,
+  options: AlignSeriesOptions = {},
+): [Series, Series] {
+  const join = options.join ?? "outer";
+  const fillValue = options.fillValue ?? null;
+
+  // Both sides are reindexed onto the same target, so their labels line up.
+  const shared = resolveIndex(left.index, right.index, join);
+  return [
+    reindexSeries(left, shared, { fillValue }),
+    reindexSeries(right, shared, { fillValue }),
+  ];
+}
+
+// ─── DataFrame ────────────────────────────────────────────────────────────────
+
+/**
+ * Align two DataFrames on their row index, column index, or both.
+ *
+ * Returns `[alignedLeft, alignedRight]` — a tuple of two DataFrames sharing
+ * the same row labels and/or column labels (depending on `axis`), filled with
+ * `fillValue` wherever a label is absent from one input.
+ *
+ * @param left - First DataFrame.
+ * @param right - Second DataFrame.
+ * @param options - Alignment options (join, axis, fill value).
+ * @returns Tuple `[alignedLeft, alignedRight]`.
+ *
+ * @example
+ * ```ts
+ * const a = DataFrame.fromColumns({ x: [1, 2], y: [3, 4] }, { index: ["r0", "r1"] });
+ * const b = DataFrame.fromColumns({ y: [10], z: [20] }, { index: ["r1"] });
+ * const [la, ra] = alignDataFrame(a, b);
+ * // shape [2, 3]; columns ["x", "y", "z"]; rows ["r0", "r1"]
+ * ```
+ */
+export function alignDataFrame(
+  left: DataFrame,
+  right: DataFrame,
+  options: AlignDataFrameOptions = {},
+): [DataFrame, DataFrame] {
+  const { join = "outer", fillValue = null, axis } = options;
+
+  // Normalise axis: null/undefined → align both axes.
+  const normalised: 0 | 1 | null =
+    axis === null || axis === undefined ? null : axis === 0 || axis === "index" ? 0 : 1;
+
+  const alignRows = normalised !== 1; // both (null) or rows (0)
+  const alignCols = normalised !== 0; // both (null) or columns (1)
+
+  // Build one reindex spec and apply it to both sides, so they necessarily
+  // end up with identical row/column labels. `resolveIndex` already returns
+  // an Index, so no extra assertion on its result is needed.
+  const spec: { index?: Index; columns?: Index; fillValue: Scalar } = { fillValue };
+  if (alignRows) {
+    spec.index = resolveIndex(left.index, right.index, join);
+  }
+  if (alignCols) {
+    spec.columns = resolveIndex(left.columns as Index, right.columns as Index, join);
+  }
+
+  return [reindexDataFrame(left, spec), reindexDataFrame(right, spec)];
+}
diff --git a/src/core/assign.ts b/src/core/assign.ts
new file mode 100644
index 00000000..d11561cb
--- /dev/null
+++ b/src/core/assign.ts
@@ -0,0 +1,129 @@
+/**
+ * DataFrame.assign() — add new columns to a DataFrame, mirroring `pandas.DataFrame.assign()`.
+ *
+ * Supports three kinds of column specifiers:
+ * - **Array**: `readonly Scalar[]` — values aligned by position with the row index
+ * - **Series**: `Series` — values aligned by position with the row index
+ * - **Callable**: `(df: DataFrame) => readonly Scalar[] | Series` — receives the
+ * *in-progress* DataFrame (i.e. any columns added earlier in the same `assign` call are
+ * already present), enabling chained column derivations that mirror the pandas behaviour.
+ *
+ * Columns are applied in insertion order; each callable sees the result of all earlier
+ * assignments in the same call.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameAssign } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [10, 20, 30] });
+ *
+ * // Mix of array, Series, and callable
+ * const df2 = dataFrameAssign(df, {
+ * c: [7, 8, 9], // plain array
+ * d: df.col("a").add(df.col("b")), // Series
+ * e: (d) => d.col("c").add(d.col("a")), // callable — sees column "c" already added
+ * });
+ * // df2.columns → ["a", "b", "c", "d", "e"]
+ * ```
+ *
+ * @packageDocumentation
+ */
+
+import type { Scalar } from "../types.ts";
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+
+// ─── types ─────────────────────────────────────────────────────────────────
+
+/**
+ * A single column specifier accepted by {@link dataFrameAssign}.
+ *
+ * - `readonly Scalar[]` — raw values (must equal `df.shape[0]` in length)
+ * - `Series` — a Series (values aligned by position)
+ * - `(df: DataFrame) => readonly Scalar[] | Series` — callable, receives the
+ *   in-progress DataFrame and must return values or a Series
+ */
+export type AssignColSpec =
+  | readonly Scalar[]
+  | Series
+  | ((df: DataFrame) => readonly Scalar[] | Series);
+
+/**
+ * A mapping from new (or overwritten) column name to an {@link AssignColSpec}.
+ *
+ * Column names present in the original DataFrame are overwritten; new names are appended.
+ * Application order matches the insertion order of the object, which in modern JavaScript
+ * is the declaration order for string keys (consistent with pandas).
+ */
+export type AssignSpec = Readonly<Record<string, AssignColSpec>>;
+
+// ─── implementation ──────────────────────────────────────────────────────────
+
+/**
+ * Add or replace columns on `df`, returning a new DataFrame.
+ *
+ * Entries of `spec` are applied one at a time, in declaration order. A
+ * callable entry is invoked with the DataFrame **as built so far**, so later
+ * entries can reference columns introduced by earlier ones in the same call:
+ *
+ * ```ts
+ * dataFrameAssign(df, {
+ *   total: (d) => d.col("price").mul(d.col("qty")),
+ *   tax: (d) => d.col("total").mul(0.1), // sees "total" already added
+ * });
+ * ```
+ *
+ * @param df The source DataFrame. Not mutated.
+ * @param spec Column specifiers keyed by column name.
+ * @returns A new DataFrame with the columns added / replaced.
+ */
+export function dataFrameAssign(df: DataFrame, spec: AssignSpec): DataFrame {
+  // Fold every spec entry into an accumulating DataFrame so that each
+  // callable observes all columns assigned before it.
+  return Object.entries(spec).reduce<DataFrame>((acc, [name, colSpec]) => {
+    const values = typeof colSpec === "function" ? colSpec(acc) : colSpec;
+    return _addOrReplaceColumn(acc, name, values);
+  }, df);
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Return a new DataFrame with column `name` set to `resolved`.
+ * Preserves column order: if `name` already exists, its position is kept;
+ * if new, it is appended.
+ */
+function _addOrReplaceColumn(
+  df: DataFrame,
+  name: string,
+  resolved: readonly Scalar[] | Series,
+): DataFrame {
+  // Plain arrays are wrapped in a Series that shares the frame's row index.
+  const series: Series =
+    resolved instanceof Series ? resolved : new Series({ data: resolved, index: df.index });
+
+  // Rebuild the column map preserving insertion order.
+  const colMap = new Map<string, Series<Scalar>>();
+  let inserted = false;
+
+  for (const colName of df.columns.values) {
+    if (colName === name) {
+      colMap.set(name, series);
+      inserted = true;
+    } else {
+      colMap.set(colName, df.col(colName));
+    }
+  }
+
+  // New column name: append at the end.
+  if (!inserted) {
+    colMap.set(name, series);
+  }
+
+  // Use the DataFrame's internal constructor to preserve the row index.
+  return new DataFrame(colMap, df.index);
+}
diff --git a/src/core/categorical_index.ts b/src/core/categorical_index.ts
new file mode 100644
index 00000000..cf388bea
--- /dev/null
+++ b/src/core/categorical_index.ts
@@ -0,0 +1,480 @@
+/**
+ * CategoricalIndex — an index backed by categorical data.
+ *
+ * Mirrors `pandas.CategoricalIndex`: an ordered sequence of labels drawn from
+ * a fixed, finite set of *categories*. Internally the labels are stored as
+ * integer codes (indices into the categories array), so equality tests and
+ * membership checks are O(1).
+ *
+ * - `categories` — the ordered set of valid labels
+ * - `ordered` — whether the categories form an ordered type (i.e. supports
+ * `<`/`>` comparisons between categories)
+ * - `codes` — integer positions into `categories`; `-1` for missing (NA)
+ *
+ * @example
+ * ```ts
+ * const ci = CategoricalIndex.fromArray(["b", "a", "c", "a"]);
+ * ci.size; // 4
+ * ci.categories.toArray(); // ["a", "b", "c"]
+ * ci.codes; // [1, 0, 2, 0]
+ * ci.at(0); // "b"
+ * ci.getLoc("a"); // 1 (first occurrence)
+ * ci.addCategories(["d"]).categories.toArray(); // ["a","b","c","d"]
+ * ```
+ *
+ * @module
+ */
+
+import type { Label } from "../types.ts";
+import { Index } from "./base-index.ts";
+
+// ─── option types ────────────────────────────────────────────────────────────
+
/** Options accepted by {@link CategoricalIndex.fromArray}. */
export interface CategoricalIndexOptions {
  /**
   * Explicit set of categories. If omitted the unique values in `data` are used.
   * Values in `data` not covered by this set encode as NA (code `-1`).
   */
  readonly categories?: readonly Label[];
  /** Whether the categories have a meaningful order. Defaults to `false`. */
  readonly ordered?: boolean;
  /** Optional name for the index. Defaults to `null` (unnamed). */
  readonly name?: string | null;
}
+
+// ─── helpers ─────────────────────────────────────────────────────────────────
+
+/** Build a deduplicated, sorted list of category labels from raw values. */
+function inferCategories(values: readonly Label[]): Label[] {
+ const seen = new Set();
+ const cats: Label[] = [];
+ for (const v of values) {
+ const key = String(v);
+ if (!seen.has(key)) {
+ seen.add(key);
+ cats.push(v);
+ }
+ }
+ return cats.sort((a, b) => {
+ const sa = String(a);
+ const sb = String(b);
+ if (sa < sb) {
+ return -1;
+ }
+ if (sa > sb) {
+ return 1;
+ }
+ return 0;
+ });
+}
+
+/** Build a category-to-code map for O(1) look-up. */
+function buildCategoryMap(categories: readonly Label[]): Map {
+ const map = new Map();
+ for (let i = 0; i < categories.length; i++) {
+ map.set(String(categories[i]), i);
+ }
+ return map;
+}
+
+/** Encode an array of raw labels into integer codes. */
+function encodeValues(values: readonly Label[], catMap: Map): number[] {
+ return values.map((v) => {
+ if (v === null || v === undefined || (typeof v === "number" && Number.isNaN(v))) {
+ return -1;
+ }
+ return catMap.get(String(v)) ?? -1;
+ });
+}
+
+// ─── CategoricalIndex ─────────────────────────────────────────────────────────
+
+/**
+ * An immutable index whose values are constrained to a fixed set of categories.
+ *
+ * Mirrors `pandas.CategoricalIndex`.
+ */
+export class CategoricalIndex {
+ /** The ordered set of valid labels. */
+ private readonly _categories: readonly Label[];
+
+ /** One integer code per index position; `-1` means NA/missing. */
+ private readonly _codes: readonly number[];
+
+ /** Category → code look-up (derived from `_categories`). */
+ private readonly _catMap: Map;
+
+ /** Whether the category set has a meaningful ordering. */
+ readonly ordered: boolean;
+
+ /** Optional human-readable name for this index. */
+ readonly name: string | null;
+
+ // ─── construction ──────────────────────────────────────────────────────────
+
+ private constructor(
+ categories: readonly Label[],
+ codes: readonly number[],
+ ordered: boolean,
+ name: string | null,
+ ) {
+ this._categories = Object.freeze([...categories]);
+ this._codes = Object.freeze([...codes]);
+ this._catMap = buildCategoryMap(categories);
+ this.ordered = ordered;
+ this.name = name;
+ }
+
+ /**
+ * Build a `CategoricalIndex` from an array of raw label values.
+ *
+ * @param data The sequence of labels.
+ * @param options Optional configuration (categories, ordered, name).
+ */
+ static fromArray(
+ data: readonly Label[],
+ options: CategoricalIndexOptions = {},
+ ): CategoricalIndex {
+ const cats = options.categories != null ? [...options.categories] : inferCategories(data);
+ const catMap = buildCategoryMap(cats);
+ const codes = encodeValues(data, catMap);
+ return new CategoricalIndex(cats, codes, options.ordered ?? false, options.name ?? null);
+ }
+
+ /**
+ * Build a `CategoricalIndex` directly from an existing category list and
+ * a pre-computed codes array.
+ *
+ * @param categories Ordered list of valid labels.
+ * @param codes Integer indices into `categories`; `-1` for NA.
+ * @param options Optional configuration (ordered, name).
+ */
+ static fromCodes(
+ categories: readonly Label[],
+ codes: readonly number[],
+ options: Omit = {},
+ ): CategoricalIndex {
+ const nCats = categories.length;
+ for (const c of codes) {
+ if (c !== -1 && (c < 0 || c >= nCats)) {
+ throw new RangeError(`Code ${c} is out of range for ${nCats} categories`);
+ }
+ }
+ return new CategoricalIndex(categories, codes, options.ordered ?? false, options.name ?? null);
+ }
+
+ // ─── basic properties ──────────────────────────────────────────────────────
+
+ /** Number of elements in the index. */
+ get size(): number {
+ return this._codes.length;
+ }
+
+ /** Shape tuple (always 1-D). */
+ get shape(): [number] {
+ return [this._codes.length];
+ }
+
+ /** Number of dimensions (always 1). */
+ get ndim(): 1 {
+ return 1;
+ }
+
+ /**
+ * The ordered set of category labels wrapped in an `Index`.
+ * Matches `pandas.CategoricalIndex.categories`.
+ */
+ get categories(): Index {
+ return new Index(this._categories);
+ }
+
+ /**
+ * Integer code for each position.
+ * `-1` indicates a missing (NA) value.
+ */
+ get codes(): readonly number[] {
+ return this._codes;
+ }
+
+ /** Number of unique categories (not index size). */
+ get nCategories(): number {
+ return this._categories.length;
+ }
+
+ // ─── element access ────────────────────────────────────────────────────────
+
+ /**
+ * Return the label at position `i`.
+ * Returns `null` for NA entries (code === -1).
+ *
+ * @throws {RangeError} when `i` is out of bounds.
+ */
+ at(i: number): Label | null {
+ if (i < 0 || i >= this._codes.length) {
+ throw new RangeError(`Index ${i} is out of bounds for size ${this._codes.length}`);
+ }
+ const code = this._codes[i] as number;
+ if (code === -1) {
+ return null;
+ }
+ return this._categories[code] as Label;
+ }
+
+ /**
+ * Return the (first) position of `label` in the index.
+ * Returns `-1` if not found.
+ */
+ getLoc(label: Label): number {
+ const code = this._catMap.get(String(label));
+ if (code === undefined) {
+ return -1;
+ }
+ return this._codes.indexOf(code);
+ }
+
+ /**
+ * Return *all* positions where `label` appears.
+ */
+ getLocsAll(label: Label): number[] {
+ const code = this._catMap.get(String(label));
+ if (code === undefined) {
+ return [];
+ }
+ const locs: number[] = [];
+ for (let i = 0; i < this._codes.length; i++) {
+ if (this._codes[i] === code) {
+ locs.push(i);
+ }
+ }
+ return locs;
+ }
+
+ /**
+ * Decode all codes into their label values.
+ * NA positions (code === -1) become `null`.
+ */
+ toArray(): (Label | null)[] {
+ return this._codes.map((c) => (c === -1 ? null : (this._categories[c] as Label)));
+ }
+
+ // ─── membership ────────────────────────────────────────────────────────────
+
+ /** Return `true` if `label` is one of the current categories. */
+ hasCategory(label: Label): boolean {
+ return this._catMap.has(String(label));
+ }
+
+ /** Return `true` if `label` appears in the index (at any position). */
+ contains(label: Label): boolean {
+ return this.getLoc(label) !== -1;
+ }
+
+ // ─── category mutations (all return new instances) ─────────────────────────
+
+ /**
+ * Return a new `CategoricalIndex` with renamed categories.
+ *
+ * `newCategories` must have the same length as the current categories.
+ * Each category is replaced in-place (the codes remain valid).
+ */
+ renameCategories(newCategories: readonly Label[]): CategoricalIndex {
+ if (newCategories.length !== this._categories.length) {
+ throw new RangeError(
+ `renameCategories: expected ${this._categories.length} names, ` +
+ `got ${newCategories.length}`,
+ );
+ }
+ return new CategoricalIndex(newCategories, this._codes, this.ordered, this.name);
+ }
+
+ /**
+ * Return a new `CategoricalIndex` with the categories reordered.
+ *
+ * `newOrder` must be a permutation of the current categories.
+ */
+ reorderCategories(newOrder: readonly Label[]): CategoricalIndex {
+ if (newOrder.length !== this._categories.length) {
+ throw new RangeError(
+ "reorderCategories: new order must have the same length as the current categories",
+ );
+ }
+ const newMap = buildCategoryMap(newOrder);
+ for (const cat of this._categories) {
+ if (!newMap.has(String(cat))) {
+ throw new RangeError(
+ `reorderCategories: category "${String(cat)}" is missing from the new order`,
+ );
+ }
+ }
+ const newCodes = this._codes.map((c) => {
+ if (c === -1) {
+ return -1;
+ }
+ const label = this._categories[c] as Label;
+ return newMap.get(String(label)) as number;
+ });
+ return new CategoricalIndex(newOrder, newCodes, this.ordered, this.name);
+ }
+
+ /**
+ * Return a new `CategoricalIndex` with `extra` appended to the categories.
+ *
+ * Values in the index that already match an existing category are unaffected.
+ * The new categories are appended (not inserted in sorted order).
+ */
+ addCategories(extra: readonly Label[]): CategoricalIndex {
+ for (const cat of extra) {
+ if (this._catMap.has(String(cat))) {
+ throw new RangeError(`addCategories: "${String(cat)}" is already a category`);
+ }
+ }
+ const newCats = [...this._categories, ...extra];
+ return new CategoricalIndex(newCats, this._codes, this.ordered, this.name);
+ }
+
+ /**
+ * Return a new `CategoricalIndex` with the specified categories removed.
+ *
+ * Index entries whose label belongs to `removals` become NA (code → -1).
+ */
+ removeCategories(removals: readonly Label[]): CategoricalIndex {
+ const removeSet = new Set(removals.map((v) => String(v)));
+ const newCats = this._categories.filter((c) => !removeSet.has(String(c)));
+ const newMap = buildCategoryMap(newCats);
+ const newCodes = this._codes.map((c) => {
+ if (c === -1) {
+ return -1;
+ }
+ const label = this._categories[c] as Label;
+ return newMap.get(String(label)) ?? -1;
+ });
+ return new CategoricalIndex(newCats, newCodes, this.ordered, this.name);
+ }
+
+ /**
+ * Return a new `CategoricalIndex` with `categories` replaced wholesale.
+ *
+ * Entries that fall outside `newCategories` become NA.
+ * Equivalent to `pandas.CategoricalIndex.set_categories()`.
+ */
+ setCategories(
+ newCategories: readonly Label[],
+ options: { ordered?: boolean } = {},
+ ): CategoricalIndex {
+ const newMap = buildCategoryMap(newCategories);
+ const newCodes = this._codes.map((c) => {
+ if (c === -1) {
+ return -1;
+ }
+ const label = this._categories[c] as Label;
+ return newMap.get(String(label)) ?? -1;
+ });
+ return new CategoricalIndex(
+ newCategories,
+ newCodes,
+ options.ordered ?? this.ordered,
+ this.name,
+ );
+ }
+
+ /**
+ * Return a new `CategoricalIndex` with only the categories that appear in
+ * the data (unused categories are dropped).
+ */
+ removeUnusedCategories(): CategoricalIndex {
+ const usedCodes = new Set(this._codes.filter((c) => c !== -1));
+ const usedCats = this._categories.filter((_, i) => usedCodes.has(i));
+ const newMap = buildCategoryMap(usedCats);
+ const newCodes = this._codes.map((c) => {
+ if (c === -1) {
+ return -1;
+ }
+ const label = this._categories[c] as Label;
+ return newMap.get(String(label)) ?? -1;
+ });
+ return new CategoricalIndex(usedCats, newCodes, this.ordered, this.name);
+ }
+
+ // ─── ordering helpers ──────────────────────────────────────────────────────
+
+ /** Return a copy with `ordered = true`. */
+ asOrdered(): CategoricalIndex {
+ return new CategoricalIndex(this._categories, this._codes, true, this.name);
+ }
+
+ /** Return a copy with `ordered = false`. */
+ asUnordered(): CategoricalIndex {
+ return new CategoricalIndex(this._categories, this._codes, false, this.name);
+ }
+
+ // ─── set-like operations ───────────────────────────────────────────────────
+
+ /**
+ * Return a new `CategoricalIndex` that is the union of the two category sets.
+ *
+ * The resulting categories are the union of both sets (left ∪ right),
+ * preserving left-side order and appending new categories from `other`.
+ * Only the data from *this* index is retained.
+ */
+ unionCategories(other: CategoricalIndex): CategoricalIndex {
+ const merged = [...this._categories];
+ const seen = new Set(this._categories.map((c) => String(c)));
+ for (const cat of other._categories) {
+ if (!seen.has(String(cat))) {
+ seen.add(String(cat));
+ merged.push(cat);
+ }
+ }
+ return this.setCategories(merged);
+ }
+
+ /**
+ * Return a new `CategoricalIndex` whose categories are the intersection of
+ * both category sets. Entries outside the intersection become NA.
+ */
+ intersectCategories(other: CategoricalIndex): CategoricalIndex {
+ const otherSet = new Set(other._categories.map((c) => String(c)));
+ const shared = this._categories.filter((c) => otherSet.has(String(c)));
+ return this.setCategories(shared);
+ }
+
+ // ─── comparison (ordered only) ─────────────────────────────────────────────
+
+ /**
+ * Compare two label values according to the category order.
+ *
+ * Returns a negative number when `a < b`, 0 when equal, positive when `a > b`.
+ *
+ * @throws {Error} when `ordered` is `false`.
+ * @throws {RangeError} when either label is not a category.
+ */
+ compareLabels(a: Label, b: Label): number {
+ if (!this.ordered) {
+ throw new Error("compareLabels requires an ordered CategoricalIndex");
+ }
+ const ca = this._catMap.get(String(a));
+ const cb = this._catMap.get(String(b));
+ if (ca === undefined) {
+ throw new RangeError(`"${String(a)}" is not a category`);
+ }
+ if (cb === undefined) {
+ throw new RangeError(`"${String(b)}" is not a category`);
+ }
+ return ca - cb;
+ }
+
+ // ─── misc ──────────────────────────────────────────────────────────────────
+
+ /** Return a new index with a different name. */
+ rename(name: string | null): CategoricalIndex {
+ return new CategoricalIndex(this._categories, this._codes, this.ordered, name);
+ }
+
+ /** Human-readable representation. */
+ toString(): string {
+ const preview = this.toArray()
+ .slice(0, 5)
+ .map((v) => (v === null ? "NA" : String(v)))
+ .join(", ");
+ const more = this.size > 5 ? `, ... (${this.size} total)` : "";
+ return `CategoricalIndex([${preview}${more}], categories=[${this._categories.map(String).join(", ")}], ordered=${String(this.ordered)})`;
+ }
+}
diff --git a/src/core/date_offset.ts b/src/core/date_offset.ts
new file mode 100644
index 00000000..4eda968e
--- /dev/null
+++ b/src/core/date_offset.ts
@@ -0,0 +1,848 @@
+/**
+ * date_offset — calendar-aware date arithmetic.
+ *
+ * Mirrors the following pandas offset classes from `pandas.tseries.offsets`:
+ *
+ * | Class | pandas equivalent | Description |
+ * |---|---|---|
+ * | {@link Day} | `Day(n)` | n calendar days |
+ * | {@link Hour} | `Hour(n)` | n hours |
+ * | {@link Minute} | `Minute(n)` | n minutes |
+ * | {@link Second} | `Second(n)` | n seconds |
+ * | {@link Milli} | `Milli(n)` | n milliseconds |
+ * | {@link Week} | `Week(n, weekday?)` | n weeks, with optional weekday alignment |
+ * | {@link MonthEnd} | `MonthEnd(n)` | n month-ends |
+ * | {@link MonthBegin} | `MonthBegin(n)` | n month-starts (first of month) |
+ * | {@link YearEnd} | `YearEnd(n)` | n year-ends (Dec 31) |
+ * | {@link YearBegin} | `YearBegin(n)` | n year-starts (Jan 1) |
+ * | {@link BusinessDay} | `BDay(n)` | n business days (Mon–Fri) |
+ *
+ * All operations work in **UTC** to avoid DST ambiguity.
+ *
+ * @example
+ * ```ts
+ * const d = new Date(Date.UTC(2024, 0, 15)); // 2024-01-15
+ * new MonthEnd(1).apply(d); // 2024-01-31
+ * new MonthEnd(2).apply(d); // 2024-02-29
+ * new YearBegin(1).apply(d); // 2025-01-01
+ * new BusinessDay(3).apply(d); // 2024-01-18
+ * ```
+ *
+ * @module
+ */
+
+// ─── constants ────────────────────────────────────────────────────────────────
+
const MS_PER_SECOND = 1_000; // milliseconds per second
const MS_PER_MINUTE = 60_000; // 60 * 1000
const MS_PER_HOUR = 3_600_000; // 60 * 60 * 1000
const MS_PER_DAY = 86_400_000; // 24 * 3600 * 1000 — fixed-length UTC day (no DST)
const MS_PER_WEEK = 7 * MS_PER_DAY;
+
+// ─── public interface ─────────────────────────────────────────────────────────
+
/**
 * Common interface shared by all calendar-aware date offsets.
 *
 * Every offset:
 * - Has a multiplier `n` (positive or negative).
 * - Can `apply` itself to a `Date` to produce a shifted date.
 * - Supports `rollforward` and `rollback` for snapping to the nearest anchor.
 * - Reports whether a date falls `onOffset`.
 *
 * Implementations in this module are immutable: `apply`, `rollforward` and
 * `rollback` never mutate their argument and always return new `Date`s.
 */
export interface DateOffset {
  /** Multiplier: number of units per application. */
  readonly n: number;
  /** Human-readable class name, e.g. `"Day"` or `"MonthEnd"`. */
  readonly name: string;
  /**
   * Return a new `Date` that is `n` offset-units ahead of `date`.
   * For anchored offsets (MonthEnd, YearBegin, …) non-anchor dates are
   * first snapped before advancing the remaining steps (the snap itself
   * consumes one of the `n` steps).
   */
  apply(date: Date): Date;
  /**
   * If `date` falls on the offset anchor, return it unchanged.
   * Otherwise advance to the **next** anchor.
   */
  rollforward(date: Date): Date;
  /**
   * If `date` falls on the offset anchor, return it unchanged.
   * Otherwise retreat to the **previous** anchor.
   */
  rollback(date: Date): Date;
  /** Return `true` if `date` falls exactly on an offset anchor. */
  onOffset(date: Date): boolean;
}
+
+// ─── WeekOptions ──────────────────────────────────────────────────────────────
+
/** Options accepted by the {@link Week} offset constructor. */
export interface WeekOptions {
  /**
   * Optional weekday alignment following **pandas convention**:
   * `0` = Monday, `1` = Tuesday, …, `6` = Sunday.
   * (Note this differs from JavaScript's `Date.getUTCDay`, where 0 = Sunday.)
   *
   * When set, every anchor date falls on this day of the week.
   * When `null` / `undefined`, every date is "on offset" and `apply` simply
   * adds `n × 7` days.
   */
  readonly weekday?: number | null;
}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/**
+ * Convert a pandas weekday index (0 = Monday) to a JavaScript UTC day index
+ * (0 = Sunday, as returned by `Date.prototype.getUTCDay`).
+ */
+function pdToJsDow(weekday: number): number {
+ return weekday === 6 ? 0 : weekday + 1;
+}
+
+/** True if `date` falls on the last day of its UTC month. */
+function isMonthEnd(date: Date): boolean {
+ const last = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth() + 1, 0));
+ return date.getUTCDate() === last.getUTCDate();
+}
+
+/** True if `date` falls on the first day of its UTC month. */
+function isMonthBegin(date: Date): boolean {
+ return date.getUTCDate() === 1;
+}
+
+/** True if `date` is December 31 (UTC). */
+function isYearEnd(date: Date): boolean {
+ return date.getUTCMonth() === 11 && date.getUTCDate() === 31;
+}
+
+/** True if `date` is January 1 (UTC). */
+function isYearBegin(date: Date): boolean {
+ return date.getUTCMonth() === 0 && date.getUTCDate() === 1;
+}
+
+/** True if `date` is a weekday (Monday–Friday, UTC). */
+function isBusinessDay(date: Date): boolean {
+ const dow = date.getUTCDay();
+ return dow >= 1 && dow <= 5;
+}
+
+// ─── apply helpers ────────────────────────────────────────────────────────────
+
+/**
+ * Apply month-end semantics for `n` steps.
+ *
+ * Logic mirrors `pandas.tseries.offsets.MonthEnd(n).apply(date)`:
+ * - If not on a month-end and `n > 0`: snap to this month's end (costs 1),
+ * then advance `n-1` more.
+ * - If not on a month-end and `n < 0`: snap to prev month's end (costs 1),
+ * then advance `n+1` more.
+ * - If on a month-end: advance `n` months directly.
+ */
+function applyMonthEnd(date: Date, n: number): Date {
+ if (n === 0) {
+ return new Date(date.getTime());
+ }
+ const y = date.getUTCFullYear();
+ const m = date.getUTCMonth();
+ if (isMonthEnd(date)) {
+ return new Date(Date.UTC(y, m + n + 1, 0));
+ }
+ if (n > 0) {
+ return new Date(Date.UTC(y, m + n, 0));
+ }
+ const prev = new Date(Date.UTC(y, m, 0));
+ return new Date(Date.UTC(prev.getUTCFullYear(), prev.getUTCMonth() + n + 2, 0));
+}
+
+/**
+ * Apply month-begin semantics for `n` steps.
+ * Mirrors `pandas.tseries.offsets.MonthBegin(n).apply(date)`.
+ */
+function applyMonthBegin(date: Date, n: number): Date {
+ if (n === 0) {
+ return new Date(date.getTime());
+ }
+ const y = date.getUTCFullYear();
+ const m = date.getUTCMonth();
+ if (isMonthBegin(date) || n > 0) {
+ return new Date(Date.UTC(y, m + n, 1));
+ }
+ return new Date(Date.UTC(y, m + n + 1, 1));
+}
+
+/**
+ * Apply year-end semantics for `n` steps.
+ * Mirrors `pandas.tseries.offsets.YearEnd(n).apply(date)`.
+ */
+function applyYearEnd(date: Date, n: number): Date {
+ if (n === 0) {
+ return new Date(date.getTime());
+ }
+ const y = date.getUTCFullYear();
+ if (isYearEnd(date)) {
+ return new Date(Date.UTC(y + n, 11, 31));
+ }
+ if (n > 0) {
+ return new Date(Date.UTC(y + n - 1, 11, 31));
+ }
+ return new Date(Date.UTC(y + n, 11, 31));
+}
+
+/**
+ * Apply year-begin semantics for `n` steps.
+ * Mirrors `pandas.tseries.offsets.YearBegin(n).apply(date)`.
+ */
+function applyYearBegin(date: Date, n: number): Date {
+ if (n === 0) {
+ return new Date(date.getTime());
+ }
+ const y = date.getUTCFullYear();
+ if (isYearBegin(date) || n > 0) {
+ return new Date(Date.UTC(y + n, 0, 1));
+ }
+ return new Date(Date.UTC(y + n + 1, 0, 1));
+}
+
+/** Roll forward to the current or next business day. */
+function rollFwdBiz(date: Date): Date {
+ let d = new Date(date.getTime());
+ while (!isBusinessDay(d)) {
+ d = new Date(d.getTime() + MS_PER_DAY);
+ }
+ return d;
+}
+
+/** Roll backward to the current or previous business day. */
+function rollBkBiz(date: Date): Date {
+ let d = new Date(date.getTime());
+ while (!isBusinessDay(d)) {
+ d = new Date(d.getTime() - MS_PER_DAY);
+ }
+ return d;
+}
+
+/**
+ * Apply business-day semantics for `n` steps.
+ * Mirrors `pandas.tseries.offsets.BDay(n).apply(date)`.
+ * Saturdays and Sundays are skipped in both directions.
+ */
+function applyBday(date: Date, n: number): Date {
+ let d = new Date(date.getTime());
+ const forward = n >= 0;
+ const steps = Math.abs(n);
+ for (let i = 0; i < steps; i++) {
+ const next = new Date(d.getTime() + (forward ? MS_PER_DAY : -MS_PER_DAY));
+ d = forward ? rollFwdBiz(next) : rollBkBiz(next);
+ }
+ return d;
+}
+
+/**
+ * Roll forward to the nearest occurrence of `jsDow` (JS UTC day convention).
+ * Returns `date` unchanged if it already falls on `jsDow`.
+ */
+function rollFwdWeekday(date: Date, jsDow: number): Date {
+ const daysAhead = (jsDow - date.getUTCDay() + 7) % 7;
+ if (daysAhead === 0) {
+ return new Date(date.getTime());
+ }
+ return new Date(date.getTime() + daysAhead * MS_PER_DAY);
+}
+
+/**
+ * Roll backward to the nearest occurrence of `jsDow` (JS UTC day convention).
+ * Returns `date` unchanged if it already falls on `jsDow`.
+ */
+function rollBkWeekday(date: Date, jsDow: number): Date {
+ const daysBack = (date.getUTCDay() - jsDow + 7) % 7;
+ if (daysBack === 0) {
+ return new Date(date.getTime());
+ }
+ return new Date(date.getTime() - daysBack * MS_PER_DAY);
+}
+
+/**
+ * Apply week semantics for `n` steps, with optional weekday alignment.
+ * `jsDow` is null for plain (unaligned) weeks.
+ */
+function applyWeek(date: Date, n: number, jsDow: number | null): Date {
+ if (n === 0) {
+ return new Date(date.getTime());
+ }
+ if (jsDow === null) {
+ return new Date(date.getTime() + n * MS_PER_WEEK);
+ }
+ const onTarget = date.getUTCDay() === jsDow;
+ if (n > 0) {
+ if (onTarget) {
+ return new Date(date.getTime() + n * MS_PER_WEEK);
+ }
+ const rolled = rollFwdWeekday(date, jsDow);
+ return new Date(rolled.getTime() + (n - 1) * MS_PER_WEEK);
+ }
+ if (onTarget) {
+ return new Date(date.getTime() + n * MS_PER_WEEK);
+ }
+ const rolled = rollBkWeekday(date, jsDow);
+ return new Date(rolled.getTime() + (n + 1) * MS_PER_WEEK);
+}
+
+// ─── classes ──────────────────────────────────────────────────────────────────
+
+/**
+ * n calendar days.
+ *
+ * Mirrors `pandas.tseries.offsets.Day`.
+ * Every date is "on offset" — `rollforward` and `rollback` are no-ops.
+ *
+ * @example
+ * ```ts
+ * new Day(3).apply(new Date(Date.UTC(2024, 0, 1))); // 2024-01-04
+ * ```
+ */
+export class Day implements DateOffset {
+ readonly name = "Day";
+
+ constructor(readonly n = 1) {}
+
+ /** Convenience factory: `Day.of(3)` equivalent to `new Day(3)`. */
+ static of(n = 1): Day {
+ return new Day(n);
+ }
+
+ apply(date: Date): Date {
+ return new Date(date.getTime() + this.n * MS_PER_DAY);
+ }
+
+ rollforward(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ rollback(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ onOffset(_date: Date): boolean {
+ return true;
+ }
+
+ /** Return a new `Day` with multiplier scaled by `factor`. */
+ multiply(factor: number): Day {
+ return new Day(this.n * factor);
+ }
+
+ /** Return a new `Day` with negated multiplier. */
+ negate(): Day {
+ return new Day(-this.n);
+ }
+}
+
+/**
+ * n hours.
+ *
+ * Mirrors `pandas.tseries.offsets.Hour`.
+ * Every date is "on offset".
+ */
+export class Hour implements DateOffset {
+ readonly name = "Hour";
+
+ constructor(readonly n = 1) {}
+
+ static of(n = 1): Hour {
+ return new Hour(n);
+ }
+
+ apply(date: Date): Date {
+ return new Date(date.getTime() + this.n * MS_PER_HOUR);
+ }
+
+ rollforward(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ rollback(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ onOffset(_date: Date): boolean {
+ return true;
+ }
+
+ multiply(factor: number): Hour {
+ return new Hour(this.n * factor);
+ }
+
+ negate(): Hour {
+ return new Hour(-this.n);
+ }
+}
+
+/**
+ * n minutes.
+ *
+ * Mirrors `pandas.tseries.offsets.Minute`.
+ */
+export class Minute implements DateOffset {
+ readonly name = "Minute";
+
+ constructor(readonly n = 1) {}
+
+ static of(n = 1): Minute {
+ return new Minute(n);
+ }
+
+ apply(date: Date): Date {
+ return new Date(date.getTime() + this.n * MS_PER_MINUTE);
+ }
+
+ rollforward(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ rollback(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ onOffset(_date: Date): boolean {
+ return true;
+ }
+
+ multiply(factor: number): Minute {
+ return new Minute(this.n * factor);
+ }
+
+ negate(): Minute {
+ return new Minute(-this.n);
+ }
+}
+
+/**
+ * n seconds.
+ *
+ * Mirrors `pandas.tseries.offsets.Second`.
+ */
+export class Second implements DateOffset {
+ readonly name = "Second";
+
+ constructor(readonly n = 1) {}
+
+ static of(n = 1): Second {
+ return new Second(n);
+ }
+
+ apply(date: Date): Date {
+ return new Date(date.getTime() + this.n * MS_PER_SECOND);
+ }
+
+ rollforward(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ rollback(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ onOffset(_date: Date): boolean {
+ return true;
+ }
+
+ multiply(factor: number): Second {
+ return new Second(this.n * factor);
+ }
+
+ negate(): Second {
+ return new Second(-this.n);
+ }
+}
+
+/**
+ * n milliseconds.
+ *
+ * Mirrors `pandas.tseries.offsets.Milli`.
+ */
+export class Milli implements DateOffset {
+ readonly name = "Milli";
+
+ constructor(readonly n = 1) {}
+
+ static of(n = 1): Milli {
+ return new Milli(n);
+ }
+
+ apply(date: Date): Date {
+ return new Date(date.getTime() + this.n);
+ }
+
+ rollforward(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ rollback(date: Date): Date {
+ return new Date(date.getTime());
+ }
+
+ onOffset(_date: Date): boolean {
+ return true;
+ }
+
+ multiply(factor: number): Milli {
+ return new Milli(this.n * factor);
+ }
+
+ negate(): Milli {
+ return new Milli(-this.n);
+ }
+}
+
+/**
+ * n weeks, with optional weekday alignment.
+ *
+ * Mirrors `pandas.tseries.offsets.Week`.
+ *
+ * When `weekday` is specified (pandas convention: 0 = Monday, …, 6 = Sunday),
+ * every anchor date falls on that day of the week.
+ * Without `weekday`, every date is "on offset" and `apply` adds `n × 7` days.
+ *
+ * @example
+ * ```ts
+ * // Plain week
+ * new Week(2).apply(new Date(Date.UTC(2024, 0, 1))); // 2024-01-15
+ *
+ * // Weekday-aligned (anchor = Monday)
+ * const wk = new Week(1, { weekday: 0 }); // 0 = Monday
+ * wk.apply(new Date(Date.UTC(2024, 0, 15))); // 2024-01-22 (next Mon)
+ * wk.apply(new Date(Date.UTC(2024, 0, 17))); // 2024-01-22 (next Mon from Wed)
+ * ```
+ */
+export class Week implements DateOffset {
+ readonly name = "Week";
+
+ /**
+ * Weekday anchor (pandas convention: 0 = Monday, …, 6 = Sunday).
+ * `null` means no alignment.
+ */
+ readonly weekday: number | null;
+
+ constructor(
+ readonly n = 1,
+ options: WeekOptions = {},
+ ) {
+ this.weekday = options.weekday ?? null;
+ }
+
+ static of(n = 1, options?: WeekOptions): Week {
+ return new Week(n, options);
+ }
+
+ apply(date: Date): Date {
+ const jsDow = this.weekday === null ? null : pdToJsDow(this.weekday);
+ return applyWeek(date, this.n, jsDow);
+ }
+
+ rollforward(date: Date): Date {
+ if (this.weekday === null) {
+ return new Date(date.getTime());
+ }
+ return rollFwdWeekday(date, pdToJsDow(this.weekday));
+ }
+
+ rollback(date: Date): Date {
+ if (this.weekday === null) {
+ return new Date(date.getTime());
+ }
+ return rollBkWeekday(date, pdToJsDow(this.weekday));
+ }
+
+ onOffset(date: Date): boolean {
+ if (this.weekday === null) {
+ return true;
+ }
+ return date.getUTCDay() === pdToJsDow(this.weekday);
+ }
+
+ multiply(factor: number): Week {
+ return new Week(this.n * factor, { weekday: this.weekday });
+ }
+
+ negate(): Week {
+ return new Week(-this.n, { weekday: this.weekday });
+ }
+}
+
+/**
+ * n month-ends.
+ *
+ * Mirrors `pandas.tseries.offsets.MonthEnd`.
+ * Anchor dates are the last calendar day of each month.
+ *
+ * @example
+ * ```ts
+ * const d = new Date(Date.UTC(2024, 0, 15)); // 2024-01-15
+ * new MonthEnd(1).apply(d); // 2024-01-31
+ * new MonthEnd(2).apply(d); // 2024-02-29
+ * new MonthEnd(-1).apply(d); // 2023-12-31
+ *
+ * // Rolling
+ * new MonthEnd(0).rollforward(d); // 2024-01-31
+ * new MonthEnd(0).rollback(d); // 2023-12-31
+ * ```
+ */
+export class MonthEnd implements DateOffset {
+ readonly name = "MonthEnd";
+
+ constructor(readonly n = 1) {}
+
+ static of(n = 1): MonthEnd {
+ return new MonthEnd(n);
+ }
+
+ apply(date: Date): Date {
+ return applyMonthEnd(date, this.n);
+ }
+
+ rollforward(date: Date): Date {
+ if (isMonthEnd(date)) {
+ return new Date(date.getTime());
+ }
+ const y = date.getUTCFullYear();
+ const m = date.getUTCMonth();
+ return new Date(Date.UTC(y, m + 1, 0));
+ }
+
+ rollback(date: Date): Date {
+ if (isMonthEnd(date)) {
+ return new Date(date.getTime());
+ }
+ const y = date.getUTCFullYear();
+ const m = date.getUTCMonth();
+ return new Date(Date.UTC(y, m, 0));
+ }
+
+ onOffset(date: Date): boolean {
+ return isMonthEnd(date);
+ }
+
+ multiply(factor: number): MonthEnd {
+ return new MonthEnd(this.n * factor);
+ }
+
+ negate(): MonthEnd {
+ return new MonthEnd(-this.n);
+ }
+}
+
+/**
+ * n month-starts.
+ *
+ * Mirrors `pandas.tseries.offsets.MonthBegin`.
+ * Anchor dates are the first calendar day of each month.
+ *
+ * @example
+ * ```ts
+ * const d = new Date(Date.UTC(2024, 0, 15)); // 2024-01-15
+ * new MonthBegin(1).apply(d); // 2024-02-01
+ * new MonthBegin(-1).apply(d); // 2024-01-01
+ *
+ * // Rolling
+ * new MonthBegin(0).rollforward(d); // 2024-02-01
+ * new MonthBegin(0).rollback(d); // 2024-01-01
+ * ```
+ */
+export class MonthBegin implements DateOffset {
+ readonly name = "MonthBegin";
+
+ constructor(readonly n = 1) {}
+
+ static of(n = 1): MonthBegin {
+ return new MonthBegin(n);
+ }
+
+ apply(date: Date): Date {
+ return applyMonthBegin(date, this.n);
+ }
+
+ rollforward(date: Date): Date {
+ if (isMonthBegin(date)) {
+ return new Date(date.getTime());
+ }
+ const y = date.getUTCFullYear();
+ const m = date.getUTCMonth();
+ return new Date(Date.UTC(y, m + 1, 1));
+ }
+
+ rollback(date: Date): Date {
+ if (isMonthBegin(date)) {
+ return new Date(date.getTime());
+ }
+ const y = date.getUTCFullYear();
+ const m = date.getUTCMonth();
+ return new Date(Date.UTC(y, m, 1));
+ }
+
+ onOffset(date: Date): boolean {
+ return isMonthBegin(date);
+ }
+
+ multiply(factor: number): MonthBegin {
+ return new MonthBegin(this.n * factor);
+ }
+
+ negate(): MonthBegin {
+ return new MonthBegin(-this.n);
+ }
+}
+
+/**
+ * n year-ends (December 31).
+ *
+ * Mirrors `pandas.tseries.offsets.YearEnd`.
+ *
+ * @example
+ * ```ts
+ * const d = new Date(Date.UTC(2024, 0, 15)); // 2024-01-15
+ * new YearEnd(1).apply(d); // 2024-12-31
+ * new YearEnd(2).apply(d); // 2025-12-31
+ * new YearEnd(-1).apply(d); // 2023-12-31
+ * ```
+ */
+export class YearEnd implements DateOffset {
+ readonly name = "YearEnd";
+
+ constructor(readonly n = 1) {}
+
+ static of(n = 1): YearEnd {
+ return new YearEnd(n);
+ }
+
+ apply(date: Date): Date {
+ return applyYearEnd(date, this.n);
+ }
+
+ rollforward(date: Date): Date {
+ if (isYearEnd(date)) {
+ return new Date(date.getTime());
+ }
+ return new Date(Date.UTC(date.getUTCFullYear(), 11, 31));
+ }
+
+ rollback(date: Date): Date {
+ if (isYearEnd(date)) {
+ return new Date(date.getTime());
+ }
+ return new Date(Date.UTC(date.getUTCFullYear() - 1, 11, 31));
+ }
+
+ onOffset(date: Date): boolean {
+ return isYearEnd(date);
+ }
+
+ multiply(factor: number): YearEnd {
+ return new YearEnd(this.n * factor);
+ }
+
+ negate(): YearEnd {
+ return new YearEnd(-this.n);
+ }
+}
+
+/**
+ * n year-starts (January 1).
+ *
+ * Mirrors `pandas.tseries.offsets.YearBegin`.
+ *
+ * @example
+ * ```ts
+ * const d = new Date(Date.UTC(2024, 6, 4)); // 2024-07-04
+ * new YearBegin(1).apply(d); // 2025-01-01
+ * new YearBegin(-1).apply(d); // 2024-01-01
+ * ```
+ */
+export class YearBegin implements DateOffset {
+ readonly name = "YearBegin";
+
+ constructor(readonly n = 1) {}
+
+ static of(n = 1): YearBegin {
+ return new YearBegin(n);
+ }
+
+ apply(date: Date): Date {
+ return applyYearBegin(date, this.n);
+ }
+
+ rollforward(date: Date): Date {
+ if (isYearBegin(date)) {
+ return new Date(date.getTime());
+ }
+ return new Date(Date.UTC(date.getUTCFullYear() + 1, 0, 1));
+ }
+
+ rollback(date: Date): Date {
+ if (isYearBegin(date)) {
+ return new Date(date.getTime());
+ }
+ return new Date(Date.UTC(date.getUTCFullYear(), 0, 1));
+ }
+
+ onOffset(date: Date): boolean {
+ return isYearBegin(date);
+ }
+
+ multiply(factor: number): YearBegin {
+ return new YearBegin(this.n * factor);
+ }
+
+ negate(): YearBegin {
+ return new YearBegin(-this.n);
+ }
+}
+
/**
 * n business days (Monday–Friday).
 *
 * Mirrors `pandas.tseries.offsets.BDay` / `BusinessDay`.
 * Weekends are skipped — each step advances or retreats by exactly one
 * weekday.
 *
 * @example
 * ```ts
 * const fri = new Date(Date.UTC(2024, 0, 12)); // 2024-01-12 (Friday)
 * new BusinessDay(1).apply(fri); // 2024-01-15 (Monday)
 * new BusinessDay(3).apply(fri); // 2024-01-17 (Wednesday)
 * new BusinessDay(-1).apply(fri); // 2024-01-11 (Thursday)
 * ```
 */
export class BusinessDay implements DateOffset {
  readonly name = "BusinessDay";

  /** @param n - Number of weekdays to step; negative steps backward. */
  constructor(readonly n = 1) {}

  /** Factory alternative to `new BusinessDay(n)`. */
  static of(n = 1): BusinessDay {
    return new BusinessDay(n);
  }

  /** Shift `date` by `this.n` weekdays (delegates to `applyBday`). */
  apply(date: Date): Date {
    return applyBday(date, this.n);
  }

  // Presumably rolls to the next weekday at or after `date` — behavior is
  // defined by the module-level `rollFwdBiz` helper; confirm there.
  rollforward(date: Date): Date {
    return rollFwdBiz(date);
  }

  // Presumably rolls to the previous weekday at or before `date` — behavior
  // is defined by the module-level `rollBkBiz` helper; confirm there.
  rollback(date: Date): Date {
    return rollBkBiz(date);
  }

  /** Whether `date` is a business day (per `isBusinessDay`). */
  onOffset(date: Date): boolean {
    return isBusinessDay(date);
  }

  /** Copy with the step count scaled by `factor`. */
  multiply(factor: number): BusinessDay {
    return new BusinessDay(this.n * factor);
  }

  /** Copy stepping in the opposite direction. */
  negate(): BusinessDay {
    return new BusinessDay(-this.n);
  }
}
diff --git a/src/core/date_range.ts b/src/core/date_range.ts
new file mode 100644
index 00000000..4560db94
--- /dev/null
+++ b/src/core/date_range.ts
@@ -0,0 +1,662 @@
+/**
+ * DatetimeIndex, date_range, and bdate_range.
+ *
+ * Mirrors `pandas.DatetimeIndex`, `pandas.date_range`, and `pandas.bdate_range`.
+ *
+ * A {@link DatetimeIndex} is an ordered sequence of `Date` objects suitable for
+ * use as a time-series axis. The two factory functions generate regularly-spaced
+ * sequences:
+ *
+ * | Function | Default freq | pandas equivalent |
+ * |---|---|---|
+ * | {@link date_range} | `"D"` (calendar day) | `pandas.date_range` |
+ * | {@link bdate_range} | `"B"` (business day) | `pandas.bdate_range` |
+ *
+ * **Frequency string aliases** (a subset of pandas abbreviations):
+ *
+ * | String | Offset |
+ * |--------|--------|
+ * | `"D"` | Calendar day |
+ * | `"B"` | Business day (Mon–Fri) |
+ * | `"H"` | Hour |
+ * | `"T"` / `"min"` | Minute |
+ * | `"S"` | Second |
+ * | `"L"` / `"ms"` | Millisecond |
+ * | `"W"` | Week |
+ * | `"MS"` | Month-start (1st of month) |
+ * | `"ME"` | Month-end (last day of month) |
+ * | `"QS"` | Quarter-start (MonthBegin ×3) |
+ * | `"QE"` | Quarter-end (MonthEnd ×3) |
+ * | `"AS"` / `"YS"` | Year-start (Jan 1) |
+ * | `"AE"` / `"YE"` | Year-end (Dec 31) |
+ *
+ * @example
+ * ```ts
+ * const idx = date_range({ start: "2024-01-01", end: "2024-01-05" });
+ * idx.size; // 5
+ * idx.at(0).toISOString(); // "2024-01-01T00:00:00.000Z"
+ *
+ * const biz = bdate_range({ start: "2024-01-01", periods: 5 });
+ * biz.size; // 5 (Mon–Fri only)
+ * ```
+ *
+ * @module
+ */
+
+import {
+ BusinessDay,
+ Day,
+ Hour,
+ Milli,
+ Minute,
+ MonthBegin,
+ MonthEnd,
+ Second,
+ Week,
+ YearBegin,
+ YearEnd,
+} from "./date_offset.ts";
+import type { DateOffset } from "./date_offset.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/**
 * Recognised frequency string abbreviations accepted by {@link date_range} and
 * {@link bdate_range}. A {@link DateOffset} object may always be used in place
 * of a string.
 */
export type DateRangeFreq =
  | "D" // calendar day
  | "B" // business day (Mon–Fri)
  | "H" // hour
  | "T" // minute (pandas legacy alias)
  | "min" // minute
  | "S" // second
  | "L" // millisecond (pandas legacy alias)
  | "ms" // millisecond
  | "W" // week
  | "MS" // month-start
  | "ME" // month-end
  | "QS" // quarter-start (MonthBegin ×3)
  | "QE" // quarter-end (MonthEnd ×3)
  | "AS" // year-start (legacy alias)
  | "YS" // year-start
  | "AE" // year-end (legacy alias)
  | "YE"; // year-end

/**
 * Options accepted by {@link DatetimeIndex} factory methods.
 *
 * NOTE(review): the factories visible in this module take `name` as a plain
 * positional parameter and do not reference this interface — confirm whether
 * it is consumed elsewhere or reserved for future use.
 */
export interface DatetimeIndexOptions {
  /** Optional name label for this axis. */
  readonly name?: string | null;
}

/** Options accepted by {@link date_range} and {@link bdate_range}. */
export interface DateRangeOptions {
  /**
   * Left bound for generating dates. Accepts a `Date` object or an ISO-8601
   * string.
   */
  readonly start?: Date | string;
  /**
   * Right bound for generating dates (inclusive when the offset lands on it
   * exactly). Accepts a `Date` object or an ISO-8601 string.
   */
  readonly end?: Date | string;
  /**
   * Number of periods (dates) to generate. Must be a non-negative integer.
   */
  readonly periods?: number;
  /**
   * Step frequency. Accepts a string alias (e.g. `"D"`, `"MS"`, `"B"`) or a
   * {@link DateOffset} instance. Defaults to `"D"` for {@link date_range} and
   * `"B"` for {@link bdate_range}.
   */
  readonly freq?: DateRangeFreq | DateOffset;
  /**
   * When `true`, normalise start / end to midnight UTC before generating.
   * @default false
   */
  readonly normalize?: boolean;
  /** Optional name for the resulting {@link DatetimeIndex}. */
  readonly name?: string | null;
}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/** Parse a `Date | string` into a `Date`. */
+function toDate(val: Date | string): Date {
+ if (val instanceof Date) {
+ return new Date(val.getTime());
+ }
+ const d = new Date(val);
+ if (Number.isNaN(d.getTime())) {
+ throw new RangeError(`Cannot parse date: "${val}"`);
+ }
+ return d;
+}
+
+/** Floor a date to midnight UTC. */
+function normDate(d: Date): Date {
+ return new Date(Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate()));
+}
+
+/**
+ * Convert a frequency string (or pass-through a `DateOffset`) to a
+ * `DateOffset` with multiplier `n`.
+ */
+function freqToOffset(freq: DateRangeFreq | DateOffset, n = 1): DateOffset {
+ if (typeof freq === "object") {
+ return freq;
+ }
+ switch (freq) {
+ case "D":
+ return new Day(n);
+ case "B":
+ return new BusinessDay(n);
+ case "H":
+ return new Hour(n);
+ case "T":
+ case "min":
+ return new Minute(n);
+ case "S":
+ return new Second(n);
+ case "L":
+ case "ms":
+ return new Milli(n);
+ case "W":
+ return new Week(n);
+ case "MS":
+ return new MonthBegin(n);
+ case "ME":
+ return new MonthEnd(n);
+ case "QS":
+ return new MonthBegin(n * 3);
+ case "QE":
+ return new MonthEnd(n * 3);
+ case "AS":
+ case "YS":
+ return new YearBegin(n);
+ case "AE":
+ case "YE":
+ return new YearEnd(n);
+ default: {
+ const _never: never = freq;
+ throw new RangeError(`Unknown frequency string: "${String(_never)}"`);
+ }
+ }
+}
+
+// ─── DatetimeIndex ────────────────────────────────────────────────────────────
+
+/**
+ * An ordered sequence of `Date` values — the TypeScript equivalent of
+ * `pandas.DatetimeIndex`.
+ *
+ * Typically created via {@link date_range} or {@link bdate_range}, but can also
+ * be built directly from an array of `Date` objects.
+ */
+export class DatetimeIndex {
+ private readonly _dates: readonly Date[];
+
+ /** Optional human-readable label for this axis. */
+ readonly name: string | null;
+
+ private constructor(dates: readonly Date[], name: string | null) {
+ this._dates = Object.freeze([...dates]);
+ this.name = name;
+ }
+
+ // ─── factories ───────────────────────────────────────────────────
+
+ /**
+ * Create a `DatetimeIndex` from an array of `Date` objects.
+ *
+ * @example
+ * ```ts
+ * DatetimeIndex.fromDates([new Date("2024-01-01"), new Date("2024-01-02")]);
+ * ```
+ */
+ static fromDates(dates: readonly Date[], name: string | null = null): DatetimeIndex {
+ return new DatetimeIndex(dates, name);
+ }
+
+ /**
+ * Create a `DatetimeIndex` from an array of UTC millisecond timestamps.
+ *
+ * @example
+ * ```ts
+ * DatetimeIndex.fromTimestamps([0, 86_400_000]); // 1970-01-01, 1970-01-02
+ * ```
+ */
+ static fromTimestamps(timestamps: readonly number[], name: string | null = null): DatetimeIndex {
+ return new DatetimeIndex(
+ timestamps.map((ms) => new Date(ms)),
+ name,
+ );
+ }
+
+ // ─── properties ──────────────────────────────────────────────────
+
+ /** Number of elements. */
+ get size(): number {
+ return this._dates.length;
+ }
+
+ /** Shape tuple `[size]`. */
+ get shape(): [number] {
+ return [this._dates.length];
+ }
+
+ /** Number of dimensions (always `1`). */
+ get ndim(): 1 {
+ return 1;
+ }
+
+ /** `true` when the index has zero elements. */
+ get empty(): boolean {
+ return this._dates.length === 0;
+ }
+
+ /** Read-only view of the underlying `Date` array. */
+ get values(): readonly Date[] {
+ return this._dates;
+ }
+
+ // ─── element access ───────────────────────────────────────────────
+
+ /**
+ * Return the element at position `i` (0-based).
+ *
+ * @throws `RangeError` if `i` is out of bounds.
+ */
+ at(i: number): Date {
+ const d = this._dates[i];
+ if (d === undefined) {
+ throw new RangeError(`Index ${i} out of bounds (size=${this.size})`);
+ }
+ return d;
+ }
+
+ /** Shallow copy as a plain mutable array. */
+ toArray(): Date[] {
+ return [...this._dates];
+ }
+
+ // ─── statistics ───────────────────────────────────────────────────
+
+ /**
+ * Earliest date in the index, or `null` if empty.
+ *
+ * @example
+ * ```ts
+ * DatetimeIndex.fromDates([new Date("2024-03-01"), new Date("2024-01-01")]).min()
+ * //→ Date("2024-01-01")
+ * ```
+ */
+ min(): Date | null {
+ if (this._dates.length === 0) {
+ return null;
+ }
+ let best = this._dates[0] as Date;
+ for (const d of this._dates) {
+ if (d.getTime() < best.getTime()) {
+ best = d;
+ }
+ }
+ return best;
+ }
+
+ /**
+ * Latest date in the index, or `null` if empty.
+ *
+ * @example
+ * ```ts
+ * DatetimeIndex.fromDates([new Date("2024-01-01"), new Date("2024-03-01")]).max()
+ * //→ Date("2024-03-01")
+ * ```
+ */
+ max(): Date | null {
+ if (this._dates.length === 0) {
+ return null;
+ }
+ let best = this._dates[0] as Date;
+ for (const d of this._dates) {
+ if (d.getTime() > best.getTime()) {
+ best = d;
+ }
+ }
+ return best;
+ }
+
+ // ─── transformation ───────────────────────────────────────────────
+
+ /**
+ * Return a sorted copy.
+ *
+ * @param ascending - Sort direction; defaults to `true`.
+ */
+ sort(ascending = true): DatetimeIndex {
+ const sorted = [...this._dates].sort((a, b) =>
+ ascending ? a.getTime() - b.getTime() : b.getTime() - a.getTime(),
+ );
+ return new DatetimeIndex(sorted, this.name);
+ }
+
+ /**
+ * Return a new index with duplicate timestamps removed (first occurrence kept).
+ */
+ unique(): DatetimeIndex {
+ const seen = new Set();
+ const out: Date[] = [];
+ for (const d of this._dates) {
+ const ms = d.getTime();
+ if (!seen.has(ms)) {
+ seen.add(ms);
+ out.push(d);
+ }
+ }
+ return new DatetimeIndex(out, this.name);
+ }
+
+ /**
+ * Return a new index with elements that satisfy `predicate`.
+ */
+ filter(predicate: (d: Date, i: number) => boolean): DatetimeIndex {
+ return new DatetimeIndex(
+ this._dates.filter((d, i) => predicate(d, i)),
+ this.name,
+ );
+ }
+
+ /**
+ * Normalise all timestamps to midnight UTC (floor to day boundary).
+ *
+ * @example
+ * ```ts
+ * DatetimeIndex.fromDates([new Date("2024-03-15T14:30:00Z")]).normalize().at(0)
+ * //→ Date("2024-03-15T00:00:00.000Z")
+ * ```
+ */
+ normalize(): DatetimeIndex {
+ return new DatetimeIndex(this._dates.map(normDate), this.name);
+ }
+
+ /**
+ * Return a new index where each date has been shifted by `n` applications of
+ * `freq`. Negative `n` shifts backward.
+ *
+ * @example
+ * ```ts
+ * const idx = date_range({ start: "2024-01-01", periods: 3 });
+ * idx.shift(7, "D").at(0).toISOString(); // "2024-01-08T00:00:00.000Z"
+ * idx.shift(-1, "D").at(0).toISOString(); // "2023-12-31T00:00:00.000Z"
+ * ```
+ */
+ shift(n: number, freq: DateRangeFreq | DateOffset): DatetimeIndex {
+ if (n === 0) {
+ return new DatetimeIndex(this._dates, this.name);
+ }
+ const offset = freqToOffset(freq, n);
+ return new DatetimeIndex(
+ this._dates.map((d) => offset.apply(d)),
+ this.name,
+ );
+ }
+
+ /**
+ * Snap each date to the nearest anchor of `freq` via `rollforward`.
+ *
+ * @example
+ * ```ts
+ * DatetimeIndex.fromDates([new Date("2024-01-15")]).snap("MS").at(0).toISOString();
+ * // "2024-02-01T00:00:00.000Z"
+ * ```
+ */
+ snap(freq: DateRangeFreq | DateOffset): DatetimeIndex {
+ const offset = freqToOffset(freq);
+ return new DatetimeIndex(
+ this._dates.map((d) => offset.rollforward(d)),
+ this.name,
+ );
+ }
+
+ /**
+ * Return a slice `[start, stop)`.
+ *
+ * @param start - Inclusive start index (0-based).
+ * @param stop - Exclusive stop index; defaults to `this.size`.
+ */
+ slice(start: number, stop?: number): DatetimeIndex {
+ return new DatetimeIndex(this._dates.slice(start, stop), this.name);
+ }
+
+ /**
+ * Return a new index formed by appending `other` after this index.
+ */
+ concat(other: DatetimeIndex): DatetimeIndex {
+ return new DatetimeIndex([...this._dates, ...other._dates], this.name);
+ }
+
+ /**
+ * Return `true` if any element has the same UTC millisecond value as `date`.
+ */
+ contains(date: Date): boolean {
+ const ms = date.getTime();
+ return this._dates.some((d) => d.getTime() === ms);
+ }
+
+ /**
+ * Convert each date to its ISO-8601 string representation.
+ *
+ * @example
+ * ```ts
+ * date_range({ start: "2024-01-01", periods: 2 }).toStrings();
+ * // ["2024-01-01T00:00:00.000Z", "2024-01-02T00:00:00.000Z"]
+ * ```
+ */
+ toStrings(): string[] {
+ return this._dates.map((d) => d.toISOString());
+ }
+
+ // ─── iteration ───────────────────────────────────────────────────
+
+ [Symbol.iterator](): Iterator {
+ return this._dates[Symbol.iterator]();
+ }
+}
+
+// ─── resolveFreq ─────────────────────────────────────────────────────────────
+
/**
 * Convert a frequency string or existing `DateOffset` to a `DateOffset`
 * instance with multiplier `n` (defaults to `1`).
 *
 * When `freq` is already a `DateOffset` instance it is returned unchanged,
 * and the `n` argument is ignored.
 *
 * @example
 * ```ts
 * resolveFreq("MS"); // MonthBegin(1)
 * resolveFreq("QS", 2); // MonthBegin(6)
 * ```
 */
export function resolveFreq(freq: DateRangeFreq | DateOffset, n = 1): DateOffset {
  // Thin public wrapper over the module-private freqToOffset.
  return freqToOffset(freq, n);
}
+
+// ─── date_range / bdate_range ────────────────────────────────────────────────
+
/**
 * Return a fixed-frequency {@link DatetimeIndex}.
 *
 * You must supply at least two of `start`, `end`, and `periods`:
 *
 * | start | end | periods | behaviour |
 * |-------|-----|---------|-----------|
 * | ✓ | ✓ | — | generate from `start` up to and including `end` (if reachable) |
 * | ✓ | — | ✓ | generate `periods` dates forward from `start` |
 * | — | ✓ | ✓ | generate `periods` dates backward ending at `end` |
 *
 * If all three of `start`, `end`, and `periods` are supplied, `end` is
 * silently ignored and generation proceeds forward from `start`
 * (NOTE(review): pandas instead spaces `periods` dates evenly between the
 * two bounds — confirm which behaviour is intended).
 *
 * @example
 * ```ts
 * // 5 daily dates
 * date_range({ start: "2024-01-01", end: "2024-01-05" }).size; // 5
 *
 * // 4 hourly dates from a starting point
 * date_range({ start: "2024-01-01", periods: 4, freq: "H" }).size; // 4
 *
 * // 3 dates ending on Jan 10
 * date_range({ end: "2024-01-10", periods: 3 }).at(2).toISOString();
 * // "2024-01-10T00:00:00.000Z"
 * ```
 */
export function date_range(options: DateRangeOptions): DatetimeIndex {
  // Delegates to the shared builder with "D" (calendar day) as default freq.
  return buildRange(options, "D");
}
+
/**
 * Return a fixed-frequency {@link DatetimeIndex} of **business days**.
 *
 * Identical to {@link date_range} but defaults to `freq: "B"` (Mon–Fri).
 * An explicit `freq` in `options` still takes precedence.
 *
 * @example
 * ```ts
 * // 5 business days starting 2024-01-01 (Mon)
 * bdate_range({ start: "2024-01-01", periods: 5 }).size; // 5
 * ```
 */
export function bdate_range(options: DateRangeOptions): DatetimeIndex {
  // Delegates to the shared builder with "B" (business day) as default freq.
  return buildRange(options, "B");
}
+
+// ─── internal builder ─────────────────────────────────────────────────────────
+
+const MAX_ITER = 1_000_000;
+
+function buildRange(options: DateRangeOptions, defaultFreq: DateRangeFreq): DatetimeIndex {
+ const { start, end, periods, normalize = false, name = null } = options;
+ const freq = options.freq ?? defaultFreq;
+ const offset = freqToOffset(freq);
+
+ if (start === undefined && end === undefined) {
+ throw new Error("date_range: at least one of 'start' or 'end' must be provided");
+ }
+
+ let startDate = start !== undefined ? toDate(start) : null;
+ let endDate = end !== undefined ? toDate(end) : null;
+
+ if (normalize) {
+ if (startDate !== null) {
+ startDate = normDate(startDate);
+ }
+ if (endDate !== null) {
+ endDate = normDate(endDate);
+ }
+ }
+
+ let dates: Date[];
+
+ if (startDate !== null && endDate !== null && periods === undefined) {
+ dates = rangeStartEnd(startDate, endDate, offset);
+ } else if (startDate !== null && periods !== undefined) {
+ dates = rangeStartPeriods(startDate, periods, offset);
+ } else if (endDate !== null && periods !== undefined) {
+ dates = rangeEndPeriods(endDate, periods, offset);
+ } else {
+ throw new Error("date_range: provide at least two of 'start', 'end', 'periods'");
+ }
+
+ return DatetimeIndex.fromDates(dates, name);
+}
+
+/** Forward from start; stop when next date would exceed end. */
+function rangeStartEnd(start: Date, end: Date, offset: DateOffset): Date[] {
+ if (start.getTime() > end.getTime()) {
+ return [];
+ }
+ const out: Date[] = [start];
+ let cur = start;
+ for (let i = 0; i < MAX_ITER; i++) {
+ const next = offset.apply(cur);
+ if (next.getTime() > end.getTime()) {
+ break;
+ }
+ if (next.getTime() === cur.getTime()) {
+ break; // non-progressing guard
+ }
+ out.push(next);
+ cur = next;
+ }
+ return out;
+}
+
+/** Forward from start for exactly `periods` dates. */
+function rangeStartPeriods(start: Date, periods: number, offset: DateOffset): Date[] {
+ if (periods <= 0) {
+ return [];
+ }
+ const out: Date[] = [start];
+ let cur = start;
+ while (out.length < periods) {
+ const next = offset.apply(cur);
+ if (next.getTime() === cur.getTime()) {
+ break; // non-progressing guard
+ }
+ out.push(next);
+ cur = next;
+ }
+ return out;
+}
+
+/** Backward from end for exactly `periods` dates, then reverse. */
+function rangeEndPeriods(end: Date, periods: number, offset: DateOffset): Date[] {
+ if (periods <= 0) {
+ return [];
+ }
+ // Create a negated offset: same class, n negated
+ const negOffset = negateOffset(offset);
+ const out: Date[] = [end];
+ let cur = end;
+ while (out.length < periods) {
+ const prev = negOffset.apply(cur);
+ if (prev.getTime() === cur.getTime()) {
+ break; // non-progressing guard
+ }
+ out.push(prev);
+ cur = prev;
+ }
+ out.reverse();
+ return out;
+}
+
+/** Return a new offset that steps in the opposite direction. */
+function negateOffset(offset: DateOffset): DateOffset {
+ const n = offset.n;
+ const name = offset.name;
+ switch (name) {
+ case "Day":
+ return new Day(-n);
+ case "BusinessDay":
+ return new BusinessDay(-n);
+ case "Hour":
+ return new Hour(-n);
+ case "Minute":
+ return new Minute(-n);
+ case "Second":
+ return new Second(-n);
+ case "Milli":
+ return new Milli(-n);
+ case "Week":
+ return new Week(-n);
+ case "MonthBegin":
+ return new MonthBegin(-n);
+ case "MonthEnd":
+ return new MonthEnd(-n);
+ case "YearBegin":
+ return new YearBegin(-n);
+ case "YearEnd":
+ return new YearEnd(-n);
+ default:
+ // For unknown offset types (custom user-provided), negate n heuristically
+ throw new RangeError(
+ `negateOffset: unsupported offset type "${name}". Provide 'start' + 'periods' instead of 'end' + 'periods'.`,
+ );
+ }
+}
diff --git a/src/core/datetime_tz.ts b/src/core/datetime_tz.ts
new file mode 100644
index 00000000..2c2e09c4
--- /dev/null
+++ b/src/core/datetime_tz.ts
@@ -0,0 +1,523 @@
+/**
+ * Timezone-aware DatetimeIndex: tz_localize and tz_convert.
+ *
+ * Mirrors `pandas.DatetimeIndex.tz_localize` and
+ * `pandas.DatetimeIndex.tz_convert`.
+ *
+ * Uses the IANA timezone database via `Intl.DateTimeFormat` — built into
+ * every modern JS engine including Bun. No external dependencies.
+ *
+ * | Function | Description |
+ * |---|---|
+ * | {@link tz_localize} | Naive → tz-aware (interprets wall-clock times) |
+ * | {@link tz_convert} | Tz-aware → tz-aware (same UTC, new display tz) |
+ *
+ * @example
+ * ```ts
+ * import { date_range } from "./date_range.ts";
+ * import { tz_localize, tz_convert } from "./datetime_tz.ts";
+ *
+ * const naive = date_range({ start: "2024-01-01", periods: 3 });
+ * const ny = tz_localize(naive, "America/New_York");
+ * ny.at(0).toISOString(); // "2024-01-01T05:00:00.000Z" (UTC-5 in Jan)
+ *
+ * const utcIdx = tz_convert(ny, "UTC");
+ * utcIdx.toLocalStrings(); // ["2024-01-01T05:00:00.000+00:00", ...]
+ * ```
+ *
+ * @module
+ */
+
+import { DatetimeIndex } from "./date_range.ts";
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/**
+ * Return the UTC offset (ms) for `tz` at the given UTC instant.
+ *
+ * The offset is defined as `localMs - utcMs`, where `localMs` is the UTC
+ * representation of the wall-clock value in `tz` at that instant.
+ *
+ * For example, at 2024-01-01T05:00:00Z, America/New_York is on EST (UTC-5),
+ * so the offset is −18 000 000 ms.
+ */
+function utcOffsetMs(utcMs: number, tz: string): number {
+ const d = new Date(utcMs);
+ const parts = new Intl.DateTimeFormat("en-CA", {
+ timeZone: tz,
+ year: "numeric",
+ month: "2-digit",
+ day: "2-digit",
+ hour: "2-digit",
+ minute: "2-digit",
+ second: "2-digit",
+ hour12: false,
+ }).formatToParts(d);
+
+ let year = 0;
+ let month = 0;
+ let day = 0;
+ let hour = 0;
+ let minute = 0;
+ let second = 0;
+
+ for (const p of parts) {
+ switch (p.type) {
+ case "year":
+ year = Number(p.value);
+ break;
+ case "month":
+ month = Number(p.value);
+ break;
+ case "day":
+ day = Number(p.value);
+ break;
+ case "hour":
+ hour = Number(p.value) % 24; // "24" can appear for midnight in some impls
+ break;
+ case "minute":
+ minute = Number(p.value);
+ break;
+ case "second":
+ second = Number(p.value);
+ break;
+ default:
+ break;
+ }
+ }
+
+ const localMs = Date.UTC(year, month - 1, day, hour, minute, second);
+ return localMs - utcMs;
+}
+
+/**
+ * Convert a wall-clock time (expressed as a UTC millisecond value whose UTC
+ * date/time components equal the desired local time) to the actual UTC
+ * equivalent in `tz`.
+ *
+ * Uses two-step offset refinement to handle DST transitions:
+ *
+ * - Spring-forward non-existent times: shifted forward to after the gap.
+ * - Fall-back ambiguous times: the pre-transition (EDT) occurrence is used.
+ */
+function wallClockToUtc(wallMs: number, tz: string): number {
+ const off1 = utcOffsetMs(wallMs, tz);
+ const est = wallMs - off1;
+ const off2 = utcOffsetMs(est, tz);
+ // If both offsets agree, est is correct. If they differ (DST boundary),
+ // the second offset is the one actually in effect at the target UTC time.
+ return wallMs - off2;
+}
+
+/**
+ * Format a UTC timestamp as a local ISO-8601 string in `tz`.
+ *
+ * Output format: `"YYYY-MM-DDTHH:mm:ss.000±HH:MM"`
+ */
+function formatInTz(utcMs: number, tz: string): string {
+ const d = new Date(utcMs);
+ const parts = new Intl.DateTimeFormat("en-CA", {
+ timeZone: tz,
+ year: "numeric",
+ month: "2-digit",
+ day: "2-digit",
+ hour: "2-digit",
+ minute: "2-digit",
+ second: "2-digit",
+ hour12: false,
+ }).formatToParts(d);
+
+ let year = "";
+ let month = "";
+ let day = "";
+ let hour = "";
+ let minute = "";
+ let second = "";
+
+ for (const p of parts) {
+ switch (p.type) {
+ case "year":
+ year = p.value;
+ break;
+ case "month":
+ month = p.value;
+ break;
+ case "day":
+ day = p.value;
+ break;
+ case "hour":
+ hour = String(Number(p.value) % 24).padStart(2, "0");
+ break;
+ case "minute":
+ minute = p.value;
+ break;
+ case "second":
+ second = p.value;
+ break;
+ default:
+ break;
+ }
+ }
+
+ const offsetMs = utcOffsetMs(utcMs, tz);
+ const sign = offsetMs >= 0 ? "+" : "-";
+ const absMs = Math.abs(offsetMs);
+ const offsetH = String(Math.floor(absMs / 3_600_000)).padStart(2, "0");
+ const offsetM = String(Math.floor((absMs % 3_600_000) / 60_000)).padStart(2, "0");
+
+ return `${year}-${month}-${day}T${hour}:${minute}:${second}.000${sign}${offsetH}:${offsetM}`;
+}
+
+// ─── TZDatetimeIndex ──────────────────────────────────────────────────────────
+
+/**
+ * A timezone-aware DatetimeIndex — the TypeScript equivalent of a tz-aware
+ * `pandas.DatetimeIndex`.
+ *
+ * Internally stores **UTC millisecond** timestamps. The {@link tz} property
+ * records the IANA timezone (e.g. `"America/New_York"`, `"Europe/London"`,
+ * `"UTC"`). Display methods such as {@link toLocalStrings} convert to local
+ * time on-the-fly.
+ *
+ * Typical usage:
+ * ```ts
+ * const naive = date_range({ start: "2024-01-01", periods: 3 });
+ * const ny = tz_localize(naive, "America/New_York");
+ * const london = ny.tz_convert("Europe/London");
+ * london.toLocalStrings();
+ * // ["2024-01-01T05:00:00.000+00:00", "2024-01-02T05:00:00.000+00:00", ...]
+ * ```
+ */
+export class TZDatetimeIndex {
+ private readonly _utcMs: readonly number[];
+
+ /** IANA timezone name (e.g. `"UTC"`, `"America/New_York"`, `"Asia/Kolkata"`). */
+ readonly tz: string;
+
+ /** Optional human-readable label for this axis. */
+ readonly name: string | null;
+
+ /** @internal */
+ constructor(utcMs: readonly number[], tz: string, name: string | null) {
+ this._utcMs = Object.freeze([...utcMs]);
+ this.tz = tz;
+ this.name = name;
+ }
+
+ // ─── properties ──────────────────────────────────────────────────
+
+ /** Number of elements. */
+ get size(): number {
+ return this._utcMs.length;
+ }
+
+ /** Shape tuple `[size]`. */
+ get shape(): [number] {
+ return [this._utcMs.length];
+ }
+
+ /** Number of dimensions (always `1`). */
+ get ndim(): 1 {
+ return 1;
+ }
+
+ /** `true` when the index has zero elements. */
+ get empty(): boolean {
+ return this._utcMs.length === 0;
+ }
+
+ /**
+ * Raw UTC millisecond timestamps (the underlying storage).
+ *
+ * Use {@link at} to retrieve a `Date` at a specific position.
+ */
+ get values(): readonly number[] {
+ return this._utcMs;
+ }
+
+ // ─── element access ───────────────────────────────────────────────
+
+ /**
+ * Return the UTC `Date` at position `i` (0-based).
+ *
+ * @throws `RangeError` if `i` is out of bounds.
+ */
+ at(i: number): Date {
+ const ms = this._utcMs[i];
+ if (ms === undefined) {
+ throw new RangeError(`Index ${i} out of bounds (size=${this.size})`);
+ }
+ return new Date(ms);
+ }
+
+ /** Shallow copy as a plain mutable array of UTC `Date` objects. */
+ toArray(): Date[] {
+ return this._utcMs.map((ms) => new Date(ms));
+ }
+
+ /** Raw UTC millisecond timestamps as a mutable array. */
+ toTimestamps(): number[] {
+ return [...this._utcMs];
+ }
+
+ // ─── formatting ───────────────────────────────────────────────────
+
+ /**
+ * Return each timestamp formatted as a local ISO-8601 string in this
+ * index's timezone.
+ *
+ * Format: `"YYYY-MM-DDTHH:mm:ss.000±HH:MM"`
+ *
+ * @example
+ * ```ts
+ * const idx = tz_localize(date_range({ start: "2024-01-01", periods: 1 }),
+ * "America/New_York");
+ * idx.toLocalStrings(); // ["2024-01-01T00:00:00.000-05:00"]
+ * ```
+ */
+ toLocalStrings(): string[] {
+ return this._utcMs.map((ms) => formatInTz(ms, this.tz));
+ }
+
+ // ─── conversion ───────────────────────────────────────────────────
+
+ /**
+ * Convert this tz-aware index to a different timezone.
+ *
+ * The UTC timestamps are **preserved**; only the display timezone changes.
+ *
+ * Mirrors `pandas.DatetimeIndex.tz_convert(tz)`.
+ *
+ * @param tz - IANA timezone identifier.
+ * @example
+ * ```ts
+ * const ny = tz_localize(date_range({ start: "2024-01-01", periods: 1 }),
+ * "America/New_York");
+ * const london = ny.tz_convert("Europe/London");
+ * london.toLocalStrings(); // ["2024-01-01T05:00:00.000+00:00"]
+ * ```
+ */
+ tz_convert(tz: string): TZDatetimeIndex {
+ return new TZDatetimeIndex(this._utcMs, tz, this.name);
+ }
+
+ /**
+ * Strip the timezone, returning a tz-naive {@link DatetimeIndex} whose
+ * values are the raw UTC timestamps.
+ *
+ * Equivalent to `pandas.DatetimeIndex.tz_localize(None)`.
+ *
+ * @example
+ * ```ts
+ * const ny = tz_localize(date_range({ start: "2024-01-01", periods: 1 }),
+ * "America/New_York");
+ * ny.tz_localize_none().at(0).toISOString(); // "2024-01-01T05:00:00.000Z"
+ * ```
+ */
+ tz_localize_none(): DatetimeIndex {
+ return DatetimeIndex.fromTimestamps(this._utcMs, this.name);
+ }
+
+ // ─── statistics ───────────────────────────────────────────────────
+
+ /**
+ * Earliest timestamp (UTC), or `null` if empty.
+ *
+ * @example
+ * ```ts
+ * const idx = tz_localize(date_range({ start: "2024-01-01", periods: 3 }),
+ * "UTC");
+ * idx.min()?.toISOString(); // "2024-01-01T00:00:00.000Z"
+ * ```
+ */
+ min(): Date | null {
+ if (this._utcMs.length === 0) {
+ return null;
+ }
+ let best = this._utcMs[0] as number;
+ for (const ms of this._utcMs) {
+ if (ms < best) {
+ best = ms;
+ }
+ }
+ return new Date(best);
+ }
+
+ /**
+ * Latest timestamp (UTC), or `null` if empty.
+ *
+ * @example
+ * ```ts
+ * const idx = tz_localize(date_range({ start: "2024-01-01", periods: 3 }),
+ * "UTC");
+ * idx.max()?.toISOString(); // "2024-01-03T00:00:00.000Z"
+ * ```
+ */
+ max(): Date | null {
+ if (this._utcMs.length === 0) {
+ return null;
+ }
+ let best = this._utcMs[0] as number;
+ for (const ms of this._utcMs) {
+ if (ms > best) {
+ best = ms;
+ }
+ }
+ return new Date(best);
+ }
+
+ // ─── transformation ───────────────────────────────────────────────
+
+ /**
+ * Return a sorted copy (by UTC timestamp).
+ *
+ * @param ascending - Sort direction; defaults to `true`.
+ */
+ sort(ascending = true): TZDatetimeIndex {
+ const sorted = [...this._utcMs].sort((a, b) => (ascending ? a - b : b - a));
+ return new TZDatetimeIndex(sorted, this.tz, this.name);
+ }
+
+  /**
+   * Return a new index with duplicate UTC timestamps removed (first
+   * occurrence kept).
+   */
+  unique(): TZDatetimeIndex {
+    // `Set` iterates in insertion order, so spreading it keeps the first
+    // occurrence of each timestamp — same contract as the previous manual
+    // seen-set loop, without the untyped `new Set()` (which inferred
+    // `Set<unknown>` under strict settings).
+    return new TZDatetimeIndex([...new Set(this._utcMs)], this.tz, this.name);
+  }
+
+  /**
+   * Return a new index keeping only the elements for which `predicate`
+   * returns `true`. The predicate receives each timestamp as a `Date` plus
+   * its position.
+   */
+  filter(predicate: (d: Date, i: number) => boolean): TZDatetimeIndex {
+    const kept: number[] = [];
+    for (const [i, ms] of this._utcMs.entries()) {
+      if (predicate(new Date(ms), i)) {
+        kept.push(ms);
+      }
+    }
+    return new TZDatetimeIndex(kept, this.tz, this.name);
+  }
+
+  /**
+   * Return the half-open slice `[start, stop)` as a new index.
+   *
+   * @param start - Inclusive start index (0-based).
+   * @param stop - Exclusive stop index; defaults to `this.size`.
+   */
+  slice(start: number, stop?: number): TZDatetimeIndex {
+    const window = this._utcMs.slice(start, stop);
+    return new TZDatetimeIndex(window, this.tz, this.name);
+  }
+
+  /**
+   * Append `other` after this index and return the combined index.
+   *
+   * Both indexes **must share the same timezone**; otherwise a `RangeError`
+   * is thrown so mismatched wall-clock data is never silently merged.
+   */
+  concat(other: TZDatetimeIndex): TZDatetimeIndex {
+    if (this.tz !== other.tz) {
+      throw new RangeError(
+        `concat: timezone mismatch ("${this.tz}" vs "${other.tz}"). Convert to the same timezone first with tz_convert.`,
+      );
+    }
+    const merged = this._utcMs.concat(other._utcMs);
+    return new TZDatetimeIndex(merged, this.tz, this.name);
+  }
+
+  /**
+   * Return `true` when some element has exactly the same UTC millisecond
+   * value as `date`.
+   */
+  contains(date: Date): boolean {
+    const target = date.getTime();
+    for (const ms of this._utcMs) {
+      if (ms === target) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+ // ─── iteration ───────────────────────────────────────────────────
+
+  /** Iterate the index, yielding each timestamp as a UTC `Date`. */
+  [Symbol.iterator](): Iterator<Date> {
+    // NOTE: the bare `Iterator` / `IteratorResult` in the previous text are
+    // invalid TypeScript (both require a type argument); restored as
+    // `Iterator<Date>` / `IteratorResult<Date>`.
+    let i = 0;
+    const arr = this._utcMs;
+    return {
+      next(): IteratorResult<Date> {
+        if (i >= arr.length) {
+          return { done: true, value: undefined };
+        }
+        const ms = arr[i];
+        i++;
+        // Satisfies noUncheckedIndexedAccess; unreachable after the bounds check.
+        if (ms === undefined) {
+          return { done: true, value: undefined };
+        }
+        return { done: false, value: new Date(ms) };
+      },
+    };
+  }
+}
+
+// ─── tz_localize ──────────────────────────────────────────────────────────────
+
+/**
+ * Localize a tz-naive {@link DatetimeIndex} to the given timezone.
+ *
+ * The UTC date/time components of each naive timestamp are interpreted as
+ * wall-clock times in `tz` and converted to their true UTC instants.
+ *
+ * Mirrors `pandas.DatetimeIndex.tz_localize(tz)`.
+ *
+ * **DST handling:**
+ * - Non-existent times (spring-forward gap) are shifted forward past the gap.
+ * - Ambiguous times (fall-back overlap) resolve to the pre-transition
+ *   occurrence.
+ *
+ * @param idx - Tz-naive index whose UTC components are the desired wall-clock
+ * times.
+ * @param tz - IANA timezone identifier (e.g. `"America/New_York"`).
+ *
+ * @example
+ * ```ts
+ * const naive = date_range({ start: "2024-01-01", periods: 3 });
+ * const ny = tz_localize(naive, "America/New_York");
+ * ny.tz; // "America/New_York"
+ * ny.size; // 3
+ * ny.at(0).toISOString(); // "2024-01-01T05:00:00.000Z"
+ * ny.toLocalStrings()[0]; // "2024-01-01T00:00:00.000-05:00"
+ * ```
+ */
+export function tz_localize(idx: DatetimeIndex, tz: string): TZDatetimeIndex {
+  const utcMs: number[] = [];
+  for (const d of idx.values) {
+    utcMs.push(wallClockToUtc(d.getTime(), tz));
+  }
+  return new TZDatetimeIndex(utcMs, tz, idx.name);
+}
+
+// ─── tz_convert ───────────────────────────────────────────────────────────────
+
+/**
+ * Convert a {@link TZDatetimeIndex} to a new timezone.
+ *
+ * A free-function alias for {@link TZDatetimeIndex.tz_convert}.
+ *
+ * The UTC timestamps are **preserved**; only the display timezone changes.
+ *
+ * @param idx - Tz-aware index to convert.
+ * @param tz - Target IANA timezone identifier.
+ *
+ * @example
+ * ```ts
+ * const ny = tz_localize(date_range({ start: "2024-01-01", periods: 1 }),
+ * "America/New_York");
+ * const london = tz_convert(ny, "Europe/London");
+ * london.tz; // "Europe/London"
+ * london.toLocalStrings()[0]; // "2024-01-01T05:00:00.000+00:00"
+ * ```
+ */
+export function tz_convert(idx: TZDatetimeIndex, tz: string): TZDatetimeIndex {
+  // Pure delegation — all validation and conversion logic lives on the method.
+  return idx.tz_convert(tz);
+}
diff --git a/src/core/frame.ts b/src/core/frame.ts
index e457b034..91b28377 100644
--- a/src/core/frame.ts
+++ b/src/core/frame.ts
@@ -302,16 +302,27 @@ export class DataFrame {
* const df2 = df.assign({ c: [7, 8, 9] });
* ```
*/
- assign(newCols: Readonly>>): DataFrame {
- const colMap = new Map>(this._columns);
- for (const [name, val] of Object.entries(newCols)) {
- if (val instanceof Series) {
- colMap.set(name, val);
- } else {
- colMap.set(name, new Series({ data: val, index: this.index }));
- }
+  assign(
+    newCols: Readonly<
+      Record<
+        string,
+        readonly Scalar[] | Series | ((df: DataFrame) => readonly Scalar[] | Series)
+      >
+    >,
+  ): DataFrame {
+    // Specs are applied left-to-right; a callable spec receives a frame that
+    // already contains every column assigned earlier in this call (matches
+    // pandas.DataFrame.assign semantics for callables).
+    let currentFrame: DataFrame = this;
+    for (const [name, spec] of Object.entries(newCols)) {
+      // Resolve a callable spec against the *current* intermediate frame.
+      const resolved: readonly Scalar[] | Series =
+        typeof spec === "function" ? spec(currentFrame) : spec;
+      // Raw arrays are wrapped in a Series aligned to the frame's index.
+      const series: Series =
+        resolved instanceof Series
+          ? resolved
+          : new Series({ data: resolved, index: currentFrame.index });
+      // NOTE(review): `new Map>` looks like type arguments were lost in
+      // transit (probably `new Map<string, Series>`) — confirm against the
+      // original source before applying this patch.
+      // Copying the map each iteration keeps intermediate frames immutable.
+      const colMap = new Map>(currentFrame._columns);
+      colMap.set(name, series);
+      currentFrame = new DataFrame(colMap, currentFrame.index);
}
- return new DataFrame(colMap, this.index);
+ return currentFrame;
}
/** Drop one or more columns by name. Returns a new DataFrame. */
diff --git a/src/core/index.ts b/src/core/index.ts
index 255aade6..e737ec8f 100644
--- a/src/core/index.ts
+++ b/src/core/index.ts
@@ -15,6 +15,43 @@ export { CategoricalAccessor } from "./cat_accessor.ts";
export type { CatSeriesLike } from "./cat_accessor.ts";
export { MultiIndex } from "./multi_index.ts";
export type { MultiIndexOptions } from "./multi_index.ts";
+export { Interval, IntervalIndex } from "./interval.ts";
+export type { IntervalClosed, IntervalIndexOptions } from "./interval.ts";
+export { CategoricalIndex } from "./categorical_index.ts";
+export type { CategoricalIndexOptions } from "./categorical_index.ts";
+export { Period, PeriodIndex } from "./period.ts";
+export type { PeriodFreq, PeriodIndexOptions } from "./period.ts";
+export { Timedelta, TimedeltaIndex } from "./timedelta.ts";
+export type { TimedeltaComponents, TimedeltaIndexOptions } from "./timedelta.ts";
+export {
+ Day,
+ Hour,
+ Minute,
+ Second,
+ Milli,
+ Week,
+ MonthEnd,
+ MonthBegin,
+ YearEnd,
+ YearBegin,
+ BusinessDay,
+} from "./date_offset.ts";
+export type { DateOffset, WeekOptions } from "./date_offset.ts";
+export { DatetimeIndex, date_range, bdate_range, resolveFreq } from "./date_range.ts";
+export type { DateRangeFreq, DateRangeOptions, DatetimeIndexOptions } from "./date_range.ts";
+export { TZDatetimeIndex, tz_localize, tz_convert } from "./datetime_tz.ts";
+export { Timestamp } from "./timestamp.ts";
+export type { TimestampOptions, TimestampComponents, TimestampUnit } from "./timestamp.ts";
+export { dataFrameAssign } from "./assign.ts";
+export type { AssignColSpec, AssignSpec } from "./assign.ts";
+export { natCompare, natSorted, natSortKey, natArgSort } from "./natsort.ts";
+export type { NatSortOptions, NatSortedOptions } from "./natsort.ts";
+export { searchsorted, searchsortedMany, argsortScalars } from "./searchsorted.ts";
+export type { SearchSortedSide, SearchSortedOptions } from "./searchsorted.ts";
+export { reindexSeries, reindexDataFrame } from "./reindex.ts";
+export type { ReindexMethod, ReindexSeriesOptions, ReindexDataFrameOptions } from "./reindex.ts";
+export { alignSeries, alignDataFrame } from "./align.ts";
+export type { AlignSeriesOptions, AlignDataFrameOptions } from "./align.ts";
export {
insertColumn,
popColumn,
diff --git a/src/core/interval.ts b/src/core/interval.ts
new file mode 100644
index 00000000..8f706771
--- /dev/null
+++ b/src/core/interval.ts
@@ -0,0 +1,436 @@
+/**
+ * Interval and IntervalIndex — closed/open interval types with index support.
+ *
+ * Mirrors `pandas.Interval` and `pandas.IntervalIndex`:
+ *
+ * - `Interval` represents a single numeric interval `(left, right)` with
+ * configurable endpoint inclusion (`closed`).
+ * - `IntervalIndex` is an ordered collection of intervals suitable for use as
+ * a row index (e.g. after `pd.cut()` / `pd.qcut()`).
+ *
+ * **Closed modes:**
+ * | `closed` | Left endpoint | Right endpoint |
+ * |-------------|---------------|----------------|
+ * | `"right"` | open | closed |
+ * | `"left"` | closed | open |
+ * | `"both"` | closed | closed |
+ * | `"neither"` | open | open |
+ *
+ * @example
+ * ```ts
+ * const iv = new Interval(0, 1); // (0, 1]
+ * iv.contains(0.5); // true
+ * iv.length; // 1
+ * iv.mid; // 0.5
+ *
+ * const idx = IntervalIndex.fromBreaks([0, 1, 2, 3]);
+ * idx.size; // 3
+ * idx.at(0).toString(); // "(0, 1]"
+ * idx.get_loc(1.5); // 1
+ * ```
+ *
+ * @module
+ */
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/** Which endpoint(s) of the interval are closed (inclusive). */
+export type IntervalClosed = "left" | "right" | "both" | "neither";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `closed` includes the left endpoint. */
+function includesLeft(closed: IntervalClosed): boolean {
+ return closed === "left" || closed === "both";
+}
+
+/** True when `closed` includes the right endpoint. */
+function includesRight(closed: IntervalClosed): boolean {
+ return closed === "right" || closed === "both";
+}
+
+/** Test whether `value` falls inside `[left, right]` with the given closure. */
+function pointInInterval(
+  value: number,
+  left: number,
+  right: number,
+  closed: IntervalClosed,
+): boolean {
+  // Reject on the left boundary first, then decide on the right.
+  if (includesLeft(closed) ? value < left : value <= left) {
+    return false;
+  }
+  return includesRight(closed) ? value <= right : value < right;
+}
+
+/** Return the left bracket character (`[` closed, `(` open) for `closed`. */
+function bracketLeft(closed: IntervalClosed): string {
+  return includesLeft(closed) ? "[" : "(";
+}
+
+/** Return the right bracket character (`]` closed, `)` open) for `closed`. */
+function bracketRight(closed: IntervalClosed): string {
+  return includesRight(closed) ? "]" : ")";
+}
+
+// ─── Interval ─────────────────────────────────────────────────────────────────
+
+/**
+ * A single numeric interval with configurable endpoint closure.
+ *
+ * Mirrors `pandas.Interval`.
+ */
+export class Interval {
+  /** Left (lower) endpoint. */
+  readonly left: number;
+  /** Right (upper) endpoint. */
+  readonly right: number;
+  /**
+   * Which endpoints are closed (inclusive).
+   * Defaults to `"right"` to match pandas convention.
+   */
+  readonly closed: IntervalClosed;
+
+  /**
+   * @param left - Lower endpoint.
+   * @param right - Upper endpoint; must be ≥ `left`.
+   * @param closed - Endpoint closure mode (default `"right"`).
+   * @throws RangeError when `left > right`.
+   */
+  constructor(left: number, right: number, closed: IntervalClosed = "right") {
+    if (left > right) {
+      throw new RangeError(`Interval left (${left}) must be ≤ right (${right})`);
+    }
+    this.left = left;
+    this.right = right;
+    this.closed = closed;
+  }
+
+  // ─── derived properties ─────────────────────────────────────────
+
+  /** `true` when the left endpoint is included. */
+  get closedLeft(): boolean {
+    return includesLeft(this.closed);
+  }
+
+  /** `true` when the right endpoint is included. */
+  get closedRight(): boolean {
+    return includesRight(this.closed);
+  }
+
+  /** Length of the interval (`right - left`). */
+  get length(): number {
+    return this.right - this.left;
+  }
+
+  /** Midpoint of the interval. */
+  get mid(): number {
+    return (this.left + this.right) / 2;
+  }
+
+  /**
+   * `true` when the interval contains no points.
+   *
+   * A zero-length interval is empty unless **both** endpoints are closed:
+   * `(0, 0]` has no value `v` with `v > 0 && v <= 0`, and likewise for
+   * `[0, 0)` and `(0, 0)`; only `[0, 0]` contains the single point 0.
+   * Matches `pandas.Interval.is_empty`. (Previously this reported `false`
+   * for `(0, 0]` / `[0, 0)`, contradicting `contains`.)
+   */
+  get isEmpty(): boolean {
+    return this.length === 0 && this.closed !== "both";
+  }
+
+  // ─── methods ────────────────────────────────────────────────────
+
+  /**
+   * Test whether `value` falls inside this interval.
+   *
+   * @example
+   * ```ts
+   * new Interval(0, 1).contains(1); // true — right-closed
+   * new Interval(0, 1).contains(0); // false — left-open
+   * ```
+   */
+  contains(value: number): boolean {
+    return pointInInterval(value, this.left, this.right, this.closed);
+  }
+
+  /**
+   * `true` when this interval shares any points with `other`.
+   *
+   * Uses the standard interval-overlap criterion: two intervals overlap when
+   * neither is completely to the left of the other; touching endpoints count
+   * only when both touching endpoints are closed.
+   */
+  overlaps(other: Interval): boolean {
+    if (this.right < other.left || other.right < this.left) {
+      return false;
+    }
+    if (this.right === other.left) {
+      return includesRight(this.closed) && includesLeft(other.closed);
+    }
+    if (other.right === this.left) {
+      return includesRight(other.closed) && includesLeft(this.closed);
+    }
+    return true;
+  }
+
+  /**
+   * Standard string representation, e.g. `"(0, 1]"`.
+   */
+  toString(): string {
+    return `${bracketLeft(this.closed)}${this.left}, ${this.right}${bracketRight(this.closed)}`;
+  }
+}
+
+// ─── IntervalIndex ───────────────────────────────────────────────────────────
+
+/** Options for the `IntervalIndex` constructor. */
+export interface IntervalIndexOptions {
+  /** Optional label for the index axis; `null`/omitted means unnamed. */
+  readonly name?: string | null;
+}
+
+/**
+ * An ordered collection of numeric intervals, usable as a row index.
+ *
+ * Mirrors `pandas.IntervalIndex`. All intervals in an `IntervalIndex` share
+ * the same `closed` mode.
+ *
+ * @example
+ * ```ts
+ * const idx = IntervalIndex.fromBreaks([0, 1, 2, 3]);
+ * idx.size; // 3
+ * idx.get_loc(2.5); // 2 (falls in (2, 3])
+ * idx.left; // [0, 1, 2]
+ * idx.right; // [1, 2, 3]
+ * idx.mid; // [0.5, 1.5, 2.5]
+ * ```
+ */
+export class IntervalIndex {
+ /** Left endpoints (one per interval). */
+ readonly left: readonly number[];
+ /** Right endpoints (one per interval). */
+ readonly right: readonly number[];
+ /** Closure mode shared by all intervals. */
+ readonly closed: IntervalClosed;
+ /** Optional label for this index axis. */
+ readonly name: string | null;
+
+ // ─── construction ───────────────────────────────────────────────
+
+ private constructor(
+ left: readonly number[],
+ right: readonly number[],
+ closed: IntervalClosed,
+ name: string | null,
+ ) {
+ if (left.length !== right.length) {
+ throw new RangeError(
+ `left and right arrays must have the same length (${left.length} vs ${right.length})`,
+ );
+ }
+ this.left = Object.freeze([...left]);
+ this.right = Object.freeze([...right]);
+ this.closed = closed;
+ this.name = name;
+ }
+
+ // ─── factory methods ────────────────────────────────────────────
+
+ /**
+ * Build an `IntervalIndex` from an array of break-points.
+ *
+ * Given `n+1` break-points, produces `n` intervals:
+ * `breaks[i] → breaks[i+1]` for `i = 0 … n-1`.
+ *
+ * @example
+ * ```ts
+ * IntervalIndex.fromBreaks([0, 1, 2, 3]);
+ * // (0,1], (1,2], (2,3]
+ * ```
+ */
+ static fromBreaks(
+ breaks: readonly number[],
+ closed: IntervalClosed = "right",
+ opts: IntervalIndexOptions = {},
+ ): IntervalIndex {
+ if (breaks.length < 2) {
+ return new IntervalIndex([], [], closed, opts.name ?? null);
+ }
+ const left: number[] = [];
+ const right: number[] = [];
+ for (let i = 0; i < breaks.length - 1; i++) {
+ left.push(breaks[i] as number);
+ right.push(breaks[i + 1] as number);
+ }
+ return new IntervalIndex(left, right, closed, opts.name ?? null);
+ }
+
+ /**
+ * Build an `IntervalIndex` from separate left and right arrays.
+ *
+ * @example
+ * ```ts
+ * IntervalIndex.fromArrays([0, 1, 2], [1, 2, 3]);
+ * ```
+ */
+ static fromArrays(
+ left: readonly number[],
+ right: readonly number[],
+ closed: IntervalClosed = "right",
+ opts: IntervalIndexOptions = {},
+ ): IntervalIndex {
+ return new IntervalIndex(left, right, closed, opts.name ?? null);
+ }
+
+ /**
+ * Build an `IntervalIndex` from an array of `Interval` objects.
+ *
+ * All intervals must share the same `closed` mode; if they differ the
+ * first interval's mode is used (matching pandas behaviour).
+ */
+ static fromIntervals(
+ intervals: readonly Interval[],
+ opts: IntervalIndexOptions = {},
+ ): IntervalIndex {
+ if (intervals.length === 0) {
+ return new IntervalIndex([], [], "right", opts.name ?? null);
+ }
+ const closed = intervals[0]?.closed ?? "right";
+ const left = intervals.map((iv) => iv.left);
+ const right = intervals.map((iv) => iv.right);
+ return new IntervalIndex(left, right, closed, opts.name ?? null);
+ }
+
+ // ─── properties ─────────────────────────────────────────────────
+
+ /** Number of intervals. */
+ get size(): number {
+ return this.left.length;
+ }
+
+ /** Alias for `size` (matches pandas `.length` attribute). */
+ get length(): number {
+ return this.size;
+ }
+
+ /** `true` when the index has zero intervals. */
+ get empty(): boolean {
+ return this.size === 0;
+ }
+
+ /** Midpoints of each interval. */
+ get mid(): readonly number[] {
+ return Object.freeze(
+ this.left.map((l, i) => {
+ const r = this.right[i] as number;
+ return (l + r) / 2;
+ }),
+ );
+ }
+
+ /**
+ * `true` when left endpoints are non-decreasing and each right ≥ its left.
+ * Matches pandas `is_monotonic_increasing`.
+ */
+ get isMonotonicIncreasing(): boolean {
+ for (let i = 1; i < this.size; i++) {
+ if ((this.left[i] as number) < (this.left[i - 1] as number)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * `true` when left endpoints are non-increasing.
+ * Matches pandas `is_monotonic_decreasing`.
+ */
+ get isMonotonicDecreasing(): boolean {
+ for (let i = 1; i < this.size; i++) {
+ if ((this.left[i] as number) > (this.left[i - 1] as number)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /** `true` when either `isMonotonicIncreasing` or `isMonotonicDecreasing`. */
+ get isMonotonic(): boolean {
+ return this.isMonotonicIncreasing || this.isMonotonicDecreasing;
+ }
+
+ // ─── element access ─────────────────────────────────────────────
+
+ /**
+ * Return the `Interval` at position `i` (0-indexed).
+ * Negative indices count from the end.
+ */
+ at(i: number): Interval {
+ const idx = i < 0 ? this.size + i : i;
+ if (idx < 0 || idx >= this.size) {
+ throw new RangeError(`Index ${i} is out of bounds for IntervalIndex of size ${this.size}`);
+ }
+ return new Interval(this.left[idx] as number, this.right[idx] as number, this.closed);
+ }
+
+ /** Materialise all intervals as a plain array. */
+ toArray(): Interval[] {
+ return Array.from({ length: this.size }, (_, i) => this.at(i));
+ }
+
+ // ─── containment / location ─────────────────────────────────────
+
+ /**
+ * For each interval, test whether `value` falls inside it.
+ *
+ * Returns a boolean array of the same length as this index.
+ */
+ contains(value: number): boolean[] {
+ return this.left.map((l, i) => pointInInterval(value, l, this.right[i] as number, this.closed));
+ }
+
+ /**
+ * Return the position of the **first** interval that contains `value`.
+ *
+ * Returns `-1` when no interval contains `value` (matches a common
+ * conventions; pandas raises `KeyError` for this).
+ */
+ get_loc(value: number): number {
+ for (let i = 0; i < this.size; i++) {
+ if (pointInInterval(value, this.left[i] as number, this.right[i] as number, this.closed)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * For each interval, test whether it overlaps `other`.
+ *
+ * Returns a boolean array of the same length as this index.
+ */
+ overlaps(other: Interval): boolean[] {
+ return this.toArray().map((iv) => iv.overlaps(other));
+ }
+
+ // ─── set operations ─────────────────────────────────────────────
+
+ /**
+ * Return a new `IntervalIndex` containing only the intervals where
+ * `mask[i]` is `true`.
+ */
+ filter(mask: readonly boolean[]): IntervalIndex {
+ const left: number[] = [];
+ const right: number[] = [];
+ for (let i = 0; i < this.size; i++) {
+ if (mask[i]) {
+ left.push(this.left[i] as number);
+ right.push(this.right[i] as number);
+ }
+ }
+ return new IntervalIndex(left, right, this.closed, this.name);
+ }
+
+ /** Return a copy of this index with a new `name`. */
+ rename(name: string | null): IntervalIndex {
+ return new IntervalIndex(this.left, this.right, this.closed, name);
+ }
+
+ // ─── formatting ─────────────────────────────────────────────────
+
+ /** Human-readable summary, e.g. `"IntervalIndex([(0, 1], (1, 2]], closed='right')"`. */
+ toString(): string {
+ const inner = this.toArray()
+ .map((iv) => iv.toString())
+ .join(", ");
+ return `IntervalIndex([${inner}], closed='${this.closed}')`;
+ }
+}
diff --git a/src/core/natsort.ts b/src/core/natsort.ts
new file mode 100644
index 00000000..d0112d3e
--- /dev/null
+++ b/src/core/natsort.ts
@@ -0,0 +1,251 @@
+/**
+ * natsort — natural-order sorting for strings and string-keyed collections.
+ *
+ * Mirrors the behaviour of the Python `natsort` package used by pandas when
+ * calling `Index.sort_values(key=natsort_keygen())` or `natsorted(...)`.
+ *
+ * The algorithm tokenises each string into alternating *text* and *digit*
+ * chunks and compares them chunk-by-chunk:
+ * - Digit chunks are compared **numerically** (so "file10" > "file9").
+ * - Text chunks are compared **lexicographically** (optionally case-folded).
+ *
+ * @module
+ */
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/**
+ * A single token produced by the tokeniser: either a run of non-digit
+ * characters, or the numeric value of a run of ASCII digits.
+ */
+type Token = string | number;
+
+/** Options shared by all public `nat*` helpers. */
+export interface NatSortOptions {
+  /**
+   * If `true`, fold text tokens to lower-case before comparing.
+   * Digit tokens are always compared numerically regardless of this flag.
+   * @defaultValue `false`
+   */
+  readonly ignoreCase?: boolean;
+
+  /**
+   * If `true`, reverse the comparison direction so that `natSorted` returns
+   * values in descending natural order.
+   * @defaultValue `false`
+   */
+  readonly reverse?: boolean;
+}
+
+/**
+ * Options for {@link natSorted} that additionally allow a key function.
+ *
+ * The type parameter (with a permissive default so the bare name remains
+ * usable) was referenced but never declared in the previous text — almost
+ * certainly stripped generics.
+ */
+export interface NatSortedOptions<T = unknown> extends NatSortOptions {
+  /**
+   * Optional function that extracts the string to sort by from each element.
+   * When omitted, elements must themselves be strings.
+   */
+  readonly key?: (item: T) => string;
+}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/**
+ * Split `s` into alternating text and digit tokens.
+ *
+ * Runs of ASCII digits become numbers; everything between them stays text.
+ *
+ * Examples:
+ * - `"file10.txt"` → `["file", 10, ".txt"]`
+ * - `"abc"` → `["abc"]`
+ * - `"007"` → `[7]`
+ */
+function tokenize(s: string): readonly Token[] {
+  const tokens: Token[] = [];
+  let cursor = 0;
+  for (const m of s.matchAll(/(\d+)/g)) {
+    const at = m.index ?? 0;
+    // Text run preceding this digit run, if any.
+    if (at > cursor) {
+      tokens.push(s.slice(cursor, at));
+    }
+    tokens.push(Number(m[0]));
+    cursor = at + m[0].length;
+  }
+  // Trailing text after the last digit run.
+  if (cursor < s.length) {
+    tokens.push(s.slice(cursor));
+  }
+  return tokens;
+}
+
+/**
+ * Compare two individual tokens.
+ *
+ * Mixed-type comparison (one text, one digit) always places the digit chunk
+ * *before* the text chunk — matching `natsort`'s default `ns.DEFAULT` order.
+ */
+function cmpTokens(a: Token, b: Token, ignoreCase: boolean): number {
+  if (typeof a === "number") {
+    if (typeof b === "number") {
+      return a - b;
+    }
+    return -1; // digit chunk sorts before text chunk
+  }
+  if (typeof b === "number") {
+    return 1;
+  }
+  const sa = ignoreCase ? a.toLowerCase() : a;
+  const sb = ignoreCase ? b.toLowerCase() : b;
+  if (sa === sb) {
+    return 0;
+  }
+  return sa < sb ? -1 : 1;
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Natural-order comparator suitable for use as an argument to `Array.sort`.
+ *
+ * ```ts
+ * const files = ["file10.txt", "file2.txt", "file1.txt"];
+ * files.sort(natCompare);
+ * // → ["file1.txt", "file2.txt", "file10.txt"]
+ * ```
+ *
+ * Mirrors `natsort.natsort_keygen()` used inside pandas.
+ *
+ * @param a - First string to compare.
+ * @param b - Second string to compare.
+ * @param options - Sort options (ignoreCase, reverse).
+ * @returns Negative, zero, or positive number as expected by `Array.sort`.
+ */
+export function natCompare(a: string, b: string, options: NatSortOptions = {}): number {
+  const { ignoreCase = false, reverse = false } = options;
+  const sign = reverse ? -1 : 1;
+  const ta = tokenize(a);
+  const tb = tokenize(b);
+  const shared = Math.min(ta.length, tb.length);
+  for (let i = 0; i < shared; i++) {
+    const x = ta[i];
+    const y = tb[i];
+    // Guard for noUncheckedIndexedAccess; unreachable given `shared`.
+    if (x === undefined || y === undefined) {
+      break;
+    }
+    const c = cmpTokens(x, y, ignoreCase);
+    if (c !== 0) {
+      return sign * c;
+    }
+  }
+  // All shared tokens equal: the shorter string sorts first.
+  const byLength = ta.length - tb.length;
+  return byLength === 0 ? 0 : sign * byLength;
+}
+
+/**
+ * Return a new array sorted in natural order.
+ *
+ * ```ts
+ * natSorted(["b1", "a20", "a3"])
+ * // → ["a3", "a20", "b1"]
+ * ```
+ *
+ * When elements are not strings, pass a `key` function that extracts the
+ * string to sort by:
+ *
+ * ```ts
+ * const rows = [{ name: "file10" }, { name: "file2" }];
+ * natSorted(rows, { key: r => r.name });
+ * // → [{ name: "file2" }, { name: "file10" }]
+ * ```
+ *
+ * Mirrors `natsort.natsorted()`.
+ *
+ * The `<T>` type parameter was missing in the previous text (the body used
+ * `T` undeclared) — restored here, almost certainly stripped generics.
+ *
+ * @param arr - Array to sort (not mutated).
+ * @param options - Sort options (ignoreCase, reverse, key).
+ * @returns New array in natural order.
+ */
+export function natSorted<T>(arr: readonly T[], options: NatSortedOptions<T> = {}): T[] {
+  const { key, ...cmpOpts } = options;
+  const copy = [...arr];
+  if (key !== undefined) {
+    copy.sort((a, b) => natCompare(key(a), key(b), cmpOpts));
+  } else {
+    copy.sort((a, b) => {
+      // Without a key we can only compare string elements.
+      if (typeof a !== "string" || typeof b !== "string") {
+        throw new TypeError(
+          "natSorted: elements must be strings when no `key` function is provided",
+        );
+      }
+      return natCompare(a, b, cmpOpts);
+    });
+  }
+  return copy;
+}
+
+/**
+ * Compute the natural-sort **key** for a string.
+ *
+ * Returns the token array that `natCompare` derives internally. Useful for
+ * caching keys when sorting large arrays (compare once, sort many).
+ *
+ * ```ts
+ * const key = natSortKey("file10.txt");
+ * // → ["file", 10, ".txt"]
+ * ```
+ *
+ * Mirrors `natsort.natsort_key()`.
+ *
+ * The `Pick` type arguments were missing in the previous text (bare `Pick`
+ * is invalid TypeScript) — restored as `Pick<NatSortOptions, "ignoreCase">`.
+ *
+ * @param s - String to produce a key for.
+ * @param options - Sort options (ignoreCase only; `reverse` is not applied to keys).
+ * @returns Immutable token array.
+ */
+export function natSortKey(
+  s: string,
+  options: Pick<NatSortOptions, "ignoreCase"> = {},
+): readonly Token[] {
+  const { ignoreCase = false } = options;
+  const tokens = tokenize(s);
+  if (!ignoreCase) {
+    return tokens;
+  }
+  return tokens.map((t) => (typeof t === "string" ? t.toLowerCase() : t));
+}
+
+/**
+ * Return the integer permutation that would sort `arr` in natural order.
+ *
+ * ```ts
+ * natArgSort(["file10", "file2", "file1"])
+ * // → [2, 1, 0] (index of "file1", "file2", "file10" in original array)
+ * ```
+ *
+ * Mirrors the `argsort` / `natsort` integration in `pandas.Index`.
+ *
+ * @param arr - Array of strings to rank.
+ * @param options - Sort options (ignoreCase, reverse).
+ * @returns Array of original indices in sorted order.
+ */
+export function natArgSort(arr: readonly string[], options: NatSortOptions = {}): number[] {
+  const { ignoreCase = false, reverse = false } = options;
+  // Pre-compute (and, if requested, case-fold) the token key for each element
+  // so each string is tokenised exactly once.
+  const keys = arr.map((s) => tokenize(ignoreCase ? s.toLowerCase() : s));
+  const compareKeys = (ta: readonly Token[], tb: readonly Token[]): number => {
+    const shared = Math.min(ta.length, tb.length);
+    for (let k = 0; k < shared; k++) {
+      const x = ta[k];
+      const y = tb[k];
+      if (x === undefined || y === undefined) {
+        break;
+      }
+      const c = cmpTokens(x, y, false); // tokens are pre-case-folded
+      if (c !== 0) {
+        return c;
+      }
+    }
+    return ta.length - tb.length;
+  };
+  const order = arr.map((_, i) => i);
+  order.sort((i, j) => {
+    const ta = keys[i];
+    const tb = keys[j];
+    if (ta === undefined || tb === undefined) {
+      throw new RangeError("natArgSort: index out of bounds");
+    }
+    const c = compareKeys(ta, tb);
+    return reverse ? -c : c;
+  });
+  return order;
+}
diff --git a/src/core/period.ts b/src/core/period.ts
new file mode 100644
index 00000000..db3fe97f
--- /dev/null
+++ b/src/core/period.ts
@@ -0,0 +1,728 @@
+/**
+ * Period and PeriodIndex — fixed-frequency time spans.
+ *
+ * Mirrors `pandas.Period` and `pandas.PeriodIndex`.
+ *
+ * A {@link Period} represents a single time span at a fixed frequency.
+ * A {@link PeriodIndex} is an ordered sequence of such spans, suitable for
+ * use as a row / column index.
+ *
+ * **Supported frequencies:**
+ * | Code | Description |
+ * |------|------------------------------------|
+ * | `"A"` | Calendar year (alias `"Y"`) |
+ * | `"Q"` | Calendar quarter |
+ * | `"M"` | Calendar month |
+ * | `"W"` | ISO week (Monday start, Sunday end) |
+ * | `"D"` | Day |
+ * | `"H"` | Hour |
+ * | `"T"` | Minute (alias `"min"`) |
+ * | `"S"` | Second |
+ *
+ * @example
+ * ```ts
+ * const p = Period.fromDate(new Date("2024-03-15T00:00:00Z"), "M");
+ * p.toString(); // "2024-03"
+ * p.add(2).toString(); // "2024-05"
+ *
+ * const idx = PeriodIndex.fromRange(
+ * Period.fromDate(new Date("2024-01-01T00:00:00Z"), "Q"),
+ * Period.fromDate(new Date("2024-12-31T00:00:00Z"), "Q"),
+ * );
+ * idx.size; // 4
+ * idx.at(0).toString(); // "2024Q1"
+ * ```
+ *
+ * @module
+ */
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/**
 * Supported period frequencies.
 *
 * `"Y"` is accepted as an alias for `"A"` (annual).
 * `"min"` is accepted as an alias for `"T"` (minute).
 */
export type PeriodFreq = "A" | "Q" | "M" | "W" | "D" | "H" | "T" | "S";

/** Options accepted by {@link PeriodIndex} factory methods. */
export interface PeriodIndexOptions {
  /** Optional name label for the index. */
  readonly name?: string | null;
}

// ─── internal constants ───────────────────────────────────────────────────────

// Milliseconds per unit for the fixed-duration frequencies (S / T / H / D / W).
const MS_S = 1_000;
const MS_MIN = 60_000;
const MS_HOUR = 3_600_000;
const MS_DAY = 86_400_000;

/**
 * 1970-01-01 is a Thursday (Monday = 0, Thursday = 3).
 * Week ordinal 0 starts on Monday 1969-12-29.
 * Offset = 3 days bridges 1969-12-29 → 1970-01-01.
 */
const WEEK_OFFSET = 3;

/** Canonical ordered list of supported frequencies for validation. */
const VALID_FREQS = ["A", "Q", "M", "W", "D", "H", "T", "S"] as const;

// ─── top-level regex constants ────────────────────────────────────────────────

// Hoisted to module scope so each pattern is compiled once, not per call.
const RE_QUARTER = /^(\d{4})Q([1-4])$/i; // e.g. "2024Q1" (case-insensitive)
const RE_YEAR_MONTH = /^(\d{4})-(\d{2})$/; // e.g. "2024-03"
const RE_YEAR = /^\d{4}$/; // e.g. "2024"
+
+// ─── frequency helpers ────────────────────────────────────────────────────────
+
+/** Normalise a user-supplied frequency string to a canonical {@link PeriodFreq}. */
+function normFreq(freq: string): PeriodFreq {
+ const upper = freq.toUpperCase();
+ if (upper === "MIN") {
+ return "T";
+ }
+ if (upper === "Y") {
+ return "A";
+ }
+ for (const v of VALID_FREQS) {
+ if (v === upper) {
+ return v;
+ }
+ }
+ throw new Error(`Unsupported PeriodFreq: "${freq}". Valid: A (Y), Q, M, W, D, H, T (min), S`);
+}
+
+// ─── ordinal ↔ Date conversion ────────────────────────────────────────────────
+
/**
 * Compute the integer ordinal for a given Date and frequency.
 * Ordinal 0 corresponds to the period containing 1970-01-01T00:00:00Z.
 *
 * @param date - Instant to locate, interpreted on the UTC timeline.
 * @param freq - Canonical frequency code (already passed through normFreq).
 * @returns Whole-period count from the epoch period to `date`'s period;
 *          negative for instants before 1970.
 */
function dateToOrdinal(date: Date, freq: PeriodFreq): number {
  const ms = date.getTime();
  const y = date.getUTCFullYear();
  const mo = date.getUTCMonth(); // 0-based
  // Math.floor (not truncation toward zero) keeps pre-1970 instants in the
  // correct (negative) period.
  const dayOrd = Math.floor(ms / MS_DAY);
  switch (freq) {
    case "S":
      return Math.floor(ms / MS_S);
    case "T":
      return Math.floor(ms / MS_MIN);
    case "H":
      return Math.floor(ms / MS_HOUR);
    case "D":
      return dayOrd;
    case "W":
      // Shift by WEEK_OFFSET so ordinal 0 is the Monday-start week that
      // contains the epoch (week beginning Monday 1969-12-29).
      return Math.floor((dayOrd + WEEK_OFFSET) / 7);
    case "M":
      return (y - 1970) * 12 + mo;
    case "Q":
      return (y - 1970) * 4 + Math.floor(mo / 3);
    case "A":
      return y - 1970;
    default:
      // Exhaustiveness guard; normFreq makes this unreachable in practice.
      throw new Error(`Unreachable: unknown freq "${freq}"`);
  }
}
+
/** Start-of-period timestamp in ms since Unix epoch for the given ordinal. */
function ordinalToStartMs(ordinal: number, freq: PeriodFreq): number {
  switch (freq) {
    case "S":
      return ordinal * MS_S;
    case "T":
      return ordinal * MS_MIN;
    case "H":
      return ordinal * MS_HOUR;
    case "D":
      return ordinal * MS_DAY;
    case "W":
      // Undo the WEEK_OFFSET shift applied in dateToOrdinal (Monday start).
      return (ordinal * 7 - WEEK_OFFSET) * MS_DAY;
    case "M": {
      // Floor-division year + true-modulo month so negative ordinals
      // (pre-1970 periods) resolve to the correct calendar month.
      const y = 1970 + Math.floor(ordinal / 12);
      const mo = ((ordinal % 12) + 12) % 12;
      return Date.UTC(y, mo, 1);
    }
    case "Q": {
      const y = 1970 + Math.floor(ordinal / 4);
      // True modulo keeps the quarter-in-year non-negative for negative ordinals.
      const q = ((ordinal % 4) + 4) % 4;
      return Date.UTC(y, q * 3, 1);
    }
    case "A":
      return Date.UTC(1970 + ordinal, 0, 1);
    default:
      // Exhaustiveness guard; normFreq makes this unreachable in practice.
      throw new Error(`Unreachable: unknown freq "${freq}"`);
  }
}
+
/**
 * End-of-period timestamp (last ms) for the given ordinal.
 *
 * Periods tile the timeline contiguously, so the final millisecond of
 * ordinal N is exactly 1 ms before the start of ordinal N + 1.
 */
function ordinalToEndMs(ordinal: number, freq: PeriodFreq): number {
  return ordinalToStartMs(ordinal + 1, freq) - 1;
}
+
+// ─── formatting ───────────────────────────────────────────────────────────────
+
+/** Zero-pad a number to exactly 2 decimal digits. */
+function pad2(n: number): string {
+ return String(n).padStart(2, "0");
+}
+
/**
 * Format a Period ordinal as a human-readable string for the given frequency.
 *
 * All components are derived from the period's start instant in UTC; the
 * weekly format additionally shows the end date as "start/end" (both
 * inclusive), matching pandas' weekly Period string form.
 */
function formatPeriod(ordinal: number, freq: PeriodFreq): string {
  const start = new Date(ordinalToStartMs(ordinal, freq));
  const y = start.getUTCFullYear();
  const mo = pad2(start.getUTCMonth() + 1);
  const d = pad2(start.getUTCDate());
  const h = pad2(start.getUTCHours());
  const mi = pad2(start.getUTCMinutes());
  const s = pad2(start.getUTCSeconds());
  switch (freq) {
    case "A":
      return `${y}`;
    case "Q": {
      // Quarter number (1-4) from the start month.
      const q = Math.floor(start.getUTCMonth() / 3) + 1;
      return `${y}Q${q}`;
    }
    case "M":
      return `${y}-${mo}`;
    case "W": {
      // Weekly periods render as an inclusive "start/end" date range.
      const end = new Date(ordinalToEndMs(ordinal, freq));
      const ey = end.getUTCFullYear();
      const emo = pad2(end.getUTCMonth() + 1);
      const ed = pad2(end.getUTCDate());
      return `${y}-${mo}-${d}/${ey}-${emo}-${ed}`;
    }
    case "D":
      return `${y}-${mo}-${d}`;
    case "H":
      return `${y}-${mo}-${d} ${h}:00`;
    case "T":
      return `${y}-${mo}-${d} ${h}:${mi}`;
    case "S":
      return `${y}-${mo}-${d} ${h}:${mi}:${s}`;
    default:
      // Exhaustiveness guard; normFreq makes this unreachable in practice.
      throw new Error(`Unreachable: unknown freq "${freq}"`);
  }
}
+
+// ─── parsing ──────────────────────────────────────────────────────────────────
+
+/** Attempt to parse "2024Q1" → Date(2024-01-01). Returns null on mismatch. */
+function tryParseQuarter(s: string): Date | null {
+ const m = RE_QUARTER.exec(s);
+ if (m === null) {
+ return null;
+ }
+ const year = Number(m[1] ?? "1970");
+ const q = Number(m[2] ?? "1");
+ return new Date(Date.UTC(year, (q - 1) * 3, 1));
+}
+
+/** Attempt to parse "2024-03" → Date(2024-03-01). Returns null on mismatch. */
+function tryParseYearMonth(s: string): Date | null {
+ const m = RE_YEAR_MONTH.exec(s);
+ if (m === null) {
+ return null;
+ }
+ const year = Number(m[1] ?? "1970");
+ const month = Number(m[2] ?? "1") - 1;
+ return new Date(Date.UTC(year, month, 1));
+}
+
+/** Attempt to parse "2024" → Date(2024-01-01). Returns null on mismatch. */
+function tryParseYear(s: string): Date | null {
+ const m = RE_YEAR.exec(s);
+ if (m === null) {
+ return null;
+ }
+ return new Date(Date.UTC(Number(s), 0, 1));
+}
+
+/** Attempt to parse "YYYY-MM-DD/YYYY-MM-DD" → start Date. Returns null on mismatch. */
+function tryParseWeekRange(s: string): Date | null {
+ const slash = s.indexOf("/");
+ if (slash < 0) {
+ return null;
+ }
+ return new Date(`${s.slice(0, slash)}T00:00:00Z`);
+}
+
+/** Normalise a plain ISO date string to ensure UTC parsing. */
+function normIso(s: string): string {
+ if (s.includes("T") || s.endsWith("Z")) {
+ return s;
+ }
+ const withT = s.replace(" ", "T");
+ return withT.length <= 10 ? `${withT}T00:00:00Z` : `${withT}Z`;
+}
+
+/** Parse a period string into a Date for the given frequency. */
+function parsePeriodString(s: string, freq: PeriodFreq): Date {
+ if (freq === "A") {
+ const d = tryParseYear(s);
+ if (d !== null) {
+ return d;
+ }
+ }
+ if (freq === "Q") {
+ const d = tryParseQuarter(s);
+ if (d !== null) {
+ return d;
+ }
+ }
+ if (freq === "M") {
+ const d = tryParseYearMonth(s);
+ if (d !== null) {
+ return d;
+ }
+ }
+ if (freq === "W") {
+ const d = tryParseWeekRange(s);
+ if (d !== null) {
+ return d;
+ }
+ }
+ const d = new Date(normIso(s));
+ if (!Number.isFinite(d.getTime())) {
+ throw new Error(`Cannot parse "${s}" as Period with freq "${freq}"`);
+ }
+ return d;
+}
+
+// ─── Period ───────────────────────────────────────────────────────────────────
+
+/**
+ * A single time span at a fixed frequency.
+ *
+ * Mirrors `pandas.Period`. Internally stores an integer `ordinal` (number
+ * of periods elapsed since the Unix epoch for that frequency) together with
+ * the frequency code.
+ *
+ * Periods are **immutable**: all mutation methods return a new `Period`.
+ */
+export class Period {
+ /** Integer ordinal — number of complete periods since the Unix epoch. */
+ readonly ordinal: number;
+
+ /** Canonical frequency code. */
+ readonly freq: PeriodFreq;
+
+ /**
+ * Construct a Period directly from its ordinal and frequency.
+ *
+ * @param ordinal - Integer period count since the Unix epoch.
+ * @param freq - Frequency code (case-insensitive; `"Y"` and `"min"` accepted).
+ */
+ constructor(ordinal: number, freq: PeriodFreq | string) {
+ if (!Number.isInteger(ordinal)) {
+ throw new Error(`Period ordinal must be an integer, got ${ordinal}`);
+ }
+ this.ordinal = ordinal;
+ this.freq = normFreq(freq as string);
+ }
+
+ /**
+ * Create a Period from a `Date` object.
+ *
+ * The period that *contains* `date` at the given frequency is returned.
+ */
+ static fromDate(date: Date, freq: PeriodFreq | string): Period {
+ const f = normFreq(freq as string);
+ return new Period(dateToOrdinal(date, f), f);
+ }
+
+ /**
+ * Create a Period from a string representation.
+ *
+ * Accepted formats depend on the frequency:
+ * - `"A"`: `"2024"`
+ * - `"Q"`: `"2024Q1"`
+ * - `"M"`: `"2024-03"`
+ * - `"W"`: `"2024-01-01/2024-01-07"` or any date string in the week
+ * - `"D"`: `"2024-01-15"`
+ * - `"H"`: `"2024-01-15T12:00:00Z"`
+ * - `"T"`: `"2024-01-15T12:30:00Z"`
+ * - `"S"`: `"2024-01-15T12:30:45Z"`
+ */
+ static fromString(s: string, freq: PeriodFreq | string): Period {
+ const f = normFreq(freq as string);
+ return new Period(dateToOrdinal(parsePeriodString(s, f), f), f);
+ }
+
+ /** Date marking the start of this period (UTC, inclusive). */
+ get startTime(): Date {
+ return new Date(ordinalToStartMs(this.ordinal, this.freq));
+ }
+
+ /** Date marking the end of this period (UTC, inclusive, last millisecond). */
+ get endTime(): Date {
+ return new Date(ordinalToEndMs(this.ordinal, this.freq));
+ }
+
+ /** Duration of this period in milliseconds. */
+ get durationMs(): number {
+ return ordinalToEndMs(this.ordinal, this.freq) - ordinalToStartMs(this.ordinal, this.freq) + 1;
+ }
+
+ /**
+ * Return true if the given `Date` falls within this period (inclusive of
+ * both endpoints).
+ */
+ contains(date: Date): boolean {
+ const ms = date.getTime();
+ return (
+ ms >= ordinalToStartMs(this.ordinal, this.freq) &&
+ ms <= ordinalToEndMs(this.ordinal, this.freq)
+ );
+ }
+
+ /**
+ * Return a new Period shifted `n` periods forward (`n > 0`) or backward
+ * (`n < 0`) at the same frequency.
+ */
+ add(n: number): Period {
+ return new Period(this.ordinal + n, this.freq);
+ }
+
+ /**
+ * Return the number of periods between `this` and `other` (`this - other`).
+ *
+ * Both periods must share the same frequency.
+ */
+ diff(other: Period): number {
+ if (other.freq !== this.freq) {
+ throw new Error(
+ `Cannot diff periods with different frequencies: "${this.freq}" vs "${other.freq}"`,
+ );
+ }
+ return this.ordinal - other.ordinal;
+ }
+
+ /**
+ * Compare two periods. Returns negative / zero / positive just like
+ * `Array.prototype.sort` comparators.
+ *
+ * Both periods must share the same frequency.
+ */
+ compareTo(other: Period): number {
+ if (other.freq !== this.freq) {
+ throw new Error(
+ `Cannot compare periods with different frequencies: "${this.freq}" vs "${other.freq}"`,
+ );
+ }
+ return this.ordinal - other.ordinal;
+ }
+
+ /** Structural equality — same ordinal and same frequency. */
+ equals(other: Period): boolean {
+ return this.ordinal === other.ordinal && this.freq === other.freq;
+ }
+
+ /**
+ * Convert this period to a different frequency.
+ *
+ * The `how` parameter controls which point within this period is used:
+ * - `"start"` (default) — use the start of the current period
+ * - `"end"` — use the end of the current period
+ */
+ asfreq(freq: PeriodFreq | string, how: "start" | "end" = "start"): Period {
+ const f = normFreq(freq as string);
+ const ms =
+ how === "end"
+ ? ordinalToEndMs(this.ordinal, this.freq)
+ : ordinalToStartMs(this.ordinal, this.freq);
+ return new Period(dateToOrdinal(new Date(ms), f), f);
+ }
+
+ /** Human-readable string representation (matches pandas output). */
+ toString(): string {
+ return formatPeriod(this.ordinal, this.freq);
+ }
+
+ /** JSON serialisation delegates to {@link toString}. */
+ toJSON(): string {
+ return this.toString();
+ }
+}
+
+// ─── PeriodIndex ──────────────────────────────────────────────────────────────
+
+/**
+ * An ordered sequence of {@link Period} values at a uniform frequency.
+ *
+ * Mirrors `pandas.PeriodIndex`. Internally stores integer ordinals for
+ * efficiency; {@link Period} objects are created on demand by {@link at}.
+ *
+ * @example
+ * ```ts
+ * const idx = PeriodIndex.periodRange(
+ * Period.fromDate(new Date("2024-01-01T00:00:00Z"), "M"),
+ * 6,
+ * );
+ * idx.size; // 6
+ * idx.at(0).toString(); // "2024-01"
+ * idx.at(5).toString(); // "2024-06"
+ * idx.getLoc(Period.fromString("2024-03", "M")); // 2
+ * ```
+ */
+export class PeriodIndex {
+ private readonly _ordinals: readonly number[];
+
+ /** Frequency shared by all periods in this index. */
+ readonly freq: PeriodFreq;
+
+ /** Optional name label. */
+ readonly name: string | null;
+
+ private constructor(ordinals: readonly number[], freq: PeriodFreq, name?: string | null) {
+ this._ordinals = ordinals;
+ this.freq = freq;
+ this.name = name ?? null;
+ }
+
+ // ─── factory methods ──────────────────────────────────────────────────────
+
+ /**
+ * Build a PeriodIndex from an array of {@link Period} objects.
+ *
+ * All periods must share the same frequency; the frequency of the first
+ * element is used (an error is thrown on mismatch).
+ */
+ static fromPeriods(periods: readonly Period[], options?: PeriodIndexOptions): PeriodIndex {
+ if (periods.length === 0) {
+ throw new Error("Cannot construct PeriodIndex from an empty array");
+ }
+ const first = periods[0];
+ if (first === undefined) {
+ throw new Error("Cannot construct PeriodIndex from an empty array");
+ }
+ const freq = first.freq;
+ const ordinals: number[] = [];
+ for (const p of periods) {
+ if (p.freq !== freq) {
+ throw new Error(
+ `PeriodIndex.fromPeriods: all periods must share the same frequency. Expected "${freq}", got "${p.freq}"`,
+ );
+ }
+ ordinals.push(p.ordinal);
+ }
+ return new PeriodIndex(ordinals, freq, options?.name);
+ }
+
+ /**
+ * Build a PeriodIndex covering every period from `start` to `end` inclusive.
+ *
+ * `start` and `end` must share the same frequency.
+ */
+ static fromRange(start: Period, end: Period, options?: PeriodIndexOptions): PeriodIndex {
+ if (start.freq !== end.freq) {
+ throw new Error(
+ `PeriodIndex.fromRange: start and end must share the same frequency ("${start.freq}" vs "${end.freq}")`,
+ );
+ }
+ if (start.ordinal > end.ordinal) {
+ throw new Error(`PeriodIndex.fromRange: start (${start}) must be ≤ end (${end})`);
+ }
+ const ordinals: number[] = [];
+ for (let i = start.ordinal; i <= end.ordinal; i++) {
+ ordinals.push(i);
+ }
+ return new PeriodIndex(ordinals, start.freq, options?.name);
+ }
+
+ /**
+ * Generate a PeriodIndex by stepping `periods` periods forward from `start`.
+ *
+ * `periods` must be a positive integer.
+ */
+ static periodRange(start: Period, periods: number, options?: PeriodIndexOptions): PeriodIndex {
+ if (!Number.isInteger(periods) || periods <= 0) {
+ throw new Error(
+ `PeriodIndex.periodRange: "periods" must be a positive integer, got ${periods}`,
+ );
+ }
+ const ordinals: number[] = [];
+ for (let i = 0; i < periods; i++) {
+ ordinals.push(start.ordinal + i);
+ }
+ return new PeriodIndex(ordinals, start.freq, options?.name);
+ }
+
+ // ─── properties ───────────────────────────────────────────────────────────
+
+ /** Number of periods in the index. */
+ get size(): number {
+ return this._ordinals.length;
+ }
+
+ /** Shape tuple `[size]`. */
+ get shape(): [number] {
+ return [this._ordinals.length];
+ }
+
+ /** Always 1 — a PeriodIndex is one-dimensional. */
+ get ndim(): 1 {
+ return 1;
+ }
+
+ /** True when the index contains no periods. */
+ get empty(): boolean {
+ return this._ordinals.length === 0;
+ }
+
+ // ─── element access ───────────────────────────────────────────────────────
+
+ /**
+ * Return the {@link Period} at position `i` (0-based).
+ *
+ * Negative indices are supported (Python-style).
+ */
+ at(i: number): Period {
+ const len = this._ordinals.length;
+ const idx = i < 0 ? len + i : i;
+ if (idx < 0 || idx >= len) {
+ throw new RangeError(`Index ${i} out of bounds for PeriodIndex of size ${len}`);
+ }
+ const ordinal = this._ordinals[idx];
+ if (ordinal === undefined) {
+ throw new RangeError(`Index ${i} out of bounds for PeriodIndex of size ${len}`);
+ }
+ return new Period(ordinal, this.freq);
+ }
+
+ /**
+ * Return the 0-based position of the first occurrence of `period`.
+ *
+ * Throws if the period is not found or has a different frequency.
+ */
+ getLoc(period: Period): number {
+ if (period.freq !== this.freq) {
+ throw new Error(
+ `getLoc: period frequency "${period.freq}" does not match index frequency "${this.freq}"`,
+ );
+ }
+ const idx = this._ordinals.indexOf(period.ordinal);
+ if (idx < 0) {
+ throw new Error(`Period ${period} not found in index`);
+ }
+ return idx;
+ }
+
+ /**
+ * Return true if `period` appears in this index.
+ */
+ contains(period: Period): boolean {
+ if (period.freq !== this.freq) {
+ return false;
+ }
+ return this._ordinals.includes(period.ordinal);
+ }
+
+ /**
+ * Return an array of all {@link Period} objects in this index.
+ */
+ toArray(): Period[] {
+ return this._ordinals.map((ord) => new Period(ord, this.freq));
+ }
+
+ // ─── transformation ───────────────────────────────────────────────────────
+
+ /**
+ * Return a new PeriodIndex shifted `n` periods forward (`n > 0`) or
+ * backward (`n < 0`).
+ */
+ shift(n: number): PeriodIndex {
+ return new PeriodIndex(
+ this._ordinals.map((ord) => ord + n),
+ this.freq,
+ this.name,
+ );
+ }
+
+ /**
+ * Convert all periods to a different frequency.
+ *
+ * The `how` parameter controls which point within each period is used:
+ * - `"start"` (default) — start of each current period
+ * - `"end"` — end of each current period
+ */
+ asfreq(freq: PeriodFreq | string, how: "start" | "end" = "start"): PeriodIndex {
+ const newFreq = normFreq(freq as string);
+ const newOrdinals = this._ordinals.map((ord) => {
+ const ms = how === "end" ? ordinalToEndMs(ord, this.freq) : ordinalToStartMs(ord, this.freq);
+ return dateToOrdinal(new Date(ms), newFreq);
+ });
+ return new PeriodIndex(newOrdinals, newFreq, this.name);
+ }
+
+ /**
+ * Return a new PeriodIndex sorted in ascending order of ordinal.
+ */
+ sort(): PeriodIndex {
+ const sorted = [...this._ordinals].sort((a, b) => a - b);
+ return new PeriodIndex(sorted, this.freq, this.name);
+ }
+
+ /**
+ * Return a copy of this index with duplicates removed (first occurrence wins).
+ */
+ unique(): PeriodIndex {
+ const seen = new Set();
+ const unique: number[] = [];
+ for (const ord of this._ordinals) {
+ if (!seen.has(ord)) {
+ seen.add(ord);
+ unique.push(ord);
+ }
+ }
+ return new PeriodIndex(unique, this.freq, this.name);
+ }
+
+ /**
+ * Return the start {@link Date} for every period in the index.
+ */
+ toDatetimeStart(): Date[] {
+ return this._ordinals.map((ord) => new Date(ordinalToStartMs(ord, this.freq)));
+ }
+
+ /**
+ * Return the end {@link Date} (last ms) for every period in the index.
+ */
+ toDatetimeEnd(): Date[] {
+ return this._ordinals.map((ord) => new Date(ordinalToEndMs(ord, this.freq)));
+ }
+
+ // ─── iteration / serialisation ────────────────────────────────────────────
+
+ /** Iterate over all periods in order. */
+ [Symbol.iterator](): Iterator {
+ let i = 0;
+ const ordinals = this._ordinals;
+ const freq = this.freq;
+ return {
+ next(): IteratorResult {
+ if (i >= ordinals.length) {
+ return { done: true, value: undefined };
+ }
+ const ordinal = ordinals[i];
+ i++;
+ if (ordinal === undefined) {
+ return { done: true, value: undefined };
+ }
+ return { done: false, value: new Period(ordinal, freq) };
+ },
+ };
+ }
+
+ /** Human-readable summary string. */
+ toString(): string {
+ const preview = this._ordinals
+ .slice(0, 4)
+ .map((ord) => formatPeriod(ord, this.freq))
+ .join(", ");
+ const suffix = this._ordinals.length > 4 ? ", ..." : "";
+ return `PeriodIndex([${preview}${suffix}], freq="${this.freq}", length=${this._ordinals.length})`;
+ }
+}
diff --git a/src/core/reindex.ts b/src/core/reindex.ts
new file mode 100644
index 00000000..c89e1c8d
--- /dev/null
+++ b/src/core/reindex.ts
@@ -0,0 +1,352 @@
+/**
+ * reindex — align a Series or DataFrame to a new axis (index or columns).
+ *
+ * Mirrors `pandas.Series.reindex` / `pandas.DataFrame.reindex`:
+ *
+ * - {@link reindexSeries} — realign a Series to `newIndex`, inserting `fillValue`
+ * (or filling via a fill method) for labels absent in the original.
+ * - {@link reindexDataFrame} — realign a DataFrame's rows (`index`), columns
+ * (`columns`), or both, with optional fill semantics.
+ *
+ * ### Supported fill methods
+ *
+ * | Method | Alias | Description |
+ * |--------|-------|-------------|
+ * | `"ffill"` | `"pad"` | Propagate last valid value forward |
+ * | `"bfill"` | `"backfill"` | Propagate next valid value backward |
+ * | `"nearest"` | — | Use the closest valid value; prefer forward on tie |
+ *
+ * The `limit` option caps the number of consecutive NaN slots filled when
+ * using `"ffill"` or `"bfill"`.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [10, 20, 30], index: new Index({ data: ["a", "b", "c"] }) });
+ * reindexSeries(s, ["b", "c", "d"]);
+ * // Series [20, 30, null] with index ["b", "c", "d"]
+ *
+ * reindexSeries(s, ["a", "x", "c"], { method: "ffill" });
+ * // Series [10, 10, 30] — "x" filled forward from "a"
+ * ```
+ *
+ * @module
+ */
+
+import type { FillMethod, Label, Scalar } from "../types.ts";
+import { Index } from "./base-index.ts";
+import { DataFrame } from "./frame.ts";
+import { Series } from "./series.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
/** Fill method including "nearest" (not in the base FillMethod union). */
export type ReindexMethod = FillMethod | "nearest";

/** Options for {@link reindexSeries}. */
export interface ReindexSeriesOptions {
  /** Scalar to insert for labels not found in the original index (default: `null`). */
  fillValue?: Scalar;
  /**
   * Fill method for consecutive missing entries created by reindexing.
   * - `"ffill"` / `"pad"` — propagate last valid value forward.
   * - `"bfill"` / `"backfill"` — propagate next valid value backward.
   * - `"nearest"` — use the closest valid value.
   */
  method?: ReindexMethod;
  /**
   * Maximum number of consecutive NaN values to fill when using `"ffill"`
   * or `"bfill"`. Has no effect for `"nearest"`.
   */
  limit?: number;
}

/**
 * Options for {@link reindexDataFrame}.
 *
 * Extends {@link ReindexSeriesOptions}, so `fillValue` / `method` / `limit`
 * may be combined with `index` / `columns`.
 */
export interface ReindexDataFrameOptions extends ReindexSeriesOptions {
  /**
   * New row-index labels. When provided, every row is realigned to this
   * index (same semantics as `Series.reindex`).
   */
  index?: readonly Label[] | Index;
  /**
   * New column labels. When provided, the DataFrame's columns are
   * reordered / extended. Fill methods apply per-column when rows are
   * also reindexed; for columns-only reindexing the fill value is used.
   */
  columns?: readonly Label[] | Index;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Normalise a label array or Index into an `Index`. */
+function toIndex(src: readonly Label[] | Index): Index {
+ if (src instanceof Index) {
+ return src as Index;
+ }
+ return new Index(src as Label[]);
+}
+
+/** Build a label → [positions] lookup for an index. */
+function buildLabelMap(idx: Index): Map {
+ const map = new Map();
+ const labels = idx.toArray();
+ for (let i = 0; i < labels.length; i++) {
+ const key = String(labels[i]);
+ const existing = map.get(key);
+ if (existing !== undefined) {
+ existing.push(i);
+ } else {
+ map.set(key, [i]);
+ }
+ }
+ return map;
+}
+
+/** Forward-fill: propagate last valid value to the right. */
+function applyFfill(
+ values: Scalar[],
+ present: readonly boolean[],
+ limit: number | undefined,
+): Scalar[] {
+ const out = values.slice();
+ let lastVal: Scalar = null;
+ let streak = 0;
+ for (let i = 0; i < out.length; i++) {
+ if (present[i]) {
+ lastVal = out[i];
+ streak = 0;
+ } else if (!isMissing(lastVal) && (limit === undefined || streak < limit)) {
+ out[i] = lastVal;
+ streak++;
+ } else if (!present[i]) {
+ streak++;
+ }
+ }
+ return out;
+}
+
+/** Backward-fill: propagate next valid value to the left. */
+function applyBfill(
+ values: Scalar[],
+ present: readonly boolean[],
+ limit: number | undefined,
+): Scalar[] {
+ const out = values.slice();
+ let nextVal: Scalar = null;
+ let streak = 0;
+ for (let i = out.length - 1; i >= 0; i--) {
+ if (present[i]) {
+ nextVal = out[i];
+ streak = 0;
+ } else if (!isMissing(nextVal) && (limit === undefined || streak < limit)) {
+ out[i] = nextVal;
+ streak++;
+ } else if (!present[i]) {
+ streak++;
+ }
+ }
+ return out;
+}
+
+/**
+ * Nearest-fill: for each missing slot, use the closest valid value.
+ * On a tie (equidistant left and right), prefer the right (forward) value —
+ * matching pandas' `method="nearest"` behaviour.
+ */
+function applyNearest(values: Scalar[], present: readonly boolean[]): Scalar[] {
+ const n = values.length;
+ const out = values.slice();
+
+ // left[i] = { dist, val } of the nearest valid position to the left (or null)
+ const leftDist: number[] = new Array(n).fill(-1);
+ const leftVal: Scalar[] = new Array(n).fill(null);
+ let lastIdx = -1;
+ for (let i = 0; i < n; i++) {
+ if (present[i]) {
+ lastIdx = i;
+ }
+ if (lastIdx >= 0) {
+ leftDist[i] = i - lastIdx;
+ leftVal[i] = values[lastIdx];
+ }
+ }
+
+ // right[i] = { dist, val } of the nearest valid position to the right (or null)
+ const rightDist: number[] = new Array(n).fill(-1);
+ const rightVal: Scalar[] = new Array(n).fill(null);
+ let nextIdx = -1;
+ for (let i = n - 1; i >= 0; i--) {
+ if (present[i]) {
+ nextIdx = i;
+ }
+ if (nextIdx >= 0) {
+ rightDist[i] = nextIdx - i;
+ rightVal[i] = values[nextIdx];
+ }
+ }
+
+ for (let i = 0; i < n; i++) {
+ if (present[i]) {
+ continue;
+ }
+ const ld = leftDist[i];
+ const rd = rightDist[i];
+ if (ld === -1 && rd === -1) {
+ out[i] = null;
+ } else if (ld === -1) {
+ out[i] = rightVal[i];
+ } else if (rd === -1) {
+ out[i] = leftVal[i];
+ } else if (rd !== undefined && ld !== undefined && rd <= ld) {
+ // prefer right on tie
+ out[i] = rightVal[i];
+ } else {
+ out[i] = leftVal[i];
+ }
+ }
+
+ return out;
+}
+
+/** Apply the chosen fill method to a (values, present) pair. */
+function applyFillMethod(
+ values: Scalar[],
+ present: readonly boolean[],
+ method: ReindexMethod,
+ limit: number | undefined,
+): Scalar[] {
+ if (method === "ffill" || method === "pad") {
+ return applyFfill(values, present, limit);
+ }
+ if (method === "bfill" || method === "backfill") {
+ return applyBfill(values, present, limit);
+ }
+ // nearest
+ return applyNearest(values, present);
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Realign a Series to a new index.
+ *
+ * Labels present in `newIndex` but absent in `series.index` become `fillValue`
+ * (default `null`). Labels absent in `newIndex` are dropped.
+ *
+ * When `method` is supplied the fill is applied after the initial alignment,
+ * so only entries that were *newly missing* (not in the original index) are
+ * candidates for filling — exactly matching pandas semantics.
+ *
+ * @example
+ * ```ts
+ * const s = new Series({ data: [1, 2, 3], index: new Index({ data: [0, 1, 2] }) });
+ * reindexSeries(s, [1, 3, 5], { fillValue: 0 });
+ * // Series [2, 0, 0]
+ *
+ * reindexSeries(s, [0, 1, 2, 3, 4], { method: "ffill" });
+ * // Series [1, 2, 3, 3, 3]
+ * ```
+ */
+export function reindexSeries(
+ series: Series,
+ newIndex: readonly Label[] | Index,
+ options: ReindexSeriesOptions = {},
+): Series {
+ const { fillValue = null, method, limit } = options;
+
+ const newIdx = toIndex(newIndex);
+ const newLabels = newIdx.toArray();
+ const n = newLabels.length;
+
+ const labelMap = buildLabelMap(series.index);
+
+ const resultValues: Scalar[] = new Array(n).fill(fillValue);
+ const present: boolean[] = new Array(n).fill(false);
+
+ for (let i = 0; i < n; i++) {
+ const key = String(newLabels[i]);
+ const positions = labelMap.get(key);
+ if (positions !== undefined && positions.length > 0) {
+ const pos = positions[0];
+ if (pos !== undefined) {
+ resultValues[i] = series.values[pos] ?? null;
+ present[i] = true;
+ }
+ }
+ }
+
+ const finalValues =
+ method !== undefined ? applyFillMethod(resultValues, present, method, limit) : resultValues;
+
+ return new Series({
+ data: finalValues as T[],
+ index: newIdx,
+ name: series.name,
+ });
+}
+
+/**
+ * Realign a DataFrame's rows (`index`), columns, or both.
+ *
+ * Supply at least one of `index` or `columns` in the options.
+ * Row reindexing reindexes each column's Series independently; column
+ * reindexing reorders / adds columns (new columns filled with `fillValue`).
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * reindexDataFrame(df, {
+ * index: [0, 1, 2],
+ * columns: ["a", "b", "c"],
+ * fillValue: 0,
+ * });
+ * // shape [3, 3]; row 2 → [0, 0, 0]; column "c" → [0, 0, 0]
+ * ```
+ */
+export function reindexDataFrame(df: DataFrame, options: ReindexDataFrameOptions = {}): DataFrame {
+ const { index: newRowIndex, columns: newColumns, ...seriesOpts } = options;
+
+ // Step 1 — optionally reindex rows
+ let working = df;
+ if (newRowIndex !== undefined) {
+ const newIdx = toIndex(newRowIndex);
+ const colNames = df.columns.toArray().map(String);
+ const colMap = new Map>();
+ for (const name of colNames) {
+ colMap.set(name, reindexSeries(df.col(name), newIdx, seriesOpts));
+ }
+ working = new DataFrame(colMap, newIdx);
+ }
+
+ // Step 2 — optionally reindex columns
+ if (newColumns !== undefined) {
+ const newColIdx = toIndex(newColumns);
+ const newColLabels = newColIdx.toArray().map(String);
+ const existingCols = new Set(working.columns.toArray().map(String));
+ const fillVal = (seriesOpts.fillValue ?? null) as Scalar;
+ const colMap = new Map>();
+ const rowCount = working.shape[0];
+ const rowIdx = working.index;
+
+ for (const name of newColLabels) {
+ if (existingCols.has(name)) {
+ colMap.set(name, working.col(name));
+ } else {
+ colMap.set(
+ name,
+ new Series({
+ data: new Array(rowCount).fill(fillVal),
+ index: rowIdx,
+ name,
+ }),
+ );
+ }
+ }
+ return new DataFrame(colMap, rowIdx);
+ }
+
+ return working;
+}
diff --git a/src/core/searchsorted.ts b/src/core/searchsorted.ts
new file mode 100644
index 00000000..e00333db
--- /dev/null
+++ b/src/core/searchsorted.ts
@@ -0,0 +1,267 @@
+/**
+ * searchsorted — binary search on sorted arrays.
+ *
+ * Mirrors `numpy.searchsorted` and `pandas.Index.searchsorted`:
+ * given a **sorted** array `a`, return the index at which a value `v`
+ * should be inserted to keep `a` sorted.
+ *
+ * - `side = "left"` (default) — insertion point before any equal elements
+ * (`a[i-1] < v <= a[i]`)
+ * - `side = "right"` — insertion point after any equal elements
+ * (`a[i-1] <= v < a[i]`)
+ *
+ * @module
+ */
+
+import type { Scalar } from "../types.ts";
+
+// ─── types ────────────────────────────────────────────────────────────────────
+
+/** Which side of equal elements to return. */
+export type SearchSortedSide = "left" | "right";
+
+/** Options for {@link searchsorted} and {@link searchsortedMany}. */
+export interface SearchSortedOptions {
+ /**
+ * Whether to return the insertion point before (`"left"`) or after
+ * (`"right"`) existing equal elements.
+ * @defaultValue `"left"`
+ */
+ readonly side?: SearchSortedSide;
+
+ /**
+ * An integer permutation that sorts `a` into ascending order.
+ * When provided, `a[sorter[i]]` is assumed to be sorted ascending.
+ * Mirrors `numpy.searchsorted`'s `sorter` parameter.
+ */
+ readonly sorter?: readonly number[];
+
+ /**
+ * Custom comparator returning a negative number when `a < b`, zero when
+ * `a === b`, and a positive number when `a > b`.
+ *
+ * If omitted, a default comparator is used that handles `number`, `string`,
+ * `boolean`, `bigint`, `Date`, `null`, and `undefined`:
+ * - `null` and `undefined` are treated as **less than** all other values.
+ * - Mixed-type comparisons fall back to `String()`.
+ */
+ readonly compareFn?: (a: Scalar, b: Scalar) => number;
+}
+
+// ─── internal helpers ─────────────────────────────────────────────────────────
+
+/** Returns true when `v` is null or undefined (i.e. a missing value). */
+function isMissing(v: Scalar): v is null | undefined {
+ return v === null || v === undefined;
+}
+
+/**
+ * Default scalar comparator.
+ *
+ * Ordering contract:
+ * 1. null/undefined < every non-missing value
+ * 2. Numbers, strings, booleans, bigints, and Dates compare naturally.
+ * 3. Everything else falls back to String() comparison.
+ */
+function defaultCompare(a: Scalar, b: Scalar): number {
+ const aMiss = isMissing(a);
+ const bMiss = isMissing(b);
+ if (aMiss && bMiss) {
+ return 0;
+ }
+ if (aMiss) {
+ return -1;
+ }
+ if (bMiss) {
+ return 1;
+ }
+
+ // Same type fast-paths
+ if (typeof a === "number" && typeof b === "number") {
+ // NaN sorts last (treat NaN as greater than everything)
+ const aNaN = Number.isNaN(a);
+ const bNaN = Number.isNaN(b);
+ if (aNaN && bNaN) {
+ return 0;
+ }
+ if (aNaN) {
+ return 1;
+ }
+ if (bNaN) {
+ return -1;
+ }
+ return a - b;
+ }
+
+ if (typeof a === "bigint" && typeof b === "bigint") {
+ return a < b ? -1 : a > b ? 1 : 0;
+ }
+
+ if (a instanceof Date && b instanceof Date) {
+ return a.getTime() - b.getTime();
+ }
+
+ if (typeof a === "string" && typeof b === "string") {
+ return a < b ? -1 : a > b ? 1 : 0;
+ }
+
+ if (typeof a === "boolean" && typeof b === "boolean") {
+ return Number(a) - Number(b);
+ }
+
+ // Cross-type fallback
+ const sa = String(a);
+ const sb = String(b);
+ return sa < sb ? -1 : sa > sb ? 1 : 0;
+}
+
+/**
+ * Core binary search returning the insertion index for `v` into the already-
+ * sorted sequence `get(0)…get(n-1)`.
+ *
+ * @param n - Length of the sorted sequence.
+ * @param get - Element accessor by position.
+ * @param v - Value to locate.
+ * @param side - "left" or "right".
+ * @param cmp - Comparator.
+ */
+function bisect(
+ n: number,
+ get: (i: number) => Scalar,
+ v: Scalar,
+ side: SearchSortedSide,
+ cmp: (a: Scalar, b: Scalar) => number,
+): number {
+ let lo = 0;
+ let hi = n;
+ if (side === "left") {
+ while (lo < hi) {
+ const mid = (lo + hi) >>> 1;
+ if (cmp(get(mid), v) < 0) {
+ lo = mid + 1;
+ } else {
+ hi = mid;
+ }
+ }
+ } else {
+ while (lo < hi) {
+ const mid = (lo + hi) >>> 1;
+ if (cmp(get(mid), v) <= 0) {
+ lo = mid + 1;
+ } else {
+ hi = mid;
+ }
+ }
+ }
+ return lo;
+}
+
+function valueAt(values: readonly Scalar[], index: number): Scalar {
+ const value = values[index];
+ if (value === undefined) {
+ throw new RangeError("searchsorted: index out of bounds");
+ }
+ return value;
+}
+
+function valueAtSorted(
+ values: readonly Scalar[],
+ sorter: readonly number[],
+ index: number,
+): Scalar {
+ const sortedIndex = sorter[index];
+ if (sortedIndex === undefined) {
+ throw new RangeError("searchsorted: sorter index out of bounds");
+ }
+ return valueAt(values, sortedIndex);
+}
+
+// ─── public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Find the insertion index for a **single** value in a sorted array.
+ *
+ * ```ts
+ * searchsorted([1, 3, 5, 7], 5) // → 2 (before the 5)
+ * searchsorted([1, 3, 5, 7], 5, { side: "right" }) // → 3 (after the 5)
+ * searchsorted([1, 3, 5, 7], 4) // → 2 (where 4 would go)
+ * searchsorted([1, 3, 5, 7], 0) // → 0
+ * searchsorted([1, 3, 5, 7], 99) // → 4
+ * ```
+ *
+ * Mirrors `numpy.searchsorted(a, v)` and `pandas.Index.searchsorted(v)`.
+ *
+ * @param a - Sorted array to search (ascending order assumed).
+ * @param v - Value to locate.
+ * @param options - `side`, `sorter`, and optional `compareFn`.
+ * @returns Insertion index in `[0, a.length]`.
+ */
+export function searchsorted(
+ a: readonly Scalar[],
+ v: Scalar,
+ options: SearchSortedOptions = {},
+): number {
+ const { side = "left", sorter, compareFn = defaultCompare } = options;
+ const n = a.length;
+ if (sorter !== undefined) {
+ return bisect(n, (i) => valueAtSorted(a, sorter, i), v, side, compareFn);
+ }
+ return bisect(n, (i) => valueAt(a, i), v, side, compareFn);
+}
+
+/**
+ * Find insertion indices for **multiple** values in a sorted array.
+ *
+ * ```ts
+ * searchsortedMany([1, 3, 5, 7], [2, 5, 8])
+ * // → [1, 2, 4]
+ *
+ * searchsortedMany([1, 3, 5, 7], [2, 5, 8], { side: "right" })
+ * // → [1, 3, 4]
+ * ```
+ *
+ * Mirrors `numpy.searchsorted(a, [v1, v2, ...])`.
+ *
+ * @param a - Sorted array to search (ascending order assumed).
+ * @param vs - Values to locate.
+ * @param options - `side`, `sorter`, and optional `compareFn`.
+ * @returns Array of insertion indices, one per element of `vs`.
+ */
+export function searchsortedMany(
+ a: readonly Scalar[],
+ vs: readonly Scalar[],
+ options: SearchSortedOptions = {},
+): number[] {
+ const { side = "left", sorter, compareFn = defaultCompare } = options;
+ const n = a.length;
+ const get: (i: number) => Scalar =
+ sorter !== undefined ? (i) => valueAtSorted(a, sorter, i) : (i) => valueAt(a, i);
+ return vs.map((v) => bisect(n, get, v, side, compareFn));
+}
+
+/**
+ * Return the `sorter` array (argsort) that would sort `a` in ascending order.
+ *
+ * Useful for building the `sorter` parameter when `a` is not pre-sorted:
+ *
+ * ```ts
+ * const a = [5, 1, 3];
+ * const sorter = argsortScalars(a);
+ * // sorter → [1, 2, 0] (indices of 1, 3, 5 in a)
+ * searchsorted(a, 2, { sorter }) // → 1 (between 1 and 3)
+ * ```
+ *
+ * Mirrors the `sorter` workflow in `numpy.searchsorted`.
+ *
+ * @param a - Array to compute the sort permutation for.
+ * @param compareFn - Optional custom comparator (default handles all Scalar types).
+ * @returns Integer permutation in `[0, a.length)` that sorts `a` ascending.
+ */
+export function argsortScalars(
+ a: readonly Scalar[],
+ compareFn: (x: Scalar, y: Scalar) => number = defaultCompare,
+): number[] {
+ const indices = a.map((_, i) => i);
+ indices.sort((i, j) => compareFn(valueAt(a, i), valueAt(a, j)));
+ return indices;
+}
diff --git a/src/core/timedelta.ts b/src/core/timedelta.ts
new file mode 100644
index 00000000..4eb9c788
--- /dev/null
+++ b/src/core/timedelta.ts
@@ -0,0 +1,660 @@
+/**
+ * Timedelta and TimedeltaIndex — fixed-duration time spans.
+ *
+ * Mirrors `pandas.Timedelta` and `pandas.TimedeltaIndex`.
+ *
+ * A {@link Timedelta} represents a duration (difference between two instants),
+ * stored internally as a whole number of milliseconds. It mirrors the most
+ * commonly used subset of `pandas.Timedelta`:
+ *
+ * - Construction from component fields (`days`, `hours`, `minutes`, …)
+ * - Construction from a total millisecond count
+ * - Parsing an ISO-8601-like string (`"P1DT2H3M4.5S"` / `"1 days 02:03:04.500"`)
+ * - Arithmetic: add, subtract, multiply (by scalar), negate, abs
+ * - Comparison and equality
+ * - Component accessors (`days`, `hours`, `minutes`, `seconds`, `milliseconds`)
+ * - Total-unit conversions (`totalDays`, `totalHours`, `totalMinutes`, `totalSeconds`)
+ * - Human-readable `toString()`
+ *
+ * A {@link TimedeltaIndex} is an ordered sequence of {@link Timedelta} values
+ * suitable for use as a row or column index.
+ *
+ * @example
+ * ```ts
+ * const td = Timedelta.fromComponents({ days: 1, hours: 2, minutes: 30 });
+ * td.toString(); // "1 days 02:30:00"
+ * td.totalHours; // 26.5
+ *
+ * const idx = TimedeltaIndex.fromRange(
+ * Timedelta.fromComponents({ hours: 0 }),
+ * Timedelta.fromComponents({ hours: 4 }),
+ * Timedelta.fromComponents({ hours: 1 }),
+ * );
+ * idx.size; // 5
+ * idx.at(2).totalHours; // 2
+ * ```
+ *
+ * @module
+ */
+
+// ─── types ───────────────────────────────────────────────────────────────────
+
+/** Component fields accepted by {@link Timedelta.fromComponents}. */
+export interface TimedeltaComponents {
+ readonly weeks?: number;
+ readonly days?: number;
+ readonly hours?: number;
+ readonly minutes?: number;
+ readonly seconds?: number;
+ readonly milliseconds?: number;
+}
+
+/** Options accepted by {@link TimedeltaIndex} factory methods. */
+export interface TimedeltaIndexOptions {
+ /** Optional name label for the index. */
+ readonly name?: string | null;
+}
+
+// ─── internal constants ───────────────────────────────────────────────────────
+
+const MS_PER_SECOND = 1_000;
+const MS_PER_MINUTE = 60_000;
+const MS_PER_HOUR = 3_600_000;
+const MS_PER_DAY = 86_400_000;
+const MS_PER_WEEK = 7 * MS_PER_DAY;
+
+// ─── top-level regex constants ────────────────────────────────────────────────
+
+/** ISO 8601 duration: P[nD][T[nH][nM][nS]] (only the subset we support) */
+const RE_ISO =
+ /^-?P(?:(\d+(?:\.\d+)?)W)?(?:(\d+(?:\.\d+)?)D)?(?:T(?:(\d+(?:\.\d+)?)H)?(?:(\d+(?:\.\d+)?)M)?(?:(\d+(?:\.\d+)?)S)?)?$/i;
+
+/** pandas-style: "N days HH:MM:SS[.mmm]" */
+const RE_PANDAS = /^(-)?(\d+) days? (\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?$/i;
+
+/** Simple "HH:MM:SS[.mmm]" with optional sign */
+const RE_HHMMSS = /^(-)?(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?$/;
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Floor-divide `a` by `b`, returning a non-negative remainder. */
+function floorDiv(a: number, b: number): [quotient: number, remainder: number] {
+ const q = Math.floor(a / b);
+ return [q, a - q * b];
+}
+
+/** Parse a fractional-seconds or sub-second string (up to 3 ms digits). */
+function parseFrac(frac: string | undefined): number {
+ if (frac === undefined || frac === "") {
+ return 0;
+ }
+ // Pad / truncate to exactly 3 digits → milliseconds
+ return Number(frac.slice(0, 3).padEnd(3, "0"));
+}
+
+/** Zero-pad a number to at least 2 digits. */
+function pad2(n: number): string {
+ return String(Math.abs(n)).padStart(2, "0");
+}
+
+// ─── Timedelta ────────────────────────────────────────────────────────────────
+
+/**
+ * A fixed-duration time span, stored as a whole number of milliseconds.
+ *
+ * Construct via {@link fromComponents}, {@link fromMilliseconds}, or
+ * {@link parse}.
+ */
+export class Timedelta {
+ /** Total duration in milliseconds (may be negative). */
+ readonly totalMilliseconds: number;
+
+ private constructor(ms: number) {
+ if (!Number.isFinite(ms)) {
+ throw new RangeError(`Timedelta: milliseconds must be finite, got ${ms}`);
+ }
+ this.totalMilliseconds = Math.trunc(ms);
+ }
+
+ // ── factories ────────────────────────────────────────────────────────────
+
+ /**
+ * Create a Timedelta from individual component fields.
+ *
+ * All fields default to `0`. Values may be fractional or negative.
+ *
+ * @example
+ * ```ts
+ * Timedelta.fromComponents({ days: 1, hours: 12 }).totalHours; // 36
+ * ```
+ */
+ static fromComponents(c: TimedeltaComponents): Timedelta {
+ const ms =
+ (c.weeks ?? 0) * MS_PER_WEEK +
+ (c.days ?? 0) * MS_PER_DAY +
+ (c.hours ?? 0) * MS_PER_HOUR +
+ (c.minutes ?? 0) * MS_PER_MINUTE +
+ (c.seconds ?? 0) * MS_PER_SECOND +
+ (c.milliseconds ?? 0);
+ return new Timedelta(ms);
+ }
+
+ /**
+ * Create a Timedelta from a total millisecond count.
+ *
+ * @example
+ * ```ts
+ * Timedelta.fromMilliseconds(3_600_000).totalHours; // 1
+ * ```
+ */
+ static fromMilliseconds(ms: number): Timedelta {
+ return new Timedelta(ms);
+ }
+
+ /**
+ * Parse a string representation into a Timedelta.
+ *
+ * Supported formats:
+ * - ISO 8601 subset: `"P1DT2H3M4S"`, `"PT1.5H"`, `"-P1D"`
+ * - pandas-style: `"1 days 02:03:04"`, `"2 days 06:30:00.500"`
+ * - HH:MM:SS[.mmm]: `"01:30:00"`, `"-02:15:00.250"`
+ *
+ * @throws {SyntaxError} When the string cannot be parsed.
+ */
+ static parse(s: string): Timedelta {
+ const trimmed = s.trim();
+
+ // ISO 8601
+ const iso = RE_ISO.exec(trimmed);
+ if (iso !== null) {
+ const sign = trimmed.startsWith("-") ? -1 : 1;
+ const [, wStr, dStr, hStr, mStr, sStr] = iso;
+ const ms =
+ Number(wStr ?? 0) * MS_PER_WEEK +
+ Number(dStr ?? 0) * MS_PER_DAY +
+ Number(hStr ?? 0) * MS_PER_HOUR +
+ Number(mStr ?? 0) * MS_PER_MINUTE +
+ Number(sStr ?? 0) * MS_PER_SECOND;
+ return new Timedelta(sign * ms);
+ }
+
+ // pandas-style "N days HH:MM:SS[.mmm]"
+ const pandas = RE_PANDAS.exec(trimmed);
+ if (pandas !== null) {
+ const [, signStr, daysStr, hStr, mStr, sStr, fracStr] = pandas;
+ const sign = signStr === "-" ? -1 : 1;
+ const ms =
+ Number(daysStr) * MS_PER_DAY +
+ Number(hStr) * MS_PER_HOUR +
+ Number(mStr) * MS_PER_MINUTE +
+ Number(sStr) * MS_PER_SECOND +
+ parseFrac(fracStr);
+ return new Timedelta(sign * ms);
+ }
+
+ // HH:MM:SS[.mmm]
+ const hms = RE_HHMMSS.exec(trimmed);
+ if (hms !== null) {
+ const [, signStr, hStr, mStr, sStr, fracStr] = hms;
+ const sign = signStr === "-" ? -1 : 1;
+ const ms =
+ Number(hStr) * MS_PER_HOUR +
+ Number(mStr) * MS_PER_MINUTE +
+ Number(sStr) * MS_PER_SECOND +
+ parseFrac(fracStr);
+ return new Timedelta(sign * ms);
+ }
+
+ throw new SyntaxError(`Timedelta.parse: cannot parse "${s}"`);
+ }
+
+ // ── component accessors ──────────────────────────────────────────────────
+
+ /**
+ * Whole days component (floor towards zero).
+ *
+ * For negative durations the sign is preserved (e.g. -1 for −23 h).
+ */
+ get days(): number {
+ return Math.trunc(this.totalMilliseconds / MS_PER_DAY);
+ }
+
+ /** Hours component (0–23), always non-negative within the day. */
+ get hours(): number {
+ return Math.floor(Math.abs(this.totalMilliseconds % MS_PER_DAY) / MS_PER_HOUR);
+ }
+
+ /** Minutes component (0–59). */
+ get minutes(): number {
+ return Math.floor((Math.abs(this.totalMilliseconds) % MS_PER_HOUR) / MS_PER_MINUTE);
+ }
+
+ /** Seconds component (0–59). */
+ get seconds(): number {
+ return Math.floor((Math.abs(this.totalMilliseconds) % MS_PER_MINUTE) / MS_PER_SECOND);
+ }
+
+ /** Milliseconds component (0–999). */
+ get milliseconds(): number {
+ return Math.abs(this.totalMilliseconds) % MS_PER_SECOND;
+ }
+
+ // ── total-unit conversions ────────────────────────────────────────────────
+
+ /** Duration expressed in whole + fractional days. */
+ get totalDays(): number {
+ return this.totalMilliseconds / MS_PER_DAY;
+ }
+
+ /** Duration expressed in whole + fractional hours. */
+ get totalHours(): number {
+ return this.totalMilliseconds / MS_PER_HOUR;
+ }
+
+ /** Duration expressed in whole + fractional minutes. */
+ get totalMinutes(): number {
+ return this.totalMilliseconds / MS_PER_MINUTE;
+ }
+
+ /** Duration expressed in whole + fractional seconds. */
+ get totalSeconds(): number {
+ return this.totalMilliseconds / MS_PER_SECOND;
+ }
+
+ // ── arithmetic ────────────────────────────────────────────────────────────
+
+ /**
+ * Return `this + other`.
+ *
+ * @example
+ * ```ts
+ * Timedelta.fromComponents({ hours: 1 })
+ * .add(Timedelta.fromComponents({ minutes: 30 }))
+ * .totalMinutes; // 90
+ * ```
+ */
+ add(other: Timedelta): Timedelta {
+ return new Timedelta(this.totalMilliseconds + other.totalMilliseconds);
+ }
+
+ /**
+ * Return `this - other`.
+ *
+ * @example
+ * ```ts
+ * Timedelta.fromComponents({ hours: 2 })
+ * .sub(Timedelta.fromComponents({ hours: 1 }))
+ * .totalHours; // 1
+ * ```
+ */
+ sub(other: Timedelta): Timedelta {
+ return new Timedelta(this.totalMilliseconds - other.totalMilliseconds);
+ }
+
+ /**
+ * Return `this * scalar`.
+ *
+ * @example
+ * ```ts
+ * Timedelta.fromComponents({ hours: 1 }).mul(3).totalHours; // 3
+ * ```
+ */
+ mul(scalar: number): Timedelta {
+ return new Timedelta(this.totalMilliseconds * scalar);
+ }
+
+ /**
+ * Return the negation of this duration.
+ *
+ * @example
+ * ```ts
+ * Timedelta.fromComponents({ hours: 1 }).negate().totalHours; // -1
+ * ```
+ */
+ negate(): Timedelta {
+ return new Timedelta(-this.totalMilliseconds);
+ }
+
+ /**
+ * Return the absolute value of this duration.
+ *
+ * @example
+ * ```ts
+ * Timedelta.fromComponents({ hours: -3 }).abs().totalHours; // 3
+ * ```
+ */
+ abs(): Timedelta {
+ return new Timedelta(Math.abs(this.totalMilliseconds));
+ }
+
+ /**
+ * Divide by another Timedelta, returning the ratio as a plain number.
+ *
+ * @throws {RangeError} When `other` is zero.
+ */
+ divBy(other: Timedelta): number {
+ if (other.totalMilliseconds === 0) {
+ throw new RangeError("Timedelta.divBy: cannot divide by zero duration");
+ }
+ return this.totalMilliseconds / other.totalMilliseconds;
+ }
+
+ // ── comparison ────────────────────────────────────────────────────────────
+
+ /**
+ * Compare two Timedeltas.
+ *
+ * Returns `< 0` if `this < other`, `0` if equal, `> 0` if `this > other`.
+ */
+ compareTo(other: Timedelta): number {
+ return this.totalMilliseconds - other.totalMilliseconds;
+ }
+
+ /** Return `true` if `this` represents the same duration as `other`. */
+ equals(other: Timedelta): boolean {
+ return this.totalMilliseconds === other.totalMilliseconds;
+ }
+
+ // ── string representation ─────────────────────────────────────────────────
+
+ /**
+ * Return a pandas-compatible string: `"N days HH:MM:SS[.mmm]"`.
+ *
+ * For negative durations the sign is shown as a leading `-`.
+ * The millisecond part is omitted when it is zero.
+ *
+ * @example
+ * ```ts
+ * Timedelta.fromComponents({ days: 1, hours: 2, minutes: 3, seconds: 4 })
+ * .toString(); // "1 days 02:03:04"
+ * Timedelta.fromComponents({ hours: -25 })
+ * .toString(); // "-1 days 01:00:00"
+ * ```
+ */
+ toString(): string {
+ const totalMs = this.totalMilliseconds;
+ const sign = totalMs < 0 ? "-" : "";
+ const absMs = Math.abs(totalMs);
+
+ const [daysQ, remAfterDays] = floorDiv(absMs, MS_PER_DAY);
+ const [hoursQ, remAfterHours] = floorDiv(remAfterDays, MS_PER_HOUR);
+ const [minutesQ, remAfterMinutes] = floorDiv(remAfterHours, MS_PER_MINUTE);
+ const [secondsQ, msQ] = floorDiv(remAfterMinutes, MS_PER_SECOND);
+
+ const time = `${pad2(hoursQ)}:${pad2(minutesQ)}:${pad2(secondsQ)}`;
+ const fracPart = msQ === 0 ? "" : `.${String(msQ).padStart(3, "0")}`;
+ return `${sign}${daysQ} days ${time}${fracPart}`;
+ }
+
+ /**
+ * Return an ISO 8601 duration string.
+ *
+ * @example
+ * ```ts
+ * Timedelta.fromComponents({ days: 1, hours: 2 }).toISOString(); // "P1DT2H"
+ * Timedelta.fromComponents({ hours: -1 }).toISOString(); // "-PT1H"
+ * ```
+ */
+ toISOString(): string {
+ const absMs = Math.abs(this.totalMilliseconds);
+ const sign = this.totalMilliseconds < 0 ? "-" : "";
+
+ const [daysQ, remAfterDays] = floorDiv(absMs, MS_PER_DAY);
+ const [hoursQ, remAfterHours] = floorDiv(remAfterDays, MS_PER_HOUR);
+ const [minutesQ, remAfterMinutes] = floorDiv(remAfterHours, MS_PER_MINUTE);
+ const [secondsQ, msQ] = floorDiv(remAfterMinutes, MS_PER_SECOND);
+
+ let timePart = "";
+ if (hoursQ !== 0) {
+ timePart += `${hoursQ}H`;
+ }
+ if (minutesQ !== 0) {
+ timePart += `${minutesQ}M`;
+ }
+ if (secondsQ !== 0 || msQ !== 0) {
+ const fracSec = msQ === 0 ? `${secondsQ}S` : `${secondsQ}.${String(msQ).padStart(3, "0")}S`;
+ timePart += fracSec;
+ }
+
+ const datePart = daysQ !== 0 ? `${daysQ}D` : "";
+ const tSection = timePart !== "" ? `T${timePart}` : "";
+
+ if (datePart === "" && tSection === "") {
+ return "PT0S";
+ }
+ return `${sign}P${datePart}${tSection}`;
+ }
+}
+
+// ─── TimedeltaIndex ───────────────────────────────────────────────────────────
+
+/**
+ * An ordered array of {@link Timedelta} values for use as a row / column index.
+ *
+ * @example
+ * ```ts
+ * const idx = TimedeltaIndex.fromTimedeltas([
+ * Timedelta.fromComponents({ hours: 0 }),
+ * Timedelta.fromComponents({ hours: 1 }),
+ * Timedelta.fromComponents({ hours: 2 }),
+ * ]);
+ * idx.size; // 3
+ * idx.at(1).totalHours; // 1
+ * ```
+ */
+export class TimedeltaIndex {
+ private readonly _data: readonly Timedelta[];
+
+ /** Optional label for this index. */
+ readonly name: string | null;
+
+ private constructor(data: readonly Timedelta[], name: string | null) {
+ this._data = data;
+ this.name = name;
+ }
+
+ // ── factories ────────────────────────────────────────────────────────────
+
+ /**
+ * Create a TimedeltaIndex from an array of {@link Timedelta} values.
+ *
+ * @example
+ * ```ts
+ * const idx = TimedeltaIndex.fromTimedeltas([
+ * Timedelta.fromComponents({ hours: 0 }),
+ * Timedelta.fromComponents({ hours: 1 }),
+ * ]);
+ * ```
+ */
+ static fromTimedeltas(
+ deltas: readonly Timedelta[],
+ options?: TimedeltaIndexOptions,
+ ): TimedeltaIndex {
+ return new TimedeltaIndex([...deltas], options?.name ?? null);
+ }
+
+ /**
+ * Create a range of evenly-spaced Timedeltas (inclusive of both endpoints).
+ *
+ * @param start - First value.
+ * @param stop - Last value (inclusive when `step` divides evenly).
+ * @param step - Interval between values.
+ * @throws {RangeError} When `step` is zero or would produce an infinite sequence.
+ *
+ * @example
+ * ```ts
+ * const idx = TimedeltaIndex.fromRange(
+ * Timedelta.fromComponents({ hours: 0 }),
+ * Timedelta.fromComponents({ hours: 4 }),
+ * Timedelta.fromComponents({ hours: 1 }),
+ * );
+ * idx.size; // 5
+ * ```
+ */
+ static fromRange(
+ start: Timedelta,
+ stop: Timedelta,
+ step: Timedelta,
+ options?: TimedeltaIndexOptions,
+ ): TimedeltaIndex {
+ if (step.totalMilliseconds === 0) {
+ throw new RangeError("TimedeltaIndex.fromRange: step must be non-zero");
+ }
+ const deltas: Timedelta[] = [];
+ let current = start.totalMilliseconds;
+ const stopMs = stop.totalMilliseconds;
+ const stepMs = step.totalMilliseconds;
+ const forward = stepMs > 0;
+ while (forward ? current <= stopMs : current >= stopMs) {
+ deltas.push(Timedelta.fromMilliseconds(current));
+ current += stepMs;
+ }
+ return new TimedeltaIndex(deltas, options?.name ?? null);
+ }
+
+ /**
+ * Create a TimedeltaIndex by parsing an array of strings.
+ *
+ * Each string is forwarded to {@link Timedelta.parse}.
+ *
+ * @example
+ * ```ts
+ * TimedeltaIndex.fromStrings(["0 days 01:00:00", "0 days 02:00:00"]);
+ * ```
+ */
+ static fromStrings(strings: readonly string[], options?: TimedeltaIndexOptions): TimedeltaIndex {
+ const deltas = strings.map((s) => Timedelta.parse(s));
+ return new TimedeltaIndex(deltas, options?.name ?? null);
+ }
+
+ // ── accessors ────────────────────────────────────────────────────────────
+
+ /** Number of elements in this index. */
+ get size(): number {
+ return this._data.length;
+ }
+
+ /**
+ * Return the Timedelta at position `i` (0-based).
+ *
+ * @throws {RangeError} When `i` is out of bounds.
+ */
+ at(i: number): Timedelta {
+ if (i < 0 || i >= this._data.length) {
+ throw new RangeError(`TimedeltaIndex.at: index ${i} out of bounds [0, ${this._data.length})`);
+ }
+ // biome-ignore lint/style/noNonNullAssertion: bounds checked above
+ return this._data[i]!;
+ }
+
+ /** Return all values as a plain array. */
+ toArray(): Timedelta[] {
+ return [...this._data];
+ }
+
+ // ── operations ────────────────────────────────────────────────────────────
+
+ /**
+ * Return a new TimedeltaIndex sorted in ascending order.
+ *
+ * @example
+ * ```ts
+ * idx.sort().at(0).totalMilliseconds; // smallest duration
+ * ```
+ */
+ sort(options?: { ascending?: boolean }): TimedeltaIndex {
+ const asc = options?.ascending ?? true;
+ const sorted = [...this._data].sort((a, b) => {
+ const diff = a.totalMilliseconds - b.totalMilliseconds;
+ return asc ? diff : -diff;
+ });
+ return new TimedeltaIndex(sorted, this.name);
+ }
+
+ /**
+ * Return a new TimedeltaIndex with duplicates removed (first occurrence kept).
+ */
+ unique(): TimedeltaIndex {
+ const seen = new Set();
+ const unique: Timedelta[] = [];
+ for (const td of this._data) {
+ if (!seen.has(td.totalMilliseconds)) {
+ seen.add(td.totalMilliseconds);
+ unique.push(td);
+ }
+ }
+ return new TimedeltaIndex(unique, this.name);
+ }
+
+ /**
+ * Shift every element by adding `delta` to each value.
+ *
+ * @example
+ * ```ts
+ * idx.shift(Timedelta.fromComponents({ hours: 1 }));
+ * ```
+ */
+ shift(delta: Timedelta): TimedeltaIndex {
+ const shifted = this._data.map((td) => td.add(delta));
+ return new TimedeltaIndex(shifted, this.name);
+ }
+
+ /**
+ * Return the minimum Timedelta in this index.
+ *
+ * @throws {RangeError} When the index is empty.
+ */
+ min(): Timedelta {
+ const first = this._data[0];
+ if (first === undefined) {
+ throw new RangeError("TimedeltaIndex.min: empty index");
+ }
+ let best = first;
+ for (const td of this._data) {
+ if (td.totalMilliseconds < best.totalMilliseconds) {
+ best = td;
+ }
+ }
+ return best;
+ }
+
+ /**
+ * Return the maximum Timedelta in this index.
+ *
+ * @throws {RangeError} When the index is empty.
+ */
+ max(): Timedelta {
+ const first = this._data[0];
+ if (first === undefined) {
+ throw new RangeError("TimedeltaIndex.max: empty index");
+ }
+ let best = first;
+ for (const td of this._data) {
+ if (td.totalMilliseconds > best.totalMilliseconds) {
+ best = td;
+ }
+ }
+ return best;
+ }
+
+ /**
+ * Return a new index containing only elements that satisfy `predicate`.
+ */
+ filter(predicate: (td: Timedelta, i: number) => boolean): TimedeltaIndex {
+ return new TimedeltaIndex(this._data.filter(predicate), this.name);
+ }
+
+ /**
+ * Return all string representations as an array.
+ */
+ toStrings(): string[] {
+ return this._data.map((td) => td.toString());
+ }
+
+ /**
+ * Return a new TimedeltaIndex with the given `name`.
+ */
+ rename(name: string | null): TimedeltaIndex {
+ return new TimedeltaIndex(this._data, name);
+ }
+}
diff --git a/src/core/timestamp.ts b/src/core/timestamp.ts
new file mode 100644
index 00000000..f6d0a5bf
--- /dev/null
+++ b/src/core/timestamp.ts
@@ -0,0 +1,1104 @@
+/**
+ * Timestamp — a timezone-aware datetime scalar.
+ *
+ * Mirrors `pandas.Timestamp`: a single point in time with optional timezone,
+ * component accessors, arithmetic with {@link Timedelta}, comparison operators,
+ * and a rich set of formatting utilities.
+ *
+ * Stored internally as **milliseconds since the Unix epoch (UTC)** plus optional
+ * sub-millisecond `microsecond` and `nanosecond` offsets. For naive timestamps
+ * (no timezone) the millisecond value is interpreted as a wall-clock time in UTC
+ * space — matching pandas' behaviour where naive Timestamps carry no offset.
+ *
+ * ## Construction
+ *
+ * ```ts
+ * // From ISO string
+ * const ts1 = new Timestamp("2024-01-15T10:30:00");
+ *
+ * // From unix seconds
+ * const ts2 = new Timestamp(1705312200, { unit: "s" });
+ *
+ * // From JS Date
+ * const ts3 = new Timestamp(new Date("2024-01-15T10:30:00Z"));
+ *
+ * // With timezone
+ * const ts4 = new Timestamp("2024-01-15 10:30:00", { tz: "America/New_York" });
+ *
+ * // Static constructors
+ * const now = Timestamp.now("UTC");
+ * const today = Timestamp.today();
+ * ```
+ *
+ * ## Key properties
+ *
+ * ```ts
+ * ts.year; ts.month; ts.day; ts.hour; ts.minute; ts.second;
+ * ts.millisecond; ts.microsecond; ts.nanosecond;
+ * ts.dayofweek; // 0 = Monday … 6 = Sunday (same as pandas)
+ * ts.dayofyear; ts.quarter; ts.tz;
+ * ts.is_leap_year; ts.is_month_start; ts.is_month_end;
+ * ```
+ *
+ * ## Arithmetic
+ *
+ * ```ts
+ * const later = ts.add(Timedelta.fromComponents({ hours: 2 }));
+ * const delta = later.sub(ts); // Timedelta
+ * ```
+ *
+ * @module
+ */
+
+import { Timedelta } from "./timedelta.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/** Numeric unit for the `unit` option in {@link TimestampOptions}. */
+export type TimestampUnit = "s" | "ms" | "us" | "ns";
+
+/** Options accepted by the {@link Timestamp} constructor. */
+export interface TimestampOptions {
+ /**
+ * IANA timezone identifier (e.g. `"UTC"`, `"America/New_York"`).
+ * If supplied, the timestamp is localised to this timezone during display.
+ */
+ readonly tz?: string | null;
+ /**
+ * Interpretation of numeric input.
+ * `"s"` = Unix seconds, `"ms"` = milliseconds (default),
+ * `"us"` = microseconds, `"ns"` = nanoseconds.
+ */
+ readonly unit?: TimestampUnit;
+ /** Extra nanoseconds (0–999) beyond the millisecond boundary. */
+ readonly nanosecond?: number;
+}
+
+/** Component fields for constructing a Timestamp from parts. */
+export interface TimestampComponents {
+ readonly year: number;
+ readonly month: number; // 1-based
+ readonly day: number;
+ readonly hour?: number;
+ readonly minute?: number;
+ readonly second?: number;
+ readonly millisecond?: number;
+ readonly microsecond?: number;
+ readonly nanosecond?: number;
+ readonly tz?: string | null;
+}
+
+// ─── internal helpers ──────────────────────────────────────────────────────────
+
+const MS_PER_SECOND = 1_000;
+const MS_PER_MINUTE = 60_000;
+const MS_PER_HOUR = 3_600_000;
+const MS_PER_DAY = 86_400_000;
+
+const WEEKDAY_NAMES: readonly string[] = [
+ "Monday",
+ "Tuesday",
+ "Wednesday",
+ "Thursday",
+ "Friday",
+ "Saturday",
+ "Sunday",
+];
+const WEEKDAY_ABBR: readonly string[] = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"];
+const MONTH_NAMES: readonly string[] = [
+ "", // 1-based
+ "January",
+ "February",
+ "March",
+ "April",
+ "May",
+ "June",
+ "July",
+ "August",
+ "September",
+ "October",
+ "November",
+ "December",
+];
+const MONTH_ABBR: readonly string[] = [
+ "",
+ "Jan",
+ "Feb",
+ "Mar",
+ "Apr",
+ "May",
+ "Jun",
+ "Jul",
+ "Aug",
+ "Sep",
+ "Oct",
+ "Nov",
+ "Dec",
+];
+
+/** Zero-pad the absolute value of `n` out to `len` digits. */
+function pad(n: number, len: number): string {
+  return Math.abs(n).toString().padStart(len, "0");
+}
+
+/** Days in month (1-based month). */
+function daysInMonth(year: number, month: number): number {
+  return new Date(year, month, 0).getDate(); // day 0 = last day of prior month
+}
+
+/** Gregorian leap-year test: every 4th year, skipping centuries not divisible by 400. */
+function isLeapYear(year: number): boolean {
+  return year % 400 === 0 || (year % 4 === 0 && year % 100 !== 0);
+}
+
+/**
+ * 1-based ordinal day of the year for a UTC calendar date.
+ */
+function dayOfYear(year: number, month: number, day: number): number {
+  // UTC days are exactly MS_PER_DAY long, so this division is always integral.
+  const elapsed = Date.UTC(year, month - 1, day) - Date.UTC(year, 0, 1);
+  return elapsed / MS_PER_DAY + 1;
+}
+
+/**
+ * Return all local date/time components for `utcMs` in `tz`.
+ * For naive (tz=null) timestamps, returns the UTC components directly.
+ */
+interface DateParts {
+ year: number;
+ month: number; // 1-based
+ day: number;
+ hour: number;
+ minute: number;
+ second: number;
+ weekday: number; // 0=Monday, 6=Sunday
+}
+
+function getLocalParts(utcMs: number, tz: string | null): DateParts {
+ if (tz === null) {
+ const d = new Date(utcMs);
+ // Use UTC accessors because for naive timestamps the stored ms value
+ // is already in "wall-clock UTC" space.
+ const year = d.getUTCFullYear();
+ const month = d.getUTCMonth() + 1;
+ const day = d.getUTCDate();
+ const hour = d.getUTCHours();
+ const minute = d.getUTCMinutes();
+ const second = d.getUTCSeconds();
+ // JS: 0=Sun … 6=Sat → pandas: 0=Mon … 6=Sun
+ const jsDow = d.getUTCDay();
+ const weekday = jsDow === 0 ? 6 : jsDow - 1;
+ return { year, month, day, hour, minute, second, weekday };
+ }
+
+ const fmt = new Intl.DateTimeFormat("en-CA", {
+ timeZone: tz,
+ year: "numeric",
+ month: "2-digit",
+ day: "2-digit",
+ hour: "2-digit",
+ minute: "2-digit",
+ second: "2-digit",
+ hour12: false,
+ });
+ const parts = fmt.formatToParts(new Date(utcMs));
+
+ let year = 0;
+ let month = 0;
+ let day = 0;
+ let hour = 0;
+ let minute = 0;
+ let second = 0;
+
+ for (const p of parts) {
+ switch (p.type) {
+ case "year":
+ year = Number(p.value);
+ break;
+ case "month":
+ month = Number(p.value);
+ break;
+ case "day":
+ day = Number(p.value);
+ break;
+ case "hour":
+ hour = Number(p.value) % 24;
+ break;
+ case "minute":
+ minute = Number(p.value);
+ break;
+ case "second":
+ second = Number(p.value);
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Compute weekday: get the weekday of the *local* date in the given tz.
+ const localMidnight = Date.UTC(year, month - 1, day);
+ const jsDow = new Date(localMidnight).getUTCDay();
+ const weekday = jsDow === 0 ? 6 : jsDow - 1;
+
+ return { year, month, day, hour, minute, second, weekday };
+}
+
+/**
+ * Return the UTC offset in minutes for `tz` at `utcMs`.
+ * Positive means east of UTC (e.g. +05:30 → +330).
+ */
+function utcOffsetMinutes(utcMs: number, tz: string): number {
+ const d = new Date(utcMs);
+ const fmt = new Intl.DateTimeFormat("en-CA", {
+ timeZone: tz,
+ year: "numeric",
+ month: "2-digit",
+ day: "2-digit",
+ hour: "2-digit",
+ minute: "2-digit",
+ second: "2-digit",
+ hour12: false,
+ });
+ const parts = fmt.formatToParts(d);
+ let year = 0;
+ let month = 0;
+ let day = 0;
+ let hour = 0;
+ let minute = 0;
+ let second = 0;
+ for (const p of parts) {
+ switch (p.type) {
+ case "year":
+ year = Number(p.value);
+ break;
+ case "month":
+ month = Number(p.value);
+ break;
+ case "day":
+ day = Number(p.value);
+ break;
+ case "hour":
+ hour = Number(p.value) % 24;
+ break;
+ case "minute":
+ minute = Number(p.value);
+ break;
+ case "second":
+ second = Number(p.value);
+ break;
+ default:
+ break;
+ }
+ }
+ const localMs = Date.UTC(year, month - 1, day, hour, minute, second);
+ return (localMs - utcMs) / MS_PER_MINUTE;
+}
+
+/** Convert wall-clock "naive UTC" ms to a true UTC ms for a given timezone. */
+function wallClockToUtc(wallMs: number, tz: string): number {
+ // First estimate: offset at the wall-clock time treated as UTC.
+ const offset1 = utcOffsetMinutes(wallMs, tz);
+ const utc1 = wallMs - offset1 * MS_PER_MINUTE;
+ // Refine: recompute offset at utc1 (handles DST edges).
+ const offset2 = utcOffsetMinutes(utc1, tz);
+ const utc2 = wallMs - offset2 * MS_PER_MINUTE;
+ return utc2;
+}
+
+// ─── string parsing ────────────────────────────────────────────────────────────
+
+// Regex for ISO-like datetime strings.
+// Groups: 1=year, 2=month, 3=day, 4=hour, 5=minute, 6=second, 7=fraction,
+// 8=whole tz capture ("Z" or signed offset), 9=offset sign, 10=offset hours, 11=offset minutes.
+const RE_DATETIME =
+ /^(\d{4})-(\d{2})-(\d{2})[T ](\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?(Z|([+-])(\d{2}):?(\d{2}))?$/;
+
+const RE_DATE_ONLY = /^(\d{4})-(\d{2})-(\d{2})$/;
+
+/** Parse an ISO-like datetime string into UTC milliseconds and tz metadata. */
+function parseString(
+ s: string,
+ tzHint: string | null | undefined,
+): { utcMs: number; parsedTz: string | null } {
+ const trimmed = s.trim();
+
+ // Try full datetime.
+ const mDt = RE_DATETIME.exec(trimmed);
+ if (mDt !== null) {
+ const year = Number(mDt[1]);
+ const month = Number(mDt[2]) - 1;
+ const day = Number(mDt[3]);
+ const hour = Number(mDt[4]);
+ const minute = Number(mDt[5]);
+ const second = Number(mDt[6]);
+
+    // Parse sub-second fraction → whole milliseconds (sub-ms digits are dropped).
+ let ms = 0;
+ if (mDt[7] !== undefined) {
+ const frac = mDt[7].padEnd(6, "0").slice(0, 6);
+ ms = Math.floor(Number(frac) / 1000);
+ }
+
+ const tzSign = mDt[8];
+ if (tzSign === "Z") {
+ // Explicit UTC.
+ const utcMs = Date.UTC(year, month, day, hour, minute, second, ms);
+ return { utcMs, parsedTz: "UTC" };
+ }
+ if (mDt[9] !== undefined && mDt[10] !== undefined) {
+ // Explicit offset (+HH:MM or -HH:MM).
+ const sign = mDt[9] === "+" ? 1 : -1;
+ const offsetMs =
+ sign * (Number(mDt[10]) * MS_PER_HOUR + Number(mDt[11] ?? "0") * MS_PER_MINUTE);
+ const wallMs = Date.UTC(year, month, day, hour, minute, second, ms);
+ const utcMs = wallMs - offsetMs;
+ return { utcMs, parsedTz: tzHint ?? "UTC" };
+ }
+
+ // No timezone in string.
+ const wallMs = Date.UTC(year, month, day, hour, minute, second, ms);
+ if (tzHint) {
+ const utcMs = wallClockToUtc(wallMs, tzHint);
+ return { utcMs, parsedTz: tzHint };
+ }
+ return { utcMs: wallMs, parsedTz: null };
+ }
+
+ // Try date-only.
+ const mDate = RE_DATE_ONLY.exec(trimmed);
+ if (mDate !== null) {
+ const year = Number(mDate[1]);
+ const month = Number(mDate[2]) - 1;
+ const day = Number(mDate[3]);
+ const wallMs = Date.UTC(year, month, day);
+ if (tzHint) {
+ const utcMs = wallClockToUtc(wallMs, tzHint);
+ return { utcMs, parsedTz: tzHint };
+ }
+ return { utcMs: wallMs, parsedTz: null };
+ }
+
+ throw new Error(`Timestamp: cannot parse "${s}"`);
+}
+
+// ─── frequency helpers for floor/ceil/round ───────────────────────────────────
+
+/** Return the size of a frequency in milliseconds. */
+function freqMs(freq: string): number {
+  const upper = freq.toUpperCase();
+  if (upper === "NS" || upper === "N") {
+    return 0; // nanosecond — below ms resolution; callers treat 0 as a no-op
+  }
+  if (upper === "US" || upper === "U") {
+    return 1; // microsecond — identity at integer-ms resolution
+  }
+  if (upper === "MS" || upper === "L") {
+    return 1;
+  }
+  if (upper === "S") {
+    return MS_PER_SECOND;
+  }
+  if (upper === "T" || upper === "MIN") {
+    return MS_PER_MINUTE;
+  }
+  if (upper === "H") {
+    return MS_PER_HOUR;
+  }
+  if (upper === "D") {
+    return MS_PER_DAY;
+  }
+  // Try "Nunit" pattern (e.g. "2H", "15T"): multiplier times the base unit.
+  const m = /^(\d+)(.+)$/.exec(freq);
+  if (m !== null && m[1] !== undefined && m[2] !== undefined) {
+    return Number(m[1]) * freqMs(m[2]);
+  }
+  throw new Error(`Timestamp.floor/ceil/round: unsupported frequency "${freq}"`);
+}
+
+// ─── Internal raw-construction sentinel ───────────────────────────────────────
+
+/**
+ * Internal-only class used to create Timestamp instances from pre-parsed fields
+ * without going through the full parsing pipeline. Never exported.
+ */
+class RawTimestamp {
+ constructor(
+ readonly utcMs: number,
+ readonly tz: string | null,
+ readonly us: number,
+ readonly ns: number,
+ ) {}
+}
+
+// ─── Timestamp class ──────────────────────────────────────────────────────────
+
+/**
+ * A single point in time — the TypeScript/tsb equivalent of `pandas.Timestamp`.
+ *
+ * @example
+ * ```ts
+ * const ts = new Timestamp("2024-06-15T12:00:00Z");
+ * ts.year; // 2024
+ * ts.month; // 6
+ * ts.dayofweek; // 5 (Saturday)
+ * ts.isoformat(); // "2024-06-15T12:00:00.000Z"
+ *
+ * const ts2 = ts.add(Timedelta.fromComponents({ hours: 3 }));
+ * ts2.hour; // 15
+ * ```
+ */
+export class Timestamp {
+ /** Milliseconds since Unix epoch (UTC). */
+ readonly _utcMs: number;
+ /** IANA timezone, or null for naive. */
+ readonly _tz: string | null;
+ /** Sub-millisecond microseconds (0–999). */
+ readonly _us: number;
+ /** Sub-microsecond nanoseconds (0–999). */
+ readonly _ns: number;
+
+ // ─── construction ────────────────────────────────────────────────────────────
+
+ /**
+ * Create a Timestamp from a string, number, or Date.
+ *
+ * @param input - ISO string, Unix numeric value, or JS Date.
+ * @param options - Optional tz, unit, and nanosecond overrides.
+ */
+ constructor(
+ input: string | number | Date | Timestamp | RawTimestamp,
+ options?: TimestampOptions,
+ ) {
+ if (input instanceof RawTimestamp) {
+ this._utcMs = input.utcMs;
+ this._tz = input.tz;
+ this._us = input.us;
+ this._ns = input.ns;
+ return;
+ }
+ const tz = options?.tz ?? null;
+ const unit: TimestampUnit = options?.unit ?? "ms";
+ const nsExtra = options?.nanosecond ?? 0;
+
+ if (input instanceof Timestamp) {
+ this._utcMs = input._utcMs;
+ this._tz = tz !== null ? tz : input._tz;
+ this._us = input._us;
+ this._ns = nsExtra !== 0 ? nsExtra : input._ns;
+ return;
+ }
+
+ if (input instanceof Date) {
+ this._utcMs = input.getTime();
+ this._tz = tz;
+ this._us = 0;
+ this._ns = nsExtra;
+ return;
+ }
+
+ if (typeof input === "number") {
+ let utcMs: number;
+ let us = 0;
+ switch (unit) {
+ case "s":
+ utcMs = Math.trunc(input) * MS_PER_SECOND;
+ break;
+ case "ms":
+ utcMs = Math.trunc(input);
+ break;
+ case "us": {
+ utcMs = Math.trunc(input / 1000);
+ us = Math.trunc(input % 1000);
+ break;
+ }
+ case "ns": {
+ utcMs = Math.trunc(input / 1_000_000);
+ us = Math.trunc((input % 1_000_000) / 1_000);
+ break;
+ }
+ default:
+ utcMs = Math.trunc(input);
+ }
+ this._utcMs = utcMs;
+ this._tz = tz;
+ this._us = us;
+ this._ns = nsExtra;
+ return;
+ }
+
+ // String input.
+ const { utcMs, parsedTz } = parseString(input, tz);
+ this._utcMs = utcMs;
+ this._tz = tz !== null ? tz : parsedTz;
+ this._us = 0;
+ this._ns = nsExtra;
+ }
+
+ /**
+ * Create a Timestamp from individual date/time components.
+ *
+ * @example
+ * ```ts
+ * Timestamp.fromComponents({ year: 2024, month: 6, day: 15, hour: 12 });
+ * ```
+ */
+ static fromComponents(c: TimestampComponents): Timestamp {
+ const tz = c.tz ?? null;
+ const wallMs = Date.UTC(
+ c.year,
+ c.month - 1,
+ c.day,
+ c.hour ?? 0,
+ c.minute ?? 0,
+ c.second ?? 0,
+ c.millisecond ?? 0,
+ );
+ let utcMs = wallMs;
+ if (tz !== null) {
+ utcMs = wallClockToUtc(wallMs, tz);
+ }
+ return new Timestamp(new RawTimestamp(utcMs, tz, c.microsecond ?? 0, c.nanosecond ?? 0));
+ }
+
+ /** Current UTC time as a Timestamp (optionally localised to `tz`). */
+ static now(tz?: string | null): Timestamp {
+ return new Timestamp(Date.now(), { tz: tz ?? null });
+ }
+
+ /**
+   * Today's date at midnight (naive, derived from the UTC clock).
+ * Mirrors `pandas.Timestamp.today()` which returns today at midnight.
+ */
+ static today(): Timestamp {
+ const now = new Date();
+ const wallMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
+ return new Timestamp(wallMs, { tz: null });
+ }
+
+ /**
+ * Create a Timestamp from a Unix timestamp (seconds since epoch).
+ *
+ * @param ts - Unix seconds (float).
+ * @param tz - Optional IANA timezone.
+ */
+ static fromtimestamp(ts: number, tz?: string | null): Timestamp {
+ return new Timestamp(ts, { unit: "s", tz: tz ?? null });
+ }
+
+ /**
+ * Parse an ISO 8601 string.
+ *
+ * @example
+ * ```ts
+ * Timestamp.fromisoformat("2024-06-15T12:00:00");
+ * ```
+ */
+ static fromisoformat(s: string): Timestamp {
+ return new Timestamp(s);
+ }
+
+ // ─── local-time component accessors ──────────────────────────────────────────
+
+ /** Cached parts (lazy). */
+ private _cachedParts: DateParts | undefined = undefined;
+ private _localParts(): DateParts {
+ if (this._cachedParts === undefined) {
+ this._cachedParts = getLocalParts(this._utcMs, this._tz);
+ }
+ return this._cachedParts;
+ }
+
+ /** Four-digit year. */
+ get year(): number {
+ return this._localParts().year;
+ }
+ /** Month (1–12). */
+ get month(): number {
+ return this._localParts().month;
+ }
+ /** Day of month (1–31). */
+ get day(): number {
+ return this._localParts().day;
+ }
+ /** Hour (0–23). */
+ get hour(): number {
+ return this._localParts().hour;
+ }
+ /** Minute (0–59). */
+ get minute(): number {
+ return this._localParts().minute;
+ }
+ /** Second (0–59). */
+ get second(): number {
+ return this._localParts().second;
+ }
+  /** Millisecond (0–999). Euclidean modulo keeps pre-epoch (negative ms) values in range. */
+  get millisecond(): number {
+    return ((this._utcMs % 1_000) + 1_000) % 1_000;
+  }
+  /** Microsecond (0–999999): millisecond * 1000 + sub-ms microseconds. */
+  get microsecond(): number {
+    return (((this._utcMs % 1_000) + 1_000) % 1_000) * 1_000 + this._us;
+  }
+ /** Nanosecond (0–999). */
+ get nanosecond(): number {
+ return this._ns;
+ }
+
+ /**
+ * Day of week (0=Monday, 6=Sunday), matching pandas convention.
+ */
+ get dayofweek(): number {
+ return this._localParts().weekday;
+ }
+ /** Alias for {@link dayofweek}. */
+ get weekday(): number {
+ return this.dayofweek;
+ }
+
+ /** Day of year (1–366). */
+ get dayofyear(): number {
+ const { year, month, day } = this._localParts();
+ return dayOfYear(year, month, day);
+ }
+
+ /** ISO week number (1–53). */
+ get week(): number {
+ const { year, month, day } = this._localParts();
+ const d = new Date(Date.UTC(year, month - 1, day));
+ // ISO week: set date to nearest Thursday.
+ const thu = new Date(d.getTime());
+ thu.setUTCDate(d.getUTCDate() + 4 - (d.getUTCDay() || 7));
+ const yearStart = new Date(Date.UTC(thu.getUTCFullYear(), 0, 1));
+ return Math.ceil(((thu.getTime() - yearStart.getTime()) / MS_PER_DAY + 1) / 7);
+ }
+
+ /** Quarter (1–4). */
+ get quarter(): number {
+ return Math.ceil(this._localParts().month / 3);
+ }
+
+ /** IANA timezone string, or null for naive. */
+ get tz(): string | null {
+ return this._tz;
+ }
+ /** Alias for {@link tz}. */
+ get tzinfo(): string | null {
+ return this._tz;
+ }
+ /** Always null — tsb Timestamps have no fixed frequency. */
+ get freq(): null {
+ return null;
+ }
+
+ // ─── boolean properties ───────────────────────────────────────────────────────
+
+ /** True if the year is a leap year. */
+ get is_leap_year(): boolean {
+ return isLeapYear(this.year);
+ }
+
+ /** True if this is the first day of the month. */
+ get is_month_start(): boolean {
+ return this.day === 1;
+ }
+
+ /** True if this is the last day of the month. */
+ get is_month_end(): boolean {
+ const { year, month, day } = this._localParts();
+ return day === daysInMonth(year, month);
+ }
+
+ /** True if this is the first day of a quarter. */
+ get is_quarter_start(): boolean {
+ return (
+ this.day === 1 &&
+ (this.month === 1 || this.month === 4 || this.month === 7 || this.month === 10)
+ );
+ }
+
+ /** True if this is the last day of a quarter. */
+ get is_quarter_end(): boolean {
+ const { year, month, day } = this._localParts();
+ return (
+ day === daysInMonth(year, month) &&
+ (month === 3 || month === 6 || month === 9 || month === 12)
+ );
+ }
+
+ /** True if this is the first day of the year (Jan 1). */
+ get is_year_start(): boolean {
+ return this.month === 1 && this.day === 1;
+ }
+
+ /** True if this is the last day of the year (Dec 31). */
+ get is_year_end(): boolean {
+ return this.month === 12 && this.day === 31;
+ }
+
+ // ─── conversion methods ───────────────────────────────────────────────────────
+
+  /**
+   * Unix timestamp as fractional seconds (float), including sub-ms components.
+   * Mirrors `pandas.Timestamp.timestamp()`.
+   */
+  timestamp(): number {
+    return (this._utcMs + this._us / 1_000 + this._ns / 1_000_000) / MS_PER_SECOND;
+  }
+
+ /**
+ * Date portion as a plain object `{ year, month, day }`.
+ * Mirrors `pandas.Timestamp.date()`.
+ */
+ date(): { year: number; month: number; day: number } {
+ const { year, month, day } = this._localParts();
+ return { year, month, day };
+ }
+
+ /**
+ * Time portion as a plain object `{ hour, minute, second, microsecond }`.
+ * Mirrors `pandas.Timestamp.time()`.
+ */
+ time(): { hour: number; minute: number; second: number; microsecond: number } {
+ const { hour, minute, second } = this._localParts();
+ return { hour, minute, second, microsecond: this.microsecond };
+ }
+
+ /** Convert to a JS `Date` object (millisecond precision). */
+ toDate(): Date {
+ return new Date(this._utcMs);
+ }
+
+  /**
+   * Return an ISO 8601 string.
+   *
+   * @param sep - Separator between date and time (default `"T"`).
+   * @param timespec - Precision: `"auto"`, `"hours"`, `"minutes"`, `"seconds"`,
+   *   `"milliseconds"`, `"microseconds"` (default `"auto"`).
+   */
+  isoformat(sep = "T", timespec = "auto"): string {
+    const { year, month, day, hour, minute, second } = this._localParts();
+    const ms = ((this._utcMs % 1_000) + 1_000) % 1_000; // Euclidean — correct pre-epoch
+    const datePart = `${pad(year, 4)}-${pad(month, 2)}-${pad(day, 2)}`;
+    const spec =
+      timespec === "auto" ? (ms !== 0 || this._us !== 0 ? "microseconds" : "seconds") : timespec;
+
+    let timePart: string;
+    switch (spec) {
+      case "hours":
+        timePart = `${pad(hour, 2)}`;
+        break;
+      case "minutes":
+        timePart = `${pad(hour, 2)}:${pad(minute, 2)}`;
+        break;
+      case "seconds":
+        timePart = `${pad(hour, 2)}:${pad(minute, 2)}:${pad(second, 2)}`;
+        break;
+      case "milliseconds":
+        timePart = `${pad(hour, 2)}:${pad(minute, 2)}:${pad(second, 2)}.${pad(ms, 3)}`;
+        break;
+      default:
+        timePart = `${pad(hour, 2)}:${pad(minute, 2)}:${pad(second, 2)}.${pad(ms * 1_000 + this._us, 6)}`;
+        break;
+    }
+
+    const tzSuffix =
+      this._tz === null
+        ? ""
+        : this._tz === "UTC"
+          ? "+00:00"
+          : (() => {
+              const offMin = utcOffsetMinutes(this._utcMs, this._tz);
+              const sign = offMin >= 0 ? "+" : "-";
+              const absMin = Math.abs(offMin);
+              return `${sign}${pad(Math.floor(absMin / 60), 2)}:${pad(absMin % 60, 2)}`;
+            })();
+
+    return `${datePart}${sep}${timePart}${tzSuffix}`;
+  }
+
+  /**
+   * Format using strftime-style format codes.
+   *
+   * Supported codes: `%Y %y %m %d %H %M %S %f %j %A %a %B %b %p %Z %z %w %I %% %n`
+   *
+   * @example
+   * ```ts
+   * ts.strftime("%Y-%m-%d %H:%M:%S"); // "2024-06-15 12:00:00"
+   * ```
+   */
+  strftime(format: string): string {
+    const { year, month, day, hour, minute, second, weekday } = this._localParts();
+    const ms = ((this._utcMs % 1_000) + 1_000) % 1_000; // Euclidean — correct pre-epoch
+    const us6 = ms * 1_000 + this._us;
+    const hour12 = hour % 12 === 0 ? 12 : hour % 12;
+    const ampm = hour < 12 ? "AM" : "PM";
+    const doy = dayOfYear(year, month, day);
+
+    const tzName = this._tz ?? "";
+    const tzOffset = (() => {
+      if (this._tz === null) {
+        return "";
+      }
+      const offMin = utcOffsetMinutes(this._utcMs, this._tz);
+      const sign = offMin >= 0 ? "+" : "-";
+      const absMin = Math.abs(offMin);
+      return `${sign}${pad(Math.floor(absMin / 60), 2)}${pad(absMin % 60, 2)}`;
+    })();
+
+    // JS weekday: 0=Sunday … 6=Saturday (for %w).
+    const jsDow = weekday === 6 ? 0 : weekday + 1;
+
+    return format.replace(/%[A-Za-z%n]/g, (token) => {
+      switch (token) {
+        case "%Y":
+          return pad(year, 4);
+        case "%y":
+          return pad(year % 100, 2);
+        case "%m":
+          return pad(month, 2);
+        case "%d":
+          return pad(day, 2);
+        case "%H":
+          return pad(hour, 2);
+        case "%I":
+          return pad(hour12, 2);
+        case "%M":
+          return pad(minute, 2);
+        case "%S":
+          return pad(second, 2);
+        case "%f":
+          return pad(us6, 6);
+        case "%j":
+          return pad(doy, 3);
+        case "%A":
+          return WEEKDAY_NAMES[weekday] ?? "";
+        case "%a":
+          return WEEKDAY_ABBR[weekday] ?? "";
+        case "%B":
+          return MONTH_NAMES[month] ?? "";
+        case "%b":
+          return MONTH_ABBR[month] ?? "";
+        case "%p":
+          return ampm;
+        case "%Z":
+          return tzName;
+        case "%z":
+          return tzOffset;
+        case "%w":
+          return String(jsDow);
+        case "%%":
+          return "%";
+        case "%n":
+          return "\n";
+        default:
+          return token;
+      }
+    });
+  }
+
+ // ─── rounding ─────────────────────────────────────────────────────────────────
+
+ /**
+ * Round down to the nearest `freq` boundary.
+ *
+ * @example
+ * ```ts
+ * new Timestamp("2024-01-15T10:37:29Z").floor("H").hour; // 10
+ * ```
+ */
+ floor(freq: string): Timestamp {
+ const unit = freqMs(freq);
+ if (unit === 0) {
+ return new Timestamp(this);
+ }
+ const floored = Math.floor(this._utcMs / unit) * unit;
+ return new Timestamp(floored, { tz: this._tz });
+ }
+
+ /**
+ * Round up to the nearest `freq` boundary.
+ *
+ * @example
+ * ```ts
+ * new Timestamp("2024-01-15T10:37:29Z").ceil("H").hour; // 11
+ * ```
+ */
+ ceil(freq: string): Timestamp {
+ const unit = freqMs(freq);
+ if (unit === 0) {
+ return new Timestamp(this);
+ }
+ const ceiled = Math.ceil(this._utcMs / unit) * unit;
+ return new Timestamp(ceiled, { tz: this._tz });
+ }
+
+ /**
+ * Round to the nearest `freq` boundary (ties go to even).
+ *
+ * @example
+ * ```ts
+ * new Timestamp("2024-01-15T10:37:30Z").round("H").hour; // 11
+ * ```
+ */
+ round(freq: string): Timestamp {
+ const unit = freqMs(freq);
+ if (unit === 0) {
+ return new Timestamp(this);
+ }
+ const rounded = Math.round(this._utcMs / unit) * unit;
+ return new Timestamp(rounded, { tz: this._tz });
+ }
+
+ /**
+ * Set the time component to midnight (00:00:00.000).
+ *
+ * @example
+ * ```ts
+ * new Timestamp("2024-01-15T10:37:00Z").normalize().hour; // 0
+ * ```
+ */
+ normalize(): Timestamp {
+ return this.floor("D");
+ }
+
+ // ─── timezone operations ──────────────────────────────────────────────────────
+
+ /**
+ * Localise a naive timestamp to `tz` (treating the stored time as a
+ * wall-clock time in that timezone).
+ *
+ * Mirrors `pandas.Timestamp.tz_localize(tz)`.
+ *
+ * @throws If the timestamp is already timezone-aware.
+ */
+ tz_localize(tz: string): Timestamp {
+ if (this._tz !== null) {
+ throw new Error(
+ `Timestamp.tz_localize: timestamp is already tz-aware (tz="${this._tz}"). Use tz_convert() to change timezone.`,
+ );
+ }
+ // Re-interpret the wall-clock UTC ms as a local time in `tz`.
+ const utcMs = wallClockToUtc(this._utcMs, tz);
+ return new Timestamp(new RawTimestamp(utcMs, tz, this._us, this._ns));
+ }
+
+ /**
+ * Convert a timezone-aware timestamp to a different timezone.
+ *
+ * Mirrors `pandas.Timestamp.tz_convert(tz)`.
+ *
+ * @throws If the timestamp is naive.
+ */
+ tz_convert(tz: string): Timestamp {
+ if (this._tz === null) {
+ throw new Error(
+ "Timestamp.tz_convert: timestamp is timezone-naive. Use tz_localize() first.",
+ );
+ }
+ return new Timestamp(new RawTimestamp(this._utcMs, tz, this._us, this._ns));
+ }
+
+ // ─── arithmetic ───────────────────────────────────────────────────────────────
+
+ /**
+ * Add a {@link Timedelta} to this Timestamp, returning a new Timestamp.
+ *
+ * @example
+ * ```ts
+ * const tomorrow = ts.add(Timedelta.fromComponents({ days: 1 }));
+ * ```
+ */
+ add(delta: Timedelta): Timestamp {
+ return new Timestamp(
+ new RawTimestamp(this._utcMs + delta.totalMilliseconds, this._tz, this._us, this._ns),
+ );
+ }
+
+ /**
+ * Subtract a Timedelta or another Timestamp.
+ *
+ * - `sub(Timedelta)` → Timestamp displaced by the negative of the delta.
+ * - `sub(Timestamp)` → Timedelta representing the time difference.
+ *
+ * @example
+ * ```ts
+ * const yesterday = ts.sub(Timedelta.fromComponents({ days: 1 }));
+ * const delta = ts2.sub(ts1); // Timedelta
+ * ```
+ */
+ sub(other: Timedelta): Timestamp;
+ sub(other: Timestamp): Timedelta;
+ sub(other: Timedelta | Timestamp): Timestamp | Timedelta {
+ if (other instanceof Timedelta) {
+ return new Timestamp(
+ new RawTimestamp(this._utcMs - other.totalMilliseconds, this._tz, this._us, this._ns),
+ );
+ }
+ return Timedelta.fromMilliseconds(this._utcMs - other._utcMs);
+ }
+
+ // ─── comparisons ─────────────────────────────────────────────────────────────
+
+ /** Primitive value (ms since epoch) — enables `<`, `>`, `<=`, `>=` operators. */
+ valueOf(): number {
+ return this._utcMs;
+ }
+
+ /** True if `this` and `other` represent the same instant. */
+ eq(other: Timestamp): boolean {
+ return this._utcMs === other._utcMs;
+ }
+ /** True if `this` and `other` represent different instants. */
+ ne(other: Timestamp): boolean {
+ return this._utcMs !== other._utcMs;
+ }
+ /** True if `this` is before `other`. */
+ lt(other: Timestamp): boolean {
+ return this._utcMs < other._utcMs;
+ }
+ /** True if `this` is before or equal to `other`. */
+ le(other: Timestamp): boolean {
+ return this._utcMs <= other._utcMs;
+ }
+ /** True if `this` is after `other`. */
+ gt(other: Timestamp): boolean {
+ return this._utcMs > other._utcMs;
+ }
+ /** True if `this` is after or equal to `other`. */
+ ge(other: Timestamp): boolean {
+ return this._utcMs >= other._utcMs;
+ }
+
+ // ─── name helpers ──────────────────────────────────────────────────────────────
+
+ /**
+ * Full English name of the day of week.
+ *
+ * @example `ts.day_name()` → `"Saturday"`
+ */
+ day_name(): string {
+ return WEEKDAY_NAMES[this.dayofweek] ?? "";
+ }
+
+ /**
+ * Full English name of the month.
+ *
+ * @example `ts.month_name()` → `"January"`
+ */
+ month_name(): string {
+ return MONTH_NAMES[this.month] ?? "";
+ }
+
+ // ─── string representation ────────────────────────────────────────────────────
+
+ /** Human-readable string — uses ISO format with timezone if aware. */
+ toString(): string {
+ return this.isoformat();
+ }
+
+ /** JSON serialisation delegates to {@link toString}. */
+ toJSON(): string {
+ return this.toString();
+ }
+}
diff --git a/src/groupby/groupby.ts b/src/groupby/groupby.ts
index ff49e266..299cc54f 100644
--- a/src/groupby/groupby.ts
+++ b/src/groupby/groupby.ts
@@ -21,6 +21,7 @@ import { RangeIndex } from "../core/index.ts";
import { DataFrame } from "../core/index.ts";
import { Series } from "../core/index.ts";
import type { Label, Scalar } from "../types.ts";
+import type { NamedAggSpec } from "./named_agg.ts";
// ─── types ────────────────────────────────────────────────────────────────────
@@ -304,6 +305,53 @@ export class DataFrameGroupBy {
return this._runAgg(colSpecs, asIndex);
}
+  /**
+   * Aggregate each group using named aggregation specs.
+   *
+   * Each key in `spec` becomes the output column name; the `NamedAgg` value
+   * specifies which source column to aggregate and how.
+   */
+  aggNamed(spec: NamedAggSpec, asIndex = true): DataFrame {
+    const groupKeys = this._groups.map((g) => g.key);
+    const resultCols: Record<string, Scalar[]> = {};
+
+    if (!asIndex) {
+      if (this._by.length === 1) {
+        const byCol = this._by[0] as string;
+        resultCols[byCol] = groupKeys.slice();
+      } else {
+        for (const by of this._by) {
+          resultCols[by] = [];
+        }
+        for (const g of this._groups) {
+          const parts = (g.key as string).split("__SEP__");
+          this._by.forEach((by, idx) => {
+            const byArr = resultCols[by];
+            if (byArr !== undefined) {
+              byArr.push(parts[idx] ?? null);
+            }
+          });
+        }
+      }
+    }
+
+    for (const [outCol, namedSpec] of Object.entries(spec)) {
+      const fn = resolveAgg(namedSpec.aggfunc);
+      const srcVals = this._df.col(namedSpec.column).values as readonly Scalar[];
+      resultCols[outCol] = this._groups.map((g) =>
+        fn(g.positions.map((p) => srcVals[p] as Scalar)),
+      );
+    }
+
+    const rowIdx: Index = asIndex
+      ? new Index(groupKeys)
+      : defaultIndex(groupKeys.length);
+
+    return DataFrame.fromColumns(resultCols as Record<string, readonly Scalar[]>, {
+      index: rowIdx,
+    });
+  }
+
/** Shorthand for `agg("sum")` — numeric columns only, like pandas. */
sum(): DataFrame {
const cols = this._numericValueCols();
diff --git a/src/groupby/index.ts b/src/groupby/index.ts
index 06bf8cd3..9ac6f8c3 100644
--- a/src/groupby/index.ts
+++ b/src/groupby/index.ts
@@ -6,3 +6,5 @@
export { DataFrameGroupBy, SeriesGroupBy } from "./groupby.ts";
export type { AggFn, AggName, AggSpec } from "./groupby.ts";
+export { NamedAgg, namedAgg, isNamedAggSpec } from "./named_agg.ts";
+export type { NamedAggSpec } from "./named_agg.ts";
diff --git a/src/groupby/named_agg.ts b/src/groupby/named_agg.ts
new file mode 100644
index 00000000..57212c03
--- /dev/null
+++ b/src/groupby/named_agg.ts
@@ -0,0 +1,78 @@
+/**
+ * NamedAgg — named aggregation spec for GroupBy.
+ *
+ * Mirrors `pandas.NamedAgg` (a named tuple of `column` + `aggfunc`).
+ * Used with `DataFrameGroupBy.agg()` to rename output columns while selecting
+ * which source column to aggregate and how.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, namedAgg } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({
+ * dept: ["eng", "eng", "hr", "hr"],
+ * salary: [100, 120, 80, 90],
+ * headcount: [1, 1, 1, 1],
+ * });
+ *
+ * df.groupby("dept").aggNamed({
+ * total_salary: namedAgg("salary", "sum"),
+ * avg_salary: namedAgg("salary", "mean"),
+ * employees: namedAgg("headcount", "count"),
+ * });
+ * // dept | total_salary | avg_salary | employees
+ * // eng | 220 | 110 | 2
+ * // hr | 170 | 85 | 2
+ * ```
+ */
+
+import type { AggFn, AggName } from "./groupby.ts";
+
+// ─── NamedAgg ─────────────────────────────────────────────────────────────────
+
+/**
+ * Specification that binds a source column, an aggregation function, and
+ * (implicitly via the dict key) an output column name.
+ *
+ * Create with the `namedAgg()` factory to avoid `new` boilerplate.
+ */
+export class NamedAgg {
+ /** Source column to read from the DataFrame. */
+ readonly column: string;
+ /** Aggregation to apply — a built-in name or a custom function. */
+ readonly aggfunc: AggName | AggFn;
+
+ constructor(column: string, aggfunc: AggName | AggFn) {
+ this.column = column;
+ this.aggfunc = aggfunc;
+ }
+}
+
+/**
+ * Factory shorthand for `new NamedAgg(column, aggfunc)`.
+ *
+ * @example
+ * ```ts
+ * df.groupby("dept").aggNamed({
+ * total: namedAgg("salary", "sum"),
+ * avg: namedAgg("salary", "mean"),
+ * });
+ * ```
+ */
+export function namedAgg(column: string, aggfunc: AggName | AggFn): NamedAgg {
+ return new NamedAgg(column, aggfunc);
+}
+
+/** A dict of output-column-name → NamedAgg spec. */
+export type NamedAggSpec = Readonly<Record<string, NamedAgg>>;
+
+/**
+ * Returns true if every value in the spec record is a `NamedAgg` instance.
+ * Used to distinguish `NamedAggSpec` from plain `Record<string, AggName | AggFn>`.
+ */
+export function isNamedAggSpec(spec: unknown): spec is NamedAggSpec {
+ if (typeof spec !== "object" || spec === null) {
+ return false;
+ }
+ return Object.values(spec as Record<string, unknown>).every((v) => v instanceof NamedAgg);
+}
diff --git a/src/index.ts b/src/index.ts
index 44aa1357..b95ad4be 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -3,6 +3,7 @@
*
* @packageDocumentation
*/
+// merged: 2026-04-09T19:37Z (re-merge main into PR branch, barrel-export conflicts resolved by keeping PR superset)
// Core exports will be added here as features are implemented.
// Each module is imported and re-exported from its feature file in src/.
@@ -45,12 +46,16 @@ export { DatetimeAccessor } from "./core/index.ts";
export type { DatetimeSeriesLike } from "./core/index.ts";
export { DataFrameGroupBy, SeriesGroupBy } from "./groupby/index.ts";
export type { AggFn, AggName, AggSpec } from "./groupby/index.ts";
+export { NamedAgg, namedAgg, isNamedAggSpec } from "./groupby/index.ts";
+export type { NamedAggSpec } from "./groupby/index.ts";
export { describe, quantile } from "./stats/index.ts";
export type { DescribeOptions } from "./stats/index.ts";
export { readCsv, toCsv } from "./io/index.ts";
export type { ReadCsvOptions, ToCsvOptions } from "./io/index.ts";
export { readJson, toJson } from "./io/index.ts";
export type { ReadJsonOptions, ToJsonOptions, JsonOrient } from "./io/index.ts";
+export { jsonNormalize } from "./io/index.ts";
+export type { JsonNormalizeOptions, JsonPath } from "./io/index.ts";
export { pearsonCorr, dataFrameCorr, dataFrameCov } from "./stats/index.ts";
export type { CorrMethod, CorrOptions, CovOptions } from "./stats/index.ts";
export { Rolling } from "./window/index.ts";
@@ -81,6 +86,10 @@ export type {
} from "./reshape/index.ts";
export { stack, unstack, STACK_DEFAULT_SEP } from "./reshape/index.ts";
export type { StackOptions, UnstackOptions } from "./reshape/index.ts";
+export { wideToLong } from "./reshape/index.ts";
+export type { WideToLongOptions } from "./reshape/index.ts";
+export { pivotTableFull } from "./reshape/index.ts";
+export type { PivotTableFullOptions } from "./reshape/index.ts";
export { MultiIndex } from "./core/index.ts";
export type { MultiIndexOptions } from "./core/index.ts";
export { rankSeries, rankDataFrame } from "./stats/index.ts";
@@ -114,6 +123,187 @@ export {
export type { ClipOptions, RoundOptions, DataFrameElemOptions } from "./stats/index.ts";
export { valueCounts, dataFrameValueCounts } from "./stats/index.ts";
export type { ValueCountsOptions, DataFrameValueCountsOptions } from "./stats/index.ts";
+export { whereSeries, maskSeries, whereDataFrame, maskDataFrame } from "./stats/index.ts";
+export type {
+ WherePredicate,
+ SeriesCond,
+ DataFrameCond,
+ WhereMaskOptions,
+} from "./stats/index.ts";
+export {
+ seriesEq,
+ seriesNe,
+ seriesLt,
+ seriesGt,
+ seriesLe,
+ seriesGe,
+ dataFrameEq,
+ dataFrameNe,
+ dataFrameLt,
+ dataFrameGt,
+ dataFrameLe,
+ dataFrameGe,
+} from "./stats/index.ts";
+export type { CompareOp, SeriesOther, DataFrameOther } from "./stats/index.ts";
+export { shiftSeries, diffSeries, dataFrameShift, dataFrameDiff } from "./stats/index.ts";
+export type { ShiftDiffDataFrameOptions } from "./stats/index.ts";
+export { interpolateSeries, dataFrameInterpolate } from "./stats/index.ts";
+export type {
+ InterpolateMethod,
+ LimitDirection,
+ InterpolateOptions,
+ DataFrameInterpolateOptions,
+} from "./stats/index.ts";
+export { fillnaSeries, fillnaDataFrame } from "./stats/index.ts";
+export type {
+ FillnaMethod,
+ FillnaSeriesOptions,
+ ColumnFillMap,
+ FillnaDataFrameOptions,
+} from "./stats/index.ts";
+export { Interval, IntervalIndex } from "./core/index.ts";
+export type { IntervalClosed, IntervalIndexOptions } from "./core/index.ts";
+export { cut, qcut, cutIntervalIndex, qcutIntervalIndex } from "./stats/index.ts";
+export type { CutOptions, QCutOptions } from "./stats/index.ts";
+export { sampleSeries, sampleDataFrame } from "./stats/index.ts";
+export type { SampleSeriesOptions, SampleDataFrameOptions } from "./stats/index.ts";
+export { applySeries, applymap, dataFrameApply } from "./stats/index.ts";
+export type { DataFrameApplyOptions } from "./stats/index.ts";
+export { CategoricalIndex } from "./core/index.ts";
+export type { CategoricalIndexOptions } from "./core/index.ts";
+export {
+ pipeSeries,
+ dataFramePipe,
+ pipeTo,
+ dataFramePipeTo,
+ pipeChain,
+ dataFramePipeChain,
+} from "./stats/index.ts";
+
+export { Period, PeriodIndex } from "./core/index.ts";
+export type { PeriodFreq, PeriodIndexOptions } from "./core/index.ts";
+export { Timedelta, TimedeltaIndex } from "./core/index.ts";
+export type { TimedeltaComponents, TimedeltaIndexOptions } from "./core/index.ts";
+export {
+ Day,
+ Hour,
+ Minute,
+ Second,
+ Milli,
+ Week,
+ MonthEnd,
+ MonthBegin,
+ YearEnd,
+ YearBegin,
+ BusinessDay,
+} from "./core/index.ts";
+export type { DateOffset, WeekOptions } from "./core/index.ts";
+export { DatetimeIndex, date_range, bdate_range, resolveFreq } from "./core/index.ts";
+export type { DateRangeFreq, DateRangeOptions, DatetimeIndexOptions } from "./core/index.ts";
+export { TZDatetimeIndex, tz_localize, tz_convert } from "./core/index.ts";
+export {
+ seriesFloor,
+ dataFrameFloor,
+ seriesCeil,
+ dataFrameCeil,
+ seriesTrunc,
+ dataFrameTrunc,
+ seriesSqrt,
+ dataFrameSqrt,
+ seriesExp,
+ dataFrameExp,
+ seriesLog,
+ dataFrameLog,
+ seriesLog2,
+ dataFrameLog2,
+ seriesLog10,
+ dataFrameLog10,
+ seriesSign,
+ dataFrameSign,
+} from "./stats/index.ts";
+export {
+ seriesPow,
+ dataFramePow,
+ seriesMod,
+ dataFrameMod,
+ seriesFloorDiv,
+ dataFrameFloorDiv,
+} from "./stats/index.ts";
+export {
+ seriesAdd,
+ seriesRadd,
+ seriesSub,
+ seriesRsub,
+ seriesMul,
+ seriesRmul,
+ seriesDiv,
+ seriesRdiv,
+ dataFrameAdd,
+ dataFrameRadd,
+ dataFrameSub,
+ dataFrameRsub,
+ dataFrameMul,
+ dataFrameRmul,
+ dataFrameDiv,
+ dataFrameRdiv,
+} from "./stats/index.ts";
+export { getDummies, dataFrameGetDummies } from "./stats/index.ts";
+export type { GetDummiesOptions, DataFrameGetDummiesOptions } from "./stats/index.ts";
+export { factorize, seriesFactorize } from "./stats/index.ts";
+export type { FactorizeOptions, FactorizeResult } from "./stats/index.ts";
+export { crosstab, seriesCrosstab } from "./stats/index.ts";
+export type { AggFunc, Normalize, CrosstabOptions } from "./stats/index.ts";
+export { toNumeric, toNumericArray, toNumericScalar, toNumericSeries } from "./stats/index.ts";
+export type { ToNumericDowncast, ToNumericErrors, ToNumericOptions } from "./stats/index.ts";
+export { seriesMemoryUsage, dataFrameMemoryUsage } from "./stats/index.ts";
+export type { MemoryUsageOptions } from "./stats/index.ts";
+export { selectDtypes } from "./stats/index.ts";
+export type { DtypeSelector, SelectDtypesOptions } from "./stats/index.ts";
+export { clipSeriesWithBounds, clipDataFrameWithBounds } from "./stats/index.ts";
+export type {
+ BoundArg,
+ SeriesClipBoundsOptions,
+ DataFrameClipBoundsOptions,
+} from "./stats/index.ts";
+export { Timestamp } from "./core/index.ts";
+export type { TimestampOptions, TimestampComponents, TimestampUnit } from "./core/index.ts";
+export { dataFrameAssign } from "./core/index.ts";
+export type { AssignColSpec, AssignSpec } from "./core/index.ts";
+export { inferDtype } from "./stats/index.ts";
+export type { InferredDtype, InferDtypeOptions } from "./stats/index.ts";
+export { isna, notna, isnull, notnull } from "./stats/index.ts";
+export { dropna, dropnaSeries, dropnaDataFrame } from "./stats/index.ts";
+export type { DropnaHow, DropnaDataFrameOptions } from "./stats/index.ts";
+export { combineFirstSeries, combineFirstDataFrame } from "./stats/index.ts";
+export { natCompare, natSorted, natSortKey, natArgSort } from "./core/index.ts";
+export type { NatSortOptions, NatSortedOptions } from "./core/index.ts";
+export { searchsorted, searchsortedMany, argsortScalars } from "./core/index.ts";
+export type { SearchSortedSide, SearchSortedOptions } from "./core/index.ts";
+export { valueCountsBinned } from "./stats/index.ts";
+export type { ValueCountsBinnedOptions } from "./stats/index.ts";
+
+export {
+ duplicatedSeries,
+ duplicatedDataFrame,
+ dropDuplicatesSeries,
+ dropDuplicatesDataFrame,
+} from "./stats/index.ts";
+export type {
+ KeepPolicy,
+ DuplicatedDataFrameOptions,
+ DuplicatedSeriesOptions,
+} from "./stats/index.ts";
+export { reindexSeries, reindexDataFrame } from "./core/index.ts";
+export type { ReindexMethod, ReindexSeriesOptions, ReindexDataFrameOptions } from "./core/index.ts";
+
+export { alignSeries, alignDataFrame } from "./core/index.ts";
+export type { AlignSeriesOptions, AlignDataFrameOptions } from "./core/index.ts";
+
+export { explodeSeries, explodeDataFrame } from "./stats/index.ts";
+export type { ExplodeOptions, ExplodeDataFrameOptions } from "./stats/index.ts";
+
+export { isin, dataFrameIsin } from "./stats/index.ts";
+export type { IsinValues, IsinDict, DataFrameIsinValues } from "./stats/index.ts";
export {
insertColumn,
@@ -131,29 +321,9 @@ export type {
DictTight,
SplitInput,
} from "./core/index.ts";
-export { wideToLong } from "./reshape/index.ts";
-export type { WideToLongOptions } from "./reshape/index.ts";
-export { cut, qcut } from "./stats/index.ts";
-export type { BinResult, CutOptions, QCutOptions } from "./stats/index.ts";
export { rollingSem, rollingSkew, rollingKurt, rollingQuantile } from "./stats/index.ts";
export type { WindowExtOptions, RollingQuantileOptions } from "./stats/index.ts";
-export { seriesWhere, seriesMask, dataFrameWhere, dataFrameMask } from "./stats/index.ts";
-export type {
- SeriesCond,
- DataFrameCond,
- SeriesWhereOptions,
- DataFrameWhereOptions,
-} from "./stats/index.ts";
-export {
- isna,
- notna,
- isnull,
- notnull,
- fillna,
- dropna,
- countna,
- countValid,
-} from "./stats/index.ts";
+export { fillna, countna, countValid } from "./stats/index.ts";
export type { IsnaInput, FillnaOptions, DropnaOptions } from "./stats/index.ts";
export {
getAttrs,
@@ -175,7 +345,6 @@ export {
pipe,
seriesApply,
seriesTransform,
- dataFrameApply,
dataFrameApplyMap,
dataFrameTransform,
dataFrameTransformRows,
@@ -233,7 +402,6 @@ export {
export type {
NormalizeForm,
StrInput,
- GetDummiesOptions,
ExtractAllOptions,
SplitExpandOptions,
ExtractGroupsOptions,
diff --git a/src/io/index.ts b/src/io/index.ts
index 1788a87b..d4f27f3b 100644
--- a/src/io/index.ts
+++ b/src/io/index.ts
@@ -8,3 +8,5 @@ export { readCsv, toCsv } from "./csv.ts";
export type { ReadCsvOptions, ToCsvOptions } from "./csv.ts";
export { readJson, toJson } from "./json.ts";
export type { ReadJsonOptions, ToJsonOptions, JsonOrient } from "./json.ts";
+export { jsonNormalize } from "./json_normalize.ts";
+export type { JsonPath, JsonNormalizeOptions } from "./json_normalize.ts";
diff --git a/src/io/json_normalize.ts b/src/io/json_normalize.ts
new file mode 100644
index 00000000..3eae0b9c
--- /dev/null
+++ b/src/io/json_normalize.ts
@@ -0,0 +1,376 @@
+/**
+ * jsonNormalize — flatten nested JSON to a flat DataFrame.
+ *
+ * Mirrors `pandas.json_normalize()`:
+ * - `jsonNormalize(data, options?)` — normalize semi-structured JSON data into
+ * a flat table.
+ *
+ * Key capabilities:
+ * - Flattens nested dicts using a configurable separator (default `"."`).
+ * - `recordPath` — path (or array of paths) to nested arrays of records.
+ * - `meta` — top-level (or path) fields to pull into every output row.
+ * - `metaPrefix` — prefix for meta columns (default: none).
+ * - `recordPrefix` — prefix for record-level columns (default: none).
+ * - `errors` — `"raise"` (default) or `"ignore"` for missing meta keys.
+ * - `maxLevel` — maximum depth to flatten nested dicts (undefined = unlimited).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { RangeIndex } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import { Dtype } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── JSON value types (no `any`) ──────────────────────────────────────────────
+
+/** A JSON primitive (leaf value). */
+type JsonPrimitive = string | number | boolean | null;
+
+/** Any valid JSON value. */
+type JsonValue = JsonPrimitive | JsonValue[] | JsonObject;
+
+/** A JSON object (dict). */
+interface JsonObject {
+ [key: string]: JsonValue;
+}
+
+// ─── Public types ─────────────────────────────────────────────────────────────
+
+/**
+ * A single path segment or a multi-step path for `recordPath` / `meta` fields.
+ *
+ * - `string` — a single key name
+ * - `string[]` — a sequence of keys to traverse (e.g. `["a", "b"]` → `data.a.b`)
+ */
+export type JsonPath = string | readonly string[];
+
+/** Options for {@link jsonNormalize}. */
+export interface JsonNormalizeOptions {
+ /**
+ * Path in each record to the list of child records to normalize.
+ * May be a string key, an array of keys (nested path), or an array of
+ * such paths to normalize multiple levels.
+ * Default: undefined (normalize each top-level record as-is).
+ */
+ readonly recordPath?: JsonPath | readonly JsonPath[];
+ /**
+ * Fields from the outer record to include as columns in every output row.
+ * Each entry may be a string key or an array of keys for a nested path.
+ * Default: undefined.
+ */
+ readonly meta?: readonly JsonPath[];
+ /**
+ * Prefix to prepend to meta columns. Default: `""`.
+ */
+ readonly metaPrefix?: string;
+ /**
+ * Prefix to prepend to record columns. Default: `""`.
+ */
+ readonly recordPrefix?: string;
+ /**
+ * Separator used to join nested key names into a flat column name.
+ * Default: `"."`.
+ */
+ readonly sep?: string;
+ /**
+ * `"raise"` — throw an error when a `meta` key is missing.
+ * `"ignore"` — use `null` for missing meta keys.
+ * Default: `"raise"`.
+ */
+ readonly errors?: "raise" | "ignore";
+ /**
+ * Maximum nesting depth to flatten. Dicts deeper than this become
+ * sub-objects (JSON strings) rather than being expanded.
+ * Default: unlimited (`undefined`).
+ */
+ readonly maxLevel?: number;
+}
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+/** Resolve a {@link JsonPath} to a string key or array of string keys. */
+function toPathArray(p: JsonPath): readonly string[] {
+ return typeof p === "string" ? [p] : p;
+}
+
+/** Traverse a nested object along `path`, returning the value or `undefined`. */
+function getPath(obj: JsonObject, path: readonly string[]): JsonValue | undefined {
+ let cur: JsonValue = obj;
+ for (const key of path) {
+ if (typeof cur !== "object" || cur === null || Array.isArray(cur)) {
+ return undefined;
+ }
+ cur = (cur as JsonObject)[key] as JsonValue;
+ if (cur === undefined) {
+ return undefined;
+ }
+ }
+ return cur;
+}
+
+/**
+ * Flatten a single JSON object to a `Record`, joining nested
+ * key paths with `sep` up to `maxLevel` depth.
+ */
+function flattenObject(
+ obj: JsonObject,
+ sep: string,
+ maxLevel: number | undefined,
+ prefix: string,
+ depth: number,
+): Record<string, Scalar> {
+ const result: Record<string, Scalar> = {};
+ for (const [k, v] of Object.entries(obj)) {
+ const fullKey = prefix === "" ? k : `${prefix}${depth === 0 ? "" : sep}${k}`;
+ const atMax = maxLevel !== undefined && depth >= maxLevel;
+ if (!atMax && typeof v === "object" && v !== null && !Array.isArray(v)) {
+ const nested = flattenObject(v as JsonObject, sep, maxLevel, fullKey, depth + 1);
+ for (const [nk, nv] of Object.entries(nested)) {
+ result[nk] = nv;
+ }
+ } else if (Array.isArray(v) || (typeof v === "object" && v !== null)) {
+ // Arrays at this level become JSON strings
+ result[fullKey] = JSON.stringify(v);
+ } else {
+ result[fullKey] = v as Scalar;
+ }
+ }
+ return result;
+}
+
+/**
+ * Normalize data along a single `recordPath`, extracting meta fields from
+ * the outer record.
+ */
+function normalizeWithPath(
+ records: readonly JsonObject[],
+ recordPath: readonly string[],
+ meta: readonly (readonly string[])[],
+ metaPrefix: string,
+ recordPrefix: string,
+ sep: string,
+ errors: "raise" | "ignore",
+ maxLevel: number | undefined,
+): Record<string, Scalar>[] {
+ const rows: Record<string, Scalar>[] = [];
+ for (const record of records) {
+ // Extract nested records
+ const nested = getPath(record, recordPath);
+ if (nested === undefined || nested === null) {
+ continue;
+ }
+ if (!Array.isArray(nested)) {
+ throw new TypeError(
+ `jsonNormalize: recordPath "${recordPath.join(sep)}" did not point to an array`,
+ );
+ }
+ const childRecords = nested as JsonValue[];
+
+ // Extract meta values from this parent record
+ const metaValues: Record<string, Scalar> = {};
+ for (const metaPath of meta) {
+ const colName = metaPrefix + (metaPath.length === 1 ? metaPath[0] : metaPath.join(sep));
+ const val = getPath(record, metaPath);
+ if (val === undefined) {
+ if (errors === "raise") {
+ throw new Error(`jsonNormalize: meta key "${metaPath.join(".")}" not found in record`);
+ }
+ metaValues[colName] = null;
+ } else if (typeof val === "object") {
+ // Nested object / array → stringify
+ metaValues[colName] = JSON.stringify(val);
+ } else {
+ metaValues[colName] = val as Scalar;
+ }
+ }
+
+ // Flatten each child record and merge meta
+ for (const child of childRecords) {
+ let childFlat: Record<string, Scalar>;
+ if (typeof child === "object" && child !== null && !Array.isArray(child)) {
+ childFlat = flattenObject(child as JsonObject, sep, maxLevel, recordPrefix, 0);
+ } else {
+ childFlat = { [recordPrefix === "" ? "value" : recordPrefix]: child as Scalar };
+ }
+ rows.push({ ...childFlat, ...metaValues });
+ }
+ }
+ return rows;
+}
+
+// ─── Public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Normalize semi-structured JSON data into a flat DataFrame.
+ *
+ * @param data - A JSON object, an array of JSON objects, or a JSON string.
+ * @param options - Normalization options.
+ * @returns A flat {@link DataFrame}.
+ *
+ * @example
+ * ```ts
+ * import { jsonNormalize } from "tsb";
+ *
+ * const data = [
+ * { id: 1, info: { name: "Alice", age: 30 } },
+ * { id: 2, info: { name: "Bob", age: 25 } },
+ * ];
+ * const df = jsonNormalize(data);
+ * // columns: ["id", "info.name", "info.age"]
+ * ```
+ */
+export function jsonNormalize(
+ data: JsonObject | readonly JsonObject[] | string,
+ options: JsonNormalizeOptions = {},
+): DataFrame {
+ const {
+ recordPath,
+ meta = [],
+ metaPrefix = "",
+ recordPrefix = "",
+ sep = ".",
+ errors = "raise",
+ maxLevel,
+ } = options;
+
+ // ── Parse input ────────────────────────────────────────────────────────────
+ let records: readonly JsonObject[];
+ if (typeof data === "string") {
+ const parsed: unknown = JSON.parse(data);
+ if (Array.isArray(parsed)) {
+ records = parsed as JsonObject[];
+ } else if (typeof parsed === "object" && parsed !== null) {
+ records = [parsed as JsonObject];
+ } else {
+ throw new TypeError("jsonNormalize: JSON string must be an object or array");
+ }
+ } else if (Array.isArray(data)) {
+ records = data as readonly JsonObject[];
+ } else {
+ records = [data as JsonObject];
+ }
+
+ // ── Normalise meta paths ───────────────────────────────────────────────────
+ const metaPaths: readonly (readonly string[])[] = meta.map(toPathArray);
+
+ // ── Normalise recordPath ───────────────────────────────────────────────────
+ let rows: Record<string, Scalar>[];
+
+ if (recordPath === undefined) {
+ // No recordPath — flatten each top-level record
+ rows = [];
+ for (const record of records) {
+ const flat = flattenObject(record, sep, maxLevel, recordPrefix, 0);
+ // Remap keys with recordPrefix already applied in flattenObject
+ const row: Record<string, Scalar> = {};
+ for (const [k, v] of Object.entries(flat)) {
+ row[k] = v;
+ }
+ // Attach meta (from same record)
+ for (const metaPath of metaPaths) {
+ const colName = metaPrefix + (metaPath.length === 1 ? metaPath[0] : metaPath.join(sep));
+ const val = getPath(record, metaPath);
+ if (val === undefined) {
+ if (errors === "raise") {
+ throw new Error(`jsonNormalize: meta key "${metaPath.join(".")}" not found in record`);
+ }
+ row[colName] = null;
+ } else if (typeof val === "object") {
+ row[colName] = JSON.stringify(val);
+ } else {
+ row[colName] = val as Scalar;
+ }
+ }
+ rows.push(row);
+ }
+ } else {
+ // recordPath provided
+ // Normalise to array of (single) paths
+ const pathList: readonly (readonly string[])[] = (() => {
+ if (Array.isArray(recordPath) && recordPath.length > 0 && Array.isArray(recordPath[0])) {
+ // Array of paths
+ return (recordPath as readonly JsonPath[]).map(toPathArray);
+ }
+ return [toPathArray(recordPath as JsonPath)];
+ })();
+
+ // Chain paths: for multiple recordPaths, each subsequent path is applied
+ // to the records produced by the previous one. This matches pandas
+ // behaviour where you can supply a list of levels to drill into.
+ let currentRecords: readonly JsonObject[] = records;
+ for (let i = 0; i < pathList.length; i++) {
+ const path = pathList[i];
+ if (path === undefined) {
+ continue;
+ }
+ const isFinal = i === pathList.length - 1;
+ if (isFinal) {
+ rows = normalizeWithPath(
+ currentRecords,
+ path,
+ metaPaths,
+ metaPrefix,
+ recordPrefix,
+ sep,
+ errors,
+ maxLevel,
+ );
+ } else {
+ // Drill into intermediate path to get next level records
+ const nextRecords: JsonObject[] = [];
+ for (const rec of currentRecords) {
+ const nested = getPath(rec, path);
+ if (Array.isArray(nested)) {
+ for (const child of nested) {
+ if (typeof child === "object" && child !== null && !Array.isArray(child)) {
+ nextRecords.push(child as JsonObject);
+ }
+ }
+ }
+ }
+ currentRecords = nextRecords;
+ }
+ }
+ rows ??= [];
+ }
+
+ // ── Build column sets and DataFrame ───────────────────────────────────────
+ if (rows.length === 0) {
+ return DataFrame.fromColumns({});
+ }
+
+ // Union of all column names (preserve first-seen insertion order)
+ const colOrder: string[] = [];
+ const colSet = new Set<string>();
+ for (const row of rows) {
+ for (const k of Object.keys(row)) {
+ if (!colSet.has(k)) {
+ colSet.add(k);
+ colOrder.push(k);
+ }
+ }
+ }
+
+ const colData: Record<string, Scalar[]> = {};
+ for (const col of colOrder) {
+ colData[col] = rows.map((r) => (col in r ? r[col] : null) as Scalar);
+ }
+
+ // Build Series columns and infer dtypes
+ const seriesMap = new Map<string, Series<Scalar>>();
+ for (const [col, vals] of Object.entries(colData)) {
+ let dtype: Dtype;
+ if (vals.every((v) => v === null || typeof v === "number")) {
+ dtype = vals.some((v) => v !== null && !Number.isInteger(v)) ? Dtype.float64 : Dtype.int64;
+ } else if (vals.every((v) => v === null || typeof v === "boolean")) {
+ dtype = Dtype.bool;
+ } else {
+ dtype = Dtype.object;
+ }
+ seriesMap.set(col, new Series({ data: vals, name: col, dtype }));
+ }
+
+ return new DataFrame(seriesMap, new RangeIndex(rows.length));
+}
diff --git a/src/reshape/index.ts b/src/reshape/index.ts
index 849c435d..6e03a5c3 100644
--- a/src/reshape/index.ts
+++ b/src/reshape/index.ts
@@ -12,3 +12,5 @@ export { stack, unstack, STACK_DEFAULT_SEP } from "./stack_unstack.ts";
export type { StackOptions, UnstackOptions } from "./stack_unstack.ts";
export { wideToLong } from "./wide_to_long.ts";
export type { WideToLongOptions } from "./wide_to_long.ts";
+export { pivotTableFull } from "./pivot_table.ts";
+export type { PivotTableFullOptions } from "./pivot_table.ts";
diff --git a/src/reshape/pivot_table.ts b/src/reshape/pivot_table.ts
new file mode 100644
index 00000000..9dbb9933
--- /dev/null
+++ b/src/reshape/pivot_table.ts
@@ -0,0 +1,384 @@
+/**
+ * pivot_table — full pivot table with margins (grand totals) support.
+ *
+ * Extends `pivotTable` from `pivot.ts` with:
+ * - **`margins`**: add a grand-total row and column (default `false`)
+ * - **`margins_name`**: label for the grand-total row/column (default `"All"`)
+ * - **`sort`**: sort row and column labels alphabetically (default `true`)
+ *
+ * Mirrors `pandas.pivot_table`.
+ *
+ * @example
+ * ```ts
+ * const df = DataFrame.fromColumns({
+ * region: ["North","North","South","South"],
+ * product: ["A","B","A","B"],
+ * sales: [100, 200, 150, 250],
+ * });
+ * pivotTableFull(df, {
+ * index: "region",
+ * columns: "product",
+ * values: "sales",
+ * aggfunc: "sum",
+ * margins: true,
+ * });
+ * // A B All
+ * // North 100 200 300
+ * // South 150 250 400
+ * // All 250 450 700
+ * ```
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+import type { AggFuncName, PivotTableOptions } from "./pivot.ts";
+
+// ─── public types ──────────────────────────────────────────────────────────────
+
+/**
+ * Options for {@link pivotTableFull}.
+ *
+ * Extends {@link PivotTableOptions} with margins, margins_name, and sort.
+ */
+export interface PivotTableFullOptions extends PivotTableOptions {
+ /**
+ * If `true`, add a grand-total row and column to the result.
+ * The totals are computed by applying `aggfunc` over all raw values
+ * (not over already-aggregated cell values).
+ * Default `false`.
+ */
+ readonly margins?: boolean;
+ /**
+ * Label for the grand-total row and column when `margins` is `true`.
+ * Default `"All"`.
+ */
+ readonly margins_name?: string;
+ /**
+ * Sort row and column labels alphabetically before assembling the result.
+ * Default `true`.
+ */
+ readonly sort?: boolean;
+}
+
+// ─── private helpers ──────────────────────────────────────────────────────────
+
+/** True when a scalar is missing. */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/** Normalise a string | string[] to string[]. */
+function toArr(x: string | readonly string[]): string[] {
+ return typeof x === "string" ? [x] : [...x];
+}
+
+/** Read a scalar from a DataFrame column by row position. */
+function getVal(df: DataFrame, col: string, ri: number): Scalar {
+ return df.col(col).values[ri] ?? null;
+}
+
+/** Build a composite key from multiple columns at a given row position. */
+function makeKey(df: DataFrame, cols: readonly string[], ri: number): string {
+ return cols.map((c) => String(getVal(df, c, ri))).join("\x00");
+}
+
+/** Collect unique keys in insertion order from a key-generation function. */
+function collectUniqueKeys(n: number, keyFn: (i: number) => string): string[] {
+ const order: string[] = [];
+ const seen = new Set<string>();
+ for (let i = 0; i < n; i++) {
+ const k = keyFn(i);
+ if (!seen.has(k)) {
+ seen.add(k);
+ order.push(k);
+ }
+ }
+ return order;
+}
+
+/** Resolve which value columns to aggregate. */
+function resolveValuesCols(
+ df: DataFrame,
+ optValues: PivotTableOptions["values"],
+ idxCols: string[],
+ colCols: string[],
+): string[] {
+ if (optValues !== undefined) {
+ const cols = toArr(optValues);
+ for (const c of cols) {
+ if (!df.has(c)) {
+ throw new RangeError(`values column "${c}" does not exist.`);
+ }
+ }
+ return cols;
+ }
+ const exclude = new Set([...idxCols, ...colCols]);
+ return df.columns.values.filter((c) => !exclude.has(c));
+}
+
+/** Compute an aggregation over an array of raw numeric values. */
+function aggregate(nums: number[], fn: AggFuncName): number {
+ if (fn === "count") {
+ return nums.length;
+ }
+ if (nums.length === 0) {
+ return Number.NaN;
+ }
+ if (fn === "first") {
+ return nums[0] as number;
+ }
+ if (fn === "last") {
+ return nums.at(-1) as number;
+ }
+ if (fn === "min") {
+ return Math.min(...nums);
+ }
+ if (fn === "max") {
+ return Math.max(...nums);
+ }
+ if (fn === "sum") {
+ return nums.reduce((s, v) => s + v, 0);
+ }
+ // mean
+ return nums.reduce((s, v) => s + v, 0) / nums.length;
+}
+
+/**
+ * Bucket key for a (rowKey, colKey, valueCol) triple.
+ * Uses \x01 as separator (different from \x00 used inside multi-col keys).
+ */
+function bucketKey(rk: string, ck: string, valCol: string): string {
+ return `${rk}\x01${ck}\x01${valCol}`;
+}
+
+/** Build all raw-value buckets for the pivot table. */
+function buildBuckets(
+ df: DataFrame,
+ idxCols: string[],
+ colCols: string[],
+ valuesCols: string[],
+): {
+ rowKeyOrder: string[];
+ colKeyOrder: string[];
+ buckets: Map<string, number[]>;
+} {
+ const n = df.index.size;
+ const rowKeyFn = (ri: number): string => makeKey(df, idxCols, ri);
+ const colKeyFn = (ri: number): string => makeKey(df, colCols, ri);
+
+ const rowKeyOrder = collectUniqueKeys(n, rowKeyFn);
+ const colKeyOrder = collectUniqueKeys(n, colKeyFn);
+
+ const buckets = new Map<string, number[]>();
+
+ for (let ri = 0; ri < n; ri++) {
+ const rk = rowKeyFn(ri);
+ const ck = colKeyFn(ri);
+ for (const valCol of valuesCols) {
+ const key = bucketKey(rk, ck, valCol);
+ let bucket = buckets.get(key);
+ if (bucket === undefined) {
+ bucket = [];
+ buckets.set(key, bucket);
+ }
+ const v = getVal(df, valCol, ri);
+ if (!isMissing(v) && typeof v === "number") {
+ bucket.push(v);
+ }
+ }
+ }
+
+ return { rowKeyOrder, colKeyOrder, buckets };
+}
+
+/** Compute the aggregated cell value, applying fill_value when empty. */
+function cellValue(
+ rk: string,
+ ck: string,
+ valCol: string,
+ buckets: Map<string, number[]>,
+ aggfunc: AggFuncName,
+ fillValue: Scalar,
+): Scalar {
+ const bucket = buckets.get(bucketKey(rk, ck, valCol));
+ if (bucket === undefined || bucket.length === 0) {
+ return aggfunc === "count" ? 0 : fillValue;
+ }
+ return aggregate(bucket, aggfunc);
+}
+
+/** Compute the margin value for a (merged-key, valCol) pair by concatenating buckets. */
+function marginValue(
+ keys: string[],
+ fixedKey: string,
+ valCol: string,
+ buckets: Map<string, number[]>,
+ fixedIsRow: boolean,
+ aggfunc: AggFuncName,
+ fillValue: Scalar,
+): Scalar {
+ const combined: number[] = [];
+ for (const k of keys) {
+ const bkey = fixedIsRow ? bucketKey(fixedKey, k, valCol) : bucketKey(k, fixedKey, valCol);
+ const bucket = buckets.get(bkey);
+ if (bucket) {
+ combined.push(...bucket);
+ }
+ }
+ if (combined.length === 0) {
+ return aggfunc === "count" ? 0 : fillValue;
+ }
+ return aggregate(combined, aggfunc);
+}
+
+/** Convert a composite row key to a display Label. */
+function rowKeyToLabel(rk: string): Label {
+ const parts = rk.split("\x00");
+ // parts[0] is string | undefined; ?? null gives string | null, a subset of Label
+ const single = parts[0] ?? null;
+ return parts.length === 1 ? single : parts.join(", ");
+}
+
+// ─── main export ──────────────────────────────────────────────────────────────
+
+/**
+ * Create a full pivot table with optional grand-total margins.
+ *
+ * Mirrors `pandas.pivot_table` / `pandas.DataFrame.pivot_table`.
+ *
+ * @param df - Source DataFrame.
+ * @param options - Full pivot table options.
+ * @returns A new aggregated DataFrame.
+ */
+export function pivotTableFull(df: DataFrame, options: PivotTableFullOptions): DataFrame {
+ const aggfunc: AggFuncName = options.aggfunc ?? "mean";
+ const fillValue: Scalar = options.fill_value ?? null;
+ const dropna: boolean = options.dropna ?? false;
+ const margins: boolean = options.margins ?? false;
+ const marginsName: string = options.margins_name ?? "All";
+ const sort: boolean = options.sort ?? true;
+
+ const idxCols = toArr(options.index);
+ const colCols = toArr(options.columns);
+
+ for (const c of [...idxCols, ...colCols]) {
+ if (!df.has(c)) {
+ throw new RangeError(`Column "${c}" does not exist.`);
+ }
+ }
+
+ const valuesCols = resolveValuesCols(df, options.values, idxCols, colCols);
+ const { rowKeyOrder, colKeyOrder, buckets } = buildBuckets(df, idxCols, colCols, valuesCols);
+
+ // Optionally sort row and column keys
+ const finalRowOrder = sort ? [...rowKeyOrder].sort() : rowKeyOrder;
+ const finalColOrder = sort ? [...colKeyOrder].sort() : colKeyOrder;
+
+ const isSingleValue = valuesCols.length === 1;
+
+ // Build output column name list
+ const outColNames: string[] = [];
+ for (const ck of finalColOrder) {
+ for (const valCol of valuesCols) {
+ outColNames.push(isSingleValue ? ck : `${valCol}_${ck}`);
+ }
+ }
+ if (margins) {
+ for (const valCol of valuesCols) {
+ outColNames.push(isSingleValue ? marginsName : `${valCol}_${marginsName}`);
+ }
+ }
+
+ // Build output column arrays
+ const outCols: Record = {};
+ for (const name of outColNames) {
+ outCols[name] = [];
+ }
+
+ // Fill data rows
+ for (const rk of finalRowOrder) {
+ for (const ck of finalColOrder) {
+ for (const valCol of valuesCols) {
+ const name = isSingleValue ? ck : `${valCol}_${ck}`;
+ const v = cellValue(rk, ck, valCol, buckets, aggfunc, fillValue);
+ outCols[name]?.push(v);
+ }
+ }
+ if (margins) {
+ // "All" column for this row: aggregate across all column groups
+ for (const valCol of valuesCols) {
+ const allColName = isSingleValue ? marginsName : `${valCol}_${marginsName}`;
+ const v = marginValue(colKeyOrder, rk, valCol, buckets, true, aggfunc, fillValue);
+ outCols[allColName]?.push(v);
+ }
+ }
+ }
+
+ // Build row index labels
+ const rowIndexLabels: Label[] = finalRowOrder.map(rowKeyToLabel);
+
+ // Append margins row ("All" row) if requested
+ if (margins) {
+ for (const ck of finalColOrder) {
+ for (const valCol of valuesCols) {
+ const name = isSingleValue ? ck : `${valCol}_${ck}`;
+ const v = marginValue(rowKeyOrder, ck, valCol, buckets, false, aggfunc, fillValue);
+ outCols[name]?.push(v);
+ }
+ }
+ // Grand total (corner cell)
+ for (const valCol of valuesCols) {
+ const allColName = isSingleValue ? marginsName : `${valCol}_${marginsName}`;
+ const grandBucket: number[] = [];
+ for (const rk of rowKeyOrder) {
+ for (const ck of colKeyOrder) {
+ const bucket = buckets.get(bucketKey(rk, ck, valCol));
+ if (bucket) {
+ grandBucket.push(...bucket);
+ }
+ }
+ }
+ const v =
+ grandBucket.length === 0
+ ? aggfunc === "count"
+ ? 0
+ : fillValue
+ : aggregate(grandBucket, aggfunc);
+ outCols[allColName]?.push(v);
+ }
+ rowIndexLabels.push(marginsName);
+ }
+
+ // Apply dropna: remove all-missing columns and rows
+ if (dropna) {
+ const colsToKeep = outColNames.filter((name) => outCols[name]?.some((v) => !isMissing(v)));
+ const keptCols: Record = {};
+ for (const name of colsToKeep) {
+ const c = outCols[name];
+ if (c !== undefined) {
+ keptCols[name] = c;
+ }
+ }
+ const rowsToKeep = rowIndexLabels.map((_, ri) =>
+ colsToKeep.some((name) => !isMissing(keptCols[name]?.[ri] ?? null)),
+ );
+ const keptLabels = rowIndexLabels.filter((_, ri) => rowsToKeep[ri]);
+ const filteredCols: Record = {};
+ for (const name of colsToKeep) {
+ const c = keptCols[name];
+ if (c !== undefined) {
+ filteredCols[name] = c.filter((_, ri) => rowsToKeep[ri]);
+ }
+ }
+ return DataFrame.fromColumns(filteredCols, {
+ index: new Index(keptLabels),
+ });
+ }
+
+ return DataFrame.fromColumns(outCols, {
+ index: new Index(rowIndexLabels),
+ });
+}
diff --git a/src/reshape/wide_to_long.ts b/src/reshape/wide_to_long.ts
index e7b4cce3..8f8141ba 100644
--- a/src/reshape/wide_to_long.ts
+++ b/src/reshape/wide_to_long.ts
@@ -1,218 +1,219 @@
/**
- * wide_to_long — reshape a wide DataFrame to a long format by collapsing
- * stub-prefixed column groups into rows.
+ * wide_to_long — reshape a wide DataFrame to long format using column name stubs.
*
* Mirrors `pandas.wide_to_long(df, stubnames, i, j, sep='', suffix='\\d+')`.
*
- * Given a DataFrame whose columns include groups like
- * `"A1"`, `"A2"`, `"B1"`, `"B2"` (stubs `["A","B"]`, separator `""`, suffix `\\d+`),
- * this function pivots those groups into long format where each unique suffix
- * value becomes a new row:
- *
- * ```
- * id num A B
- * x 1 1 5
- * x 2 3 7
- * y 1 2 6
- * y 2 4 8
- * ```
+ * Takes a wide-format DataFrame where multiple columns share a common prefix
+ * (stub) and a varying suffix, and reshapes it into a long-format DataFrame
+ * where each stub becomes a column and the suffixes become values in a new
+ * column named `j`.
*
* @example
* ```ts
- * import { DataFrame } from "tsb";
- * import { wideToLong } from "tsb";
- *
* const df = DataFrame.fromColumns({
- * id: ["x", "y"],
- * A1: [1, 2],
- * A2: [3, 4],
- * B1: [5, 6],
- * B2: [7, 8],
+ * id: ["x", "y"],
+ * A1: [1, 2],
+ * A2: [3, 4],
+ * B1: [5, 6],
+ * B2: [7, 8],
* });
- *
- * const long = wideToLong(df, ["A", "B"], "id", "num");
- * // long.columns.values → ["id", "num", "A", "B"]
- * // long.shape → [4, 4]
+ * wideToLong(df, ["A", "B"], "id", "year");
+ * // id year A B
+ * // x 1 1 5
+ * // y 1 2 6
+ * // x 2 3 7
+ * // y 2 4 8
* ```
*
* @module
*/
-import type { Index } from "../core/base-index.ts";
-import { DataFrame } from "../core/frame.ts";
-import { RangeIndex } from "../core/range-index.ts";
+import { DataFrame } from "../core/index.ts";
+import type { Index } from "../core/index.ts";
+import { RangeIndex } from "../core/index.ts";
import type { Label, Scalar } from "../types.ts";
-// ─── public types ──────────────────────────────────────────────────────────────
+// ─── public types ─────────────────────────────────────────────────────────────
/** Options for {@link wideToLong}. */
export interface WideToLongOptions {
/**
- * Separator between stub name and suffix in column names.
- * Defaults to `""` (no separator).
- * @example `sep: "_"` matches columns like `"value_2021"`, `"value_2022"`.
+ * Separator between the stub name and the suffix in column names.
+ * For example, `sep: "_"` matches `"A_1"`, `"A_2"`, etc.
+ * @defaultValue `""`
*/
readonly sep?: string;
/**
- * Regular expression (as a string) that the suffix must match.
- * Defaults to `"\\d+"` (one or more digits).
- * @example `suffix: "[a-z]+"` matches alphabetic suffixes.
+ * Regular expression (or string pattern) that matches the suffix portion of
+ * column names after the stub and separator. The entire rest of the column
+ * name (after `stub + sep`) must match this pattern.
+ * @defaultValue `/\d+/` — numeric suffixes only
*/
- readonly suffix?: string;
+ readonly suffix?: RegExp | string;
}
// ─── helpers ──────────────────────────────────────────────────────────────────
-/** Normalise a string-or-string-array option to `string[]`. */
+/** Normalize a string or string-array argument to `string[]`. */
function toStringArray(x: readonly string[] | string): string[] {
return typeof x === "string" ? [x] : [...x];
}
+/** Escape a string for safe embedding in a `RegExp` pattern. */
+function escapeRegex(s: string): string {
+ return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
/**
- * Collect the unique suffix values that appear in the DataFrame column names
- * for the given stubs, separator, and suffix regex.
+ * Build a compiled anchor regex for a single stub.
*
- * Returns suffixes in the order they first appear (scanning columns left to right).
+ * Matches column names of the form `{stub}{sep}{suffix}` where `suffix`
+ * matches the full remaining text. The suffix portion is captured in group 1.
*/
-function collectSuffixes(
- colNames: readonly string[],
- stubs: readonly string[],
- sep: string,
- suffixRe: RegExp,
-): string[] {
- const seen = new Map(); // suffix → first-seen position
- for (const col of colNames) {
- for (const stub of stubs) {
- const prefix = stub + sep;
- if (col.startsWith(prefix)) {
- const rest = col.slice(prefix.length);
- const m = rest.match(suffixRe);
- if (m !== null && m[0] === rest) {
- const pos = seen.size;
- if (!seen.has(rest)) {
- seen.set(rest, pos);
- }
- }
- }
- }
- }
- return [...seen.keys()].sort((a, b) => {
- // Sort numerically when both look like integers, otherwise lexicographically.
- const na = Number(a);
- const nb = Number(b);
- if (!(Number.isNaN(na) || Number.isNaN(nb))) {
- return na - nb;
- }
- return a < b ? -1 : a > b ? 1 : 0;
- });
+function buildStubRegex(stub: string, sep: string, suffixPattern: string): RegExp {
+ return new RegExp(`^${escapeRegex(stub)}${escapeRegex(sep)}(${suffixPattern})$`);
}
-// ─── wideToLong ───────────────────────────────────────────────────────────────
+/**
+ * Try to parse a suffix string as a number; if it is not purely numeric,
+ * return it as-is.
+ */
+function parseSuffix(raw: string): Scalar {
+ return /^-?\d+(\.\d+)?$/.test(raw) ? Number(raw) : raw;
+}
+
+// ─── main function ────────────────────────────────────────────────────────────
/**
- * Reshape a wide-format DataFrame to long format by collapsing stub-prefixed
- * column groups into rows.
+ * Reshape a wide-format DataFrame to long format using column name stubs.
*
- * Mirrors `pandas.wide_to_long(df, stubnames, i, j, sep='', suffix='\\d+')`.
+ * Each group of columns that shares a stub prefix (e.g. `A1`, `A2`, `B1`,
+ * `B2` with stubs `["A", "B"]`) is collapsed into a single column per stub,
+ * with a new column named `j` holding the extracted suffix values.
+ *
+ * Columns not listed in `stubnames` or `i` are silently dropped (mirrors
+ * pandas behaviour).
*
- * @param df Source DataFrame (not mutated).
- * @param stubnames Stub name(s) that prefix the wide columns (e.g. `["A", "B"]`).
- * @param i Column name(s) to use as id variables (kept for every row).
- * @param j Name of the new column that will hold the suffix value.
- * @param options Optional `sep` and `suffix` overrides.
- * @returns A new long-format DataFrame.
+ * @param df - Source wide-format DataFrame.
+ * @param stubnames - One or more column-name prefixes (stubs) to reshape.
+ * @param i - Column(s) to use as identifier variables (kept as-is).
+ * @param j - Name of the new column that holds the extracted suffixes.
+ * @param options - Optional `sep` and `suffix` settings.
+ * @returns Long-format DataFrame with id columns, `j`, and one column per stub.
*
- * @throws {RangeError} if any `i` column does not exist in `df`.
- * @throws {RangeError} if `j` conflicts with an existing non-stub column name.
+ * @throws {RangeError} If an `i` column does not exist.
+ * @throws {RangeError} If `j` conflicts with an existing column name that is
+ * not a stub column.
+ * @throws {RangeError} If no stub columns are found for any of the stubs.
*/
export function wideToLong(
df: DataFrame,
stubnames: readonly string[] | string,
i: readonly string[] | string,
j: string,
- options: WideToLongOptions = {},
+ options?: WideToLongOptions,
): DataFrame {
const stubs = toStringArray(stubnames);
const idCols = toStringArray(i);
- const sep = options.sep ?? "";
- const suffixPattern = options.suffix ?? "\\d+";
- const suffixRe = new RegExp(`^(?:${suffixPattern})$`);
+ const sep = options?.sep ?? "";
+ const rawSuffix = options?.suffix ?? /\d+/;
+ const suffixPattern = rawSuffix instanceof RegExp ? rawSuffix.source : rawSuffix;
- // Validate id columns exist.
+ // ── validate id columns ────────────────────────────────────────────────────
for (const col of idCols) {
if (!df.has(col)) {
- throw new RangeError(`id column "${col}" does not exist in DataFrame.`);
+ throw new RangeError(`wide_to_long: id column "${col}" not found in DataFrame.`);
}
}
- // j must not conflict with a non-stub, non-id column.
- const colNames = [...df.columns.values];
+ // ── validate j does not shadow a non-stub existing column ─────────────────
+ // (pandas raises ValueError if j clashes with a remaining non-stub column)
const stubSet = new Set(stubs);
- for (const col of colNames) {
- if (col === j && !stubSet.has(col) && !idCols.includes(col)) {
- throw new RangeError(`Column name "${j}" conflicts with existing column.`);
- }
+ if (!stubSet.has(j) && df.has(j) && !idCols.includes(j)) {
+ // Allow j to equal a stub name (it will be overwritten), but not an
+ // unrelated column.
+ throw new RangeError(
+ `wide_to_long: j column name "${j}" conflicts with an existing non-stub column.`,
+ );
}
- // Collect ordered suffix values.
- const suffixes = collectSuffixes(colNames, stubs, sep, suffixRe);
-
- const nRows = df.index.size;
-
- // Build output column arrays.
- const idArrays: Record = {};
- for (const col of idCols) {
- idArrays[col] = [];
- }
- const jArray: Scalar[] = [];
- const stubArrays: Record = {};
+ // ── build per-stub regexes and find all unique suffixes ───────────────────
+ const stubRegexes = new Map();
for (const stub of stubs) {
- stubArrays[stub] = [];
+ stubRegexes.set(stub, buildStubRegex(stub, sep, suffixPattern));
+ }
+
+ // Collect unique suffixes in first-seen order (scanning columns left-to-right).
+ const suffixOrder: string[] = [];
+ const suffixSeen = new Set();
+
+ for (const col of df.columns.values) {
+ for (const [, re] of stubRegexes) {
+ const m = re.exec(col);
+ if (m !== null) {
+ const rawSfx = m[1];
+ if (rawSfx !== undefined && !suffixSeen.has(rawSfx)) {
+ suffixSeen.add(rawSfx);
+ suffixOrder.push(rawSfx);
+ }
+ break; // column matched one stub; no need to check others
+ }
+ }
}
- // Coerce suffix to number if possible (for the j-column values).
- function coerceSuffix(s: string): Scalar {
- const n = Number(s);
- return Number.isNaN(n) ? s : n;
+ if (suffixOrder.length === 0) {
+ throw new RangeError(
+ `wide_to_long: no columns matched any of the stub patterns ${JSON.stringify(stubs)}.`,
+ );
}
- for (const suffix of suffixes) {
- for (let row = 0; row < nRows; row++) {
- // Append id column values.
+ // ── allocate output arrays ────────────────────────────────────────────────
+ const nRows = df.index.size;
+ const totalRows = nRows * suffixOrder.length;
+
+ const idColData: Map = new Map(idCols.map((c) => [c, []]));
+ const jCol: Scalar[] = [];
+ const stubColData: Map = new Map(stubs.map((s) => [s, []]));
+
+ // ── fill output arrays ────────────────────────────────────────────────────
+ for (const rawSfx of suffixOrder) {
+ const parsedJ = parseSuffix(rawSfx);
+ for (let ri = 0; ri < nRows; ri++) {
+ // id columns
for (const col of idCols) {
- const arr = idArrays[col];
- if (arr !== undefined) {
- arr.push((df.col(col).values[row] ?? null) as Scalar);
- }
+ (idColData.get(col) as Scalar[]).push(df.col(col).values[ri] ?? null);
}
- // Append j value.
- jArray.push(coerceSuffix(suffix));
- // Append stub values.
+ // j column
+ jCol.push(parsedJ);
+ // stub columns
for (const stub of stubs) {
- const wideColName = stub + sep + suffix;
- const arr = stubArrays[stub];
- if (arr !== undefined) {
- const wideCol = df.get(wideColName);
- const val: Scalar =
- wideCol !== undefined ? ((wideCol.values[row] ?? null) as Scalar) : null;
- arr.push(val);
- }
+ const colName = `${stub}${sep}${rawSfx}`;
+ const val: Scalar = df.has(colName) ? (df.col(colName).values[ri] ?? null) : null;
+ (stubColData.get(stub) as Scalar[]).push(val);
}
}
}
- // Assemble output DataFrame column map.
- const outData: Record = {};
+ // ── assemble output DataFrame ─────────────────────────────────────────────
+ const outCols: Record = {};
for (const col of idCols) {
- outData[col] = idArrays[col] ?? [];
+ const arr = idColData.get(col);
+ if (arr !== undefined) {
+ outCols[col] = arr;
+ }
}
- outData[j] = jArray;
+ outCols[j] = jCol;
for (const stub of stubs) {
- outData[stub] = stubArrays[stub] ?? [];
+ const arr = stubColData.get(stub);
+ if (arr !== undefined) {
+ outCols[stub] = arr;
+ }
}
- const totalRows = nRows * suffixes.length;
- const rowIndex = new RangeIndex(totalRows) as unknown as Index;
+ const rowIndex: Index =
+ totalRows === 0
+ ? (new RangeIndex(0) as unknown as Index)
+ : (new RangeIndex(totalRows) as unknown as Index);
- return DataFrame.fromColumns(outData as Record, { index: rowIndex });
+ return DataFrame.fromColumns(outCols, { index: rowIndex });
}
diff --git a/src/stats/add_sub_mul_div.ts b/src/stats/add_sub_mul_div.ts
new file mode 100644
index 00000000..56fb9b9a
--- /dev/null
+++ b/src/stats/add_sub_mul_div.ts
@@ -0,0 +1,355 @@
+/**
+ * add_sub_mul_div — element-wise addition, subtraction, multiplication, and
+ * true-division for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.add(other)` / `DataFrame.add(other)`
+ * - `Series.sub(other)` / `DataFrame.sub(other)`
+ * - `Series.mul(other)` / `DataFrame.mul(other)`
+ * - `Series.div(other)` / `DataFrame.div(other)` (true division — returns float)
+ * - `Series.radd(other)` / `DataFrame.radd(other)` (reversed operands)
+ * - `Series.rsub(other)` / `DataFrame.rsub(other)`
+ * - `Series.rmul(other)` / `DataFrame.rmul(other)`
+ * - `Series.rdiv(other)` / `DataFrame.rdiv(other)`
+ *
+ * Each function accepts either a **scalar** (number) or another
+ * **Series / DataFrame** of compatible shape as the right-hand operand.
+ * When two Series are supplied the operation is performed **positionally**
+ * (index labels are not used for alignment — same as pandas' default
+ * `fill_value=None` positional path when shapes match).
+ *
+ * Missing values (null / NaN / undefined) are propagated unchanged.
+ * Division by zero follows IEEE-754: `n / 0 → ±Infinity`, `0 / 0 → NaN`
+ * (consistent with `pandas.Series.div` on floats).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` is a number (possibly NaN / Infinity). */
+function isNum(v: Scalar): v is number {
+ return typeof v === "number";
+}
+
+/**
+ * Apply a two-argument numeric transform to two value arrays of the same
+ * length. Both `a[i]` and `b[i]` must be finite numbers; otherwise the
+ * missing/non-numeric value is propagated.
+ */
+function zipNumeric(
+ as: readonly Scalar[],
+ bs: readonly Scalar[],
+ fn: (a: number, b: number) => number,
+): Scalar[] {
+ const n = as.length;
+ const out: Scalar[] = new Array(n);
+ for (let i = 0; i < n; i++) {
+ const a = as[i] as Scalar;
+ const b = bs[i] as Scalar;
+ if (isNum(a) && !Number.isNaN(a) && isNum(b) && !Number.isNaN(b)) {
+ out[i] = fn(a, b);
+ } else {
+ out[i] = a === null || a === undefined || (isNum(a) && Number.isNaN(a)) ? a : b;
+ }
+ }
+ return out;
+}
+
+/**
+ * Apply a scalar numeric transform to a value array. Non-numeric values
+ * pass through unchanged.
+ */
+function mapScalar(
+ vals: readonly Scalar[],
+ scalar: number,
+ fn: (a: number, b: number) => number,
+): Scalar[] {
+ const out: Scalar[] = new Array(vals.length);
+ for (let i = 0; i < vals.length; i++) {
+ const v = vals[i] as Scalar;
+ out[i] = isNum(v) && !Number.isNaN(v) ? fn(v, scalar) : v;
+ }
+ return out;
+}
+
+/** Apply a binary column-wise transform to every column of a DataFrame. */
+function colWiseBinary(
+ df: DataFrame,
+ other: DataFrame | number,
+ fn: (a: number, b: number) => number,
+): DataFrame {
+ const colMap = new Map>();
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ let data: Scalar[];
+ if (typeof other === "number") {
+ data = mapScalar(col.values, other, fn);
+ } else {
+ const otherCol = other.col(name);
+ data = zipNumeric(col.values, otherCol.values, fn);
+ }
+ colMap.set(name, new Series({ data, index: df.index, name }));
+ }
+ return new DataFrame(colMap, df.index);
+}
+
+// ─── add ──────────────────────────────────────────────────────────────────────
+
+const _add = (a: number, b: number): number => a + b;
+
+/**
+ * Add `other` to each element of `series`.
+ *
+ * `other` may be a scalar number or another Series of the same length.
+ * Missing values are propagated unchanged.
+ * Mirrors `pandas.Series.add(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesAdd } from "tsb";
+ * const s = new Series({ data: [1, 2, 3] });
+ * seriesAdd(s, 10).values; // [11, 12, 13]
+ * seriesAdd(s, new Series({ data: [4, 5, 6] })).values; // [5, 7, 9]
+ * ```
+ */
+export function seriesAdd(series: Series, other: number | Series): Series {
+ const data =
+ typeof other === "number"
+ ? mapScalar(series.values, other, _add)
+ : zipNumeric(series.values, other.values, _add);
+ return new Series({ data, index: series.index, name: series.name });
+}
+
+/**
+ * Add `other` to every numeric cell of `df`.
+ *
+ * `other` may be a scalar number or a DataFrame with the same columns.
+ * Mirrors `pandas.DataFrame.add(other)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameAdd } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * dataFrameAdd(df, 10).col("a").values; // [11, 12]
+ * ```
+ */
+export function dataFrameAdd(df: DataFrame, other: number | DataFrame): DataFrame {
+ return colWiseBinary(df, other, _add);
+}
+
+/**
+ * Reversed addition: compute `other + series[i]` for each element.
+ *
+ * For a commutative operation this is equivalent to `seriesAdd`, but it is
+ * provided for API parity with `pandas.Series.radd`.
+ */
+export function seriesRadd(series: Series, other: number | Series): Series {
+ // addition is commutative
+ return seriesAdd(series, other);
+}
+
+/**
+ * Reversed addition for DataFrame: `other + df[col][i]`.
+ * Equivalent to `dataFrameAdd` due to commutativity.
+ * Mirrors `pandas.DataFrame.radd`.
+ */
+export function dataFrameRadd(df: DataFrame, other: number | DataFrame): DataFrame {
+ return dataFrameAdd(df, other);
+}
+
+// ─── sub ──────────────────────────────────────────────────────────────────────
+
+const _sub = (a: number, b: number): number => a - b;
+const _rsub = (a: number, b: number): number => b - a;
+
+/**
+ * Subtract `other` from each element of `series`.
+ *
+ * `other` may be a scalar number or another Series of the same length.
+ * Missing values are propagated unchanged.
+ * Mirrors `pandas.Series.sub(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesSub } from "tsb";
+ * const s = new Series({ data: [10, 20, 30] });
+ * seriesSub(s, 5).values; // [5, 15, 25]
+ * ```
+ */
+export function seriesSub(series: Series, other: number | Series): Series {
+ const data =
+ typeof other === "number"
+ ? mapScalar(series.values, other, _sub)
+ : zipNumeric(series.values, other.values, _sub);
+ return new Series({ data, index: series.index, name: series.name });
+}
+
+/**
+ * Subtract each element of `series` from `other` (reversed operands).
+ *
+ * Computes `other - series[i]`. Mirrors `pandas.Series.rsub(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesRsub } from "tsb";
+ * const s = new Series({ data: [1, 2, 3] });
+ * seriesRsub(s, 10).values; // [9, 8, 7] (10 - 1, 10 - 2, 10 - 3)
+ * ```
+ */
+export function seriesRsub(series: Series, other: number | Series): Series {
+ const data =
+ typeof other === "number"
+ ? mapScalar(series.values, other, _rsub)
+ : zipNumeric(series.values, other.values, _rsub);
+ return new Series({ data, index: series.index, name: series.name });
+}
+
+/**
+ * Subtract each element of `df` from a scalar or corresponding DataFrame cell.
+ * Mirrors `pandas.DataFrame.sub(other)`.
+ */
+export function dataFrameSub(df: DataFrame, other: number | DataFrame): DataFrame {
+ return colWiseBinary(df, other, _sub);
+}
+
+/**
+ * Reversed subtraction for DataFrame: `other - df[col][i]`.
+ * Mirrors `pandas.DataFrame.rsub(other)`.
+ */
+export function dataFrameRsub(df: DataFrame, other: number | DataFrame): DataFrame {
+ return colWiseBinary(df, other, _rsub);
+}
+
+// ─── mul ──────────────────────────────────────────────────────────────────────
+
+const _mul = (a: number, b: number): number => a * b;
+
+/**
+ * Multiply each element of `series` by `other`.
+ *
+ * `other` may be a scalar number or another Series of the same length.
+ * Missing values are propagated unchanged.
+ * Mirrors `pandas.Series.mul(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesMul } from "tsb";
+ * const s = new Series({ data: [1, 2, 3] });
+ * seriesMul(s, 3).values; // [3, 6, 9]
+ * ```
+ */
+export function seriesMul(series: Series, other: number | Series): Series {
+ const data =
+ typeof other === "number"
+ ? mapScalar(series.values, other, _mul)
+ : zipNumeric(series.values, other.values, _mul);
+ return new Series({ data, index: series.index, name: series.name });
+}
+
+/**
+ * Reversed multiplication: `other * series[i]`.
+ * For a commutative operation this is equivalent to `seriesMul`.
+ * Mirrors `pandas.Series.rmul`.
+ */
+export function seriesRmul(series: Series, other: number | Series): Series {
+ return seriesMul(series, other);
+}
+
+/**
+ * Multiply every numeric cell of `df` by `other`.
+ * Mirrors `pandas.DataFrame.mul(other)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameMul } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * dataFrameMul(df, 2).col("b").values; // [6, 8]
+ * ```
+ */
+export function dataFrameMul(df: DataFrame, other: number | DataFrame): DataFrame {
+ return colWiseBinary(df, other, _mul);
+}
+
+/**
+ * Reversed multiplication for DataFrame.
+ * Equivalent to `dataFrameMul` due to commutativity.
+ * Mirrors `pandas.DataFrame.rmul`.
+ */
+export function dataFrameRmul(df: DataFrame, other: number | DataFrame): DataFrame {
+ return dataFrameMul(df, other);
+}
+
+// ─── div (true division) ──────────────────────────────────────────────────────
+
+const _div = (a: number, b: number): number => a / b;
+const _rdiv = (a: number, b: number): number => b / a;
+
+/**
+ * Divide each element of `series` by `other` (true division).
+ *
+ * Division by zero follows IEEE-754: `n / 0 → ±Infinity`, `0 / 0 → NaN`.
+ * `other` may be a scalar number or another Series of the same length.
+ * Missing values are propagated unchanged.
+ * Mirrors `pandas.Series.div(other)` (also known as `truediv`).
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesDiv } from "tsb";
+ * const s = new Series({ data: [4, 9, 16] });
+ * seriesDiv(s, 2).values; // [2, 4.5, 8]
+ * ```
+ */
+export function seriesDiv(series: Series, other: number | Series): Series {
+ const data =
+ typeof other === "number"
+ ? mapScalar(series.values, other, _div)
+ : zipNumeric(series.values, other.values, _div);
+ return new Series({ data, index: series.index, name: series.name });
+}
+
+/**
+ * Reversed true-division: compute `other / series[i]` for each element.
+ * Mirrors `pandas.Series.rdiv(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesRdiv } from "tsb";
+ * const s = new Series({ data: [2, 4, 8] });
+ * seriesRdiv(s, 16).values; // [8, 4, 2] (16/2, 16/4, 16/8)
+ * ```
+ */
+export function seriesRdiv(series: Series, other: number | Series): Series {
+ const data =
+ typeof other === "number"
+ ? mapScalar(series.values, other, _rdiv)
+ : zipNumeric(series.values, other.values, _rdiv);
+ return new Series({ data, index: series.index, name: series.name });
+}
+
+/**
+ * Divide every numeric cell of `df` by `other` (true division).
+ * Mirrors `pandas.DataFrame.div(other)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameDiv } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [4, 9], b: [6, 8] });
+ * dataFrameDiv(df, 2).col("a").values; // [2, 4.5]
+ * ```
+ */
+export function dataFrameDiv(df: DataFrame, other: number | DataFrame): DataFrame {
+ return colWiseBinary(df, other, _div);
+}
+
+/**
+ * Reversed true-division for DataFrame: `other / df[col][i]`.
+ * Mirrors `pandas.DataFrame.rdiv(other)`.
+ */
+export function dataFrameRdiv(df: DataFrame, other: number | DataFrame): DataFrame {
+ return colWiseBinary(df, other, _rdiv);
+}
diff --git a/src/stats/apply.ts b/src/stats/apply.ts
new file mode 100644
index 00000000..cc1edf12
--- /dev/null
+++ b/src/stats/apply.ts
@@ -0,0 +1,168 @@
+/**
+ * apply — element-wise and axis-wise function application for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods:
+ * - `Series.apply(fn)` — apply a function to each element of a Series.
+ * - `DataFrame.applymap(fn)` / `DataFrame.map(fn)` — apply element-wise to every cell.
+ * - `DataFrame.apply(fn, axis=0|1)` — apply fn to each column or each row.
+ *
+ * All functions are **pure** (return new Series/DataFrame; inputs are unchanged).
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * Options for {@link dataFrameApply}.
+ *
+ * Controls which axis the aggregating function is applied along.
+ */
+export interface DataFrameApplyOptions {
+  /**
+   * Axis along which `fn` is applied. Accepts pandas-style numeric or string
+   * forms (the string aliases behave identically to their numeric twins).
+   *
+   * - `0` / `"index"` (default): apply `fn` to each **column** Series →
+   *   the result has one value per column, indexed by column names.
+   * - `1` / `"columns"`: apply `fn` to each **row** Series →
+   *   the result has one value per row, indexed by row labels.
+   */
+  readonly axis?: 0 | 1 | "index" | "columns";
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Build an `Index` from an array of strings for column-name axes. */
+function colIndex(names: readonly string[]): Index {
+ return new Index(names as readonly Label[]);
+}
+
+/** Extract a single row from a DataFrame as a Series (columns as index). */
+function extractRow(df: DataFrame, rowIdx: number, colIdx: Index): Series {
+ const rowData: Scalar[] = [];
+ for (const name of df.columns.values) {
+ const col = df.col(name);
+ rowData.push(col.values[rowIdx] as Scalar);
+ }
+ return new Series({ data: rowData, index: colIdx });
+}
+
+/** Reduce each column of `df` through `fn`; result is indexed by column name. */
+function applyAxis0(df: DataFrame, fn: (slice: Series) => Scalar): Series {
+  const names = df.columns.values;
+  const data: Scalar[] = names.map((name) => fn(df.col(name)));
+  return new Series({ data, index: colIndex(names) });
+}
+
+/** Reduce each row of `df` through `fn`; result is indexed by row label. */
+function applyAxis1(df: DataFrame, fn: (slice: Series) => Scalar): Series {
+  // Build the shared column-name index once; every extracted row reuses it.
+  const colIdx = colIndex(df.columns.values);
+  const data: Scalar[] = [];
+  for (let i = 0; i < df.index.size; i++) {
+    data.push(fn(extractRow(df, i, colIdx)));
+  }
+  return new Series({ data, index: df.index });
+}
+
+// ─── applySeries ──────────────────────────────────────────────────────────────
+
+/**
+ * Apply `fn` to every element of a Series, producing a new Series.
+ *
+ * `fn` is called with `(value, label)` for each position. The original index
+ * and name are carried over unchanged; the input Series is not mutated.
+ *
+ * Mirrors `pandas.Series.apply(func)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, applySeries } from "tsb";
+ * const s = new Series({ data: [1, 2, 3], name: "x" });
+ * applySeries(s, (v) => (v as number) * 2).values; // [2, 4, 6]
+ * ```
+ */
+export function applySeries(
+  series: Series,
+  fn: (value: Scalar, label: Label) => Scalar,
+): Series {
+  const mapped: Scalar[] = series.values.map((v, i) => fn(v as Scalar, series.index.at(i)));
+  return new Series({ data: mapped, index: series.index, name: series.name });
+}
+
+// ─── applymap ─────────────────────────────────────────────────────────────────
+
+/**
+ * Apply a function element-wise to every cell of a DataFrame.
+ *
+ * The function receives the cell value and the column name. The returned
+ * DataFrame preserves the original shape, index, and column names.
+ *
+ * Mirrors `pandas.DataFrame.applymap(func)` (renamed `DataFrame.map` in pandas 2.1+).
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, applymap } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * applymap(df, (v) => (v as number) ** 2).col("b").values; // [9, 16]
+ * ```
+ */
+export function applymap(df: DataFrame, fn: (value: Scalar, colName: string) => Scalar): DataFrame {
+  // Rebuild every column through `fn`; the result shares the input's row index.
+  // (Fix: the Map's generic arguments had been stripped, leaving `new Map>()`.)
+  const colMap = new Map<string, Series>();
+  for (const name of df.columns.values) {
+    const col = df.col(name);
+    const n = col.values.length;
+    const out: Scalar[] = new Array(n);
+    for (let i = 0; i < n; i++) {
+      out[i] = fn(col.values[i] as Scalar, name);
+    }
+    colMap.set(name, new Series({ data: out, index: df.index, name }));
+  }
+  return new DataFrame(colMap, df.index);
+}
+
+// ─── dataFrameApply ───────────────────────────────────────────────────────────
+
+/**
+ * Apply a function to each column or each row of a DataFrame.
+ *
+ * With `axis=0` (default): `fn` receives each **column** as a `Series`;
+ * the result is a `Series` indexed by column names (one value per column).
+ *
+ * With `axis=1`: `fn` receives each **row** as a `Series` (column names as index);
+ * the result is a `Series` indexed by row labels (one value per row).
+ *
+ * Mirrors `pandas.DataFrame.apply(func, axis=0|1)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameApply } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+ * // column sum (axis=0):
+ * dataFrameApply(df, (col) => col.sum()).values; // [6, 15]
+ * // row sum (axis=1):
+ * dataFrameApply(df, (row) => row.sum(), { axis: 1 }).values; // [5, 7, 9]
+ * ```
+ */
+export function dataFrameApply(
+  df: DataFrame,
+  fn: (slice: Series) => Scalar,
+  options: DataFrameApplyOptions = {},
+): Series {
+  const axis = options.axis ?? 0;
+  const rowWise = axis === 1 || axis === "columns";
+  return rowWise ? applyAxis1(df, fn) : applyAxis0(df, fn);
+}
diff --git a/src/stats/clip_with_bounds.ts b/src/stats/clip_with_bounds.ts
new file mode 100644
index 00000000..15d8efa9
--- /dev/null
+++ b/src/stats/clip_with_bounds.ts
@@ -0,0 +1,323 @@
+/**
+ * clip_with_bounds — element-wise clip with per-element or per-column/row bounds.
+ *
+ * Extends the scalar-only {@link clip} / {@link dataFrameClip} from `elem_ops`
+ * to support Series- and array-based bounds. Mirrors:
+ *
+ * - `pandas.Series.clip(lower, upper)` where lower/upper may be a Series
+ * - `pandas.DataFrame.clip(lower, upper, axis=0)` where lower/upper may be
+ * a Series (applied along the specified axis) or a DataFrame (element-wise)
+ *
+ * All functions are **pure** — inputs are never mutated.
+ * Missing values (null / NaN) propagate unchanged through every operation.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/**
+ * A scalar numeric bound, a positional array, a Series aligned by label, or a
+ * DataFrame (element-wise bounds — only meaningful for DataFrame clipping).
+ * `null` / `undefined` means "no bound on that side".
+ */
+export type BoundArg =
+  | number
+  | null
+  | undefined
+  | readonly (number | null)[]
+  | Series
+  | DataFrame;
+
+/** Options for {@link clipSeriesWithBounds}. */
+export interface SeriesClipBoundsOptions {
+  /**
+   * Lower bound. Values below this are replaced with the bound value.
+   * May be a scalar, a positional array, or a Series aligned by label.
+   * `null` / `undefined` = no lower bound.
+   */
+  readonly lower?: BoundArg;
+  /**
+   * Upper bound. Values above this are replaced with the bound value.
+   * May be a scalar, a positional array, or a Series aligned by label.
+   * `null` / `undefined` = no upper bound.
+   */
+  readonly upper?: BoundArg;
+}
+
+/** Options for {@link clipDataFrameWithBounds}. */
+export interface DataFrameClipBoundsOptions extends SeriesClipBoundsOptions {
+  /**
+   * Axis along which a Series bound is broadcast:
+   * - `0` / `"index"` (default): Series is indexed on **row labels** — each row
+   *   in the DataFrame is clipped to the bound at the matching row label.
+   * - `1` / `"columns"`: Series is indexed on **column names** — each column
+   *   in the DataFrame is clipped to the corresponding scalar bound.
+   *
+   * When `lower` / `upper` is a `DataFrame`, `axis` is ignored and clipping is
+   * done element-wise (position-wise, column by column).
+   */
+  readonly axis?: 0 | 1 | "index" | "columns";
+}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Type guard: `v` is a usable numeric value (excludes null / undefined / NaN). */
+function isFiniteNum(v: Scalar): v is number {
+  if (typeof v !== "number") {
+    return false;
+  }
+  return !Number.isNaN(v);
+}
+
+/**
+ * Resolve a `BoundArg` to a parallel numeric array of length `n`.
+ * - Scalar → every position gets that value (or `null` for no bound).
+ * - `readonly array` → used positionally; must have length `n`.
+ * - `Series` → aligned by label against `refIndex`; positions without a
+ *   matching label receive `null` (no bound at that position).
+ *
+ * NOTE(review): a `DataFrame` bound falls through to the Series branch here;
+ * callers must intercept DataFrame bounds before calling — confirm all do.
+ *
+ * @throws RangeError when a positional bound array's length differs from `n`.
+ */
+function resolveBound(
+  bound: BoundArg,
+  n: number,
+  refIndex: { at(i: number): Label; size: number },
+): (number | null)[] {
+  if (bound === null || bound === undefined) {
+    return new Array(n).fill(null);
+  }
+  if (typeof bound === "number") {
+    return new Array(n).fill(bound);
+  }
+  if (Array.isArray(bound)) {
+    const arr = bound as readonly (number | null)[];
+    if (arr.length !== n) {
+      throw new RangeError(`Bound array length ${arr.length} does not match Series length ${n}`);
+    }
+    // Normalize NaN (and any non-number residue) to "no bound at this position".
+    return arr.map((v) => (isFiniteNum(v) ? v : null));
+  }
+
+  // Series — align by label. Build a label → position lookup first.
+  // (Fix: the Map's generic arguments had been stripped, leaving a bare `new Map()`.)
+  const s = bound as Series;
+  const labelMap = new Map<string, number>();
+  for (let j = 0; j < s.index.size; j++) {
+    labelMap.set(String(s.index.at(j)), j);
+  }
+  const result: (number | null)[] = new Array(n).fill(null);
+  for (let i = 0; i < n; i++) {
+    const label = String(refIndex.at(i));
+    const j = labelMap.get(label);
+    if (j !== undefined) {
+      const v = s.values[j] as Scalar;
+      result[i] = isFiniteNum(v) ? v : null;
+    }
+  }
+  return result;
+}
+
+/** Clamp `v` into `[lo, hi]`; a `null` bound means that side is unbounded. */
+function clipValue(v: Scalar, lo: number | null, hi: number | null): Scalar {
+  if (!isFiniteNum(v)) {
+    // Non-numeric values (null, NaN, strings, …) pass through untouched.
+    return v;
+  }
+  // Apply the lower bound first, then the upper (so hi wins if lo > hi).
+  const floored = lo !== null && v < lo ? lo : v;
+  return hi !== null && floored > hi ? hi : floored;
+}
+
+// ─── Series clip with bounds ──────────────────────────────────────────────────
+
+/**
+ * Clip a Series with per-element lower / upper bounds.
+ *
+ * Bounds may be a scalar, a positional `(number|null)[]`, or a `Series`
+ * that is aligned against the input Series by **index label** (not position).
+ * Labels present in the bound Series but not in the input are ignored; labels
+ * in the input with no matching bound label are left unclipped on that side.
+ *
+ * Non-numeric values (null, NaN, strings, …) are passed through unchanged.
+ * Mirrors `pandas.Series.clip(lower, upper)` with Series bounds.
+ *
+ * @example
+ * ```ts
+ * import { Series, clipSeriesWithBounds } from "tsb";
+ *
+ * const s = new Series({ data: [1, 5, 10, 15], name: "x" });
+ * const lo = new Series({ data: [2, 2, 2, 2] });
+ * const hi = new Series({ data: [6, 6, 6, 6] });
+ * clipSeriesWithBounds(s, { lower: lo, upper: hi }).values;
+ * // [2, 5, 6, 6]
+ * ```
+ */
+export function clipSeriesWithBounds(
+  series: Series,
+  options: SeriesClipBoundsOptions = {},
+): Series {
+  const n = series.values.length;
+  // Resolve both bounds to per-position arrays, then clamp position by position.
+  const lows = resolveBound(options.lower, n, series.index);
+  const highs = resolveBound(options.upper, n, series.index);
+
+  const data: Scalar[] = series.values.map((v, i) =>
+    clipValue(v as Scalar, lows[i] as number | null, highs[i] as number | null),
+  );
+
+  return new Series({ data, index: series.index, name: series.name });
+}
+
+// ─── DataFrame clip with bounds ───────────────────────────────────────────────
+
+/**
+ * Clip every numeric cell of a DataFrame with flexible lower / upper bounds.
+ *
+ * ### Bound types
+ *
+ * | `lower` / `upper` | `axis` | Behaviour |
+ * |---|---|---|
+ * | scalar (`number \| null`) | any | Same scalar bound applied to every cell |
+ * | `Series` | `0` / `"index"` (default) | Series indexed on **row labels** — each row uses its matching scalar bound |
+ * | `Series` | `1` / `"columns"` | Series indexed on **column names** — each column uses its matching scalar bound |
+ * | `DataFrame` | ignored | Element-wise clipping — each cell uses its matching cell in the bound DataFrame |
+ * | positional `number[]` | `0` / `"index"` | Positional per-row bounds |
+ * | positional `number[]` | `1` / `"columns"` | Positional per-column bounds |
+ *
+ * Missing values (null / NaN) in the data propagate unchanged.
+ *
+ * Mirrors `pandas.DataFrame.clip(lower, upper, axis=0)` with Series/DataFrame bounds.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, Series, clipDataFrameWithBounds } from "tsb";
+ *
+ * const df = DataFrame.fromColumns({ a: [1, 5, 10], b: [2, 6, 12] });
+ *
+ * // Per-column bounds (axis=1)
+ * const lo = new Series({ data: [0, 3], index: new Index(["a", "b"]) });
+ * const hi = new Series({ data: [8, 9], index: new Index(["a", "b"]) });
+ * clipDataFrameWithBounds(df, { lower: lo, upper: hi, axis: 1 });
+ * // col "a": [1, 5, 8]   col "b": [3, 6, 9]
+ *
+ * // Element-wise bounds with a DataFrame
+ * const loDF = DataFrame.fromColumns({ a: [0, 0, 0], b: [3, 3, 3] });
+ * clipDataFrameWithBounds(df, { lower: loDF });
+ * // col "a": [1, 5, 10]  col "b": [3, 6, 12]
+ * ```
+ */
+export function clipDataFrameWithBounds(
+  df: DataFrame,
+  options: DataFrameClipBoundsOptions = {},
+): DataFrame {
+  const { lower, upper, axis = 0 } = options;
+  const nRows = df.index.size;
+  const colNames = df.columns.values;
+  const nCols = colNames.length;
+
+  // Element-wise DataFrame bounds take precedence; `axis` is ignored here.
+  if (lower instanceof DataFrame || upper instanceof DataFrame) {
+    return _clipDFElementWise(df, lower, upper);
+  }
+
+  const axisIsColumns = axis === 1 || axis === "columns";
+
+  if (axisIsColumns) {
+    // axis=1: each column gets its own scalar bound resolved from the Series/array.
+    const resolveColumnBounds = (bound: BoundArg | undefined): (number | null)[] => {
+      const aligned = resolveBound(bound, nCols, df.columns);
+      // Fallback: a Series whose labels match no column name but whose length
+      // equals the column count is treated as positional per-column bounds.
+      if (bound instanceof Series && aligned.every((v) => v === null) && bound.size === nCols) {
+        return bound.values.map((v) => (isFiniteNum(v) ? v : null));
+      }
+      return aligned;
+    };
+    const loBounds = resolveColumnBounds(lower);
+    const hiBounds = resolveColumnBounds(upper);
+
+    // (Fix: the Map's generic arguments had been stripped, leaving `new Map>()`.)
+    const colMap = new Map<string, Series>();
+    for (let ci = 0; ci < nCols; ci++) {
+      const name = colNames[ci] as string;
+      const col = df.col(name);
+      const lo = loBounds[ci] as number | null;
+      const hi = hiBounds[ci] as number | null;
+      const data: Scalar[] = col.values.map((v) => clipValue(v as Scalar, lo, hi));
+      colMap.set(name, new Series({ data, index: df.index, name }));
+    }
+    return new DataFrame(colMap, df.index);
+  }
+
+  // axis=0 (default): each row gets its own scalar bound resolved from the Series/array.
+  const loBounds = resolveBound(lower, nRows, df.index);
+  const hiBounds = resolveBound(upper, nRows, df.index);
+
+  const colMap = new Map<string, Series>();
+  for (const name of colNames) {
+    const col = df.col(name as string);
+    const data: Scalar[] = col.values.map((v, ri) =>
+      clipValue(v as Scalar, loBounds[ri] as number | null, hiBounds[ri] as number | null),
+    );
+    colMap.set(name as string, new Series({ data, index: df.index, name: name as string }));
+  }
+  return new DataFrame(colMap, df.index);
+}
+
+/**
+ * Element-wise clip against optional lower/upper DataFrames.
+ * Each cell [row, col] is clipped to [lo[row,col], hi[row,col]] when present.
+ *
+ * NOTE(review): when one bound is a DataFrame and the other is a Series/array,
+ * the non-DataFrame bound only participates here when it is a plain number;
+ * Series/array bounds on that side are silently ignored — confirm intended.
+ */
+function _clipDFElementWise(
+  df: DataFrame,
+  lower: BoundArg | DataFrame,
+  upper: BoundArg | DataFrame,
+): DataFrame {
+  const colNames = df.columns.values;
+  // (Fix: the Map's generic arguments had been stripped, leaving `new Map>()`.)
+  const colMap = new Map<string, Series>();
+
+  // Scalar fallbacks are loop-invariant — compute them once, not per column.
+  const loScalar: number | null =
+    !(lower instanceof DataFrame) && typeof lower === "number" && isFiniteNum(lower)
+      ? lower
+      : null;
+  const hiScalar: number | null =
+    !(upper instanceof DataFrame) && typeof upper === "number" && isFiniteNum(upper)
+      ? upper
+      : null;
+
+  // Per-row bound: prefer the bound DataFrame's cell, else the scalar fallback.
+  const boundAt = (boundCol: Series | null, scalar: number | null, ri: number): number | null => {
+    if (boundCol === null) {
+      return scalar;
+    }
+    const bv = boundCol.values[ri] as Scalar;
+    return isFiniteNum(bv) ? bv : null;
+  };
+
+  for (const name of colNames) {
+    const colName = name as string;
+    const col = df.col(colName);
+
+    // A bound DataFrame contributes a column only when it actually has one.
+    const loCol: Series | null =
+      lower instanceof DataFrame && lower.columns.values.includes(name) ? lower.col(colName) : null;
+    const hiCol: Series | null =
+      upper instanceof DataFrame && upper.columns.values.includes(name) ? upper.col(colName) : null;
+
+    const data: Scalar[] = col.values.map((v, ri) =>
+      clipValue(v as Scalar, boundAt(loCol, loScalar, ri), boundAt(hiCol, hiScalar, ri)),
+    );
+
+    colMap.set(colName, new Series({ data, index: df.index, name: colName }));
+  }
+
+  return new DataFrame(colMap, df.index);
+}
diff --git a/src/stats/combine_first.ts b/src/stats/combine_first.ts
new file mode 100644
index 00000000..1029bc82
--- /dev/null
+++ b/src/stats/combine_first.ts
@@ -0,0 +1,186 @@
+/**
+ * combine_first — update a Series/DataFrame with non-null values from another.
+ *
+ * Mirrors `pandas.Series.combine_first()` / `DataFrame.combine_first()`:
+ *
+ * - {@link combineFirstSeries} — fill missing values in `self` from `other`, taking the union of indices.
+ * - {@link combineFirstDataFrame} — fill missing cells in `self` from `other`, taking the union of row and column indices.
+ *
+ * ### Semantics
+ *
+ * The result has the **union** of the two index sets. For each label present
+ * in the result index, the value is:
+ *
+ * 1. The value from `self` if it is not missing (not `null`, `undefined`, or `NaN`).
+ * 2. Otherwise, the value from `other` if it has one.
+ * 3. Otherwise `null`.
+ *
+ * `self` values always take priority; `other` only fills gaps.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import type { Index } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` is a missing value (null, undefined, or NaN). */
+function isMissing(v: Scalar): boolean {
+  if (v === null || v === undefined) {
+    return true;
+  }
+  return typeof v === "number" && Number.isNaN(v);
+}
+
+/**
+ * Build a lookup map from label → array of positional indices for a given Index.
+ * Supports duplicate labels by storing all positions (in ascending order).
+ * (Fix: the Map's generic arguments had been stripped, leaving a bare `Map`.)
+ */
+function buildLabelMap(idx: Index): Map<string, number[]> {
+  const map = new Map<string, number[]>();
+  for (let i = 0; i < idx.size; i++) {
+    const key = String(idx.at(i));
+    const arr = map.get(key);
+    if (arr !== undefined) {
+      arr.push(i);
+    } else {
+      map.set(key, [i]);
+    }
+  }
+  return map;
+}
+
+// ─── Series ───────────────────────────────────────────────────────────────────
+
+/**
+ * Update calling Series with non-null values from `other`.
+ *
+ * The result index is the union of `self.index` and `other.index`. For each
+ * label, `self`'s value is used unless it is missing (null / undefined / NaN),
+ * in which case `other`'s value fills the gap; labels in neither gap-filler
+ * receive `null`.
+ *
+ * NOTE: the result can mix values from either Series regardless of their
+ * original dtype (effectively an "object"-dtype result in pandas terms).
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: [1, null, 3], index: ["x", "y", "z"] });
+ * const b = new Series({ data: [10, 20, 40], index: ["x", "y", "w"] });
+ * combineFirstSeries(a, b);
+ * // Series { x:1, y:20, z:3, w:40 }
+ * ```
+ */
+export function combineFirstSeries(self: Series, other: Series): Series {
+  const selfIdx = self.index as Index;
+  const otherIdx = other.index as Index;
+  const unionIdx = selfIdx.union(otherIdx);
+
+  const selfMap = buildLabelMap(selfIdx);
+  const otherMap = buildLabelMap(otherIdx);
+
+  const data: Scalar[] = [];
+
+  for (let i = 0; i < unionIdx.size; i++) {
+    const key = String(unionIdx.at(i));
+
+    // Duplicate labels: only the FIRST matching position is consulted.
+    const selfPositions = selfMap.get(key);
+    const selfVal =
+      selfPositions !== undefined ? (self.values[selfPositions[0] ?? 0] as Scalar) : undefined;
+
+    if (isMissing(selfVal)) {
+      // self has no usable value here — fall back to other, then to null.
+      const otherPositions = otherMap.get(key);
+      if (otherPositions !== undefined) {
+        data.push(other.values[otherPositions[0] ?? 0] as Scalar);
+      } else {
+        data.push(null);
+      }
+    } else {
+      data.push(selfVal as Scalar);
+    }
+  }
+
+  // The result keeps self's name (other's name is discarded).
+  return new Series({
+    data,
+    index: unionIdx,
+    name: self.name,
+  });
+}
+
+// ─── DataFrame ────────────────────────────────────────────────────────────────
+
+/**
+ * Update calling DataFrame with non-null values from `other`.
+ *
+ * The result has the **union** of both row indices and both column sets
+ * (self's columns first, then other-only columns). For each (row, column)
+ * cell, `self`'s value is used unless it is missing, in which case `other`'s
+ * value fills the gap; cells present in neither receive `null`.
+ *
+ * @example
+ * ```ts
+ * const a = DataFrame.fromColumns({ x: [1, null], y: [3, 4] }, { index: ["r0", "r1"] });
+ * const b = DataFrame.fromColumns({ x: [10, 20], z: [30, 40] }, { index: ["r0", "r2"] });
+ * combineFirstDataFrame(a, b);
+ * // DataFrame with rows r0,r1,r2 and cols x,y,z
+ * ```
+ */
+export function combineFirstDataFrame(self: DataFrame, other: DataFrame): DataFrame {
+  const selfRowIdx = self.index as Index;
+  const otherRowIdx = other.index as Index;
+  const unionRowIdx = selfRowIdx.union(otherRowIdx);
+
+  // Column union: self columns first, then other-only columns.
+  const selfCols = new Set(self.columns.values as string[]);
+  const unionCols: string[] = [...(self.columns.values as string[])];
+  for (const c of other.columns.values as string[]) {
+    if (!selfCols.has(c)) {
+      unionCols.push(c);
+    }
+  }
+
+  const selfRowMap = buildLabelMap(selfRowIdx);
+  const otherRowMap = buildLabelMap(otherRowIdx);
+
+  // (Fix: the Map's generic arguments had been stripped, leaving `new Map>()`.)
+  const resultColMap = new Map<string, Series>();
+
+  for (const colName of unionCols) {
+    // Hoist the column lookups out of the row loop — they are loop-invariant.
+    const selfCol = self.has(colName) ? self.col(colName) : null;
+    const otherCol = other.has(colName) ? other.col(colName) : null;
+
+    const data: Scalar[] = [];
+
+    for (let i = 0; i < unionRowIdx.size; i++) {
+      const rowKey = String(unionRowIdx.at(i));
+
+      let resolved: Scalar = null;
+
+      if (selfCol !== null) {
+        const selfRowPos = selfRowMap.get(rowKey);
+        if (selfRowPos !== undefined) {
+          // Duplicate labels: only the FIRST matching position is consulted.
+          const v = selfCol.values[selfRowPos[0] ?? 0] as Scalar;
+          if (!isMissing(v)) {
+            resolved = v;
+          }
+        }
+      }
+
+      if (isMissing(resolved) && otherCol !== null) {
+        const otherRowPos = otherRowMap.get(rowKey);
+        if (otherRowPos !== undefined) {
+          resolved = otherCol.values[otherRowPos[0] ?? 0] as Scalar;
+        }
+      }
+
+      data.push(resolved);
+    }
+
+    resultColMap.set(colName, new Series({ data, index: unionRowIdx }));
+  }
+
+  return new DataFrame(resultColMap, unionRowIdx);
+}
diff --git a/src/stats/compare.ts b/src/stats/compare.ts
new file mode 100644
index 00000000..63132b61
--- /dev/null
+++ b/src/stats/compare.ts
@@ -0,0 +1,326 @@
+/**
+ * compare — element-wise comparison operations for Series and DataFrame.
+ *
+ * Mirrors the following pandas methods (all return boolean results):
+ * - `Series.eq(other)` / `DataFrame.eq(other)` — element-wise `==`
+ * - `Series.ne(other)` / `DataFrame.ne(other)` — element-wise `!=`
+ * - `Series.lt(other)` / `DataFrame.lt(other)` — element-wise `<`
+ * - `Series.gt(other)` / `DataFrame.gt(other)` — element-wise `>`
+ * - `Series.le(other)` / `DataFrame.le(other)` — element-wise `<=`
+ * - `Series.ge(other)` / `DataFrame.ge(other)` — element-wise `>=`
+ *
+ * The `other` argument may be:
+ * - a **scalar** (`Scalar`) — compared against every element
+ * - a **Series** — compared element-by-element (by position)
+ * - a **DataFrame** — compared column-by-column, element-by-element
+ * (for DataFrame variants)
+ *
+ * Missing values (`null` / `undefined` / `NaN`) in either operand always yield
+ * `false` — matching pandas' default `fill_value=np.nan` behaviour.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Scalar } from "../types.ts";
+
+// ─── public types ─────────────────────────────────────────────────────────────
+
+/** The six binary comparison operators. */
+export type CompareOp = "eq" | "ne" | "lt" | "gt" | "le" | "ge";
+
+/**
+ * The `other` argument for Series comparison functions.
+ * May be a scalar (broadcast to every element) or another Series of the same
+ * length, compared element-by-element by position.
+ */
+export type SeriesOther = Scalar | Series;
+
+/**
+ * The `other` argument for DataFrame comparison functions.
+ * May be a scalar (broadcast to all cells) or another DataFrame (column-aligned).
+ */
+export type DataFrameOther = Scalar | DataFrame;
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when `v` is a non-null, non-NaN value that can be compared with `<`. */
+function isComparable(v: Scalar): boolean {
+  const missing =
+    v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+  return !missing;
+}
+
+/**
+ * Apply comparison `op` to two values.
+ * A missing operand (null / undefined / NaN) makes every operator — including
+ * `eq` on two nulls — return `false`, matching pandas' NaN != NaN convention.
+ */
+function compareScalars(a: Scalar, b: Scalar, op: CompareOp): boolean {
+  if (!isComparable(a) || !isComparable(b)) {
+    return false;
+  }
+  if (op === "eq") {
+    return a === b;
+  }
+  if (op === "ne") {
+    return a !== b;
+  }
+  // Ordering operators are only meaningful on numbers.
+  const x = a as number;
+  const y = b as number;
+  if (op === "lt") {
+    return x < y;
+  }
+  if (op === "gt") {
+    return x > y;
+  }
+  if (op === "le") {
+    return x <= y;
+  }
+  return x >= y;
+}
+
+/**
+ * Compare `vals[i]` against `others[i]` for every position and collect the
+ * boolean results. Typed `Scalar[]` so the output can feed Series/DataFrame
+ * constructors directly.
+ */
+function buildBoolArray(
+  vals: readonly Scalar[],
+  others: readonly Scalar[],
+  op: CompareOp,
+): Scalar[] {
+  const out: Scalar[] = vals.map((v, i) => compareScalars(v, others[i] as Scalar, op));
+  return out;
+}
+
+/**
+ * Resolve `SeriesOther` to a scalar array of length `n`.
+ * Scalars broadcast; a Series contributes its values directly.
+ *
+ * @throws RangeError when a Series operand's length differs from `n`.
+ */
+function resolveSeriesOther(other: SeriesOther, n: number): readonly Scalar[] {
+  if (!(other instanceof Series)) {
+    // Broadcast scalar
+    return new Array(n).fill(other as Scalar);
+  }
+  if (other.values.length !== n) {
+    throw new RangeError(
+      `Other Series length ${other.values.length} does not match Series length ${n}`,
+    );
+  }
+  return other.values;
+}
+
+// ─── Series comparison factory ────────────────────────────────────────────────
+
+function makeSeriesCompare(
+ series: Series,
+ other: SeriesOther,
+ op: CompareOp,
+): Series {
+ const n = series.values.length;
+ const others = resolveSeriesOther(other, n);
+ const data = buildBoolArray(series.values, others, op);
+ return new Series({ data, index: series.index, name: series.name });
+}
+
+// ─── Series comparison functions ──────────────────────────────────────────────
+
+/**
+ * Element-wise equality `==`.
+ *
+ * Returns a `Series` that is `true` where `series[i] === other[i]`.
+ * Missing values always yield `false`.
+ *
+ * Mirrors `pandas.Series.eq(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesEq } from "tsb";
+ * const s = new Series({ data: [1, 2, 3] });
+ * seriesEq(s, 2).values; // [false, true, false]
+ * ```
+ */
+export function seriesEq(series: Series, other: SeriesOther): Series {
+  return makeSeriesCompare(series, other, "eq");
+}
+
+/**
+ * Element-wise inequality `!=`.
+ *
+ * Missing values in either operand yield `false` — so `ne` is NOT the
+ * negation of `eq` in the presence of missing data.
+ *
+ * Mirrors `pandas.Series.ne(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesNe } from "tsb";
+ * const s = new Series({ data: [1, 2, 3] });
+ * seriesNe(s, 2).values; // [true, false, true]
+ * ```
+ */
+export function seriesNe(series: Series, other: SeriesOther): Series {
+  return makeSeriesCompare(series, other, "ne");
+}
+
+/**
+ * Element-wise less-than `<`.
+ *
+ * Missing values in either operand yield `false`.
+ *
+ * Mirrors `pandas.Series.lt(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesLt } from "tsb";
+ * const s = new Series({ data: [1, 2, 3] });
+ * seriesLt(s, 2).values; // [true, false, false]
+ * ```
+ */
+export function seriesLt(series: Series, other: SeriesOther): Series {
+  return makeSeriesCompare(series, other, "lt");
+}
+
+/**
+ * Element-wise greater-than `>`.
+ *
+ * Missing values in either operand yield `false`.
+ *
+ * Mirrors `pandas.Series.gt(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesGt } from "tsb";
+ * const s = new Series({ data: [1, 2, 3] });
+ * seriesGt(s, 2).values; // [false, false, true]
+ * ```
+ */
+export function seriesGt(series: Series, other: SeriesOther): Series {
+  return makeSeriesCompare(series, other, "gt");
+}
+
+/**
+ * Element-wise less-than-or-equal `<=`.
+ *
+ * Missing values in either operand yield `false`.
+ *
+ * Mirrors `pandas.Series.le(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesLe } from "tsb";
+ * const s = new Series({ data: [1, 2, 3] });
+ * seriesLe(s, 2).values; // [true, true, false]
+ * ```
+ */
+export function seriesLe(series: Series, other: SeriesOther): Series {
+  return makeSeriesCompare(series, other, "le");
+}
+
+/**
+ * Element-wise greater-than-or-equal `>=`.
+ *
+ * Missing values in either operand yield `false`.
+ *
+ * Mirrors `pandas.Series.ge(other)`.
+ *
+ * @example
+ * ```ts
+ * import { Series, seriesGe } from "tsb";
+ * const s = new Series({ data: [1, 2, 3] });
+ * seriesGe(s, 2).values; // [false, true, true]
+ * ```
+ */
+export function seriesGe(series: Series, other: SeriesOther): Series {
+  return makeSeriesCompare(series, other, "ge");
+}
+
+// ─── DataFrame helpers ────────────────────────────────────────────────────────
+
+/**
+ * Resolve `DataFrameOther` to a value array of length `n` for column `colName`.
+ * Scalars broadcast; a DataFrame contributes its matching column, or all-null
+ * when the column is absent (every comparison then yields `false`).
+ *
+ * @throws RangeError when the matching column's length differs from `n`.
+ */
+function resolveDfOther(other: DataFrameOther, colName: string, n: number): readonly Scalar[] {
+  if (!(other instanceof DataFrame)) {
+    // Broadcast scalar
+    return new Array(n).fill(other as Scalar);
+  }
+  const col = other.get(colName);
+  if (col === undefined) {
+    // Column missing → fill with null (comparisons will all be false)
+    return new Array(n).fill(null);
+  }
+  if (col.values.length !== n) {
+    throw new RangeError(
+      `Other DataFrame column "${colName}" length ${col.values.length} does not match ${n}`,
+    );
+  }
+  return col.values;
+}
+
+/**
+ * Apply comparison `op` column-wise across a DataFrame.
+ * (Fix: the Map's generic arguments had been stripped, leaving `new Map>()`.)
+ */
+function makeDataFrameCompare(df: DataFrame, other: DataFrameOther, op: CompareOp): DataFrame {
+  const nrows = df.shape[0];
+  const colMap = new Map<string, Series>();
+
+  for (const name of df.columns.values) {
+    const col = df.col(name);
+    const others = resolveDfOther(other, name, nrows);
+    const data = buildBoolArray(col.values, others, op);
+    colMap.set(name, new Series({ data, index: df.index, name }));
+  }
+
+  return new DataFrame(colMap, df.index);
+}
+
+// ─── DataFrame comparison functions ───────────────────────────────────────────
+
+/**
+ * Element-wise equality `==` across a DataFrame.
+ *
+ * `other` may be a scalar (broadcast to all cells) or another DataFrame
+ * (compared column-by-column). Missing values always yield `false`.
+ *
+ * Mirrors `pandas.DataFrame.eq(other)`.
+ *
+ * @example
+ * ```ts
+ * import { DataFrame, dataFrameEq } from "tsb";
+ * const df = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] });
+ * dataFrameEq(df, 2).col("a").values; // [false, true]
+ * ```
+ */
+export function dataFrameEq(df: DataFrame, other: DataFrameOther): DataFrame {
+  return makeDataFrameCompare(df, other, "eq");
+}
+
+/**
+ * Element-wise inequality `!=` across a DataFrame.
+ * Missing values in either operand yield `false`.
+ *
+ * Mirrors `pandas.DataFrame.ne(other)`.
+ */
+export function dataFrameNe(df: DataFrame, other: DataFrameOther): DataFrame {
+  return makeDataFrameCompare(df, other, "ne");
+}
+
+/**
+ * Element-wise less-than `<` across a DataFrame.
+ * Missing values in either operand yield `false`.
+ *
+ * Mirrors `pandas.DataFrame.lt(other)`.
+ */
+export function dataFrameLt(df: DataFrame, other: DataFrameOther): DataFrame {
+  return makeDataFrameCompare(df, other, "lt");
+}
+
+/**
+ * Element-wise greater-than `>` across a DataFrame.
+ * Missing values in either operand yield `false`.
+ *
+ * Mirrors `pandas.DataFrame.gt(other)`.
+ */
+export function dataFrameGt(df: DataFrame, other: DataFrameOther): DataFrame {
+  return makeDataFrameCompare(df, other, "gt");
+}
+
+/**
+ * Element-wise less-than-or-equal `<=` across a DataFrame.
+ * Missing values in either operand yield `false`.
+ *
+ * Mirrors `pandas.DataFrame.le(other)`.
+ */
+export function dataFrameLe(df: DataFrame, other: DataFrameOther): DataFrame {
+  return makeDataFrameCompare(df, other, "le");
+}
+
+/**
+ * Element-wise greater-than-or-equal `>=` across a DataFrame.
+ * Missing values in either operand yield `false`.
+ *
+ * Mirrors `pandas.DataFrame.ge(other)`.
+ */
+export function dataFrameGe(df: DataFrame, other: DataFrameOther): DataFrame {
+  return makeDataFrameCompare(df, other, "ge");
+}
diff --git a/src/stats/crosstab.ts b/src/stats/crosstab.ts
new file mode 100644
index 00000000..6cf257b9
--- /dev/null
+++ b/src/stats/crosstab.ts
@@ -0,0 +1,361 @@
+/**
+ * crosstab — compute a cross-tabulation of two or more factors.
+ *
+ * Mirrors `pandas.crosstab(index, columns, values, rownames, colnames,
+ * aggfunc, margins, margins_name, dropna, normalize)`.
+ *
+ * By default, counts the number of observations where the row factor equals
+ * row `r` **and** the column factor equals column `c`. When `values` and
+ * `aggfunc` are provided, aggregates those values instead of counting.
+ *
+ * @module
+ */
+
+import { DataFrame } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
// ─── public types ─────────────────────────────────────────────────────────────

/**
 * Aggregation function that reduces a non-empty array of numbers to a scalar.
 * `crosstab` only invokes it on cells that received at least one numeric
 * value; cells with no values are rendered as 0.
 */
export type AggFunc = (values: readonly number[]) => number;

/** Normalize mode for {@link CrosstabOptions}. */
export type Normalize = boolean | "index" | "columns" | "all";

/** Options for {@link crosstab}. */
export interface CrosstabOptions {
  /**
   * Numeric values to aggregate. When provided, `aggfunc` must also be
   * supplied. Length must equal the length of `index`.
   * Non-numeric and NaN entries are skipped before aggregation.
   */
  readonly values?: readonly Scalar[];
  /**
   * Function used to aggregate `values` within each cell.
   * Required when `values` is provided; ignored otherwise.
   *
   * @example `(vs) => vs.reduce((s, v) => s + v, 0) / vs.length` (mean)
   */
  readonly aggfunc?: AggFunc;
  /**
   * Name for the row-axis index in the resulting DataFrame.
   * @defaultValue `"row"`
   */
  readonly rowname?: string;
  /**
   * Name for the column-axis labels in the resulting DataFrame.
   *
   * NOTE(review): the current `crosstab` implementation never reads this
   * option — confirm whether it should be attached to the result's columns.
   * @defaultValue `"col"`
   */
  readonly colname?: string;
  /**
   * Whether to add row and column margin totals.
   * @defaultValue `false`
   */
  readonly margins?: boolean;
  /**
   * Label for the margins row/column.
   * @defaultValue `"All"`
   */
  readonly marginsName?: string;
  /**
   * If `true` (default), exclude missing values (null / undefined / NaN)
   * from both the row and column factors.
   * If `false`, treat missing values as a category (rendered as `"NaN"`).
   * @defaultValue `true`
   */
  readonly dropna?: boolean;
  /**
   * Normalize cell values to proportions:
   * - `false` (default) — raw counts / aggregated values
   * - `true` or `"all"` — divide each cell by the grand total
   * - `"index"` — divide each cell by its row total
   * - `"columns"` — divide each cell by its column total
   * @defaultValue `false`
   */
  readonly normalize?: Normalize;
}
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** True when the value is missing (null / undefined / NaN). */
+function isMissing(v: Scalar): boolean {
+ return v === null || v === undefined || (typeof v === "number" && Number.isNaN(v));
+}
+
+/**
+ * Stable string key for any scalar, matching the `factorize` approach:
+ * prefix with `typeof` to keep `null`, `undefined`, `NaN`, and the string
+ * `"null"` apart.
+ */
+function scalarKey(v: Scalar): string {
+ if (v === null) {
+ return "object:null";
+ }
+ if (v === undefined) {
+ return "undefined:undefined";
+ }
+ if (typeof v === "number" && Number.isNaN(v)) {
+ return "number:NaN";
+ }
+ return `${typeof v}:${String(v)}`;
+}
+
+/** Render a scalar as a human-readable label string. Missing → `"NaN"`. */
+function labelStr(v: Scalar): string {
+ return isMissing(v) ? "NaN" : String(v);
+}
+
+/**
+ * Collect unique values in first-seen order from `vals`.
+ * Missing values are included only when `dropna` is `false`.
+ */
+function collectUniques(vals: readonly Scalar[], dropna: boolean): Scalar[] {
+ const seen = new Set();
+ const out: Scalar[] = [];
+ for (const v of vals) {
+ if (dropna && isMissing(v)) {
+ continue;
+ }
+ const key = scalarKey(v);
+ if (!seen.has(key)) {
+ seen.add(key);
+ out.push(v);
+ }
+ }
+ return out;
+}
+
+// ─── core implementation ───────────────────────────────────────────────────────
+
+/**
+ * Build a cross-tabulation frequency table (or aggregation table) from two
+ * equal-length arrays of factor values.
+ *
+ * @param index - Row factor values. Must have the same length as `columns`.
+ * @param columns - Column factor values.
+ * @param options - Additional options (aggregation, margins, normalization, …).
+ * @returns A `DataFrame` whose rows represent `index` categories, whose
+ * column names represent `columns` categories, and whose cells contain
+ * counts or aggregated values.
+ *
+ * @example
+ * ```ts
+ * const idx = ["foo", "foo", "bar", "bar"];
+ * const col = ["A", "B", "A", "B"];
+ * const ct = crosstab(idx, col);
+ * // DataFrame:
+ * // A B
+ * // bar 1 1
+ * // foo 1 1
+ * ```
+ */
+export function crosstab(
+ index: readonly Scalar[] | Series,
+ columns: readonly Scalar[] | Series,
+ options: CrosstabOptions = {},
+): DataFrame {
+ const {
+ values,
+ aggfunc,
+ rowname = "row",
+ margins = false,
+ marginsName = "All",
+ dropna = true,
+ normalize = false,
+ } = options;
+
+ // Flatten Series to plain arrays.
+ const rowVals: readonly Scalar[] = index instanceof Series ? (index.values as Scalar[]) : index;
+ const colVals: readonly Scalar[] =
+ columns instanceof Series ? (columns.values as Scalar[]) : columns;
+
+ if (rowVals.length !== colVals.length) {
+ throw new RangeError(
+ `crosstab: index and columns must have the same length (got ${rowVals.length} vs ${colVals.length})`,
+ );
+ }
+
+ if (values !== undefined && aggfunc === undefined) {
+ throw new TypeError("crosstab: `aggfunc` is required when `values` is provided");
+ }
+
+ const n = rowVals.length;
+
+ // Collect unique row / column categories in first-seen order.
+ const rowUniques = collectUniques(rowVals, dropna);
+ const colUniques = collectUniques(colVals, dropna);
+
+ // Build lookup maps: scalarKey → 0-based position.
+ const rowPos = new Map();
+ for (let i = 0; i < rowUniques.length; i++) {
+ rowPos.set(scalarKey(rowUniques[i] as Scalar), i);
+ }
+ const colPos = new Map();
+ for (let i = 0; i < colUniques.length; i++) {
+ colPos.set(scalarKey(colUniques[i] as Scalar), i);
+ }
+
+ const nRows = rowUniques.length;
+ const nCols = colUniques.length;
+
+ // Initialize accumulator structures.
+ // counts[r][c] = frequency of (rowUniques[r], colUniques[c]) pairs.
+ const counts: number[][] = Array.from({ length: nRows }, () => new Array(nCols).fill(0));
+ // buckets[r][c] = collected numeric values for aggregation (when values+aggfunc provided).
+ const buckets: Array> | null =
+ values !== undefined
+ ? Array.from({ length: nRows }, () =>
+ Array.from({ length: nCols }, () => undefined),
+ )
+ : null;
+
+ // Populate accumulators.
+ for (let i = 0; i < n; i++) {
+ const rv = rowVals[i] as Scalar;
+ const cv = colVals[i] as Scalar;
+ if (dropna && (isMissing(rv) || isMissing(cv))) {
+ continue;
+ }
+ const ri = rowPos.get(scalarKey(rv));
+ const ci = colPos.get(scalarKey(cv));
+ if (ri === undefined || ci === undefined) {
+ continue;
+ }
+
+ if (buckets !== null && values !== undefined) {
+ const sv = values[i] as Scalar;
+ if (typeof sv === "number" && !Number.isNaN(sv)) {
+ const cell = buckets[ri];
+ if (cell !== undefined) {
+ const existing = cell[ci];
+ if (existing === undefined) {
+ cell[ci] = [sv];
+ } else {
+ existing.push(sv);
+ }
+ }
+ }
+ } else {
+ const row = counts[ri];
+ if (row !== undefined) {
+ row[ci] = (row[ci] ?? 0) + 1;
+ }
+ }
+ }
+
+ // Resolve cell values from counts or aggregated buckets.
+ const cells: number[][] = Array.from({ length: nRows }, (_, ri) =>
+ Array.from({ length: nCols }, (_, ci) => {
+ if (buckets !== null && aggfunc !== undefined) {
+ const arr = buckets[ri]?.[ci];
+ return arr !== undefined && arr.length > 0 ? aggfunc(arr) : 0;
+ }
+ return counts[ri]?.[ci] ?? 0;
+ }),
+ );
+
+ // Apply normalization before adding margins.
+ if (normalize !== false) {
+ const mode: "all" | "index" | "columns" = normalize === true ? "all" : normalize;
+ if (mode === "all") {
+ let grand = 0;
+ for (const row of cells) {
+ for (const v of row) {
+ grand += v;
+ }
+ }
+ if (grand !== 0) {
+ for (const row of cells) {
+ for (let c = 0; c < row.length; c++) {
+ row[c] = (row[c] ?? 0) / grand;
+ }
+ }
+ }
+ } else if (mode === "index") {
+ for (const row of cells) {
+ const total = row.reduce((s, v) => s + v, 0);
+ if (total !== 0) {
+ for (let c = 0; c < row.length; c++) {
+ row[c] = (row[c] ?? 0) / total;
+ }
+ }
+ }
+ } else {
+ // "columns": divide by column totals
+ for (let c = 0; c < nCols; c++) {
+ let total = 0;
+ for (const row of cells) {
+ total += row[c] ?? 0;
+ }
+ if (total !== 0) {
+ for (const row of cells) {
+ row[c] = (row[c] ?? 0) / total;
+ }
+ }
+ }
+ }
+ }
+
+ // Build column data: column-label → array of row values.
+ const colData: Record = {};
+ for (let ci = 0; ci < nCols; ci++) {
+ const name = labelStr(colUniques[ci] as Scalar);
+ colData[name] = cells.map((row) => row[ci] ?? 0);
+ }
+
+ // Build row labels.
+ const rowLabels: Label[] = rowUniques.map((v) => labelStr(v as Scalar) as Label);
+
+ // Add margin totals when requested.
+ let finalRowLabels = rowLabels;
+ if (margins) {
+ // Append column-total to each column array.
+ for (let ci = 0; ci < nCols; ci++) {
+ const name = labelStr(colUniques[ci] as Scalar);
+ const col = colData[name];
+ if (col !== undefined) {
+ col.push(col.reduce((s: number, v) => s + (typeof v === "number" ? v : 0), 0));
+ }
+ }
+ // Add an "All" column with row totals (and grand total in the last cell).
+ const allCol: Scalar[] = cells.map((row) => row.reduce((s, v) => s + v, 0));
+ allCol.push(allCol.reduce((s: number, v) => s + (typeof v === "number" ? v : 0), 0));
+ colData[marginsName] = allCol;
+ finalRowLabels = [...rowLabels, marginsName as Label];
+ }
+
+ // Build the DataFrame.
+ const rowIndex = new Index(finalRowLabels, rowname);
+ const colMap = new Map>();
+ for (const [name, data] of Object.entries(colData)) {
+ colMap.set(name, new Series({ data, index: rowIndex as Index }));
+ }
+ return new DataFrame(colMap, rowIndex);
+}
+
+// ─── Series overload ──────────────────────────────────────────────────────────
+
+/**
+ * Cross-tabulate two `Series` objects, using their `.name` properties
+ * as the default row / column axis names.
+ *
+ * @example
+ * ```ts
+ * const a = new Series({ data: ["foo", "foo", "bar"], name: "A" });
+ * const b = new Series({ data: ["x", "y", "x"], name: "B" });
+ * const ct = seriesCrosstab(a, b);
+ * ```
+ */
+export function seriesCrosstab(
+ index: Series,
+ columns: Series,
+ options: Omit & {
+ readonly rowname?: string;
+ readonly colname?: string;
+ } = {},
+): DataFrame {
+ const rowname = options.rowname ?? (typeof index.name === "string" ? index.name : "row");
+ const colname = options.colname ?? (typeof columns.name === "string" ? columns.name : "col");
+ return crosstab(index, columns, { ...options, rowname, colname });
+}
diff --git a/src/stats/cut.ts b/src/stats/cut.ts
new file mode 100644
index 00000000..8c656a84
--- /dev/null
+++ b/src/stats/cut.ts
@@ -0,0 +1,419 @@
+/**
+ * cut and qcut — bin continuous values into discrete intervals.
+ *
+ * Mirrors `pandas.cut()` and `pandas.qcut()`:
+ *
+ * - `cut()` divides the range of `x` into equal-width bins (when `bins` is an
+ * integer) or uses caller-supplied bin edges.
+ * - `qcut()` divides by sample quantiles so each bin holds approximately the
+ * same number of observations.
+ *
+ * Both functions return a `Series` where each value is an
+ * interval label like `"(0.0, 1.0]"`, `null` for out-of-range values, an
+ * integer code when `labels=false`, or a custom string when custom labels are
+ * provided.
+ *
+ * @example
+ * ```ts
+ * import { cut, qcut } from "tsb";
+ *
+ * const s = new Series({ data: [1, 2, 3, 4, 5] });
+ * cut(s, 2);
+ * // Series ["(0.995, 3.0]", "(0.995, 3.0]", "(0.995, 3.0]", "(3.0, 5.005]", "(3.0, 5.005]"]
+ *
+ * qcut(s, 2);
+ * // Series ["(0.999, 3.0]", "(0.999, 3.0]", "(0.999, 3.0]", "(3.0, 5.0]", "(3.0, 5.0]"]
+ * ```
+ *
+ * @module
+ */
+
+import { IntervalIndex } from "../core/index.ts";
+import type { IntervalClosed } from "../core/index.ts";
+import { Series } from "../core/index.ts";
+import { Index } from "../core/index.ts";
+import type { Label, Scalar } from "../types.ts";
+
// ─── option types ──────────────────────────────────────────────────────────

/** Options for {@link cut}. */
export interface CutOptions {
  /**
   * Whether intervals are right-closed `(a, b]` (`true`) or left-closed
   * `[a, b)` (`false`). Default `true`.
   */
  readonly right?: boolean;
  /**
   * Labels to use for the bins.
   *
   * - `undefined` (default): interval strings like `"(0, 1]"`.
   * - `false`: integer codes (0-indexed bin position).
   * - `string[]`: custom label per bin (length must equal the number of bins).
   */
  readonly labels?: readonly string[] | false;
  /**
   * When `true`, the leftmost bin includes its left edge even when
   * `right=true`. Mirrors pandas `include_lowest`. Default `false`.
   */
  readonly includeLowest?: boolean;
  /**
   * What to do when computed bin edges contain duplicates (only relevant for
   * user-supplied bin arrays). Default `"raise"`.
   */
  readonly duplicates?: "raise" | "drop";
}

/** Options for {@link qcut}. */
export interface QCutOptions {
  /** Same as {@link CutOptions.labels}: interval strings, codes, or custom. */
  readonly labels?: readonly string[] | false;
  /**
   * What to do when quantile-based bin edges contain duplicates (can happen
   * with highly-repeated values). Default `"raise"`.
   */
  readonly duplicates?: "raise" | "drop";
}
+
+// ─── internal helpers ──────────────────────────────────────────────────────
+
+/** Extract numeric values from a Series or plain array (NaN for non-numeric). */
+function extractNums(x: readonly Scalar[] | Series): readonly number[] {
+ const raw: readonly Scalar[] = x instanceof Series ? (x.values as readonly Scalar[]) : x;
+ return raw.map((v): number => {
+ if (typeof v === "number" && Number.isFinite(v)) {
+ return v;
+ }
+ return Number.NaN;
+ });
+}
+
+/** Extract the Index from a CutInput (or build a RangeIndex). */
+function extractIndex(x: readonly Scalar[] | Series, len: number): Index {
+ if (x instanceof Series) {
+ return x.index as Index;
+ }
+ const labels: Label[] = Array.from({ length: len }, (_, i): Label => i);
+ return new Index