diff --git a/benchmarks/results.json b/benchmarks/results.json index 13295d81..7d1fa6ec 100644 --- a/benchmarks/results.json +++ b/benchmarks/results.json @@ -1,357 +1 @@ -{ - "benchmarks": [ - { - "function": "concat", - "tsb": { - "function": "concat", - "mean_ms": 128.9745293, - "iterations": 20, - "total_ms": 2579.490586 - }, - "pandas": { - "function": "concat", - "mean_ms": 0.11375509999993483, - "iterations": 20, - "total_ms": 2.2751019999986966 - }, - "ratio": 1133.791 - }, - { - "function": "dataframe_apply", - "tsb": { - "function": "dataframe_apply", - "mean_ms": 16.7897294, - "iterations": 10, - "total_ms": 167.897294 - }, - "pandas": { - "function": "dataframe_apply", - "mean_ms": 47.161531699998704, - "iterations": 10, - "total_ms": 471.61531699998704 - }, - "ratio": 0.356 - }, - { - "function": "dataframe_creation", - "tsb": { - "function": "dataframe_creation", - "mean_ms": 223.22429929999998, - "iterations": 10, - "total_ms": 2232.242993 - }, - "pandas": { - "function": "dataframe_creation", - "mean_ms": 5.148059900000135, - "iterations": 10, - "total_ms": 51.48059900000135 - }, - "ratio": 43.361 - }, - { - "function": "dataframe_dropna", - "tsb": { - "function": "dataframe_dropna", - "mean_ms": 172.72901985000004, - "iterations": 20, - "total_ms": 3454.5803970000006 - }, - "pandas": { - "function": "dataframe_dropna", - "mean_ms": 2.42739894999886, - "iterations": 20, - "total_ms": 48.547978999977204 - }, - "ratio": 71.158 - }, - { - "function": "dataframe_filter", - "tsb": { - "function": "dataframe_filter", - "mean_ms": 126.19991375, - "iterations": 20, - "total_ms": 2523.998275 - }, - "pandas": { - "function": "dataframe_filter", - "mean_ms": 0.4964389500003108, - "iterations": 20, - "total_ms": 9.928779000006216 - }, - "ratio": 254.21 - }, - { - "function": "dataframe_rename", - "tsb": { - "function": "dataframe_rename", - "mean_ms": 0.008352200000000209, - "iterations": 20, - "total_ms": 0.1670440000000042 - }, - "pandas": { - "function": 
"dataframe_rename", - "mean_ms": 0.17103454999869427, - "iterations": 20, - "total_ms": 3.4206909999738855 - }, - "ratio": 0.049 - }, - { - "function": "dataframe_sort", - "tsb": { - "function": "dataframe_sort", - "mean_ms": 434.5389244, - "iterations": 10, - "total_ms": 4345.389244 - }, - "pandas": { - "function": "dataframe_sort", - "mean_ms": 33.301584399998774, - "iterations": 10, - "total_ms": 333.01584399998774 - }, - "ratio": 13.049 - }, - { - "function": "describe", - "tsb": { - "function": "describe", - "mean_ms": 19.719739000000004, - "iterations": 10, - "total_ms": 197.19739000000004 - }, - "pandas": { - "function": "describe", - "mean_ms": 5.521558600003118, - "iterations": 10, - "total_ms": 55.21558600003118 - }, - "ratio": 3.571 - }, - { - "function": "ewm_mean", - "tsb": { - "function": "ewm_mean", - "mean_ms": 118.5438748, - "iterations": 10, - "total_ms": 1185.438748 - }, - "pandas": { - "function": "ewm_mean", - "mean_ms": 1.7652839999982461, - "iterations": 10, - "total_ms": 17.65283999998246 - }, - "ratio": 67.153 - }, - { - "function": "groupby_mean", - "tsb": { - "function": "groupby_mean", - "mean_ms": 21.510315099999996, - "iterations": 10, - "total_ms": 215.10315099999997 - }, - "pandas": { - "function": "groupby_mean", - "mean_ms": 8.079756900002621, - "iterations": 10, - "total_ms": 80.79756900002621 - }, - "ratio": 2.662 - }, - { - "function": "merge", - "tsb": { - "function": "merge", - "mean_ms": 10348.345783, - "iterations": 3, - "total_ms": 31045.037349000002 - }, - "pandas": { - "function": "merge", - "mean_ms": 60.42320619999941, - "iterations": 10, - "total_ms": 604.2320619999941 - }, - "ratio": 171.264 - }, - { - "function": "pivot_table", - "tsb": { - "function": "pivot_table", - "mean_ms": 117.3417057, - "iterations": 10, - "total_ms": 1173.417057 - }, - "pandas": { - "function": "pivot_table", - "mean_ms": 22.500251999997545, - "iterations": 10, - "total_ms": 225.00251999997545 - }, - "ratio": 5.215 - }, - { - "function": 
"read_csv", - "tsb": { - "function": "read_csv", - "mean_ms": 589.2802257999999, - "iterations": 5, - "total_ms": 2946.401129 - }, - "pandas": { - "function": "read_csv", - "mean_ms": 29.951929399999244, - "iterations": 5, - "total_ms": 149.75964699999622 - }, - "ratio": 19.674 - }, - { - "function": "rolling_mean", - "tsb": { - "function": "rolling_mean", - "mean_ms": 419.62945440000004, - "iterations": 10, - "total_ms": 4196.294544 - }, - "pandas": { - "function": "rolling_mean", - "mean_ms": 1.71982609999759, - "iterations": 10, - "total_ms": 17.1982609999759 - }, - "ratio": 243.995 - }, - { - "function": "series_arithmetic", - "tsb": { - "function": "series_arithmetic", - "mean_ms": 122.68170964999999, - "iterations": 20, - "total_ms": 2453.634193 - }, - "pandas": { - "function": "series_arithmetic", - "mean_ms": 0.764571400000591, - "iterations": 20, - "total_ms": 15.29142800001182 - }, - "ratio": 160.458 - }, - { - "function": "series_creation", - "tsb": { - "function": "series_creation", - "mean_ms": 103.015, - "iterations": 50, - "total_ms": 5150.754 - }, - "pandas": { - "function": "series_creation", - "mean_ms": 7.607, - "iterations": 50, - "total_ms": 380.349 - }, - "ratio": 13.542 - }, - { - "function": "series_cumsum", - "tsb": { - "function": "series_cumsum", - "mean_ms": 58.26283665, - "iterations": 20, - "total_ms": 1165.256733 - }, - "pandas": { - "function": "series_cumsum", - "mean_ms": 1.1250383499998406, - "iterations": 20, - "total_ms": 22.500766999996813 - }, - "ratio": 51.787 - }, - { - "function": "series_fillna", - "tsb": { - "function": "series_fillna", - "mean_ms": 61.56140175, - "iterations": 20, - "total_ms": 1231.228035 - }, - "pandas": { - "function": "series_fillna", - "mean_ms": 0.18527670000025864, - "iterations": 20, - "total_ms": 3.705534000005173 - }, - "ratio": 332.267 - }, - { - "function": "series_shift", - "tsb": { - "function": "series_shift", - "mean_ms": 110.16682740000002, - "iterations": 20, - "total_ms": 2203.336548 - 
}, - "pandas": { - "function": "series_shift", - "mean_ms": 0.07249699999931636, - "iterations": 20, - "total_ms": 1.4499399999863272 - }, - "ratio": 1519.605 - }, - { - "function": "series_sort", - "tsb": { - "function": "series_sort", - "mean_ms": 161.28472190000002, - "iterations": 10, - "total_ms": 1612.8472190000002 - }, - "pandas": { - "function": "series_sort", - "mean_ms": 5.127767300001551, - "iterations": 10, - "total_ms": 51.27767300001551 - }, - "ratio": 31.453 - }, - { - "function": "series_string_ops", - "tsb": { - "function": "series_string_ops", - "mean_ms": 243.85622659999999, - "iterations": 10, - "total_ms": 2438.562266 - }, - "pandas": { - "function": "series_string_ops", - "mean_ms": 34.08206670000027, - "iterations": 10, - "total_ms": 340.8206670000027 - }, - "ratio": 7.155 - }, - { - "function": "series_value_counts", - "tsb": { - "function": "series_value_counts", - "mean_ms": 38.8205242, - "iterations": 10, - "total_ms": 388.205242 - }, - "pandas": { - "function": "series_value_counts", - "mean_ms": 9.212644899997713, - "iterations": 10, - "total_ms": 92.12644899997713 - }, - "ratio": 4.214 - } - ], - "timestamp": "2026-04-13T00:11:36Z" -} +{ "benchmarks": [], "timestamp": null } diff --git a/benchmarks/run_benchmarks.sh b/benchmarks/run_benchmarks.sh old mode 100755 new mode 100644 diff --git a/bun.lock b/bun.lock new file mode 100644 index 00000000..163b75ec --- /dev/null +++ b/bun.lock @@ -0,0 +1,50 @@ +{ + "lockfileVersion": 1, + "configVersion": 0, + "workspaces": { + "": { + "name": "tsb", + "devDependencies": { + "@biomejs/biome": "^1.9.4", + "@types/bun": "^1.1.14", + "fast-check": "^3.22.0", + }, + "peerDependencies": { + "typescript": "^5.7.0", + }, + }, + }, + "packages": { + "@biomejs/biome": ["@biomejs/biome@1.9.4", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "1.9.4", "@biomejs/cli-darwin-x64": "1.9.4", "@biomejs/cli-linux-arm64": "1.9.4", "@biomejs/cli-linux-arm64-musl": "1.9.4", "@biomejs/cli-linux-x64": 
"1.9.4", "@biomejs/cli-linux-x64-musl": "1.9.4", "@biomejs/cli-win32-arm64": "1.9.4", "@biomejs/cli-win32-x64": "1.9.4" }, "bin": { "biome": "bin/biome" } }, "sha512-1rkd7G70+o9KkTn5KLmDYXihGoTaIGO9PIIN2ZB7UJxFrWw04CZHPYiMRjYsaDvVV7hP1dYNRLxSANLaBFGpog=="], + + "@biomejs/cli-darwin-arm64": ["@biomejs/cli-darwin-arm64@1.9.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-bFBsPWrNvkdKrNCYeAp+xo2HecOGPAy9WyNyB/jKnnedgzl4W4Hb9ZMzYNbf8dMCGmUdSavlYHiR01QaYR58cw=="], + + "@biomejs/cli-darwin-x64": ["@biomejs/cli-darwin-x64@1.9.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-ngYBh/+bEedqkSevPVhLP4QfVPCpb+4BBe2p7Xs32dBgs7rh9nY2AIYUL6BgLw1JVXV8GlpKmb/hNiuIxfPfZg=="], + + "@biomejs/cli-linux-arm64": ["@biomejs/cli-linux-arm64@1.9.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-fJIW0+LYujdjUgJJuwesP4EjIBl/N/TcOX3IvIHJQNsAqvV2CHIogsmA94BPG6jZATS4Hi+xv4SkBBQSt1N4/g=="], + + "@biomejs/cli-linux-arm64-musl": ["@biomejs/cli-linux-arm64-musl@1.9.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-v665Ct9WCRjGa8+kTr0CzApU0+XXtRgwmzIf1SeKSGAv+2scAlW6JR5PMFo6FzqqZ64Po79cKODKf3/AAmECqA=="], + + "@biomejs/cli-linux-x64": ["@biomejs/cli-linux-x64@1.9.4", "", { "os": "linux", "cpu": "x64" }, "sha512-lRCJv/Vi3Vlwmbd6K+oQ0KhLHMAysN8lXoCI7XeHlxaajk06u7G+UsFSO01NAs5iYuWKmVZjmiOzJ0OJmGsMwg=="], + + "@biomejs/cli-linux-x64-musl": ["@biomejs/cli-linux-x64-musl@1.9.4", "", { "os": "linux", "cpu": "x64" }, "sha512-gEhi/jSBhZ2m6wjV530Yy8+fNqG8PAinM3oV7CyO+6c3CEh16Eizm21uHVsyVBEB6RIM8JHIl6AGYCv6Q6Q9Tg=="], + + "@biomejs/cli-win32-arm64": ["@biomejs/cli-win32-arm64@1.9.4", "", { "os": "win32", "cpu": "arm64" }, "sha512-tlbhLk+WXZmgwoIKwHIHEBZUwxml7bRJgk0X2sPyNR3S93cdRq6XulAZRQJ17FYGGzWne0fgrXBKpl7l4M87Hg=="], + + "@biomejs/cli-win32-x64": ["@biomejs/cli-win32-x64@1.9.4", "", { "os": "win32", "cpu": "x64" }, "sha512-8Y5wMhVIPaWe6jw2H+KlEm4wP/f7EW3810ZLmDlrEEy5KvBsb9ECEfu/kMWD484ijfQ8+nIi0giMgu9g1UAuuA=="], + + "@types/bun": ["@types/bun@1.3.11", "", { "dependencies": { "bun-types": "1.3.11" } }, 
"sha512-5vPne5QvtpjGpsGYXiFyycfpDF2ECyPcTSsFBMa0fraoxiQyMJ3SmuQIGhzPg2WJuWxVBoxWJ2kClYTcw/4fAg=="], + + "@types/node": ["@types/node@25.5.2", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg=="], + + "bun-types": ["bun-types@1.3.11", "", { "dependencies": { "@types/node": "*" } }, "sha512-1KGPpoxQWl9f6wcZh57LvrPIInQMn2TQ7jsgxqpRzg+l0QPOFvJVH7HmvHo/AiPgwXy+/Thf6Ov3EdVn1vOabg=="], + + "fast-check": ["fast-check@3.23.2", "", { "dependencies": { "pure-rand": "^6.1.0" } }, "sha512-h5+1OzzfCC3Ef7VbtKdcv7zsstUQwUDlYpUTvjeUsJAssPgLn7QzbboPtL5ro04Mq0rPOsMzl7q5hIbRs2wD1A=="], + + "pure-rand": ["pure-rand@6.1.0", "", {}, "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="], + } +} diff --git a/playground/add_sub_mul_div.html b/playground/add_sub_mul_div.html new file mode 100644 index 00000000..956ae26f --- /dev/null +++ b/playground/add_sub_mul_div.html @@ -0,0 +1,214 @@ + + + + + + tsb — add / sub / mul / div + + + + + +

add / sub / mul / div

+

+ Element-wise arithmetic between a Series (or DataFrame) and a scalar or another + Series — mirrors pandas.Series.add(), .sub(), + .mul(), and .div(). +

+ +
+

1 — add: Series + scalar

+

+ seriesAdd(series, scalar) adds a constant to every element. + Missing values (null / NaN) are propagated unchanged. + Mirrors pandas.Series.add(other). +

+
import { Series, seriesAdd } from "tsb";
+
+const s = new Series({ data: [1, 2, null, 4] });
+const result = seriesAdd(s, 10);
+console.log([...result.values]); // [11, 12, null, 14]
+
+
Loading…
+
+ +
+

2 — add: Series + Series (positional)

+

+ When other is another Series, elements are paired positionally + (same as pandas default when shapes match). +

+
import { Series, seriesAdd } from "tsb";
+
+const a = new Series({ data: [1, 2, 3] });
+const b = new Series({ data: [4, 5, 6] });
+seriesAdd(a, b).values;  // [5, 7, 9]
+
+
Loading…
+
+ +
+

3 — sub / rsub

+

+ seriesSub(s, other) computes s − other. + seriesRsub(s, other) computes the reverse: other − s. +

+
import { Series, seriesSub, seriesRsub } from "tsb";
+
+const s = new Series({ data: [10, 20, 30] });
+seriesSub(s, 5).values;    // [5, 15, 25]
+seriesRsub(s, 100).values; // [90, 80, 70]
+
+
Loading…
+
+ +
+

4 — mul: multiply

+

+ seriesMul(s, other) multiplies every element. + seriesRmul is the reversed form (commutative, provided for API symmetry). +

+
import { Series, seriesMul } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, null] });
+seriesMul(s, 3).values;  // [3, 6, 9, null]
+
+const weights = new Series({ data: [0.5, 1, 2, 1] });
+seriesMul(s, weights).values;  // [0.5, 2, 6, null]
+
+
Loading…
+
+ +
+

5 — div / rdiv (true division)

+

+ seriesDiv(s, other) performs IEEE-754 true division. + Division by zero yields ±Infinity or NaN (0÷0), + matching pandas.Series.div. + seriesRdiv(s, other) computes other / s. +

+
import { Series, seriesDiv, seriesRdiv } from "tsb";
+
+const s = new Series({ data: [4, 9, 0, null] });
+seriesDiv(s, 2).values;    // [2, 4.5, 0, null]
+seriesRdiv(s, 36).values;  // [9, 4, Infinity, null]
+
+
Loading…
+
+ +
+

6 — DataFrame arithmetic

+

+ All four operations work on DataFrames too. A scalar is broadcast across + every cell; a DataFrame operand is paired column-by-column, row-by-row. +

+
import { DataFrame, dataFrameAdd, dataFrameMul, dataFrameDiv } from "tsb";
+
+const df = DataFrame.fromColumns({ price: [10, 20, 30], qty: [3, 5, 2] });
+
+// Add a discount
+dataFrameAdd(df, 5).col("price").values;   // [15, 25, 35]
+
+// Scale everything by 2
+dataFrameMul(df, 2).col("qty").values;     // [6, 10, 4]
+
+// Revenue per item / some constant
+dataFrameDiv(df, 10).col("price").values;  // [1, 2, 3]
+
+
Loading…
+
+ +
+

7 — Missing value propagation

+

+ Following pandas convention, any operation involving a missing value + (null or NaN) returns the missing value unchanged. +

+
import { Series, seriesAdd, seriesMul, seriesDiv } from "tsb";
+
+const s = new Series({ data: [1, null, NaN, 4] });
+seriesAdd(s, 10).values;  // [11, null, NaN, 14]
+seriesMul(s, 2).values;   // [2, null, NaN, 8]
+seriesDiv(s, 2).values;   // [0.5, null, NaN, 2]
+
+
Loading…
+
+ + + + diff --git a/playground/align.html b/playground/align.html new file mode 100644 index 00000000..bcf7a21d --- /dev/null +++ b/playground/align.html @@ -0,0 +1,254 @@ + + + + + + tsb — align + + + +
+ ← tsb playground +

align

+

Realign two Series or DataFrames to a common axis — mirrors pandas.Series.align / pandas.DataFrame.align.

+
+ +
+
+

+ align takes two objects and aligns them to the same axis, returning + a pair [alignedLeft, alignedRight] that share the same index. + Labels present in one but not the other are filled with a fillValue (default null). +

+ + +

Join policies

+ + + + + + + + +
joinResult index
"outer" (default)Union of both indices
"inner"Intersection of both indices
"left"Left object's index
"right"Right object's index
+ +

+ See also: + pandas.Series.align + · + pandas.DataFrame.align +

+
+ +
+

1 · alignSeries — outer (default)

+
import { Series, Index, alignSeries } from "tsb";
+
+const a = new Series({ data: [1, 2, 3], index: new Index(["a", "b", "c"]) });
+const b = new Series({ data: [10, 20],  index: new Index(["b", "c"]) });
+
+// Default join="outer" → union of indices
+const [la, ra] = alignSeries(a, b);
+la.toArray();  // → [1, 2, 3]     (index: a, b, c)
+ra.toArray();  // → [null, 10, 20] (index: a, b, c)
+
+
+
+ +
+

2 · alignSeries — inner join

+
const [li, ri] = alignSeries(a, b, { join: "inner" });
+li.toArray();  // → [2, 3]   (only shared labels: b, c)
+ri.toArray();  // → [10, 20]
+
+
+
+ +
+

3 · alignSeries — left / right join + fillValue

+
const x = new Series({ data: [1, 2, 3], index: new Index(["a", "b", "c"]) });
+const y = new Series({ data: [10, 30],   index: new Index(["b", "d"]) });
+
+// join="left": result index = x's index
+const [ll, rl] = alignSeries(x, y, { join: "left", fillValue: 0 });
+ll.toArray();  // → [1, 2, 3]
+rl.toArray();  // → [0, 10, 0]  ("d" is outside x's index → dropped)
+
+// join="right": result index = y's index
+const [lr, rr] = alignSeries(x, y, { join: "right", fillValue: 0 });
+lr.toArray();  // → [2, 0]      ("b" matches, "d" is new)
+rr.toArray();  // → [10, 30]
+
+
+
+ +
+

4 · alignDataFrame — outer, both axes

+
import { DataFrame, Index, alignDataFrame } from "tsb";
+
+const a = DataFrame.fromColumns(
+  { x: [1, 2], y: [3, 4] },
+  { index: new Index(["r0", "r1"]) },
+);
+const b = DataFrame.fromColumns(
+  { y: [10], z: [20] },
+  { index: new Index(["r1"]) },
+);
+
+// Default: align both rows and columns (outer union)
+const [la, ra] = alignDataFrame(a, b);
+
+// la  →  shape [2, 3]  columns: x, y, z
+//        row r0: x=1, y=3, z=null
+//        row r1: x=2, y=4, z=null
+la.col("z").toArray();  // → [null, null]
+
+// ra  →  shape [2, 3]  columns: x, y, z
+//        row r0: x=null, y=null, z=null
+//        row r1: x=null, y=10,   z=20
+ra.col("x").toArray();  // → [null, null]
+ra.col("y").toArray();  // → [null, 10]
+
+
+
+ +
+

5 · alignDataFrame — axis=0 (rows only)

+
// axis=0 aligns rows but leaves columns untouched
+const [la5, ra5] = alignDataFrame(a, b, { axis: 0 });
+la5.columns.toArray();  // → ["x", "y"]   (unchanged)
+ra5.columns.toArray();  // → ["y", "z"]   (unchanged)
+la5.index.toArray();    // → ["r0", "r1"] (outer union)
+ra5.index.toArray();    // → ["r0", "r1"] (outer union)
+
+
+
+ +
+

6 · alignDataFrame — axis=1 (columns only)

+
// axis=1 aligns columns but leaves rows untouched
+const [la6, ra6] = alignDataFrame(a, b, { axis: 1 });
+la6.index.toArray();    // → ["r0", "r1"]  (unchanged)
+ra6.index.toArray();    // → ["r1"]        (unchanged)
+la6.columns.toArray().sort();  // → ["x", "y", "z"]
+ra6.columns.toArray().sort();  // → ["x", "y", "z"]
+
+
+
+ +
+

7 · Arithmetic after alignment

+
// A common use-case: element-wise arithmetic on misaligned Series
+const p = new Series({ data: [100, 200, 300], index: new Index(["a", "b", "c"]) });
+const q = new Series({ data: [1, 2],           index: new Index(["b", "c"]) });
+
+const [ap, aq] = alignSeries(p, q, { fillValue: 0 });
+// Now same shape — do element-wise addition
+const sum = ap.add(aq);
+sum.toArray();   // → [100, 201, 302]
+sum.index.toArray();  // → ["a", "b", "c"]
+
+
+
+
+ + + + diff --git a/playground/apply.html b/playground/apply.html new file mode 100644 index 00000000..050aef2a --- /dev/null +++ b/playground/apply.html @@ -0,0 +1,128 @@ + + + + + + tsb — apply + + + + + +

apply

+

Element-wise and axis-wise function application — mirrors pandas.Series.apply(), pandas.DataFrame.applymap(), and pandas.DataFrame.apply().

+ +
+

1 — Series.apply: transform each element

+

applySeries(series, fn) calls fn(value, label) for every element and returns a new Series with the results.

+
import { Series, applySeries } from "tsb";
+
+const s = new Series({ data: [1, 4, 9, 16], name: "squares" });
+
+// Square root of each element
+const r = applySeries(s, (v) => Math.sqrt(v));
+console.log([...r.values]); // [1, 2, 3, 4]
+
+// Use the label in the transform
+const s2 = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
+const labeled = applySeries(s2, (v, lbl) => `${lbl}=${v}`);
+console.log([...labeled.values]); // ["a=10", "b=20", "c=30"]
+
+
Loading…
+
+ +
+

2 — DataFrame.applymap: element-wise over entire DataFrame

+

applymap(df, fn) calls fn(value, colName) for every cell and returns a new DataFrame with the same shape.

+
import { DataFrame, applymap } from "tsb";
+
+const df = DataFrame.fromColumns({
+  price: [10.5, 22.0, 8.75],
+  qty:   [3,    1,    5   ],
+});
+
+// Round every number to 1 decimal place
+const rounded = applymap(df, (v) => Math.round(v * 10) / 10);
+console.log([...rounded.col("price").values]); // [10.5, 22, 8.8]
+
+// Use the column name in the transform
+const tagged = applymap(df, (v, col) => `${col}:${v}`);
+console.log(tagged.col("price").values[0]); // "price:10.5"
+
+
Loading…
+
+ +
+

3 — DataFrame.apply (axis=0): aggregate each column

+

dataFrameApply(df, fn) with default axis=0 passes each column as a Series to fn and returns a Series indexed by column names.

+
import { DataFrame, dataFrameApply } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, 2, 3],
+  b: [10, 20, 30],
+  c: [100, 200, 300],
+});
+
+// Sum of each column
+const colSums = dataFrameApply(df, (col) => col.sum());
+console.log([...colSums.index.values]); // ["a", "b", "c"]
+console.log([...colSums.values]);       // [6, 60, 600]
+
+// Mean of each column
+const colMeans = dataFrameApply(df, (col) => col.mean());
+console.log([...colMeans.values]); // [2, 20, 200]
+
+
Loading…
+
+ +
+

4 — DataFrame.apply (axis=1): aggregate each row

+

dataFrameApply(df, fn, { axis: 1 }) passes each row as a Series to fn and returns a Series indexed by row labels.

+
import { DataFrame, dataFrameApply } from "tsb";
+
+const df = DataFrame.fromColumns(
+  { a: [1, 2, 3], b: [4, 5, 6] },
+  { index: ["r0", "r1", "r2"] },
+);
+
+// Sum across columns for each row
+const rowSums = dataFrameApply(df, (row) => row.sum(), { axis: 1 });
+console.log([...rowSums.index.values]); // ["r0", "r1", "r2"]
+console.log([...rowSums.values]);       // [5, 7, 9]
+
+// Max value in each row
+const rowMax = dataFrameApply(df, (row) => row.max(), { axis: 1 });
+console.log([...rowMax.values]); // [4, 5, 6]
+
+
Loading…
+
+ +
+

5 — Handling missing values

+

The callback receives null / NaN as-is — you decide how to handle them.

+
import { Series, applySeries } from "tsb";
+
+const s = new Series({ data: [1, null, 3, null, 5] });
+
+// Replace nulls with 0, double numbers
+const r = applySeries(s, (v) => (v === null ? 0 : v * 2));
+console.log([...r.values]); // [2, 0, 6, 0, 10]
+
+
Loading…
+
+ + + diff --git a/playground/assign.html b/playground/assign.html new file mode 100644 index 00000000..f915431c --- /dev/null +++ b/playground/assign.html @@ -0,0 +1,107 @@ + + + + + + tsb — DataFrame.assign() + + + +

← tsb playground

+ +

DataFrame.assign()

+

+ Mirrors + pandas.DataFrame.assign(). Returns a new DataFrame with the given + columns added or replaced. The source DataFrame is never mutated. +

+ +

Specifier kinds

+ + + + + + + +
SpecifierTypeDescription
Arrayreadonly Scalar[]Values aligned by position with the row index
SeriesSeries<Scalar>A Series aligned by position
Callable(df: DataFrame) => Scalar[] | SeriesReceives the in-progress DataFrame (earlier columns in this call are already visible)
+ +

Example 1 — Array and Series

+
import { DataFrame, Series, dataFrameAssign } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [10, 20, 30] });
+
+const df2 = dataFrameAssign(df, {
+  c: [7, 8, 9],                          // array
+  d: new Series({ data: [4, 5, 6] }),    // Series
+});
+
+// df2.columns.values  →  ["a", "b", "c", "d"]
+// df2.col("c").values →  [7, 8, 9]
+// df2.col("d").values →  [4, 5, 6]
+
+ +

Example 2 — Callable (chained derivations)

+
const df3 = dataFrameAssign(df, {
+  // 1st: add "total" — callable receives df (no "total" column yet)
+  total: (d) => d.col("a").values.map((v, i) =>
+    (v as number) + (d.col("b").values[i] as number)
+  ),
+  // 2nd: add "tax" — callable now sees "total" because it was added above
+  tax: (d) => d.col("total").values.map((v) => (v as number) * 0.1),
+});
+
+// df3.col("total").values  →  [11, 22, 33]
+// df3.col("tax").values    →  [1.1, 2.2, 3.3]
+
+ +

Example 3 — Instance method

+
// DataFrame.assign() is also available as an instance method.
+const df4 = df.assign({
+  squared_a: (d: DataFrame) => d.col("a").values.map((v) => (v as number) ** 2),
+});
+// df4.col("squared_a").values  →  [1, 4, 9]
+
+ +

Example 4 — Replace an existing column

+
// If a key already exists as a column it is replaced in-place (order preserved).
+const df5 = dataFrameAssign(df, { b: [100, 200, 300] });
+
+// df5.columns.values   →  ["a", "b"]  (order unchanged)
+// df5.col("b").values  →  [100, 200, 300]
+
+ +
+ Pandas parity note: callables are applied in insertion-order and each one + receives the DataFrame produced by all earlier assignments in the same call — matching + pandas' behaviour since Python 3.7+ where dict preserves insertion order. +
+ +

API

+
// Standalone function
+function dataFrameAssign(df: DataFrame, spec: AssignSpec): DataFrame;
+
+// Instance method (same behaviour)
+df.assign(spec: AssignSpec): DataFrame;
+
+// Types
+type AssignColSpec =
+  | readonly Scalar[]
+  | Series<Scalar>
+  | ((df: DataFrame) => readonly Scalar[] | Series<Scalar>);
+
+type AssignSpec = Readonly<Record<string, AssignColSpec>>;
+
+ + diff --git a/playground/benchmarks.html b/playground/benchmarks.html index c4a74f9f..6b5dde65 100644 --- a/playground/benchmarks.html +++ b/playground/benchmarks.html @@ -300,58 +300,43 @@

🤖 About

// Find max time for scaling bars let maxTime = 0; for (const b of benchmarks) { - if (b.tsb != null) maxTime = Math.max(maxTime, b.tsb.mean_ms); - if (b.pandas != null) maxTime = Math.max(maxTime, b.pandas.mean_ms); + maxTime = Math.max(maxTime, b.tsb.mean_ms, b.pandas.mean_ms); } // Render bar chart for (const b of benchmarks) { const label = b.function.replace(/_/g, " "); - const pyPct = b.pandas != null ? (b.pandas.mean_ms / maxTime) * 100 : 0; - const tsPct = b.tsb != null ? (b.tsb.mean_ms / maxTime) * 100 : 0; - - const tsBar = b.tsb != null - ? '
' + b.tsb.mean_ms.toFixed(3) + ' ms
' - : '
pending
'; - const pyBar = b.pandas != null - ? '
' + b.pandas.mean_ms.toFixed(3) + ' ms
' - : '
pending
'; + const tsPct = (b.tsb.mean_ms / maxTime) * 100; + const pyPct = (b.pandas.mean_ms / maxTime) * 100; const row = document.createElement("div"); row.className = "bar-row"; row.innerHTML = '
' + label + '
' + - '
' + tsBar + pyBar + '
'; + '
' + + '
' + b.tsb.mean_ms + ' ms
' + + '
' + b.pandas.mean_ms + ' ms
' + + '
'; barChart.appendChild(row); } // Render table for (const b of benchmarks) { - const ratio = (b.tsb != null && b.pandas != null && b.pandas.mean_ms > 0) - ? b.tsb.mean_ms / b.pandas.mean_ms - : null; - const faster = ratio != null ? (ratio < 1 ? "tsb" : "pandas") : "—"; - const badgeClass = ratio != null ? (ratio < 1 ? "fast" : "slow") : ""; - const fasterClass = ratio != null ? (ratio < 1 ? "faster-tsb" : "faster-pandas") : ""; - const ratioDisplay = ratio != null - ? '' + ratio.toFixed(3) + "x" - : "—"; - const displayRatio = ratio != null - ? (ratio < 1 - ? (1 / ratio).toFixed(2) + "x faster" - : ratio.toFixed(2) + "x slower") - : ""; - const fasterDisplay = ratio != null ? faster + " (" + displayRatio + ")" : "—"; - const tsMsDisplay = b.tsb != null ? b.tsb.mean_ms.toFixed(3) : "—"; - const pyMsDisplay = b.pandas != null ? b.pandas.mean_ms.toFixed(3) : "—"; + const ratio = b.ratio; + const faster = ratio < 1 ? "tsb" : "pandas"; + const badgeClass = ratio < 1 ? "fast" : "slow"; + const fasterClass = ratio < 1 ? "faster-tsb" : "faster-pandas"; + const displayRatio = ratio < 1 + ? (1 / ratio).toFixed(2) + "x faster" + : ratio.toFixed(2) + "x slower"; const tr = document.createElement("tr"); tr.innerHTML = "" + b.function.replace(/_/g, " ") + "" + - "" + tsMsDisplay + "" + - "" + pyMsDisplay + "" + - "" + ratioDisplay + "" + - '' + fasterDisplay + ""; + "" + b.tsb.mean_ms + "" + + "" + b.pandas.mean_ms + "" + + '' + ratio + "x" + + '' + faster + " (" + displayRatio + ")"; benchTbody.appendChild(tr); } })(); diff --git a/playground/categorical_index.html b/playground/categorical_index.html new file mode 100644 index 00000000..41ff5fab --- /dev/null +++ b/playground/categorical_index.html @@ -0,0 +1,180 @@ + + + + + + tsb — CategoricalIndex + + + + + +

CategoricalIndex

+

+ An index whose values are constrained to a fixed set of categories — mirrors + pandas.CategoricalIndex. +

+ +
+

1 — Basic construction

+

+ Create a CategoricalIndex from an array of labels. Categories are + inferred automatically (sorted, deduplicated). Internally values are stored as + integer codes. +

+
import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromArray(["b", "a", "c", "a", "b"]);
+
+console.log("size:", ci.size);                      // 5
+console.log("categories:", ci.categories.toArray()); // ["a","b","c"]
+console.log("codes:", [...ci.codes]);                // [1,0,2,0,1]
+console.log("ordered:", ci.ordered);                 // false
+console.log("at(0):", ci.at(0));                     // "b"
+console.log("getLoc('a'):", ci.getLoc("a"));         // 1
+
+
▶ run
+
+ +
+

2 — Explicit categories and ordered flag

+

+ Supply explicit categories to control their order. Set ordered: true + to unlock comparison operations between category labels. +

+
import { CategoricalIndex } from "tsb";
+
+const sizes = CategoricalIndex.fromArray(
+  ["M", "S", "L", "XL", "S"],
+  {
+    categories: ["S", "M", "L", "XL"],
+    ordered: true,
+    name: "size",
+  },
+);
+
+console.log("categories:", sizes.categories.toArray()); // ["S","M","L","XL"]
+console.log("codes:", [...sizes.codes]);                 // [1,0,2,3,0]
+console.log("ordered:", sizes.ordered);                  // true
+console.log("name:", sizes.name);                        // "size"
+
+// Order-aware comparison: "S" < "L"?
+console.log("compareLabels('S','L'):", sizes.compareLabels("S", "L")); // negative
+
+
▶ run
+
+ +
+

3 — fromCodes constructor

+

Build a CategoricalIndex directly from a category list and pre-computed codes. Code -1 represents a missing (NA) value.

+
import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromCodes(
+  ["low", "mid", "high"],
+  [0, 2, -1, 1, 0],
+);
+
+console.log("toArray():", ci.toArray());
+// → ["low", "high", null, "mid", "low"]
+
+
▶ run
+
+ +
+

4 — Category mutations

+

+ All mutation methods return a new CategoricalIndex; + the original is unchanged. +

+
import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromArray(["a", "b", "c", "b"]);
+
+// Rename: same codes, new labels
+const renamed = ci.renameCategories(["x", "y", "z"]);
+console.log("renamed:", renamed.toArray()); // ["x","y","z","y"]
+
+// Add a category that doesn't appear in the data yet
+const added = ci.addCategories(["d"]);
+console.log("added cats:", added.categories.toArray()); // ["a","b","c","d"]
+
+// Remove "b" → entries become null
+const removed = ci.removeCategories(["b"]);
+console.log("after remove:", removed.toArray()); // ["a",null,"c",null]
+
+// Remove unused categories
+const ci2 = CategoricalIndex.fromArray(["a", "b"], { categories: ["a", "b", "c", "d"] });
+console.log("nCats before:", ci2.nCategories);                     // 4
+console.log("nCats after:", ci2.removeUnusedCategories().nCategories); // 2
+
+
▶ run
+
+ +
+

5 — Reorder and setCategories

+
import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromArray(["a", "b", "c"]);
+
+// Reorder — must be a permutation of existing categories
+const reordered = ci.reorderCategories(["c", "a", "b"]);
+console.log("categories:", reordered.categories.toArray()); // ["c","a","b"]
+console.log("data:", reordered.toArray());                   // ["a","b","c"] (unchanged)
+
+// Set completely new categories — entries outside new set → null
+const set = ci.setCategories(["a", "c"]);
+console.log("after setCategories:", set.toArray()); // ["a", null, "c"]
+
+
▶ run
+
+ +
+

6 — Set-like operations on categories

+
import { CategoricalIndex } from "tsb";
+
+const a = CategoricalIndex.fromArray(["a", "b", "b"]);
+const b = CategoricalIndex.fromArray(["b", "c", "c"]);
+
+// Union of category sets (left data retained)
+const u = a.unionCategories(b);
+console.log("union categories:", u.categories.toArray()); // ["a","b","c"]
+console.log("union data:", u.toArray());                   // ["a","b","b"]
+
+// Intersection of category sets
+const ci = CategoricalIndex.fromArray(["a", "b", "c"]);
+const other = CategoricalIndex.fromArray(["b", "c", "d"]);
+const inter = ci.intersectCategories(other);
+console.log("intersect categories:", inter.categories.toArray()); // ["b","c"]
+console.log("intersect data:", inter.toArray());                   // [null,"b","c"]
+
+
▶ run
+
+ +
+

7 — getLocsAll and membership

+
import { CategoricalIndex } from "tsb";
+
+const ci = CategoricalIndex.fromArray(["a", "b", "a", "c", "a"]);
+
+console.log("all locs of 'a':", ci.getLocsAll("a")); // [0, 2, 4]
+console.log("contains 'b':", ci.contains("b"));       // true
+console.log("contains 'z':", ci.contains("z"));       // false
+console.log("hasCategory 'c':", ci.hasCategory("c")); // true (even if not in data with explicit cats)
+
+
▶ run
+
+ + diff --git a/playground/clip_with_bounds.html b/playground/clip_with_bounds.html new file mode 100644 index 00000000..a787b66c --- /dev/null +++ b/playground/clip_with_bounds.html @@ -0,0 +1,144 @@ + + + + + + tsb — clip with bounds + + + +

← tsb playground

+ +

✂️ Clip with bounds

+

+ Extends scalar clip to support per-element bounds. Mirrors + + pandas.Series.clip(lower, upper) and + + pandas.DataFrame.clip(lower, upper, axis) with Series or DataFrame bounds. +

+ +

Bound types

+ + + + + + + + + +
Bound argumentBehaviour
numberSame scalar bound for every element
null / omittedNo bound on that side
(number | null)[]Positional per-element bounds
Series<Scalar>Aligned by index label — each element looks up its label in the bound Series
DataFrame (DataFrame variant only)Element-wise — each cell is clipped to the matching cell in the bound DataFrame
+ +

Example 1 — Series with scalar bounds

+
import { Series, clipSeriesWithBounds } from "tsb";
+
+const s = new Series({ data: [-5, 1, 7, 12] });
+
+clipSeriesWithBounds(s, { lower: 0, upper: 8 }).values;
+// [0, 1, 7, 8]
+
+ +

Example 2 — Series bounds (label-aligned)

+
import { Index, Series, clipSeriesWithBounds } from "tsb";
+
+const prices = new Series({
+  data: [90, 110, 85, 120],
+  index: new Index(["AAPL", "GOOG", "MSFT", "AMZN"]),
+  name: "price",
+});
+
+// Per-stock price floors
+const floors = new Series({
+  data: [95, 80, 100],
+  index: new Index(["AAPL", "MSFT", "GOOG"]),
+});
+
+clipSeriesWithBounds(prices, { lower: floors }).values;
+// AAPL: max(90, 95)=95  GOOG: max(110, 100)=110  MSFT: max(85, 80)=85  AMZN: 120 (no bound)
+// [95, 110, 85, 120]
+
+ +

Example 3 — DataFrame clip with per-column bounds (axis=1)

+
import { DataFrame, Index, Series, clipDataFrameWithBounds } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [0, 5, 12],
+  b: [3, 8, 15],
+});
+
+// Each column has its own lower/upper bound
+const lo = new Series({ data: [1, 4], index: new Index(["a", "b"]) });
+const hi = new Series({ data: [10, 9], index: new Index(["a", "b"]) });
+
+const result = clipDataFrameWithBounds(df, { lower: lo, upper: hi, axis: 1 });
+// col "a": [1, 5, 10]  (lower=1, upper=10)
+// col "b": [4, 8,  9]  (lower=4, upper=9)
+
+ +

Example 4 — DataFrame clip with per-row bounds (axis=0, default)

+
import { DataFrame, Series, clipDataFrameWithBounds } from "tsb";
+
+const df = DataFrame.fromColumns({
+  min_temp: [-5, -2, 1, 4],
+  max_temp: [10, 15, 18, 22],
+});
+
+// Daily operational thresholds (per row)
+const lowerBound = new Series({ data: [0, 0, 0, 0] });   // never below 0
+const upperBound = new Series({ data: [12, 12, 20, 20] }); // row-specific caps
+
+const result = clipDataFrameWithBounds(df, { lower: lowerBound, upper: upperBound, axis: 0 });
+// min_temp: [0, 0, 1, 4]   max_temp: [10, 12, 18, 20]
+
+ +

Example 5 — Element-wise DataFrame bounds

+
import { DataFrame, clipDataFrameWithBounds } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 5, 10], b: [2, 8, 3] });
+const lo = DataFrame.fromColumns({ a: [3, 3, 3], b: [0, 9, 0] });
+const hi = DataFrame.fromColumns({ a: [8, 8, 8], b: [5, 5, 5] });
+
+const result = clipDataFrameWithBounds(df, { lower: lo, upper: hi });
+// col "a": [3, 5, 8]  col "b": [2, 5, 3]
+
+ +

Null / NaN propagation

+
+ Missing values pass through unchanged. If the input contains null or + NaN, the output retains it regardless of the bounds. +
+
import { Series, clipSeriesWithBounds } from "tsb";
+
+const s = new Series({ data: [null, -5, NaN, 10] });
+clipSeriesWithBounds(s, { lower: 0, upper: 8 }).values;
+// [null, 0, NaN, 8]
+
+ +

API reference

+ + + + + + + + + + + + +
FunctionSignature
clipSeriesWithBounds(series, { lower?, upper? }) → Series
clipDataFrameWithBounds(df, { lower?, upper?, axis? }) → DataFrame
+ + diff --git a/playground/combine_first.html b/playground/combine_first.html new file mode 100644 index 00000000..a80bd540 --- /dev/null +++ b/playground/combine_first.html @@ -0,0 +1,135 @@ + + + + + + tsb — combine_first + + + +
+ ← tsb playground +

combine_first

+

Patch missing values in a Series or DataFrame with values from another — mirrors pandas.Series.combine_first and pandas.DataFrame.combine_first.

+
+ +
+
+

+ combineFirstSeries(self, other) and combineFirstDataFrame(self, other) + return a new object in which missing values in self are filled with non-null values from other; neither input is mutated. + The result index is the union of both index sets. + For each label, self's value takes priority; other's value is only + used when self's value is missing (null, undefined, or NaN). +

+

+ Mirrors + pandas.Series.combine_first + and + pandas.DataFrame.combine_first. +

+ +

Semantics

+ + + + + + + + +
Self valueOther valueResult
non-nullanythingself value
null / NaNnon-nullother value
null / NaNnull / missingnull
any (new label)other value
+
+ +
+

Example 1 — Series: fill gaps with values from another Series

+
import { Series, combineFirstSeries } from "tsb";
+
+const a = new Series({ data: [1, null, 3], index: ["x", "y", "z"] });
+const b = new Series({ data: [10, 20, 30, 40], index: ["x", "y", "z", "w"] });
+
+const result = combineFirstSeries(a, b);
+// index: ["x", "y", "z", "w"]
+// values: [1, 20, 3, 40]
+//
+// - "x": a has 1 (non-null) → keeps 1
+// - "y": a has null → filled from b → 20
+// - "z": a has 3 (non-null) → keeps 3
+// - "w": a has no entry → comes from b → 40
+
+
+ +
+

Example 2 — DataFrame: patch missing cells across row/column union

+
import { DataFrame, combineFirstDataFrame } from "tsb";
+
+const a = DataFrame.fromColumns(
+  { x: [1, null], y: [3, 4] },
+  { index: ["r0", "r1"] },
+);
+const b = DataFrame.fromColumns(
+  { x: [10, 20], z: [30, 40] },
+  { index: ["r0", "r2"] },
+);
+
+const result = combineFirstDataFrame(a, b);
+// rows:    r0, r1, r2
+// columns: x, y, z
+//
+// result["r0"]["x"] = 1   (a wins)
+// result["r1"]["x"] = null (a had null, b has no r1 → null)
+// result["r2"]["x"] = 20  (a has no r2 → from b)
+// result["r0"]["y"] = 3   (a only)
+// result["r1"]["y"] = 4   (a only)
+// result["r2"]["y"] = null (no r2 in a, no y in b)
+// result["r0"]["z"] = 30  (b only)
+// result["r1"]["z"] = null (b has no r1)
+// result["r2"]["z"] = 40  (b only)
+
+
+ +
+

Example 3 — NaN is treated as missing

+
import { Series, combineFirstSeries } from "tsb";
+
+const sensor1 = new Series({
+  data: [NaN, 22.5, 23.1, NaN],
+  index: [0, 1, 2, 3],
+  name: "temperature",
+});
+const sensor2 = new Series({
+  data: [21.0, 22.0, NaN, 24.0],
+  index: [0, 1, 2, 3],
+  name: "temperature",
+});
+
+const merged = combineFirstSeries(sensor1, sensor2);
+// values: [21.0, 22.5, 23.1, 24.0]
+// Gaps in sensor1 filled from sensor2
+
+
+ +
+

Example 4 — Temporal data backfill

+
import { Series, combineFirstSeries } from "tsb";
+
+// Primary data source with some gaps
+const primary = new Series({
+  data: [100, null, 102, null, 104],
+  index: ["2024-01", "2024-02", "2024-03", "2024-04", "2024-05"],
+});
+
+// Secondary source to fill gaps + extends to June
+const backup = new Series({
+  data: [99, 101, 103, 103, 105, 106],
+  index: ["2024-01", "2024-02", "2024-03", "2024-04", "2024-05", "2024-06"],
+});
+
+const complete = combineFirstSeries(primary, backup);
+// index:  2024-01, 2024-02, 2024-03, 2024-04, 2024-05, 2024-06
+// values: 100,     101,     102,     103,     104,     106
+
+
+
+ + diff --git a/playground/compare.html b/playground/compare.html new file mode 100644 index 00000000..c4127f6d --- /dev/null +++ b/playground/compare.html @@ -0,0 +1,273 @@ + + + + + + tsb — Comparison Ops | Interactive Playground + + + +
+
+

tsb playground

+

Interactive tutorial — Element-wise Comparison Operations  ·  ← back to index

+
+
+ +
+ +
+

🔍 Comparison Operations

+

+ tsb implements all six pandas comparison methods: + eq, ne, lt, gt, le, ge. + They work on both Series and DataFrame, and accept either a scalar + or another Series/DataFrame as the other argument. +

+

All functions return a boolean Series/DataFrame. Missing values (null / NaN) always yield false.

+ + + + + + + + + + + + + +
Functionpandas equivalentOperatorDescription
seriesEq(s, other)s.eq(other)==Element-wise equality
seriesNe(s, other)s.ne(other)!=Element-wise inequality
seriesLt(s, other)s.lt(other)<Less than
seriesGt(s, other)s.gt(other)>Greater than
seriesLe(s, other)s.le(other)<=Less than or equal
seriesGe(s, other)s.ge(other)>=Greater than or equal
+

DataFrame variants follow the same pattern: dataFrameEq, dataFrameNe, etc.

+
+ + +
+

1 — seriesEq with a scalar

+

Compare every element of a Series against a single scalar value:

+
import { Series, seriesEq } from "tsb"; + +const s = new Series({ data: [1, 2, 3, 2, 1] }); + +const result = seriesEq(s, 2); +// → [false, true, false, true, false] + +// Use this as a boolean mask for filtering: +// s.values.filter((_, i) => result.values[i]) → [2, 2]
+
+ + +
+

2 — seriesNe: inequality

+

seriesNe is the complement of seriesEq for non-null values:

+
import { Series, seriesNe } from "tsb"; + +const s = new Series({ data: ["apple", "banana", "apple", "cherry"] }); + +seriesNe(s, "apple").values; +// → [false, true, false, true]
+
+ + +
+

3 — Ordering comparisons: lt, gt, le, ge

+

Order comparisons work for numbers, strings, or any comparable type:

+
import { Series, seriesLt, seriesGt, seriesLe, seriesGe } from "tsb"; + +const scores = new Series({ data: [45, 72, 88, 60, 95] }); + +seriesLt(scores, 60).values; // [true, false, false, false, false] +seriesGe(scores, 60).values; // [false, true, true, true, true] + +// lt and ge are always complementary for finite, non-null values: +// lt[i] !== ge[i] for every i
+
+ + +
+

4 — Comparing two Series element-by-element

+

Pass a Series as other to compare position-by-position:

+
import { Series, seriesEq, seriesLt } from "tsb"; + +const actual = new Series({ data: [1, 2, 3, 4] }); +const expected = new Series({ data: [1, 3, 3, 2] }); + +seriesEq(actual, expected).values; // [true, false, true, false] +seriesLt(actual, expected).values; // [false, true, false, false] + +// Throws RangeError if lengths differ
+
+ + +
+

5 — Missing value behaviour

+

+ Comparing a missing value against + anything (including another missing value) always returns false. + This matches pandas for eq, lt, gt, le, and ge; note that pandas' ne returns + true for missing values, whereas tsb returns false for all six operators. +

+
import { Series, seriesEq, seriesNe, seriesLt } from "tsb"; + +const s = new Series({ data: [1, null, NaN, 3] }); + +seriesEq(s, 1).values; // [true, false, false, false] +seriesNe(s, 1).values; // [false, false, false, true ] +seriesLt(s, 2).values; // [true, false, false, false] + +// null eq null → false (NaN != NaN convention) +seriesEq(s, null).values; // [false, false, false, false]
+
+ + +
+

6 — DataFrame comparison with a scalar

+

Broadcast a scalar to every cell in a DataFrame:

+
import { DataFrame, dataFrameGt, dataFrameLe } from "tsb"; + +const df = DataFrame.fromColumns({ + math: [55, 72, 88], + science: [60, 45, 91], +}); + +dataFrameGt(df, 60).col("math").values; +// → [false, true, true] + +dataFrameLe(df, 60).col("science").values; +// → [true, true, false]
+
+ + +
+

7 — DataFrame compared against another DataFrame

+

Column names are used to align the two DataFrames. Missing columns in other yield false:

+
import { DataFrame, dataFrameEq } from "tsb"; + +const df1 = DataFrame.fromColumns({ a: [1, 2], b: [3, 4] }); +const df2 = DataFrame.fromColumns({ a: [1, 0], b: [3, 5] }); + +dataFrameEq(df1, df2).col("a").values; // [true, false] +dataFrameEq(df1, df2).col("b").values; // [true, false]
+
+ + +
+

8 — Combining with whereSeries for conditional selection

+

Comparison ops pair naturally with whereSeries / maskSeries:

+
import { Series, seriesGe, whereSeries } from "tsb"; + +const temps = new Series({ data: [18, 22, 30, 15, 27] }); + +// Mark values below 20 as null using the boolean mask: +const isWarm = seriesGe(temps, 20); // [false, true, true, false, true] +const warmOnly = whereSeries(temps, isWarm); + +warmOnly.values; // [null, 22, 30, null, 27]
+
+ +
+ + + + diff --git a/playground/crosstab.html b/playground/crosstab.html new file mode 100644 index 00000000..484154f4 --- /dev/null +++ b/playground/crosstab.html @@ -0,0 +1,217 @@ + + + + + + tsb — crosstab: cross-tabulation + + + +

crosstab tsb

+

+ Cross-tabulation — the TypeScript port of + pandas.crosstab(). + Count (or aggregate) the co-occurrence of two categorical variables, + producing a two-dimensional frequency table. +

+

+ Supports margins (row/column totals), normalize + (proportions), custom aggfunc, and missing-value control + via dropna. +

+ +

1. Basic frequency table

+
+

Count how often each combination of row/column categories appears.

+ + +
Click ▶ Run to execute
+
+ +

2. With margins (row/column totals)

+
+

+ Set margins: true to add an "All" row and + column showing totals. Use marginsName to change the + label. +

+ + +
Click ▶ Run to execute
+
+ +

3. Normalize to proportions

+
+

+ Use normalize: true (or "all", + "index", "columns") to convert raw counts + into proportions. +

+ + +
Click ▶ Run to execute
+
+ +

4. Custom aggregation (values + aggfunc)

+
+

+ Provide numeric values and an aggfunc to + aggregate values within each cell instead of just counting. +

+ + +
Click ▶ Run to execute
+
+ +

5. seriesCrosstab — Series input

+
+

+ Use seriesCrosstab to cross-tabulate two + Series objects directly. The Series .name + is used as the default axis name. +

+ + +
Click ▶ Run to execute
+
+ +

6. Missing values (dropna)

+
+

+ By default (dropna: true), any row where either factor is + missing is dropped. Set dropna: false to include missing + values as their own "NaN" category. +

+ + +
Click ▶ Run to execute
+
+ + + + diff --git a/playground/cut.html b/playground/cut.html new file mode 100644 index 00000000..524363c7 --- /dev/null +++ b/playground/cut.html @@ -0,0 +1,125 @@ + + + + + + tsb — cut / qcut + + + + + +

cut / qcut

+

Bin continuous values into discrete intervals — mirrors pandas.cut() and pandas.qcut().

+ +
+

1 — cut: equal-width bins

+

cut(x, bins) divides the range of x into bins equal-width intervals. Each value is labelled with the interval it falls into.

+
import { Series, cut } from "tsb";
+
+const scores = new Series({ data: [15, 32, 47, 63, 78, 91], name: "score" });
+const binned = cut(scores, 3);
+console.log(binned.toArray());
+// ["(14.924, 40.667]", "(14.924, 40.667]", "(40.667, 66.333]",
+//  "(40.667, 66.333]", "(66.333, 92.091]", "(66.333, 92.091]"]
+
["(14.924, 40.667]", "(14.924, 40.667]", "(40.667, 66.333]", "(40.667, 66.333]", "(66.333, 92.091]", "(66.333, 92.091]"]
+
+ +
+

2 — cut: explicit bin edges

+

Pass an array of bin edges for full control over boundaries. Values outside the edges become null.

+
const ages = new Series({ data: [5, 15, 25, 45, 65, 80] });
+const groups = cut(ages, [0, 18, 60, 100], {
+  labels: ["youth", "adult", "senior"],
+});
+console.log(groups.toArray());
+// ["youth", "youth", "adult", "adult", "senior", "senior"]
+
["youth", "youth", "adult", "adult", "senior", "senior"]
+
+ +
+

3 — cut: integer codes

+

Pass labels: false to get zero-indexed integer bin codes instead of interval strings.

+
const data = [10, 20, 30, 40, 50];
+const codes = cut(data, 3, { labels: false });
+console.log(codes.toArray());
+// [0, 0, 1, 2, 2]
+
[0, 0, 1, 2, 2]
+
+ +
+

4 — cut: right=false (left-closed intervals)

+

By default intervals are right-closed (a, b]. Set right: false for left-closed [a, b).

+
const vals = new Series({ data: [0, 1, 2, 3] });
+const leftClosed = cut(vals, [0, 1, 2, 3], { right: false });
+console.log(leftClosed.toArray());
+// ["[0, 1)", "[1, 2)", "[2, 3)", "[2, 3)"]
+// Note: 3 falls in last bin because right edge of last bin is included
+
["[0, 1)", "[1, 2)", "[2, 3)", "[2, 3)"]
+
+ +
+

5 — qcut: quantile-based binning

+

qcut(x, q) creates bins so that each bin holds approximately the same number of observations (equal-frequency binning).

+
import { qcut } from "tsb";
+
+const income = new Series({ data: [20000, 35000, 42000, 58000, 75000, 120000] });
+const quartiles = qcut(income, 2);
+console.log(quartiles.toArray());
+// Lower and upper halves by median
+
["(19999.98, 50000.0]", "(19999.98, 50000.0]", "(19999.98, 50000.0]", "(50000.0, 120000.0]", "(50000.0, 120000.0]", "(50000.0, 120000.0]"]
+
+ +
+

6 — qcut: custom quantile fractions

+

Pass an array of quantile fractions [0, ..., 1] for precise control over bin boundaries.

+
const scores2 = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];
+const quartiles = qcut(scores2, [0, 0.25, 0.5, 0.75, 1.0], {
+  labels: ["Q1", "Q2", "Q3", "Q4"],
+});
+console.log(quartiles.toArray());
+// ["Q1", "Q1", "Q2", "Q2", "Q3", "Q3", "Q4", "Q4", "Q4", "Q4"]
+
["Q1", "Q1", "Q2", "Q2", "Q3", "Q3", "Q4", "Q4", "Q4", "Q4"]
+
+ +
+

7 — cutIntervalIndex: inspect the bins

+

Use cutIntervalIndex() to retrieve the IntervalIndex that describes the bins, useful for further analysis or re-use.

+
import { cutIntervalIndex } from "tsb";
+
+const idx = cutIntervalIndex([1, 2, 3, 4, 5], 3);
+console.log(idx.size);   // 3
+console.log(idx.at(0).toString());  // "(0.996, 2.333]"
+console.log(idx.at(1).toString());  // "(2.333, 3.667]"
+console.log(idx.at(2).toString());  // "(3.667, 5.005]"
+
3 +(0.996, 2.333] +(2.333, 3.667] +(3.667, 5.005]
+
+ +
+

8 — Handling duplicates

+

When bin edges contain duplicates (common with repeated values in qcut), control behavior with duplicates: "drop".

+
// Repeated values create duplicate quantile edges → use "drop" to handle gracefully
+const skewed = [1, 1, 1, 1, 1, 2, 3, 4, 5];
+const result = qcut(skewed, 4, { duplicates: "drop" });
+console.log(result.toArray());
+
null values for duplicates, non-null where distinct bins exist
+
💡 Use duplicates: "drop" whenever your data has many repeated values. The default "raise" behaviour alerts you to potential binning issues.
+
+ + diff --git a/playground/date-offset.html b/playground/date-offset.html new file mode 100644 index 00000000..16a6e3bd --- /dev/null +++ b/playground/date-offset.html @@ -0,0 +1,261 @@ + + + + + + tsb — DateOffset + + + + + +

DateOffset

+

+ Calendar-aware date arithmetic — mirrors + pandas.tseries.offsets. +

+ +
+

1 — Available offsets

+

+ tsb provides eleven offset types for shifting dates by calendar-aware units. + All operations work in UTC to avoid DST surprises. +

+ + + + + + + + + + + + + +
Classpandas equivalentDescription
Day(n)Day(n)n calendar days
Hour(n)Hour(n)n hours
Minute(n)Minute(n)n minutes
Second(n)Second(n)n seconds
Milli(n)Milli(n)n milliseconds
Week(n, {weekday?})Week(n, weekday)n weeks, optional weekday alignment
MonthEnd(n)MonthEnd(n)n month-ends (last day of month)
MonthBegin(n)MonthBegin(n)n month-starts (first day of month)
YearEnd(n)YearEnd(n)n year-ends (Dec 31)
YearBegin(n)YearBegin(n)n year-starts (Jan 1)
BusinessDay(n)BDay(n)n business days (Mon–Fri)
+
+ +
+

2 — Fixed-time offsets (Day, Hour, Minute, Second, Milli)

+

+ These offsets add a fixed number of milliseconds. Every date is "on offset" + so rollforward and rollback are no-ops. +

+
import { Day, Hour, Minute, Second, Milli } from "tsb";
+
+const d = new Date(Date.UTC(2024, 0, 1));   // 2024-01-01T00:00:00Z
+
+new Day(3).apply(d).toISOString();          // "2024-01-04T00:00:00.000Z"
+new Day(-1).apply(d).toISOString();         // "2023-12-31T00:00:00.000Z"
+new Hour(2).apply(d).toISOString();         // "2024-01-01T02:00:00.000Z"
+new Minute(90).apply(d).toISOString();      // "2024-01-01T01:30:00.000Z"
+new Second(30).apply(d).toISOString();      // "2024-01-01T00:00:30.000Z"
+new Milli(500).apply(d).getTime() - d.getTime();  // 500
+
2024-01-04T00:00:00.000Z +2023-12-31T00:00:00.000Z +2024-01-01T02:00:00.000Z +2024-01-01T01:30:00.000Z +2024-01-01T00:00:30.000Z +500
+
+ +
+

3 — Week offset

+

+ Week(n) adds n × 7 days. With an optional + weekday (pandas convention: 0 = Monday … 6 = Sunday), + the offset snaps to the nearest occurrence of that weekday. +

+
import { Week } from "tsb";
+
+const wed = new Date(Date.UTC(2024, 0, 17));   // Wednesday 2024-01-17
+const mon = new Date(Date.UTC(2024, 0, 15));   // Monday    2024-01-15
+
+// Plain week — no alignment
+new Week(2).apply(wed).toISOString().slice(0, 10);     // "2024-01-31"
+
+// Weekday-aligned (weekday=0 → Monday)
+const wk = new Week(1, { weekday: 0 });
+wk.apply(wed).toISOString().slice(0, 10);    // "2024-01-22" (next Mon)
+wk.apply(mon).toISOString().slice(0, 10);    // "2024-01-22" (Mon → next Mon)
+
+// Rollforward / rollback
+wk.rollforward(wed).toISOString().slice(0, 10);  // "2024-01-22"
+wk.rollback(wed).toISOString().slice(0, 10);     // "2024-01-15"
+
+// onOffset
+wk.onOffset(mon);   // true
+wk.onOffset(wed);   // false
+
"2024-01-31" +"2024-01-22" +"2024-01-22" +"2024-01-22" +"2024-01-15" +true +false
+
+ +
+

4 — MonthEnd & MonthBegin

+

+ Anchored to the last and first day of each calendar month respectively. + Non-anchor dates are snapped before counting remaining steps. +

+
import { MonthEnd, MonthBegin } from "tsb";
+
+const mid = new Date(Date.UTC(2024, 0, 15));   // 2024-01-15
+const end = new Date(Date.UTC(2024, 0, 31));   // 2024-01-31
+
+// MonthEnd
+new MonthEnd(1).apply(mid).toISOString().slice(0, 10);    // "2024-01-31"
+new MonthEnd(2).apply(mid).toISOString().slice(0, 10);    // "2024-02-29" (leap)
+new MonthEnd(1).apply(end).toISOString().slice(0, 10);    // "2024-02-29"
+new MonthEnd(-1).apply(mid).toISOString().slice(0, 10);   // "2023-12-31"
+
+new MonthEnd(0).rollforward(mid).toISOString().slice(0, 10); // "2024-01-31"
+new MonthEnd(0).rollback(mid).toISOString().slice(0, 10);    // "2023-12-31"
+
+// MonthBegin
+new MonthBegin(1).apply(mid).toISOString().slice(0, 10);  // "2024-02-01"
+new MonthBegin(-1).apply(mid).toISOString().slice(0, 10); // "2024-01-01"
+
+new MonthBegin(0).rollforward(mid).toISOString().slice(0, 10); // "2024-02-01"
+new MonthBegin(0).rollback(mid).toISOString().slice(0, 10);    // "2024-01-01"
+
"2024-01-31" +"2024-02-29" +"2024-02-29" +"2023-12-31" +"2024-01-31" +"2023-12-31" +"2024-02-01" +"2024-01-01" +"2024-02-01" +"2024-01-01"
+
+ +
+

5 — YearEnd & YearBegin

+

+ YearEnd anchors to December 31; YearBegin + anchors to January 1. +

+
import { YearEnd, YearBegin } from "tsb";
+
+const d = new Date(Date.UTC(2024, 6, 4));   // 2024-07-04
+
+new YearEnd(1).apply(d).toISOString().slice(0, 10);    // "2024-12-31"
+new YearEnd(2).apply(d).toISOString().slice(0, 10);    // "2025-12-31"
+new YearEnd(-1).apply(d).toISOString().slice(0, 10);   // "2023-12-31"
+
+new YearBegin(1).apply(d).toISOString().slice(0, 10);  // "2025-01-01"
+new YearBegin(-1).apply(d).toISOString().slice(0, 10); // "2024-01-01"
+
+const yr2024 = new Date(Date.UTC(2024, 11, 31));
+new YearEnd(0).rollforward(yr2024).toISOString().slice(0, 10);  // "2024-12-31"
+new YearEnd(0).rollback(d).toISOString().slice(0, 10);          // "2023-12-31"
+
"2024-12-31" +"2025-12-31" +"2023-12-31" +"2025-01-01" +"2024-01-01" +"2024-12-31" +"2023-12-31"
+
+ +
+

6 — BusinessDay

+

+ Advances by weekdays only (Monday–Friday), skipping Saturday and Sunday. + Starting from a non-business-day, each step moves to the next + (or previous) business day. +

+
import { BusinessDay } from "tsb";
+
+const fri = new Date(Date.UTC(2024, 0, 12));   // Friday 2024-01-12
+const sat = new Date(Date.UTC(2024, 0, 13));   // Saturday
+
+new BusinessDay(1).apply(fri).toISOString().slice(0, 10);   // "2024-01-15" (Mon)
+new BusinessDay(3).apply(fri).toISOString().slice(0, 10);   // "2024-01-17" (Wed)
+new BusinessDay(-1).apply(fri).toISOString().slice(0, 10);  // "2024-01-11" (Thu)
+
+// From Saturday — first step lands on next Monday
+new BusinessDay(1).apply(sat).toISOString().slice(0, 10);   // "2024-01-15"
+new BusinessDay(-1).apply(sat).toISOString().slice(0, 10);  // "2024-01-12" (Fri)
+
+// Rolling
+new BusinessDay(0).rollforward(sat).toISOString().slice(0, 10); // "2024-01-15"
+new BusinessDay(0).rollback(sat).toISOString().slice(0, 10);    // "2024-01-12"
+
+new BusinessDay(0).onOffset(fri);   // true
+new BusinessDay(0).onOffset(sat);   // false
+
"2024-01-15" +"2024-01-17" +"2024-01-11" +"2024-01-15" +"2024-01-12" +"2024-01-15" +"2024-01-12" +true +false
+
+ +
+

7 — multiply & negate

+

+ Every offset class supports multiply(factor) and + negate() to produce a scaled or reversed copy. +

+
import { Day, MonthEnd, BusinessDay } from "tsb";
+
+new Day(3).multiply(4).n;          // 12
+new MonthEnd(2).negate().n;        // -2
+new BusinessDay(5).multiply(2).n;  // 10
+
+// negate is equivalent to multiply(-1)
+const bday = new BusinessDay(3);
+const fri = new Date(Date.UTC(2024, 0, 12));
+
+bday.negate().apply(bday.apply(fri)).toISOString().slice(0, 10); // "2024-01-12"
+
12 +-2 +10 +"2024-01-12"
+
+ +
+

8 — Static factory methods

+

Every class also provides a static of(n) factory:

+
import { Day, MonthEnd, Week, BusinessDay } from "tsb";
+
+const d = new Date(Date.UTC(2024, 0, 15));
+
+Day.of(5).apply(d).toISOString().slice(0, 10);          // "2024-01-20"
+MonthEnd.of(1).apply(d).toISOString().slice(0, 10);     // "2024-01-31"
+Week.of(1, { weekday: 0 }).apply(d).toISOString().slice(0, 10); // "2024-01-22"
+BusinessDay.of(2).apply(d).toISOString().slice(0, 10);  // "2024-01-17"
+
"2024-01-20" +"2024-01-31" +"2024-01-22" +"2024-01-17"
+
+ + + diff --git a/playground/date_range.html b/playground/date_range.html new file mode 100644 index 00000000..143022eb --- /dev/null +++ b/playground/date_range.html @@ -0,0 +1,517 @@ + + + + + + tsb — date_range / bdate_range + + + +
+

tsb — date_range / bdate_range

+

Generate fixed-frequency DatetimeIndex sequences · mirrors pandas.date_range & pandas.bdate_range

+
+
+ ← back to index + +

Frequency Reference

+
+ + + + + + + + + + + + + + + +
StringOffsetExample
DCalendar day2024-01-01 → 2024-01-02
BBusiness day (Mon–Fri)2024-01-05 → 2024-01-08
HHour2024-01-01T00 → 2024-01-01T01
T / minMinute
SSecond
MSMonth-start (1st)2024-01-01 → 2024-02-01
MEMonth-end (last day)2024-01-31 → 2024-02-29
QSQuarter-start2024-01-01 → 2024-04-01
QEQuarter-end2024-03-31 → 2024-06-30
AS / YSYear-start (Jan 1)2024-01-01 → 2025-01-01
AE / YEYear-end (Dec 31)2024-12-31 → 2025-12-31
+
+ +

Interactive Builder

+
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
+
+ +
Press Generate →
+
+
+
+ +

DatetimeIndex Operations

+
+

+ Generate an index, then apply .sort(), .unique(), .normalize(), .shift(n, freq), .filter(), .min() / .max(). +

+
+
+
+ + +
+
+ + +
+
+ + +
+ +
+
+ +
Press Apply →
+
+
+
+ +

Code Snippets

+
+
Select a scenario to see the TypeScript code:
+
+ + + + + +
+
+
+ + + + diff --git a/playground/datetime_tz.html b/playground/datetime_tz.html new file mode 100644 index 00000000..93a309f4 --- /dev/null +++ b/playground/datetime_tz.html @@ -0,0 +1,187 @@ + + + + + + tsb — TZDatetimeIndex: tz_localize & tz_convert + + + +

TZDatetimeIndex tsb

+

+ Timezone-aware date sequences — the TypeScript port of + pandas.DatetimeIndex.tz_localize and + pandas.DatetimeIndex.tz_convert. +

+

+ All timestamps are stored as UTC milliseconds internally. + tz_localize interprets wall-clock times in the given IANA timezone, + while tz_convert preserves UTC and only changes the display zone. +

+ +

1. tz_localize — naive → tz-aware

+
+

Treat each timestamp's UTC components as wall-clock times in the given timezone.

+ + +
Click ▶ Run to execute
+
+ +

2. tz_convert — change display timezone

+
+

Keep the same UTC instants; re-display them in a different timezone.

+ + +
Click ▶ Run to execute
+
+ +

3. Round-trip & tz_localize_none

+
+

Strip the timezone with tz_localize_none() to get a naive index back.

+ + +
Click ▶ Run to execute
+
+ +

4. Transformations (sort / filter / unique)

+
+ + +
Click ▶ Run to execute
+
+ +

5. DST Spring-forward & Fall-back (America/New_York 2024)

+
+ + +
Click ▶ Run to execute
+
+ + + + diff --git a/playground/dropna.html b/playground/dropna.html new file mode 100644 index 00000000..fa5758cb --- /dev/null +++ b/playground/dropna.html @@ -0,0 +1,171 @@ + + + + + + tsb — dropna + + + +
+ ← tsb playground +

dropna

+

Remove missing values from a Series or DataFrame — mirrors pandas.DataFrame.dropna and pandas.Series.dropna.

+
+ +
+
+

+ dropna(input, options?) removes rows or columns that contain missing values + (null, undefined, or NaN) from a Series or DataFrame. + It mirrors + pandas.DataFrame.dropna + and + pandas.Series.dropna. +

+ +

Options (DataFrame only)

+ + + + + + + + +
OptionTypeDefaultDescription
axis0 | 1 | "index" | "columns"0Drop rows (0) or columns (1).
how"any" | "all""any"Drop if any value is missing, or only if all are missing.
threshnumberMinimum non-null count to keep (overrides how).
subsetstring[]Only check these columns when scanning rows (axis=0 only).
+
+ +
+

Example 1 — Series: drop missing elements

+
import { Series, dropna } from "tsb";
+
+const s = new Series({ data: [1, null, NaN, 4, undefined, 6] });
+const clean = dropna(s);
+
+clean.values;  // [1, 4, 6]
+clean.size;    // 3
+
+
+ +
+

Example 2 — DataFrame: drop rows with any missing value (default)

+
import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name:  ["Alice", "Bob",  "Carol", "Dave"],
+  score: [95,      null,   88,      null  ],
+  grade: ["A",     "B",   null,    "C"   ],
+});
+
+// Drop any row that has at least one null
+const clean = dropna(df);
+clean.shape;   // [1, 3]  — only "Alice" row survives (score=95, grade="A")
+
+
+ +
+

Example 3 — how = "all": only drop fully-null rows

+
import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1,    null, null],
+  b: [null, null, 3   ],
+});
+
+// Row 1: both null → dropped
+// Row 0 and Row 2: at least one non-null → kept
+const clean = dropna(df, { how: "all" });
+clean.shape;   // [2, 2]
+
+
+ +
+

Example 4 — thresh: require at least N non-null values

+
import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1,    null, 3   ],
+  b: [4,    null, null],
+  c: [7,    null, 9   ],
+});
+
+// thresh=2: keep rows where at least 2 of 3 values are present
+// Row 0: 3 present → keep
+// Row 1: 0 present → drop
+// Row 2: 2 present → keep
+const clean = dropna(df, { thresh: 2 });
+clean.shape;   // [2, 3]
+
+
+ +
+

Example 5 — subset: only check specific columns

+
import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+  id:    [1,    2,    3   ],
+  score: [95,   null, 88  ],
+  notes: ["ok", "ok", null],
+});
+
+// Only check the "score" column for nulls — ignore "notes"
+const clean = dropna(df, { subset: ["score"] });
+// Row 1 (score=null) is dropped; Row 2 (notes=null but score=88) is kept.
+clean.shape;   // [2, 3]
+
+
+ +
+

Example 6 — axis = 1: drop columns with missing values

+
import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, null, 3],   // has a null → dropped
+  b: [4, 5,    6],   // no nulls  → kept
+  c: [7, 8,    null],// has a null → dropped
+});
+
+const clean = dropna(df, { axis: 1 });
+clean.columns.toArray();  // ["b"]
+clean.shape;              // [3, 1]
+
+
+ +
+

Example 7 — axis = 1, how = "all": only drop all-null columns

+
import { DataFrame, dropna } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [null, null, null],  // all null  → dropped
+  b: [1,    null, 3   ],  // some null → kept
+  c: [4,    5,    6   ],  // no null   → kept
+});
+
+const clean = dropna(df, { axis: 1, how: "all" });
+clean.columns.toArray();  // ["b", "c"]
+
+
+ +
+

Pandas equivalence table

+ + + + + + + + + + + +
tsbpandas
dropna(series)series.dropna()
dropna(df)df.dropna()
dropna(df, { how: "all" })df.dropna(how="all")
dropna(df, { thresh: 2 })df.dropna(thresh=2)
dropna(df, { subset: ["a", "b"] })df.dropna(subset=["a", "b"])
dropna(df, { axis: 1 })df.dropna(axis=1)
dropna(df, { axis: 1, how: "all" })df.dropna(axis=1, how="all")
+
+
+ + + + diff --git a/playground/duplicated.html b/playground/duplicated.html new file mode 100644 index 00000000..09a1aff7 --- /dev/null +++ b/playground/duplicated.html @@ -0,0 +1,133 @@ + + + + + + tsb — duplicated / drop_duplicates + + + +
+ ← tsb playground +

duplicated / drop_duplicates

+

Find and remove duplicate rows — mirrors pandas.DataFrame.duplicated and pandas.DataFrame.drop_duplicates.

+
+ +
+
+

+ duplicatedDataFrame(df, options?) returns a boolean Series indicating + which rows are duplicates of a previous (or later, depending on keep) row. + dropDuplicatesDataFrame(df, options?) returns a new DataFrame with duplicate rows + removed. + Both mirror + pandas.DataFrame.duplicated + and + pandas.DataFrame.drop_duplicates. +

+ +

+ Series variants duplicatedSeries and dropDuplicatesSeries operate + on a single column. +

+ +

Options

+ + + + + + + + + + + +
OptionTypeDefaultDescription
subsetstring[]all columnsOnly consider these columns when checking for duplicates (DataFrame only).
keep"first" | "last" | false"first" + "first" — keep the first occurrence, mark later ones.
+ "last" — keep the last occurrence, mark earlier ones.
+ false — mark all occurrences of any duplicate. +
+
+ +
+

Example 1 — Basic: find duplicate rows

+
import { DataFrame, duplicatedDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name:  ["Alice", "Bob", "Alice", "Carol"],
+  score: [90,      85,    90,      88    ],
+});
+
+// Row 2 ("Alice", 90) is a duplicate of Row 0
+const mask = duplicatedDataFrame(df);
+mask.values; // [false, false, true, false]
+
+
+ +
+

Example 2 — Drop duplicate rows

+
import { DataFrame, dropDuplicatesDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name:  ["Alice", "Bob", "Alice", "Carol"],
+  score: [90,      85,    90,      88    ],
+});
+
+const deduped = dropDuplicatesDataFrame(df);
+deduped.shape; // [3, 2]  — "Alice" row 2 removed
+
+
+ +
+

Example 3 — subset: only check specific columns

+
import { DataFrame, dropDuplicatesDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  id:    [1, 2, 1, 3],
+  value: ["a", "b", "c", "d"],  // different values, but same id
+});
+
+// Drop based on "id" only — row 2 (id=1) is dup even though value differs
+const deduped = dropDuplicatesDataFrame(df, { subset: ["id"] });
+deduped.shape; // [3, 2]
+
+
+ +
+

Example 4 — keep="last": keep the last occurrence

+
import { DataFrame, duplicatedDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  city: ["NYC", "LA", "NYC", "Chicago"],
+});
+
+// keep="last" → mark the FIRST occurrence of each dup, keep the last
+duplicatedDataFrame(df, { keep: "last" }).values;
+// [true, false, false, false]
+
+// keep=false → mark ALL occurrences of any duplicate
+duplicatedDataFrame(df, { keep: false }).values;
+// [true, false, true, false]
+
+
+ +
+

Example 5 — Series: deduplicate values

+
import { Series, duplicatedSeries, dropDuplicatesSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 1, 3, 2, 4] });
+
+duplicatedSeries(s).values;     // [false, false, true, false, true, false]
+dropDuplicatesSeries(s).values; // [1, 2, 3, 4]
+
+// keep=false → mark all duplicate values
+duplicatedSeries(s, { keep: false }).values;
+// [true, true, true, false, true, false]
+
+
+
+ + + + diff --git a/playground/explode.html b/playground/explode.html new file mode 100644 index 00000000..4e994cf6 --- /dev/null +++ b/playground/explode.html @@ -0,0 +1,139 @@ + + + + + + tsb — explode + + + + + +

explode

+

Transform list-like elements into individual rows — mirrors pandas.Series.explode() and pandas.DataFrame.explode().

+ +
+

1 — Series.explode: lists to rows

+

explodeSeries(s) expands each array element into its own row. The original index label is repeated for each item. Null / empty arrays each produce a single null row.

+
import { Series, explodeSeries } from "tsb";
+
+const s = new Series({
+  data: [[1, 2, 3], "foo", [], [3, 4]],
+  name: "x",
+});
+
+const out = explodeSeries(s);
+console.log([...out.values]);  // [1, 2, 3, "foo", null, 3, 4]
+console.log([...out.index.values]); // [0, 0, 0, 1, 2, 3, 3]
+
+
Loading…
+
+ +
+

2 — Series.explode with ignoreIndex

+

Pass ignoreIndex: true to replace the resulting index with a fresh RangeIndex instead of repeating original labels.

+
import { Series, explodeSeries } from "tsb";
+
+const s = new Series({
+  data: [[10, 20], [30]],
+  index: ["row-A", "row-B"],
+});
+
+// Default: repeats original labels
+const repeated = explodeSeries(s);
+console.log([...repeated.index.values]); // ["row-A", "row-A", "row-B"]
+
+// With ignoreIndex: fresh RangeIndex
+const fresh = explodeSeries(s, { ignoreIndex: true });
+console.log([...fresh.index.values]); // [0, 1, 2]
+
+
Loading…
+
+ +
+

3 — DataFrame.explode: expand a list column

+

explodeDataFrame(df, "col") explodes a single column; all other columns repeat their value for every generated row.

+
import { DataFrame, explodeDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name:   ["Alice",   "Bob"  ],
+  scores: [[95, 87],  [72, 65, 88]],
+});
+
+const out = explodeDataFrame(df, "scores");
+console.log([...out.col("name").values]);   // ["Alice","Alice","Bob","Bob","Bob"]
+console.log([...out.col("scores").values]); // [95, 87, 72, 65, 88]
+console.log([...out.index.values]);         // [0, 0, 1, 1, 1]
+
+
Loading…
+
+ +
+

4 — Handling null and empty lists

+

Null values remain as a single null row. An empty array also becomes a single null row (matching pandas' NaN behaviour).

+
import { DataFrame, explodeDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  id:   [1,      2,   3       ],
+  tags: [["a", "b"], null, []],
+});
+
+const out = explodeDataFrame(df, "tags");
+console.log([...out.col("id").values]);   // [1, 1, 2, 3]
+console.log([...out.col("tags").values]); // ["a", "b", null, null]
+
+
Loading…
+
+ +
+

5 — Multi-column simultaneous explode

+

Pass an array of column names to explode multiple columns at the same time. Each row's lists must have the same length across the exploded columns.

+
import { DataFrame, explodeDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  key:    ["x",         "y"        ],
+  left:   [[1, 2],      [3, 4]     ],
+  right:  [["a", "b"],  ["c", "d"] ],
+});
+
+const out = explodeDataFrame(df, ["left", "right"]);
+console.log([...out.col("key").values]);   // ["x","x","y","y"]
+console.log([...out.col("left").values]);  // [1, 2, 3, 4]
+console.log([...out.col("right").values]); // ["a","b","c","d"]
+
+
Loading…
+
+ +
+

6 — ignoreIndex on DataFrame.explode

+
import { DataFrame, explodeDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  v: [[10, 20], [30, 40]],
+});
+
+const out = explodeDataFrame(df, "v", { ignoreIndex: true });
+console.log([...out.index.values]); // [0, 1, 2, 3]
+
+
Loading…
+
+ +
+ Pandas parity: explodeSeries mirrors pandas.Series.explode(); explodeDataFrame mirrors pandas.DataFrame.explode(). Scalar values are passed through unchanged; empty arrays produce a single null (NaN in pandas). The ignoreIndex option corresponds to pandas' ignore_index parameter. +
+ + + diff --git a/playground/factorize.html b/playground/factorize.html new file mode 100644 index 00000000..dee95704 --- /dev/null +++ b/playground/factorize.html @@ -0,0 +1,154 @@ + + + + + + tsb — factorize: integer encoding + + + +

factorize tsb

+

+ Integer encoding of categorical values — the TypeScript port of + pandas.factorize() and Series.factorize(). + Maps each unique value to a monotonically increasing integer code, + returning both the codes array and the + uniques array. +

+

+ Missing values (null / undefined / NaN) receive code -1 + by default. Useful as a lightweight alternative to full dummy encoding + when you need ordinal indices for categorical data. +

+ +

1. Basic factorize — first-seen order

+
+

+ By default, unique values appear in first-seen order, + matching pandas' behaviour for object arrays. +

+ + +
Click ▶ Run to execute
+
+ +

2. Sorted uniques

+
+

+ Pass sort: true to sort unique values before assigning + codes. Numbers are sorted numerically; strings lexicographically. +

+ + +
Click ▶ Run to execute
+
+ +

3. Missing values → sentinel code -1

+
+

+ Null, undefined, and NaN receive code -1 by default and + are not included in uniques. Set + useNaSentinel: false to treat them as regular values. +

+ + +
Click ▶ Run to execute
+
+ +

4. seriesFactorize — works on a Series

+
+

+ seriesFactorize accepts a Series and returns + { codes: Series<number>, uniques: Series<T> }. +

+ + +
Click ▶ Run to execute
+
+ + + + diff --git a/playground/fillna.html b/playground/fillna.html new file mode 100644 index 00000000..90efd67c --- /dev/null +++ b/playground/fillna.html @@ -0,0 +1,277 @@ + + + + + + tsb — fillna + + + + + +

fillna

+

+ Fill missing values with a constant, forward fill, or backward fill — + mirrors pandas.Series.fillna() and pandas.DataFrame.fillna(). +

+ + +
+

1 · Scalar fill

+

+ Pass { value: scalar } to replace every missing element + (null, undefined, NaN) with a constant. +

+
import { Series, fillnaSeries } from "tsb";
+
+const s = new Series({ data: [1, null, null, 4] });
+fillnaSeries(s, { value: 0 }).values;
+// → [1, 0, 0, 4]
+
+const t = new Series({ data: [NaN, 2, NaN] });
+fillnaSeries(t, { value: 99 }).values;
+// → [99, 2, 99]
+
[1, 0, 0, 4] +[99, 2, 99]
+
+ + +
+

2 · Forward fill (ffill / pad)

+

+ method: "ffill" (alias "pad") carries the last known value + forward into subsequent missing positions. Leading nulls (before the + first known value) are left unchanged. +

+
import { Series, fillnaSeries } from "tsb";
+
+const s = new Series({ data: [null, 1, null, null, 4] });
+fillnaSeries(s, { method: "ffill" }).values;
+// → [null, 1, 1, 1, 4]
+
+// Trailing nulls are filled too
+const t = new Series({ data: [1, null, null] });
+fillnaSeries(t, { method: "ffill" }).values;
+// → [1, 1, 1]
+
[null, 1, 1, 1, 4] +[1, 1, 1]
+
+ + +
+

3 · Backward fill (bfill / backfill)

+

+ method: "bfill" (alias "backfill") carries the next known + value backward into preceding missing positions. Trailing nulls (after + the last known value) are left unchanged. +

+
import { Series, fillnaSeries } from "tsb";
+
+const s = new Series({ data: [null, null, 3, null, 5] });
+fillnaSeries(s, { method: "bfill" }).values;
+// → [3, 3, 3, 5, 5]
+
+// Leading nulls are filled from the first known value
+const t = new Series({ data: [null, null, 10] });
+fillnaSeries(t, { method: "bfill" }).values;
+// → [10, 10, 10]
+
[3, 3, 3, 5, 5] +[10, 10, 10]
+
+ + +
+

4 · Limiting the fill — limit

+

+ limit caps the number of consecutive missing values filled per + run. Positions beyond the limit remain missing. +

+
import { Series, fillnaSeries } from "tsb";
+
+// Only fill up to 1 consecutive missing value
+const s = new Series({ data: [1, null, null, null, 5] });
+fillnaSeries(s, { method: "ffill", limit: 1 }).values;
+// → [1, 1, null, null, 5]
+
+// bfill with limit=2
+fillnaSeries(s, { method: "bfill", limit: 2 }).values;
+// → [1, null, 5, 5, 5]
+
[1, 1, null, null, 5] +[1, null, 5, 5, 5]
+
+ + +
+

5 · DataFrame — scalar fill

+

+ fillnaDataFrame(df, { value: 0 }) fills every missing cell in + every column. +

+
import { DataFrame, fillnaDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, null, 3],
+  b: [null, 2, null],
+});
+
+const result = fillnaDataFrame(df, { value: 0 });
+result.col("a").values; // [1, 0, 3]
+result.col("b").values; // [0, 2, 0]
+
a: [1, 0, 3] +b: [0, 2, 0]
+
+ + +
+

6 · DataFrame — per-column fill map

+

+ Pass a plain object { colName: fillValue } to use a different + fill value for each column. Columns absent from the map are left unchanged. +

+
import { DataFrame, fillnaDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [null, 2, null],
+  b: [1,    null, 3],
+  c: [null, null, null],
+});
+
+const result = fillnaDataFrame(df, { value: { a: -1, b: 99 } });
+result.col("a").values; // [-1, 2, -1]
+result.col("b").values; // [1, 99, 3]
+result.col("c").values; // [null, null, null]  ← untouched
+
a: [-1, 2, -1] +b: [1, 99, 3] +c: [null, null, null]
+
+ + +
+

7 · DataFrame — method fill (axis=0 / axis=1)

+

+ method fills propagate along an axis. The default + axis=0 fills down each column; axis=1 + fills across each row. +

+
import { DataFrame, fillnaDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, null, null],
+  b: [null, 2, null],
+  c: [null, null, 3],
+});
+
+// axis=0 (default): ffill down each column
+const byCol = fillnaDataFrame(df, { method: "ffill" });
+byCol.col("a").values; // [1, 1, 1]
+byCol.col("b").values; // [null, 2, 2]
+byCol.col("c").values; // [null, null, 3]
+
+// axis=1: bfill across each row
+const byRow = fillnaDataFrame(df, { method: "bfill", axis: 1 });
+// row 0: [1, null, null] → bfill → [1, null, null]
+// row 1: [null, 2, null] → bfill → [2, 2, null]
+// row 2: [null, null, 3] → bfill → [3, 3, 3]
+
axis=0 ffill: + a: [1, 1, 1] + b: [null, 2, 2] + c: [null, null, 3] + +axis=1 bfill: + row 0: [1, null, null] + row 1: [2, 2, null] + row 2: [3, 3, 3]
+
+ + +
+

8 · DataFrame — fill values from a Series

+

+ When value is a Series<Scalar>, its index labels + are matched to DataFrame column names. This is the TypeScript equivalent of + df.fillna(series) in pandas. +

+
import { DataFrame, Series, fillnaDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  price:  [10, null, 30],
+  volume: [null, 200, null],
+});
+
+// Use Series index labels as column selectors
+const fills = new Series({
+  data:  [0, 0],
+  index: ["price", "volume"],
+});
+
+const result = fillnaDataFrame(df, { value: fills });
+result.col("price").values;  // [10, 0, 30]
+result.col("volume").values; // [0, 200, 0]
+
price: [10, 0, 30] +volume: [0, 200, 0]
+
+ + +
+

API summary

+ + + + + + + + + + + + +
FunctionSignature (simplified)Description
fillnaSeries(series, { value?, method?, limit? })Fill missing values in a Series
fillnaDataFrame(df, { value?, method?, limit?, axis? })Fill missing values in a DataFrame
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
OptionTypeDefaultDescription
valueScalar | ColumnFillMap | SeriesConstant or per-column fill value
method"ffill" | "pad" | "bfill" | "backfill"Propagation direction
limitnumberInfinityMax consecutive fills per run
axis0 | 1 | "index" | "columns"0Direction for method-based fill on DataFrames
+
+ + + diff --git a/playground/get_dummies.html b/playground/get_dummies.html new file mode 100644 index 00000000..2d878020 --- /dev/null +++ b/playground/get_dummies.html @@ -0,0 +1,153 @@ + + + + + + tsb — get_dummies: one-hot encoding + + + +

get_dummies tsb

+

+ One-hot / dummy encoding — the TypeScript port of + pandas.get_dummies(). + Convert categorical variables into binary indicator columns, + one column per unique value. +

+

+ Common in machine learning pipelines before fitting linear models. + Supports prefix, prefixSep, + dummyNa, and dropFirst options. +

+ +

1. Series → indicator DataFrame

+
+

Each unique value becomes a binary column (1 = present, 0 = absent).

+ + +
Click ▶ Run to execute
+
+ +

2. DataFrame — encode categorical columns

+
+

+ dataFrameGetDummies auto-detects string columns and + replaces them with indicator columns. Numeric columns are kept as-is. +

+ + +
Click ▶ Run to execute
+
+ +

3. Options: prefix, dummyNa, dropFirst

+
+

Fine-tune the encoding with optional parameters.

+ + +
Click ▶ Run to execute
+
+ +

4. Encode specific columns only

+
+

Pass columns to control which DataFrame columns are encoded.

+ + +
Click ▶ Run to execute
+
+ + + + diff --git a/playground/infer_dtype.html b/playground/infer_dtype.html new file mode 100644 index 00000000..2dbf1ea4 --- /dev/null +++ b/playground/infer_dtype.html @@ -0,0 +1,128 @@ + + + + + + tsb — inferDtype + + + +
+ ← tsb playground +

inferDtype

+

Infer the most specific dtype from a sequence of values — mirrors pandas.api.types.infer_dtype.

+
+ +
+
+

+ inferDtype(values, options?) inspects an array (or Series) of values and + returns a string label identifying the dominant data type. It mirrors the behaviour of + pandas.api.types.infer_dtype. +

+

Return values

+ + + + + + + + + + + + + + + + + + +
LabelMeaning
"empty"Zero elements, or all null/undefined (when skipna=true)
"boolean"All boolean
"integer"All integers (whole number or bigint)
"floating"All floating-point numbers (including ±Infinity, NaN)
"mixed-integer-float"Mix of integers and floats
"decimal"Mix of plain integers and bigint
"string"All strings
"date"All Date objects
"datetime"All Timestamp objects
"timedelta"All Timedelta objects
"period"All Period objects
"interval"All Interval objects
"mixed-integer"Mix of integers and non-numeric types
"mixed"Multiple heterogeneous non-numeric types
+
+ +
+

Example 1 — basic scalar types

+
import { inferDtype } from "tsb";
+
+inferDtype([1, 2, 3]);           // "integer"
+inferDtype([1.1, 2.2, 3.3]);     // "floating"
+inferDtype([1, 2.5, 3]);         // "mixed-integer-float"
+inferDtype([true, false, true]);  // "boolean"
+inferDtype(["a", "b", "c"]);     // "string"
+inferDtype([]);                  // "empty"
+inferDtype([null, null]);        // "empty"  (skipna=true by default)
+inferDtype([null, null], { skipna: false }); // "mixed"
+
+
+ +
+

Example 2 — working with Series

+
import { Series, inferDtype } from "tsb";
+
+const s1 = new Series({ data: [10, 20, 30] });
+inferDtype(s1);   // "integer"
+
+const s2 = new Series({ data: ["hello", "world"] });
+inferDtype(s2);   // "string"
+
+const s3 = new Series({ data: [1, null, 2, null, 3] });
+inferDtype(s3);   // "integer"  (nulls skipped by default)
+
+
+ +
+

Example 3 — specialised tsb types

+
import { inferDtype, Timestamp, Timedelta, Period, Interval } from "tsb";
+
+inferDtype([Timestamp.fromtimestamp(0), Timestamp.fromtimestamp(1)]);
+// "datetime"
+
+inferDtype([Timedelta.fromComponents({ days: 1 }), Timedelta.fromComponents({ hours: 2 })]);
+// "timedelta"
+
+inferDtype([Period.fromDate(new Date("2024-01-01T00:00:00Z"), "M")]);
+// "period"
+
+inferDtype([new Interval(0, 1), new Interval(1, 2)]);
+// "interval"
+
+
+ +
+

Example 4 — mixed types

+
import { inferDtype } from "tsb";
+
+inferDtype([1, "a", 2]);         // "mixed-integer" (int + non-numeric non-float)
+inferDtype(["a", true, null]);   // "mixed"         (string + bool)
+inferDtype([1n, 2n, 3n]);        // "integer"       (bigint only)
+inferDtype([1n, 2]);             // "decimal"       (bigint + integer)
+inferDtype([1n, 2.5]);           // "mixed-integer-float"
+
+
+ +
+

API reference

+
function inferDtype(
+  values: readonly unknown[] | Series,
+  options?: InferDtypeOptions,
+): InferredDtype;
+
+interface InferDtypeOptions {
+  /**
+   * When true (default), null and undefined are ignored when
+   * determining the dtype. When false, they contribute to "mixed".
+   */
+  skipna?: boolean;
+}
+
+
+
+ + + + diff --git a/playground/interpolate.html b/playground/interpolate.html new file mode 100644 index 00000000..248dfd9d --- /dev/null +++ b/playground/interpolate.html @@ -0,0 +1,280 @@ + + + + + + tsb — interpolate + + + + + +

interpolate

+

+ Fill missing values by interpolation — + mirrors pandas.Series.interpolate() and pandas.DataFrame.interpolate(). +

+ + +
+

1 · Linear interpolation (default)

+

+ interpolateSeries(series) fills each run of missing values + (null, undefined, NaN) that lies + between two known values using straight-line interpolation. +

+
import { Series, interpolateSeries } from "tsb";
+
+const s = new Series({ data: [1, null, null, 4] });
+//                           ^  ^    ^    ^
+//                           0  1    2    3
+
+const filled = interpolateSeries(s);
+filled.values;
+// → [1, 2, 3, 4]
+
+
+filled.values → [1, 2, 3, 4] + +Position 1: 1 + (4 − 1) × (1/3) = 2 +Position 2: 1 + (4 − 1) × (2/3) = 3 +
+ +
+ ⚠ Leading & trailing gaps are not filled by + the linear method — there is no anchor on one side to interpolate from. + Use method: "ffill" or method: "bfill" to fill + those (see sections 2 and 3 below). +
+ +
const t = new Series({ data: [null, 1, null, 3, null] });
+
+interpolateSeries(t).values;
+// → [null, 1, 2, 3, null]
+//    ^^^^              ^^^^
+//    leading          trailing
+//    (unchanged)      (unchanged)
+
+
[null, 1, 2, 3, null]
+
+ + +
+

2 · Forward fill (ffill / pad / zero)

+

+ method: "ffill" carries the last known value forward into each + following gap. "pad" and "zero" are aliases. +

+
import { Series, interpolateSeries } from "tsb";
+
+const s = new Series({ data: [1, null, null, 4, null] });
+
+interpolateSeries(s, { method: "ffill" }).values;
+// → [1, 1, 1, 4, 4]
+
+
[1, 1, 1, 4, 4]
+ +
+ 🔔 Leading NaN (no value to carry forward from) remain missing. +
+
+ + +
+

3 · Backward fill (bfill / backfill)

+

+ method: "bfill" fills each gap from the next known + value looking backwards. +

+
import { Series, interpolateSeries } from "tsb";
+
+const s = new Series({ data: [null, 2, null, null, 5] });
+
+interpolateSeries(s, { method: "bfill" }).values;
+// → [2, 2, 5, 5, 5]
+
+
[2, 2, 5, 5, 5]
+
+ + +
+

4 · Nearest-neighbor

+

+ method: "nearest" fills each missing position with the value + of its closest non-missing neighbor. When equidistant, the + right neighbor wins. +

+
import { Series, interpolateSeries } from "tsb";
+
+// [1, ?, ?, 4]
+// pos 1: dist-left=1, dist-right=2  → left wins  → 1
+// pos 2: dist-left=2, dist-right=1  → right wins → 4
+const s = new Series({ data: [1, null, null, 4] });
+
+interpolateSeries(s, { method: "nearest" }).values;
+// → [1, 1, 4, 4]
+
+
[1, 1, 4, 4]
+ +
// Tie at equidistance: right wins
+const t = new Series({ data: [10, null, 30] });
+interpolateSeries(t, { method: "nearest" }).values;
+// → [10, 30, 30]   (pos 1 equidistant; right value 30 chosen)
+
+
[10, 30, 30]
+
+ + +
+

5 · Limiting how many values are filled

+

+ The limit option caps the number of consecutive missing values + that can be filled within a single gap. Pair it with + limitDirection to control which end of the gap is filled first. +

+ + + + + + +
limitDirectionFills from
"forward" (default)Left boundary of each gap
"backward"Right boundary of each gap
"both"Left and right boundaries
+ +
import { Series, interpolateSeries } from "tsb";
+
+// Gap of size 3 between 0 and 4
+const s = new Series({ data: [0, null, null, null, 4] });
+
+// limit=1, forward: fill only the first NaN from the left
+interpolateSeries(s, { limit: 1 }).values;
+// → [0, 1, null, null, 4]
+
+// limit=1, backward: fill only the last NaN from the right
+interpolateSeries(s, { limit: 1, limitDirection: "backward" }).values;
+// → [0, null, null, 3, 4]
+
+// limit=1, both: fill one from each end
+interpolateSeries(s, { limit: 1, limitDirection: "both" }).values;
+// → [0, 1, null, 3, 4]
+
+
+forward: [0, 1, null, null, 4] +backward: [0, null, null, 3, 4] +both: [0, 1, null, 3, 4] +
+
+ + +
+

6 · DataFrame — column-wise (axis=0, default)

+

+ dataFrameInterpolate(df) applies the chosen method + independently down each column. +

+
import { DataFrame, dataFrameInterpolate } from "tsb";
+
+const df = DataFrame.fromColumns({
+  temperature: [20, null, null, 23],
+  humidity:    [60, null, 70,   null],
+});
+
+const filled = dataFrameInterpolate(df);
+filled.col("temperature").values; // [20, 21, 22, 23]
+filled.col("humidity").values;    // [60, 65, 70, null]  ← trailing not filled
+
+
+temperature: [20, 21, 22, 23] +humidity: [60, 65, 70, null] +
+
+ + +
+

7 · DataFrame — row-wise (axis=1)

+

+ Set axis: 1 (or axis: "columns") to interpolate + across columns for each row. +

+
import { DataFrame, dataFrameInterpolate } from "tsb";
+
+const df = DataFrame.fromColumns({
+  t0: [0,  10],
+  t1: [null, null],   // missing
+  t2: [null, null],   // missing
+  t3: [6,  22],
+});
+
+// Row 0 interpolates 0 → 6  (linear, 4 steps)
+// Row 1 interpolates 10 → 22
+const filled = dataFrameInterpolate(df, { axis: 1 });
+filled.col("t1").values; // [2, 14]
+filled.col("t2").values; // [4, 18]
+
+
+Row 0: [0, 2, 4, 6] +Row 1: [10, 14, 18, 22] +
+
+ + +
+

8 · API summary

+ + + + + + + + + + +
FunctionDescription
interpolateSeries(series, options?)Fill missing values in a Series
dataFrameInterpolate(df, options?)Fill missing values in a DataFrame (column-wise or row-wise)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
OptionTypeDefaultDescription
methodInterpolateMethod"linear"Interpolation strategy
limitnumberInfinityMax consecutive NaN values to fill
limitDirectionLimitDirection"forward"Which end of each gap the limit counts from
axis (DataFrame only)0 | 1 | "index" | "columns"0Column-wise (0) or row-wise (1)
+
+ + + diff --git a/playground/interval.html b/playground/interval.html new file mode 100644 index 00000000..afbe9fa1 --- /dev/null +++ b/playground/interval.html @@ -0,0 +1,218 @@ + + + + + + tsb — Interval & IntervalIndex + + + + + +

Interval & IntervalIndex

+

+ Numeric intervals with configurable endpoint closure — + mirrors pandas.Interval and pandas.IntervalIndex. +

+ +
+

1 — Interval basics

+

+ An Interval represents a range between two numbers. The closed + parameter controls which endpoints are included: +

+ + + + + + +
closednotationleft included?right included?
"right" (default)(left, right]noyes
"left"[left, right)yesno
"both"[left, right]yesyes
"neither"(left, right)nono
+
import { Interval } from "tsb";
+
+const iv = new Interval(0, 1);       // default: right-closed (0, 1]
+console.log(iv.toString());          // "(0, 1]"
+console.log(iv.length);              // 1
+console.log(iv.mid);                 // 0.5
+console.log(iv.closedLeft);          // false
+console.log(iv.closedRight);         // true
+console.log(iv.contains(1));         // true  — right endpoint included
+console.log(iv.contains(0));         // false — left endpoint excluded
+console.log(iv.contains(0.5));       // true
+
+
Loading…
+
+ +
+

2 — All four closure modes

+
import { Interval } from "tsb";
+
+const modes = ["right", "left", "both", "neither"];
+for (const m of modes) {
+  const iv = new Interval(0, 1, m);
+  const at0 = iv.contains(0);
+  const at1 = iv.contains(1);
+  const mid = iv.contains(0.5);
+  console.log(`${iv.toString().padEnd(12)} contains(0)=${at0} contains(0.5)=${mid} contains(1)=${at1}`);
+}
+
+
Loading…
+
+ +
+

3 — Interval.overlaps()

+

+ Two intervals overlap when they share at least one point. +

+
import { Interval } from "tsb";
+
+const a = new Interval(0, 2);
+const b = new Interval(1, 3);
+const c = new Interval(5, 6);
+
+console.log(a.overlaps(b));   // true  — share [1, 2]
+console.log(a.overlaps(c));   // false — gap between 2 and 5
+console.log(b.overlaps(a));   // true  — symmetric
+
+// Adjacent intervals sharing exactly one endpoint
+const left  = new Interval(0, 1, "right");   // (0, 1]
+const right = new Interval(1, 2, "left");    // [1, 2)
+console.log(left.overlaps(right));   // true  — both include point 1
+
+
Loading…
+
+ +
+

4 — IntervalIndex.fromBreaks()

+

+ The most common way to create an IntervalIndex is from a list of + break-points (like the output of pandas.cut()). + Given n+1 breaks, you get n intervals. +

+
import { IntervalIndex } from "tsb";
+
+const idx = IntervalIndex.fromBreaks([0, 10, 20, 30, 40, 50]);
+console.log(idx.size);          // 5
+console.log(idx.at(0).toString());  // "(0, 10]"
+console.log(idx.at(-1).toString()); // "(40, 50]"
+console.log([...idx.left]);     // [0, 10, 20, 30, 40]
+console.log([...idx.right]);    // [10, 20, 30, 40, 50]
+console.log([...idx.mid]);      // [5, 15, 25, 35, 45]
+console.log(idx.isMonotonicIncreasing); // true
+
+
Loading…
+
+ +
+

5 — IntervalIndex.get_loc() — bin lookup

+

+ get_loc(value) finds which bin a value falls into. + Returns -1 when the value isn't in any interval. +

+
import { IntervalIndex } from "tsb";
+
+// Grade bands (right-closed): F (0,50], D (50,60], C (60,70], B (70,80], A (80,100]
+const bands = IntervalIndex.fromArrays(
+  [0,  50, 60, 70, 80],
+  [50, 60, 70, 80, 100],
+  "right"  // (left, right]
+);
+
+const scores = [45, 55, 65, 75, 95, 100, -1];
+const labels = ["F", "D", "C", "B", "A"];
+
+for (const score of scores) {
+  const bin = bands.get_loc(score);
+  const grade = bin >= 0 ? labels[bin] : "out of range";
+  console.log(`score ${score} → ${grade}`);
+}
+
+
Loading…
+
+ +
+

6 — IntervalIndex.contains() and overlaps()

+
import { Interval, IntervalIndex } from "tsb";
+
+const idx = IntervalIndex.fromBreaks([0, 5, 10, 15]);
+// → (0,5], (5,10], (10,15]
+
+// contains: one boolean per interval
+console.log(idx.contains(7));    // [false, true, false]
+console.log(idx.contains(5));    // [true,  false, false]  (right-closed: 5 ∈ (0,5])
+
+// overlaps: does each interval share any point with the query?
+const query = new Interval(4, 6); // (4, 6]
+console.log(idx.overlaps(query)); // [true, true, false] — (0,5] and (5,10] both overlap
+
+
Loading…
+
+ +
+

7 — IntervalIndex.filter() and rename()

+
import { Interval, IntervalIndex } from "tsb";
+
+const idx = IntervalIndex.fromBreaks([0, 1, 2, 3, 4, 5], "right", { name: "raw" });
+
+// Keep only intervals that overlap with (1.5, 3.5]
+const query = new Interval(1.5, 3.5);
+const mask = idx.overlaps(query);
+const filtered = idx.filter(mask);
+
+console.log(filtered.size);           // 3
+console.log(filtered.toString());     // shows (1, 2], (2, 3], (3, 4]
+
+// Rename the index axis
+const named = idx.rename("score_bins");
+console.log(named.name);              // "score_bins"
+
+
Loading…
+
+ +
+

8 — Building from Interval objects

+
import { Interval, IntervalIndex } from "tsb";
+
+// Custom irregular intervals
+const intervals = [
+  new Interval(0, 5, "both"),     // [0, 5]
+  new Interval(5, 10, "neither"), // (5, 10)
+  new Interval(10, 20, "left"),   // [10, 20)
+];
+const idx = IntervalIndex.fromIntervals(intervals);
+console.log(idx.size);            // 3
+// Closure mode comes from first interval
+console.log(idx.closed);          // "both"
+console.log(idx.toString());
+
+
Loading…
+
+ + + + diff --git a/playground/isin.html b/playground/isin.html new file mode 100644 index 00000000..5a2ae910 --- /dev/null +++ b/playground/isin.html @@ -0,0 +1,139 @@ + + + + + + tsb — isin + + + + + +

isin

+

Element-wise membership testing — mirrors pandas.Series.isin() and pandas.DataFrame.isin().

+ +
+

1 — Series.isin: check membership in an array

+

isin(series, values) returns a boolean Series with true where each element appears in values. Accepts any iterable: arrays, Sets, generators.

+
import { Series, isin } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5], name: "scores" });
+
+const result = isin(s, [1, 3, 5]);
+console.log([...result.values]);  // [true, false, true, false, true]
+console.log(result.name);         // "scores"
+
+
Loading…
+
+ +
+

2 — Using a Set for O(1) lookups

+

Passing a Set avoids any extra construction overhead when you already have one:

+
import { Series, isin } from "tsb";
+
+const allowed = new Set(["apple", "cherry", "date"]);
+const fruits = new Series({ data: ["apple", "banana", "cherry", "elderberry"] });
+
+console.log([...isin(fruits, allowed).values]);
+// [true, false, true, false]
+
+
Loading…
+
+ +
+

3 — NaN and null behaviour

+

NaN is never a member of any collection (matches pandas behaviour). null uses strict equality and will match if present.

+
import { Series, isin } from "tsb";
+
+const s = new Series({ data: [1, NaN, null, 3] });
+
+console.log([...isin(s, [1, NaN, null]).values]);
+// NaN → false even though NaN is in the list; 3 is not in the list → false
+// [true, false, true, false]
+
+
Loading…
+
+ +
+

4 — DataFrame.isin: shared collection

+

dataFrameIsin(df, values) checks every cell against the same collection and returns a boolean DataFrame of the same shape.

+
import { DataFrame, dataFrameIsin } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, 2, 3],
+  b: [3, 4, 5],
+  c: ["x", "y", "z"],
+});
+
+const result = dataFrameIsin(df, [1, 3, "z"]);
+// a: [true, false, true]
+// b: [true, false, false]
+// c: [false, false, true]
+console.log([...result.col("a").values]);
+console.log([...result.col("b").values]);
+console.log([...result.col("c").values]);
+
+
Loading…
+
+ +
+

5 — DataFrame.isin: per-column lookup (IsinDict)

+

Pass a plain object { colName: values, … } to give each column its own set of allowed values. Columns absent from the dict produce all false.

+
import { DataFrame, dataFrameIsin } from "tsb";
+
+const df = DataFrame.fromColumns({
+  region:  ["north", "south", "east", "west"],
+  revenue: [100, 200, 150, 300],
+  active:  [true, false, true, false],
+});
+
+// Check region against a whitelist, revenue against specific values
+const result = dataFrameIsin(df, {
+  region:  ["north", "east"],
+  revenue: [100, 300],
+});
+
+console.log([...result.col("region").values]);   // [true, false, true, false]
+console.log([...result.col("revenue").values]);  // [true, false, false, true]
+console.log([...result.col("active").values]);   // [false, false, false, false] — absent from dict
+
+
Loading…
+
+ +
+

6 — Filtering rows where any column matches

+

A common pattern: use dataFrameIsin as a boolean mask to filter rows.

+
import { DataFrame, dataFrameIsin } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name:  ["Alice", "Bob", "Carol", "Dave"],
+  score: [85, 92, 78, 95],
+});
+
+// Keep only rows where name is in the target set
+const mask = dataFrameIsin(df, { name: ["Alice", "Carol"] });
+const filtered = df.filter((_row, i) => mask.col("name").values[i] === true);
+console.log([...filtered.col("name").values]);   // ["Alice", "Carol"]
+console.log([...filtered.col("score").values]);  // [85, 78]
+
+
Loading…
+
+ +
+ Note: isin always returns a new boolean Series/DataFrame — it never mutates the input. The index and column labels are preserved exactly. +
+ + diff --git a/playground/json_normalize.html b/playground/json_normalize.html new file mode 100644 index 00000000..5469a255 --- /dev/null +++ b/playground/json_normalize.html @@ -0,0 +1,199 @@ + + + + + + tsb · json_normalize + + + +
+ ← tsb +

json_normalize

+
+
+ +
+

Overview

+

+ jsonNormalize(data, options?) flattens semi-structured (nested) JSON into + a flat DataFrame — mirroring pandas.json_normalize(). +

+

Key options:

+ +
+ +
+

Example 1 — flatten nested dicts

+
import { jsonNormalize } from "tsb";
+
+const data = [
+  { id: 1, info: { name: "Alice", city: "NY" } },
+  { id: 2, info: { name: "Bob",   city: "LA" } },
+];
+
+const df = jsonNormalize(data);
+// id  info.name  info.city
+// 1   Alice      NY
+// 2   Bob        LA
+
+ +
+

Example 2 — recordPath + meta

+
const orders = [
+  { orderId: "A1", customer: "Alice", items: [{ sku: "X", qty: 2 }, { sku: "Y", qty: 1 }] },
+  { orderId: "B2", customer: "Bob",   items: [{ sku: "Z", qty: 5 }] },
+];
+
+const df = jsonNormalize(orders, {
+  recordPath: "items",
+  meta: ["orderId", "customer"],
+});
+// sku  qty  orderId  customer
+// X    2    A1       Alice
+// Y    1    A1       Alice
+// Z    5    B2       Bob
+
+ +
+

Example 3 — maxLevel

+
const data = [{ a: { b: { c: { d: 99 } } } }];
+
+jsonNormalize(data, { maxLevel: 1 });
+// a.b  →  {"c":{"d":99}}   (depth 2 is not expanded)
+
+ +
+

Interactive playground

+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
+
+
+ +
+ + + + diff --git a/playground/memory_usage.html b/playground/memory_usage.html new file mode 100644 index 00000000..78d0824b --- /dev/null +++ b/playground/memory_usage.html @@ -0,0 +1,301 @@ + + + + + + tsb — memory_usage + + + +

🧮 memory_usage

+

+ Estimate the memory consumed by a Series or DataFrame — + mirroring + pandas.Series.memory_usage() and + pandas.DataFrame.memory_usage(). +

+

+ Two options control the calculation: +

+

+ +

1 · Series memory_usage — fixed-width dtype

+
const s = new Series({ data: [1, 2, 3, 4], dtype: Dtype.int32 });
+// int32 → 4 bytes/element × 4 = 16 bytes of data
+// RangeIndex → 3 × 8 = 24 bytes (only start/stop/step stored)
+seriesMemoryUsage(s);             // 40  (data + index)
+seriesMemoryUsage(s, { index: false }); // 16 (data only)
+
click Run to evaluate
+ + +

2 · Series memory_usage — string dtype (shallow vs deep)

+
const s = new Series({ data: ["hello", "world", "tsb"], dtype: Dtype.string });
+// shallow: 3 × 8 bytes (one pointer per element)
+seriesMemoryUsage(s, { index: false });           // 24
+// deep: actual char data — each string = length*2 + 56 bytes overhead
+seriesMemoryUsage(s, { index: false, deep: true }); // "hello"=66, "world"=66, "tsb"=62 → 194
+
click Run to evaluate
+ + +

3 · DataFrame memory_usage — per-column breakdown

+
const df = new DataFrame({
+  id:    new Series({ data: [1, 2, 3], dtype: Dtype.int32 }),
+  score: new Series({ data: [9.5, 8.1, 7.2], dtype: Dtype.float64 }),
+  name:  new Series({ data: ["Alice", "Bob", "Carol"], dtype: Dtype.string }),
+});
+const mu = dataFrameMemoryUsage(df);
+// Returns Series indexed by ["Index", "id", "score", "name"]
+// Index (RangeIndex) → 24 bytes
+// id    (int32 × 3)  → 12 bytes
+// score (float64 × 3)→ 24 bytes
+// name  (string × 3, shallow) → 24 bytes
+
click Run to evaluate
+ + +

4 · DataFrame memory_usage — deep=true for string columns

+
const df = new DataFrame({
+  label: new Series({ data: ["short", "a slightly longer string"], dtype: Dtype.string }),
+});
+dataFrameMemoryUsage(df, { deep: true, index: false })
+// "short"                  → 5*2+56 = 66
+// "a slightly longer string" → 24*2+56 = 104
+
click Run to evaluate
+ + +

5 · Total memory across all columns

+
const df = new DataFrame({
+  a: new Series({ data: Array.from({length: 1000}, (_,i) => i), dtype: Dtype.int64 }),
+  b: new Series({ data: Array.from({length: 1000}, (_,i) => i * 0.1), dtype: Dtype.float64 }),
+});
+const mu = dataFrameMemoryUsage(df, { index: false });
+mu.sum(); // 1000*8 + 1000*8 = 16000 bytes
+
click Run to evaluate
+ + + + + + +

+ Part of tsb — a TypeScript port of pandas. + Built by Autoloop. +

+ + diff --git a/playground/named_agg.html b/playground/named_agg.html new file mode 100644 index 00000000..debdf0fa --- /dev/null +++ b/playground/named_agg.html @@ -0,0 +1,217 @@ + + + + + + tsb — NamedAgg Tutorial + + + +
+ ← tsb +

NamedAgg groupby

+
+
+ +
+

What is NamedAgg?

+

+ NamedAgg lets you rename output columns from a groupby aggregation while + simultaneously choosing which source column to aggregate and how. + It mirrors pandas.NamedAgg. +

+

+ Without NamedAgg, agg() keeps the original column names. + With aggNamed() you control the output name independently. +

+
+ +
+

Basic Usage

+
import { DataFrame, namedAgg } from "tsb";
+
+const df = DataFrame.fromColumns({
+  dept:      ["eng", "eng", "hr",  "hr",  "eng"],
+  salary:    [100,   120,   80,    90,    110  ],
+  headcount: [1,     1,     1,     1,     1    ],
+  score:     [4.0,   5.0,   3.0,   4.0,   3.5  ],
+});
+
+const result = df.groupby("dept").aggNamed({
+  total_salary:   namedAgg("salary",    "sum"),
+  avg_salary:     namedAgg("salary",    "mean"),
+  employees:      namedAgg("headcount", "sum"),
+  avg_score:      namedAgg("score",     "mean"),
+});
+
+// result:
+//         | total_salary | avg_salary | employees | avg_score
+// eng     | 330          | 110        | 3         | 4.167
+// hr      | 170          | 85         | 2         | 3.5
+ +
+
+

Input DataFrame

+ + + + + + + +
deptsalaryheadcountscore
eng10014.0
eng12015.0
hr8013.0
hr9014.0
eng11013.5
+
+
+

aggNamed result

+ + + + +
(index)total_salaryavg_salaryemployees
eng3301103
hr170852
+
+
+
+ +
+

Aggregate Same Column Multiple Ways

+

A key advantage of NamedAgg is applying multiple functions to the same source column simultaneously:

+
df.groupby("dept").aggNamed({
+  min_salary: namedAgg("salary", "min"),
+  max_salary: namedAgg("salary", "max"),
+  salary_count: namedAgg("salary", "count"),
+});
+
+ +
+

Custom Aggregation Functions

+

Pass any function (vals: readonly Scalar[]) => Scalar as the aggfunc:

+
const salaryRange = (vals: readonly Scalar[]) => {
+  const nums = vals.filter((v): v is number => typeof v === "number");
+  return Math.max(...nums) - Math.min(...nums);
+};
+
+df.groupby("dept").aggNamed({
+  salary_range: namedAgg("salary", salaryRange),
+});
+
+ +
+

Using the NamedAgg Class Directly

+

namedAgg(col, fn) is shorthand for new NamedAgg(col, fn):

+
import { NamedAgg } from "tsb";
+
+const spec = new NamedAgg("salary", "sum");
+console.log(spec.column);  // "salary"
+console.log(spec.aggfunc); // "sum"
+
+ +
+

asIndex=false

+

Pass false as the second argument to include the group key as a regular column:

+
df.groupby("dept").aggNamed(
+  { total_salary: namedAgg("salary", "sum") },
+  false,  // asIndex
+);
+// result has columns: ["dept", "total_salary"]
+
+ +
+

API Reference

+
// Factory function (recommended)
+namedAgg(column: string, aggfunc: AggName | AggFn): NamedAgg
+
+// Class constructor
+new NamedAgg(column: string, aggfunc: AggName | AggFn)
+
+// GroupBy method
+DataFrameGroupBy.aggNamed(spec: NamedAggSpec, asIndex?: boolean): DataFrame
+
+// Type guard
+isNamedAggSpec(spec: unknown): spec is NamedAggSpec
+
+// Types
+type NamedAggSpec = Readonly<Record<string, NamedAgg>>
+type AggName = "sum" | "mean" | "min" | "max" | "count" | "std" | "first" | "last" | "size"
+type AggFn = (values: readonly Scalar[]) => Scalar
+
+ +
+

Pandas Equivalent

+
# Python / pandas
+import pandas as pd
+
+df.groupby("dept").agg(
+    total_salary=pd.NamedAgg(column="salary", aggfunc="sum"),
+    avg_salary=pd.NamedAgg(column="salary", aggfunc="mean"),
+)
+
+ +
+ + diff --git a/playground/natsort.html b/playground/natsort.html new file mode 100644 index 00000000..77c1fd7f --- /dev/null +++ b/playground/natsort.html @@ -0,0 +1,133 @@ + + + + + + tsb — natsort + + + +
+ ← tsb playground +

natsort

+

Natural-order sorting for strings — mirrors the natsort package used by pandas.

+
+ +
+
+

+ Standard lexicographic sort places "file10" before "file2" because + "1" < "2". Natural sort compares embedded numbers numerically, so + "file2" correctly sorts before "file10". +

+

+ tsb exports four helpers: + natCompare(a, b) — comparator for Array.sort; + natSorted(arr) — returns a new naturally-sorted array; + natSortKey(s) — returns the token array used internally; + natArgSort(arr) — returns the permutation indices (like pandas.Index.argsort). +

+

+ Mirrors + pandas.Index.sort_values(key=natsort_keygen()) + and + natsort.natsorted(). +

+
+ +
+

1 · Basic usage

+
import { natSorted, natCompare, natSortKey, natArgSort } from "tsb";
+
+// File names sort by embedded number
+const files = ["file10.txt", "file2.txt", "file1.txt"];
+console.log(natSorted(files));
+// → ["file1.txt", "file2.txt", "file10.txt"]
+
+// Version strings
+const versions = ["1.10.0", "1.9.0", "1.2.0", "2.0.0"];
+console.log(natSorted(versions));
+// → ["1.2.0", "1.9.0", "1.10.0", "2.0.0"]
+
+// Use as Array.sort comparator
+const copy = [...files];
+copy.sort(natCompare);
+// → ["file1.txt", "file2.txt", "file10.txt"]
+
+ +
+

2 · Options

+
// ignoreCase — text tokens are folded to lower-case
+const words = ["Banana", "apple", "Cherry"];
+natSorted(words, { ignoreCase: true });
+// → ["apple", "Banana", "Cherry"]
+
+// reverse — descending natural order
+natSorted(["file1", "file10", "file2"], { reverse: true });
+// → ["file10", "file2", "file1"]
+
+ +
+

3 · Sorting objects with a key function

+
const rows = [
+  { path: "img/photo10.jpg" },
+  { path: "img/photo2.jpg" },
+  { path: "img/photo1.jpg" },
+];
+
+// key extracts the string to sort by
+import { natSorted } from "tsb";
+const sorted = natSorted(rows, { key: r => r.path });
+sorted.map(r => r.path);
+// → ["img/photo1.jpg", "img/photo2.jpg", "img/photo10.jpg"]
+
+ +
+

4 · natSortKey — inspect the token representation

+
import { natSortKey } from "tsb";
+
+natSortKey("file10.txt");   // → ["file", 10, ".txt"]
+natSortKey("007bonds");     // → [7, "bonds"]  (leading zeros stripped)
+natSortKey("abc");          // → ["abc"]
+natSortKey("42");           // → [42]
+
+// ignoreCase folds text tokens
+natSortKey("File10.TXT", { ignoreCase: true });
+// → ["file", 10, ".txt"]
+
+ +
+

5 · natArgSort — permutation indices

+
import { natArgSort } from "tsb";
+
+const arr = ["file10", "file2", "file1"];
+const idx = natArgSort(arr);
+// → [2, 1, 0]   (indices of "file1", "file2", "file10")
+
+idx.map(i => arr[i]);
+// → ["file1", "file2", "file10"]
+
+// Use with a tsb Index to sort labels naturally:
+// index.argsort() uses default lexicographic order;
+// natArgSort(index.values) gives natural order.
+
+ +
+

6 · Comparison with lexicographic sort

+
const data = ["item1", "item12", "item2", "item20", "item3"];
+
+// Lexicographic (default Array.sort)
+[...data].sort();
+// → ["item1", "item12", "item2", "item20", "item3"]  ← wrong
+
+// Natural sort
+natSorted(data);
+// → ["item1", "item2", "item3", "item12", "item20"]  ← correct
+
+
+ + + + diff --git a/playground/notna.html b/playground/notna.html new file mode 100644 index 00000000..bffa398a --- /dev/null +++ b/playground/notna.html @@ -0,0 +1,137 @@ + + + + + + tsb — notna / isna + + + +
+ ← tsb playground +

notna / isna

+

Element-wise missing-value detection — mirrors pandas.notna / pandas.isna.

+
+ +
+
+

+ isna(value) and notna(value) inspect scalars, arrays, Series, and DataFrames + and return a boolean (or boolean-valued object) indicating whether each element is missing. + They mirror + pandas.isna + and + pandas.notna. + The aliases isnull and notnull are also provided. +

+

What counts as missing?

+ + + + + + + + + + + +
ValueMissing?
null✅ yes
undefined✅ yes
NaN✅ yes
0, "", false❌ no — falsy but present
Infinity, -Infinity❌ no — defined numeric values
any string❌ no
any Date❌ no
+
+ +
+

Example 1 — scalars

+
import { isna, notna } from "tsb";
+
+isna(null);       // true
+isna(undefined);  // true
+isna(NaN);        // true
+
+isna(0);          // false  — zero is not missing
+isna("");         // false  — empty string is not missing
+isna(false);      // false  — false is not missing
+isna(42);         // false
+isna("hello");    // false
+
+notna(null);      // false
+notna(42);        // true
+
+
+ +
+

Example 2 — arrays

+
import { isna, notna } from "tsb";
+
+const arr = [1, null, NaN, "x", undefined, false];
+isna(arr);
+// [false, true, true, false, true, false]
+
+notna(arr);
+// [true, false, false, true, false, true]
+
+// Count missing values
+const missing = isna(arr).filter(Boolean).length;  // 3
+
+
+ +
+

Example 3 — Series

+
import { Series, isna, notna } from "tsb";
+
+const s = new Series({ data: [10, null, NaN, 40], name: "sales" });
+
+isna(s).values;
+// [false, true, true, false]
+
+notna(s).values;
+// [true, false, false, true]
+
+// Filter to non-missing values
+const present = s.filter((_, i) => notna(s).values[i] === true);
+
+
+ +
+

Example 4 — DataFrame

+
import { DataFrame, isna, notna } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name:   ["Alice", null,    "Carol"],
+  score:  [95,      NaN,     88     ],
+  active: [true,    false,   null   ],
+});
+
+isna(df).toRecords();
+// [
+//   { name: false, score: false, active: false },
+//   { name: true,  score: true,  active: false },
+//   { name: false, score: false, active: true  },
+// ]
+
+// Count nulls per column
+const nullCounts: Record<string, number> = {};
+for (const col of df.columns) {
+  nullCounts[String(col)] = isna(df.col(String(col))).values
+    .filter(Boolean).length;
+}
+// { name: 1, score: 1, active: 1 }
+
+
+ +
+

Example 5 — aliases

+
import { isnull, notnull } from "tsb";
+
+// isnull is an alias for isna
+// notnull is an alias for notna
+isnull(null);     // true
+notnull("hello"); // true
+
+
+
+ + + + diff --git a/playground/numeric_ops.html b/playground/numeric_ops.html new file mode 100644 index 00000000..32fa1096 --- /dev/null +++ b/playground/numeric_ops.html @@ -0,0 +1,208 @@ + + + + + + tsb — numeric math operations + + + + + +

numeric math operations

+

+ Element-wise mathematical functions for Series and DataFrame — + mirrors NumPy ufuncs applied to a pandas Series/DataFrame: + floor, ceil, trunc, + sqrt, exp, log, + log2, log10, sign. +

+ +
+ All functions are pure — they return a new Series/DataFrame + without mutating the input. Missing values (null / NaN) + propagate through every operation unchanged. +
+ +
+

1 — floor, ceil, trunc: rounding toward integers

+

+ seriesFloor(s) replaces each element with the largest integer ≤ the value.
+ seriesCeil(s) replaces each element with the smallest integer ≥ the value.
+ seriesTrunc(s) removes the fractional part, rounding toward zero.
+ For negative numbers: floor(-1.7) = -2, + ceil(-1.7) = -1, trunc(-1.7) = -1. +

+
import { Series, seriesFloor, seriesCeil, seriesTrunc } from "tsb";
+
+const s = new Series({ data: [-1.7, -0.2, 0, 1.2, 1.9] });
+
+console.log([...seriesFloor(s).values]);  // [-2, -1, 0, 1, 1]
+console.log([...seriesCeil(s).values]);   // [-1,  0, 0, 2, 2]
+console.log([...seriesTrunc(s).values]);  // [-1,  0, 0, 1, 1]
+
+
Loading…
+
+ +
+

2 — floor / ceil / trunc on a DataFrame

+

+ DataFrame variants (dataFrameFloor, dataFrameCeil, + dataFrameTrunc) apply the operation to every numeric column. +

+
import { DataFrame, dataFrameFloor, dataFrameCeil } from "tsb";
+
+const df = DataFrame.fromColumns({
+  price:  [10.49, 20.01, 30.99],
+  change: [-0.55,  1.23,  2.78],
+});
+
+const floored = dataFrameFloor(df);
+console.log([...floored.col("price").values]);  // [10, 20, 30]
+console.log([...floored.col("change").values]); // [-1, 1, 2]
+
+const ceiled = dataFrameCeil(df);
+console.log([...ceiled.col("price").values]);   // [11, 21, 31]
+
+
Loading…
+
+ +
+

3 — sqrt: square root

+

+ seriesSqrt(s) returns √x for each element. + Negative values produce NaN (real-valued, same as NumPy by default). + Mirrors np.sqrt(series). +

+
import { Series, seriesSqrt } from "tsb";
+
+const s = new Series({ data: [0, 1, 4, 9, 16, 25] });
+console.log([...seriesSqrt(s).values]); // [0, 1, 2, 3, 4, 5]
+
+// Negative values → NaN
+const mixed = new Series({ data: [-4, 0, 9] });
+console.log([...seriesSqrt(mixed).values]); // [NaN, 0, 3]
+
+
Loading…
+
+ +
+

4 — exp: exponential function

+

+ seriesExp(s) computes ex for each element. + Mirrors np.exp(series). +

+
import { Series, seriesExp } from "tsb";
+
+const s = new Series({ data: [0, 1, 2, -1] });
+const result = seriesExp(s);
+console.log(result.values[0].toFixed(4)); // 1.0000  (e^0 = 1)
+console.log(result.values[1].toFixed(4)); // 2.7183  (e^1 = e)
+console.log(result.values[2].toFixed(4)); // 7.3891  (e^2)
+console.log(result.values[3].toFixed(4)); // 0.3679  (e^-1 = 1/e)
+
+
Loading…
+
+ +
+

5 — log, log2, log10: logarithms

+

+ Three logarithm functions, matching the NumPy counterparts: +

+ + + + + +
FunctionBaseMirrors
seriesLoge (natural)np.log
seriesLog22np.log2
seriesLog1010np.log10
+

Values ≤ 0 produce -Infinity (for 0) or NaN (for negative).

+
import { Series, seriesLog, seriesLog2, seriesLog10 } from "tsb";
+
+const naturals = new Series({ data: [1, Math.E, Math.E ** 2] });
+console.log(seriesLog(naturals).values.map(v => +v.toFixed(4)));
+// [0, 1, 2]
+
+const powersOf2 = new Series({ data: [1, 2, 4, 8, 1024] });
+console.log([...seriesLog2(powersOf2).values]);
+// [0, 1, 2, 3, 10]
+
+const powersOf10 = new Series({ data: [1, 10, 100, 1000] });
+console.log([...seriesLog10(powersOf10).values]);
+// [0, 1, 2, 3]
+
+
Loading…
+
+ +
+

6 — sign: element sign

+

+ seriesSign(s) returns -1 for negative values, + 0 for zero, and 1 for positive values. + Mirrors np.sign(series). +

+
import { Series, seriesSign } from "tsb";
+
+const s = new Series({ data: [-100, -0.001, 0, 0.001, 100] });
+console.log([...seriesSign(s).values]); // [-1, -1, 0, 1, 1]
+
+
Loading…
+
+ +
+

7 — missing value propagation

+

+ All numeric ops propagate null and non-numeric values unchanged. + This matches the behaviour of pandas, where missing values are not coerced. +

+
import { Series, seriesFloor, seriesSqrt, seriesLog } from "tsb";
+
+const s = new Series({ data: [null, 4, null, 9] });
+
+console.log([...seriesFloor(s).values]); // [null, 4, null, 9]
+console.log([...seriesSqrt(s).values]);  // [null, 2, null, 3]
+console.log([...seriesLog(s).values]);   // [null, ~1.386, null, ~2.197]
+
+
Loading…
+
+ +
+

8 — composing operations

+

+ Combine multiple operations. For example, compute the log-transformed + square root of a price column — a common technique in data normalisation. +

+
import { Series, seriesSqrt, seriesLog } from "tsb";
+
+// Log-sqrt transformation: log(√x) = log(x)/2
+const prices = new Series({ data: [1, 4, 9, 100, 10000], name: "price" });
+
+const logSqrt = seriesLog(seriesSqrt(prices));
+console.log(logSqrt.values.map(v => +v.toFixed(4)));
+// [0, 0.6931, 1.0986, 2.3026, 4.6052]
+
+// Verify: equals log(x) / 2
+const logHalf = seriesLog(prices).values.map(v => +(v / 2).toFixed(4));
+console.log(logHalf);
+// [0, 0.6931, 1.0986, 2.3026, 4.6052]
+
+
Loading…
+
+ + + diff --git a/playground/period.html b/playground/period.html new file mode 100644 index 00000000..1f269b72 --- /dev/null +++ b/playground/period.html @@ -0,0 +1,253 @@ + + + + + + tsb — Period & PeriodIndex + + + + + +

Period & PeriodIndex

+

+ Fixed-frequency time spans — mirrors + pandas.Period and pandas.PeriodIndex. +

+ +
+

1 — Supported frequencies

+

+ A Period represents a single time span at a fixed frequency. + tsb supports eight frequencies: +

+ + + + + + + + + + +
CodeAliasDescriptionExample string
"A""Y"Calendar year2024
"Q"Calendar quarter2024Q2
"M"Calendar month2024-03
"W"ISO week (Mon–Sun)2024-01-01/2024-01-07
"D"Day2024-01-15
"H"Hour2024-01-15 14:00
"T""min"Minute2024-01-15 14:35
"S"Second2024-01-15 14:35:42
+
+ +
+

2 — Creating periods

+

Create periods from dates, strings, or directly from ordinals:

+
import { Period } from "tsb";
+
+// From a Date object
+const m = Period.fromDate(new Date("2024-03-15T00:00:00Z"), "M");
+console.log(m.toString());            // "2024-03"
+console.log(m.startTime.toISOString());  // "2024-03-01T00:00:00.000Z"
+console.log(m.endTime.toISOString());    // "2024-03-31T23:59:59.999Z"
+
+// From a string
+const q = Period.fromString("2024Q3", "Q");
+console.log(q.toString());            // "2024Q3"
+console.log(q.startTime.toISOString()); // "2024-07-01T00:00:00.000Z"
+
+// Direct construction from ordinal
+const p = new Period(654, "M");  // 654 months after Jan 1970 = July 2024
+console.log(p.toString());       // "2024-07"
+
+
Run to see output
+
+ +
+

3 — Period arithmetic

+

+ Periods support shift (add), difference (diff), + and comparison. Arithmetic is always within the same frequency. +

+
import { Period } from "tsb";
+
+const jan = Period.fromDate(new Date("2024-01-01T00:00:00Z"), "M");
+const apr = jan.add(3);
+console.log(apr.toString());    // "2024-04"
+
+const dec = Period.fromDate(new Date("2024-12-01T00:00:00Z"), "M");
+console.log(dec.diff(jan));     // 11  (months between)
+
+// Comparison
+console.log(jan.compareTo(dec) < 0);  // true  (jan is earlier)
+console.log(jan.equals(jan.add(0)));  // true
+
+// contains: does a Date fall within a Period?
+const march15 = new Date("2024-03-15T00:00:00Z");
+const march = Period.fromDate(march15, "M");
+console.log(march.contains(march15));   // true
+console.log(march.contains(new Date("2024-04-01T00:00:00Z"))); // false
+
+
Run to see output
+
+ +
+

4 — Frequency conversion with asfreq

+

+ asfreq() converts a period to a different frequency. + The how parameter picks the start or end of the current period + as the anchor point. +

+
import { Period } from "tsb";
+
+const q2 = Period.fromString("2024Q2", "Q");
+
+// "start": April (first month of Q2)
+console.log(q2.asfreq("M", "start").toString());  // "2024-04"
+
+// "end": June (last month of Q2)
+console.log(q2.asfreq("M", "end").toString());    // "2024-06"
+
+// Going coarser: month → year
+const aug = Period.fromString("2024-08", "M");
+console.log(aug.asfreq("A").toString());           // "2024"
+console.log(aug.asfreq("Q").toString());           // "2024Q3"
+
+
Run to see output
+
+ +
+

5 — PeriodIndex: building ranges

+

+ A PeriodIndex is an ordered sequence of periods at a uniform + frequency, suitable for use as a row index. +

+
import { Period, PeriodIndex } from "tsb";
+
+// All four quarters of 2024
+const start = Period.fromDate(new Date("2024-01-01T00:00:00Z"), "Q");
+const end   = Period.fromDate(new Date("2024-12-31T00:00:00Z"), "Q");
+const quarters = PeriodIndex.fromRange(start, end);
+console.log(quarters.size);          // 4
+console.log(quarters.at(0).toString()); // "2024Q1"
+console.log(quarters.at(-1).toString()); // "2024Q4"
+
+// periodRange: start + count
+const months = PeriodIndex.periodRange(
+  Period.fromDate(new Date("2024-01-01T00:00:00Z"), "M"),
+  12,
+);
+console.log(months.size);            // 12
+console.log(months.at(11).toString()); // "2024-12"
+
+// Iteration
+for (const q of quarters) {
+  console.log(q.toString());
+}
+
+
Run to see output
+
+ +
+

6 — PeriodIndex: lookup and transformation

+
import { Period, PeriodIndex } from "tsb";
+
+const idx = PeriodIndex.periodRange(
+  Period.fromDate(new Date("2024-01-01T00:00:00Z"), "M"),
+  6,  // Jan–Jun 2024
+);
+
+// Position lookup
+const mar = Period.fromString("2024-03", "M");
+console.log(idx.getLoc(mar));         // 2
+console.log(idx.contains(mar));       // true
+
+// Shift the whole index
+const shifted = idx.shift(6);
+console.log(shifted.at(0).toString()); // "2024-07"
+console.log(shifted.at(5).toString()); // "2024-12"
+
+// Convert to different frequency
+const asQtr = idx.asfreq("Q", "start");
+console.log(asQtr.freq);             // "Q"
+// Note: some months map to the same quarter → duplicates
+console.log(asQtr.unique().size);    // 2 (Q1 and Q2)
+
+// Get Date arrays
+const starts = idx.toDatetimeStart();
+console.log(starts[0]?.toISOString()); // "2024-01-01T00:00:00.000Z"
+
+
Run to see output
+
+ +
+

7 — Weekly periods

+

+ Weekly periods span Monday–Sunday. The string representation shows the + full range: YYYY-MM-DD/YYYY-MM-DD. +

+
import { Period, PeriodIndex } from "tsb";
+
+// 1970-01-01 is a Thursday — it belongs to week 0 (Mon 1969-12-29 → Sun 1970-01-04)
+const w0 = Period.fromDate(new Date("1970-01-01T00:00:00Z"), "W");
+console.log(w0.ordinal);               // 0
+console.log(w0.startTime.toISOString()); // "1969-12-29T00:00:00.000Z"
+console.log(w0.endTime.toISOString());   // "1970-01-04T23:59:59.999Z"
+console.log(w0.toString());            // "1969-12-29/1970-01-04"
+
+// Build a 4-week index
+const fourWeeks = PeriodIndex.periodRange(
+  Period.fromDate(new Date("2024-01-01T00:00:00Z"), "W"),
+  4,
+);
+for (const w of fourWeeks) {
+  console.log(w.toString());
+}
+
+
Run to see output
+
+ +
+

8 — Sub-daily periods

+
import { Period } from "tsb";
+
+// Hourly
+const h = Period.fromDate(new Date("2024-03-15T14:35:00Z"), "H");
+console.log(h.toString());   // "2024-03-15 14:00"
+console.log(h.durationMs);  // 3_600_000
+
+// Minutely (alias "min")
+const t = Period.fromDate(new Date("2024-03-15T14:35:42Z"), "min");
+console.log(t.toString());   // "2024-03-15 14:35"
+console.log(t.freq);         // "T"
+
+// Secondly
+const s = Period.fromDate(new Date("2024-03-15T14:35:42.500Z"), "S");
+console.log(s.toString());   // "2024-03-15 14:35:42"
+console.log(s.durationMs);  // 1_000
+
+
Run to see output
+
+ + + + diff --git a/playground/pipe.html b/playground/pipe.html new file mode 100644 index 00000000..64ddfb1b --- /dev/null +++ b/playground/pipe.html @@ -0,0 +1,191 @@ + + + + + + tsb — pipe + + + + + +

pipe

+

+ Function-application helpers for left-to-right method chaining — mirrors + pandas.Series.pipe() and pandas.DataFrame.pipe(). +

+ +
+

1 — pipeSeries: apply a function to a Series

+

+ pipeSeries(series, fn, ...args) calls fn(series, ...args) + and returns the result. Use it to build readable transformation chains without + deep nesting. +

+
import { Series, pipeSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5], name: "nums" });
+
+// Simple transform — return the size
+const size = pipeSeries(s, (x) => x.size);
+console.log("size:", size);   // 5
+
+// Pass extra arguments to the function
+const offset = pipeSeries(s, (x, n) => x.size + n, 10);
+console.log("size+10:", offset);  // 15
+
+
▶ run
+
+ +
+

2 — dataFramePipe: apply a function to a DataFrame

+

+ dataFramePipe(df, fn, ...args) works the same way for DataFrames. + This mirrors pandas.DataFrame.pipe(fn, *args). +

+
import { DataFrame, dataFramePipe } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, 2, 3, 4, 5],
+  b: [10, 20, 30, 40, 50],
+});
+
+// Get the shape without nesting
+const shape = dataFramePipe(df, (d) => d.shape);
+console.log("shape:", shape);   // [5, 2]
+
+// Chain: take head, then get row count
+const rowCount = dataFramePipe(dataFramePipe(df, (d) => d.head(3)), (d) => d.shape[0]);
+console.log("rows after head(3):", rowCount);  // 3
+
+
▶ run
+
+ +
+

3 — pipeChain: chain multiple Series transforms

+

+ pipeChain(series, f1, f2, f3, ...) applies a sequence of + Series → Series transforms in left-to-right order. + This is the cleanest API when building multi-step pipelines. +

+
import { Series, pipeChain } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4] });
+
+// Helper transforms
+const addOne  = (x) => new Series({ data: [...x.values].map((v) => v + 1) });
+const double  = (x) => new Series({ data: [...x.values].map((v) => v * 2) });
+const square  = (x) => new Series({ data: [...x.values].map((v) => v * v) });
+
+// Without pipe: deeply nested and hard to read
+const nested = square(double(addOne(s)));
+console.log("nested:", [...nested.values]);  // [16, 36, 64, 100]
+
+// With pipeChain: clean left-to-right
+const piped = pipeChain(s, addOne, double, square);
+console.log("piped:", [...piped.values]);    // [16, 36, 64, 100]
+
+
▶ run
+
+ +
+

4 — dataFramePipeChain: chain multiple DataFrame transforms

+

+ dataFramePipeChain(df, f1, f2, ...) applies a sequence of + DataFrame → DataFrame transforms, ideal for data-prep pipelines. +

+
import { DataFrame, dataFramePipeChain } from "tsb";
+
+const df = DataFrame.fromColumns({
+  score: [10, 5, 80, 95, 42, 3, 77],
+  label: ["a", "b", "c", "d", "e", "f", "g"],
+});
+
+// Build a pipeline of DataFrame → DataFrame steps
+const head4  = (d) => d.head(4);
+const tail3  = (d) => d.tail(3);
+
+const result = dataFramePipeChain(df, head4, tail3);
+
+console.log("shape:", result.shape);          // [3, 2]
+console.log("records:", result.toRecords());
+// [{ score: 5, label: "b" }, { score: 80, label: "c" }, { score: 95, label: "d" }]
+
+
▶ run
+
+ +
+

5 — pipeTo / dataFramePipeTo: control the insertion point

+

+ pandas supports a tuple form df.pipe((fn, "kwarg_name")) where the + DataFrame goes to a specific keyword argument. In tsb we provide + pipeTo(series, pos, fn, ...otherArgs) and + dataFramePipeTo(df, pos, fn, ...otherArgs) which splice the value + at a chosen zero-based argument position. +

+
import { Series, pipeTo } from "tsb";
+
+const seriesB = new Series({ data: [10, 20, 30] });
+
+// A function that expects (left, right) arguments
+const concatValues = (left, right) => {
+  return [...left.values, ...right.values];
+};
+
+// Insert seriesB as the SECOND argument (pos=1) alongside seriesA
+const seriesA = new Series({ data: [1, 2, 3] });
+const result = pipeTo(seriesB, 1, concatValues, seriesA);
+
+console.log("result:", result);  // [1, 2, 3, 10, 20, 30]
+
+
▶ run
+
+ +
+

6 — Practical data pipeline example

+

+ Combining multiple pipe utilities to build a complete data transformation + pipeline, similar to the .pipe() method chains common in pandas. +

+
import { DataFrame, dataFramePipeChain } from "tsb";
+
+// Raw sales data with some issues
+const df = DataFrame.fromColumns({
+  revenue: [100, null, 250, 75, null, 400],
+  region:  ["north", "south", "east", "west", "north", "east"],
+});
+
+// Step functions: each DataFrame → DataFrame
+const dropNulls = (d) => {
+  // Keep only rows where revenue is not null
+  const mask = [...d.col("revenue").values].map((v) => v !== null);
+  const revVals = [...d.col("revenue").values].filter((_, i) => mask[i]);
+  const regVals = [...d.col("region").values].filter((_, i) => mask[i]);
+  return DataFrame.fromColumns({ revenue: revVals, region: regVals });
+};
+
+const top3 = (d) => d.head(3);
+
+// Build the pipeline
+const result = dataFramePipeChain(df, dropNulls, top3);
+console.log("shape:", result.shape);          // [3, 2]
+console.log("records:", result.toRecords());
+
+
▶ run
+
+ + + diff --git a/playground/pivot_table.html b/playground/pivot_table.html new file mode 100644 index 00000000..c2ffadd1 --- /dev/null +++ b/playground/pivot_table.html @@ -0,0 +1,169 @@ + + + + + + tsb — pivotTableFull + + + +
+ ← tsb playground +

pivotTableFull

+

Full pivot table with grand-total margins — mirrors pandas.pivot_table.

+
+ +
+
+

+ pivotTableFull(df, options) reshapes a DataFrame by aggregating values + across row and column groups, and optionally appends a grand-total "All" row and column. +

+ +
+ +
+

Example 1 — sales by region and product (sum + margins)

+
import { DataFrame, pivotTableFull } from "tsb";
+
+const df = DataFrame.fromColumns({
+  region:  ["North","North","South","South","North","South"],
+  product: ["A",    "B",    "A",    "B",    "A",    "B"   ],
+  sales:   [100,    200,    150,    250,    120,    180   ],
+});
+
+const result = pivotTableFull(df, {
+  index:   "region",
+  columns: "product",
+  values:  "sales",
+  aggfunc: "sum",
+  margins: true,
+});
+
+// Output:
+//          A    B    All
+// North   220  200  420
+// South   150  430  580
+// All     370  630 1000
+
+ +
+

Example 2 — mean aggregation with custom margins_name

+
const df = DataFrame.fromColumns({
+  team:  ["Eng","Eng","Mkt","Mkt"],
+  level: ["Sr","Jr","Sr","Jr"],
+  score: [90,  70,  80, 60],
+});
+
+const result = pivotTableFull(df, {
+  index:        "team",
+  columns:      "level",
+  values:       "score",
+  aggfunc:      "mean",
+  margins:      true,
+  margins_name: "Total",
+});
+
+// Output:
+//       Jr   Sr   Total
+// Eng   70   90   80
+// Mkt   60   80   70
+// Total 65   85   75
+
+ +
+

Example 3 — sort=false preserves insertion order

+
const df = DataFrame.fromColumns({
+  r: ["Z", "A", "M", "Z"],
+  c: ["b", "a", "c", "a"],
+  v: [1,   2,   3,   4  ],
+});
+
+pivotTableFull(df, {
+  index:   "r",
+  columns: "c",
+  values:  "v",
+  aggfunc: "sum",
+  sort:    false,
+});
+
+// Rows in order: Z, A, M  (insertion order)
+// Cols in order: b, a, c  (insertion order)
+
+ +
+

Example 4 — count with margins

+
const df = DataFrame.fromColumns({
+  dept:   ["Eng","Eng","Mkt","Mkt","Eng"],
+  level:  ["Sr","Jr","Sr","Jr","Sr"],
+  salary: [120, 80, 110, 75, 130],
+});
+
+pivotTableFull(df, {
+  index:   "dept",
+  columns: "level",
+  values:  "salary",
+  aggfunc: "count",
+  margins: true,
+});
+
+//       Jr   Sr   All
+// Eng    1    2    3
+// Mkt    1    1    2
+// All    2    3    5
+
+ +
+

Key differences from pivotTable

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FeaturepivotTablepivotTableFull
Grand-total marginsmargins
Custom margin labelmargins_name
Sort labelsinsertion ordersort (default true)
All aggfuncs
fill_value, dropna
+
+
+ + + + diff --git a/playground/pow_mod.html b/playground/pow_mod.html new file mode 100644 index 00000000..6113bc72 --- /dev/null +++ b/playground/pow_mod.html @@ -0,0 +1,194 @@ + + + + + + tsb — pow / mod / floordiv + + + + + +

pow / mod / floordiv

+

+ Element-wise exponentiation, modulo, and floor-division for Series and DataFrame — + mirroring pandas.Series.pow, .mod, and .floordiv. +

+ +
+

Overview

+

All three operations work element-wise on Series and DataFrame:

+ + + + + + + + + + +
Functionpandas equivalentOperatorDescription
seriesPow(s, other)s.pow(other)**Raise to power
seriesMod(s, other)s.mod(other)%Modulo (Python/pandas sign rule)
seriesFloorDiv(s, other)s.floordiv(other)//Floor division (rounds toward −∞)
dataFramePow(df, other)df.pow(other)**Column-wise exponentiation
dataFrameMod(df, other)df.mod(other)%Column-wise modulo
dataFrameFloorDiv(df, other)df.floordiv(other)//Column-wise floor division
+

The other operand may be a scalar number or another Series / DataFrame + of the same shape (positional alignment). Missing values (null / NaN) propagate unchanged.

+
+ +
+

seriesPow — exponentiation

+
import { Series, seriesPow } from "tsb";
+
+// Scalar exponent: each element raised to the power 2
+const s = new Series({ data: [1, 2, 3, 4, 5], name: "x" });
+console.log(seriesPow(s, 2).values);
+// [1, 4, 9, 16, 25]
+
+// Series exponent: element-wise pairing
+const exponents = new Series({ data: [1, 2, 3, 4, 5] });
+console.log(seriesPow(s, exponents).values);
+// [1, 4, 27, 256, 3125]
+
+// Square root via pow(0.5)
+const sq = new Series({ data: [4, 9, 16, 25] });
+console.log(seriesPow(sq, 0.5).values);
+// [2, 3, 4, 5]
+
[1, 4, 9, 16, 25] +[1, 4, 27, 256, 3125] +[2, 3, 4, 5]
+
+ +
+

seriesMod — Python-style modulo

+
+ Sign rule: Unlike JavaScript's % operator (which follows C semantics), + seriesMod uses Python / pandas semantics: the result always has the + same sign as the divisor. For example, -7 mod 3 = 2 (not -1). +
+
import { Series, seriesMod } from "tsb";
+
+// Positive divisor: result is always in [0, divisor)
+const s = new Series({ data: [-7, -4, 0, 3, 10] });
+console.log(seriesMod(s, 3).values);
+// [2, 2, 0, 0, 1]
+
+// JavaScript % would give different results for negatives:
+// [-7 % 3, -4 % 3, 0 % 3, 3 % 3, 10 % 3] = [-1, -1, 0, 0, 1]
+
+// Series divisor: element-wise
+const divisors = new Series({ data: [3, 4, 5, 6, 7] });
+console.log(seriesMod(s, divisors).values);
+// [2, 0, 0, 3, 3]
+
+// Missing values propagate unchanged
+const withNull = new Series({ data: [10, null, 15] });
+console.log(seriesMod(withNull, 4).values);
+// [2, null, 3]
+
[2, 2, 0, 0, 1] +[2, 0, 0, 3, 3] +[2, null, 3]
+
+ +
+

seriesFloorDiv — floor division

+
+ Rounding rule: seriesFloorDiv rounds toward −∞ + (Python / pandas // semantics). This differs from JavaScript's + Math.trunc for negative values: -7 // 2 = -4 (not -3). +
+
import { Series, seriesFloorDiv } from "tsb";
+
+const s = new Series({ data: [7, -7, 10, -10, 0] });
+
+// Floor division by scalar
+console.log(seriesFloorDiv(s, 2).values);
+// [3, -4, 5, -5, 0]
+// Note: -7 // 2 = -4 (floor toward -∞, not -3 from trunc)
+
+// Compare with Math.trunc:
+// Math.trunc(-7/2) = Math.trunc(-3.5) = -3  ← different!
+
+// Series divisor
+const divisors = new Series({ data: [2, 3, 4, 5, 1] });
+console.log(seriesFloorDiv(s, divisors).values);
+// [3, -3, 2, -2, 0]
+
+// The div-mod identity: floordiv(a,b)*b + mod(a,b) === a (for integers)
+// -7 = (-4)*2 + 1  ✓  (not (-3)*2 + (-1) which is JS % behavior)
+
[3, -4, 5, -5, 0] +[3, -3, 2, -2, 0]
+
+ +
+

DataFrame operations

+
import { DataFrame, dataFramePow, dataFrameMod, dataFrameFloorDiv } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [2,  3,  4],
+  b: [7, -7, 10],
+});
+
+// dataFramePow: raise every cell to the power 2
+console.log("pow(2):");
+console.log(dataFramePow(df, 2).col("a").values);  // [4, 9, 16]
+console.log(dataFramePow(df, 2).col("b").values);  // [49, 49, 100]
+
+// dataFrameMod: element-wise modulo by scalar
+console.log("mod(3):");
+console.log(dataFrameMod(df, 3).col("a").values);  // [2, 0, 1]
+console.log(dataFrameMod(df, 3).col("b").values);  // [1, 2, 1]
+
+// dataFrameFloorDiv: floor division by scalar
+console.log("floordiv(3):");
+console.log(dataFrameFloorDiv(df, 3).col("a").values);  // [0, 1, 1]
+console.log(dataFrameFloorDiv(df, 3).col("b").values);  // [2, -3, 3]
+
+// DataFrame × DataFrame: column-aligned
+const df2 = DataFrame.fromColumns({ a: [1, 2, 3], b: [3, 4, 5] });
+console.log("pow(df2):");
+console.log(dataFramePow(df, df2).col("a").values);  // [2, 9, 64]
+
pow(2): +[4, 9, 16] +[49, 49, 100] +mod(3): +[2, 0, 1] +[1, 2, 1] +floordiv(3): +[0, 1, 1] +[2, -3, 3] +pow(df2): +[2, 9, 64]
+
+ +
+

Pandas comparison

+
# pandas equivalent
+import pandas as pd
+
+s = pd.Series([-7, 0, 7, 10])
+print(s.pow(2))       # [49, 0, 49, 100]
+print(s.mod(3))       # [2, 0, 1, 1]   (Python sign rule)
+print(s.floordiv(2))  # [-4, 0, 3, 5]  (floor toward -∞)
+
+df = pd.DataFrame({"a": [2, 3, 4], "b": [7, -7, 10]})
+print(df.pow(2))       # a: [4,9,16]  b: [49,49,100]
+print(df.mod(3))       # a: [2,0,1]   b: [1,2,1]
+print(df.floordiv(3))  # a: [0,1,1]   b: [2,-3,3]
+
+ + + diff --git a/playground/reindex.html b/playground/reindex.html new file mode 100644 index 00000000..55ddb8f5 --- /dev/null +++ b/playground/reindex.html @@ -0,0 +1,166 @@ + + + + + + tsb — reindex + + + +
+ ← tsb playground +

reindex

+

Align a Series or DataFrame to a new axis — mirrors pandas.Series.reindex / pandas.DataFrame.reindex.

+
+ +
+
+

+ reindex lets you align a Series or DataFrame to a new index, + filling gaps with a fill value or propagating adjacent values. +

+ +

+ Missing labels get null by default, or any fillValue you choose. + You can also propagate values using method: "ffill" (forward fill), + "bfill" (backward fill), or "nearest". +

+

+ See also: + pandas.Series.reindex + · + pandas.DataFrame.reindex +

+
+ +
+

1 · reindexSeries — basics

+
import { Series, Index, reindexSeries } from "tsb";
+
+const s = new Series({ data: [10, 20, 30], index: new Index(["a", "b", "c"]) });
+
+// Reorder labels
+reindexSeries(s, ["c", "a", "b"]).toArray();
+// → [30, 10, 20]
+
+// Extend with new labels → null by default
+reindexSeries(s, ["a", "b", "c", "d"]).toArray();
+// → [10, 20, 30, null]
+
+// Extend with custom fill value
+reindexSeries(s, ["a", "b", "c", "d"], { fillValue: 0 }).toArray();
+// → [10, 20, 30, 0]
+
+// Drop labels
+reindexSeries(s, ["a", "c"]).toArray();
+// → [10, 30]
+
+ +
+

2 · Fill methods

+
import { Series, Index, reindexSeries } from "tsb";
+
+const temps = new Series({
+  data: [15, 18, 22],
+  index: new Index([0, 2, 5]),  // sparse integer index
+});
+
+// Forward fill — carry last known value forward
+reindexSeries(temps, [0, 1, 2, 3, 4, 5], { method: "ffill" }).toArray();
+// → [15, 15, 18, 18, 18, 22]
+//       ^^       ^^ ^^      ← filled from left
+
+// Backward fill — carry next known value backward
+reindexSeries(temps, [0, 1, 2, 3, 4, 5], { method: "bfill" }).toArray();
+// → [15, 18, 18, 22, 22, 22]
+//       ^^       ^^  ^^  ← filled from right
+
+// Nearest — use closest value (prefer right on tie)
+reindexSeries(temps, [0, 1, 2, 3, 4, 5], { method: "nearest" }).toArray();
+// → [15, 18, 18, 18, 22, 22]   (target 1 ties between 0 and 2 → right wins)
+
+// Limit — cap consecutive fills
+reindexSeries(temps, [0, 1, 2, 3, 4, 5], { method: "ffill", limit: 1 }).toArray();
+// → [15, 15, 18, 18, null, 22]
+//       ^^       ^^ at most 1 consecutive fill per gap
+
+ +
+

3 · reindexDataFrame — rows

+
import { DataFrame, reindexDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  open:  [100, 102, 105],
+  close: [ 98, 104, 107],
+});
+// shape [3, 2], RangeIndex [0, 1, 2]
+
+// Extend to 5 rows — new rows filled with null
+reindexDataFrame(df, { index: [0, 1, 2, 3, 4] }).col("open").toArray();
+// → [100, 102, 105, null, null]
+
+// Forward-fill new rows
+reindexDataFrame(df, { index: [0, 1, 2, 3, 4], method: "ffill" }).col("open").toArray();
+// → [100, 102, 105, 105, 105]
+
+ +
+

4 · reindexDataFrame — columns

+
import { DataFrame, reindexDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({ a: [1, 2, 3], b: [4, 5, 6] });
+
+// Reorder columns
+reindexDataFrame(df, { columns: ["b", "a"] }).columns.toArray();
+// → ["b", "a"]
+
+// Add a new column filled with 0
+const r = reindexDataFrame(df, { columns: ["a", "b", "c"], fillValue: 0 });
+r.col("c").toArray();
+// → [0, 0, 0]
+
+ +
+

5 · Reindex rows and columns simultaneously

+
import { DataFrame, reindexDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  x: [1, 2, 3],
+  y: [4, 5, 6],
+});
+
+const r = reindexDataFrame(df, {
+  index:    [0, 1, 2, 3],   // extend to 4 rows
+  columns:  ["x", "y", "z"], // add column "z"
+  fillValue: -1,
+});
+// shape [4, 3]
+r.col("z").toArray();  // → [-1, -1, -1, -1]
+r.col("x").toArray();  // → [1, 2, 3, -1]
+
+ +
+

6 · Pandas equivalents

+
# Python / pandas equivalent
+import pandas as pd
+
+s = pd.Series([10, 20, 30], index=["a", "b", "c"])
+
+# reindexSeries(s, newLabels)  →  s.reindex(newLabels)
+s.reindex(["a", "b", "c", "d"])       # NaN for "d"
+s.reindex(["a", "b", "c", "d"], fill_value=0)
+s.reindex(range(5), method="ffill")    # forward fill gaps
+
+df = pd.DataFrame({"a": [1,2,3], "b": [4,5,6]})
+
+# reindexDataFrame(df, { index, columns })
+df.reindex([0, 1, 2, 3])
+df.reindex(columns=["a", "b", "c"])
+df.reindex(index=[0,1,2,3], columns=["a","b","c"], fill_value=0)
+
+
+ + diff --git a/playground/sample.html b/playground/sample.html new file mode 100644 index 00000000..2ba4e93e --- /dev/null +++ b/playground/sample.html @@ -0,0 +1,187 @@ + + + + + + tsb — sample + + + + + +

sample

+

Random sampling from Series and DataFrame — mirrors pandas.Series.sample() and pandas.DataFrame.sample().

+ +
+

1 — Basic Series sampling

+

sampleSeries(s, { n }) returns a new Series with n randomly chosen elements. Pass randomState for reproducible results.

+
import { Series, sampleSeries } from "tsb";
+
+const s = new Series({
+  data: [10, 20, 30, 40, 50],
+  index: ["a", "b", "c", "d", "e"],
+});
+
+// Sample 3 elements — same result every time with randomState
+const r = sampleSeries(s, { n: 3, randomState: 42 });
+console.log([...r.values]);         // 3 values from [10,20,30,40,50]
+console.log([...r.index.values]);   // corresponding labels
+
+
Loading…
+
+ +
+

2 — Sampling a fraction

+

Instead of a fixed count, use frac to specify a proportion of the data (0–1).

+
import { Series, sampleSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] });
+
+// Sample 40% of the data
+const r = sampleSeries(s, { frac: 0.4, randomState: 0 });
+console.log(r.size);        // 4  (= round(0.4 × 10))
+console.log([...r.values]); // 4 random values
+
+
Loading…
+
+ +
+

3 — Sampling with replacement

+

Set replace: true to allow the same element to be selected more than once. This also lets you request more items than the Series contains.

+
import { Series, sampleSeries } from "tsb";
+
+const s = new Series({ data: [1, 2, 3] });
+
+// 6 samples from a 3-element Series — duplicates allowed
+const r = sampleSeries(s, { n: 6, replace: true, randomState: 7 });
+console.log(r.size);        // 6
+console.log([...r.values]); // may contain repeated values, e.g. [3, 1, 3, 2, 1, 1]
+
+
Loading…
+
+ +
+

4 — Weighted sampling

+

Provide a weights array to bias the random draw. Higher weight → higher probability of selection. Weights are normalised automatically.

+
import { Series, sampleSeries } from "tsb";
+
+const s = new Series({ data: ["apple", "banana", "cherry"] });
+// cherry has 8× the weight of apple and 4× the weight of banana
+const weights = [1, 2, 8];
+
+const counts = { apple: 0, banana: 0, cherry: 0 };
+for (let seed = 0; seed < 200; seed++) {
+  const v = sampleSeries(s, { n: 1, weights, randomState: seed }).values[0];
+  counts[v]++;
+}
+console.log(counts); // cherry ~145/200, banana ~36/200, apple ~18/200
+
+
Loading…
+
+ +
+

5 — ignoreIndex

+

Set ignoreIndex: true to reset the result index to 0, 1, 2, … instead of preserving the original labels.

+
import { Series, sampleSeries } from "tsb";
+
+const s = new Series({
+  data: [100, 200, 300],
+  index: ["x", "y", "z"],
+});
+
+const r = sampleSeries(s, { n: 2, randomState: 1 });
+console.log([...r.index.values]);   // e.g. ["z", "x"] — original labels
+
+const r2 = sampleSeries(s, { n: 2, randomState: 1, ignoreIndex: true });
+console.log([...r2.index.values]);  // [0, 1] — reset
+
+
Loading…
+
+ +
+

6 — DataFrame row sampling

+

sampleDataFrame(df, { n }) returns a DataFrame with n randomly selected rows. Row integrity is preserved — all columns stay aligned.

+
import { DataFrame, sampleDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  name:  ["Alice", "Bob", "Carol", "Dave", "Eve"],
+  score: [88, 72, 95, 61, 84],
+  grade: ["B", "C", "A", "D", "B"],
+});
+
+const sample = sampleDataFrame(df, { n: 3, randomState: 5 });
+console.log([...sample.col("name").values]);   // 3 names
+console.log([...sample.col("score").values]);  // corresponding scores
+
+
Loading…
+
+ +
+

7 — DataFrame column sampling (axis=1)

+

Set axis: 1 to sample columns instead of rows. Useful for random feature selection.

+
import { DataFrame, sampleDataFrame } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, 2, 3],
+  b: [4, 5, 6],
+  c: [7, 8, 9],
+  d: [10, 11, 12],
+});
+
+// Pick 2 random columns
+const r = sampleDataFrame(df, { n: 2, axis: 1, randomState: 3 });
+console.log([...r.columns.values]);  // e.g. ["b", "d"]
+
+
Loading…
+
+ +
+

8 — Bootstrapping example

+

Sampling with replacement is the foundation of bootstrapping — re-sampling your data to estimate statistics.

+
import { Series, sampleSeries } from "tsb";
+
+const data = new Series({ data: [2.1, 3.4, 5.5, 2.9, 4.2, 3.8, 5.1, 4.6, 3.3, 2.7] });
+const n = data.size;
+
+// 50 bootstrap means
+const means = [];
+for (let seed = 0; seed < 50; seed++) {
+  const boot = sampleSeries(data, { n, replace: true, randomState: seed });
+  const sum = (boot.values as number[]).reduce((a, b) => a + b, 0);
+  means.push(sum / n);
+}
+means.sort((a, b) => a - b);
+const lo = means[Math.floor(means.length * 0.025)].toFixed(2);
+const hi = means[Math.floor(means.length * 0.975)].toFixed(2);
+console.log(`Bootstrap 95% CI for mean: [${lo}, ${hi}]`);
+
+
Loading…
+
+ + + + diff --git a/playground/searchsorted.html b/playground/searchsorted.html new file mode 100644 index 00000000..685f84e9 --- /dev/null +++ b/playground/searchsorted.html @@ -0,0 +1,136 @@ + + + + + + tsb — searchsorted + + + +
+ ← tsb playground +

searchsorted

+

Binary search on sorted arrays — mirrors numpy.searchsorted and pandas.Index.searchsorted.

+
+ +
+
+

+ searchsorted(a, v) returns the index at which value v should be inserted + into the sorted array a to keep it sorted. This is the standard + binary-search operation used throughout pandas for alignment, binning, and lookup. +

+

+ Two side modes: + side = "left" (default) places insertion before any equal elements; + side = "right" places it after. +

+

+ Three exports: + searchsorted(a, v) — single value search; + searchsortedMany(a, vs) — vectorised search over multiple values; + argsortScalars(a) — compute a sort permutation (for the sorter option). +

+
+ +
+

1 · Basic usage

+
import { searchsorted, searchsortedMany, argsortScalars } from "tsb";
+
+const a = [1, 3, 5, 7, 9];
+
+// Where would 4 go?
+console.log(searchsorted(a, 4));          // → 2  (between 3 and 5)
+
+// Where would 5 go — before existing 5?
+console.log(searchsorted(a, 5));          // → 2  (side="left" default)
+
+// Where would 5 go — after existing 5?
+console.log(searchsorted(a, 5, { side: "right" }));  // → 3
+
+// Out-of-range values
+console.log(searchsorted(a, 0));          // → 0  (before everything)
+console.log(searchsorted(a, 99));         // → 5  (after everything)
+
+ +
+

2 · Vectorised search with searchsortedMany

+
import { searchsortedMany } from "tsb";
+
+const prices = [10, 20, 30, 40, 50];
+
+// Find where several bid prices would fall
+const bids = [15, 25, 50, 55];
+console.log(searchsortedMany(prices, bids));
+// → [1, 2, 4, 5]
+
+// side="right" for after-equal semantics
+console.log(searchsortedMany(prices, [20, 40], { side: "right" }));
+// → [2, 4]
+
+ +
+

3 · Searching unsorted data with sorter

+
import { searchsorted, argsortScalars } from "tsb";
+
+// argsortScalars returns the permutation that would sort the array
+const data = [50, 10, 30, 20, 40];
+const sorter = argsortScalars(data);
+// sorter → [1, 3, 2, 4, 0]  (indices of 10, 20, 30, 40, 50)
+
+// Now search without sorting the original array
+console.log(searchsorted(data, 25, { sorter }));          // → 2  (between 20 and 30)
+console.log(searchsorted(data, 30, { sorter }));          // → 2  (left of 30)
+console.log(searchsorted(data, 30, { side: "right", sorter }));  // → 3
+
+ +
+

4 · String arrays

+
import { searchsorted } from "tsb";
+
+const words = ["apple", "banana", "cherry", "date", "elderberry"];
+
+console.log(searchsorted(words, "blueberry"));  // → 2  (between banana and cherry)
+console.log(searchsorted(words, "cherry"));     // → 2  (left of cherry)
+console.log(searchsorted(words, "cherry", { side: "right" }));  // → 3
+
+ +
+

5 · Custom comparator

+
import { searchsorted } from "tsb";
+
+// Case-insensitive string search
+const arr = ["apple", "Banana", "cherry"];  // sorted case-insensitively
+const cmp = (a: unknown, b: unknown) => {
+  const sa = String(a).toLowerCase();
+  const sb = String(b).toLowerCase();
+  return sa < sb ? -1 : sa > sb ? 1 : 0;
+};
+
+console.log(searchsorted(arr, "banana", { compareFn: cmp }));  // → 1
+console.log(searchsorted(arr, "CHERRY", { compareFn: cmp }));  // → 2
+
+ +
+

6 · pandas equivalents

+
# Python / pandas
+import pandas as pd
+import numpy as np
+
+idx = pd.Index([1, 3, 5, 7, 9])
+idx.searchsorted(4)              # → 2
+idx.searchsorted(5)              # → 2  (side='left')
+idx.searchsorted(5, side='right')  # → 3
+
+np.searchsorted([1, 3, 5, 7, 9], [2, 5, 8])  # → [1, 2, 4]
+
+# TypeScript / tsb equivalent
+import { searchsorted, searchsortedMany } from "tsb";
+searchsorted([1, 3, 5, 7, 9], 4)                          // → 2
+searchsorted([1, 3, 5, 7, 9], 5)                          // → 2
+searchsorted([1, 3, 5, 7, 9], 5, { side: "right" })       // → 3
+searchsortedMany([1, 3, 5, 7, 9], [2, 5, 8])              // → [1, 2, 4]
+
+
+ + diff --git a/playground/select_dtypes.html b/playground/select_dtypes.html new file mode 100644 index 00000000..19498050 --- /dev/null +++ b/playground/select_dtypes.html @@ -0,0 +1,236 @@ + + + + + + tsb — select_dtypes + + + +

🔍 select_dtypes

+

+ Return a subset of DataFrame columns matching given dtype selectors — + mirroring + pandas.DataFrame.select_dtypes(). +

+ +

Overview

+ + + + + + + + + + + + + + +
SelectorMatches
"number"int, uint, float dtypes
"integer"int and uint dtypes
"signed integer"int dtypes only (int8–int64)
"unsigned integer"uint dtypes only (uint8–uint64)
"floating"float dtypes (float32, float64)
"bool"boolean dtype
"string"string dtype
"object"object dtype
"datetime"datetime dtype
"timedelta"timedelta dtype
"category"category dtype
"int64" etc.exact concrete dtype name
+ +

1 · include: keep only numeric columns

+
import { DataFrame } from "tsb";
+import { selectDtypes } from "tsb";
+
+const df = DataFrame.fromColumns({
+  age:    [25, 30, 22],
+  score:  [88.5, 92.0, 77.3],
+  name:   ["Alice", "Bob", "Carol"],
+  active: [true, false, true],
+});
+
+const nums = selectDtypes(df, { include: "number" });
+// Keeps: age (int64), score (float64)
+// Drops: name (string), active (bool)
+console.log(nums.columns.toArray()); // ["age", "score"]
+
Click Run to evaluate
+ + +

2 · exclude: drop boolean and string columns

+
const withoutBoolStr = selectDtypes(df, { exclude: ["bool", "string"] });
+// Keeps: age (int64), score (float64)
+console.log(withoutBoolStr.columns.toArray()); // ["age", "score"]
+
Click Run to evaluate
+ + +

3 · include + exclude combined

+
// Include all numeric, but exclude float64
+const intOnly = selectDtypes(df, { include: "number", exclude: "floating" });
+// Keeps: age (int64)
+console.log(intOnly.columns.toArray()); // ["age"]
+
Click Run to evaluate
+ + +

4 · Concrete dtype name selector

+
const floatOnly = selectDtypes(df, { include: "float64" });
+console.log(floatOnly.columns.toArray()); // ["score"]
+
Click Run to evaluate
+ + +

5 · Inspect column dtypes

+
Click Run to evaluate
+ + +

6 · Interactive: try your own

+ + +
Output will appear here
+ + + + diff --git a/playground/shift_diff.html b/playground/shift_diff.html new file mode 100644 index 00000000..1c15e7de --- /dev/null +++ b/playground/shift_diff.html @@ -0,0 +1,214 @@ + + + + + + tsb — shift & diff + + + + + +

shift & diff

+

+ Lag values and compute discrete differences — + mirrors pandas.Series.shift() and pandas.Series.diff(). +

+ +
+

1 — shiftSeries: lag values by N positions

+

+ shiftSeries(series, periods) shifts each value by periods positions. + Vacated positions are filled with null. The index is unchanged. + Mirrors pandas.Series.shift(). +

+
import { Series, shiftSeries } from "tsb";
+
+const s = new Series({ data: [10, 20, 30, 40, 50] });
+
+// shift down by 1 (default)
+console.log([...shiftSeries(s).values]);
+// [null, 10, 20, 30, 40]
+
+// shift up by 1 (negative periods)
+console.log([...shiftSeries(s, -1).values]);
+// [20, 30, 40, 50, null]
+
+// periods = 0 → no change
+console.log([...shiftSeries(s, 0).values]);
+// [10, 20, 30, 40, 50]
+
+
Loading…
+
+ +
+

2 — diffSeries: first discrete difference

+

+ diffSeries(series, periods) computes values[i] - values[i - periods] + for each element. Returns NaN where there is no prior value or when either + operand is non-numeric. Mirrors pandas.Series.diff(). +

+
import { Series, diffSeries } from "tsb";
+
+// cumulative price data
+const prices = new Series({ data: [100, 105, 103, 110, 108] });
+
+// day-over-day change (lag 1)
+const d1 = diffSeries(prices);
+console.log([...d1.values]);
+// [NaN, 5, -2, 7, -2]
+
+// 2-day change (lag 2)
+const d2 = diffSeries(prices, 2);
+console.log([...d2.values]);
+// [NaN, NaN, 3, 5, 5]
+
+
Loading…
+
+ +
+

3 — missing values in shift

+

+ Null and NaN values in the source are preserved when shifted — they behave just like + any other value, not like holes. +

+
import { Series, shiftSeries } from "tsb";
+
+const s = new Series({ data: [1, null, 3, NaN, 5] });
+const shifted = shiftSeries(s, 1);
+console.log([...shifted.values]);
+// [null, 1, null, 3, NaN]
+// Note: the leading null is from the shift; the rest are the original values shifted down.
+
+
Loading…
+
+ +
+

4 — missing values in diff

+

+ When either operand in a diff is null/NaN or non-numeric, the result at that position + is NaN. This mirrors pandas' behaviour. +

+
import { Series, diffSeries } from "tsb";
+
+const s = new Series({ data: [1, null, 5, 8] });
+const d = diffSeries(s);
+console.log([...d.values]);
+// [NaN, NaN, NaN, 3]
+// positions 0, 1, 2 are NaN:
+//   0 → no previous value
+//   1 → current is null (non-numeric)
+//   2 → previous (position 1) is null (non-numeric)
+
+
Loading…
+
+ +
+

5 — dataFrameShift: shift by column (axis=0)

+

+ dataFrameShift(df, periods) applies shiftSeries to each column + independently. Use axis: 1 to shift values across columns within each row. +

+
import { DataFrame, dataFrameShift } from "tsb";
+
+const df = DataFrame.fromColumns({
+  open:  [100, 105, 103, 110],
+  close: [104, 102, 108, 112],
+});
+
+const shifted = dataFrameShift(df, 1);
+console.log([...shifted.col("open").values]);  // [null, 100, 105, 103]
+console.log([...shifted.col("close").values]); // [null, 104, 102, 108]
+
+
Loading…
+
+ +
+

6 — dataFrameDiff: column-wise differences

+

+ dataFrameDiff(df, periods) computes element-wise discrete differences for + each column. Useful for converting time-series levels into changes. +

+
import { DataFrame, dataFrameDiff } from "tsb";
+
+const df = DataFrame.fromColumns({
+  a: [1, 3, 6, 10],
+  b: [10, 30, 60, 100],
+});
+
+const changes = dataFrameDiff(df);
+console.log([...changes.col("a").values]); // [NaN, 2, 3, 4]
+console.log([...changes.col("b").values]); // [NaN, 20, 30, 40]
+
+
Loading…
+
+ +
+

7 — combining shift and diff

+

+ A common pattern: use shiftSeries to compute percentage change by dividing + the current value by the lagged value. +

+
import { Series, shiftSeries, diffSeries } from "tsb";
+
+const prices = new Series({ data: [100, 110, 105, 115, 120] });
+
+// percentage change manually using shift
+const prev = shiftSeries(prices, 1);
+const pctChange = prices.values.map((v, i) => {
+  const p = prev.values[i];
+  if (p === null || p === 0 || typeof p !== "number") return NaN;
+  return ((v as number) - p) / p * 100;
+});
+console.log(pctChange.map(x => isNaN(x) ? "NaN" : x.toFixed(2) + "%"));
+// ["NaN", "10.00%", "-4.55%", "9.52%", "4.35%"]
+
+// or just use diffSeries and divide
+const diff = diffSeries(prices);
+const prevVals = shiftSeries(prices, 1).values;
+const pct2 = diff.values.map((d, i) => {
+  const p = prevVals[i];
+  if (typeof p !== "number" || p === 0) return NaN;
+  return (d as number) / p * 100;
+});
+console.log(pct2.map(x => isNaN(x) ? "NaN" : x.toFixed(2) + "%"));
+
+
Loading…
+
+ +
+

8 — negative periods: lead instead of lag

+

+ Negative periods "leads" the series — each position gets the value from + ahead of it, not behind. Useful for computing forward-looking changes. +

+
import { Series, shiftSeries, diffSeries } from "tsb";
+
+const s = new Series({ data: [1, 4, 9, 16, 25] });
+
+// lead by 1: each position gets the next value
+const lead1 = shiftSeries(s, -1);
+console.log([...lead1.values]); // [4, 9, 16, 25, null]
+
+// forward diff: how much will the value increase?
+const fwdDiff = diffSeries(s, -1);
+console.log([...fwdDiff.values]); // [-3, -5, -7, -9, NaN]
+
+
Loading…
+
+ + + diff --git a/playground/timedelta.html b/playground/timedelta.html new file mode 100644 index 00000000..4085524d --- /dev/null +++ b/playground/timedelta.html @@ -0,0 +1,240 @@ + + + + + + tsb — Timedelta & TimedeltaIndex + + + + + +

Timedelta & TimedeltaIndex

+

+ Fixed-duration time spans and ordered index of durations — + mirrors pandas.Timedelta and pandas.TimedeltaIndex. +

+ +
+

1 — Creating a Timedelta

+

+ A Timedelta stores a duration as a whole number of milliseconds. + Construct from component fields, a raw millisecond count, or a string. +

+
import { Timedelta } from "tsb";
+
+// From components
+const td1 = Timedelta.fromComponents({ days: 1, hours: 2, minutes: 30 });
+console.log(td1.toString());          // "1 days 02:30:00"
+console.log(td1.totalHours);          // 26.5
+
+// From milliseconds
+const td2 = Timedelta.fromMilliseconds(3_600_000);
+console.log(td2.totalHours);          // 1
+
+// Parse pandas-style string
+const td3 = Timedelta.parse("2 days 06:00:00");
+console.log(td3.totalDays);           // 2.25
+
+// Parse ISO 8601
+const td4 = Timedelta.parse("P1DT12H");
+console.log(td4.totalHours);          // 36
+
+
Loading…
+
+ +
+

2 — Component accessors

+

+ Access the individual components of a duration. For negative durations the + days component carries the sign; hours, + minutes, seconds, and milliseconds + are always non-negative remainders. +

+
import { Timedelta } from "tsb";
+
+const td = Timedelta.fromComponents({
+  days: 1, hours: 2, minutes: 3, seconds: 4, milliseconds: 567
+});
+
+console.log("days        :", td.days);         // 1
+console.log("hours       :", td.hours);        // 2
+console.log("minutes     :", td.minutes);      // 3
+console.log("seconds     :", td.seconds);      // 4
+console.log("milliseconds:", td.milliseconds); // 567
+
+// Negative duration
+const neg = Timedelta.fromComponents({ hours: -25 });
+console.log("days (neg)  :", neg.days);        // -1
+console.log("hours (neg) :", neg.hours);       //  1
+
+
Loading…
+
+ +
+

3 — Arithmetic

+

+ Timedeltas support addition, subtraction, scalar multiplication, negation, + absolute value, and ratio (dividing one duration by another). +

+
import { Timedelta } from "tsb";
+
+const h1 = Timedelta.fromComponents({ hours: 1 });
+const h2 = Timedelta.fromComponents({ hours: 2 });
+
+console.log(h1.add(h2).totalHours);   // 3
+console.log(h2.sub(h1).totalHours);   // 1
+console.log(h1.mul(3).totalHours);    // 3
+console.log(h1.negate().totalHours);  // -1
+
+const neg = Timedelta.fromComponents({ hours: -3 });
+console.log(neg.abs().totalHours);    // 3
+
+// Ratio between two durations
+const day = Timedelta.fromComponents({ days: 1 });
+console.log(day.divBy(h1));           // 24
+
+
Loading…
+
+ +
+

4 — String formats

+

+ toString() produces a pandas-compatible representation. + toISOString() produces an ISO 8601 duration. + Timedelta.parse() accepts both formats plus plain + HH:MM:SS. +

+ + + + + + + +
FormatExample
pandas-style1 days 02:30:00
pandas-style (ms)0 days 00:00:01.500
ISO 8601P1DT2H30M
HH:MM:SS02:30:00
Negative-1 days 01:00:00
+
import { Timedelta } from "tsb";
+
+const td = Timedelta.fromComponents({ days: 1, hours: 2, minutes: 30 });
+console.log(td.toString());      // "1 days 02:30:00"
+console.log(td.toISOString());   // "P1DT2H30M"
+
+// Round-trip parse
+const parsed = Timedelta.parse(td.toString());
+console.log(parsed.equals(td));  // true
+
+// Negative
+const neg = Timedelta.fromComponents({ hours: -25 });
+console.log(neg.toString());     // "-1 days 01:00:00"
+console.log(neg.toISOString());  // "-P1DT1H"
+
+
Loading…
+
+ +
+

5 — TimedeltaIndex

+

+ TimedeltaIndex is an ordered array of Timedelta + values — useful as a row index for time-series data with irregular or + regular durations. +

+
import { Timedelta, TimedeltaIndex } from "tsb";
+
+// Build from a range (like pandas.timedelta_range)
+const idx = TimedeltaIndex.fromRange(
+  Timedelta.fromComponents({ hours: 0 }),
+  Timedelta.fromComponents({ hours: 4 }),
+  Timedelta.fromComponents({ hours: 1 }),
+  { name: "duration" },
+);
+
+console.log("size   :", idx.size);           // 5
+console.log("name   :", idx.name);           // "duration"
+console.log("at(0)  :", idx.at(0).toString()); // "0 days 00:00:00"
+console.log("at(4)  :", idx.at(4).toString()); // "0 days 04:00:00"
+console.log("min    :", idx.min().totalHours); // 0
+console.log("max    :", idx.max().totalHours); // 4
+
+
Loading…
+
+ +
+

6 — Index operations

+

+ TimedeltaIndex supports sorting, deduplication, shifting, + filtering, and renaming. +

+
import { Timedelta, TimedeltaIndex } from "tsb";
+
+const vals = [3, 1, 2, 1].map(h => Timedelta.fromComponents({ hours: h }));
+const idx = TimedeltaIndex.fromTimedeltas(vals);
+
+// Sort
+const sorted = idx.sort();
+console.log("sorted:", sorted.toStrings());
+// ["0 days 01:00:00", "0 days 01:00:00", "0 days 02:00:00", "0 days 03:00:00"]
+
+// Remove duplicates
+const uniq = idx.unique();
+console.log("unique size:", uniq.size); // 3
+
+// Shift by 10 hours
+const shifted = idx.shift(Timedelta.fromComponents({ hours: 10 }));
+console.log("shifted[0]:", shifted.at(0).totalHours); // 13
+
+// Filter
+const large = idx.filter(td => td.totalHours >= 2);
+console.log("large size:", large.size); // 2
+
+// Parse from strings
+const fromStr = TimedeltaIndex.fromStrings(["01:00:00", "02:00:00", "03:00:00"]);
+console.log("fromStr[1]:", fromStr.at(1).totalHours); // 2
+
+
Loading…
+
+ +
+

7 — Comparison

+
import { Timedelta } from "tsb";
+
+const h1 = Timedelta.fromComponents({ hours: 1 });
+const h2 = Timedelta.fromComponents({ hours: 2 });
+
+console.log(h1.equals(h2));             // false
+console.log(h1.equals(Timedelta.fromComponents({ hours: 1 }))); // true
+console.log(h1.compareTo(h2));          // negative  → h1 < h2
+console.log(h2.compareTo(h1));          // positive  → h2 > h1
+console.log(h1.compareTo(Timedelta.fromComponents({ hours: 1 }))); // 0
+
+
Loading…
+
+ + + + diff --git a/playground/timestamp.html b/playground/timestamp.html new file mode 100644 index 00000000..001709c6 --- /dev/null +++ b/playground/timestamp.html @@ -0,0 +1,647 @@ + + + + + + tsb — Timestamp + + + +
+

tsb — Timestamp

+

A single point in time · mirrors pandas.Timestamp

+
+
+ ← back to index + +

API Reference

+
+ + + + + + + + + + + + + + + + + + + + + + +
Method / PropertyDescriptionpandas equivalent
new Timestamp(str)Parse ISO string (with optional tz option)pd.Timestamp(str)
Timestamp.now(tz?)Current timepd.Timestamp.now()
Timestamp.today()Today at midnightpd.Timestamp.today()
.year .month .dayDate componentssame
.hour .minute .secondTime componentssame
.dayofweek0=Mon … 6=Sun.dayofweek
.dayofyear .quarter .weekCalendar propertiessame
.is_month_start .is_month_endCalendar boundary checkssame
.isoformat(sep, timespec)ISO string output.isoformat()
.strftime(fmt)Format string (%Y-%m-%d etc.).strftime()
.floor(freq) .ceil(freq) .round(freq)Round to frequencysame
.normalize()Truncate to midnight.normalize()
.tz_localize(tz)Attach timezone to naive.tz_localize()
.tz_convert(tz)Convert to another timezone.tz_convert()
.add(Timedelta)Shift forward by a durationts + td
.sub(ts|td)Subtract timestamp or timedeltats - ts2
.day_name() .month_name()English name stringssame
.timestamp()Unix seconds (float).timestamp()
+
+ +

Interactive Inspector

+
+
+
+ + +
+
+ + +
+
+ +
Click "Inspect" to explore a Timestamp.
+
+ +

strftime Formatter

+
+
+
+ + +
+
+ + +
+
+ +
+
+ +

Rounding

+
+
+
+ + +
+
+ + +
+
+ +
+
+
+
+ +

Arithmetic

+
+
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
+ +

Timezone Conversion

+
+
+
+ + +
+
+ + +
+
+ +
+
+
+
+ +

Now / Today

+
+
+ + + +
+
Click a button above.
+
+
+ + + + diff --git a/playground/to_numeric.html b/playground/to_numeric.html new file mode 100644 index 00000000..d8bb491d --- /dev/null +++ b/playground/to_numeric.html @@ -0,0 +1,138 @@ + + + + + + tsb — to_numeric + + + +

📐 to_numeric

+

+ Convert scalars, arrays, or Series to numeric types — mirroring + pandas.to_numeric(). +

+ +

1 · Scalar conversion

+
toNumericScalar("42")     // 42
+toNumericScalar("3.14")   // 3.14
+toNumericScalar(true)     // 1
+toNumericScalar(null)     // NaN
+toNumericScalar("bad", { errors: "coerce" })  // NaN
+toNumericScalar("bad", { errors: "ignore" })  // "bad"
+
click Run to evaluate
+ + +

2 · Array conversion with error handling

+
toNumericArray(["1", "2.5", "abc", null], { errors: "coerce" })
+// [1, 2.5, NaN, NaN]
+
click Run to evaluate
+ + +

3 · Series conversion

+
const prices = new Series(["10.5", "bad", "22"], {
+  name: "price",
+  index: ["a", "b", "c"]
+});
+toNumericSeries(prices, { errors: "coerce" })
+// Series [10.5, NaN, 22] name="price"
+
click Run to evaluate
+ + +

4 · Downcast

+
// downcast to float32 precision
+toNumericScalar(3.14159265358979, { downcast: "float" })
+// ~3.1415927
+
+// downcast to integer (snap to smallest int type)
+toNumericScalar(42, { downcast: "integer" })  // 42
+
click Run to evaluate
+ + +

5 · Live sandbox

+

Edit and run arbitrary code using the tsb API.

+ +
click Run to evaluate
+ + +

← back to index

+ + + + diff --git a/playground/value_counts_full.html b/playground/value_counts_full.html new file mode 100644 index 00000000..2c2e91ad --- /dev/null +++ b/playground/value_counts_full.html @@ -0,0 +1,238 @@ + + + + + + valueCountsBinned — tsb playground + + + +

valueCountsBinned

+

+ pandas.Series.value_counts(bins=N) — bin numeric values into equal-width + intervals, then count frequencies. +

+ +
+

Interactive Demo

+
+
+ + +
+
+ + +
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +

Result

+
Click Run to see results.
+
+ +
+

How it works

+

+ valueCountsBinned(series, N) internally: +

+
    +
  1. Calls cut(series, N) to assign each value to one of N equal-width bins.
  2. +
  3. Counts occurrences per bin label (NaN/null values are excluded).
  4. +
  5. Optionally sorts by count (sort=true, default) or by interval position (sort=false).
  6. +
  7. Optionally returns proportions instead of counts (normalize=true).
  8. +
+ +

API

+
+valueCountsBinned(
+  series: Series<Scalar>,
+  bins: number,
+  options?: {
+    sort?: boolean;      // default: true
+    ascending?: boolean; // default: false
+    normalize?: boolean; // default: false
+  }
+): Series<number>
+
+ +
+

Examples

+ +

Basic binning

+
+import { Series, valueCountsBinned } from "tsb";
+
+const s = new Series({ data: [1, 2, 3, 4, 5] });
+const vc = valueCountsBinned(s, 2);
+// Index: ["(0.995, 3.0]", "(3.0, 5.005]"]
+// Values: [3, 2]  ← sorted by count (default)
+ +

Interval order (sort=false)

+
+const vc2 = valueCountsBinned(s, 2, { sort: false });
+// Index: ["(0.995, 3.0]", "(3.0, 5.005]"]
+// Values: [3, 2]  ← in interval order
+ +

Proportions (normalize=true)

+
+const vc3 = valueCountsBinned(s, 2, { normalize: true });
+// Index: ["(0.995, 3.0]", "(3.0, 5.005]"]
+// Values: [0.6, 0.4]
+ +

Handling NaN / null

+
+const s2 = new Series({ data: [1, null, 2, NaN, 3, 4, 5] });
+const vc4 = valueCountsBinned(s2, 2);
+// NaN and null values are excluded.  Total = 5.
+
+ +

+ ← Back to playground index +  |  + stats + binning + value_counts +

+ + + + diff --git a/playground/where_mask.html b/playground/where_mask.html index 89a50a05..cf942330 100644 --- a/playground/where_mask.html +++ b/playground/where_mask.html @@ -1,220 +1,194 @@ - + - - - - tsb — where / mask: Conditional Selection - - - -

tsb — where / mask: Conditional Selection

-

- seriesWhere / seriesMask and their DataFrame equivalents - allow element-wise conditional replacement — the TypeScript equivalents of - pandas.Series.where - and - pandas.Series.mask. -

- -
- Quick rule:
- where(cond)keep where cond is true, replace elsewhere.
- mask(cond)keep where cond is false, replace elsewhere.
- They are exact inverses of each other. -
- -

1. seriesWhere — Boolean Array Condition

-

- Pass a boolean[] to keep values at true positions, replace - the rest with null (or a custom other value). -

-
import { Series, seriesWhere } from "tsb";
-
-const scores = new Series({ data: [42, 91, 67, 55, 88] });
-const highScores = seriesWhere(scores, [false, true, false, false, true]);
-// Series [null, 91, null, null, 88]
-
-// Custom replacement value
-const clamped = seriesWhere(scores, [false, true, false, false, true], { other: 0 });
-// Series [0, 91, 0, 0, 88]
- -

2. seriesWhere — Callable Condition

-

- Pass a function that receives the Series and returns a boolean[] or - Series<boolean>. This avoids computing the condition array manually. -

-
import { Series, seriesWhere } from "tsb";
-
-const temps = new Series({ data: [-5, 12, 23, -3, 8] });
-
-// Keep only values above freezing
-const aboveFreezing = seriesWhere(
-  temps,
-  (s) => s.values.map((v) => (v as number) > 0),
-);
-// Series [null, 12, 23, null, 8]
-
-// Replace with 0 instead of null
-const noFreeze = seriesWhere(
-  temps,
-  (s) => s.values.map((v) => (v as number) > 0),
-  { other: 0 },
-);
-// Series [0, 12, 23, 0, 8]
- -

3. seriesMask — The Inverse

-

- mask replaces positions where the condition is true - (the opposite of where). Use it to "blank out" outliers or invalid values. -

-
import { Series, seriesMask } from "tsb";
-
-const data = new Series({ data: [1, 2, 3, 4, 5] });
-
-// Mask out values greater than 3
-const masked = seriesMask(
-  data,
-  (s) => s.values.map((v) => (v as number) > 3),
-  { other: null },
-);
-// Series [1, 2, 3, null, null]
- -

4. dataFrameWhere — Element-Wise on DataFrames

-

- Pass a boolean DataFrame or a callable that returns one. - Columns and row labels are aligned by name. -

-
import { DataFrame, dataFrameWhere } from "tsb";
+
+  
+  
+  tsb — where / mask
+  
+  
+
+
+
+

where / mask

+

Conditional value selection and replacement — mirrors pandas.Series.where and pandas.DataFrame.mask.

+ +
+

1 — whereSeries: keep values where condition is true

+

whereSeries(series, cond) keeps each element where cond is true and replaces it with null (or a custom other) where cond is false.

+
import { Series, whereSeries } from "tsb";
+
+const scores = new Series({ data: [45, 72, 58, 88, 91, 30], name: "score" });
+
+// Keep only passing scores (>= 60); replace failing scores with null
+const passing = whereSeries(scores, (v) => v >= 60);
+console.log("passing:", [...passing.values]);
+// → [null, 72, null, 88, 91, null]
+
+// Replace failing scores with 0 instead of null
+const zeroFail = whereSeries(scores, (v) => v >= 60, { other: 0 });
+console.log("zero-fail:", [...zeroFail.values]);
+// → [0, 72, 0, 88, 91, 0]
+
+
▶ run
+
+ +
+

2 — maskSeries: replace values where condition is true

+

maskSeries is the inverse of whereSeries: it replaces where cond is true and keeps where cond is false.

+
import { Series, maskSeries } from "tsb";
+
+const temps = new Series({ data: [-5, 12, -3, 20, 7], name: "temp_C" });
+
+// Mask (hide) sub-zero temperatures
+const noFrost = maskSeries(temps, (v) => v < 0);
+console.log("no frost:", [...noFrost.values]);
+// → [null, 12, null, 20, 7]
+
+// Replace sub-zero with a sentinel value
+const clamped = maskSeries(temps, (v) => v < 0, { other: 0 });
+console.log("clamped: ", [...clamped.values]);
+// → [0, 12, 0, 20, 7]
+
+
▶ run
+
+ +
+

3 — Boolean Series as condition

+

Pass a Series<boolean> (or a plain boolean array) as the condition for position-aligned filtering.

+
import { Series, whereSeries, maskSeries } from "tsb";
+
+const prices = new Series({ data: [100, 200, 150, 80, 300], name: "price" });
+const inStock = new Series({ data: [true, false, true, false, true] });
+
+// Keep prices only for in-stock items
+const available = whereSeries(prices, inStock);
+console.log("in-stock prices:", [...available.values]);
+// → [100, null, 150, null, 300]
+
+// Mask out-of-stock prices (same result — cond is inverted)
+const masked = maskSeries(prices, inStock.values.map((v) => !v));
+console.log("masked:         ", [...masked.values]);
+// → [100, null, 150, null, 300]
+
+
▶ run
+
+ +
+

4 — whereDataFrame: cell-wise filtering on a DataFrame

+

whereDataFrame(df, cond) applies the condition independently to each cell across all columns.

+
import { DataFrame, whereDataFrame } from "tsb";
 
 const df = DataFrame.fromColumns({
-  temp_c:   [22, -3, 18, -7, 30],
-  humidity: [55, 80, 62, 75, 45],
+  a: [1, -2,  3],
+  b: [-4,  5, -6],
+  c: [ 7,  8,  9],
 });
 
-// Keep only valid summer readings (temp > 0)
-const condDf = DataFrame.fromColumns({
-  temp_c:   [true, false, true, false, true],
-  humidity: [true, false, true, false, true],
-});
-
-const summer = dataFrameWhere(df, condDf);
-// DataFrame:
-//   temp_c   [22,   null, 18,   null, 30  ]
-//   humidity [55,   null, 62,   null, 45  ]
+// Keep non-negative values; replace negatives with null +const positive = whereDataFrame(df, (v) => v >= 0); +console.log("a:", [...positive.col("a").values]); // [1, null, 3] +console.log("b:", [...positive.col("b").values]); // [null, 5, null] +console.log("c:", [...positive.col("c").values]); // [7, 8, 9] +
+
▶ run
+ -

5. dataFrameWhere — Callable Condition

-
import { DataFrame, dataFrameWhere } from "tsb";
+
+

5 — maskDataFrame: replace cells matching condition

+
import { DataFrame, maskDataFrame } from "tsb";
 
 const df = DataFrame.fromColumns({
-  a: [1, 2, 3, 4, 5],
-  b: [10, 20, 30, 40, 50],
+  revenue: [100, 0, 250, -50, 0],
+  cost:    [ 80, 0, 200,  30, 0],
 });
 
-// Keep only values > 2 (column-wise threshold)
-const result = dataFrameWhere(df, (d) => {
-  const condCols: Record<string, boolean[]> = {};
-  for (const col of d.columns) {
-    condCols[col as string] = d.col(col as string).values.map(
-      (v) => (v as number) > 2
-    );
-  }
-  return DataFrame.fromColumns(condCols);
+// Mask zeros (replace with null to mark as missing)
+const noZeros = maskDataFrame(df, (v) => v === 0);
+console.log("revenue:", [...noZeros.col("revenue").values]);
+// → [100, null, 250, -50, null]
+console.log("cost:   ", [...noZeros.col("cost").values]);
+// → [80, null, 200, 30, null]
+
+
▶ run
+
+ +
+

6 — DataFrame condition (boolean DataFrame)

+

Pass a boolean DataFrame as the condition for per-cell control.

+
import { DataFrame, whereDataFrame } from "tsb";
+
+const data = DataFrame.fromColumns({
+  x: [10, 20, 30],
+  y: [40, 50, 60],
 });
-// DataFrame:
-//   a: [null, null, 3, 4, 5]
-//   b: [10,   20,   30, 40, 50]
-

6. dataFrameMask — DataFrame Mask

-
import { DataFrame, dataFrameMask } from "tsb";
-
-const df = DataFrame.fromColumns({
-  sales:  [100, 200, 50,  300, 80],
-  profit: [10,  40,  -5,  60,  -2],
+// Custom boolean mask per cell
+const cond = DataFrame.fromColumns({
+  x: [true,  false, true],
+  y: [false,  true, true],
 });
 
-// Mask out (replace) rows with negative profit
-const cleaned = dataFrameMask(
-  df,
-  (d) => {
-    const condCols: Record<string, boolean[]> = {};
-    for (const col of d.columns) {
-      condCols[col as string] = d.col(col as string).values.map(
-        (v) => (v as number) < 0
-      );
-    }
-    return DataFrame.fromColumns(condCols);
-  },
-  { other: 0 },
-);
-// DataFrame:
-//   sales:  [100, 200, 50,  300, 80]
-//   profit: [10,  40,  0,   60,  0 ]
- -

Label-Aligned Series Condition

-

- When you pass a Series<boolean> as the condition, values are aligned - by label, not position. Labels absent from the condition series are treated - as false. -

-
import { Series, seriesWhere } from "tsb";
-
-const prices = new Series({ data: [10, 20, 30], index: ["a", "b", "c"] });
-const valid  = new Series<boolean>({ data: [false, true], index: ["a", "b"] });
-
-// Only "b" is in the condition with value=true; "a"=false, "c" missing→false
-const result = seriesWhere(prices, valid, { other: -1 });
-// Series { a: -1, b: 20, c: -1 }
- -

API Reference

- - - - - - -
FunctionKeeps when cond is…Replaces with
seriesWhere(s, cond, {other})trueother (default null)
seriesMask(s, cond, {other})falseother (default null)
dataFrameWhere(df, cond, {other})trueother (default null)
dataFrameMask(df, cond, {other})falseother (default null)
- -

Condition types

- - - - - - -
TypeSeries opsDataFrame ops
Boolean array✅ positional
Series<boolean>✅ label-aligned
DataFrame (boolean)✅ label-aligned
Callable✅ receives Series✅ receives DataFrame
- -

← Back to tsb playground index

- +const result = whereDataFrame(data, cond); +console.log("x:", [...result.col("x").values]); // [10, null, 30] +console.log("y:", [...result.col("y").values]); // [null, 50, 60] +
+
▶ run
+ + +
+

7 — Combining where and mask for range clamping

+

Chaining whereSeries and maskSeries is a clean way to apply lower and upper bounds.

+
import { Series, whereSeries, maskSeries } from "tsb";
+
+const raw = new Series({ data: [-10, 0, 5, 15, 100, 3], name: "value" });
+const LO = 0, HI = 10;
+
+// 1) Replace values below lower bound with LO
+const step1 = whereSeries(raw, (v) => (v as number) >= LO, { other: LO });
+// 2) Replace values above upper bound with HI
+const clamped = maskSeries(step1, (v) => (v as number) > HI, { other: HI });
+
+console.log("clamped:", [...clamped.values]);
+// → [0, 0, 5, 10, 10, 3]
+
+
▶ run
+
+ +
+

8 — where / mask vs. clip

+
+ When to use which?
+ Use clip() for simple numeric lower/upper bounds.
+ Use where() / mask() for arbitrary conditions — including non-numeric types, + string patterns, or per-cell boolean DataFrames. +
+
import { Series, whereSeries, clip } from "tsb";
+
+const s = new Series({ data: [-3, 1, 5, 10], name: "val" });
+
+// clip is concise for numeric bounds
+const clipped  = clip(s, { lower: 0, upper: 6 });
+console.log("clipped: ", [...clipped.values]);  // [0, 1, 5, 6]
+
+// where gives full control — replace out-of-range with null instead of clamping
+const filtered = whereSeries(s, (v) => (v as number) >= 0 && (v as number) <= 6);
+console.log("filtered:", [...filtered.values]); // [null, 1, 5, null]
+
+
▶ run
+
+ + diff --git a/playground/wide_to_long.html b/playground/wide_to_long.html index b30980cd..887b4bbb 100644 --- a/playground/wide_to_long.html +++ b/playground/wide_to_long.html @@ -1,113 +1,263 @@ - + - - - - tsb — wideToLong - - - -

← tsb playground

- -

wideToLong

-

- Reshape a wide-format DataFrame to long format by collapsing stub-prefixed column - groups into rows — mirrors - - pandas.wide_to_long(). -

- -

Concept

-

- Given a wide DataFrame where repeated measurements are spread across columns with a - common stub prefix and a numeric (or other) suffix — e.g. score_2021, - score_2022wideToLong pivots those column groups into rows. - One row per original row per unique suffix is produced. -

- -

Example — numeric suffixes

-
import { DataFrame } from "tsb";
-import { wideToLong } from "tsb";
-
-const df = DataFrame.fromColumns({
-  id:  ["x", "y"],
-  A1:  [1, 2],
-  A2:  [3, 4],
-  B1:  [5, 6],
-  B2:  [7, 8],
+  
+    
+    
+    tsb · wide_to_long
+    
+  
+  
+    
+ ← tsb +

🔀 wide_to_long

+
+
+ +
+

Overview

+

+ wideToLong(df, stubnames, i, j, options?) mirrors + pandas.wide_to_long(). It reshapes a wide + DataFrame — where multiple columns share a common prefix (stub) and a + varying suffix — into a long DataFrame with one row per + (original row, suffix) pair. +

+

+ Unlike the simpler melt (which treats every column value + independently), wideToLong keeps related stub columns + side by side and extracts the suffix as a new identifier column. +

+
wideToLong(df, stubnames, i, j, { sep?, suffix? })
+

+ stubnames — prefix(es) of the grouped columns (e.g. + ["A","B"]).
+ i — identifier column(s) that are carried along + unchanged.
+ j — name for the new column that holds the extracted + suffixes.
+ sep — separator between stub and suffix + ("" by default).
+ suffix — RegExp or string pattern that matches the suffix + (/\d+/ by default). +

+
+ +
+

Interactive playground

+

Edit the CSV data, stubs, and options then click Run.

+ +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
+ + +
+
+ +
Click Run…
+
+
+
+ +
+ +
+

Examples

+ +

1 · Numeric suffix (default)

+

Column names like A1, A2 share stub A; the suffix 1/2 becomes the year column.

+
const df = DataFrame.fromColumns({
+  id: ["x", "y"],
+  A1: [1, 2],  A2: [3, 4],
+  B1: [5, 6],  B2: [7, 8],
+});
+wideToLong(df, ["A", "B"], "id", "year");
+// id  year  A  B
+// x   1     1  5
+// y   1     2  6
+// x   2     3  7
+// y   2     4  8
+ +
+

2 · Underscore separator

+

Use sep: "_" for column names like score_pre / score_post.

+
const df = DataFrame.fromColumns({
+  subject: [1, 2],
+  score_pre:  [80, 90],
+  score_post: [85, 95],
+});
+wideToLong(df, "score", "subject", "phase", {
+  sep: "_",
+  suffix: /[a-z]+/,
 });
+// subject  phase  score
+// 1        pre    80
+// 2        pre    90
+// 1        post   85
+// 2        post   95
-const long = wideToLong(df, ["A", "B"], "id", "num"); - -// long.columns.values → ["id", "num", "A", "B"] -// long.shape → [4, 4] -// -// id num A B -// x 1 1 5 -// y 1 2 6 -// x 2 3 7 -// y 2 4 8 -
- -

Example — separator and custom suffix

-
const df = DataFrame.fromColumns({
-  country: ["US", "UK"],
-  gdp_2020: [21e12, 2.7e12],
-  gdp_2021: [23e12, 3.1e12],
-  pop_2020: [331e6, 67e6],
-  pop_2021: [332e6, 68e6],
+        
+

3 · Multiple id columns

+

Pass an array to i to preserve several identifier columns.

+
const df = DataFrame.fromColumns({
+  country: ["US","UK","DE"],
+  region:  ["East","South","West"],
+  gdp2020: [21, 2.7, 3.8],
+  gdp2021: [23, 3.1, 4.2],
 });
+wideToLong(df, "gdp", ["country","region"], "year", { sep: "" });
+// country  region  year  gdp
+// US       East    2020  21
+// UK       South   2020  2.7
+// DE       West    2020  3.8
+// US       East    2021  23
+// UK       South   2021  3.1
+// DE       West    2021  4.2
+ + +
+

vs melt

+

+ Both melt and wideToLong convert wide data to + long. The key difference: +

+ + + + + + +
meltwideToLong
Column groupingNone — each column → one variable/value rowGroups by stub; related columns land in the same output row
New columnsvariable + valueOne column per stub + j
Suffix extractionNoYes — suffix becomes j value
Use when…Each wide column is independentColumns share a common prefix and varying suffix
+
+ + + + + diff --git a/src/core/align.ts b/src/core/align.ts new file mode 100644 index 00000000..144f53b5 --- /dev/null +++ b/src/core/align.ts @@ -0,0 +1,197 @@ +/** + * align — realign two Series or DataFrames to a common axis. + * + * Mirrors `pandas.Series.align()` / `pandas.DataFrame.align()`: + * + * - {@link alignSeries} — align two `Series` on their row indices. + * - {@link alignDataFrame} — align two `DataFrame` objects on rows, columns, + * or both axes simultaneously. + * + * ### Join policies + * + * | `join` | Result index | + * |-----------|---------------------------------------------------| + * | `"outer"` | Union of the two index sets (default) | + * | `"inner"` | Intersection of the two index sets | + * | `"left"` | Left object's index | + * | `"right"` | Right object's index | + * + * ### Axis (DataFrame only) + * + * | `axis` | Aligned axes | + * |---------------|-------------------------------------------------| + * | `0` / `"index"` | Row index only | + * | `1` / `"columns"` | Columns only | + * | `null` / `undefined` | Both rows **and** columns (default) | + * + * @example + * ```ts + * const a = new Series({ data: [1, 2, 3], index: new Index(["a", "b", "c"]) }); + * const b = new Series({ data: [10, 20], index: new Index(["b", "c"]) }); + * + * const [left, right] = alignSeries(a, b, { join: "inner" }); + * // left → Series [2, 3] with index ["b", "c"] + * // right → Series [10, 20] with index ["b", "c"] + * + * const [lo, ro] = alignSeries(a, b, { join: "outer", fillValue: 0 }); + * // left → Series [1, 2, 3] with index ["a", "b", "c"] + * // right → Series [0, 10, 20] with index ["a", "b", "c"] + * ``` + * + * @module + */ + +import type { Axis, JoinHow, Label, Scalar } from "../types.ts"; +import type { Index } from "./base-index.ts"; +import type { DataFrame } from "./frame.ts"; +import { reindexDataFrame, reindexSeries } from "./reindex.ts"; +import type { Series } from "./series.ts"; + +// ─── public types 
───────────────────────────────────────────────────────────── + +/** Options for {@link alignSeries}. */ +export interface AlignSeriesOptions { + /** + * How to determine the result index. + * - `"outer"` (default) — union of both indices. + * - `"inner"` — intersection of both indices. + * - `"left"` — left Series' index. + * - `"right"` — right Series' index. + */ + join?: JoinHow; + /** + * Scalar to use for labels that exist in the result index but are absent + * from one of the inputs (default: `null`). + */ + fillValue?: Scalar; +} + +/** Options for {@link alignDataFrame}. */ +export interface AlignDataFrameOptions extends AlignSeriesOptions { + /** + * Which axes to align. + * - `null` / `undefined` (default) — align both rows and columns. + * - `0` / `"index"` — rows only. + * - `1` / `"columns"` — columns only. + */ + axis?: Axis | null; +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +/** + * Compute the target index from `left` and `right` according to `join`. + */ +function resolveIndex(left: Index