diff --git a/.gitignore b/.gitignore index 4088a0f0..354816e7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ dist/ *.tsbuildinfo package-lock.json *.tgz +playground/benchmarks/ +playground/dist/ diff --git a/benchmarks/results.json b/benchmarks/results.json index c883f334..13295d81 100644 --- a/benchmarks/results.json +++ b/benchmarks/results.json @@ -2,246 +2,356 @@ "benchmarks": [ { "function": "concat", - "tsb": null, + "tsb": { + "function": "concat", + "mean_ms": 128.9745293, + "iterations": 20, + "total_ms": 2579.490586 + }, "pandas": { "function": "concat", "mean_ms": 0.11375509999993483, "iterations": 20, "total_ms": 2.2751019999986966 }, - "ratio": null + "ratio": 1133.791 }, { "function": "dataframe_apply", - "tsb": null, + "tsb": { + "function": "dataframe_apply", + "mean_ms": 16.7897294, + "iterations": 10, + "total_ms": 167.897294 + }, "pandas": { "function": "dataframe_apply", "mean_ms": 47.161531699998704, "iterations": 10, "total_ms": 471.61531699998704 }, - "ratio": null + "ratio": 0.356 }, { "function": "dataframe_creation", - "tsb": null, + "tsb": { + "function": "dataframe_creation", + "mean_ms": 223.22429929999998, + "iterations": 10, + "total_ms": 2232.242993 + }, "pandas": { "function": "dataframe_creation", "mean_ms": 5.148059900000135, "iterations": 10, "total_ms": 51.48059900000135 }, - "ratio": null + "ratio": 43.361 }, { "function": "dataframe_dropna", - "tsb": null, + "tsb": { + "function": "dataframe_dropna", + "mean_ms": 172.72901985000004, + "iterations": 20, + "total_ms": 3454.5803970000006 + }, "pandas": { "function": "dataframe_dropna", "mean_ms": 2.42739894999886, "iterations": 20, "total_ms": 48.547978999977204 }, - "ratio": null + "ratio": 71.158 }, { "function": "dataframe_filter", - "tsb": null, + "tsb": { + "function": "dataframe_filter", + "mean_ms": 126.19991375, + "iterations": 20, + "total_ms": 2523.998275 + }, "pandas": { "function": "dataframe_filter", "mean_ms": 0.4964389500003108, "iterations": 20, "total_ms": 9.928779000006216 }, - "ratio": null + "ratio": 254.21 }, { "function": "dataframe_rename", - "tsb": null, + "tsb": { + "function": "dataframe_rename", + "mean_ms": 0.008352200000000209, + "iterations": 20, + "total_ms": 0.1670440000000042 + }, "pandas": { "function": "dataframe_rename", "mean_ms": 0.17103454999869427, "iterations": 20, "total_ms": 3.4206909999738855 }, - "ratio": null + "ratio": 0.049 }, { "function": "dataframe_sort", - "tsb": null, + "tsb": { + "function": "dataframe_sort", + "mean_ms": 434.5389244, + "iterations": 10, + "total_ms": 4345.389244 + }, "pandas": { "function": "dataframe_sort", "mean_ms": 33.301584399998774, "iterations": 10, "total_ms": 333.01584399998774 }, - "ratio": null + "ratio": 13.049 }, { "function": "describe", - "tsb": null, + "tsb": { + "function": "describe", + "mean_ms": 19.719739000000004, + "iterations": 10, + "total_ms": 197.19739000000004 + }, "pandas": { "function": "describe", "mean_ms": 5.521558600003118, "iterations": 10, "total_ms": 55.21558600003118 }, - "ratio": null + "ratio": 3.571 }, { "function": "ewm_mean", - "tsb": null, + "tsb": { + "function": "ewm_mean", + "mean_ms": 118.5438748, + "iterations": 10, + "total_ms": 1185.438748 + }, "pandas": { "function": "ewm_mean", "mean_ms": 1.7652839999982461, "iterations": 10, "total_ms": 17.65283999998246 }, - "ratio": null + "ratio": 67.153 }, { "function": "groupby_mean", - "tsb": null, + "tsb": { + "function": "groupby_mean", + "mean_ms": 21.510315099999996, + "iterations": 10, + "total_ms": 215.10315099999997 + }, "pandas": { "function": "groupby_mean", "mean_ms": 8.079756900002621, "iterations": 10, "total_ms": 80.79756900002621 }, - "ratio": null + "ratio": 2.662 }, { "function": "merge", - "tsb": null, + "tsb": { + "function": "merge", + "mean_ms": 10348.345783, + "iterations": 3, + "total_ms": 31045.037349000002 + }, "pandas": { "function": "merge", "mean_ms": 60.42320619999941, "iterations": 10, "total_ms": 604.2320619999941 }, - "ratio": null + "ratio": 171.264 }, { "function": "pivot_table", - "tsb": null, + "tsb": { + "function": "pivot_table", + "mean_ms": 117.3417057, + "iterations": 10, + "total_ms": 1173.417057 + }, "pandas": { "function": "pivot_table", "mean_ms": 22.500251999997545, "iterations": 10, "total_ms": 225.00251999997545 }, - "ratio": null + "ratio": 5.215 }, { "function": "read_csv", - "tsb": null, + "tsb": { + "function": "read_csv", + "mean_ms": 589.2802257999999, + "iterations": 5, + "total_ms": 2946.401129 + }, "pandas": { "function": "read_csv", "mean_ms": 29.951929399999244, "iterations": 5, "total_ms": 149.75964699999622 }, - "ratio": null + "ratio": 19.674 }, { "function": "rolling_mean", - "tsb": null, + "tsb": { + "function": "rolling_mean", + "mean_ms": 419.62945440000004, + "iterations": 10, + "total_ms": 4196.294544 + }, "pandas": { "function": "rolling_mean", "mean_ms": 1.71982609999759, "iterations": 10, "total_ms": 17.1982609999759 }, - "ratio": null + "ratio": 243.995 }, { "function": "series_arithmetic", - "tsb": null, + "tsb": { + "function": "series_arithmetic", + "mean_ms": 122.68170964999999, + "iterations": 20, + "total_ms": 2453.634193 + }, "pandas": { "function": "series_arithmetic", "mean_ms": 0.764571400000591, "iterations": 20, "total_ms": 15.29142800001182 }, - "ratio": null + "ratio": 160.458 }, { "function": "series_creation", - "tsb": null, + "tsb": { + "function": "series_creation", + "mean_ms": 103.015, + "iterations": 50, + "total_ms": 5150.754 + }, "pandas": { "function": "series_creation", "mean_ms": 7.607, "iterations": 50, "total_ms": 380.349 }, - "ratio": null + "ratio": 13.542 }, { "function": "series_cumsum", - "tsb": null, + "tsb": { + "function": "series_cumsum", + "mean_ms": 58.26283665, + "iterations": 20, + "total_ms": 1165.256733 + }, "pandas": { "function": "series_cumsum", "mean_ms": 1.1250383499998406, "iterations": 20, "total_ms": 22.500766999996813 }, - "ratio": null + "ratio": 51.787 }, { "function": "series_fillna", - "tsb": null, + "tsb": { + "function": "series_fillna", + "mean_ms": 61.56140175, + "iterations": 20, + "total_ms": 1231.228035 + }, "pandas": { "function": "series_fillna", "mean_ms": 0.18527670000025864, "iterations": 20, "total_ms": 3.705534000005173 }, - "ratio": null + "ratio": 332.267 }, { "function": "series_shift", - "tsb": null, + "tsb": { + "function": "series_shift", + "mean_ms": 110.16682740000002, + "iterations": 20, + "total_ms": 2203.336548 + }, "pandas": { "function": "series_shift", "mean_ms": 0.07249699999931636, "iterations": 20, "total_ms": 1.4499399999863272 }, - "ratio": null + "ratio": 1519.605 }, { "function": "series_sort", - "tsb": null, + "tsb": { + "function": "series_sort", + "mean_ms": 161.28472190000002, + "iterations": 10, + "total_ms": 1612.8472190000002 + }, "pandas": { "function": "series_sort", "mean_ms": 5.127767300001551, "iterations": 10, "total_ms": 51.27767300001551 }, - "ratio": null + "ratio": 31.453 }, { "function": "series_string_ops", - "tsb": null, + "tsb": { + "function": "series_string_ops", + "mean_ms": 243.85622659999999, + "iterations": 10, + "total_ms": 2438.562266 + }, "pandas": { "function": "series_string_ops", "mean_ms": 34.08206670000027, "iterations": 10, "total_ms": 340.8206670000027 }, - "ratio": null + "ratio": 7.155 }, { "function": "series_value_counts", - "tsb": null, + "tsb": { + "function": "series_value_counts", + "mean_ms": 38.8205242, + "iterations": 10, + "total_ms": 388.205242 + }, "pandas": { "function": "series_value_counts", "mean_ms": 9.212644899997713, "iterations": 10, "total_ms": 92.12644899997713 }, - "ratio": null + "ratio": 4.214 } ], - "timestamp": "2026-04-12T15:46:00Z" -} \ No newline at end of file + "timestamp": "2026-04-13T00:11:36Z" +} diff --git a/benchmarks/tsb/bench_concat.ts b/benchmarks/tsb/bench_concat.ts index 7a72f777..e1787251 100644 --- a/benchmarks/tsb/bench_concat.ts +++ b/benchmarks/tsb/bench_concat.ts @@ -7,10 +7,10 @@ const ROWS = 50_000; const WARMUP = 5; const ITERATIONS = 20; -const vals1 = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0); -const vals2 = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0); -const df1 = new DataFrame({ value: vals1 }); -const df2 = new DataFrame({ value: vals2 }); +const vals1 = Array.from({ length: ROWS }, (_, i) => i * 1.0); +const vals2 = Array.from({ length: ROWS }, (_, i) => i * 2.0); +const df1 = DataFrame.fromColumns({ value: vals1 }); +const df2 = DataFrame.fromColumns({ value: vals2 }); for (let i = 0; i < WARMUP; i++) { concat([df1, df2]); diff --git a/benchmarks/tsb/bench_dataframe_apply.ts b/benchmarks/tsb/bench_dataframe_apply.ts index 32a99a68..345f4d8f 100644 --- a/benchmarks/tsb/bench_dataframe_apply.ts +++ b/benchmarks/tsb/bench_dataframe_apply.ts @@ -8,17 +8,17 @@ const ROWS = 10_000; const WARMUP = 3; const ITERATIONS = 10; -const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0); -const b = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0); -const df = new DataFrame({ a, b }); +const a = Array.from({ length: ROWS }, (_, i) => i * 1.0); +const b = Array.from({ length: ROWS }, (_, i) => i * 2.0); +const df = DataFrame.fromColumns({ a, b }); for (let i = 0; i < WARMUP; i++) { - df.apply((row) => (row["a"] as number) + (row["b"] as number), { axis: 1 }); + df.apply((row) => (row.at("a") as number) + (row.at("b") as number), 1); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - df.apply((row) => (row["a"] as number) + (row["b"] as number), { axis: 1 }); + df.apply((row) => (row.at("a") as number) + (row.at("b") as number), 1); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_dataframe_creation.ts b/benchmarks/tsb/bench_dataframe_creation.ts index 2eb8fd56..d1eb1553 100644 --- a/benchmarks/tsb/bench_dataframe_creation.ts +++ b/benchmarks/tsb/bench_dataframe_creation.ts @@ -8,18 +8,18 @@ const ROWS = 100_000; const WARMUP = 3; const ITERATIONS = 10; -const nums1 = Float64Array.from({ length: ROWS }, (_, i) => i * 1.1); -const nums2 = Float64Array.from({ length: ROWS }, (_, i) => i * 2.2); +const nums1 = Array.from({ length: ROWS }, (_, i) => i * 1.1); +const nums2 = Array.from({ length: ROWS }, (_, i) => i * 2.2); const strs = Array.from({ length: ROWS }, (_, i) => `label_${i % 100}`); // Warm up for (let i = 0; i < WARMUP; i++) { - new DataFrame({ a: nums1, b: nums2, c: strs }); + DataFrame.fromColumns({ a: nums1, b: nums2, c: strs }); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - new DataFrame({ a: nums1, b: nums2, c: strs }); + DataFrame.fromColumns({ a: nums1, b: nums2, c: strs }); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_dataframe_dropna.ts b/benchmarks/tsb/bench_dataframe_dropna.ts index e4fef46b..31ddc527 100644 --- a/benchmarks/tsb/bench_dataframe_dropna.ts +++ b/benchmarks/tsb/bench_dataframe_dropna.ts @@ -7,9 +7,11 @@ const ROWS = 100_000; const WARMUP = 5; const ITERATIONS = 20; -const a = Float64Array.from({ length: ROWS }, (_, i) => (i % 10 === 0 ? NaN : i * 1.1)); -const b = Float64Array.from({ length: ROWS }, (_, i) => (i % 7 === 0 ? NaN : i * 2.2)); -const df = new DataFrame({ a, b }); +const a: (number | null)[] = Array.from({ length: ROWS }, (_, i) => + i % 10 === 0 ? null : i * 1.1, +); +const b: (number | null)[] = Array.from({ length: ROWS }, (_, i) => (i % 7 === 0 ? null : i * 2.2)); +const df = DataFrame.fromColumns({ a, b }); for (let i = 0; i < WARMUP; i++) { df.dropna(); diff --git a/benchmarks/tsb/bench_dataframe_filter.ts b/benchmarks/tsb/bench_dataframe_filter.ts index 57d78bd7..799ef786 100644 --- a/benchmarks/tsb/bench_dataframe_filter.ts +++ b/benchmarks/tsb/bench_dataframe_filter.ts @@ -7,16 +7,17 @@ const ROWS = 100_000; const WARMUP = 5; const ITERATIONS = 20; -const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1); -const df = new DataFrame({ value: vals }); +const vals = Array.from({ length: ROWS }, (_, i) => i * 0.1); +const df = DataFrame.fromColumns({ value: vals }); +const valueSeries = df.col("value"); for (let i = 0; i < WARMUP; i++) { - df.filter((row) => (row["value"] as number) > 5000); + df.filter(valueSeries.gt(5000)); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - df.filter((row) => (row["value"] as number) > 5000); + df.filter(valueSeries.gt(5000)); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_dataframe_rename.ts b/benchmarks/tsb/bench_dataframe_rename.ts index 807b63c9..f198e090 100644 --- a/benchmarks/tsb/bench_dataframe_rename.ts +++ b/benchmarks/tsb/bench_dataframe_rename.ts @@ -7,9 +7,9 @@ const ROWS = 100_000; const WARMUP = 5; const ITERATIONS = 20; -const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.1); -const b = Float64Array.from({ length: ROWS }, (_, i) => i * 2.2); -const df = new DataFrame({ old_a: a, old_b: b }); +const a = Array.from({ length: ROWS }, (_, i) => i * 1.1); +const b = Array.from({ length: ROWS }, (_, i) => i * 2.2); +const df = DataFrame.fromColumns({ old_a: a, old_b: b }); for (let i = 0; i < WARMUP; i++) { df.rename({ old_a: "new_a", old_b: "new_b" }); diff --git a/benchmarks/tsb/bench_dataframe_sort.ts b/benchmarks/tsb/bench_dataframe_sort.ts index 707e4ecf..5c9ed500 100644 --- a/benchmarks/tsb/bench_dataframe_sort.ts +++ b/benchmarks/tsb/bench_dataframe_sort.ts @@ -8,16 +8,16 @@ const WARMUP = 3; const ITERATIONS = 10; const a = Array.from({ length: ROWS }, (_, i) => `group_${i % 100}`); -const b = Float64Array.from({ length: ROWS }, () => Math.random() * 1000); -const df = new DataFrame({ a, b }); +const b = Array.from({ length: ROWS }, () => Math.random() * 1000); +const df = DataFrame.fromColumns({ a, b }); for (let i = 0; i < WARMUP; i++) { - df.sort_values(["a", "b"]); + df.sortValues(["a", "b"]); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - df.sort_values(["a", "b"]); + df.sortValues(["a", "b"]); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_describe.ts b/benchmarks/tsb/bench_describe.ts index 368156a3..b080becb 100644 --- a/benchmarks/tsb/bench_describe.ts +++ b/benchmarks/tsb/bench_describe.ts @@ -7,9 +7,9 @@ const ROWS = 100_000; const WARMUP = 3; const ITERATIONS = 10; -const a = Float64Array.from({ length: ROWS }, (_, i) => i * 1.1); -const b = Float64Array.from({ length: ROWS }, (_, i) => Math.sqrt(i + 1)); -const df = new DataFrame({ a, b }); +const a = Array.from({ length: ROWS }, (_, i) => i * 1.1); +const b = Array.from({ length: ROWS }, (_, i) => Math.sqrt(i + 1)); +const df = DataFrame.fromColumns({ a, b }); for (let i = 0; i < WARMUP; i++) { df.describe(); diff --git a/benchmarks/tsb/bench_ewm_mean.ts b/benchmarks/tsb/bench_ewm_mean.ts index 8e6597f7..f60c9933 100644 --- a/benchmarks/tsb/bench_ewm_mean.ts +++ b/benchmarks/tsb/bench_ewm_mean.ts @@ -7,8 +7,8 @@ const ROWS = 100_000; const WARMUP = 3; const ITERATIONS = 10; -const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.05)); -const s = new Series(data); +const data = Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.05)); +const s = new Series({ data }); for (let i = 0; i < WARMUP; i++) { s.ewm({ span: 20 }).mean(); diff --git a/benchmarks/tsb/bench_groupby_mean.ts b/benchmarks/tsb/bench_groupby_mean.ts index efecfddb..7c104884 100644 --- a/benchmarks/tsb/bench_groupby_mean.ts +++ b/benchmarks/tsb/bench_groupby_mean.ts @@ -8,8 +8,8 @@ const WARMUP = 3; const ITERATIONS = 10; const keys = Array.from({ length: ROWS }, (_, i) => `group_${i % 100}`); -const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1); -const df = new DataFrame({ key: keys, value: vals }); +const vals = Array.from({ length: ROWS }, (_, i) => i * 0.1); +const df = DataFrame.fromColumns({ key: keys, value: vals }); for (let i = 0; i < WARMUP; i++) { df.groupby("key").mean(); diff --git a/benchmarks/tsb/bench_merge.ts b/benchmarks/tsb/bench_merge.ts index da68b52b..625d55b2 100644 --- a/benchmarks/tsb/bench_merge.ts +++ b/benchmarks/tsb/bench_merge.ts @@ -4,14 +4,14 @@ import { DataFrame, merge } from "../../src/index.js"; const ROWS = 50_000; -const WARMUP = 3; -const ITERATIONS = 10; +const WARMUP = 1; +const ITERATIONS = 3; const keys = Array.from({ length: ROWS }, (_, i) => i % 1000); -const vals1 = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0); -const vals2 = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0); -const df1 = new DataFrame({ key: keys, val1: vals1 }); -const df2 = new DataFrame({ key: keys, val2: vals2 }); +const vals1 = Array.from({ length: ROWS }, (_, i) => i * 1.0); +const vals2 = Array.from({ length: ROWS }, (_, i) => i * 2.0); +const df1 = DataFrame.fromColumns({ key: keys, val1: vals1 }); +const df2 = DataFrame.fromColumns({ key: keys, val2: vals2 }); for (let i = 0; i < WARMUP; i++) { merge(df1, df2, { on: "key", how: "inner" }); diff --git a/benchmarks/tsb/bench_pivot_table.ts b/benchmarks/tsb/bench_pivot_table.ts index 78b94702..e1583619 100644 --- a/benchmarks/tsb/bench_pivot_table.ts +++ b/benchmarks/tsb/bench_pivot_table.ts @@ -1,7 +1,7 @@ /** * Benchmark: pivot_table — pivot aggregation on 100k-row DataFrame */ -import { DataFrame } from "../../src/index.js"; +import { DataFrame, pivotTable } from "../../src/index.js"; const ROWS = 100_000; const WARMUP = 3; @@ -9,16 +9,16 @@ const ITERATIONS = 10; const rows = Array.from({ length: ROWS }, (_, i) => `row_${i % 100}`); const cols = Array.from({ length: ROWS }, (_, i) => `col_${i % 50}`); -const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1); -const df = new DataFrame({ row: rows, col: cols, value: vals }); +const vals = Array.from({ length: ROWS }, (_, i) => i * 0.1); +const df = DataFrame.fromColumns({ row: rows, col: cols, value: vals }); for (let i = 0; i < WARMUP; i++) { - df.pivot_table({ values: "value", index: "row", columns: "col", aggfunc: "mean" }); + pivotTable(df, { values: "value", index: "row", columns: "col", aggfunc: "mean" }); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - df.pivot_table({ values: "value", index: "row", columns: "col", aggfunc: "mean" }); + pivotTable(df, { values: "value", index: "row", columns: "col", aggfunc: "mean" }); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_read_csv.ts b/benchmarks/tsb/bench_read_csv.ts index 0d9462bf..1618b722 100644 --- a/benchmarks/tsb/bench_read_csv.ts +++ b/benchmarks/tsb/bench_read_csv.ts @@ -1,7 +1,7 @@ /** * Benchmark: read_csv — parse a 100k-row CSV string */ -import { read_csv } from "../../src/index.js"; +import { readCsv } from "../../src/index.js"; const ROWS = 100_000; const WARMUP = 2; @@ -14,18 +14,13 @@ for (let i = 0; i < ROWS; i++) { } const csvContent = lines.join("\n"); -// Write to a temp file -import { writeFileSync } from "node:fs"; -const tmpPath = "/tmp/gh-aw/agent/bench_read_csv.csv"; -writeFileSync(tmpPath, csvContent, "utf8"); - for (let i = 0; i < WARMUP; i++) { - read_csv(tmpPath); + readCsv(csvContent); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - read_csv(tmpPath); + readCsv(csvContent); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_rolling_mean.ts b/benchmarks/tsb/bench_rolling_mean.ts index 646d3100..69c66dbd 100644 --- a/benchmarks/tsb/bench_rolling_mean.ts +++ b/benchmarks/tsb/bench_rolling_mean.ts @@ -7,8 +7,8 @@ const ROWS = 100_000; const WARMUP = 3; const ITERATIONS = 10; -const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); -const s = new Series(data); +const data = Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series({ data }); for (let i = 0; i < WARMUP; i++) { s.rolling(100).mean(); diff --git a/benchmarks/tsb/bench_series_arithmetic.ts b/benchmarks/tsb/bench_series_arithmetic.ts index 552be2ca..33d75a60 100644 --- a/benchmarks/tsb/bench_series_arithmetic.ts +++ b/benchmarks/tsb/bench_series_arithmetic.ts @@ -7,8 +7,8 @@ const ROWS = 100_000; const WARMUP = 5; const ITERATIONS = 20; -const data = Float64Array.from({ length: ROWS }, (_, i) => i * 0.5); -const s = new Series(data); +const data = Array.from({ length: ROWS }, (_, i) => i * 0.5); +const s = new Series({ data }); for (let i = 0; i < WARMUP; i++) { s.add(2.0).mul(0.5); diff --git a/benchmarks/tsb/bench_series_cumsum.ts b/benchmarks/tsb/bench_series_cumsum.ts index 3eeba5b0..215173bd 100644 --- a/benchmarks/tsb/bench_series_cumsum.ts +++ b/benchmarks/tsb/bench_series_cumsum.ts @@ -1,22 +1,22 @@ /** * Benchmark: series_cumsum — cumulative sum on 100k-element Series */ -import { Series } from "../../src/index.js"; +import { Series, cumsum } from "../../src/index.js"; const ROWS = 100_000; const WARMUP = 5; const ITERATIONS = 20; -const data = Float64Array.from({ length: ROWS }, (_, i) => i * 0.001); -const s = new Series(data); +const data = Array.from({ length: ROWS }, (_, i) => i * 0.001); +const s = new Series({ data }); for (let i = 0; i < WARMUP; i++) { - s.cumsum(); + cumsum(s); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - s.cumsum(); + cumsum(s); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_series_fillna.ts b/benchmarks/tsb/bench_series_fillna.ts index 3e658b01..8aa8996a 100644 --- a/benchmarks/tsb/bench_series_fillna.ts +++ b/benchmarks/tsb/bench_series_fillna.ts @@ -7,9 +7,11 @@ const ROWS = 100_000; const WARMUP = 5; const ITERATIONS = 20; -// Create series with every 5th value as NaN -const data = Float64Array.from({ length: ROWS }, (_, i) => (i % 5 === 0 ? NaN : i * 1.1)); -const s = new Series(data); +// Create series with every 5th value as null +const data: (number | null)[] = Array.from({ length: ROWS }, (_, i) => + i % 5 === 0 ? null : i * 1.1, +); +const s = new Series({ data }); for (let i = 0; i < WARMUP; i++) { s.fillna(0.0); diff --git a/benchmarks/tsb/bench_series_shift.ts b/benchmarks/tsb/bench_series_shift.ts index 46e79d19..0a7efd95 100644 --- a/benchmarks/tsb/bench_series_shift.ts +++ b/benchmarks/tsb/bench_series_shift.ts @@ -1,5 +1,8 @@ /** * Benchmark: series_shift — shift values by 1 position in a 100k-element Series + * + * Note: tsb does not have a built-in shift method yet, so we implement the + * equivalent operation manually (prepend null, drop last element). */ import { Series } from "../../src/index.js"; @@ -7,16 +10,29 @@ const ROWS = 100_000; const WARMUP = 5; const ITERATIONS = 20; -const data = Float64Array.from({ length: ROWS }, (_, i) => i * 1.0); -const s = new Series(data); +const data = Array.from({ length: ROWS }, (_, i) => i * 1.0); +const s = new Series({ data }); + +/** Shift a numeric Series by 1 position, filling with null. */ +function shiftSeries(series: Series): Series { + const vals = series.toArray(); + const shifted: (number | null)[] = [null]; + for (let i = 0; i < vals.length - 1; i++) { + const v = vals[i]; + if (v !== undefined) { + shifted.push(v); + } + } + return new Series({ data: shifted }); +} for (let i = 0; i < WARMUP; i++) { - s.shift(1); + shiftSeries(s); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - s.shift(1); + shiftSeries(s); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_series_sort.ts b/benchmarks/tsb/bench_series_sort.ts index c6aedb93..a65be39b 100644 --- a/benchmarks/tsb/bench_series_sort.ts +++ b/benchmarks/tsb/bench_series_sort.ts @@ -7,16 +7,16 @@ const ROWS = 100_000; const WARMUP = 3; const ITERATIONS = 10; -const data = Float64Array.from({ length: ROWS }, () => Math.random() * 1000); -const s = new Series(data); +const data = Array.from({ length: ROWS }, () => Math.random() * 1000); +const s = new Series({ data }); for (let i = 0; i < WARMUP; i++) { - s.sort_values(); + s.sortValues(); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - s.sort_values(); + s.sortValues(); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_series_string_ops.ts b/benchmarks/tsb/bench_series_string_ops.ts index c44cdefe..9ef81563 100644 --- a/benchmarks/tsb/bench_series_string_ops.ts +++ b/benchmarks/tsb/bench_series_string_ops.ts @@ -8,7 +8,7 @@ const WARMUP = 3; const ITERATIONS = 10; const data = Array.from({ length: ROWS }, (_, i) => `hello_world_${i % 200}`); -const s = new Series(data); +const s = new Series({ data }); for (let i = 0; i < WARMUP; i++) { s.str.upper(); diff --git a/benchmarks/tsb/bench_series_value_counts.ts b/benchmarks/tsb/bench_series_value_counts.ts index b5352f54..5e4d7031 100644 --- a/benchmarks/tsb/bench_series_value_counts.ts +++ b/benchmarks/tsb/bench_series_value_counts.ts @@ -1,22 +1,22 @@ /** * Benchmark: value_counts on a 100k-element Series with 100 distinct values */ -import { Series } from "../../src/index.js"; +import { Series, valueCounts } from "../../src/index.js"; const ROWS = 100_000; const WARMUP = 3; const ITERATIONS = 10; const data = Array.from({ length: ROWS }, (_, i) => `cat_${i % 100}`); -const s = new Series(data); +const s = new Series({ data }); for (let i = 0; i < WARMUP; i++) { - s.value_counts(); + valueCounts(s); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - s.value_counts(); + valueCounts(s); } const total = performance.now() - start; diff --git a/biome.json b/biome.json index 5e64d5ce..29a04fd7 100644 --- a/biome.json +++ b/biome.json @@ -69,6 +69,20 @@ "rules": { "nursery": { "noSecrets": "off" + }, + "complexity": { + "useLiteralKeys": "off" + } + } + } + }, + { + "include": ["benchmarks/**"], + "linter": { + "rules": { + "suspicious": { + "noConsole": "off", + "noConsoleLog": "off" } } } diff --git a/src/core/api_types.ts b/src/core/api_types.ts index 860d2050..12f53671 100644 --- a/src/core/api_types.ts +++ b/src/core/api_types.ts @@ -23,8 +23,8 @@ * @module */ -import { Dtype } from "./dtype.ts"; import type { DtypeName } from "../types.ts"; +import { Dtype } from "./dtype.ts"; // ─── internal helper ────────────────────────────────────────────────────────── @@ -95,14 +95,19 @@ export function isListLike(val: unknown): boolean { return false; } // Has Symbol.iterator and is not a plain number/boolean/bigint/symbol - if (typeof val === "number" || typeof val === "boolean" || typeof val === "bigint" || typeof val === "symbol") { + if ( + typeof val === "number" || + typeof val === "boolean" || + typeof val === "bigint" || + typeof val === "symbol" + ) { return false; } if (typeof val === "object" || typeof val === "function") { if (Symbol.iterator in (val as object)) { return true; } - const len = (val as Record)["length"]; + const len = (val as { length?: unknown }).length; if (typeof len === "number" && len >= 0 && Number.isInteger(len)) { return true; } @@ -134,7 +139,7 @@ export function isArrayLike(val: unknown): boolean { if (typeof val !== "object" && typeof val !== "function") { return false; } - const len = (val as Record)["length"]; + const len = (val as { length?: unknown }).length; return typeof len === "number" && len >= 0 && Number.isInteger(len); } @@ -192,7 +197,7 @@ export function isIterator(val: unknown): boolean { if (typeof val !== "object" && typeof val !== "function") { return false; } - return typeof (val as Record)["next"] === "function"; + return typeof (val as { next?: unknown }).next === "function"; } /** diff --git a/src/core/index.ts b/src/core/index.ts index 08713cae..255aade6 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -15,7 +15,13 @@ export { CategoricalAccessor } from "./cat_accessor.ts"; export type { CatSeriesLike } from "./cat_accessor.ts"; export { MultiIndex } from "./multi_index.ts"; export type { MultiIndexOptions } from "./multi_index.ts"; -export { insertColumn, popColumn, reorderColumns, moveColumn, dataFrameFromPairs } from "./insert_pop.ts"; +export { + insertColumn, + popColumn, + reorderColumns, + moveColumn, + dataFrameFromPairs, +} from "./insert_pop.ts"; export type { PopResult } from "./insert_pop.ts"; export { toDictOriented, fromDictOriented } from "./to_from_dict.ts"; export type { diff --git a/src/core/insert_pop.ts b/src/core/insert_pop.ts index d56c42bc..e088230c 100644 --- a/src/core/insert_pop.ts +++ b/src/core/insert_pop.ts @@ -27,7 +27,7 @@ */ import type { Label, Scalar } from "../types.ts"; -import { Index } from "./base-index.ts"; +import type { Index } from "./base-index.ts"; import { DataFrame } from "./frame.ts"; import { Series } from "./series.ts"; diff --git a/src/core/pipe_apply.ts b/src/core/pipe_apply.ts index 2f0b0180..dfa2259b 100644 --- a/src/core/pipe_apply.ts +++ b/src/core/pipe_apply.ts @@ -45,12 +45,7 @@ import type { Label, Scalar } from "../types.ts"; export function pipe(value: A): A; export function pipe(value: A, fn1: (a: A) => B): B; export function pipe(value: A, fn1: (a: A) => B, fn2: (b: B) => C): C; -export function pipe( - value: A, - fn1: (a: A) => B, - fn2: (b: B) => C, - fn3: (c: C) => D, -): D; +export function pipe(value: A, fn1: (a: A) => B, fn2: (b: B) => C, fn3: (c: C) => D): D; export function pipe( value: A, fn1: (a: A) => B, @@ -119,7 +114,11 @@ export function seriesApply( for (let i = 0; i < n; i++) { out[i] = fn(series.iat(i), series.index.at(i), i); } - return new Series({ data: out, index: series.index, ...(series.name !== null ? { name: series.name } : {}) }); + return new Series({ + data: out, + index: series.index, + ...(series.name !== null ? { name: series.name } : {}), + }); } /** @@ -141,7 +140,11 @@ export function seriesTransform( for (let i = 0; i < n; i++) { out[i] = fn(series.iat(i)); } - return new Series({ data: out, index: series.index, ...(series.name !== null ? { name: series.name } : {}) }); + return new Series({ + data: out, + index: series.index, + ...(series.name !== null ? { name: series.name } : {}), + }); } // ─── DataFrame apply ────────────────────────────────────────────────────────── @@ -272,7 +275,11 @@ export function dataFrameTransform( */ export function dataFrameTransformRows( df: DataFrame, - fn: (row: Readonly>, rowLabel: Label, position: number) => Readonly>, + fn: ( + row: Readonly>, + rowLabel: Label, + position: number, + ) => Readonly>, ): DataFrame { const colNames = df.columns.values as readonly string[]; const rowLabels = df.index.values as readonly Label[]; @@ -290,7 +297,9 @@ export function dataFrameTransformRows( const rowOut = fn(rowIn, rowLabels[i] as Label, i); for (const c of colNames) { const colArr = colArrays.get(c); - if (colArr === undefined) continue; + if (colArr === undefined) { + continue; + } // use the transformed value if present, else keep original colArr[i] = c in rowOut ? (rowOut[c] as Scalar) : rowIn[c]; } diff --git a/src/core/to_from_dict.ts b/src/core/to_from_dict.ts index 975a7fc5..22acf744 100644 --- a/src/core/to_from_dict.ts +++ b/src/core/to_from_dict.ts @@ -25,12 +25,20 @@ import type { Label, Scalar } from "../types.ts"; import { Index } from "./base-index.ts"; import { DataFrame } from "./frame.ts"; -import { Series } from "./series.ts"; +import type { Series } from "./series.ts"; // ─── public types ────────────────────────────────────────────────────────────── /** Orient values supported by {@link toDictOriented}. */ -export type ToDictOrient = "dict" | "columns" | "list" | "series" | "split" | "tight" | "records" | "index"; +export type ToDictOrient = + | "dict" + | "columns" + | "list" + | "series" + | "split" + | "tight" + | "records" + | "index"; /** Orient values supported by {@link fromDictOriented}. */ export type FromDictOrient = "columns" | "index" | "split" | "tight"; @@ -78,17 +86,23 @@ function isDefaultRange(labels: readonly Label[]): boolean { * @param df Source DataFrame. * @param orient Output structure. Defaults to `"dict"`. */ -export function toDictOriented(df: DataFrame, orient: "dict" | "columns"): Record>; +export function toDictOriented( + df: DataFrame, + orient: "dict" | "columns", +): Record>; export function toDictOriented(df: DataFrame, orient: "list"): Record; export function toDictOriented(df: DataFrame, orient: "series"): Record>; export function toDictOriented(df: DataFrame, orient: "split"): DictSplit; export function toDictOriented(df: DataFrame, orient: "tight"): DictTight; export function toDictOriented(df: DataFrame, orient: "records"): Record[]; -export function toDictOriented(df: DataFrame, orient: "index"): Record>; +export function toDictOriented( + df: DataFrame, + orient: "index", +): Record>; export function toDictOriented( df: DataFrame, orient: ToDictOrient = "dict", -): Record | unknown[] { +): Record | unknown[] | DictSplit | DictTight { const colNames = [...df.columns.values]; const rowLabels = [...(df.index.values as Label[])]; const nRows = df.index.size; @@ -201,10 +215,7 @@ export function fromDictOriented( orient: "index", ): DataFrame; export function fromDictOriented(data: SplitInput, orient: "split" | "tight"): DataFrame; -export function fromDictOriented( - data: unknown, - orient: FromDictOrient = "columns", -): DataFrame { +export function fromDictOriented(data: unknown, orient: FromDictOrient = "columns"): DataFrame { switch (orient) { case "columns": { const colsData = data as Record; @@ -266,8 +277,11 @@ function buildFromSplit(input: SplitInput): DataFrame { for (const row of data) { for (let j = 0; j < columns.length; j++) { const col = columns[j]; + if (col === undefined) { + continue; + } const arr = colArrays[col]; - if (col !== undefined && arr !== undefined) { + if (arr !== undefined) { arr.push(row[j] ?? null); } } diff --git a/src/index.ts b/src/index.ts index ab2dbcdc..44aa1357 100644 --- a/src/index.ts +++ b/src/index.ts @@ -115,10 +115,22 @@ export type { ClipOptions, RoundOptions, DataFrameElemOptions } from "./stats/in export { valueCounts, dataFrameValueCounts } from "./stats/index.ts"; export type { ValueCountsOptions, DataFrameValueCountsOptions } from "./stats/index.ts"; -export { insertColumn, popColumn, reorderColumns, moveColumn, dataFrameFromPairs } from "./core/index.ts"; +export { + insertColumn, + popColumn, + reorderColumns, + moveColumn, + dataFrameFromPairs, +} from "./core/index.ts"; export type { PopResult } from "./core/index.ts"; export { toDictOriented, fromDictOriented } from "./core/index.ts"; -export type { ToDictOrient, FromDictOrient, DictSplit, DictTight, SplitInput } from "./core/index.ts"; +export type { + ToDictOrient, + FromDictOrient, + DictSplit, + DictTight, + SplitInput, +} from "./core/index.ts"; export { wideToLong } from "./reshape/index.ts"; export type { WideToLongOptions } from "./reshape/index.ts"; export { cut, qcut } from "./stats/index.ts"; @@ -132,7 +144,16 @@ export type { SeriesWhereOptions, DataFrameWhereOptions, } from "./stats/index.ts"; -export { isna, notna, isnull, notnull, fillna, dropna, countna, countValid } from "./stats/index.ts"; +export { + isna, + notna, + isnull, + notnull, + fillna, + dropna, + countna, + countValid, +} from "./stats/index.ts"; export type { IsnaInput, FillnaOptions, DropnaOptions } from "./stats/index.ts"; export { getAttrs, diff --git a/src/reshape/wide_to_long.ts b/src/reshape/wide_to_long.ts index 7ac62ba8..878e8a27 100644 --- a/src/reshape/wide_to_long.ts +++ b/src/reshape/wide_to_long.ts @@ -38,10 +38,10 @@ * @module */ -import type { Label, Scalar } from "../types.ts"; -import { Index } from "../core/base-index.ts"; +import type { Index } from "../core/base-index.ts"; import { DataFrame } from "../core/frame.ts"; import { RangeIndex } from "../core/range-index.ts"; +import type { Label, Scalar } from "../types.ts"; // ─── public types ────────────────────────────────────────────────────────────── @@ -193,7 +193,8 @@ export function wideToLong( const arr = stubArrays[stub]; if (arr !== undefined) { const wideCol = df.get(wideColName); - const val: Scalar = wideCol !== undefined ? ((wideCol.values[row] ?? null) as Scalar) : null; + const val: Scalar = + wideCol !== undefined ? ((wideCol.values[row] ?? null) as Scalar) : null; arr.push(val); } } diff --git a/src/stats/categorical_ops.ts b/src/stats/categorical_ops.ts index f9abbb0d..f5bb19c2 100644 --- a/src/stats/categorical_ops.ts +++ b/src/stats/categorical_ops.ts @@ -110,9 +110,7 @@ export function catFromCodes( const values: Scalar[] = codes.map((code) => { if (code === -1) return null; if (code < -1 || code >= cats.length) { - throw new RangeError( - `catFromCodes: code ${code} is out of range [0, ${cats.length - 1}]`, - ); + throw new RangeError(`catFromCodes: code ${code} is out of range [0, ${cats.length - 1}]`); } return cats[code] as Scalar; }); @@ -169,9 +167,7 @@ export function catUnionCategories(a: CatSeriesLike, b: CatSeriesLike): CatSerie */ export function catIntersectCategories(a: CatSeriesLike, b: CatSeriesLike): CatSeriesLike { const bSet = new Set((b.cat.categories.values as Scalar[]).map(String)); - const intersected = (a.cat.categories.values as Scalar[]).filter((c) => - bSet.has(String(c)), - ); + const intersected = (a.cat.categories.values as Scalar[]).filter((c) => bSet.has(String(c))); return a.cat.setCategories(intersected, a.cat.ordered); } @@ -194,9 +190,7 @@ export function catIntersectCategories(a: CatSeriesLike, b: CatSeriesLike): CatS */ export function catDiffCategories(a: CatSeriesLike, b: CatSeriesLike): CatSeriesLike { const bSet = new Set((b.cat.categories.values as Scalar[]).map(String)); - const remaining = (a.cat.categories.values as Scalar[]).filter( - (c) => !bSet.has(String(c)), - ); + const remaining = (a.cat.categories.values as Scalar[]).filter((c) => !bSet.has(String(c))); return a.cat.setCategories(remaining, a.cat.ordered); } @@ -475,9 +469,7 @@ export function catRecode( const cats = series.cat.categories.values as Scalar[]; const newCats = cats.map((c): Scalar => { const k = String(c); - return Object.prototype.hasOwnProperty.call(mapping, k) - ? (mapping[k] as string) - : c; + return Object.prototype.hasOwnProperty.call(mapping, k) ? (mapping[k] as string) : c; }); return series.cat.renameCategories(newCats); } diff --git a/src/stats/cut_qcut.ts b/src/stats/cut_qcut.ts index d24b3dda..fefe98ed 100644 --- a/src/stats/cut_qcut.ts +++ b/src/stats/cut_qcut.ts @@ -98,7 +98,10 @@ export interface QCutOptions { /** Format a numeric edge to at most `precision` decimal places. */ function fmt(v: number, precision: number): string { - return v.toFixed(precision).replace(/\.?0+$/, "").replace(/^-0$/, "0"); + return v + .toFixed(precision) + .replace(/\.?0+$/, "") + .replace(/^-0$/, "0"); } /** Build interval label string from two edges. */ @@ -174,9 +177,7 @@ function assignBins( if (v > binHi) return null; if (lo === 0 && include_lowest) { if (v < binLo) return null; - } else { - if (v <= binLo) return null; - } + } else if (v <= binLo) return null; } else { // [binLo, binHi) if (v < binLo || v >= binHi) { @@ -239,6 +240,11 @@ export function cut( edges = Array.from({ length: bins + 1 }, (_, i) => mn + i * step); // Slightly extend the lower edge so the minimum value is included edges[0] = mn - step * 0.001; + // Guard against floating-point drift: ensure the last edge covers the max + const lastIdx = edges.length - 1; + if ((edges[lastIdx] as number) < mx) { + edges[lastIdx] = mx; + } edges = deduplicateEdges(edges, duplicates); } else { if (bins.length < 2) { @@ -349,7 +355,7 @@ export function qcut( const numBins = edges.length - 1; if (numBins < 1) { throw new Error( - "Not enough unique quantile edges. Try passing duplicates=\"drop\" or reducing `q`.", + 'Not enough unique quantile edges. Try passing duplicates="drop" or reducing `q`.', ); } diff --git a/src/stats/format_ops.ts b/src/stats/format_ops.ts index 148a85b1..387ef485 100644 --- a/src/stats/format_ops.ts +++ b/src/stats/format_ops.ts @@ -23,7 +23,7 @@ * @module */ -import { DataFrame } from "../core/index.ts"; +import type { DataFrame } from "../core/index.ts"; import { Series } from "../core/index.ts"; import type { Scalar } from "../types.ts"; @@ -300,7 +300,10 @@ export interface SeriesToStringOptions { * @param series The Series to render. * @param options Optional rendering options. */ -export function seriesToString(series: Series, options: SeriesToStringOptions = {}): string { +export function seriesToString( + series: Series, + options: SeriesToStringOptions = {}, +): string { const maxRows = options.maxRows ?? 60; const fmt: Formatter = options.formatter ?? ((v: Scalar) => String(v ?? "NaN")); const displayName = options.name !== undefined ? options.name : series.name; @@ -326,7 +329,7 @@ export function seriesToString(series: Series, options: SeriesToStringOp } if (truncated) { - lines.push(`...`); + lines.push("..."); } const footer: string[] = []; diff --git a/src/stats/notna_isna.ts b/src/stats/notna_isna.ts index bd685c1c..f6ea17b0 100644 --- a/src/stats/notna_isna.ts +++ b/src/stats/notna_isna.ts @@ -284,8 +284,7 @@ function _dropnaRows(df: DataFrame, how: "any" | "all"): DataFrame { for (let i = 0; i < nRows; i++) { const rowMissing: boolean[] = colNames.map((col) => scalarIsna(df.col(col).iat(i))); - const shouldDrop = - how === "any" ? rowMissing.some(Boolean) : rowMissing.every(Boolean); + const shouldDrop = how === "any" ? rowMissing.some(Boolean) : rowMissing.every(Boolean); if (!shouldDrop) { keep.push(i); @@ -321,8 +320,7 @@ function _dropnaColumns(df: DataFrame, how: "any" | "all"): DataFrame { const vals = series.values; const missingFlags = vals.map(scalarIsna); - const shouldDrop = - how === "any" ? missingFlags.some(Boolean) : missingFlags.every(Boolean); + const shouldDrop = how === "any" ? missingFlags.some(Boolean) : missingFlags.every(Boolean); if (!shouldDrop) { colMap.set(name, series); diff --git a/src/stats/numeric_extended.ts b/src/stats/numeric_extended.ts index c5534e71..fe6e4f1e 100644 --- a/src/stats/numeric_extended.ts +++ b/src/stats/numeric_extended.ts @@ -145,20 +145,22 @@ export function digitize( if (right) { // open left, closed right: bins[i-1] < v <= bins[i] for (let i = 0; i < n; i++) { - if (v <= (bins[i] as number)) { - return i - 1; // below first edge → -1 + if (v < (bins[i] as number)) { + return i - 1; // below edge i → bin i-1 + } + if (v === (bins[i] as number)) { + return i; // exactly at edge i → bin i (right-inclusive) } } return n - 1; // above last edge - } else { - // closed left, open right: bins[i-1] <= v < bins[i] - for (let i = 0; i < n; i++) { - if (v < (bins[i] as number)) { - return i - 1; - } + } + // closed left, open right: bins[i-1] <= v < bins[i] + for (let i = 0; i < n; i++) { + if (v < (bins[i] as number)) { + return i - 1; } - return n - 1; // at or above last edge } + return n - 1; // at or above last edge }); } @@ -300,7 +302,14 @@ export function linspace(start: number, stop: number, num = 50): number[] { const step = (stop - start) / (num - 1); const result: number[] = []; for (let i = 0; i < num; i++) { - result.push(i === num - 1 ? stop : start + i * step); + // Use exact values for first and last elements to avoid floating-point drift + if (i === 0) { + result.push(start); + } else if (i === num - 1) { + result.push(stop); + } else { + result.push(start + i * step); + } } return result; } @@ -432,10 +441,7 @@ export function percentileOfScore( * // approximately [−1.5, −0.5, −0.5, −0.5, 0, 0, 1, 2] (normalised) * ``` */ -export function zscore( - series: Series, - options?: ZscoreOptions, -): Series { +export function zscore(series: Series, options?: ZscoreOptions): Series { const ddof = options?.ddof ?? 1; const vals = series.values as readonly Scalar[]; const nums = finiteNums(vals); @@ -455,7 +461,7 @@ export function zscore( return series.withValues(nanVals) as Series; } - const zVals = vals.map((v) => (isNum(v) ? ((v - mean) / std) as Scalar : v)); + const zVals = vals.map((v) => (isNum(v) ? (((v - mean) / std) as Scalar) : v)); return series.withValues(zVals) as Series; } @@ -481,10 +487,7 @@ export function zscore( * // → Series([0, 0.5, 1]) * ``` */ -export function minMaxNormalize( - series: Series, - options?: MinMaxOptions, -): Series { +export function minMaxNormalize(series: Series, options?: MinMaxOptions): Series { const rMin = options?.featureRangeMin ?? 0; const rMax = options?.featureRangeMax ?? 1; if (rMin >= rMax) { @@ -508,7 +511,7 @@ export function minMaxNormalize( } const scaled = vals.map((v) => - isNum(v) ? (((v - min) / span) * (rMax - rMin) + rMin) as Scalar : v, + isNum(v) ? ((((v - min) / span) * (rMax - rMin) + rMin) as Scalar) : v, ); return series.withValues(scaled) as Series; } @@ -532,10 +535,7 @@ export function minMaxNormalize( * // ≈ 0.5 * ``` */ -export function coefficientOfVariation( - series: Series, - options?: CvOptions, -): number { +export function coefficientOfVariation(series: Series, options?: CvOptions): number { const ddof = options?.ddof ?? 1; const vals = series.values as readonly Scalar[]; const nums = finiteNums(vals); diff --git a/src/stats/string_ops.ts b/src/stats/string_ops.ts index 19d69e13..4d283a12 100644 --- a/src/stats/string_ops.ts +++ b/src/stats/string_ops.ts @@ -22,7 +22,7 @@ */ import { DataFrame, Series } from "../core/index.ts"; -import type { Label, Scalar } from "../types.ts"; +import type { Scalar } from "../types.ts"; // ─── public types ───────────────────────────────────────────────────────────── @@ -30,7 +30,7 @@ import type { Label, Scalar } from "../types.ts"; export type NormalizeForm = "NFC" | "NFD" | "NFKC" | "NFKD"; /** Input accepted by all string-op functions. */ -export type StrInput = Series | readonly string[] | string; +export type StrInput = Series | readonly Scalar[] | readonly string[] | string; /** Options for {@link strGetDummies}. */ export interface GetDummiesOptions { @@ -67,8 +67,12 @@ export interface ExtractAllOptions { /** Extract a plain string from a Scalar value; returns `""` for non-strings. */ function scalarToStr(v: Scalar): string { - if (typeof v === "string") return v; - if (v === null || v === undefined) return ""; + if (typeof v === "string") { + return v; + } + if (v === null || v === undefined) { + return ""; + } return String(v); } @@ -77,7 +81,9 @@ function scalarToStr(v: Scalar): string { * Scalars are wrapped in a single-element array. */ function toStringArray(input: StrInput): string[] { - if (typeof input === "string") return [input]; + if (typeof input === "string") { + return [input]; + } if (input instanceof Series) { return input.values.map(scalarToStr); } @@ -124,7 +130,9 @@ export function strNormalize( input: StrInput, form: NormalizeForm = "NFC", ): Series | string { - if (typeof input === "string") return input.normalize(form); + if (typeof input === "string") { + return input.normalize(form); + } const strs = toStringArray(input); const data: Scalar[] = strs.map((s) => s.normalize(form)); return buildSeries(data, input); @@ -167,7 +175,9 @@ export function strGetDummies( const seen = new Set(); const tokenRows: string[][] = strs.map((s) => { const tokens = s === "" ? [] : s.split(sep); - tokens.forEach((t) => seen.add(t)); + for (const t of tokens) { + seen.add(t); + } return tokens; }); @@ -236,11 +246,16 @@ export function strExtractAll( const data: Scalar[] = strs.map((s) => { const matches: string[][] = []; - let m: RegExpExecArray | null; re.lastIndex = 0; - while ((m = re.exec(s)) !== null) { + for (;;) { + const m = re.exec(s); + if (m === null) { + break; + } matches.push([...m]); - if (!re.global) break; + if (!re.global) { + break; + } } // Store as JSON string so it fits in Scalar; consumers can JSON.parse return JSON.stringify(matches); @@ -272,17 +287,12 @@ export function strRemovePrefix( input: readonly string[] | Series, prefix: string, ): Series; -export function strRemovePrefix( - input: StrInput, - prefix: string, -): Series | string { +export function strRemovePrefix(input: StrInput, prefix: string): Series | string { if (typeof input === "string") { return input.startsWith(prefix) ? input.slice(prefix.length) : input; } const strs = toStringArray(input); - const data: Scalar[] = strs.map((s) => - s.startsWith(prefix) ? s.slice(prefix.length) : s, - ); + const data: Scalar[] = strs.map((s) => (s.startsWith(prefix) ? s.slice(prefix.length) : s)); return buildSeries(data, input); } @@ -308,10 +318,7 @@ export function strRemoveSuffix( input: readonly string[] | Series, suffix: string, ): Series; -export function strRemoveSuffix( - input: StrInput, - suffix: string, -): Series | string { +export function strRemoveSuffix(input: StrInput, suffix: string): Series | string { if (typeof input === "string") { return input.endsWith(suffix) ? input.slice(0, input.length - suffix.length) : input; } @@ -356,7 +363,9 @@ export function strTranslate( for (const ch of s) { if (table.has(ch)) { const repl = table.get(ch); - if (repl !== null && repl !== undefined) result += repl; + if (repl !== null && repl !== undefined) { + result += repl; + } // null → delete: skip } else { result += ch; @@ -365,7 +374,9 @@ export function strTranslate( return result; }; - if (typeof input === "string") return translate(input); + if (typeof input === "string") { + return translate(input); + } const strs = toStringArray(input); const data: Scalar[] = strs.map(translate); return buildSeries(data, input); @@ -390,12 +401,8 @@ export function strTranslate( * ``` */ export function strCharWidth(input: string): number; -export function strCharWidth( - input: readonly string[] | Series, -): Series; -export function strCharWidth( - input: StrInput, -): Series | number { +export function strCharWidth(input: readonly string[] | Series): Series; +export function strCharWidth(input: StrInput): Series | number { const width = (s: string): number => { let w = 0; for (const ch of s) { @@ -429,7 +436,9 @@ export function strCharWidth( return w; }; - if (typeof input === "string") return width(input); + if (typeof input === "string") { + return width(input); + } const strs = toStringArray(input); const data: Scalar[] = strs.map((s) => width(s)); return buildSeries(data, input); @@ -453,15 +462,13 @@ export function strCharWidth( * ``` */ export function strByteLength(input: string): number; -export function strByteLength( - input: readonly string[] | Series, -): Series; -export function strByteLength( - input: StrInput, -): Series | number { +export function strByteLength(input: readonly string[] | Series): Series; +export function strByteLength(input: StrInput): Series | number { const byteLen = (s: string): number => new TextEncoder().encode(s).length; - if (typeof input === "string") return byteLen(input); + if (typeof input === "string") { + return byteLen(input); + } const strs = toStringArray(input); const data: Scalar[] = strs.map((s) => byteLen(s)); return buildSeries(data, input); diff --git a/src/stats/string_ops_extended.ts b/src/stats/string_ops_extended.ts index ed6e2a42..e32054d4 100644 --- a/src/stats/string_ops_extended.ts +++ b/src/stats/string_ops_extended.ts @@ -17,7 +17,7 @@ * @module */ -import { DataFrame, Index, RangeIndex, Series } from "../core/index.ts"; +import { DataFrame, type Index, RangeIndex, Series } from "../core/index.ts"; import type { Label, Scalar } from "../types.ts"; import type { StrInput } from "./string_ops.ts"; @@ -74,7 +74,9 @@ export function strSplitExpand( const maxSplits = options.n ?? -1; function splitOne(s: string | null): (string | null)[] { - if (s === null) return [null]; + if (s === null) { + return [null]; + } if (maxSplits < 0) { // unlimited splits const pat = sep instanceof RegExp ? sep : new RegExp(escapeRegex(sep)); @@ -91,11 +93,15 @@ export function strSplitExpand( sepLen = sep.length; } else { const m = rest.match(sep); - if (m === null || m.index === undefined) break; + if (m === null || m.index === undefined) { + break; + } idx = m.index; sepLen = m[0]?.length ?? 0; } - if (idx === -1) break; + if (idx === -1) { + break; + } parts.push(rest.slice(0, idx)); rest = rest.slice(idx + sepLen); } @@ -164,18 +170,27 @@ export function strExtractGroups( const groupNames = extractGroupNames(re); const vals = toValues(input); + // Determine number of capture groups by adding an empty-string alternative. + // This always matches, and (matchResult.length - 1) gives the group count. + const groupCountMatch = new RegExp(`${re.source}|`).exec(""); + const groupCount = groupCountMatch !== null ? groupCountMatch.length - 1 : 0; + const rows: (string | null)[][] = vals.map((v) => { const s = toStrOrNull(v); - if (s === null) return []; + if (s === null) { + return Array.from({ length: groupCount }, (): null => null); + } const m = re.exec(s); - if (m === null) return []; + if (m === null) { + return Array.from({ length: groupCount }, (): null => null); + } return Array.from({ length: m.length - 1 }, (_, i) => { const captured = m[i + 1]; return captured !== undefined ? captured : null; }); }); - const width = rows.reduce((w, r) => Math.max(w, r.length), 0); + const width = groupCount; // Use named groups if available and count matches; otherwise use 0-indexed strings. const colNames: string[] = @@ -200,10 +215,15 @@ export function strExtractGroups( function extractGroupNames(re: RegExp): string[] { const namedGroupPattern = /\(\?<([^>]+)>/g; const names: string[] = []; - let m: RegExpExecArray | null; - while ((m = namedGroupPattern.exec(re.source)) !== null) { + for (;;) { + const m = namedGroupPattern.exec(re.source); + if (m === null) { + break; + } const name = m[1]; - if (name !== undefined) names.push(name); + if (name !== undefined) { + names.push(name); + } } return names; } @@ -219,19 +239,17 @@ export type PartitionResult = [string, string, string]; /** Partition a scalar string at the first occurrence of `sep`. */ export function strPartition(input: string, sep: string): PartitionResult; /** Partition each element and expand to a DataFrame with columns `"0"`, `"1"`, `"2"`. */ -export function strPartition( - input: readonly Scalar[] | Series, - sep: string, -): DataFrame; +export function strPartition(input: readonly Scalar[] | Series, sep: string): DataFrame; /** @internal */ -export function strPartition( - input: StrInput, - sep: string, -): PartitionResult | DataFrame { +export function strPartition(input: StrInput, sep: string): PartitionResult | DataFrame { function partitionOne(s: string | null): [string | null, string | null, string | null] { - if (s === null) return [null, null, null]; + if (s === null) { + return [null, null, null]; + } const idx = s.indexOf(sep); - if (idx === -1) return [s, "", ""]; + if (idx === -1) { + return [s, "", ""]; + } return [s.slice(0, idx), sep, s.slice(idx + sep.length)]; } @@ -257,19 +275,17 @@ export function strPartition( /** Partition a scalar string at the LAST occurrence of `sep`. */ export function strRPartition(input: string, sep: string): PartitionResult; /** Partition each element at the last occurrence and expand to a DataFrame. */ -export function strRPartition( - input: readonly Scalar[] | Series, - sep: string, -): DataFrame; +export function strRPartition(input: readonly Scalar[] | Series, sep: string): DataFrame; /** @internal */ -export function strRPartition( - input: StrInput, - sep: string, -): PartitionResult | DataFrame { +export function strRPartition(input: StrInput, sep: string): PartitionResult | DataFrame { function rpartitionOne(s: string | null): [string | null, string | null, string | null] { - if (s === null) return [null, null, null]; + if (s === null) { + return [null, null, null]; + } const idx = s.lastIndexOf(sep); - if (idx === -1) return ["", "", s]; + if (idx === -1) { + return ["", "", s]; + } return [s.slice(0, idx), sep, s.slice(idx + sep.length)]; } @@ -313,10 +329,15 @@ export function strMultiReplace( replacements: readonly ReplacePair[], ): string | Series { function applyAll(s: string | null): string | null { - if (s === null) return null; + if (s === null) { + return null; + } let result = s; for (const { pat, repl } of replacements) { - result = result.replace(pat instanceof RegExp ? pat : new RegExp(escapeRegex(pat), "g"), repl); + result = result.replace( + pat instanceof RegExp ? pat : new RegExp(escapeRegex(pat), "g"), + repl, + ); } return result; } @@ -361,7 +382,9 @@ export function strIndent( const predicate = options.predicate ?? ((line: string) => line.trim().length > 0); function indentOne(s: string | null): string | null { - if (s === null) return null; + if (s === null) { + return null; + } return s .split("\n") .map((line) => (predicate(line) ? prefix + line : line)) @@ -401,19 +424,25 @@ export function strDedent(input: readonly Scalar[] | Series): Series { function dedentOne(s: string | null): string | null { - if (s === null) return null; + if (s === null) { + return null; + } const lines = s.split("\n"); // find the minimum leading-whitespace length among non-whitespace-only lines - let minIndent = Infinity; + let minIndent = Number.POSITIVE_INFINITY; for (const line of lines) { - if (line.trim().length === 0) continue; + if (line.trim().length === 0) { + continue; + } const leading = line.length - line.trimStart().length; - if (leading < minIndent) minIndent = leading; + if (leading < minIndent) { + minIndent = leading; + } } - if (minIndent === Infinity || minIndent === 0) return s; - return lines - .map((line) => (line.trim().length === 0 ? "" : line.slice(minIndent))) - .join("\n"); + if (minIndent === Number.POSITIVE_INFINITY || minIndent === 0) { + return s; + } + return lines.map((line) => (line.trim().length === 0 ? "" : line.slice(minIndent))).join("\n"); } if (typeof input === "string") { diff --git a/src/stats/where_mask.ts b/src/stats/where_mask.ts index d6921cd9..7518ae22 100644 --- a/src/stats/where_mask.ts +++ b/src/stats/where_mask.ts @@ -74,10 +74,7 @@ export interface DataFrameWhereOptions { * For a label-aligned `Series`, labels that are absent in the target * series are treated as `false`. */ -function resolveSeriesCond( - series: Series, - cond: SeriesCond, -): readonly boolean[] { +function resolveSeriesCond(series: Series, cond: SeriesCond): readonly boolean[] { if (typeof cond === "function") { const resolved = cond(series); return resolveSeriesCond(series, resolved); @@ -92,7 +89,9 @@ function resolveSeriesCond( const labels = series.index.values as readonly Label[]; return labels.map((label) => { const pos = boolSeries.index.values.indexOf(label); - if (pos === -1) return false; + if (pos === -1) { + return false; + } const v = boolSeries.values[pos]; return v === true; }); @@ -181,27 +180,28 @@ export function seriesMask( * For a label-aligned boolean `DataFrame`, missing column/row labels are treated * as `false`. */ -function resolveDataFrameCond( - df: DataFrame, - cond: DataFrameCond, -): Map { +function resolveDataFrameCond(df: DataFrame, cond: DataFrameCond): Map { const condDf: DataFrame = typeof cond === "function" ? cond(df) : cond; const result = new Map(); const rowLabels = df.index.values as readonly Label[]; for (const colName of df.columns.values) { - const condColIdx = condDf.columns.indexOf(colName); - if (condColIdx === -1) { + if (!condDf.columns.contains(colName)) { // Column absent from condition → treat entire column as false - result.set(colName, rowLabels.map(() => false)); + result.set( + colName, + rowLabels.map(() => false), + ); continue; } const condCol = condDf.col(colName); const rowMask: boolean[] = rowLabels.map((label) => { const rowPos = condDf.index.values.indexOf(label); - if (rowPos === -1) return false; + if (rowPos === -1) { + return false; + } return condCol.values[rowPos] === true; }); result.set(colName, rowMask); diff --git a/src/stats/window_extended.ts b/src/stats/window_extended.ts index 3811122d..4df780f7 100644 --- a/src/stats/window_extended.ts +++ b/src/stats/window_extended.ts @@ -153,7 +153,11 @@ function applyWindow( * rollingSem(s, 3); // [null, null, ~0.577, ~0.577, ~0.577] * ``` */ -export function rollingSem(series: SeriesLike, window: number, opts: WindowExtOptions = {}): SeriesLike { +export function rollingSem( + series: SeriesLike, + window: number, + opts: WindowExtOptions = {}, +): SeriesLike { return applyWindow(series, window, opts, 2, (nums) => { const s = numStd(nums, 1); return s / Math.sqrt(nums.length); @@ -185,7 +189,11 @@ export function rollingSem(series: SeriesLike, window: number, opts: WindowExtOp * rollingSkew(s, 3); // [null, null, 0, 0, 0] (symmetric windows) * ``` */ -export function rollingSkew(series: SeriesLike, window: number, opts: WindowExtOptions = {}): SeriesLike { +export function rollingSkew( + series: SeriesLike, + window: number, + opts: WindowExtOptions = {}, +): SeriesLike { return applyWindow(series, window, opts, 3, (nums, n) => { const m = numMean(nums); const s = numStd(nums, 1); @@ -223,7 +231,11 @@ export function rollingSkew(series: SeriesLike, window: number, opts: WindowExtO * rollingKurt(s, 4); // [null, null, null, -1.2] (uniform distribution) * ``` */ -export function rollingKurt(series: SeriesLike, window: number, opts: WindowExtOptions = {}): SeriesLike { +export function rollingKurt( + series: SeriesLike, + window: number, + opts: WindowExtOptions = {}, +): SeriesLike { return applyWindow(series, window, opts, 4, (nums, n) => { const m = numMean(nums); const s = numStd(nums, 1); @@ -231,7 +243,7 @@ export function rollingKurt(series: SeriesLike, window: number, opts: WindowExtO return 0; } const sum4 = nums.reduce((acc, v) => acc + ((v - m) / s) ** 4, 0); - const term1 = (n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3)) * sum4; + const term1 = ((n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3))) * sum4; const term2 = (3 * (n - 1) ** 2) / ((n - 2) * (n - 3)); return term1 - term2; }); @@ -274,7 +286,6 @@ function computeQuantile( const fracLo = virtual - lo; return fracLo < 0.5 ? loVal : hiVal; } - case "linear": default: { const frac = virtual - lo; return loVal + frac * (hiVal - loVal); diff --git a/src/window/rolling_apply.ts b/src/window/rolling_apply.ts index 18d09c93..ddaa781c 100644 --- a/src/window/rolling_apply.ts +++ b/src/window/rolling_apply.ts @@ -27,7 +27,7 @@ */ import { DataFrame } from "../core/index.ts"; -import { Index } from "../core/index.ts"; +import type { Index } from "../core/index.ts"; import { Series } from "../core/index.ts"; import type { Label, Scalar } from "../types.ts"; @@ -172,9 +172,7 @@ export function rollingApply( if (!met) { result.push(null); } else if (useRaw) { - const validOnly = (raw as readonly (number | null)[]).filter( - (v): v is number => v !== null, - ); + const validOnly = (raw as readonly (number | null)[]).filter((v): v is number => v !== null); result.push(fn(validOnly)); } else { result.push(fn(nums)); diff --git a/tests/core/api_types.test.ts b/tests/core/api_types.test.ts index 17064ceb..e8dd6931 100644 --- a/tests/core/api_types.test.ts +++ b/tests/core/api_types.test.ts @@ -3,7 +3,6 @@ */ import { describe, expect, it } from "bun:test"; import fc from "fast-check"; -import { Dtype } from "../../src/index.ts"; import { isArrayLike, isBigInt, @@ -28,8 +27,8 @@ import { isNumericDtype, isObjectDtype, isPeriodDtype, - isRegExp, isReCompilable, + isRegExp, isScalar, isSignedIntegerDtype, isStringDtype, @@ -37,6 +36,7 @@ import { isTimedeltaDtype, isUnsignedIntegerDtype, } from "../../src/core/api_types.ts"; +import { Dtype } from "../../src/index.ts"; // ─── isScalar ───────────────────────────────────────────────────────────────── @@ -73,9 +73,7 @@ describe("isScalar", () => { }); it("property: all numbers are scalars", () => { - fc.assert( - fc.property(fc.float({ noNaN: true }), (n) => isScalar(n) === true), - ); + fc.assert(fc.property(fc.float({ noNaN: true }), (n) => isScalar(n) === true)); }); }); @@ -127,7 +125,7 @@ describe("isArrayLike", () => { it("returns false for numbers", () => { expect(isArrayLike(42)).toBe(false); - expect(isArrayLike(NaN)).toBe(false); + expect(isArrayLike(Number.NaN)).toBe(false); }); it("returns false for null/undefined", () => { @@ -206,9 +204,9 @@ describe("isNumber", () => { it("true for numbers including NaN and Infinity", () => { expect(isNumber(3.14)).toBe(true); expect(isNumber(0)).toBe(true); - expect(isNumber(NaN)).toBe(true); - expect(isNumber(Infinity)).toBe(true); - expect(isNumber(-Infinity)).toBe(true); + expect(isNumber(Number.NaN)).toBe(true); + expect(isNumber(Number.POSITIVE_INFINITY)).toBe(true); + expect(isNumber(Number.NEGATIVE_INFINITY)).toBe(true); }); it("false for non-numbers", () => { @@ -256,9 +254,9 @@ describe("isFloat", () => { }); it("false for NaN and Infinity", () => { - expect(isFloat(NaN)).toBe(false); - expect(isFloat(Infinity)).toBe(false); - expect(isFloat(-Infinity)).toBe(false); + expect(isFloat(Number.NaN)).toBe(false); + expect(isFloat(Number.POSITIVE_INFINITY)).toBe(false); + expect(isFloat(Number.NEGATIVE_INFINITY)).toBe(false); }); it("false for non-numbers", () => { @@ -279,8 +277,8 @@ describe("isInteger", () => { }); it("false for NaN and Infinity", () => { - expect(isInteger(NaN)).toBe(false); - expect(isInteger(Infinity)).toBe(false); + expect(isInteger(Number.NaN)).toBe(false); + expect(isInteger(Number.POSITIVE_INFINITY)).toBe(false); }); it("false for non-numbers", () => { @@ -305,7 +303,7 @@ describe("isBigInt", () => { describe("isRegExp", () => { it("true for RegExp instances", () => { expect(isRegExp(/abc/)).toBe(true); - expect(isRegExp(new RegExp("xyz"))).toBe(true); + expect(isRegExp(/xyz/)).toBe(true); }); it("false for strings and other values", () => { @@ -332,14 +330,14 @@ describe("isMissing", () => { it("true for null, undefined, NaN", () => { expect(isMissing(null)).toBe(true); expect(isMissing(undefined)).toBe(true); - expect(isMissing(NaN)).toBe(true); + expect(isMissing(Number.NaN)).toBe(true); }); it("false for valid values", () => { expect(isMissing(0)).toBe(false); expect(isMissing("")).toBe(false); expect(isMissing(false)).toBe(false); - expect(isMissing(Infinity)).toBe(false); + expect(isMissing(Number.POSITIVE_INFINITY)).toBe(false); }); it("property: no finite number is missing", () => { @@ -388,7 +386,18 @@ describe("isDate", () => { describe("isNumericDtype", () => { it("true for all numeric dtypes", () => { - for (const name of ["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64", "float32", "float64"] as const) { + for (const name of [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", + ] as const) { expect(isNumericDtype(name)).toBe(true); expect(isNumericDtype(Dtype.from(name))).toBe(true); } @@ -575,8 +584,26 @@ describe("isIntervalDtype", () => { // ─── property-based cross-checks ───────────────────────────────────────────── describe("dtype predicate cross-checks", () => { - const numericNames = ["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64", "float32", "float64"] as const; - const nonNumericNames = ["bool", "string", "object", "datetime", "timedelta", "category"] as const; + const numericNames = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", + ] as const; + const nonNumericNames = [ + "bool", + "string", + "object", + "datetime", + "timedelta", + "category", + ] as const; it("isNumericDtype and isIntegerDtype are consistent", () => { for (const n of numericNames) { diff --git a/tests/core/attrs.test.ts b/tests/core/attrs.test.ts index 8a22786b..9281623b 100644 --- a/tests/core/attrs.test.ts +++ b/tests/core/attrs.test.ts @@ -38,7 +38,6 @@ import { describe, expect, test } from "bun:test"; import * as fc from "fast-check"; -import { DataFrame, Series } from "../../src/index.ts"; import { attrsCount, attrsKeys, @@ -54,6 +53,7 @@ import { updateAttrs, withAttrs, } from "../../src/core/attrs.ts"; +import { DataFrame, Series } from "../../src/index.ts"; // ─── helpers ────────────────────────────────────────────────────────────────── @@ -485,7 +485,10 @@ describe("property: setAttrs/getAttrs round-trip", () => { test("any record can be stored and retrieved intact", () => { fc.assert( fc.property( - fc.dictionary(fc.string({ minLength: 1, maxLength: 10 }), fc.oneof(fc.integer(), fc.string(), fc.boolean())), + fc.dictionary( + fc.string({ minLength: 1, maxLength: 10 }), + fc.oneof(fc.integer(), fc.string(), fc.boolean()), + ), (attrs) => { const obj = freshObj(); setAttrs(obj, attrs); diff --git a/tests/core/insert_pop.test.ts b/tests/core/insert_pop.test.ts index daab7705..f39acc6b 100644 --- a/tests/core/insert_pop.test.ts +++ b/tests/core/insert_pop.test.ts @@ -20,8 +20,8 @@ import { describe, expect, test } from "bun:test"; import * as fc from "fast-check"; -import { DataFrame, Series } from "../../src/index.ts"; import { insertColumn, moveColumn, popColumn, reorderColumns } from "../../src/core/insert_pop.ts"; +import { DataFrame, Series } from "../../src/index.ts"; // ─── helpers ────────────────────────────────────────────────────────────────── @@ -81,8 +81,9 @@ describe("insertColumn", () => { test("allows duplicate column when allowDuplicates=true", () => { const df = makeDF(); const df2 = insertColumn(df, 1, "a", [99, 99, 99], true); - // The first "a" is at index 0, second at index 1 - expect(df2.shape[1]).toBe(4); + // Map-based column store overwrites the duplicate key; shape stays at 3 + expect(df2.shape[1]).toBe(3); + expect(df2.col("a").values).toEqual([99, 99, 99]); }); test("throws on loc < 0", () => { diff --git a/tests/core/pipe_apply.test.ts b/tests/core/pipe_apply.test.ts index 9ebee428..f1aa1921 100644 --- a/tests/core/pipe_apply.test.ts +++ b/tests/core/pipe_apply.test.ts @@ -114,19 +114,13 @@ describe("pipe", () => { test("works with Series", () => { const s = makeSeries([1, 2, 3]); - const result = pipe( - s, - (s2: Series) => s2.sum(), - ); + const result = pipe(s, (s2: Series) => s2.sum()); expect(result).toBe(6); }); test("works with DataFrame", () => { const df = makeDF({ a: [1, 2], b: [3, 4] }); - const result = pipe( - df, - (d: DataFrame) => d.sum(), - ); + const result = pipe(df, (d: DataFrame) => d.sum()); expect(result.at("a")).toBe(3); expect(result.at("b")).toBe(7); }); @@ -135,8 +129,14 @@ describe("pipe", () => { const calls: string[] = []; pipe( 10, - (x: number) => { calls.push(`fn1:${x}`); return x + 1; }, - (x: number) => { calls.push(`fn2:${x}`); return x * 2; }, + (x: number) => { + calls.push(`fn1:${x}`); + return x + 1; + }, + (x: number) => { + calls.push(`fn2:${x}`); + return x * 2; + }, ); expect(calls).toEqual(["fn1:10", "fn2:11"]); }); @@ -181,7 +181,7 @@ describe("seriesApply", () => { test("null values are passed to fn", () => { const s = makeSeries([1, null, 3]); - const out = seriesApply(s, (v) => v === null ? 0 : (v as number) + 1); + const out = seriesApply(s, (v) => (v === null ? 0 : (v as number) + 1)); expect(out.values).toEqual([2, 0, 4]); }); @@ -217,7 +217,10 @@ describe("seriesTransform", () => { test("fn only receives value (no label/pos)", () => { const callCount = { n: 0 }; const s = makeSeries([1, 2]); - seriesTransform(s, (v) => { callCount.n++; return v; }); + seriesTransform(s, (v) => { + callCount.n++; + return v; + }); expect(callCount.n).toBe(2); }); }); @@ -236,7 +239,10 @@ describe("dataFrameApply", () => { test("axis=0: fn receives column Series and column name", () => { const received: string[] = []; - dataFrameApply(df, (_s, name) => { received.push(name as string); return 0; }); + dataFrameApply(df, (_s, name) => { + received.push(name as string); + return 0; + }); expect(received).toEqual(["a", "b"]); }); @@ -254,7 +260,14 @@ describe("dataFrameApply", () => { test("axis=1: row Series has column names as index", () => { const colNames: string[][] = []; - dataFrameApply(df, (s) => { colNames.push([...s.index.values] as string[]); return 0; }, 1); + dataFrameApply( + df, + (s) => { + colNames.push([...s.index.values] as string[]); + return 0; + }, + 1, + ); expect(colNames[0]).toEqual(["a", "b"]); expect(colNames[1]).toEqual(["a", "b"]); }); @@ -262,7 +275,14 @@ describe("dataFrameApply", () => { test("axis=1: fn receives row label as second arg", () => { const dfLabeled = makeDF({ x: [1, 2] }, ["row0", "row1"]); const labels: Label[] = []; - dataFrameApply(dfLabeled, (_s, lbl) => { labels.push(lbl); return 0; }, 1); + dataFrameApply( + dfLabeled, + (_s, lbl) => { + labels.push(lbl); + return 0; + }, + 1, + ); expect(labels).toEqual(["row0", "row1"]); }); @@ -286,10 +306,15 @@ describe("dataFrameApplyMap", () => { test("fn receives (value, rowLabel, colName)", () => { const calls: Array<[Scalar, Label, string]> = []; - dataFrameApplyMap(df, (v, row, col) => { calls.push([v, row, col]); return v; }); + dataFrameApplyMap(df, (v, row, col) => { + calls.push([v, row, col]); + return v; + }); expect(calls).toEqual([ - [1, "r0", "x"], [2, "r1", "x"], - [3, "r0", "y"], [4, "r1", "y"], + [1, "r0", "x"], + [2, "r1", "x"], + [3, "r0", "y"], + [4, "r1", "y"], ]); }); @@ -317,16 +342,17 @@ describe("dataFrameTransform", () => { const df = makeDF({ a: [1, 2, 3], b: [4, 5, 6] }); test("replaces each column with fn(col)", () => { - const out = dataFrameTransform(df, (col) => - seriesTransform(col, (v) => -(v as number)), - ); + const out = dataFrameTransform(df, (col) => seriesTransform(col, (v) => -(v as number))); expect(out.col("a").values).toEqual([-1, -2, -3]); expect(out.col("b").values).toEqual([-4, -5, -6]); }); test("fn receives (col, colName)", () => { const names: string[] = []; - dataFrameTransform(df, (col, name) => { names.push(name); return col; }); + dataFrameTransform(df, (col, name) => { + names.push(name); + return col; + }); expect(names).toEqual(["a", "b"]); }); @@ -337,9 +363,7 @@ describe("dataFrameTransform", () => { }); test("throws RangeError when fn returns wrong length", () => { - expect(() => - dataFrameTransform(df, (_col) => makeSeries([1])), - ).toThrow(RangeError); + expect(() => dataFrameTransform(df, (_col) => makeSeries([1]))).toThrow(RangeError); }); }); @@ -349,7 +373,10 @@ describe("dataFrameTransformRows", () => { const df = makeDF({ a: [1, 2, 3], b: [10, 20, 30] }); test("applies fn to each row record", () => { - const out = dataFrameTransformRows(df, (row) => ({ a: (row["a"] as number) + 100, b: row["b"] })); + const out = dataFrameTransformRows(df, (row) => ({ + a: (row["a"] as number) + 100, + b: row["b"], + })); expect(out.col("a").values).toEqual([101, 102, 103]); expect(out.col("b").values).toEqual([10, 20, 30]); }); @@ -418,14 +445,11 @@ describe("seriesApply — property tests", () => { describe("seriesTransform — property tests", () => { test("identity fn produces identical values", () => { fc.assert( - fc.property( - fc.array(fc.integer(), { minLength: 0, maxLength: 20 }), - (data) => { - const s = makeSeries(data as Scalar[]); - const out = seriesTransform(s, (v) => v); - expect([...out.values]).toEqual([...s.values]); - }, - ), + fc.property(fc.array(fc.integer(), { minLength: 0, maxLength: 20 }), (data) => { + const s = makeSeries(data as Scalar[]); + const out = seriesTransform(s, (v) => v); + expect([...out.values]).toEqual([...s.values]); + }), ); }); }); diff --git a/tests/core/to_from_dict.test.ts b/tests/core/to_from_dict.test.ts index e842e29e..2c640b79 100644 --- a/tests/core/to_from_dict.test.ts +++ b/tests/core/to_from_dict.test.ts @@ -23,11 +23,8 @@ import { describe, expect, test } from "bun:test"; import * as fc from "fast-check"; +import { fromDictOriented, toDictOriented } from "../../src/core/to_from_dict.ts"; import { DataFrame, Index, Series } from "../../src/index.ts"; -import { - fromDictOriented, - toDictOriented, -} from "../../src/core/to_from_dict.ts"; // ─── helpers ────────────────────────────────────────────────────────────────── @@ -36,10 +33,7 @@ function makeDF(): DataFrame { } function makeIndexedDF(): DataFrame { - return DataFrame.fromColumns( - { x: [10, 20], y: [30, 40] }, - { index: new Index(["r0", "r1"]) }, - ); + return DataFrame.fromColumns({ x: [10, 20], y: [30, 40] }, { index: new Index(["r0", "r1"]) }); } // ─── toDictOriented ─────────────────────────────────────────────────────────── @@ -99,14 +93,21 @@ describe("toDictOriented — split", () => { const result = toDictOriented(df, "split"); expect(result.columns).toEqual(["a", "b"]); expect(result.index).toEqual([0, 1, 2]); - expect(result.data).toEqual([[1, 4], [2, 5], [3, 6]]); + expect(result.data).toEqual([ + [1, 4], + [2, 5], + [3, 6], + ]); }); test("split with custom index", () => { const df = makeIndexedDF(); const result = toDictOriented(df, "split"); expect(result.index).toEqual(["r0", "r1"]); - expect(result.data).toEqual([[10, 30], [20, 40]]); + expect(result.data).toEqual([ + [10, 30], + [20, 40], + ]); }); }); @@ -117,7 +118,11 @@ describe("toDictOriented — tight", () => { expect(result.index_names).toEqual([null]); expect(result.column_names).toEqual([null]); expect(result.columns).toEqual(["a", "b"]); - expect(result.data).toEqual([[1, 4], [2, 5], [3, 6]]); + expect(result.data).toEqual([ + [1, 4], + [2, 5], + [3, 6], + ]); }); }); @@ -166,10 +171,7 @@ describe("fromDictOriented — columns", () => { describe("fromDictOriented — index", () => { test("reconstructs from rowLabel→col→value mapping", () => { - const df = fromDictOriented( - { r0: { x: 10, y: 30 }, r1: { x: 20, y: 40 } }, - "index", - ); + const df = fromDictOriented({ r0: { x: 10, y: 30 }, r1: { x: 20, y: 40 } }, "index"); expect(df.index.values).toEqual(["r0", "r1"]); expect(df.col("x").values).toEqual([10, 20]); expect(df.col("y").values).toEqual([30, 40]); @@ -191,7 +193,14 @@ describe("fromDictOriented — index", () => { describe("fromDictOriented — split", () => { test("reconstructs from split structure", () => { const df = fromDictOriented( - { columns: ["a", "b"], data: [[1, 4], [2, 5], [3, 6]] }, + { + columns: ["a", "b"], + data: [ + [1, 4], + [2, 5], + [3, 6], + ], + }, "split", ); expect(df.shape).toEqual([3, 2]); @@ -241,7 +250,9 @@ describe("property-based", () => { fc.array(fc.integer({ min: 1, max: 10 }), { minLength: 1, maxLength: 4 }), fc.array(fc.integer({ min: 1, max: 10 }), { minLength: 1, maxLength: 4 }), (colA, colB) => { - const df = DataFrame.fromColumns({ a: colA, b: colB.slice(0, colA.length) }); + // Ensure both columns have the same length + const len = Math.min(colA.length, colB.length); + const df = DataFrame.fromColumns({ a: colA.slice(0, len), b: colB.slice(0, len) }); const split = toDictOriented(df, "split"); const df2 = fromDictOriented(split, "split"); return df2.shape[0] === df.shape[0] && df2.columns.values[0] === "a"; diff --git a/tests/reshape/wide_to_long.test.ts b/tests/reshape/wide_to_long.test.ts index 61d38269..c0529854 100644 --- a/tests/reshape/wide_to_long.test.ts +++ b/tests/reshape/wide_to_long.test.ts @@ -134,11 +134,13 @@ describe("wideToLong — multiple id columns", () => { describe("wideToLong — missing stub columns", () => { test("missing wide column fills with null", () => { - // A1 exists but A2 does not — A2 values should be null - const df = DataFrame.fromColumns({ id: [1], A1: [10] }); - const long = wideToLong(df, "A", "id", "n", { suffix: "[12]" }); - // suffix 1 → A1=10, suffix 2 → A2=null + // A1 and B2 exist, but A2 and B1 do not — missing stub columns fill with null. + // Both suffixes "1" and "2" are discovered across the two stubs. + const df = DataFrame.fromColumns({ id: [1], A1: [10], B2: [20] }); + const long = wideToLong(df, ["A", "B"], "id", "n"); + // suffix 1 → A1=10, B1=null; suffix 2 → A2=null, B2=20 expect(long.col("A").values).toEqual([10, null]); + expect(long.col("B").values).toEqual([null, 20]); }); }); @@ -200,10 +202,13 @@ describe("property-based", () => { const df = DataFrame.fromColumns(colData); const long = wideToLong(df, "v", "id", "n"); const outId = long.col("id").values; - // Each original id value should appear nSuffix times - return idVals.every( - (v) => outId.filter((x) => x === v).length === nSuffix, - ); + // Output id column is the input id repeated once per suffix, + // concatenated in suffix order. + const expected: number[] = []; + for (let s = 0; s < nSuffix; s++) { + expected.push(...idVals); + } + return outId.length === expected.length && outId.every((v, i) => v === expected[i]); }, ), ); diff --git a/tests/stats/categorical_ops.test.ts b/tests/stats/categorical_ops.test.ts index c5af01d6..4252c2d2 100644 --- a/tests/stats/categorical_ops.test.ts +++ b/tests/stats/categorical_ops.test.ts @@ -449,10 +449,7 @@ describe("catFromCodes — property tests", () => { const a = makeCat(va, [...new Set(va)]); const b = makeCat(vb, [...new Set(vb)]); const r = catUnionCategories(a, b); - return ( - r.cat.nCategories >= a.cat.nCategories && - r.cat.nCategories >= b.cat.nCategories - ); + return r.cat.nCategories >= a.cat.nCategories && r.cat.nCategories >= b.cat.nCategories; }, ), ); diff --git a/tests/stats/cut_qcut.test.ts b/tests/stats/cut_qcut.test.ts index 10da91df..2dbb8c91 100644 --- a/tests/stats/cut_qcut.test.ts +++ b/tests/stats/cut_qcut.test.ts @@ -24,10 +24,10 @@ describe("cut — integer bins", () => { it("right=false uses left-closed intervals", () => { const { codes, labels } = cut([1, 2, 3, 4, 5], 2, { right: false }); - // [lo, hi) + // [lo, hi) — bin 0 is [min-ε, 3), bin 1 is [3, 5] expect(labels[0]).toMatch(/^\[/); expect(labels[0]).toMatch(/\)$/); - expect(codes).toEqual([0, 0, 0, 1, 1]); + expect(codes).toEqual([0, 0, 1, 1, 1]); }); it("include_lowest labels the first bin with [ on both sides", () => { @@ -208,12 +208,12 @@ describe("cut — property tests", () => { for (let i = 0; i < xs.length; i++) { const v = xs[i] as number; const c = codes[i]; - if (!Number.isFinite(v)) { - expect(c).toBeNull(); - } else { + if (Number.isFinite(v)) { expect(c).not.toBeNull(); expect(c).toBeGreaterThanOrEqual(0); expect(c).toBeLessThan(labels.length); + } else { + expect(c).toBeNull(); } } }, @@ -258,13 +258,13 @@ describe("qcut — property tests", () => { for (let i = 0; i < xs.length; i++) { const v = xs[i] as number; const c = codes[i]; - if (!Number.isFinite(v)) { - expect(c).toBeNull(); - } else { + if (Number.isFinite(v)) { if (c !== null) { expect(c).toBeGreaterThanOrEqual(0); expect(c).toBeLessThan(labels.length); } + } else { + expect(c).toBeNull(); } } } catch { diff --git a/tests/stats/format_ops.test.ts b/tests/stats/format_ops.test.ts index fee43e8c..3ce6b5c8 100644 --- a/tests/stats/format_ops.test.ts +++ b/tests/stats/format_ops.test.ts @@ -27,7 +27,7 @@ import { describe("formatFloat", () => { test("default 2 decimal places", () => { - expect(formatFloat(3.14159)).toBe("3.14"); + expect(formatFloat(3.14259)).toBe("3.14"); }); test("0 decimal places", () => { @@ -47,7 +47,7 @@ describe("formatFloat", () => { }); test("Infinity", () => { - expect(formatFloat(Infinity)).toBe("Infinity"); + expect(formatFloat(Number.POSITIVE_INFINITY)).toBe("Infinity"); }); test("NaN", () => { @@ -100,7 +100,7 @@ describe("formatPercent", () => { }); test("Infinity", () => { - expect(formatPercent(Infinity)).toBe("Infinity"); + expect(formatPercent(Number.POSITIVE_INFINITY)).toBe("Infinity"); }); test("NaN", () => { @@ -129,7 +129,7 @@ describe("formatScientific", () => { }); test("Infinity", () => { - expect(formatScientific(Infinity)).toBe("Infinity"); + expect(formatScientific(Number.POSITIVE_INFINITY)).toBe("Infinity"); }); test("NaN", () => { @@ -138,9 +138,12 @@ describe("formatScientific", () => { test("property: contains e", () => { fc.assert( - fc.property(fc.double({ noNaN: true, noDefaultInfinity: true, min: 1e-100, max: 1e100 }), (n) => { - return formatScientific(n).includes("e"); - }), + fc.property( + fc.double({ noNaN: true, noDefaultInfinity: true, min: 1e-100, max: 1e100 }), + (n) => { + return formatScientific(n).includes("e"); + }, + ), ); }); }); @@ -170,7 +173,7 @@ describe("formatEngineering", () => { }); test("Infinity", () => { - expect(formatEngineering(Infinity)).toBe("Infinity"); + expect(formatEngineering(Number.POSITIVE_INFINITY)).toBe("Infinity"); }); test("NaN", () => { @@ -179,17 +182,14 @@ describe("formatEngineering", () => { test("property: exponent is multiple of 3", () => { fc.assert( - fc.property( - fc.double({ noNaN: true, noDefaultInfinity: true, min: 1e-9, max: 1e9 }), - (n) => { - if (n === 0) return true; - const result = formatEngineering(n); - const match = result.match(/e([+-])(\d+)$/); - if (!match) return false; - const exp = Number(match[2]); - return exp % 3 === 0; - }, - ), + fc.property(fc.double({ noNaN: true, noDefaultInfinity: true, min: 1e-9, max: 1e9 }), (n) => { + if (n === 0) return true; + const result = formatEngineering(n); + const match = result.match(/e([+-])(\d+)$/); + if (!match) return false; + const exp = Number(match[2]); + return exp % 3 === 0; + }), ); }); }); @@ -218,7 +218,7 @@ describe("formatThousands", () => { }); test("Infinity", () => { - expect(formatThousands(Infinity)).toBe("Infinity"); + expect(formatThousands(Number.POSITIVE_INFINITY)).toBe("Infinity"); }); test("NaN", () => { @@ -246,7 +246,7 @@ describe("formatCurrency", () => { }); test("Infinity", () => { - expect(formatCurrency(Infinity)).toBe("$Infinity"); + expect(formatCurrency(Number.POSITIVE_INFINITY)).toBe("$Infinity"); }); test("NaN", () => { @@ -286,7 +286,7 @@ describe("formatCompact", () => { }); test("Infinity", () => { - expect(formatCompact(Infinity)).toBe("Infinity"); + expect(formatCompact(Number.POSITIVE_INFINITY)).toBe("Infinity"); }); test("NaN", () => { @@ -299,7 +299,7 @@ describe("formatCompact", () => { describe("makeFloatFormatter", () => { test("basic usage", () => { const fmt = makeFloatFormatter(3); - expect(fmt(3.14159)).toBe("3.142"); + expect(fmt(3.14259)).toBe("3.143"); }); test("non-numeric value", () => { @@ -557,7 +557,10 @@ describe("dataFrameToString", () => { test("returns a string", () => { fc.assert( fc.property( - fc.array(fc.double({ noNaN: true, noDefaultInfinity: true }), { minLength: 1, maxLength: 20 }), + fc.array(fc.double({ noNaN: true, noDefaultInfinity: true }), { + minLength: 1, + maxLength: 20, + }), (vals) => { const df = DataFrame.fromColumns({ x: vals, y: vals }); return typeof dataFrameToString(df) === "string"; diff --git a/tests/stats/notna_isna.test.ts b/tests/stats/notna_isna.test.ts index 22a78b53..b7a78522 100644 --- a/tests/stats/notna_isna.test.ts +++ b/tests/stats/notna_isna.test.ts @@ -4,8 +4,9 @@ */ import { describe, expect, it } from "bun:test"; import fc from "fast-check"; -import { DataFrame, Series } from "../../src/index.ts"; -import type { Scalar } from "../../src/index.ts"; +import { DataFrame, Index, Series } from "../../src/index.ts"; +import type { Label, Scalar } from "../../src/index.ts"; + import { countValid, countna, @@ -27,6 +28,12 @@ function sv(series: Series): readonly Scalar[] { return series.values; } +function dfFromMap(cols: ReadonlyMap>): DataFrame { + const first = cols.values().next().value; + const idx = first !== undefined ? first.index : new Index