diff --git a/benchmarks/pandas/bench_cat_from_codes.py b/benchmarks/pandas/bench_cat_from_codes.py new file mode 100644 index 00000000..75c06709 --- /dev/null +++ b/benchmarks/pandas/bench_cat_from_codes.py @@ -0,0 +1,21 @@ +"""Benchmark: Categorical from codes on 100k-element array""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +categories = ["apple", "banana", "cherry", "date", "elderberry"] +codes = np.arange(ROWS) % len(categories) + +for _ in range(WARMUP): + pd.Categorical.from_codes(codes, categories=categories) + +start = time.perf_counter() +for _ in range(ITERATIONS): + pd.Categorical.from_codes(codes, categories=categories) +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "cat_from_codes", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_cut.py b/benchmarks/pandas/bench_cut.py new file mode 100644 index 00000000..5e8ad73c --- /dev/null +++ b/benchmarks/pandas/bench_cut.py @@ -0,0 +1,21 @@ +"""Benchmark: cut (bin into 10 bins) on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = (np.arange(ROWS) % 10000) * 0.01 +s = pd.Series(data) + +for _ in range(WARMUP): + pd.cut(s, 10) + +start = time.perf_counter() +for _ in range(ITERATIONS): + pd.cut(s, 10) +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "cut", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_dataframe_cov.py b/benchmarks/pandas/bench_dataframe_cov.py new file mode 100644 index 00000000..e291b8b8 --- /dev/null +++ b/benchmarks/pandas/bench_dataframe_cov.py @@ -0,0 +1,22 @@ +"""Benchmark: DataFrame covariance matrix on 1000x10 DataFrame""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 1_000 +COLS = 10 +WARMUP = 3 +ITERATIONS = 10 
+ +data = {f"col{c}": np.sin(np.arange(ROWS) * 0.01 + c) for c in range(COLS)} +df = pd.DataFrame(data) + +for _ in range(WARMUP): + df.cov() + +start = time.perf_counter() +for _ in range(ITERATIONS): + df.cov() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "dataframe_cov", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_ewm_std.py b/benchmarks/pandas/bench_ewm_std.py new file mode 100644 index 00000000..c2908411 --- /dev/null +++ b/benchmarks/pandas/bench_ewm_std.py @@ -0,0 +1,21 @@ +"""Benchmark: ewm std (alpha=0.1) on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.ewm(alpha=0.1).std() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.ewm(alpha=0.1).std() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "ewm_std", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_ewm_var.py b/benchmarks/pandas/bench_ewm_var.py new file mode 100644 index 00000000..1996bb70 --- /dev/null +++ b/benchmarks/pandas/bench_ewm_var.py @@ -0,0 +1,21 @@ +"""Benchmark: ewm var (alpha=0.1) on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.ewm(alpha=0.1).var() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.ewm(alpha=0.1).var() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "ewm_var", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_expanding_std.py b/benchmarks/pandas/bench_expanding_std.py new file mode 100644 index 
00000000..e584dd88 --- /dev/null +++ b/benchmarks/pandas/bench_expanding_std.py @@ -0,0 +1,21 @@ +"""Benchmark: expanding std on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.expanding().std() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.expanding().std() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "expanding_std", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_expanding_sum.py b/benchmarks/pandas/bench_expanding_sum.py new file mode 100644 index 00000000..d7e4386f --- /dev/null +++ b/benchmarks/pandas/bench_expanding_sum.py @@ -0,0 +1,21 @@ +"""Benchmark: expanding sum on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.expanding().sum() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.expanding().sum() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "expanding_sum", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_expanding_var.py b/benchmarks/pandas/bench_expanding_var.py new file mode 100644 index 00000000..22c7fca1 --- /dev/null +++ b/benchmarks/pandas/bench_expanding_var.py @@ -0,0 +1,21 @@ +"""Benchmark: expanding var on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.expanding().var() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.expanding().var() +total = (time.perf_counter() - start) * 
1000 + +print(json.dumps({ "function": "expanding_var", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_insert_column.py b/benchmarks/pandas/bench_insert_column.py new file mode 100644 index 00000000..f2a1a9e7 --- /dev/null +++ b/benchmarks/pandas/bench_insert_column.py @@ -0,0 +1,29 @@ +"""Benchmark: DataFrame insert column on 10000x3 DataFrame""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 10_000 +WARMUP = 3 +ITERATIONS = 20 + +new_col = np.arange(ROWS, dtype=float) * 4 + +def make_df(): + return pd.DataFrame({ + "a": np.arange(ROWS, dtype=float), + "b": np.arange(ROWS, dtype=float) * 2, + "c": np.arange(ROWS, dtype=float) * 3, + }) + +for _ in range(WARMUP): + df = make_df() + df.insert(1, "new_col", new_col) + +start = time.perf_counter() +for _ in range(ITERATIONS): + df = make_df() + df.insert(1, "new_col", new_col) +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "insert_column", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_qcut.py b/benchmarks/pandas/bench_qcut.py new file mode 100644 index 00000000..d3bf8894 --- /dev/null +++ b/benchmarks/pandas/bench_qcut.py @@ -0,0 +1,21 @@ +"""Benchmark: qcut (10 quantile bins) on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = (np.arange(ROWS) % 10000) * 0.01 +s = pd.Series(data) + +for _ in range(WARMUP): + pd.qcut(s, 10, duplicates="drop") + +start = time.perf_counter() +for _ in range(ITERATIONS): + pd.qcut(s, 10, duplicates="drop") +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "qcut", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_rolling_count.py b/benchmarks/pandas/bench_rolling_count.py new file mode 100644 index 
00000000..d6174a61 --- /dev/null +++ b/benchmarks/pandas/bench_rolling_count.py @@ -0,0 +1,21 @@ +"""Benchmark: rolling count with window=100 on 100k-element Series (with NaNs)""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.where(np.arange(ROWS) % 10 == 0, np.nan, np.arange(ROWS, dtype=float)) +s = pd.Series(data) + +for _ in range(WARMUP): + s.rolling(100).count() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.rolling(100).count() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "rolling_count", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_rolling_kurt.py b/benchmarks/pandas/bench_rolling_kurt.py new file mode 100644 index 00000000..6d0ada57 --- /dev/null +++ b/benchmarks/pandas/bench_rolling_kurt.py @@ -0,0 +1,21 @@ +"""Benchmark: rolling kurt with window=100 on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.rolling(100).kurt() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.rolling(100).kurt() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "rolling_kurt", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_rolling_max.py b/benchmarks/pandas/bench_rolling_max.py new file mode 100644 index 00000000..83c74c17 --- /dev/null +++ b/benchmarks/pandas/bench_rolling_max.py @@ -0,0 +1,21 @@ +"""Benchmark: rolling max with window=100 on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.cos(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.rolling(100).max() + +start = 
time.perf_counter() +for _ in range(ITERATIONS): + s.rolling(100).max() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "rolling_max", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_rolling_median.py b/benchmarks/pandas/bench_rolling_median.py new file mode 100644 index 00000000..91857c29 --- /dev/null +++ b/benchmarks/pandas/bench_rolling_median.py @@ -0,0 +1,21 @@ +"""Benchmark: rolling median with window=100 on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.1) +s = pd.Series(data) + +for _ in range(WARMUP): + s.rolling(100).median() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.rolling(100).median() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "rolling_median", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_rolling_min.py b/benchmarks/pandas/bench_rolling_min.py new file mode 100644 index 00000000..5afcf709 --- /dev/null +++ b/benchmarks/pandas/bench_rolling_min.py @@ -0,0 +1,21 @@ +"""Benchmark: rolling min with window=100 on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.rolling(100).min() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.rolling(100).min() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "rolling_min", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_rolling_quantile.py b/benchmarks/pandas/bench_rolling_quantile.py new file mode 100644 index 00000000..e74dd350 --- /dev/null +++ b/benchmarks/pandas/bench_rolling_quantile.py 
@@ -0,0 +1,21 @@ +"""Benchmark: rolling quantile (0.75) with window=100 on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.rolling(100).quantile(0.75) + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.rolling(100).quantile(0.75) +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "rolling_quantile", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_rolling_sem.py b/benchmarks/pandas/bench_rolling_sem.py new file mode 100644 index 00000000..a905f12f --- /dev/null +++ b/benchmarks/pandas/bench_rolling_sem.py @@ -0,0 +1,21 @@ +"""Benchmark: rolling SEM with window=100 on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.rolling(100).sem() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.rolling(100).sem() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "rolling_sem", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_rolling_skew.py b/benchmarks/pandas/bench_rolling_skew.py new file mode 100644 index 00000000..3089651c --- /dev/null +++ b/benchmarks/pandas/bench_rolling_skew.py @@ -0,0 +1,21 @@ +"""Benchmark: rolling skew with window=100 on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.rolling(100).skew() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.rolling(100).skew() +total = (time.perf_counter() - start) * 1000 + 
+print(json.dumps({ "function": "rolling_skew", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_rolling_var.py b/benchmarks/pandas/bench_rolling_var.py new file mode 100644 index 00000000..51b8e3ad --- /dev/null +++ b/benchmarks/pandas/bench_rolling_var.py @@ -0,0 +1,21 @@ +"""Benchmark: rolling var with window=100 on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.05) +s = pd.Series(data) + +for _ in range(WARMUP): + s.rolling(100).var() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.rolling(100).var() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "rolling_var", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_series_clip.py b/benchmarks/pandas/bench_series_clip.py new file mode 100644 index 00000000..d2b69fab --- /dev/null +++ b/benchmarks/pandas/bench_series_clip.py @@ -0,0 +1,21 @@ +"""Benchmark: series clip (lower=-1, upper=1) on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) * 2 +s = pd.Series(data) + +for _ in range(WARMUP): + s.clip(lower=-1, upper=1) + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.clip(lower=-1, upper=1) +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "series_clip", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_series_cummax.py b/benchmarks/pandas/bench_series_cummax.py new file mode 100644 index 00000000..4d14b758 --- /dev/null +++ b/benchmarks/pandas/bench_series_cummax.py @@ -0,0 +1,21 @@ +"""Benchmark: series cummax on 100k-element Series""" +import json, time +import numpy as np +import pandas as 
pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.cummax() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.cummax() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "series_cummax", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_series_cummin.py b/benchmarks/pandas/bench_series_cummin.py new file mode 100644 index 00000000..38fcdda2 --- /dev/null +++ b/benchmarks/pandas/bench_series_cummin.py @@ -0,0 +1,21 @@ +"""Benchmark: series cummin on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) + +for _ in range(WARMUP): + s.cummin() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.cummin() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "series_cummin", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_series_cumprod.py b/benchmarks/pandas/bench_series_cumprod.py new file mode 100644 index 00000000..9ce28a01 --- /dev/null +++ b/benchmarks/pandas/bench_series_cumprod.py @@ -0,0 +1,21 @@ +"""Benchmark: series cumprod on 10k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 10_000 +WARMUP = 5 +ITERATIONS = 20 + +data = 1 + (np.arange(ROWS) % 1000) * 0.0001 +s = pd.Series(data) + +for _ in range(WARMUP): + s.cumprod() + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.cumprod() +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "series_cumprod", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_series_mask.py b/benchmarks/pandas/bench_series_mask.py new file mode 100644 
index 00000000..b73bf1f8 --- /dev/null +++ b/benchmarks/pandas/bench_series_mask.py @@ -0,0 +1,22 @@ +"""Benchmark: series mask (replace values < 0 with NaN) on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) +cond = s < 0 + +for _ in range(WARMUP): + s.mask(cond) + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.mask(cond) +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "series_mask", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_series_round.py b/benchmarks/pandas/bench_series_round.py new file mode 100644 index 00000000..a068b849 --- /dev/null +++ b/benchmarks/pandas/bench_series_round.py @@ -0,0 +1,21 @@ +"""Benchmark: series round (2 decimals) on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = (np.arange(ROWS) % 10000) * 0.1234 +s = pd.Series(data) + +for _ in range(WARMUP): + s.round(2) + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.round(2) +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "series_round", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_series_where.py b/benchmarks/pandas/bench_series_where.py new file mode 100644 index 00000000..63b21c68 --- /dev/null +++ b/benchmarks/pandas/bench_series_where.py @@ -0,0 +1,22 @@ +"""Benchmark: series where (keep values > 0) on 100k-element Series""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 100_000 +WARMUP = 3 +ITERATIONS = 10 + +data = np.sin(np.arange(ROWS) * 0.01) +s = pd.Series(data) +cond = s > 0 + +for _ in range(WARMUP): + s.where(cond) + +start = time.perf_counter() +for _ in range(ITERATIONS): + s.where(cond) 
+total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "series_where", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_stack.py b/benchmarks/pandas/bench_stack.py new file mode 100644 index 00000000..9c300f9a --- /dev/null +++ b/benchmarks/pandas/bench_stack.py @@ -0,0 +1,26 @@ +"""Benchmark: DataFrame stack on 1000x5 DataFrame""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 1_000 +WARMUP = 3 +ITERATIONS = 20 + +df = pd.DataFrame({ + "a": np.arange(ROWS, dtype=float), + "b": np.arange(ROWS, dtype=float) * 2, + "c": np.arange(ROWS, dtype=float) * 3, + "d": np.arange(ROWS, dtype=float) * 4, + "e": np.arange(ROWS, dtype=float) * 5, +}) + +for _ in range(WARMUP): + df.stack(future_stack=True) + +start = time.perf_counter() +for _ in range(ITERATIONS): + df.stack(future_stack=True) +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "stack", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_to_dict_oriented.py b/benchmarks/pandas/bench_to_dict_oriented.py new file mode 100644 index 00000000..d380b45f --- /dev/null +++ b/benchmarks/pandas/bench_to_dict_oriented.py @@ -0,0 +1,26 @@ +"""Benchmark: DataFrame to_dict(orient='records') on 1000x5 DataFrame""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 1_000 +WARMUP = 3 +ITERATIONS = 20 + +df = pd.DataFrame({ + "a": np.arange(ROWS, dtype=float), + "b": np.arange(ROWS, dtype=float) * 2, + "c": np.arange(ROWS, dtype=float) * 3, + "d": [f"str{i}" for i in range(ROWS)], + "e": np.arange(ROWS, dtype=float) * 0.5, +}) + +for _ in range(WARMUP): + df.to_dict(orient="records") + +start = time.perf_counter() +for _ in range(ITERATIONS): + df.to_dict(orient="records") +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "to_dict_oriented", "mean_ms": total / ITERATIONS, 
"iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/pandas/bench_wide_to_long.py b/benchmarks/pandas/bench_wide_to_long.py new file mode 100644 index 00000000..dc7b6a78 --- /dev/null +++ b/benchmarks/pandas/bench_wide_to_long.py @@ -0,0 +1,26 @@ +"""Benchmark: wide_to_long on 1000x4 DataFrame""" +import json, time +import numpy as np +import pandas as pd + +ROWS = 1_000 +WARMUP = 3 +ITERATIONS = 10 + +ids = list(range(ROWS)) +df = pd.DataFrame({ + "id": ids, + "value_2020": [i * 1.0 for i in ids], + "value_2021": [i * 1.1 for i in ids], + "value_2022": [i * 1.2 for i in ids], +}) + +for _ in range(WARMUP): + pd.wide_to_long(df, stubnames=["value"], i="id", j="year", sep="_") + +start = time.perf_counter() +for _ in range(ITERATIONS): + pd.wide_to_long(df, stubnames=["value"], i="id", j="year", sep="_") +total = (time.perf_counter() - start) * 1000 + +print(json.dumps({ "function": "wide_to_long", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total })) diff --git a/benchmarks/tsb/bench_cat_from_codes.ts b/benchmarks/tsb/bench_cat_from_codes.ts new file mode 100644 index 00000000..78ab106a --- /dev/null +++ b/benchmarks/tsb/bench_cat_from_codes.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: catFromCodes on 100k-element array + */ +import { catFromCodes } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const categories = ["apple", "banana", "cherry", "date", "elderberry"]; +const codes = Int32Array.from({ length: ROWS }, (_, i) => i % categories.length); + +for (let i = 0; i < WARMUP; i++) { + catFromCodes(codes, categories); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + catFromCodes(codes, categories); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "cat_from_codes", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_cut.ts b/benchmarks/tsb/bench_cut.ts 
new file mode 100644 index 00000000..266a6863 --- /dev/null +++ b/benchmarks/tsb/bench_cut.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: cut (bin into 10 bins) on 100k-element Series + */ +import { Series, cut } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => (i % 10000) * 0.01); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + cut(s, 10); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + cut(s, 10); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "cut", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_dataframe_cov.ts b/benchmarks/tsb/bench_dataframe_cov.ts new file mode 100644 index 00000000..16426f01 --- /dev/null +++ b/benchmarks/tsb/bench_dataframe_cov.ts @@ -0,0 +1,27 @@ +/** + * Benchmark: DataFrame covariance matrix on 1000x10 DataFrame + */ +import { DataFrame, dataFrameCov } from "../../src/index.js"; + +const ROWS = 1_000; +const COLS = 10; +const WARMUP = 3; +const ITERATIONS = 10; + +const columns: Record<string, number[]> = {}; +for (let c = 0; c < COLS; c++) { + columns[`col${c}`] = Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01 + c)); +} +const df = new DataFrame(columns); + +for (let i = 0; i < WARMUP; i++) { + dataFrameCov(df); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + dataFrameCov(df); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "dataframe_cov", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_ewm_std.ts b/benchmarks/tsb/bench_ewm_std.ts new file mode 100644 index 00000000..d6255bbc --- /dev/null +++ b/benchmarks/tsb/bench_ewm_std.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: ewm std (alpha=0.1) on 100k-element Series + */ +import { Series } from "../../src/index.js"; + 
+const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + s.ewm({ alpha: 0.1 }).std(); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + s.ewm({ alpha: 0.1 }).std(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "ewm_std", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_ewm_var.ts b/benchmarks/tsb/bench_ewm_var.ts new file mode 100644 index 00000000..dd6f2121 --- /dev/null +++ b/benchmarks/tsb/bench_ewm_var.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: ewm var (alpha=0.1) on 100k-element Series + */ +import { Series } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + s.ewm({ alpha: 0.1 }).var(); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + s.ewm({ alpha: 0.1 }).var(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "ewm_var", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_expanding_std.ts b/benchmarks/tsb/bench_expanding_std.ts new file mode 100644 index 00000000..8f25662f --- /dev/null +++ b/benchmarks/tsb/bench_expanding_std.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: expanding std on 100k-element Series + */ +import { Series } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + s.expanding().std(); +} + +const start = performance.now(); +for (let i = 0; i < 
ITERATIONS; i++) { + s.expanding().std(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "expanding_std", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_expanding_sum.ts b/benchmarks/tsb/bench_expanding_sum.ts new file mode 100644 index 00000000..d469d9a1 --- /dev/null +++ b/benchmarks/tsb/bench_expanding_sum.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: expanding sum on 100k-element Series + */ +import { Series } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + s.expanding().sum(); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + s.expanding().sum(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "expanding_sum", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_expanding_var.ts b/benchmarks/tsb/bench_expanding_var.ts new file mode 100644 index 00000000..16f9f9b8 --- /dev/null +++ b/benchmarks/tsb/bench_expanding_var.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: expanding var on 100k-element Series + */ +import { Series } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + s.expanding().var(); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + s.expanding().var(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "expanding_var", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_insert_column.ts b/benchmarks/tsb/bench_insert_column.ts new 
file mode 100644 index 00000000..2bd155de --- /dev/null +++ b/benchmarks/tsb/bench_insert_column.ts @@ -0,0 +1,32 @@ +/** + * Benchmark: insertColumn on 10000x3 DataFrame; the DataFrame is rebuilt inside every warm-up and timed iteration, so mean_ms includes its construction + */ +import { DataFrame, insertColumn } from "../../src/index.js"; + +const ROWS = 10_000; +const WARMUP = 3; +const ITERATIONS = 20; + +const newCol = Float64Array.from({ length: ROWS }, (_, i) => i * 4); + +for (let i = 0; i < WARMUP; i++) { + const df = new DataFrame({ + a: Float64Array.from({ length: ROWS }, (_, j) => j), + b: Float64Array.from({ length: ROWS }, (_, j) => j * 2), + c: Float64Array.from({ length: ROWS }, (_, j) => j * 3), + }); + insertColumn(df, 1, "new_col", newCol); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + const df = new DataFrame({ + a: Float64Array.from({ length: ROWS }, (_, j) => j), + b: Float64Array.from({ length: ROWS }, (_, j) => j * 2), + c: Float64Array.from({ length: ROWS }, (_, j) => j * 3), + }); + insertColumn(df, 1, "new_col", newCol); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "insert_column", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_qcut.ts b/benchmarks/tsb/bench_qcut.ts new file mode 100644 index 00000000..2e203a09 --- /dev/null +++ b/benchmarks/tsb/bench_qcut.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: qcut (10 quantile bins) on 100k-element Series; values are (i % 10000) * 0.01, repeating every 10,000 elements + */ +import { Series, qcut } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => (i % 10000) * 0.01); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + qcut(s, 10); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + qcut(s, 10); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "qcut", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git 
a/benchmarks/tsb/bench_rolling_count.ts b/benchmarks/tsb/bench_rolling_count.ts new file mode 100644 index 00000000..c33f1d80 --- /dev/null +++ b/benchmarks/tsb/bench_rolling_count.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: rolling count with window=100 on 100k-element Series (with NaNs: every 10th element is NaN, the rest hold their index) + */ +import { Series } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => i % 10 === 0 ? NaN : i); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + s.rolling(100).count(); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + s.rolling(100).count(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "rolling_count", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_rolling_kurt.ts b/benchmarks/tsb/bench_rolling_kurt.ts new file mode 100644 index 00000000..82b89410 --- /dev/null +++ b/benchmarks/tsb/bench_rolling_kurt.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: rolling kurt with window=100 on 100k-element Series of Math.sin(i * 0.01) + */ +import { Series, rollingKurt } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + rollingKurt(s, 100); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + rollingKurt(s, 100); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "rolling_kurt", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_rolling_max.ts b/benchmarks/tsb/bench_rolling_max.ts new file mode 100644 index 00000000..a8529c9f --- /dev/null +++ b/benchmarks/tsb/bench_rolling_max.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: rolling max with window=100 on 
100k-element Series of Math.cos(i * 0.01) + */ +import { Series } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.cos(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + s.rolling(100).max(); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + s.rolling(100).max(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "rolling_max", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_rolling_median.ts b/benchmarks/tsb/bench_rolling_median.ts new file mode 100644 index 00000000..525d65c1 --- /dev/null +++ b/benchmarks/tsb/bench_rolling_median.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: rolling median with window=100 on 100k-element Series of Math.sin(i * 0.1) + */ +import { Series } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.1)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + s.rolling(100).median(); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + s.rolling(100).median(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "rolling_median", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_rolling_min.ts b/benchmarks/tsb/bench_rolling_min.ts new file mode 100644 index 00000000..62be77c1 --- /dev/null +++ b/benchmarks/tsb/bench_rolling_min.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: rolling min with window=100 on 100k-element Series of Math.sin(i * 0.01) + */ +import { Series } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; 
i++) { + s.rolling(100).min(); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + s.rolling(100).min(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "rolling_min", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_rolling_quantile.ts b/benchmarks/tsb/bench_rolling_quantile.ts new file mode 100644 index 00000000..4e0d8c6e --- /dev/null +++ b/benchmarks/tsb/bench_rolling_quantile.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: rolling quantile (0.75) with window=100 on 100k-element Series of Math.sin(i * 0.01) + */ +import { Series, rollingQuantile } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + rollingQuantile(s, 100, 0.75); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + rollingQuantile(s, 100, 0.75); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "rolling_quantile", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_rolling_sem.ts b/benchmarks/tsb/bench_rolling_sem.ts new file mode 100644 index 00000000..6063891e --- /dev/null +++ b/benchmarks/tsb/bench_rolling_sem.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: rolling SEM with window=100 on 100k-element Series of Math.sin(i * 0.01) + */ +import { Series, rollingSem } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + rollingSem(s, 100); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + rollingSem(s, 100); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: 
"rolling_sem", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_rolling_skew.ts b/benchmarks/tsb/bench_rolling_skew.ts new file mode 100644 index 00000000..13488367 --- /dev/null +++ b/benchmarks/tsb/bench_rolling_skew.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: rolling skew with window=100 on 100k-element Series of Math.sin(i * 0.01) + */ +import { Series, rollingSkew } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + rollingSkew(s, 100); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + rollingSkew(s, 100); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "rolling_skew", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_rolling_var.ts b/benchmarks/tsb/bench_rolling_var.ts new file mode 100644 index 00000000..404758bc --- /dev/null +++ b/benchmarks/tsb/bench_rolling_var.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: rolling var with window=100 on 100k-element Series of Math.sin(i * 0.05) + */ +import { Series } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.05)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + s.rolling(100).var(); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + s.rolling(100).var(); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "rolling_var", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_series_clip.ts b/benchmarks/tsb/bench_series_clip.ts new file mode 100644 index 00000000..32747ba7 --- /dev/null +++ b/benchmarks/tsb/bench_series_clip.ts @@ -0,0 
+1,23 @@ +/** + * Benchmark: series clip (lower=-1, upper=1) on 100k-element Series of Math.sin(i * 0.01) * 2, so inputs lie between -2 and 2 + */ +import { Series, clip } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01) * 2); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + clip(s, { lower: -1, upper: 1 }); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + clip(s, { lower: -1, upper: 1 }); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "series_clip", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_series_cummax.ts b/benchmarks/tsb/bench_series_cummax.ts new file mode 100644 index 00000000..99fa7ff3 --- /dev/null +++ b/benchmarks/tsb/bench_series_cummax.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: series cummax on 100k-element Series of Math.sin(i * 0.01) + */ +import { Series, cummax } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + cummax(s); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + cummax(s); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "series_cummax", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_series_cummin.ts b/benchmarks/tsb/bench_series_cummin.ts new file mode 100644 index 00000000..44ae4601 --- /dev/null +++ b/benchmarks/tsb/bench_series_cummin.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: series cummin on 100k-element Series of Math.sin(i * 0.01) + */ +import { Series, cummin } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); 
+const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + cummin(s); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + cummin(s); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "series_cummin", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_series_cumprod.ts b/benchmarks/tsb/bench_series_cumprod.ts new file mode 100644 index 00000000..a57740b7 --- /dev/null +++ b/benchmarks/tsb/bench_series_cumprod.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: series cumprod on 10k-element Series; every factor is 1 plus (i % 1000) * 0.0001; uses 5 warm-up and 20 timed iterations + */ +import { Series, cumprod } from "../../src/index.js"; + +const ROWS = 10_000; +const WARMUP = 5; +const ITERATIONS = 20; + +const data = Float64Array.from({ length: ROWS }, (_, i) => 1 + (i % 1000) * 0.0001); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + cumprod(s); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + cumprod(s); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "series_cumprod", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_series_mask.ts b/benchmarks/tsb/bench_series_mask.ts new file mode 100644 index 00000000..ccc3be51 --- /dev/null +++ b/benchmarks/tsb/bench_series_mask.ts @@ -0,0 +1,24 @@ +/** + * Benchmark: seriesMask (replace values < 0 with NaN) on 100k-element Series of Math.sin(i * 0.01); the boolean condition array is precomputed outside the timed loop + */ +import { Series, seriesMask } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); +const cond = Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01) < 0); + +for (let i = 0; i < WARMUP; i++) { + seriesMask(s, cond); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + seriesMask(s, cond); +} +const total = performance.now() 
- start; + +console.log(JSON.stringify({ function: "series_mask", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_series_round.ts b/benchmarks/tsb/bench_series_round.ts new file mode 100644 index 00000000..eb369b4a --- /dev/null +++ b/benchmarks/tsb/bench_series_round.ts @@ -0,0 +1,23 @@ +/** + * Benchmark: series round (2 decimals) on 100k-element Series; values are (i % 10000) * 0.1234 + */ +import { Series, seriesRound } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => (i % 10000) * 0.1234); +const s = new Series(data); + +for (let i = 0; i < WARMUP; i++) { + seriesRound(s, { decimals: 2 }); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + seriesRound(s, { decimals: 2 }); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "series_round", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_series_where.ts b/benchmarks/tsb/bench_series_where.ts new file mode 100644 index 00000000..07ffa652 --- /dev/null +++ b/benchmarks/tsb/bench_series_where.ts @@ -0,0 +1,24 @@ +/** + * Benchmark: seriesWhere (keep values > 0) on 100k-element Series of Math.sin(i * 0.01); the boolean condition array is precomputed outside the timed loop + */ +import { Series, seriesWhere } from "../../src/index.js"; + +const ROWS = 100_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const data = Float64Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01)); +const s = new Series(data); +const cond = Array.from({ length: ROWS }, (_, i) => Math.sin(i * 0.01) > 0); + +for (let i = 0; i < WARMUP; i++) { + seriesWhere(s, cond); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + seriesWhere(s, cond); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "series_where", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git 
a/benchmarks/tsb/bench_stack.ts b/benchmarks/tsb/bench_stack.ts new file mode 100644 index 00000000..9ee30c1c --- /dev/null +++ b/benchmarks/tsb/bench_stack.ts @@ -0,0 +1,28 @@ +/** + * Benchmark: stack on 1000x5 DataFrame (five Float64Array columns, built once outside the timed loop) + */ +import { DataFrame, stack } from "../../src/index.js"; + +const ROWS = 1_000; +const WARMUP = 3; +const ITERATIONS = 20; + +const df = new DataFrame({ + a: Float64Array.from({ length: ROWS }, (_, i) => i), + b: Float64Array.from({ length: ROWS }, (_, i) => i * 2), + c: Float64Array.from({ length: ROWS }, (_, i) => i * 3), + d: Float64Array.from({ length: ROWS }, (_, i) => i * 4), + e: Float64Array.from({ length: ROWS }, (_, i) => i * 5), +}); + +for (let i = 0; i < WARMUP; i++) { + stack(df); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + stack(df); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "stack", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_to_dict_oriented.ts b/benchmarks/tsb/bench_to_dict_oriented.ts new file mode 100644 index 00000000..670bdaca --- /dev/null +++ b/benchmarks/tsb/bench_to_dict_oriented.ts @@ -0,0 +1,28 @@ +/** + * Benchmark: toDictOriented (records orient) on 1000x5 DataFrame (four Float64Array columns and one string column) + */ +import { DataFrame, toDictOriented } from "../../src/index.js"; + +const ROWS = 1_000; +const WARMUP = 3; +const ITERATIONS = 20; + +const df = new DataFrame({ + a: Float64Array.from({ length: ROWS }, (_, i) => i), + b: Float64Array.from({ length: ROWS }, (_, i) => i * 2), + c: Float64Array.from({ length: ROWS }, (_, i) => i * 3), + d: Array.from({ length: ROWS }, (_, i) => `str${i}`), + e: Float64Array.from({ length: ROWS }, (_, i) => i * 0.5), +}); + +for (let i = 0; i < WARMUP; i++) { + toDictOriented(df, "records"); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + toDictOriented(df, "records"); +} +const total = performance.now() - start; + 
+console.log(JSON.stringify({ function: "to_dict_oriented", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total })); diff --git a/benchmarks/tsb/bench_wide_to_long.ts b/benchmarks/tsb/bench_wide_to_long.ts new file mode 100644 index 00000000..fad7d235 --- /dev/null +++ b/benchmarks/tsb/bench_wide_to_long.ts @@ -0,0 +1,28 @@ +/** + * Benchmark: wideToLong on 1000x4 DataFrame (columns: id, value_2020, value_2021, value_2022; plain arrays, built once outside the timed loop) + */ +import { DataFrame, wideToLong } from "../../src/index.js"; + +const ROWS = 1_000; +const WARMUP = 3; +const ITERATIONS = 10; + +const ids = Array.from({ length: ROWS }, (_, i) => i); +const df = new DataFrame({ + id: ids, + value_2020: ids.map(i => i * 1.0), + value_2021: ids.map(i => i * 1.1), + value_2022: ids.map(i => i * 1.2), +}); + +for (let i = 0; i < WARMUP; i++) { + wideToLong(df, { stubnames: ["value"], i: "id", j: "year", sep: "_" }); +} + +const start = performance.now(); +for (let i = 0; i < ITERATIONS; i++) { + wideToLong(df, { stubnames: ["value"], i: "id", j: "year", sep: "_" }); +} +const total = performance.now() - start; + +console.log(JSON.stringify({ function: "wide_to_long", mean_ms: total / ITERATIONS, iterations: ITERATIONS, total_ms: total }));