diff --git a/benchmarks/pandas/bench_between.py b/benchmarks/pandas/bench_between.py new file mode 100644 index 00000000..6a73fa47 --- /dev/null +++ b/benchmarks/pandas/bench_between.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.between(-1.0, 1.0) +N = 200 +t0 = time.perf_counter() +for _ in range(N): s.between(-1.0, 1.0) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "between", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_clip.py b/benchmarks/pandas/bench_clip.py new file mode 100644 index 00000000..f2e99fae --- /dev/null +++ b/benchmarks/pandas/bench_clip.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.clip(-1.0, 1.0) +N = 100 +t0 = time.perf_counter() +for _ in range(N): s.clip(-1.0, 1.0) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "clip", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_combine_first.py b/benchmarks/pandas/bench_combine_first.py new file mode 100644 index 00000000..763b6a15 --- /dev/null +++ b/benchmarks/pandas/bench_combine_first.py @@ -0,0 +1,12 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s1 = pd.Series(rng.standard_normal(100_000)) +s2 = pd.Series(rng.standard_normal(100_000)) +# Put NaN in s1 +s1[::3] = float("nan") +for _ in range(3): s1.combine_first(s2) +N = 50 +t0 = time.perf_counter() +for _ in range(N): s1.combine_first(s2) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "combine_first", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_corr.py b/benchmarks/pandas/bench_corr.py new file mode 100644 index 00000000..9647dd5c 
--- /dev/null +++ b/benchmarks/pandas/bench_corr.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +df = pd.DataFrame(rng.standard_normal((10_000, 5)), columns=list("ABCDE")) +for _ in range(3): df.corr() +N = 50 +t0 = time.perf_counter() +for _ in range(N): df.corr() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "corr", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_cov.py b/benchmarks/pandas/bench_cov.py new file mode 100644 index 00000000..331adf79 --- /dev/null +++ b/benchmarks/pandas/bench_cov.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +df = pd.DataFrame(rng.standard_normal((10_000, 5)), columns=list("ABCDE")) +for _ in range(3): df.cov() +N = 100 +t0 = time.perf_counter() +for _ in range(N): df.cov() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "cov", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_crosstab.py b/benchmarks/pandas/bench_crosstab.py new file mode 100644 index 00000000..3ba7a237 --- /dev/null +++ b/benchmarks/pandas/bench_crosstab.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +a = pd.Series(rng.choice(["A","B","C","D"], size=10_000)) +b = pd.Series(rng.choice(["X","Y","Z"], size=10_000)) +for _ in range(3): pd.crosstab(a, b) +N = 30 +t0 = time.perf_counter() +for _ in range(N): pd.crosstab(a, b) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "crosstab", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_cummax.py b/benchmarks/pandas/bench_cummax.py new file mode 100644 index 00000000..63c57326 --- /dev/null +++ b/benchmarks/pandas/bench_cummax.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = 
pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.cummax() +N = 100 +t0 = time.perf_counter() +for _ in range(N): s.cummax() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "cummax", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_cummin.py b/benchmarks/pandas/bench_cummin.py new file mode 100644 index 00000000..114e5d07 --- /dev/null +++ b/benchmarks/pandas/bench_cummin.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.cummin() +N = 100 +t0 = time.perf_counter() +for _ in range(N): s.cummin() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "cummin", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_cut.py b/benchmarks/pandas/bench_cut.py new file mode 100644 index 00000000..adcd729c --- /dev/null +++ b/benchmarks/pandas/bench_cut.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +bins = [-4, -2, -1, 0, 1, 2, 4] +for _ in range(3): pd.cut(s, bins=bins, labels=False) +N = 50 +t0 = time.perf_counter() +for _ in range(N): pd.cut(s, bins=bins, labels=False) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "cut", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_dataframe_apply_col.py b/benchmarks/pandas/bench_dataframe_apply_col.py new file mode 100644 index 00000000..e8bdabc9 --- /dev/null +++ b/benchmarks/pandas/bench_dataframe_apply_col.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +df = pd.DataFrame(rng.standard_normal((10_000, 5)), columns=list("ABCDE")) +for _ in range(3): df.apply(lambda col: col.mean(), axis=0) +N = 100 +t0 = time.perf_counter() +for _ in 
range(N): df.apply(lambda col: col.mean(), axis=0) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "dataframe_apply_col", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_dataframe_astype.py b/benchmarks/pandas/bench_dataframe_astype.py new file mode 100644 index 00000000..d0a018e1 --- /dev/null +++ b/benchmarks/pandas/bench_dataframe_astype.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +df = pd.DataFrame({"a": rng.standard_normal(100_000), "b": rng.integers(0, 1000, size=100_000)}) +for _ in range(3): df.astype({"a": "float32", "b": "int32"}) +N = 100 +t0 = time.perf_counter() +for _ in range(N): df.astype({"a": "float32", "b": "int32"}) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "dataframe_astype", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_dataframe_head_tail.py b/benchmarks/pandas/bench_dataframe_head_tail.py new file mode 100644 index 00000000..74ce896f --- /dev/null +++ b/benchmarks/pandas/bench_dataframe_head_tail.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +df = pd.DataFrame({"a": rng.standard_normal(100_000), "b": rng.integers(0, 1000, size=100_000)}) +for _ in range(3): df.head(10); df.tail(10) +N = 1000 +t0 = time.perf_counter() +for _ in range(N): df.head(10); df.tail(10) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "dataframe_head_tail", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_diff.py b/benchmarks/pandas/bench_diff.py new file mode 100644 index 00000000..5a13b6dc --- /dev/null +++ b/benchmarks/pandas/bench_diff.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.diff(1) +N = 200 +t0 
= time.perf_counter() +for _ in range(N): s.diff(1) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "diff", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_drop_duplicates.py b/benchmarks/pandas/bench_drop_duplicates.py new file mode 100644 index 00000000..cbd94ccf --- /dev/null +++ b/benchmarks/pandas/bench_drop_duplicates.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.integers(0, 5_000, size=100_000)) +for _ in range(3): s.drop_duplicates() +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.drop_duplicates() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "drop_duplicates", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_duplicated.py b/benchmarks/pandas/bench_duplicated.py new file mode 100644 index 00000000..ecbc904a --- /dev/null +++ b/benchmarks/pandas/bench_duplicated.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.integers(0, 5_000, size=100_000)) +for _ in range(3): s.duplicated() +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.duplicated() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "duplicated", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_expanding_mean.py b/benchmarks/pandas/bench_expanding_mean.py new file mode 100644 index 00000000..bfe41a2f --- /dev/null +++ b/benchmarks/pandas/bench_expanding_mean.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.expanding().mean() +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.expanding().mean() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "expanding_mean", "mean_ms": 
elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_explode.py b/benchmarks/pandas/bench_explode.py new file mode 100644 index 00000000..f473ad62 --- /dev/null +++ b/benchmarks/pandas/bench_explode.py @@ -0,0 +1,11 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +# Each row has a list of 1-5 items +data = [[int(x) for x in rng.integers(0, 100, size=rng.integers(1, 6))] for _ in range(10_000)] +s = pd.Series(data) +for _ in range(3): s.explode() +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.explode() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "explode", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_groupby_agg.py b/benchmarks/pandas/bench_groupby_agg.py new file mode 100644 index 00000000..7b72ae90 --- /dev/null +++ b/benchmarks/pandas/bench_groupby_agg.py @@ -0,0 +1,13 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +df = pd.DataFrame({ + "group": rng.choice(["A","B","C","D","E"], size=100_000), + "val1": rng.standard_normal(100_000), + "val2": rng.standard_normal(100_000), +}) +for _ in range(3): df.groupby("group").agg({"val1": ["mean","std","min","max"], "val2": ["sum","count"]}) +N = 30 +t0 = time.perf_counter() +for _ in range(N): df.groupby("group").agg({"val1": ["mean","std","min","max"], "val2": ["sum","count"]}) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "groupby_agg", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_interpolate.py b/benchmarks/pandas/bench_interpolate.py new file mode 100644 index 00000000..659c0afb --- /dev/null +++ b/benchmarks/pandas/bench_interpolate.py @@ -0,0 +1,11 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +arr = rng.standard_normal(100_000).tolist() +for i in range(0, 100_000, 10): arr[i] = None +s = 
pd.Series(arr, dtype="float64") +for _ in range(3): s.interpolate() +N = 30 +t0 = time.perf_counter() +for _ in range(N): s.interpolate() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "interpolate", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_isin.py b/benchmarks/pandas/bench_isin.py new file mode 100644 index 00000000..cc3f23ea --- /dev/null +++ b/benchmarks/pandas/bench_isin.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.integers(0, 10_000, size=100_000)) +test_set = list(range(0, 10_000, 4)) +for _ in range(3): s.isin(test_set) +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.isin(test_set) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "isin", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_mask.py b/benchmarks/pandas/bench_mask.py new file mode 100644 index 00000000..c2bff435 --- /dev/null +++ b/benchmarks/pandas/bench_mask.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +cond = s < 0 +for _ in range(3): s.mask(cond, 0.0) +N = 100 +t0 = time.perf_counter() +for _ in range(N): s.mask(cond, 0.0) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "mask", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_melt.py b/benchmarks/pandas/bench_melt.py new file mode 100644 index 00000000..0806ca4c --- /dev/null +++ b/benchmarks/pandas/bench_melt.py @@ -0,0 +1,14 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +df = pd.DataFrame({ + "id": range(10_000), + "A": rng.standard_normal(10_000), + "B": rng.standard_normal(10_000), + "C": rng.standard_normal(10_000), +}) +for _ in range(3): df.melt(id_vars=["id"], 
value_vars=["A","B","C"]) +N = 50 +t0 = time.perf_counter() +for _ in range(N): df.melt(id_vars=["id"], value_vars=["A","B","C"]) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "melt", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_nlargest.py b/benchmarks/pandas/bench_nlargest.py new file mode 100644 index 00000000..253181bf --- /dev/null +++ b/benchmarks/pandas/bench_nlargest.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.nlargest(10) +N = 100 +t0 = time.perf_counter() +for _ in range(N): s.nlargest(10) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "nlargest", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_nsmallest.py b/benchmarks/pandas/bench_nsmallest.py new file mode 100644 index 00000000..3035cd3e --- /dev/null +++ b/benchmarks/pandas/bench_nsmallest.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.nsmallest(10) +N = 100 +t0 = time.perf_counter() +for _ in range(N): s.nsmallest(10) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "nsmallest", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_pct_change.py b/benchmarks/pandas/bench_pct_change.py new file mode 100644 index 00000000..c942fcf4 --- /dev/null +++ b/benchmarks/pandas/bench_pct_change.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.pct_change() +N = 200 +t0 = time.perf_counter() +for _ in range(N): s.pct_change() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "pct_change", "mean_ms": 
elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_pivot.py b/benchmarks/pandas/bench_pivot.py new file mode 100644 index 00000000..e6b94a3d --- /dev/null +++ b/benchmarks/pandas/bench_pivot.py @@ -0,0 +1,15 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +rows = 100 +cols = 20 +df = pd.DataFrame({ + "row": np.repeat(range(rows), cols), + "col": list(range(cols)) * rows, + "val": rng.standard_normal(rows * cols), +}) +for _ in range(3): df.pivot(index="row", columns="col", values="val") +N = 100 +t0 = time.perf_counter() +for _ in range(N): df.pivot(index="row", columns="col", values="val") +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "pivot", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_qcut.py b/benchmarks/pandas/bench_qcut.py new file mode 100644 index 00000000..ee55df18 --- /dev/null +++ b/benchmarks/pandas/bench_qcut.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): pd.qcut(s, q=10, labels=False, duplicates="drop") +N = 30 +t0 = time.perf_counter() +for _ in range(N): pd.qcut(s, q=10, labels=False, duplicates="drop") +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "qcut", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_rank.py b/benchmarks/pandas/bench_rank.py new file mode 100644 index 00000000..eb902781 --- /dev/null +++ b/benchmarks/pandas/bench_rank.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.integers(0, 1000, size=100_000)) +# warm-up +for _ in range(3): s.rank(method="average") +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.rank(method="average") +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "rank", 
"mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_resample.py b/benchmarks/pandas/bench_resample.py new file mode 100644 index 00000000..61e98c8a --- /dev/null +++ b/benchmarks/pandas/bench_resample.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +idx = pd.date_range("2020-01-01", periods=100_000, freq="1min") +s = pd.Series(rng.standard_normal(100_000), index=idx) +for _ in range(3): s.resample("1h").mean() +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.resample("1h").mean() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "resample", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_rolling_std.py b/benchmarks/pandas/bench_rolling_std.py new file mode 100644 index 00000000..51b82e8f --- /dev/null +++ b/benchmarks/pandas/bench_rolling_std.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.rolling(window=20).std() +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.rolling(window=20).std() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "rolling_std", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_rolling_var.py b/benchmarks/pandas/bench_rolling_var.py new file mode 100644 index 00000000..48d39320 --- /dev/null +++ b/benchmarks/pandas/bench_rolling_var.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.rolling(window=20).var() +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.rolling(window=20).var() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "rolling_var", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff 
--git a/benchmarks/pandas/bench_sample.py b/benchmarks/pandas/bench_sample.py new file mode 100644 index 00000000..3e338857 --- /dev/null +++ b/benchmarks/pandas/bench_sample.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.sample(n=1000, random_state=42) +N = 100 +t0 = time.perf_counter() +for _ in range(N): s.sample(n=1000, random_state=42) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "sample", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_series_abs.py b/benchmarks/pandas/bench_series_abs.py new file mode 100644 index 00000000..0731d88b --- /dev/null +++ b/benchmarks/pandas/bench_series_abs.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +for _ in range(3): s.abs() +N = 200 +t0 = time.perf_counter() +for _ in range(N): s.abs() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "series_abs", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_series_map.py b/benchmarks/pandas/bench_series_map.py new file mode 100644 index 00000000..e536f40d --- /dev/null +++ b/benchmarks/pandas/bench_series_map.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.integers(0, 1_000, size=100_000)) +mapping = {i: i * 2 for i in range(1_000)} +for _ in range(3): s.map(mapping) +N = 30 +t0 = time.perf_counter() +for _ in range(N): s.map(mapping) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "series_map", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_series_nunique.py b/benchmarks/pandas/bench_series_nunique.py new file mode 100644 index 00000000..5136acc1 --- /dev/null +++ 
b/benchmarks/pandas/bench_series_nunique.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.integers(0, 5_000, size=100_000)) +for _ in range(3): s.nunique() +N = 200 +t0 = time.perf_counter() +for _ in range(N): s.nunique() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "series_nunique", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_series_replace.py b/benchmarks/pandas/bench_series_replace.py new file mode 100644 index 00000000..e7a23698 --- /dev/null +++ b/benchmarks/pandas/bench_series_replace.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.integers(0, 10, size=100_000)) +mapping = {i: i*10 for i in range(10)} +for _ in range(3): s.replace(mapping) +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.replace(mapping) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "series_replace", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_stack.py b/benchmarks/pandas/bench_stack.py new file mode 100644 index 00000000..cec4e2ed --- /dev/null +++ b/benchmarks/pandas/bench_stack.py @@ -0,0 +1,9 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +df = pd.DataFrame(rng.standard_normal((1000, 20)), columns=[f"c{i}" for i in range(20)]) +for _ in range(3): df.stack() +N = 100 +t0 = time.perf_counter() +for _ in range(N): df.stack() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "stack", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_string_contains.py b/benchmarks/pandas/bench_string_contains.py new file mode 100644 index 00000000..364d6965 --- /dev/null +++ b/benchmarks/pandas/bench_string_contains.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = 
np.random.default_rng(42) +words = ["apple", "banana", "cherry", "date", "elderberry"] +s = pd.Series(rng.choice(words, size=100_000)) +for _ in range(3): s.str.contains("an", regex=False) +N = 50 +t0 = time.perf_counter() +for _ in range(N): s.str.contains("an", regex=False) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "string_contains", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_unstack.py b/benchmarks/pandas/bench_unstack.py new file mode 100644 index 00000000..26297b02 --- /dev/null +++ b/benchmarks/pandas/bench_unstack.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +idx = pd.MultiIndex.from_product([range(1000), range(20)], names=["row", "col"]) +s = pd.Series(rng.standard_normal(20_000), index=idx) +for _ in range(3): s.unstack() +N = 100 +t0 = time.perf_counter() +for _ in range(N): s.unstack() +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "unstack", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/pandas/bench_where.py b/benchmarks/pandas/bench_where.py new file mode 100644 index 00000000..1a9baa77 --- /dev/null +++ b/benchmarks/pandas/bench_where.py @@ -0,0 +1,10 @@ +import pandas as pd, json, time, numpy as np +rng = np.random.default_rng(42) +s = pd.Series(rng.standard_normal(100_000)) +mask = s > 0 +for _ in range(3): s.where(mask, 0.0) +N = 100 +t0 = time.perf_counter() +for _ in range(N): s.where(mask, 0.0) +elapsed = time.perf_counter() - t0 +print(json.dumps({"function": "where", "mean_ms": elapsed/N*1000, "iterations": N, "total_ms": elapsed*1000})) diff --git a/benchmarks/results.json b/benchmarks/results.json index c883f334..942cd5bc 100644 --- a/benchmarks/results.json +++ b/benchmarks/results.json @@ -1,5 +1,38 @@ { "benchmarks": [ + { + "function": "between", + "tsb": null, + "pandas": { + "function": "between", + "mean_ms": 
0.16103834500000858, + "iterations": 200, + "total_ms": 32.207669000001715 + }, + "ratio": null + }, + { + "function": "clip", + "tsb": null, + "pandas": { + "function": "clip", + "mean_ms": 1.990909230000284, + "iterations": 100, + "total_ms": 199.0909230000284 + }, + "ratio": null + }, + { + "function": "combine_first", + "tsb": null, + "pandas": { + "function": "combine_first", + "mean_ms": 0.3822191200003999, + "iterations": 50, + "total_ms": 19.110956000019996 + }, + "ratio": null + }, { "function": "concat", "tsb": null, @@ -11,6 +44,72 @@ }, "ratio": null }, + { + "function": "corr", + "tsb": null, + "pandas": { + "function": "corr", + "mean_ms": 0.5609774200001993, + "iterations": 50, + "total_ms": 28.048871000009967 + }, + "ratio": null + }, + { + "function": "cov", + "tsb": null, + "pandas": { + "function": "cov", + "mean_ms": 0.1933979199998248, + "iterations": 100, + "total_ms": 19.33979199998248 + }, + "ratio": null + }, + { + "function": "crosstab", + "tsb": null, + "pandas": { + "function": "crosstab", + "mean_ms": 5.3182911999992175, + "iterations": 30, + "total_ms": 159.54873599997651 + }, + "ratio": null + }, + { + "function": "cummax", + "tsb": null, + "pandas": { + "function": "cummax", + "mean_ms": 1.1497397300001921, + "iterations": 100, + "total_ms": 114.97397300001921 + }, + "ratio": null + }, + { + "function": "cummin", + "tsb": null, + "pandas": { + "function": "cummin", + "mean_ms": 1.0978203100000883, + "iterations": 100, + "total_ms": 109.78203100000883 + }, + "ratio": null + }, + { + "function": "cut", + "tsb": null, + "pandas": { + "function": "cut", + "mean_ms": 2.4623991599992223, + "iterations": 50, + "total_ms": 123.11995799996112 + }, + "ratio": null + }, { "function": "dataframe_apply", "tsb": null, @@ -22,6 +121,28 @@ }, "ratio": null }, + { + "function": "dataframe_apply_col", + "tsb": null, + "pandas": { + "function": "dataframe_apply_col", + "mean_ms": 0.32499793999988924, + "iterations": 100, + "total_ms": 
32.499793999988924 + }, + "ratio": null + }, + { + "function": "dataframe_astype", + "tsb": null, + "pandas": { + "function": "dataframe_astype", + "mean_ms": 0.6506819100002303, + "iterations": 100, + "total_ms": 65.06819100002303 + }, + "ratio": null + }, { "function": "dataframe_creation", "tsb": null, @@ -55,6 +176,17 @@ }, "ratio": null }, + { + "function": "dataframe_head_tail", + "tsb": null, + "pandas": { + "function": "dataframe_head_tail", + "mean_ms": 0.06511352400002579, + "iterations": 1000, + "total_ms": 65.11352400002579 + }, + "ratio": null + }, { "function": "dataframe_rename", "tsb": null, @@ -88,6 +220,39 @@ }, "ratio": null }, + { + "function": "diff", + "tsb": null, + "pandas": { + "function": "diff", + "mean_ms": 0.14170949499998642, + "iterations": 200, + "total_ms": 28.341898999997284 + }, + "ratio": null + }, + { + "function": "drop_duplicates", + "tsb": null, + "pandas": { + "function": "drop_duplicates", + "mean_ms": 1.1205483399999139, + "iterations": 50, + "total_ms": 56.027416999995694 + }, + "ratio": null + }, + { + "function": "duplicated", + "tsb": null, + "pandas": { + "function": "duplicated", + "mean_ms": 1.3844710600005783, + "iterations": 50, + "total_ms": 69.22355300002891 + }, + "ratio": null + }, { "function": "ewm_mean", "tsb": null, @@ -99,6 +264,39 @@ }, "ratio": null }, + { + "function": "expanding_mean", + "tsb": null, + "pandas": { + "function": "expanding_mean", + "mean_ms": 1.7139983399999892, + "iterations": 50, + "total_ms": 85.69991699999946 + }, + "ratio": null + }, + { + "function": "explode", + "tsb": null, + "pandas": { + "function": "explode", + "mean_ms": 1.02996705999999, + "iterations": 50, + "total_ms": 51.4983529999995 + }, + "ratio": null + }, + { + "function": "groupby_agg", + "tsb": null, + "pandas": { + "function": "groupby_agg", + "mean_ms": 10.97728106666788, + "iterations": 30, + "total_ms": 329.3184320000364 + }, + "ratio": null + }, { "function": "groupby_mean", "tsb": null, @@ -110,6 +308,50 @@ 
}, "ratio": null }, + { + "function": "interpolate", + "tsb": null, + "pandas": { + "function": "interpolate", + "mean_ms": 3.7753171000000902, + "iterations": 30, + "total_ms": 113.25951300000271 + }, + "ratio": null + }, + { + "function": "isin", + "tsb": null, + "pandas": { + "function": "isin", + "mean_ms": 1.6645995200008201, + "iterations": 50, + "total_ms": 83.229976000041 + }, + "ratio": null + }, + { + "function": "mask", + "tsb": null, + "pandas": { + "function": "mask", + "mean_ms": 0.5851815699998042, + "iterations": 100, + "total_ms": 58.51815699998042 + }, + "ratio": null + }, + { + "function": "melt", + "tsb": null, + "pandas": { + "function": "melt", + "mean_ms": 2.064804379999714, + "iterations": 50, + "total_ms": 103.2402189999857 + }, + "ratio": null + }, { "function": "merge", "tsb": null, @@ -121,6 +363,50 @@ }, "ratio": null }, + { + "function": "nlargest", + "tsb": null, + "pandas": { + "function": "nlargest", + "mean_ms": 2.1809330900003943, + "iterations": 100, + "total_ms": 218.09330900003943 + }, + "ratio": null + }, + { + "function": "nsmallest", + "tsb": null, + "pandas": { + "function": "nsmallest", + "mean_ms": 1.172936479999862, + "iterations": 100, + "total_ms": 117.2936479999862 + }, + "ratio": null + }, + { + "function": "pct_change", + "tsb": null, + "pandas": { + "function": "pct_change", + "mean_ms": 1.1997432100000083, + "iterations": 200, + "total_ms": 239.94864200000166 + }, + "ratio": null + }, + { + "function": "pivot", + "tsb": null, + "pandas": { + "function": "pivot", + "mean_ms": 0.8490880499999776, + "iterations": 100, + "total_ms": 84.90880499999776 + }, + "ratio": null + }, { "function": "pivot_table", "tsb": null, @@ -132,6 +418,28 @@ }, "ratio": null }, + { + "function": "qcut", + "tsb": null, + "pandas": { + "function": "qcut", + "mean_ms": 7.631286899999168, + "iterations": 30, + "total_ms": 228.93860699997504 + }, + "ratio": null + }, + { + "function": "rank", + "tsb": null, + "pandas": { + "function": "rank", 
+ "mean_ms": 8.728866699999571, + "iterations": 50, + "total_ms": 436.44333499997856 + }, + "ratio": null + }, { "function": "read_csv", "tsb": null, @@ -143,6 +451,17 @@ }, "ratio": null }, + { + "function": "resample", + "tsb": null, + "pandas": { + "function": "resample", + "mean_ms": 1.357887120000214, + "iterations": 50, + "total_ms": 67.8943560000107 + }, + "ratio": null + }, { "function": "rolling_mean", "tsb": null, @@ -154,6 +473,50 @@ }, "ratio": null }, + { + "function": "rolling_std", + "tsb": null, + "pandas": { + "function": "rolling_std", + "mean_ms": 3.0888083800005006, + "iterations": 50, + "total_ms": 154.44041900002503 + }, + "ratio": null + }, + { + "function": "rolling_var", + "tsb": null, + "pandas": { + "function": "rolling_var", + "mean_ms": 2.3259480600006555, + "iterations": 50, + "total_ms": 116.29740300003277 + }, + "ratio": null + }, + { + "function": "sample", + "tsb": null, + "pandas": { + "function": "sample", + "mean_ms": 1.8081685200002084, + "iterations": 100, + "total_ms": 180.81685200002084 + }, + "ratio": null + }, + { + "function": "series_abs", + "tsb": null, + "pandas": { + "function": "series_abs", + "mean_ms": 0.04412354999999479, + "iterations": 200, + "total_ms": 8.824709999998959 + }, + "ratio": null + }, { "function": "series_arithmetic", "tsb": null, @@ -172,7 +535,7 @@ "function": "series_creation", "mean_ms": 7.607, "iterations": 50, - "total_ms": 380.349 + "total_ms": 380.35 }, "ratio": null }, @@ -198,6 +561,39 @@ }, "ratio": null }, + { + "function": "series_map", + "tsb": null, + "pandas": { + "function": "series_map", + "mean_ms": 1.7419646666667177, + "iterations": 30, + "total_ms": 52.25894000000153 + }, + "ratio": null + }, + { + "function": "series_nunique", + "tsb": null, + "pandas": { + "function": "series_nunique", + "mean_ms": 0.9312331699999277, + "iterations": 200, + "total_ms": 186.24663399998553 + }, + "ratio": null + }, + { + "function": "series_replace", + "tsb": null, + "pandas": { + "function": 
"series_replace", + "mean_ms": 2.7626269599988973, + "iterations": 50, + "total_ms": 138.13134799994486 + }, + "ratio": null + }, { "function": "series_shift", "tsb": null, @@ -241,7 +637,51 @@ "total_ms": 92.12644899997713 }, "ratio": null + }, + { + "function": "stack", + "tsb": null, + "pandas": { + "function": "stack", + "mean_ms": 0.40035767000006217, + "iterations": 100, + "total_ms": 40.03576700000622 + }, + "ratio": null + }, + { + "function": "string_contains", + "tsb": null, + "pandas": { + "function": "string_contains", + "mean_ms": 11.66764360000002, + "iterations": 50, + "total_ms": 583.382180000001 + }, + "ratio": null + }, + { + "function": "unstack", + "tsb": null, + "pandas": { + "function": "unstack", + "mean_ms": 0.8087003100001766, + "iterations": 100, + "total_ms": 80.87003100001766 + }, + "ratio": null + }, + { + "function": "where", + "tsb": null, + "pandas": { + "function": "where", + "mean_ms": 0.5678684399998701, + "iterations": 100, + "total_ms": 56.78684399998701 + }, + "ratio": null } ], - "timestamp": "2026-04-12T15:46:00Z" + "timestamp": "2026-04-12T18:48:00Z" } \ No newline at end of file diff --git a/benchmarks/tsb/bench_between.ts b/benchmarks/tsb/bench_between.ts new file mode 100644 index 00000000..eb80e000 --- /dev/null +++ b/benchmarks/tsb/bench_between.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.between(-1.0, 1.0); +const N = 200; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.between(-1.0, 1.0); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "between", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_clip.ts 
b/benchmarks/tsb/bench_clip.ts new file mode 100644 index 00000000..cffe127a --- /dev/null +++ b/benchmarks/tsb/bench_clip.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.clip(-1.0, 1.0); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.clip(-1.0, 1.0); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "clip", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_combine_first.ts b/benchmarks/tsb/bench_combine_first.ts new file mode 100644 index 00000000..83b61b9c --- /dev/null +++ b/benchmarks/tsb/bench_combine_first.ts @@ -0,0 +1,14 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const d1: (number | null)[] = Array.from({ length: 100_000 }, (_, i) => i % 3 === 0 ? 
null : rand() * 3); +const d2 = Array.from({ length: 100_000 }, () => rand() * 3); +const s1 = new Series(d1); +const s2 = new Series(d2); +for (let i = 0; i < 3; i++) s1.combineFirst(s2); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s1.combineFirst(s2); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "combine_first", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_corr.ts b/benchmarks/tsb/bench_corr.ts new file mode 100644 index 00000000..6263dee1 --- /dev/null +++ b/benchmarks/tsb/bench_corr.ts @@ -0,0 +1,17 @@ +import { DataFrame } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const df = new DataFrame({ + A: Array.from({ length: 10_000 }, () => rand() * 3), + B: Array.from({ length: 10_000 }, () => rand() * 3), + C: Array.from({ length: 10_000 }, () => rand() * 3), + D: Array.from({ length: 10_000 }, () => rand() * 3), + E: Array.from({ length: 10_000 }, () => rand() * 3), +}); +for (let i = 0; i < 3; i++) df.corr(); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) df.corr(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "corr", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_cov.ts b/benchmarks/tsb/bench_cov.ts new file mode 100644 index 00000000..2c9fbcbe --- /dev/null +++ b/benchmarks/tsb/bench_cov.ts @@ -0,0 +1,17 @@ +import { DataFrame } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const df = new DataFrame({ + A: Array.from({ length: 10_000 }, () => rand() * 3), + B: Array.from({ length: 10_000 }, () => rand() * 3), + C: Array.from({ length: 10_000 }, () => 
rand() * 3), + D: Array.from({ length: 10_000 }, () => rand() * 3), + E: Array.from({ length: 10_000 }, () => rand() * 3), +}); +for (let i = 0; i < 3; i++) df.cov(); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) df.cov(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "cov", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_crosstab.ts b/benchmarks/tsb/bench_crosstab.ts new file mode 100644 index 00000000..3aed89e0 --- /dev/null +++ b/benchmarks/tsb/bench_crosstab.ts @@ -0,0 +1,15 @@ +import { crosstab } from "tsb"; +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0xffffffff; }; }; +const rand = rng(42); +const choices = ["A","B","C","D"]; +const choicesB = ["X","Y","Z"]; +const a = new Series(Array.from({ length: 10_000 }, () => choices[Math.floor(rand() * 4)])); +const b = new Series(Array.from({ length: 10_000 }, () => choicesB[Math.floor(rand() * 3)])); +for (let i = 0; i < 3; i++) crosstab(a, b); +const N = 30; +const t0 = performance.now(); +for (let i = 0; i < N; i++) crosstab(a, b); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "crosstab", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_cummax.ts b/benchmarks/tsb/bench_cummax.ts new file mode 100644 index 00000000..a537b210 --- /dev/null +++ b/benchmarks/tsb/bench_cummax.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.cummax(); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) 
s.cummax(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "cummax", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_cummin.ts b/benchmarks/tsb/bench_cummin.ts new file mode 100644 index 00000000..1b773565 --- /dev/null +++ b/benchmarks/tsb/bench_cummin.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.cummin(); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.cummin(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "cummin", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_cut.ts b/benchmarks/tsb/bench_cut.ts new file mode 100644 index 00000000..8a04c22b --- /dev/null +++ b/benchmarks/tsb/bench_cut.ts @@ -0,0 +1,14 @@ +import { cut } from "tsb"; +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +const bins = [-4, -2, -1, 0, 1, 2, 4]; +for (let i = 0; i < 3; i++) cut(s, bins); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) cut(s, bins); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "cut", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_dataframe_apply_col.ts b/benchmarks/tsb/bench_dataframe_apply_col.ts new file mode 100644 index 00000000..1bcc7341 --- /dev/null +++ b/benchmarks/tsb/bench_dataframe_apply_col.ts @@ -0,0 +1,17 
@@ +import { DataFrame } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const df = new DataFrame({ + A: Array.from({ length: 10_000 }, () => rand() * 3), + B: Array.from({ length: 10_000 }, () => rand() * 3), + C: Array.from({ length: 10_000 }, () => rand() * 3), + D: Array.from({ length: 10_000 }, () => rand() * 3), + E: Array.from({ length: 10_000 }, () => rand() * 3), +}); +for (let i = 0; i < 3; i++) df.apply((col: unknown) => { const c = col as number[]; return c.reduce((a, b) => a + b, 0) / c.length; }, { axis: 0 }); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) df.apply((col: unknown) => { const c = col as number[]; return c.reduce((a, b) => a + b, 0) / c.length; }, { axis: 0 }); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "dataframe_apply_col", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_dataframe_astype.ts b/benchmarks/tsb/bench_dataframe_astype.ts new file mode 100644 index 00000000..d0988753 --- /dev/null +++ b/benchmarks/tsb/bench_dataframe_astype.ts @@ -0,0 +1,14 @@ +import { DataFrame } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0xffffffff; }; }; +const rand = rng(42); +const df = new DataFrame({ + a: Array.from({ length: 100_000 }, () => (rand() * 2 - 1) * 3), + b: Array.from({ length: 100_000 }, () => Math.floor(rand() * 1000)), +}); +for (let i = 0; i < 3; i++) df.astype({ a: "float32", b: "int32" }); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) df.astype({ a: "float32", b: "int32" }); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "dataframe_astype", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git 
a/benchmarks/tsb/bench_dataframe_head_tail.ts b/benchmarks/tsb/bench_dataframe_head_tail.ts new file mode 100644 index 00000000..8c3e3f43 --- /dev/null +++ b/benchmarks/tsb/bench_dataframe_head_tail.ts @@ -0,0 +1,14 @@ +import { DataFrame } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0xffffffff; }; }; +const rand = rng(42); +const df = new DataFrame({ + a: Array.from({ length: 100_000 }, () => (rand() * 2 - 1) * 3), + b: Array.from({ length: 100_000 }, () => Math.floor(rand() * 1000)), +}); +for (let i = 0; i < 3; i++) { df.head(10); df.tail(10); } +const N = 1000; +const t0 = performance.now(); +for (let i = 0; i < N; i++) { df.head(10); df.tail(10); } +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "dataframe_head_tail", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_diff.ts b/benchmarks/tsb/bench_diff.ts new file mode 100644 index 00000000..1fbb70e5 --- /dev/null +++ b/benchmarks/tsb/bench_diff.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.diff(1); +const N = 200; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.diff(1); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "diff", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_drop_duplicates.ts b/benchmarks/tsb/bench_drop_duplicates.ts new file mode 100644 index 00000000..0acb3b20 --- /dev/null +++ b/benchmarks/tsb/bench_drop_duplicates.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { 
s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0xffffffff; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => Math.floor(rand() * 5_000)); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.dropDuplicates(); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.dropDuplicates(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "drop_duplicates", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_duplicated.ts b/benchmarks/tsb/bench_duplicated.ts new file mode 100644 index 00000000..0539ac8a --- /dev/null +++ b/benchmarks/tsb/bench_duplicated.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0xffffffff; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => Math.floor(rand() * 5_000)); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.duplicated(); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.duplicated(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "duplicated", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_expanding_mean.ts b/benchmarks/tsb/bench_expanding_mean.ts new file mode 100644 index 00000000..da49527b --- /dev/null +++ b/benchmarks/tsb/bench_expanding_mean.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.expanding().mean(); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.expanding().mean(); +const 
elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "expanding_mean", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_explode.ts b/benchmarks/tsb/bench_explode.ts new file mode 100644 index 00000000..a42bd4ed --- /dev/null +++ b/benchmarks/tsb/bench_explode.ts @@ -0,0 +1,15 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0xffffffff; }; }; +const rand = rng(42); +const data = Array.from({ length: 10_000 }, () => { + const len = Math.floor(rand() * 5) + 1; + return Array.from({ length: len }, () => Math.floor(rand() * 100)); +}); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.explode(); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.explode(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "explode", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_groupby_agg.ts b/benchmarks/tsb/bench_groupby_agg.ts new file mode 100644 index 00000000..11eb6994 --- /dev/null +++ b/benchmarks/tsb/bench_groupby_agg.ts @@ -0,0 +1,16 @@ +import { DataFrame } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0xffffffff; }; }; +const rand = rng(42); +const groups = ["A","B","C","D","E"]; +const df = new DataFrame({ + group: Array.from({ length: 100_000 }, () => groups[Math.floor(rand() * 5)]), + val1: Array.from({ length: 100_000 }, () => (rand() * 2 - 1) * 3), + val2: Array.from({ length: 100_000 }, () => (rand() * 2 - 1) * 3), +}); +for (let i = 0; i < 3; i++) df.groupby("group").agg({ val1: ["mean","std","min","max"], val2: ["sum","count"] }); +const N = 30; +const t0 = performance.now(); +for (let i = 0; i < N; i++) df.groupby("group").agg({ val1: ["mean","std","min","max"], val2: ["sum","count"] 
}); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "groupby_agg", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_interpolate.ts b/benchmarks/tsb/bench_interpolate.ts new file mode 100644 index 00000000..d0551860 --- /dev/null +++ b/benchmarks/tsb/bench_interpolate.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data: (number | null)[] = Array.from({ length: 100_000 }, (_, i) => i % 10 === 0 ? null : rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.interpolate(); +const N = 30; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.interpolate(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "interpolate", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_isin.ts b/benchmarks/tsb/bench_isin.ts new file mode 100644 index 00000000..b4c95280 --- /dev/null +++ b/benchmarks/tsb/bench_isin.ts @@ -0,0 +1,13 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0xffffffff; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => Math.floor(rand() * 10_000)); +const s = new Series(data); +const testSet = Array.from({ length: 2500 }, (_, i) => i * 4); +for (let i = 0; i < 3; i++) s.isin(testSet); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.isin(testSet); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "isin", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_mask.ts b/benchmarks/tsb/bench_mask.ts new file mode 100644 index 00000000..646748ac --- /dev/null +++ 
b/benchmarks/tsb/bench_mask.ts @@ -0,0 +1,13 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +const cond = s.map((v: number) => v < 0); +for (let i = 0; i < 3; i++) s.mask(cond, 0.0); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.mask(cond, 0.0); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "mask", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_melt.ts b/benchmarks/tsb/bench_melt.ts new file mode 100644 index 00000000..f6b05fa9 --- /dev/null +++ b/benchmarks/tsb/bench_melt.ts @@ -0,0 +1,16 @@ +import { DataFrame } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const df = new DataFrame({ + id: Array.from({ length: 10_000 }, (_, i) => i), + A: Array.from({ length: 10_000 }, () => rand() * 3), + B: Array.from({ length: 10_000 }, () => rand() * 3), + C: Array.from({ length: 10_000 }, () => rand() * 3), +}); +for (let i = 0; i < 3; i++) df.melt({ idVars: ["id"], valueVars: ["A","B","C"] }); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) df.melt({ idVars: ["id"], valueVars: ["A","B","C"] }); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "melt", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_nlargest.ts b/benchmarks/tsb/bench_nlargest.ts new file mode 100644 index 00000000..ed242b1d --- /dev/null +++ b/benchmarks/tsb/bench_nlargest.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 
1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.nlargest(10); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.nlargest(10); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "nlargest", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_nsmallest.ts b/benchmarks/tsb/bench_nsmallest.ts new file mode 100644 index 00000000..fe5114b4 --- /dev/null +++ b/benchmarks/tsb/bench_nsmallest.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.nsmallest(10); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.nsmallest(10); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "nsmallest", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_pct_change.ts b/benchmarks/tsb/bench_pct_change.ts new file mode 100644 index 00000000..57752eec --- /dev/null +++ b/benchmarks/tsb/bench_pct_change.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.pctChange(); +const N = 200; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.pctChange(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ 
function: "pct_change", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_pivot.ts b/benchmarks/tsb/bench_pivot.ts new file mode 100644 index 00000000..3d68ee3e --- /dev/null +++ b/benchmarks/tsb/bench_pivot.ts @@ -0,0 +1,23 @@ +import { DataFrame } from "tsb"; + +const rows = 100; +const cols = 20; +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const rowArr: number[] = []; +const colArr: number[] = []; +const valArr: number[] = []; +for (let r = 0; r < rows; r++) { + for (let c = 0; c < cols; c++) { + rowArr.push(r); + colArr.push(c); + valArr.push(rand() * 3); + } +} +const df = new DataFrame({ row: rowArr, col: colArr, val: valArr }); +for (let i = 0; i < 3; i++) df.pivot({ index: "row", columns: "col", values: "val" }); +const N = 100; +const t0 = performance.now(); +for (let i = 0; i < N; i++) df.pivot({ index: "row", columns: "col", values: "val" }); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "pivot", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_qcut.ts b/benchmarks/tsb/bench_qcut.ts new file mode 100644 index 00000000..9522f5cd --- /dev/null +++ b/benchmarks/tsb/bench_qcut.ts @@ -0,0 +1,13 @@ +import { qcut } from "tsb"; +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) qcut(s, 10); +const N = 30; +const t0 = performance.now(); +for (let i = 0; i < N; i++) qcut(s, 10); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "qcut", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff 
--git a/benchmarks/tsb/bench_rank.ts b/benchmarks/tsb/bench_rank.ts new file mode 100644 index 00000000..7b0b2eaa --- /dev/null +++ b/benchmarks/tsb/bench_rank.ts @@ -0,0 +1,15 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { + let s = seed; + return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0xffffffff; }; +}; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => Math.floor(rand() * 1000)); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.rank(); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.rank(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "rank", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_resample.ts b/benchmarks/tsb/bench_resample.ts new file mode 100644 index 00000000..3f962abd --- /dev/null +++ b/benchmarks/tsb/bench_resample.ts @@ -0,0 +1,15 @@ +import { Series } from "tsb"; + +// minute-resolution timestamps for 100k points starting 2020-01-01 +const base = new Date("2020-01-01T00:00:00Z").getTime(); +const idx = Array.from({ length: 100_000 }, (_, i) => new Date(base + i * 60_000)); +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data, { index: idx }); +for (let i = 0; i < 3; i++) s.resample("1h").mean(); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.resample("1h").mean(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "resample", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_rolling_std.ts b/benchmarks/tsb/bench_rolling_std.ts new file mode 100644 index 00000000..526d1859 --- /dev/null +++ b/benchmarks/tsb/bench_rolling_std.ts @@ -0,0 
+1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.rolling(20).std(); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.rolling(20).std(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "rolling_std", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_rolling_var.ts b/benchmarks/tsb/bench_rolling_var.ts new file mode 100644 index 00000000..0f002d0f --- /dev/null +++ b/benchmarks/tsb/bench_rolling_var.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.rolling(20).var(); +const N = 50; +const t0 = performance.now(); +for (let i = 0; i < N; i++) s.rolling(20).var(); +const elapsed = performance.now() - t0; +console.log(JSON.stringify({ function: "rolling_var", mean_ms: elapsed / N, iterations: N, total_ms: elapsed })); diff --git a/benchmarks/tsb/bench_sample.ts b/benchmarks/tsb/bench_sample.ts new file mode 100644 index 00000000..4935485f --- /dev/null +++ b/benchmarks/tsb/bench_sample.ts @@ -0,0 +1,12 @@ +import { Series } from "tsb"; + +const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; }; +const rand = rng(42); +const data = Array.from({ length: 100_000 }, () => rand() * 3); +const s = new Series(data); +for (let i = 0; i < 3; i++) s.sample(1000); +const N = 100; +const t0 = 
performance.now();
+for (let i = 0; i < N; i++) s.sample(1000);
+const elapsed = performance.now() - t0;
+console.log(JSON.stringify({ function: "sample", mean_ms: elapsed / N, iterations: N, total_ms: elapsed }));
diff --git a/benchmarks/tsb/bench_series_abs.ts b/benchmarks/tsb/bench_series_abs.ts
new file mode 100644
index 00000000..3e8687f7
--- /dev/null
+++ b/benchmarks/tsb/bench_series_abs.ts
@@ -0,0 +1,16 @@
+// Benchmark: times s.abs() on a 100,000-element Series of values in [-3, 3].
+import { Series } from "tsb";
+
+// Seeded linear congruential generator (same data every run); yields values in [-1, 1].
+const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; };
+const rand = rng(42);
+const data = Array.from({ length: 100_000 }, () => rand() * 3);
+const s = new Series(data);
+// Warm-up calls, excluded from the measurement.
+for (let i = 0; i < 3; i++) s.abs();
+// Timed calls; performance.now() reports milliseconds.
+const N = 200;
+const t0 = performance.now();
+for (let i = 0; i < N; i++) s.abs();
+const elapsed = performance.now() - t0;
+console.log(JSON.stringify({ function: "series_abs", mean_ms: elapsed / N, iterations: N, total_ms: elapsed }));
diff --git a/benchmarks/tsb/bench_series_map.ts b/benchmarks/tsb/bench_series_map.ts
new file mode 100644
index 00000000..29ebbbc7
--- /dev/null
+++ b/benchmarks/tsb/bench_series_map.ts
@@ -0,0 +1,17 @@
+// Benchmark: times s.map(mapping) on a 100,000-element Series of integers in 0..999.
+import { Series } from "tsb";
+
+// Seeded LCG (same data every run); dividing by 2^32 keeps values in [0, 1) so Math.floor(rand() * k) < k.
+const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0x100000000; }; };
+const rand = rng(42);
+const data = Array.from({ length: 100_000 }, () => Math.floor(rand() * 1_000));
+const s = new Series(data);
+const mapping = new Map(Array.from({ length: 1_000 }, (_, i) => [i, i * 2] as [number, number]));
+// Warm-up calls, excluded from the measurement.
+for (let i = 0; i < 3; i++) s.map(mapping);
+// Timed calls; performance.now() reports milliseconds.
+const N = 30;
+const t0 = performance.now();
+for (let i = 0; i < N; i++) s.map(mapping);
+const elapsed = performance.now() - t0;
+console.log(JSON.stringify({ function: "series_map", mean_ms: elapsed / N, iterations: N, total_ms: elapsed }));
diff --git a/benchmarks/tsb/bench_series_nunique.ts b/benchmarks/tsb/bench_series_nunique.ts
new file mode 100644
index 00000000..20f278bd
--- /dev/null
+++ b/benchmarks/tsb/bench_series_nunique.ts
@@ -0,0 +1,16 @@
+// Benchmark: times s.nunique() on a 100,000-element Series of integers in 0..4999.
+import { Series } from "tsb";
+
+// Seeded LCG (same data every run); dividing by 2^32 keeps values in [0, 1) so Math.floor(rand() * k) < k.
+const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0x100000000; }; };
+const rand = rng(42);
+const data = Array.from({ length: 100_000 }, () => Math.floor(rand() * 5_000));
+const s = new Series(data);
+// Warm-up calls, excluded from the measurement.
+for (let i = 0; i < 3; i++) s.nunique();
+// Timed calls; performance.now() reports milliseconds.
+const N = 200;
+const t0 = performance.now();
+for (let i = 0; i < N; i++) s.nunique();
+const elapsed = performance.now() - t0;
+console.log(JSON.stringify({ function: "series_nunique", mean_ms: elapsed / N, iterations: N, total_ms: elapsed }));
diff --git a/benchmarks/tsb/bench_series_replace.ts b/benchmarks/tsb/bench_series_replace.ts
new file mode 100644
index 00000000..60d1b655
--- /dev/null
+++ b/benchmarks/tsb/bench_series_replace.ts
@@ -0,0 +1,17 @@
+// Benchmark: times s.replace(mapping) on a 100,000-element Series of integers in 0..9.
+import { Series } from "tsb";
+
+// Seeded LCG (same data every run); dividing by 2^32 keeps values in [0, 1) so Math.floor(rand() * k) < k.
+const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0x100000000; }; };
+const rand = rng(42);
+const data = Array.from({ length: 100_000 }, () => Math.floor(rand() * 10));
+const s = new Series(data);
+const mapping = new Map(Array.from({ length: 10 }, (_, i) => [i, i * 10] as [number, number]));
+// Warm-up calls, excluded from the measurement.
+for (let i = 0; i < 3; i++) s.replace(mapping);
+// Timed calls; performance.now() reports milliseconds.
+const N = 50;
+const t0 = performance.now();
+for (let i = 0; i < N; i++) s.replace(mapping);
+const elapsed = performance.now() - t0;
+console.log(JSON.stringify({ function: "series_replace", mean_ms: elapsed / N, iterations: N, total_ms: elapsed }));
diff --git a/benchmarks/tsb/bench_stack.ts b/benchmarks/tsb/bench_stack.ts
new file mode 100644
index 00000000..1b92a2b3
--- /dev/null
+++ b/benchmarks/tsb/bench_stack.ts
@@ -0,0 +1,17 @@
+// Benchmark: times df.stack() on a DataFrame with 20 columns (c0..c19) of 1,000 values each.
+import { DataFrame } from "tsb";
+
+// Seeded linear congruential generator (same data every run); yields values in [-1, 1].
+const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; };
+const rand = rng(42);
+const cols: Record<string, number[]> = {};
+for (let i = 0; i < 20; i++) cols["c" + i] = Array.from({ length: 1000 }, () => rand() * 3);
+const df = new DataFrame(cols);
+// Warm-up calls, excluded from the measurement.
+for (let i = 0; i < 3; i++) df.stack();
+// Timed calls; performance.now() reports milliseconds.
+const N = 100;
+const t0 = performance.now();
+for (let i = 0; i < N; i++) df.stack();
+const elapsed = performance.now() - t0;
+console.log(JSON.stringify({ function: "stack", mean_ms: elapsed / N, iterations: N, total_ms: elapsed }));
diff --git a/benchmarks/tsb/bench_string_contains.ts b/benchmarks/tsb/bench_string_contains.ts
new file mode 100644
index 00000000..33eb0305
--- /dev/null
+++ b/benchmarks/tsb/bench_string_contains.ts
@@ -0,0 +1,17 @@
+// Benchmark: times s.str.contains("an") on a 100,000-element Series of strings drawn from five words.
+import { Series } from "tsb";
+
+const words = ["apple", "banana", "cherry", "date", "elderberry"];
+// Seeded LCG (same data every run); dividing by 2^32 keeps values in [0, 1) so the words index never reaches 5.
+const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return (s >>> 0) / 0x100000000; }; };
+const rand = rng(42);
+const data = Array.from({ length: 100_000 }, () => words[Math.floor(rand() * 5)]);
+const s = new Series(data);
+// Warm-up calls, excluded from the measurement.
+for (let i = 0; i < 3; i++) s.str.contains("an");
+// Timed calls; performance.now() reports milliseconds.
+const N = 50;
+const t0 = performance.now();
+for (let i = 0; i < N; i++) s.str.contains("an");
+const elapsed = performance.now() - t0;
+console.log(JSON.stringify({ function: "string_contains", mean_ms: elapsed / N, iterations: N, total_ms: elapsed }));
diff --git a/benchmarks/tsb/bench_unstack.ts b/benchmarks/tsb/bench_unstack.ts
new file mode 100644
index 00000000..22d36117
--- /dev/null
+++ b/benchmarks/tsb/bench_unstack.ts
@@ -0,0 +1,17 @@
+// Benchmark: times s.unstack() on a 20,000-element Series indexed by [Math.floor(i / 20), i % 20] pairs.
+import { Series } from "tsb";
+
+// Seeded linear congruential generator (same data every run); yields values in [-1, 1].
+const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; };
+const rand = rng(42);
+const data = Array.from({ length: 20_000 }, () => rand() * 3);
+const index = Array.from({ length: 20_000 }, (_, i) => [Math.floor(i / 20), i % 20] as [number, number]);
+const s = new Series(data, { index });
+// Warm-up calls, excluded from the measurement.
+for (let i = 0; i < 3; i++) s.unstack();
+// Timed calls; performance.now() reports milliseconds.
+const N = 100;
+const t0 = performance.now();
+for (let i = 0; i < N; i++) s.unstack();
+const elapsed = performance.now() - t0;
+console.log(JSON.stringify({ function: "unstack", mean_ms: elapsed / N, iterations: N, total_ms: elapsed }));
diff --git a/benchmarks/tsb/bench_where.ts b/benchmarks/tsb/bench_where.ts
new file mode 100644
index 00000000..c87bfaa5
--- /dev/null
+++ b/benchmarks/tsb/bench_where.ts
@@ -0,0 +1,17 @@
+// Benchmark: times s.where(cond, 0.0) on a 100,000-element Series, with cond built by mapping each value to v > 0.
+import { Series } from "tsb";
+
+// Seeded linear congruential generator (same data every run); yields values in [-1, 1].
+const rng = (seed: number) => { let s = seed; return () => { s = (s * 1664525 + 1013904223) & 0xffffffff; return ((s >>> 0) / 0xffffffff) * 2 - 1; }; };
+const rand = rng(42);
+const data = Array.from({ length: 100_000 }, () => rand() * 3);
+const s = new Series(data);
+const cond = s.map((v: number) => v > 0);
+// Warm-up calls, excluded from the measurement.
+for (let i = 0; i < 3; i++) s.where(cond, 0.0);
+// Timed calls; performance.now() reports milliseconds.
+const N = 100;
+const t0 = performance.now();
+for (let i = 0; i < N; i++) s.where(cond, 0.0);
+const elapsed = performance.now() - t0;
+console.log(JSON.stringify({ function: "where", mean_ms: elapsed / N, iterations: N, total_ms: elapsed }));